GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/amd/lbr.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/msr.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK 0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding type of branches are not recorded
 */
#define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */
#define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC 2 /* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND 4 /* Near indirect calls */
#define LBR_SELECT_RET_NEAR 5 /* Near returns */
#define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH 8 /* Far branches */

#define LBR_KERNEL BIT(LBR_SELECT_KERNEL)
#define LBR_USER BIT(LBR_SELECT_USER)
#define LBR_JCC BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP -1 /* unsupported filter */
#define LBR_IGNORE 0

#define LBR_ANY \
        (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \
         LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
        union {
                struct {
                        u64 ip:58;
                        u64 ip_sign_ext:5;
                        u64 mispredict:1;
                } split;
                u64 full;
        } from;

        union {
                struct {
                        u64 ip:58;
                        u64 ip_sign_ext:3;
                        u64 reserved:1;
                        u64 spec:1;
                        u64 valid:1;
                } split;
                u64 full;
        } to;
};
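/*
 * Each LBR stack entry is exposed as a pair of MSRs starting at
 * MSR_AMD_SAMP_BR_FROM: the From record of entry 'idx' lives at
 * base + idx * 2 and the matching To record at base + idx * 2 + 1,
 * which is why the helpers below scale the index by two.
 */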
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
        wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
        wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
        u64 val;

        rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

        return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
        u64 val;

        rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

        return val;
}
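/*
 * Branch IPs are recorded as truncated bitfields; sign-extend them from
 * the CPU's reported virtual address width (boot_cpu_data.x86_virt_bits)
 * so that perf receives canonical 64-bit addresses.
 */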
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
        u32 shift = 64 - boot_cpu_data.x86_virt_bits;

        return (u64)(((s64)ip << shift) >> shift);
}
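/*
 * Post-process the captured records: fix up the From address of fused
 * branches, drop entries whose decoded type does not match the software
 * filter in cpuc->br_sel, and compact the stack so only valid entries
 * remain.
 */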
static void amd_pmu_lbr_filter(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int br_sel = cpuc->br_sel, offset, type, i, j;
        bool compress = false;
        bool fused_only = false;
        u64 from, to;

        /* If sampling all branches, there is nothing to filter */
        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                fused_only = true;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
                type = branch_type_fused(from, to, 0, &offset);

                /*
                 * Adjust the branch from address in case of instruction
                 * fusion where it points to an instruction preceding the
                 * actual branch
                 */
                if (offset) {
                        cpuc->lbr_entries[i].from += offset;
                        if (fused_only)
                                continue;
                }

                /* If type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0; /* mark invalid */
                        compress = true;
                }

                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
                        cpuc->lbr_entries[i].type = common_branch_type(type);
        }

        if (!compress)
                return;

        /* Remove all invalid entries */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr)
                                cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}
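/*
 * Maps the (valid << 1) | spec value taken from an LBR To record to the
 * PERF_BR_SPEC_* / PERF_BR_NON_SPEC_* constant reported in
 * perf_branch_entry::spec (see amd_pmu_lbr_read()).
 */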
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
        PERF_BR_SPEC_NA,
        PERF_BR_SPEC_WRONG_PATH,
        PERF_BR_NON_SPEC_CORRECT_PATH,
        PERF_BR_SPEC_CORRECT_PATH,
};

void amd_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_branch_entry *br = cpuc->lbr_entries;
        struct branch_entry entry;
        int out = 0, idx, i;

        if (!cpuc->lbr_users)
                return;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                entry.from.full = amd_pmu_lbr_get_from(i);
                entry.to.full = amd_pmu_lbr_get_to(i);

                /*
                 * Check if a branch has been logged; if valid = 0, spec = 0
                 * then no branch was recorded; if reserved = 1 then an
                 * erroneous branch was recorded (see Erratum 1452)
                 */
                if ((!entry.to.split.valid && !entry.to.split.spec) ||
                    entry.to.split.reserved)
                        continue;

                perf_clear_branch_entry_bitfields(br + out);

                br[out].from = sign_ext_branch_ip(entry.from.split.ip);
                br[out].to = sign_ext_branch_ip(entry.to.split.ip);
                br[out].mispred = entry.from.split.mispredict;
                br[out].predicted = !br[out].mispred;

                /*
                 * Set branch speculation information using the status of
                 * the valid and spec bits.
                 *
                 * When valid = 0, spec = 0, no branch was recorded and the
                 * entry is discarded as seen above.
                 *
                 * When valid = 0, spec = 1, the recorded branch was
                 * speculative but took the wrong path.
                 *
                 * When valid = 1, spec = 0, the recorded branch was
                 * non-speculative but took the correct path.
                 *
                 * When valid = 1, spec = 1, the recorded branch was
                 * speculative and took the correct path.
                 */
                idx = (entry.to.split.valid << 1) | entry.to.split.spec;
                br[out].spec = lbr_spec_map[idx];
                out++;
        }

        cpuc->lbr_stack.nr = out;

        /*
         * Internal register renaming ensures that LBR From[0] and
         * LBR To[0] always represent the TOS
         */
        cpuc->lbr_stack.hw_idx = 0;

        /* Perform further software filtering */
        amd_pmu_lbr_filter();
}
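/*
 * Translation of perf_branch_sample_type shifts into LBR_SELECT bits.
 * Entries marked LBR_NOT_SUPP make amd_pmu_lbr_setup_filter() reject the
 * event with -EOPNOTSUPP; LBR_IGNORE entries are accepted but contribute
 * no hardware filter bits.
 */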
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE,

        [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,

        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,

        [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP,
};
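/*
 * Build both filter representations for the event: reg->reg gets the
 * X86_BR_* mask used for software filtering in amd_pmu_lbr_filter(),
 * while reg->config gets the LBR_SELECT value programmed into the MSR,
 * where a set bit suppresses the corresponding branch type.
 */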
static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, v;
        int i;

        /* No LBR support */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* Ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;

        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                mask |= X86_BR_IND_JMP;

        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
                mask |= X86_BR_TYPE_SAVE;

        reg->reg = mask;
        mask = 0;

        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
                if (!(br_type & BIT_ULL(i)))
                        continue;

                v = lbr_select_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGNORE)
                        mask |= v;
        }

        /* Filter bits operate in suppress mode */
        reg->config = mask ^ LBR_SELECT_MASK;

        return 0;
}

int amd_pmu_lbr_hw_config(struct perf_event *event)
{
        int ret = 0;

        ret = amd_pmu_lbr_setup_filter(event);
        if (!ret)
                event->attach_state |= PERF_ATTACH_SCHED_CB;

        return ret;
}

void amd_pmu_lbr_reset(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int i;

        if (!x86_pmu.lbr_nr)
                return;

        /* Reset all branch records individually */
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                amd_pmu_lbr_set_from(i, 0);
                amd_pmu_lbr_set_to(i, 0);
        }

        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
        wrmsrq(MSR_AMD64_LBR_SELECT, 0);
}
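/*
 * Called when an LBR-using event is added on this CPU: latch the event's
 * filter settings, register the scheduler callback, bump the lbr_users
 * refcount and, for the first user of a freshly started event, reset the
 * stack so stale records are not carried over.
 */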
void amd_pmu_lbr_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event)) {
                cpuc->lbr_select = 1;
                cpuc->lbr_sel->config = reg->config;
                cpuc->br_sel = reg->reg;
        }

        perf_sched_cb_inc(event->pmu);

        if (!cpuc->lbr_users++ && !event->total_time_running)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event))
                cpuc->lbr_select = 0;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
                            struct task_struct *task, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * A context switch can flip the address space and LBR entries are
         * not tagged with an identifier. Hence, branches cannot be resolved
         * from the old address space and the LBR records should be wiped.
         */
        if (cpuc->lbr_users && sched_in)
                amd_pmu_lbr_reset();
}
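/*
 * Program the hardware branch filter (if one was requested), optionally
 * freeze the LBR stack on PMI when the CPU supports it, and finally set
 * DBG_EXTN_CFG_LBRV2EN to start recording.
 */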
void amd_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 lbr_select, dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        /* Set hardware branch filter */
        if (cpuc->lbr_select) {
                lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
                wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select);
        }

        if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
                rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
                wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        }

        rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        __amd_pmu_lbr_disable();
}
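/*
 * Probe LBRv2 support: requires PMU version 2 and X86_FEATURE_AMD_LBR_V2;
 * the stack depth is read from CPUID leaf 0x80000022 EBX.
 */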
__init int amd_pmu_lbr_init(void)
{
        union cpuid_0x80000022_ebx ebx;

        if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
                return -EOPNOTSUPP;

        /* Set number of entries */
        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
        x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

        pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

        return 0;
}