GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/intel/lbr.c
1
// SPDX-License-Identifier: GPL-2.0
2
#include <linux/perf_event.h>
3
#include <linux/types.h>
4
5
#include <asm/cpu_device_id.h>
6
#include <asm/perf_event.h>
7
#include <asm/msr.h>
8
9
#include "../perf_event.h"
10
11
/*
12
* Intel LBR_SELECT bits
13
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
14
*
15
* Hardware branch filter (not available on all CPUs)
16
*/
17
#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
18
#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
19
#define LBR_JCC_BIT 2 /* do not capture conditional branches */
20
#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
21
#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
22
#define LBR_RETURN_BIT 5 /* do not capture near returns */
23
#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
24
#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
25
#define LBR_FAR_BIT 8 /* do not capture far branches */
26
#define LBR_CALL_STACK_BIT 9 /* enable call stack */
27
28
/*
29
* The following bit only exists in Linux; we mask it out before writing it to
30
* the actual MSR. But it helps the constraint perf code to understand
31
* that this is a separate configuration.
32
*/
33
#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */
34
35
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
36
#define LBR_USER (1 << LBR_USER_BIT)
37
#define LBR_JCC (1 << LBR_JCC_BIT)
38
#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
39
#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
40
#define LBR_RETURN (1 << LBR_RETURN_BIT)
41
#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
42
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
43
#define LBR_FAR (1 << LBR_FAR_BIT)
44
#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
45
#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
46
47
#define LBR_PLM (LBR_KERNEL | LBR_USER)
48
49
#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
50
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
51
#define LBR_IGN 0 /* ignored */
52
53
#define LBR_ANY \
54
(LBR_JCC |\
55
LBR_REL_CALL |\
56
LBR_IND_CALL |\
57
LBR_RETURN |\
58
LBR_REL_JMP |\
59
LBR_IND_JMP |\
60
LBR_FAR)
61
62
#define LBR_FROM_FLAG_MISPRED BIT_ULL(63)
63
#define LBR_FROM_FLAG_IN_TX BIT_ULL(62)
64
#define LBR_FROM_FLAG_ABORT BIT_ULL(61)
65
66
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
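/*
 * On LBR formats that record flags in the FROM MSR, the layout implied by
 * the definitions above is: bit 63 = mispredict, bit 62 = in-transaction,
 * bit 61 = transaction abort, while bits 60:59 are still part of the
 * sign-extended branch address (used below to reconstruct bits 62:61 when
 * the TSX flags are not in use).
 */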
67
68
/*
69
* Intel LBR_CTL bits
70
*
71
* Hardware branch filter for Arch LBR
72
*/
73
#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */
74
#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */
75
#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */
76
#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */
77
#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */
78
#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */
79
#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */
80
#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */
81
#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */
82
#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */
83
84
#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT)
85
#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT)
86
#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT)
87
#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT)
88
#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT)
89
#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT)
90
#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT)
91
#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT)
92
#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT)
93
#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
94
95
#define ARCH_LBR_ANY \
96
(ARCH_LBR_JCC |\
97
ARCH_LBR_REL_JMP |\
98
ARCH_LBR_IND_JMP |\
99
ARCH_LBR_REL_CALL |\
100
ARCH_LBR_IND_CALL |\
101
ARCH_LBR_RETURN |\
102
ARCH_LBR_OTHER_BRANCH)
103
104
#define ARCH_LBR_CTL_MASK 0x7f000e
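/*
 * 0x7f000e is simply the OR of all valid LBR_CTL filter bits defined
 * above: bits 1-3 (CPL and call-stack) plus bits 16-22 (branch-type
 * filter).
 */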
105
106
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
107
108
static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
109
{
110
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
111
return !!(config & ARCH_LBR_CALL_STACK);
112
113
return !!(config & LBR_CALL_STACK);
114
}
115
116
/*
117
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
118
* otherwise it becomes nearly impossible to get a reliable stack.
119
*/
120
121
static void __intel_pmu_lbr_enable(bool pmi)
122
{
123
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
124
u64 debugctl, lbr_select = 0, orig_debugctl;
125
126
/*
127
* No need to unfreeze manually, as v4 can do that as part
128
* of the GLOBAL_STATUS ack.
129
*/
130
if (pmi && x86_pmu.version >= 4)
131
return;
132
133
/*
134
* No need to reprogram LBR_SELECT in a PMI, as it
135
* did not change.
136
*/
137
if (cpuc->lbr_sel)
138
lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
139
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
140
wrmsrq(MSR_LBR_SELECT, lbr_select);
141
142
rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
143
orig_debugctl = debugctl;
144
145
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
146
debugctl |= DEBUGCTLMSR_LBR;
147
/*
148
* LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
149
* If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
150
* may cause superfluous increase/decrease of LBR_TOS.
151
*/
152
if (is_lbr_call_stack_bit_set(lbr_select))
153
debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
154
else
155
debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
156
157
if (orig_debugctl != debugctl)
158
wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
159
160
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
161
wrmsrq(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
162
}
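/*
 * Note that on Arch LBR parts the enable bit lives in MSR_ARCH_LBR_CTL
 * (ARCH_LBR_CTL_LBREN) rather than in IA32_DEBUGCTL, which is why
 * DEBUGCTLMSR_LBR is only OR'ed in on the legacy path above.
 */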
163
164
void intel_pmu_lbr_reset_32(void)
165
{
166
int i;
167
168
for (i = 0; i < x86_pmu.lbr_nr; i++)
169
wrmsrq(x86_pmu.lbr_from + i, 0);
170
}
171
172
void intel_pmu_lbr_reset_64(void)
173
{
174
int i;
175
176
for (i = 0; i < x86_pmu.lbr_nr; i++) {
177
wrmsrq(x86_pmu.lbr_from + i, 0);
178
wrmsrq(x86_pmu.lbr_to + i, 0);
179
if (x86_pmu.lbr_has_info)
180
wrmsrq(x86_pmu.lbr_info + i, 0);
181
}
182
}
183
184
static void intel_pmu_arch_lbr_reset(void)
185
{
186
/* Writing to the ARCH_LBR_DEPTH MSR resets all LBR entries to 0 */
187
wrmsrq(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
188
}
189
190
void intel_pmu_lbr_reset(void)
191
{
192
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
193
194
if (!x86_pmu.lbr_nr)
195
return;
196
197
x86_pmu.lbr_reset();
198
199
cpuc->last_task_ctx = NULL;
200
cpuc->last_log_id = 0;
201
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
202
wrmsrq(MSR_LBR_SELECT, 0);
203
}
204
205
/*
206
* TOS = most recently recorded branch
207
*/
208
static inline u64 intel_pmu_lbr_tos(void)
209
{
210
u64 tos;
211
212
rdmsrq(x86_pmu.lbr_tos, tos);
213
return tos;
214
}
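/*
 * The legacy LBR stack is a ring buffer of x86_pmu.lbr_nr entries indexed
 * by TOS. The callers below walk it most-recent-first with
 * lbr_idx = (tos - i) & (x86_pmu.lbr_nr - 1), e.g. with 32 entries and
 * tos == 3 the walk order is 3, 2, 1, 0, 31, 30, ...
 */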
215
216
enum {
217
LBR_NONE,
218
LBR_VALID,
219
};
220
221
/*
222
* For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
223
* are the TSX flags when TSX is supported, but when TSX is not supported
224
* they have no consistent behavior:
225
*
226
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
227
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
228
* part of the sign extension.
229
*
230
* Therefore, if:
231
*
232
* 1) LBR format LBR_FORMAT_EIP_FLAGS2
233
* 2) CPU has no TSX support enabled
234
*
235
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
236
* value from rdmsr() must be converted to have a 61-bit sign extension,
237
* ignoring the TSX flags.
238
*/
239
static inline bool lbr_from_signext_quirk_needed(void)
240
{
241
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
242
boot_cpu_has(X86_FEATURE_RTM);
243
244
return !tsx_support;
245
}
246
247
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
248
249
/* If quirk is enabled, ensure sign extension is 63 bits: */
250
inline u64 lbr_from_signext_quirk_wr(u64 val)
251
{
252
if (static_branch_unlikely(&lbr_from_quirk_key)) {
253
/*
254
* Sign extend into bits 61:62 while preserving bit 63.
255
*
256
* Quirk is enabled when TSX is disabled. Therefore TSX bits
257
* in val are always OFF and must be changed to be sign
258
* extension bits. Since bits 59:60 are guaranteed to be
259
* part of the sign extension bits, we can just copy them
260
* to 61:62.
261
*/
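/*
 * Illustrative example: a FROM value read back as 0x1fffffff81000000
 * (address bits 62:61 cleared by hardware, flag bit 63 clear) becomes
 * 0x7fffffff81000000 after the copy below, i.e. sign-extended through
 * bit 62 with bit 63 left untouched.
 */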
262
val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
263
}
264
return val;
265
}
266
267
/*
268
* If quirk is needed, ensure sign extension is 61 bits:
269
*/
270
static u64 lbr_from_signext_quirk_rd(u64 val)
271
{
272
if (static_branch_unlikely(&lbr_from_quirk_key)) {
273
/*
274
* Quirk is on when TSX is not enabled. Therefore TSX
275
* flags must be read as OFF.
276
*/
277
val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
278
}
279
return val;
280
}
281
282
static __always_inline void wrlbr_from(unsigned int idx, u64 val)
283
{
284
val = lbr_from_signext_quirk_wr(val);
285
wrmsrq(x86_pmu.lbr_from + idx, val);
286
}
287
288
static __always_inline void wrlbr_to(unsigned int idx, u64 val)
289
{
290
wrmsrq(x86_pmu.lbr_to + idx, val);
291
}
292
293
static __always_inline void wrlbr_info(unsigned int idx, u64 val)
294
{
295
wrmsrq(x86_pmu.lbr_info + idx, val);
296
}
297
298
static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
299
{
300
u64 val;
301
302
if (lbr)
303
return lbr->from;
304
305
rdmsrq(x86_pmu.lbr_from + idx, val);
306
307
return lbr_from_signext_quirk_rd(val);
308
}
309
310
static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
311
{
312
u64 val;
313
314
if (lbr)
315
return lbr->to;
316
317
rdmsrq(x86_pmu.lbr_to + idx, val);
318
319
return val;
320
}
321
322
static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
323
{
324
u64 val;
325
326
if (lbr)
327
return lbr->info;
328
329
rdmsrq(x86_pmu.lbr_info + idx, val);
330
331
return val;
332
}
333
334
static inline void
335
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
336
{
337
wrlbr_from(idx, lbr->from);
338
wrlbr_to(idx, lbr->to);
339
if (need_info)
340
wrlbr_info(idx, lbr->info);
341
}
342
343
static inline bool
344
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
345
{
346
u64 from = rdlbr_from(idx, NULL);
347
348
/* Don't read invalid entry */
349
if (!from)
350
return false;
351
352
lbr->from = from;
353
lbr->to = rdlbr_to(idx, NULL);
354
if (need_info)
355
lbr->info = rdlbr_info(idx, NULL);
356
357
return true;
358
}
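/*
 * A FROM value of 0 marks an unused LBR entry; the save and read paths
 * below rely on this to detect the end of the valid region.
 */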
359
360
void intel_pmu_lbr_restore(void *ctx)
361
{
362
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
363
struct x86_perf_task_context *task_ctx = ctx;
364
bool need_info = x86_pmu.lbr_has_info;
365
u64 tos = task_ctx->tos;
366
unsigned lbr_idx, mask;
367
int i;
368
369
mask = x86_pmu.lbr_nr - 1;
370
for (i = 0; i < task_ctx->valid_lbrs; i++) {
371
lbr_idx = (tos - i) & mask;
372
wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
373
}
374
375
for (; i < x86_pmu.lbr_nr; i++) {
376
lbr_idx = (tos - i) & mask;
377
wrlbr_from(lbr_idx, 0);
378
wrlbr_to(lbr_idx, 0);
379
if (need_info)
380
wrlbr_info(lbr_idx, 0);
381
}
382
383
wrmsrq(x86_pmu.lbr_tos, tos);
384
385
if (cpuc->lbr_select)
386
wrmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel);
387
}
388
389
static void intel_pmu_arch_lbr_restore(void *ctx)
390
{
391
struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
392
struct lbr_entry *entries = task_ctx->entries;
393
int i;
394
395
/* Fast reset the LBRs before restore if the call stack is not full. */
396
if (!entries[x86_pmu.lbr_nr - 1].from)
397
intel_pmu_arch_lbr_reset();
398
399
for (i = 0; i < x86_pmu.lbr_nr; i++) {
400
if (!entries[i].from)
401
break;
402
wrlbr_all(&entries[i], i, true);
403
}
404
}
405
406
/*
407
* Restore the Architecture LBR state from the xsave area in the perf
408
* context data for the task via the XRSTORS instruction.
409
*/
410
static void intel_pmu_arch_lbr_xrstors(void *ctx)
411
{
412
struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
413
414
xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
415
}
416
417
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
418
{
419
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
420
return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);
421
422
return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
423
}
424
425
static inline bool has_lbr_callstack_users(void *ctx)
426
{
427
return task_context_opt(ctx)->lbr_callstack_users ||
428
x86_pmu.lbr_callstack_users;
429
}
430
431
static void __intel_pmu_lbr_restore(void *ctx)
432
{
433
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
434
435
if (!has_lbr_callstack_users(ctx) ||
436
task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
437
intel_pmu_lbr_reset();
438
return;
439
}
440
441
/*
442
* Do not restore the LBR registers if:
443
* - no one else touched them, and
444
* - they were not cleared in a deep C-state.
445
*/
446
if ((ctx == cpuc->last_task_ctx) &&
447
(task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
448
!lbr_is_reset_in_cstate(ctx)) {
449
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
450
return;
451
}
452
453
x86_pmu.lbr_restore(ctx);
454
455
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
456
}
457
458
void intel_pmu_lbr_save(void *ctx)
459
{
460
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
461
struct x86_perf_task_context *task_ctx = ctx;
462
bool need_info = x86_pmu.lbr_has_info;
463
unsigned lbr_idx, mask;
464
u64 tos;
465
int i;
466
467
mask = x86_pmu.lbr_nr - 1;
468
tos = intel_pmu_lbr_tos();
469
for (i = 0; i < x86_pmu.lbr_nr; i++) {
470
lbr_idx = (tos - i) & mask;
471
if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
472
break;
473
}
474
task_ctx->valid_lbrs = i;
475
task_ctx->tos = tos;
476
477
if (cpuc->lbr_select)
478
rdmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel);
479
}
480
481
static void intel_pmu_arch_lbr_save(void *ctx)
482
{
483
struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
484
struct lbr_entry *entries = task_ctx->entries;
485
int i;
486
487
for (i = 0; i < x86_pmu.lbr_nr; i++) {
488
if (!rdlbr_all(&entries[i], i, true))
489
break;
490
}
491
492
/* LBR call stack is not full. Reset is required in restore. */
493
if (i < x86_pmu.lbr_nr)
494
entries[x86_pmu.lbr_nr - 1].from = 0;
495
}
496
497
/*
498
* Save the Architecture LBR state to the xsave area in the perf
499
* context data for the task via the XSAVES instruction.
500
*/
501
static void intel_pmu_arch_lbr_xsaves(void *ctx)
502
{
503
struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
504
505
xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
506
}
507
508
static void __intel_pmu_lbr_save(void *ctx)
509
{
510
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
511
512
if (!has_lbr_callstack_users(ctx)) {
513
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
514
return;
515
}
516
517
x86_pmu.lbr_save(ctx);
518
519
task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
520
521
cpuc->last_task_ctx = ctx;
522
cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
523
}
524
525
void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
526
struct task_struct *task, bool sched_in)
527
{
528
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
529
struct perf_ctx_data *ctx_data;
530
void *task_ctx;
531
532
if (!cpuc->lbr_users)
533
return;
534
535
/*
536
* If LBR callstack feature is enabled and the stack was saved when
537
* the task was scheduled out, restore the stack. Otherwise flush
538
* the LBR stack.
539
*/
540
rcu_read_lock();
541
ctx_data = rcu_dereference(task->perf_ctx_data);
542
task_ctx = ctx_data ? ctx_data->data : NULL;
543
if (task_ctx) {
544
if (sched_in)
545
__intel_pmu_lbr_restore(task_ctx);
546
else
547
__intel_pmu_lbr_save(task_ctx);
548
rcu_read_unlock();
549
return;
550
}
551
rcu_read_unlock();
552
553
/*
554
* Since a context switch can flip the address space and LBR entries
555
* are not tagged with an identifier, we need to wipe the LBR, even for
556
* per-cpu events. You simply cannot resolve the branches from the old
557
* address space.
558
*/
559
if (sched_in)
560
intel_pmu_lbr_reset();
561
}
562
563
static inline bool branch_user_callstack(unsigned br_sel)
564
{
565
return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
566
}
567
568
void intel_pmu_lbr_add(struct perf_event *event)
569
{
570
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
571
572
if (!x86_pmu.lbr_nr)
573
return;
574
575
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
576
cpuc->lbr_select = 1;
577
578
cpuc->br_sel = event->hw.branch_reg.reg;
579
580
if (branch_user_callstack(cpuc->br_sel)) {
581
if (event->attach_state & PERF_ATTACH_TASK) {
582
struct task_struct *task = event->hw.target;
583
struct perf_ctx_data *ctx_data;
584
585
rcu_read_lock();
586
ctx_data = rcu_dereference(task->perf_ctx_data);
587
if (ctx_data)
588
task_context_opt(ctx_data->data)->lbr_callstack_users++;
589
rcu_read_unlock();
590
} else
591
x86_pmu.lbr_callstack_users++;
592
}
593
/*
594
* Request pmu::sched_task() callback, which will fire inside the
595
* regular perf event scheduling, so that call will:
596
*
597
* - restore or wipe; when LBR-callstack,
598
* - wipe; otherwise,
599
*
600
* when this is from __perf_event_task_sched_in().
601
*
602
* However, if this is from perf_install_in_context(), no such callback
603
* will follow and we'll need to reset the LBR here if this is the
604
* first LBR event.
605
*
606
* The problem is, we cannot tell these cases apart... but we can
607
* exclude the biggest chunk of cases by looking at
608
* event->total_time_running. An event that has accrued runtime cannot
609
* be 'new'. Conversely, a new event can get installed through the
610
* context switch path for the first time.
611
*/
612
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
613
cpuc->lbr_pebs_users++;
614
perf_sched_cb_inc(event->pmu);
615
if (!cpuc->lbr_users++ && !event->total_time_running)
616
intel_pmu_lbr_reset();
617
}
618
619
void release_lbr_buffers(void)
620
{
621
struct kmem_cache *kmem_cache;
622
struct cpu_hw_events *cpuc;
623
int cpu;
624
625
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
626
return;
627
628
for_each_possible_cpu(cpu) {
629
cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
630
kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
631
if (kmem_cache && cpuc->lbr_xsave) {
632
kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
633
cpuc->lbr_xsave = NULL;
634
}
635
}
636
}
637
638
void reserve_lbr_buffers(void)
639
{
640
struct kmem_cache *kmem_cache;
641
struct cpu_hw_events *cpuc;
642
int cpu;
643
644
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
645
return;
646
647
for_each_possible_cpu(cpu) {
648
cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
649
kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
650
if (!kmem_cache || cpuc->lbr_xsave)
651
continue;
652
653
cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
654
GFP_KERNEL | __GFP_ZERO,
655
cpu_to_node(cpu));
656
}
657
}
658
659
void intel_pmu_lbr_del(struct perf_event *event)
660
{
661
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
662
663
if (!x86_pmu.lbr_nr)
664
return;
665
666
if (branch_user_callstack(cpuc->br_sel)) {
667
if (event->attach_state & PERF_ATTACH_TASK) {
668
struct task_struct *task = event->hw.target;
669
struct perf_ctx_data *ctx_data;
670
671
rcu_read_lock();
672
ctx_data = rcu_dereference(task->perf_ctx_data);
673
if (ctx_data)
674
task_context_opt(ctx_data->data)->lbr_callstack_users--;
675
rcu_read_unlock();
676
} else
677
x86_pmu.lbr_callstack_users--;
678
}
679
680
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
681
cpuc->lbr_select = 0;
682
683
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
684
cpuc->lbr_pebs_users--;
685
cpuc->lbr_users--;
686
WARN_ON_ONCE(cpuc->lbr_users < 0);
687
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
688
perf_sched_cb_dec(event->pmu);
689
690
/*
691
* The logged occurrences information is only valid for the
692
* current LBR group. If another LBR group is scheduled in
693
* later, the information from the stale LBRs will be wrongly
694
* interpreted. Reset the LBRs here.
695
*
696
* Only clear once per branch counter group, using the leader
697
* event, because:
698
* - We cannot simply reset the LBRs when !cpuc->lbr_users, since
699
* it's possible that the last LBR user is not in a
700
* branch counter group, e.g., a branch_counters group +
701
* several normal LBR events.
702
* - The LBR reset can be done with any one of the events in a
703
* branch counter group, since they are always scheduled together.
704
* It's easy to force the leader event to be an LBR event.
705
*/
706
if (is_branch_counters_group(event) && event == event->group_leader)
707
intel_pmu_lbr_reset();
708
}
709
710
static inline bool vlbr_exclude_host(void)
711
{
712
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
713
714
return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
715
(unsigned long *)&cpuc->intel_ctrl_guest_mask);
716
}
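/*
 * When the guest owns the virtual LBR fixed counter (e.g. for KVM guest
 * LBR support), the host must leave the LBR MSRs alone; the enable,
 * disable and read paths below all bail out in that case.
 */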
717
718
void intel_pmu_lbr_enable_all(bool pmi)
719
{
720
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
721
722
if (cpuc->lbr_users && !vlbr_exclude_host())
723
__intel_pmu_lbr_enable(pmi);
724
}
725
726
void intel_pmu_lbr_disable_all(void)
727
{
728
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
729
730
if (cpuc->lbr_users && !vlbr_exclude_host()) {
731
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
732
return __intel_pmu_arch_lbr_disable();
733
734
__intel_pmu_lbr_disable();
735
}
736
}
737
738
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
739
{
740
unsigned long mask = x86_pmu.lbr_nr - 1;
741
struct perf_branch_entry *br = cpuc->lbr_entries;
742
u64 tos = intel_pmu_lbr_tos();
743
int i;
744
745
for (i = 0; i < x86_pmu.lbr_nr; i++) {
746
unsigned long lbr_idx = (tos - i) & mask;
747
union {
748
struct {
749
u32 from;
750
u32 to;
751
};
752
u64 lbr;
753
} msr_lastbranch;
754
755
rdmsrq(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
756
757
perf_clear_branch_entry_bitfields(br);
758
759
br->from = msr_lastbranch.from;
760
br->to = msr_lastbranch.to;
761
br++;
762
}
763
cpuc->lbr_stack.nr = i;
764
cpuc->lbr_stack.hw_idx = tos;
765
}
766
767
/*
768
* Due to the lack of segmentation in Linux, the effective address (offset)
769
* is the same as the linear address, allowing us to merge the LIP and EIP
770
* LBR formats.
771
*/
772
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
773
{
774
bool need_info = false, call_stack = false;
775
unsigned long mask = x86_pmu.lbr_nr - 1;
776
struct perf_branch_entry *br = cpuc->lbr_entries;
777
u64 tos = intel_pmu_lbr_tos();
778
int i;
779
int out = 0;
780
int num = x86_pmu.lbr_nr;
781
782
if (cpuc->lbr_sel) {
783
need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
784
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
785
call_stack = true;
786
}
787
788
for (i = 0; i < num; i++) {
789
unsigned long lbr_idx = (tos - i) & mask;
790
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
791
u16 cycles = 0;
792
793
from = rdlbr_from(lbr_idx, NULL);
794
to = rdlbr_to(lbr_idx, NULL);
795
796
/*
797
* Read LBR call stack entries
798
* until invalid entry (0s) is detected.
799
*/
800
if (call_stack && !from)
801
break;
802
803
if (x86_pmu.lbr_has_info) {
804
if (need_info) {
805
u64 info;
806
807
info = rdlbr_info(lbr_idx, NULL);
808
mis = !!(info & LBR_INFO_MISPRED);
809
pred = !mis;
810
cycles = (info & LBR_INFO_CYCLES);
811
if (x86_pmu.lbr_has_tsx) {
812
in_tx = !!(info & LBR_INFO_IN_TX);
813
abort = !!(info & LBR_INFO_ABORT);
814
}
815
}
816
} else {
817
int skip = 0;
818
819
if (x86_pmu.lbr_from_flags) {
820
mis = !!(from & LBR_FROM_FLAG_MISPRED);
821
pred = !mis;
822
skip = 1;
823
}
824
if (x86_pmu.lbr_has_tsx) {
825
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
826
abort = !!(from & LBR_FROM_FLAG_ABORT);
827
skip = 3;
828
}
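/*
 * The arithmetic shift pair below strips the flag bits parsed above
 * (1 bit for MISPRED-only formats, 3 bits when the TSX flags are
 * present) and re-sign-extends the branch address.
 */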
829
from = (u64)((((s64)from) << skip) >> skip);
830
831
if (x86_pmu.lbr_to_cycles) {
832
cycles = ((to >> 48) & LBR_INFO_CYCLES);
833
to = (u64)((((s64)to) << 16) >> 16);
834
}
835
}
836
837
/*
838
* Some CPUs report duplicated abort records,
839
* with the second entry not having an abort bit set.
840
* Skip them here. This loop runs backwards,
841
* so we need to undo the previous record.
842
* If the abort just happened outside the window
843
* the extra entry cannot be removed.
844
*/
845
if (abort && x86_pmu.lbr_double_abort && out > 0)
846
out--;
847
848
perf_clear_branch_entry_bitfields(br+out);
849
br[out].from = from;
850
br[out].to = to;
851
br[out].mispred = mis;
852
br[out].predicted = pred;
853
br[out].in_tx = in_tx;
854
br[out].abort = abort;
855
br[out].cycles = cycles;
856
out++;
857
}
858
cpuc->lbr_stack.nr = out;
859
cpuc->lbr_stack.hw_idx = tos;
860
}
861
862
static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
863
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
864
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
865
866
static __always_inline int get_lbr_br_type(u64 info)
867
{
868
int type = 0;
869
870
if (static_branch_likely(&x86_lbr_type))
871
type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
872
873
return type;
874
}
875
876
static __always_inline bool get_lbr_mispred(u64 info)
877
{
878
bool mispred = 0;
879
880
if (static_branch_likely(&x86_lbr_mispred))
881
mispred = !!(info & LBR_INFO_MISPRED);
882
883
return mispred;
884
}
885
886
static __always_inline u16 get_lbr_cycles(u64 info)
887
{
888
u16 cycles = info & LBR_INFO_CYCLES;
889
890
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
891
(!static_branch_likely(&x86_lbr_cycles) ||
892
!(info & LBR_INFO_CYC_CNT_VALID)))
893
cycles = 0;
894
895
return cycles;
896
}
897
898
static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
899
900
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
901
struct lbr_entry *entries)
902
{
903
struct perf_branch_entry *e;
904
struct lbr_entry *lbr;
905
u64 from, to, info;
906
int i;
907
908
for (i = 0; i < x86_pmu.lbr_nr; i++) {
909
lbr = entries ? &entries[i] : NULL;
910
e = &cpuc->lbr_entries[i];
911
912
from = rdlbr_from(i, lbr);
913
/*
914
* Read LBR entries until invalid entry (0s) is detected.
915
*/
916
if (!from)
917
break;
918
919
to = rdlbr_to(i, lbr);
920
info = rdlbr_info(i, lbr);
921
922
perf_clear_branch_entry_bitfields(e);
923
924
e->from = from;
925
e->to = to;
926
e->mispred = get_lbr_mispred(info);
927
e->predicted = !e->mispred;
928
e->in_tx = !!(info & LBR_INFO_IN_TX);
929
e->abort = !!(info & LBR_INFO_ABORT);
930
e->cycles = get_lbr_cycles(info);
931
e->type = get_lbr_br_type(info);
932
933
/*
934
* Leverage the reserved field of cpuc->lbr_entries[i] to
935
* temporarily store the branch counters information.
936
* The later code will decide what content can be disclosed
937
* to the perf tool. Please see intel_pmu_lbr_counters_reorder().
938
*/
939
e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
940
}
941
942
cpuc->lbr_stack.nr = i;
943
}
944
945
/*
946
* The enabled order may be different from the counter order.
947
* Update the lbr_counters with the enabled order.
948
*/
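/*
 * Example: if the group enables branch counters on, say, GP counters 3
 * and 1 (in that enabled order), then order[] == {3, 1} and, for each
 * LBR entry, the lowest LBR_INFO_BR_CNTR_BITS of dst hold the count from
 * counter 3's slot in LBR_INFO, the next LBR_INFO_BR_CNTR_BITS hold
 * counter 1's slot, and so on.
 */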
949
static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
950
struct perf_event *event)
951
{
952
int i, j, pos = 0, order[X86_PMC_IDX_MAX];
953
struct perf_event *leader, *sibling;
954
u64 src, dst, cnt;
955
956
leader = event->group_leader;
957
if (branch_sample_counters(leader))
958
order[pos++] = leader->hw.idx;
959
960
for_each_sibling_event(sibling, leader) {
961
if (!branch_sample_counters(sibling))
962
continue;
963
order[pos++] = sibling->hw.idx;
964
}
965
966
WARN_ON_ONCE(!pos);
967
968
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
969
src = cpuc->lbr_entries[i].reserved;
970
dst = 0;
971
for (j = 0; j < pos; j++) {
972
cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
973
dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
974
}
975
cpuc->lbr_counters[i] = dst;
976
cpuc->lbr_entries[i].reserved = 0;
977
}
978
}
979
980
void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
981
struct cpu_hw_events *cpuc,
982
struct perf_event *event)
983
{
984
if (is_branch_counters_group(event)) {
985
intel_pmu_lbr_counters_reorder(cpuc, event);
986
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
987
return;
988
}
989
990
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
991
}
992
993
static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
994
{
995
intel_pmu_store_lbr(cpuc, NULL);
996
}
997
998
static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
999
{
1000
struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
1001
1002
if (!xsave) {
1003
intel_pmu_store_lbr(cpuc, NULL);
1004
return;
1005
}
1006
xsaves(&xsave->xsave, XFEATURE_MASK_LBR);
1007
1008
intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
1009
}
1010
1011
void intel_pmu_lbr_read(void)
1012
{
1013
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1014
1015
/*
1016
* Don't read when all LBR users are using adaptive PEBS.
1017
*
1018
* This could be smarter and actually check the event,
1019
* but this simple approach seems to work for now.
1020
*/
1021
if (!cpuc->lbr_users || vlbr_exclude_host() ||
1022
cpuc->lbr_users == cpuc->lbr_pebs_users)
1023
return;
1024
1025
x86_pmu.lbr_read(cpuc);
1026
1027
intel_pmu_lbr_filter(cpuc);
1028
}
1029
1030
/*
1031
* SW filter is used:
1032
* - in case there is no HW filter
1033
* - in case the HW filter has errata or limitations
1034
*/
1035
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
1036
{
1037
u64 br_type = event->attr.branch_sample_type;
1038
int mask = 0;
1039
1040
if (br_type & PERF_SAMPLE_BRANCH_USER)
1041
mask |= X86_BR_USER;
1042
1043
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
1044
mask |= X86_BR_KERNEL;
1045
1046
/* we ignore BRANCH_HV here */
1047
1048
if (br_type & PERF_SAMPLE_BRANCH_ANY)
1049
mask |= X86_BR_ANY;
1050
1051
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
1052
mask |= X86_BR_ANY_CALL;
1053
1054
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
1055
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
1056
1057
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
1058
mask |= X86_BR_IND_CALL;
1059
1060
if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
1061
mask |= X86_BR_ABORT;
1062
1063
if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
1064
mask |= X86_BR_IN_TX;
1065
1066
if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
1067
mask |= X86_BR_NO_TX;
1068
1069
if (br_type & PERF_SAMPLE_BRANCH_COND)
1070
mask |= X86_BR_JCC;
1071
1072
if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
1073
if (!x86_pmu_has_lbr_callstack())
1074
return -EOPNOTSUPP;
1075
if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
1076
return -EINVAL;
1077
mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
1078
X86_BR_CALL_STACK;
1079
}
1080
1081
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
1082
mask |= X86_BR_IND_JMP;
1083
1084
if (br_type & PERF_SAMPLE_BRANCH_CALL)
1085
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
1086
1087
if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
1088
mask |= X86_BR_TYPE_SAVE;
1089
1090
/*
1091
* Stash the actual user request into reg; it may
1092
* be used by fixup code for some CPUs.
1093
*/
1094
event->hw.branch_reg.reg = mask;
1095
return 0;
1096
}
1097
1098
/*
1099
* setup the HW LBR filter
1100
* Used only when available, may not be enough to disambiguate
1101
* all branches, may need the help of the SW filter
1102
*/
1103
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
1104
{
1105
struct hw_perf_event_extra *reg;
1106
u64 br_type = event->attr.branch_sample_type;
1107
u64 mask = 0, v;
1108
int i;
1109
1110
for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
1111
if (!(br_type & (1ULL << i)))
1112
continue;
1113
1114
v = x86_pmu.lbr_sel_map[i];
1115
if (v == LBR_NOT_SUPP)
1116
return -EOPNOTSUPP;
1117
1118
if (v != LBR_IGN)
1119
mask |= v;
1120
}
1121
1122
reg = &event->hw.branch_reg;
1123
reg->idx = EXTRA_REG_LBR;
1124
1125
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
1126
reg->config = mask;
1127
1128
/*
1129
* The Arch LBR HW can retrieve the common branch types
1130
* from the LBR_INFO. It doesn't require the high-overhead
1131
* SW disassembly.
1132
* Enable the branch type by default for the Arch LBR.
1133
*/
1134
reg->reg |= X86_BR_TYPE_SAVE;
1135
return 0;
1136
}
1137
1138
/*
1139
* The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
1140
* in suppress mode. So LBR_SELECT should be set to
1141
* (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
1142
* But the 10th bit LBR_CALL_STACK does not operate
1143
* in suppress mode.
1144
*/
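/*
 * Worked example: a request for PERF_SAMPLE_BRANCH_USER |
 * PERF_SAMPLE_BRANCH_COND yields mask = LBR_USER | LBR_JCC = 0x6, so
 * reg->config = 0x6 ^ (0x3ff & ~0x200) = 0x1f9: the suppress bits for
 * ring0, calls, returns, jumps and far branches are set, while the user
 * and conditional-branch bits are cleared, i.e. captured.
 */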
1145
reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
1146
1147
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
1148
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
1149
x86_pmu.lbr_has_info)
1150
reg->config |= LBR_NO_INFO;
1151
1152
return 0;
1153
}
1154
1155
int intel_pmu_setup_lbr_filter(struct perf_event *event)
1156
{
1157
int ret = 0;
1158
1159
/*
1160
* no LBR on this PMU
1161
*/
1162
if (!x86_pmu.lbr_nr)
1163
return -EOPNOTSUPP;
1164
1165
/*
1166
* setup SW LBR filter
1167
*/
1168
ret = intel_pmu_setup_sw_lbr_filter(event);
1169
if (ret)
1170
return ret;
1171
1172
/*
1173
* setup HW LBR filter, if any
1174
*/
1175
if (x86_pmu.lbr_sel_map)
1176
ret = intel_pmu_setup_hw_lbr_filter(event);
1177
1178
return ret;
1179
}
1180
1181
enum {
1182
ARCH_LBR_BR_TYPE_JCC = 0,
1183
ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
1184
ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2,
1185
ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3,
1186
ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4,
1187
ARCH_LBR_BR_TYPE_NEAR_RET = 5,
1188
ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET,
1189
1190
ARCH_LBR_BR_TYPE_MAP_MAX = 16,
1191
};
1192
1193
static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
1194
[ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC,
1195
[ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP,
1196
[ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP,
1197
[ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL,
1198
[ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL,
1199
[ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET,
1200
};
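/*
 * Example: an Arch LBR entry whose LBR_INFO branch type field reads
 * ARCH_LBR_BR_TYPE_NEAR_IND_CALL (3) is translated below to
 * X86_BR_IND_CALL, OR'ed with X86_BR_KERNEL or X86_BR_USER depending on
 * kernel_ip(to); anything above ARCH_LBR_BR_TYPE_KNOWN_MAX still goes
 * through the software branch_type() decoder.
 */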
1201
1202
/*
1203
* implement actual branch filter based on user demand.
1204
* Hardware may not exactly satisfy that request, thus
1205
* we need to inspect opcodes. Mismatched branches are
1206
* discarded. Therefore, the number of branches returned
1207
* in PERF_SAMPLE_BRANCH_STACK sample may vary.
1208
*/
1209
static void
1210
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
1211
{
1212
u64 from, to;
1213
int br_sel = cpuc->br_sel;
1214
int i, j, type, to_plm;
1215
bool compress = false;
1216
1217
/* if sampling all branches, then nothing to filter */
1218
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
1219
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
1220
return;
1221
1222
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
1223
1224
from = cpuc->lbr_entries[i].from;
1225
to = cpuc->lbr_entries[i].to;
1226
type = cpuc->lbr_entries[i].type;
1227
1228
/*
1229
* Parse the branch type recorded in LBR_x_INFO MSR.
1230
* Doesn't support OTHER_BRANCH decoding for now.
1231
* The OTHER_BRANCH branch type still relies on software decoding.
1232
*/
1233
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
1234
type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
1235
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
1236
type = arch_lbr_br_type_map[type] | to_plm;
1237
} else
1238
type = branch_type(from, to, cpuc->lbr_entries[i].abort);
1239
if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
1240
if (cpuc->lbr_entries[i].in_tx)
1241
type |= X86_BR_IN_TX;
1242
else
1243
type |= X86_BR_NO_TX;
1244
}
1245
1246
/* if type does not correspond, then discard */
1247
if (type == X86_BR_NONE || (br_sel & type) != type) {
1248
cpuc->lbr_entries[i].from = 0;
1249
compress = true;
1250
}
1251
1252
if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
1253
cpuc->lbr_entries[i].type = common_branch_type(type);
1254
}
1255
1256
if (!compress)
1257
return;
1258
1259
/* remove all entries with from=0 */
1260
for (i = 0; i < cpuc->lbr_stack.nr; ) {
1261
if (!cpuc->lbr_entries[i].from) {
1262
j = i;
1263
while (++j < cpuc->lbr_stack.nr) {
1264
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
1265
cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
1266
}
1267
cpuc->lbr_stack.nr--;
1268
if (!cpuc->lbr_entries[i].from)
1269
continue;
1270
}
1271
i++;
1272
}
1273
}
1274
1275
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
1276
{
1277
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1278
1279
/* Cannot get TOS for large PEBS and Arch LBR */
1280
if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
1281
(cpuc->n_pebs == cpuc->n_large_pebs))
1282
cpuc->lbr_stack.hw_idx = -1ULL;
1283
else
1284
cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
1285
1286
intel_pmu_store_lbr(cpuc, lbr);
1287
intel_pmu_lbr_filter(cpuc);
1288
}
1289
1290
/*
1291
* Map interface branch filters onto LBR filters
1292
*/
1293
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1294
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
1295
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
1296
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
1297
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1298
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP
1299
| LBR_IND_JMP | LBR_FAR,
1300
/*
1301
* NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
1302
*/
1303
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
1304
LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
1305
/*
1306
* NHM/WSM erratum: must include IND_JMP to capture IND_CALL
1307
*/
1308
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
1309
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1310
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1311
};
1312
1313
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1314
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
1315
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
1316
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
1317
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1318
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
1319
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1320
| LBR_FAR,
1321
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1322
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1323
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1324
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
1325
};
1326
1327
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1328
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
1329
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
1330
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
1331
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1332
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
1333
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1334
| LBR_FAR,
1335
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1336
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1337
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1338
| LBR_RETURN | LBR_CALL_STACK,
1339
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1340
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
1341
};
1342
1343
static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1344
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY,
1345
[PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER,
1346
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL,
1347
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1348
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN |
1349
ARCH_LBR_OTHER_BRANCH,
1350
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL |
1351
ARCH_LBR_IND_CALL |
1352
ARCH_LBR_OTHER_BRANCH,
1353
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL,
1354
[PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC,
1355
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL |
1356
ARCH_LBR_IND_CALL |
1357
ARCH_LBR_RETURN |
1358
ARCH_LBR_CALL_STACK,
1359
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP,
1360
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL,
1361
};
1362
1363
/* core */
1364
void __init intel_pmu_lbr_init_core(void)
1365
{
1366
x86_pmu.lbr_nr = 4;
1367
x86_pmu.lbr_tos = MSR_LBR_TOS;
1368
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1369
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1370
1371
/*
1372
* SW branch filter usage:
1373
* - compensate for lack of HW filter
1374
*/
1375
}
1376
1377
/* nehalem/westmere */
1378
void __init intel_pmu_lbr_init_nhm(void)
1379
{
1380
x86_pmu.lbr_nr = 16;
1381
x86_pmu.lbr_tos = MSR_LBR_TOS;
1382
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1383
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1384
1385
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1386
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
1387
1388
/*
1389
* SW branch filter usage:
1390
* - workaround LBR_SEL errata (see above)
1391
* - support syscall, sysret capture.
1392
* That requires LBR_FAR but that means far
1393
* jmp need to be filtered out
1394
*/
1395
}
1396
1397
/* sandy bridge */
1398
void __init intel_pmu_lbr_init_snb(void)
1399
{
1400
x86_pmu.lbr_nr = 16;
1401
x86_pmu.lbr_tos = MSR_LBR_TOS;
1402
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1403
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1404
1405
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1406
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
1407
1408
/*
1409
* SW branch filter usage:
1410
* - support syscall, sysret capture.
1411
* That requires LBR_FAR but that means far
1412
* jmps need to be filtered out
1413
*/
1414
}
1415
1416
static inline struct kmem_cache *
1417
create_lbr_kmem_cache(size_t size, size_t align)
1418
{
1419
return kmem_cache_create("x86_lbr", size, align, 0, NULL);
1420
}
1421
1422
/* haswell */
1423
void intel_pmu_lbr_init_hsw(void)
1424
{
1425
size_t size = sizeof(struct x86_perf_task_context);
1426
1427
x86_pmu.lbr_nr = 16;
1428
x86_pmu.lbr_tos = MSR_LBR_TOS;
1429
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1430
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1431
1432
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1433
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
1434
1435
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1436
}
1437
1438
/* skylake */
1439
__init void intel_pmu_lbr_init_skl(void)
1440
{
1441
size_t size = sizeof(struct x86_perf_task_context);
1442
1443
x86_pmu.lbr_nr = 32;
1444
x86_pmu.lbr_tos = MSR_LBR_TOS;
1445
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1446
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1447
x86_pmu.lbr_info = MSR_LBR_INFO_0;
1448
1449
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1450
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
1451
1452
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1453
1454
/*
1455
* SW branch filter usage:
1456
* - support syscall, sysret capture.
1457
* That requires LBR_FAR but that means far
1458
* jmps need to be filtered out
1459
*/
1460
}
1461
1462
/* atom */
1463
void __init intel_pmu_lbr_init_atom(void)
1464
{
1465
/*
1466
* only models starting at stepping 10 seem
1467
* to have an operational LBR which can freeze
1468
* on PMU interrupt
1469
*/
1470
if (boot_cpu_data.x86_vfm == INTEL_ATOM_BONNELL
1471
&& boot_cpu_data.x86_stepping < 10) {
1472
pr_cont("LBR disabled due to erratum");
1473
return;
1474
}
1475
1476
x86_pmu.lbr_nr = 8;
1477
x86_pmu.lbr_tos = MSR_LBR_TOS;
1478
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1479
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1480
1481
/*
1482
* SW branch filter usage:
1483
* - compensate for lack of HW filter
1484
*/
1485
}
1486
1487
/* slm */
1488
void __init intel_pmu_lbr_init_slm(void)
1489
{
1490
x86_pmu.lbr_nr = 8;
1491
x86_pmu.lbr_tos = MSR_LBR_TOS;
1492
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1493
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1494
1495
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1496
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
1497
1498
/*
1499
* SW branch filter usage:
1500
* - compensate for lack of HW filter
1501
*/
1502
pr_cont("8-deep LBR, ");
1503
}
1504
1505
/* Knights Landing */
1506
void intel_pmu_lbr_init_knl(void)
1507
{
1508
x86_pmu.lbr_nr = 8;
1509
x86_pmu.lbr_tos = MSR_LBR_TOS;
1510
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1511
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1512
1513
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1514
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
1515
1516
/* Knights Landing does have MISPREDICT bit */
1517
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
1518
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
1519
}
1520
1521
void intel_pmu_lbr_init(void)
1522
{
1523
switch (x86_pmu.intel_cap.lbr_format) {
1524
case LBR_FORMAT_EIP_FLAGS2:
1525
x86_pmu.lbr_has_tsx = 1;
1526
x86_pmu.lbr_from_flags = 1;
1527
if (lbr_from_signext_quirk_needed())
1528
static_branch_enable(&lbr_from_quirk_key);
1529
break;
1530
1531
case LBR_FORMAT_EIP_FLAGS:
1532
x86_pmu.lbr_from_flags = 1;
1533
break;
1534
1535
case LBR_FORMAT_INFO:
1536
x86_pmu.lbr_has_tsx = 1;
1537
fallthrough;
1538
case LBR_FORMAT_INFO2:
1539
x86_pmu.lbr_has_info = 1;
1540
break;
1541
1542
case LBR_FORMAT_TIME:
1543
x86_pmu.lbr_from_flags = 1;
1544
x86_pmu.lbr_to_cycles = 1;
1545
break;
1546
}
1547
1548
if (x86_pmu.lbr_has_info) {
1549
/*
1550
* Only used in combination with baseline PEBS.
1551
*/
1552
static_branch_enable(&x86_lbr_mispred);
1553
static_branch_enable(&x86_lbr_cycles);
1554
}
1555
}
1556
1557
/*
1558
* LBR state size is variable based on the max number of registers.
1559
* This calculates the expected state size, which should match
1560
* what the hardware enumerates for the size of XFEATURE_LBR.
1561
*/
1562
static inline unsigned int get_lbr_state_size(void)
1563
{
1564
return sizeof(struct arch_lbr_state) +
1565
x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1566
}
1567
1568
static bool is_arch_lbr_xsave_available(void)
1569
{
1570
if (!boot_cpu_has(X86_FEATURE_XSAVES))
1571
return false;
1572
1573
/*
1574
* Check the LBR state with the corresponding software structure.
1575
* Disable LBR XSAVES support if the size doesn't match.
1576
*/
1577
if (xfeature_size(XFEATURE_LBR) == 0)
1578
return false;
1579
1580
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
1581
return false;
1582
1583
return true;
1584
}
1585
1586
void __init intel_pmu_arch_lbr_init(void)
1587
{
1588
struct pmu *pmu = x86_get_pmu(smp_processor_id());
1589
union cpuid28_eax eax;
1590
union cpuid28_ebx ebx;
1591
union cpuid28_ecx ecx;
1592
unsigned int unused_edx;
1593
bool arch_lbr_xsave;
1594
size_t size;
1595
u64 lbr_nr;
1596
1597
/* Arch LBR Capabilities */
1598
cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);
1599
1600
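/*
 * The lbr_depth_mask reported by CPUID leaf 0x1C is a bitmap of the
 * supported LBR depths in multiples of 8: bit n set means a depth of
 * 8 * (n + 1) is supported, so the deepest supported configuration is
 * 8 * fls(mask). E.g. a mask of 0x7 (depths 8, 16 and 24) gives
 * lbr_nr = 24.
 */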
lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
1601
if (!lbr_nr)
1602
goto clear_arch_lbr;
1603
1604
/* Apply the max depth of Arch LBR */
1605
if (wrmsrq_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
1606
goto clear_arch_lbr;
1607
1608
x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
1609
x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
1610
x86_pmu.lbr_lip = eax.split.lbr_lip;
1611
x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
1612
x86_pmu.lbr_filter = ebx.split.lbr_filter;
1613
x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
1614
x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
1615
x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
1616
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
1617
x86_pmu.lbr_counters = ecx.split.lbr_counters;
1618
x86_pmu.lbr_nr = lbr_nr;
1619
1620
if (!!x86_pmu.lbr_counters)
1621
x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT;
1622
1623
if (x86_pmu.lbr_mispred)
1624
static_branch_enable(&x86_lbr_mispred);
1625
if (x86_pmu.lbr_timed_lbr)
1626
static_branch_enable(&x86_lbr_cycles);
1627
if (x86_pmu.lbr_br_type)
1628
static_branch_enable(&x86_lbr_type);
1629
1630
arch_lbr_xsave = is_arch_lbr_xsave_available();
1631
if (arch_lbr_xsave) {
1632
size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
1633
get_lbr_state_size();
1634
pmu->task_ctx_cache = create_lbr_kmem_cache(size,
1635
XSAVE_ALIGNMENT);
1636
}
1637
1638
if (!pmu->task_ctx_cache) {
1639
arch_lbr_xsave = false;
1640
1641
size = sizeof(struct x86_perf_task_context_arch_lbr) +
1642
lbr_nr * sizeof(struct lbr_entry);
1643
pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1644
}
1645
1646
x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
1647
x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
1648
x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
1649
1650
/* LBR callstack requires both CPL and Branch Filtering support */
1651
if (!x86_pmu.lbr_cpl ||
1652
!x86_pmu.lbr_filter ||
1653
!x86_pmu.lbr_call_stack)
1654
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
1655
1656
if (!x86_pmu.lbr_cpl) {
1657
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
1658
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
1659
} else if (!x86_pmu.lbr_filter) {
1660
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
1661
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
1662
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
1663
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
1664
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
1665
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
1666
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
1667
}
1668
1669
x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
1670
x86_pmu.lbr_ctl_map = arch_lbr_ctl_map;
1671
1672
if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
1673
x86_pmu.lbr_ctl_map = NULL;
1674
1675
x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
1676
if (arch_lbr_xsave) {
1677
x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
1678
x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
1679
x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
1680
pr_cont("XSAVE ");
1681
} else {
1682
x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
1683
x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
1684
x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
1685
}
1686
1687
pr_cont("Architectural LBR, ");
1688
1689
return;
1690
1691
clear_arch_lbr:
1692
setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
1693
}
1694
1695
/**
1696
* x86_perf_get_lbr - get the LBR records information
1697
*
1698
* @lbr: the caller's memory to store the LBR records information
1699
*/
1700
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
1701
{
1702
lbr->nr = x86_pmu.lbr_nr;
1703
lbr->from = x86_pmu.lbr_from;
1704
lbr->to = x86_pmu.lbr_to;
1705
lbr->info = x86_pmu.lbr_info;
1706
lbr->has_callstack = x86_pmu_has_lbr_callstack();
1707
}
1708
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
1709
1710
struct event_constraint vlbr_constraint =
1711
__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
1712
FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);
1713
1714