GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/intel/lbr.c
1
// SPDX-License-Identifier: GPL-2.0
2
#include <linux/kvm_types.h>
3
#include <linux/perf_event.h>
4
#include <linux/types.h>
5
6
#include <asm/cpu_device_id.h>
7
#include <asm/perf_event.h>
8
#include <asm/msr.h>
9
10
#include "../perf_event.h"
11
12
/*
13
* Intel LBR_SELECT bits
14
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
15
*
16
* Hardware branch filter (not available on all CPUs)
17
*/
18
#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
19
#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
20
#define LBR_JCC_BIT 2 /* do not capture conditional branches */
21
#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
22
#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
23
#define LBR_RETURN_BIT 5 /* do not capture near returns */
24
#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
25
#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
26
#define LBR_FAR_BIT 8 /* do not capture far branches */
27
#define LBR_CALL_STACK_BIT 9 /* enable call stack */
28
29
/*
30
* The following bit only exists in Linux; we mask it out before writing it to
31
* the actual MSR. But it helps the constraint perf code to understand
32
* that this is a separate configuration.
33
*/
34
#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */
35
36
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
37
#define LBR_USER (1 << LBR_USER_BIT)
38
#define LBR_JCC (1 << LBR_JCC_BIT)
39
#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
40
#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
41
#define LBR_RETURN (1 << LBR_RETURN_BIT)
42
#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
43
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
44
#define LBR_FAR (1 << LBR_FAR_BIT)
45
#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
46
#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
47
48
#define LBR_PLM (LBR_KERNEL | LBR_USER)
49
50
#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
51
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
52
#define LBR_IGN 0 /* ignored */
53
54
#define LBR_ANY \
55
(LBR_JCC |\
56
LBR_REL_CALL |\
57
LBR_IND_CALL |\
58
LBR_RETURN |\
59
LBR_REL_JMP |\
60
LBR_IND_JMP |\
61
LBR_FAR)
62
63
#define LBR_FROM_FLAG_MISPRED BIT_ULL(63)
64
#define LBR_FROM_FLAG_IN_TX BIT_ULL(62)
65
#define LBR_FROM_FLAG_ABORT BIT_ULL(61)
66
67
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
68
69
/*
70
* Intel LBR_CTL bits
71
*
72
* Hardware branch filter for Arch LBR
73
*/
74
#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */
75
#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */
76
#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */
77
#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */
78
#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */
79
#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */
80
#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */
81
#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */
82
#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */
83
#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */
84
85
#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT)
86
#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT)
87
#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT)
88
#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT)
89
#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT)
90
#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT)
91
#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT)
92
#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT)
93
#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT)
94
#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
95
96
#define ARCH_LBR_ANY \
97
(ARCH_LBR_JCC |\
98
ARCH_LBR_REL_JMP |\
99
ARCH_LBR_IND_JMP |\
100
ARCH_LBR_REL_CALL |\
101
ARCH_LBR_IND_CALL |\
102
ARCH_LBR_RETURN |\
103
ARCH_LBR_OTHER_BRANCH)
104
105
#define ARCH_LBR_CTL_MASK 0x7f000e
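/*
* The mask above covers exactly the filter bits defined in this file:
* bits 1-3 (ARCH_LBR_KERNEL/USER/CALL_STACK) and bits 16-22 (ARCH_LBR_JCC
* through ARCH_LBR_OTHER_BRANCH). Bit 0 (ARCH_LBR_CTL_LBREN) is not part
* of the mask; it is OR'ed in separately when the LBRs are enabled.
*/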
106
107
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
108
109
static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
110
{
111
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
112
return !!(config & ARCH_LBR_CALL_STACK);
113
114
return !!(config & LBR_CALL_STACK);
115
}
116
117
/*
118
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
119
* otherwise it becomes near impossible to get a reliable stack.
120
*/
121
122
static void __intel_pmu_lbr_enable(bool pmi)
123
{
124
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
125
u64 debugctl, lbr_select = 0, orig_debugctl;
126
127
/*
128
* No need to unfreeze manually, as v4 can do that as part
129
* of the GLOBAL_STATUS ack.
130
*/
131
if (pmi && x86_pmu.version >= 4)
132
return;
133
134
/*
135
* No need to reprogram LBR_SELECT in a PMI, as it
136
* did not change.
137
*/
138
if (cpuc->lbr_sel)
139
lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
140
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
141
wrmsrq(MSR_LBR_SELECT, lbr_select);
142
143
rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
144
orig_debugctl = debugctl;
145
146
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
147
debugctl |= DEBUGCTLMSR_LBR;
148
/*
149
* LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
150
* If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
151
* may cause superfluous increase/decrease of LBR_TOS.
152
*/
153
if (is_lbr_call_stack_bit_set(lbr_select))
154
debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
155
else
156
debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
157
158
if (orig_debugctl != debugctl)
159
wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
160
161
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
162
wrmsrq(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
163
}
164
165
void intel_pmu_lbr_reset_32(void)
166
{
167
int i;
168
169
for (i = 0; i < x86_pmu.lbr_nr; i++)
170
wrmsrq(x86_pmu.lbr_from + i, 0);
171
}
172
173
void intel_pmu_lbr_reset_64(void)
174
{
175
int i;
176
177
for (i = 0; i < x86_pmu.lbr_nr; i++) {
178
wrmsrq(x86_pmu.lbr_from + i, 0);
179
wrmsrq(x86_pmu.lbr_to + i, 0);
180
if (x86_pmu.lbr_has_info)
181
wrmsrq(x86_pmu.lbr_info + i, 0);
182
}
183
}
184
185
static void intel_pmu_arch_lbr_reset(void)
186
{
187
/* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
188
wrmsrq(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
189
}
190
191
void intel_pmu_lbr_reset(void)
192
{
193
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
194
195
if (!x86_pmu.lbr_nr)
196
return;
197
198
x86_pmu.lbr_reset();
199
200
cpuc->last_task_ctx = NULL;
201
cpuc->last_log_id = 0;
202
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
203
wrmsrq(MSR_LBR_SELECT, 0);
204
}
205
206
/*
207
* TOS = most recently recorded branch
208
*/
209
static inline u64 intel_pmu_lbr_tos(void)
210
{
211
u64 tos;
212
213
rdmsrq(x86_pmu.lbr_tos, tos);
214
return tos;
215
}
216
217
enum {
218
LBR_NONE,
219
LBR_VALID,
220
};
221
222
/*
223
* For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
224
* are the TSX flags when TSX is supported, but when TSX is not supported
225
* they have no consistent behavior:
226
*
227
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
228
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
229
* part of the sign extension.
230
*
231
* Therefore, if:
232
*
233
* 1) The LBR format is LBR_FORMAT_EIP_FLAGS2
234
* 2) CPU has no TSX support enabled
235
*
236
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
237
* value from rdmsr() must be converted to have a 61-bit sign extension,
238
* ignoring the TSX flags.
239
*/
240
static inline bool lbr_from_signext_quirk_needed(void)
241
{
242
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
243
boot_cpu_has(X86_FEATURE_RTM);
244
245
return !tsx_support;
246
}
247
248
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
249
250
/* If quirk is enabled, ensure sign extension is 63 bits: */
251
inline u64 lbr_from_signext_quirk_wr(u64 val)
252
{
253
if (static_branch_unlikely(&lbr_from_quirk_key)) {
254
/*
255
* Sign extend into bits 61:62 while preserving bit 63.
256
*
257
* Quirk is enabled when TSX is disabled. Therefore TSX bits
258
* in val are always OFF and must be changed to be sign
259
* extension bits. Since bits 59:60 are guaranteed to be
260
* part of the sign extension bits, we can just copy them
261
* to 61:62.
262
*/
263
val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
264
}
265
return val;
266
}
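/*
* Illustration: for a kernel 'from' address the true sign extension already
* sets bits 59:60, while the HW-captured value leaves the (disabled) TSX
* bits 61:62 clear. (LBR_FROM_SIGNEXT_2MSB & val) isolates bits 59:60 and
* the "<< 2" lands the copy in bits 61:62, so the OR above rebuilds the
* 63-bit sign extension that wrmsr() expects while leaving bit 63 untouched.
*/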
267
268
/*
269
* If quirk is needed, ensure sign extension is 61 bits:
270
*/
271
static u64 lbr_from_signext_quirk_rd(u64 val)
272
{
273
if (static_branch_unlikely(&lbr_from_quirk_key)) {
274
/*
275
* Quirk is on when TSX is not enabled. Therefore TSX
276
* flags must be read as OFF.
277
*/
278
val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
279
}
280
return val;
281
}
282
283
static __always_inline void wrlbr_from(unsigned int idx, u64 val)
284
{
285
val = lbr_from_signext_quirk_wr(val);
286
wrmsrq(x86_pmu.lbr_from + idx, val);
287
}
288
289
static __always_inline void wrlbr_to(unsigned int idx, u64 val)
290
{
291
wrmsrq(x86_pmu.lbr_to + idx, val);
292
}
293
294
static __always_inline void wrlbr_info(unsigned int idx, u64 val)
295
{
296
wrmsrq(x86_pmu.lbr_info + idx, val);
297
}
298
299
static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
300
{
301
u64 val;
302
303
if (lbr)
304
return lbr->from;
305
306
rdmsrq(x86_pmu.lbr_from + idx, val);
307
308
return lbr_from_signext_quirk_rd(val);
309
}
310
311
static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
312
{
313
u64 val;
314
315
if (lbr)
316
return lbr->to;
317
318
rdmsrq(x86_pmu.lbr_to + idx, val);
319
320
return val;
321
}
322
323
static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
324
{
325
u64 val;
326
327
if (lbr)
328
return lbr->info;
329
330
rdmsrq(x86_pmu.lbr_info + idx, val);
331
332
return val;
333
}
334
335
static inline void
336
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
337
{
338
wrlbr_from(idx, lbr->from);
339
wrlbr_to(idx, lbr->to);
340
if (need_info)
341
wrlbr_info(idx, lbr->info);
342
}
343
344
static inline bool
345
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
346
{
347
u64 from = rdlbr_from(idx, NULL);
348
349
/* Don't read invalid entry */
350
if (!from)
351
return false;
352
353
lbr->from = from;
354
lbr->to = rdlbr_to(idx, NULL);
355
if (need_info)
356
lbr->info = rdlbr_info(idx, NULL);
357
358
return true;
359
}
360
361
void intel_pmu_lbr_restore(void *ctx)
362
{
363
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
364
struct x86_perf_task_context *task_ctx = ctx;
365
bool need_info = x86_pmu.lbr_has_info;
366
u64 tos = task_ctx->tos;
367
unsigned lbr_idx, mask;
368
int i;
369
370
mask = x86_pmu.lbr_nr - 1;
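/*
* Note: lbr_nr is a power of two for the legacy LBRs that use this path
* (4/8/16/32 in the init routines below), so (tos - i) & mask wraps around
* the LBR ring buffer starting at the most recent entry; e.g. with 16 LBRs
* and tos == 3 the walk visits indices 3, 2, 1, 0, 15, 14, ...
*/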
371
for (i = 0; i < task_ctx->valid_lbrs; i++) {
372
lbr_idx = (tos - i) & mask;
373
wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
374
}
375
376
for (; i < x86_pmu.lbr_nr; i++) {
377
lbr_idx = (tos - i) & mask;
378
wrlbr_from(lbr_idx, 0);
379
wrlbr_to(lbr_idx, 0);
380
if (need_info)
381
wrlbr_info(lbr_idx, 0);
382
}
383
384
wrmsrq(x86_pmu.lbr_tos, tos);
385
386
if (cpuc->lbr_select)
387
wrmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel);
388
}
389
390
static void intel_pmu_arch_lbr_restore(void *ctx)
391
{
392
struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
393
struct lbr_entry *entries = task_ctx->entries;
394
int i;
395
396
/* Fast reset the LBRs before restore if the call stack is not full. */
397
if (!entries[x86_pmu.lbr_nr - 1].from)
398
intel_pmu_arch_lbr_reset();
399
400
for (i = 0; i < x86_pmu.lbr_nr; i++) {
401
if (!entries[i].from)
402
break;
403
wrlbr_all(&entries[i], i, true);
404
}
405
}
406
407
/*
408
* Restore the Architecture LBR state from the xsave area in the perf
409
* context data for the task via the XRSTORS instruction.
410
*/
411
static void intel_pmu_arch_lbr_xrstors(void *ctx)
412
{
413
struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
414
415
xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
416
}
417
418
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
419
{
420
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
421
return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);
422
423
return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
424
}
425
426
static inline bool has_lbr_callstack_users(void *ctx)
427
{
428
return task_context_opt(ctx)->lbr_callstack_users ||
429
x86_pmu.lbr_callstack_users;
430
}
431
432
static void __intel_pmu_lbr_restore(void *ctx)
433
{
434
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
435
436
if (!has_lbr_callstack_users(ctx) ||
437
task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
438
intel_pmu_lbr_reset();
439
return;
440
}
441
442
/*
443
* Does not restore the LBR registers if:
444
* - No one else touched them, and
445
* - Was not cleared in Cstate
446
*/
447
if ((ctx == cpuc->last_task_ctx) &&
448
(task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
449
!lbr_is_reset_in_cstate(ctx)) {
450
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
451
return;
452
}
453
454
x86_pmu.lbr_restore(ctx);
455
456
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
457
}
458
459
void intel_pmu_lbr_save(void *ctx)
460
{
461
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
462
struct x86_perf_task_context *task_ctx = ctx;
463
bool need_info = x86_pmu.lbr_has_info;
464
unsigned lbr_idx, mask;
465
u64 tos;
466
int i;
467
468
mask = x86_pmu.lbr_nr - 1;
469
tos = intel_pmu_lbr_tos();
470
for (i = 0; i < x86_pmu.lbr_nr; i++) {
471
lbr_idx = (tos - i) & mask;
472
if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
473
break;
474
}
475
task_ctx->valid_lbrs = i;
476
task_ctx->tos = tos;
477
478
if (cpuc->lbr_select)
479
rdmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel);
480
}
481
482
static void intel_pmu_arch_lbr_save(void *ctx)
483
{
484
struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
485
struct lbr_entry *entries = task_ctx->entries;
486
int i;
487
488
for (i = 0; i < x86_pmu.lbr_nr; i++) {
489
if (!rdlbr_all(&entries[i], i, true))
490
break;
491
}
492
493
/* LBR call stack is not full. Reset is required in restore. */
494
if (i < x86_pmu.lbr_nr)
495
entries[x86_pmu.lbr_nr - 1].from = 0;
496
}
497
498
/*
499
* Save the Architecture LBR state to the xsave area in the perf
500
* context data for the task via the XSAVES instruction.
501
*/
502
static void intel_pmu_arch_lbr_xsaves(void *ctx)
503
{
504
struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
505
506
xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
507
}
508
509
static void __intel_pmu_lbr_save(void *ctx)
510
{
511
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
512
513
if (!has_lbr_callstack_users(ctx)) {
514
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
515
return;
516
}
517
518
x86_pmu.lbr_save(ctx);
519
520
task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
521
522
cpuc->last_task_ctx = ctx;
523
cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
524
}
525
526
void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
527
struct task_struct *task, bool sched_in)
528
{
529
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
530
struct perf_ctx_data *ctx_data;
531
void *task_ctx;
532
533
if (!cpuc->lbr_users)
534
return;
535
536
/*
537
* If LBR callstack feature is enabled and the stack was saved when
538
* the task was scheduled out, restore the stack. Otherwise flush
539
* the LBR stack.
540
*/
541
rcu_read_lock();
542
ctx_data = rcu_dereference(task->perf_ctx_data);
543
task_ctx = ctx_data ? ctx_data->data : NULL;
544
if (task_ctx) {
545
if (sched_in)
546
__intel_pmu_lbr_restore(task_ctx);
547
else
548
__intel_pmu_lbr_save(task_ctx);
549
rcu_read_unlock();
550
return;
551
}
552
rcu_read_unlock();
553
554
/*
555
* Since a context switch can flip the address space and LBR entries
556
* are not tagged with an identifier, we need to wipe the LBR, even for
557
* per-cpu events. You simply cannot resolve the branches from the old
558
* address space.
559
*/
560
if (sched_in)
561
intel_pmu_lbr_reset();
562
}
563
564
static inline bool branch_user_callstack(unsigned br_sel)
565
{
566
return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
567
}
568
569
void intel_pmu_lbr_add(struct perf_event *event)
570
{
571
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
572
573
if (!x86_pmu.lbr_nr)
574
return;
575
576
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
577
cpuc->lbr_select = 1;
578
579
cpuc->br_sel = event->hw.branch_reg.reg;
580
581
if (branch_user_callstack(cpuc->br_sel)) {
582
if (event->attach_state & PERF_ATTACH_TASK) {
583
struct task_struct *task = event->hw.target;
584
struct perf_ctx_data *ctx_data;
585
586
rcu_read_lock();
587
ctx_data = rcu_dereference(task->perf_ctx_data);
588
if (ctx_data)
589
task_context_opt(ctx_data->data)->lbr_callstack_users++;
590
rcu_read_unlock();
591
} else
592
x86_pmu.lbr_callstack_users++;
593
}
594
/*
595
* Request pmu::sched_task() callback, which will fire inside the
596
* regular perf event scheduling, so that call will:
597
*
598
* - restore or wipe; when LBR-callstack,
599
* - wipe; otherwise,
600
*
601
* when this is from __perf_event_task_sched_in().
602
*
603
* However, if this is from perf_install_in_context(), no such callback
604
* will follow and we'll need to reset the LBR here if this is the
605
* first LBR event.
606
*
607
* The problem is, we cannot tell these cases apart... but we can
608
* exclude the biggest chunk of cases by looking at
609
* event->total_time_running. An event that has accrued runtime cannot
610
* be 'new'. Conversely, a new event can get installed through the
611
* context switch path for the first time.
612
*/
613
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
614
cpuc->lbr_pebs_users++;
615
perf_sched_cb_inc(event->pmu);
616
if (!cpuc->lbr_users++ && !event->total_time_running)
617
intel_pmu_lbr_reset();
618
}
619
620
void release_lbr_buffers(void)
621
{
622
struct kmem_cache *kmem_cache;
623
struct cpu_hw_events *cpuc;
624
int cpu;
625
626
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
627
return;
628
629
for_each_possible_cpu(cpu) {
630
cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
631
kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
632
if (kmem_cache && cpuc->lbr_xsave) {
633
kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
634
cpuc->lbr_xsave = NULL;
635
}
636
}
637
}
638
639
void reserve_lbr_buffers(void)
640
{
641
struct kmem_cache *kmem_cache;
642
struct cpu_hw_events *cpuc;
643
int cpu;
644
645
if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
646
return;
647
648
for_each_possible_cpu(cpu) {
649
cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
650
kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
651
if (!kmem_cache || cpuc->lbr_xsave)
652
continue;
653
654
cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
655
GFP_KERNEL | __GFP_ZERO,
656
cpu_to_node(cpu));
657
}
658
}
659
660
void intel_pmu_lbr_del(struct perf_event *event)
661
{
662
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
663
664
if (!x86_pmu.lbr_nr)
665
return;
666
667
if (branch_user_callstack(cpuc->br_sel)) {
668
if (event->attach_state & PERF_ATTACH_TASK) {
669
struct task_struct *task = event->hw.target;
670
struct perf_ctx_data *ctx_data;
671
672
rcu_read_lock();
673
ctx_data = rcu_dereference(task->perf_ctx_data);
674
if (ctx_data)
675
task_context_opt(ctx_data->data)->lbr_callstack_users--;
676
rcu_read_unlock();
677
} else
678
x86_pmu.lbr_callstack_users--;
679
}
680
681
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
682
cpuc->lbr_select = 0;
683
684
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
685
cpuc->lbr_pebs_users--;
686
cpuc->lbr_users--;
687
WARN_ON_ONCE(cpuc->lbr_users < 0);
688
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
689
perf_sched_cb_dec(event->pmu);
690
691
/*
692
* The logged occurrences information is only valid for the
693
* current LBR group. If another LBR group is scheduled in
694
* later, the information from the stale LBRs will be wrongly
695
* interpreted. Reset the LBRs here.
696
*
697
* Only clear once for a branch counter group with the leader
698
* event, because:
699
* - We cannot simply reset the LBRs when !cpuc->lbr_users, since
700
* it's possible that the last LBR user is not in a
701
* branch counter group, e.g., a branch_counters group +
702
* several normal LBR events.
703
* - The LBR reset can be done with any one of the events in a
704
* branch counter group, since they are always scheduled together.
705
* It's easy to force the leader event to be an LBR event.
706
*/
707
if (is_branch_counters_group(event) && event == event->group_leader)
708
intel_pmu_lbr_reset();
709
}
710
711
static inline bool vlbr_exclude_host(void)
712
{
713
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
714
715
return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
716
(unsigned long *)&cpuc->intel_ctrl_guest_mask);
717
}
718
719
void intel_pmu_lbr_enable_all(bool pmi)
720
{
721
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
722
723
if (cpuc->lbr_users && !vlbr_exclude_host())
724
__intel_pmu_lbr_enable(pmi);
725
}
726
727
void intel_pmu_lbr_disable_all(void)
728
{
729
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
730
731
if (cpuc->lbr_users && !vlbr_exclude_host()) {
732
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
733
return __intel_pmu_arch_lbr_disable();
734
735
__intel_pmu_lbr_disable();
736
}
737
}
738
739
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
740
{
741
unsigned long mask = x86_pmu.lbr_nr - 1;
742
struct perf_branch_entry *br = cpuc->lbr_entries;
743
u64 tos = intel_pmu_lbr_tos();
744
int i;
745
746
for (i = 0; i < x86_pmu.lbr_nr; i++) {
747
unsigned long lbr_idx = (tos - i) & mask;
748
union {
749
struct {
750
u32 from;
751
u32 to;
752
};
753
u64 lbr;
754
} msr_lastbranch;
755
756
rdmsrq(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
757
758
perf_clear_branch_entry_bitfields(br);
759
760
br->from = msr_lastbranch.from;
761
br->to = msr_lastbranch.to;
762
br++;
763
}
764
cpuc->lbr_stack.nr = i;
765
cpuc->lbr_stack.hw_idx = tos;
766
}
767
768
/*
769
* Due to lack of segmentation in Linux the effective address (offset)
770
* is the same as the linear address, allowing us to merge the LIP and EIP
771
* LBR formats.
772
*/
773
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
774
{
775
bool need_info = false, call_stack = false;
776
unsigned long mask = x86_pmu.lbr_nr - 1;
777
struct perf_branch_entry *br = cpuc->lbr_entries;
778
u64 tos = intel_pmu_lbr_tos();
779
int i;
780
int out = 0;
781
int num = x86_pmu.lbr_nr;
782
783
if (cpuc->lbr_sel) {
784
need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
785
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
786
call_stack = true;
787
}
788
789
for (i = 0; i < num; i++) {
790
unsigned long lbr_idx = (tos - i) & mask;
791
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
792
u16 cycles = 0;
793
794
from = rdlbr_from(lbr_idx, NULL);
795
to = rdlbr_to(lbr_idx, NULL);
796
797
/*
798
* Read LBR call stack entries
799
* until invalid entry (0s) is detected.
800
*/
801
if (call_stack && !from)
802
break;
803
804
if (x86_pmu.lbr_has_info) {
805
if (need_info) {
806
u64 info;
807
808
info = rdlbr_info(lbr_idx, NULL);
809
mis = !!(info & LBR_INFO_MISPRED);
810
pred = !mis;
811
cycles = (info & LBR_INFO_CYCLES);
812
if (x86_pmu.lbr_has_tsx) {
813
in_tx = !!(info & LBR_INFO_IN_TX);
814
abort = !!(info & LBR_INFO_ABORT);
815
}
816
}
817
} else {
818
int skip = 0;
819
820
if (x86_pmu.lbr_from_flags) {
821
mis = !!(from & LBR_FROM_FLAG_MISPRED);
822
pred = !mis;
823
skip = 1;
824
}
825
if (x86_pmu.lbr_has_tsx) {
826
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
827
abort = !!(from & LBR_FROM_FLAG_ABORT);
828
skip = 3;
829
}
830
from = (u64)((((s64)from) << skip) >> skip);
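/*
* The s64 cast plus "<< skip >> skip" clears the 'skip' flag bits consumed
* above and arithmetically re-sign-extends the remaining address bits.
*/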
831
832
if (x86_pmu.lbr_to_cycles) {
833
cycles = ((to >> 48) & LBR_INFO_CYCLES);
834
to = (u64)((((s64)to) << 16) >> 16);
835
}
836
}
837
838
/*
839
* Some CPUs report duplicated abort records,
840
* with the second entry not having an abort bit set.
841
* Skip them here. This loop runs backwards,
842
* so we need to undo the previous record.
843
* If the abort just happened outside the window,
844
* the extra entry cannot be removed.
845
*/
846
if (abort && x86_pmu.lbr_double_abort && out > 0)
847
out--;
848
849
perf_clear_branch_entry_bitfields(br+out);
850
br[out].from = from;
851
br[out].to = to;
852
br[out].mispred = mis;
853
br[out].predicted = pred;
854
br[out].in_tx = in_tx;
855
br[out].abort = abort;
856
br[out].cycles = cycles;
857
out++;
858
}
859
cpuc->lbr_stack.nr = out;
860
cpuc->lbr_stack.hw_idx = tos;
861
}
862
863
static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
864
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
865
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
866
867
static __always_inline int get_lbr_br_type(u64 info)
868
{
869
int type = 0;
870
871
if (static_branch_likely(&x86_lbr_type))
872
type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
873
874
return type;
875
}
876
877
static __always_inline bool get_lbr_mispred(u64 info)
878
{
879
bool mispred = 0;
880
881
if (static_branch_likely(&x86_lbr_mispred))
882
mispred = !!(info & LBR_INFO_MISPRED);
883
884
return mispred;
885
}
886
887
static __always_inline u16 get_lbr_cycles(u64 info)
888
{
889
u16 cycles = info & LBR_INFO_CYCLES;
890
891
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
892
(!static_branch_likely(&x86_lbr_cycles) ||
893
!(info & LBR_INFO_CYC_CNT_VALID)))
894
cycles = 0;
895
896
return cycles;
897
}
898
899
static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
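/*
* The assertion above guarantees that the reserved field of a
* perf_branch_entry is wide enough to temporarily stash all branch counter
* fields; see intel_pmu_store_lbr() below.
*/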
900
901
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
902
struct lbr_entry *entries)
903
{
904
struct perf_branch_entry *e;
905
struct lbr_entry *lbr;
906
u64 from, to, info;
907
int i;
908
909
for (i = 0; i < x86_pmu.lbr_nr; i++) {
910
lbr = entries ? &entries[i] : NULL;
911
e = &cpuc->lbr_entries[i];
912
913
from = rdlbr_from(i, lbr);
914
/*
915
* Read LBR entries until invalid entry (0s) is detected.
916
*/
917
if (!from)
918
break;
919
920
to = rdlbr_to(i, lbr);
921
info = rdlbr_info(i, lbr);
922
923
perf_clear_branch_entry_bitfields(e);
924
925
e->from = from;
926
e->to = to;
927
e->mispred = get_lbr_mispred(info);
928
e->predicted = !e->mispred;
929
e->in_tx = !!(info & LBR_INFO_IN_TX);
930
e->abort = !!(info & LBR_INFO_ABORT);
931
e->cycles = get_lbr_cycles(info);
932
e->type = get_lbr_br_type(info);
933
934
/*
935
* Leverage the reserved field of cpuc->lbr_entries[i] to
936
* temporarily store the branch counters information.
937
* The later code will decide what content can be disclosed
938
* to the perf tool. Please see intel_pmu_lbr_counters_reorder().
939
*/
940
e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
941
}
942
943
cpuc->lbr_stack.nr = i;
944
}
945
946
/*
947
* The enabled order may be different from the counter order.
948
* Update the lbr_counters with the enabled order.
949
*/
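/*
* Example (assuming LBR_INFO_BR_CNTR_BITS == 2): if the group enables the
* counters in the order {2, 0}, the 2-bit field of counter 2 is moved into
* bits 1:0 of the reordered value and the field of counter 0 into bits 3:2,
* so the sample reports the counters in enabled order rather than by index.
*/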
950
static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
951
struct perf_event *event)
952
{
953
int i, j, pos = 0, order[X86_PMC_IDX_MAX];
954
struct perf_event *leader, *sibling;
955
u64 src, dst, cnt;
956
957
leader = event->group_leader;
958
if (branch_sample_counters(leader))
959
order[pos++] = leader->hw.idx;
960
961
for_each_sibling_event(sibling, leader) {
962
if (!branch_sample_counters(sibling))
963
continue;
964
order[pos++] = sibling->hw.idx;
965
}
966
967
WARN_ON_ONCE(!pos);
968
969
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
970
src = cpuc->lbr_entries[i].reserved;
971
dst = 0;
972
for (j = 0; j < pos; j++) {
973
cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
974
dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
975
}
976
cpuc->lbr_counters[i] = dst;
977
cpuc->lbr_entries[i].reserved = 0;
978
}
979
}
980
981
void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
982
struct cpu_hw_events *cpuc,
983
struct perf_event *event)
984
{
985
if (is_branch_counters_group(event)) {
986
intel_pmu_lbr_counters_reorder(cpuc, event);
987
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
988
return;
989
}
990
991
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
992
}
993
994
static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
995
{
996
intel_pmu_store_lbr(cpuc, NULL);
997
}
998
999
static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
1000
{
1001
struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
1002
1003
if (!xsave) {
1004
intel_pmu_store_lbr(cpuc, NULL);
1005
return;
1006
}
1007
xsaves(&xsave->xsave, XFEATURE_MASK_LBR);
1008
1009
intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
1010
}
1011
1012
void intel_pmu_lbr_read(void)
1013
{
1014
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1015
1016
/*
1017
* Don't read when all LBR users are using adaptive PEBS.
1018
*
1019
* This could be smarter and actually check the event,
1020
* but this simple approach seems to work for now.
1021
*/
1022
if (!cpuc->lbr_users || vlbr_exclude_host() ||
1023
cpuc->lbr_users == cpuc->lbr_pebs_users)
1024
return;
1025
1026
x86_pmu.lbr_read(cpuc);
1027
1028
intel_pmu_lbr_filter(cpuc);
1029
}
1030
1031
/*
1032
* SW filter is used:
1033
* - in case there is no HW filter
1034
* - in case the HW filter has errata or limitations
1035
*/
1036
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
1037
{
1038
u64 br_type = event->attr.branch_sample_type;
1039
int mask = 0;
1040
1041
if (br_type & PERF_SAMPLE_BRANCH_USER)
1042
mask |= X86_BR_USER;
1043
1044
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
1045
mask |= X86_BR_KERNEL;
1046
1047
/* we ignore BRANCH_HV here */
1048
1049
if (br_type & PERF_SAMPLE_BRANCH_ANY)
1050
mask |= X86_BR_ANY;
1051
1052
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
1053
mask |= X86_BR_ANY_CALL;
1054
1055
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
1056
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
1057
1058
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
1059
mask |= X86_BR_IND_CALL;
1060
1061
if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
1062
mask |= X86_BR_ABORT;
1063
1064
if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
1065
mask |= X86_BR_IN_TX;
1066
1067
if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
1068
mask |= X86_BR_NO_TX;
1069
1070
if (br_type & PERF_SAMPLE_BRANCH_COND)
1071
mask |= X86_BR_JCC;
1072
1073
if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
1074
if (!x86_pmu_has_lbr_callstack())
1075
return -EOPNOTSUPP;
1076
if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
1077
return -EINVAL;
1078
mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
1079
X86_BR_CALL_STACK;
1080
}
1081
1082
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
1083
mask |= X86_BR_IND_JMP;
1084
1085
if (br_type & PERF_SAMPLE_BRANCH_CALL)
1086
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
1087
1088
if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
1089
mask |= X86_BR_TYPE_SAVE;
1090
1091
/*
1092
* stash actual user request into reg, it may
1093
* be used by fixup code for some CPU
1094
*/
1095
event->hw.branch_reg.reg = mask;
1096
return 0;
1097
}
1098
1099
/*
1100
* setup the HW LBR filter
1101
* Used only when available, may not be enough to disambiguate
1102
* all branches, may need the help of the SW filter
1103
*/
1104
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
1105
{
1106
struct hw_perf_event_extra *reg;
1107
u64 br_type = event->attr.branch_sample_type;
1108
u64 mask = 0, v;
1109
int i;
1110
1111
for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
1112
if (!(br_type & (1ULL << i)))
1113
continue;
1114
1115
v = x86_pmu.lbr_sel_map[i];
1116
if (v == LBR_NOT_SUPP)
1117
return -EOPNOTSUPP;
1118
1119
if (v != LBR_IGN)
1120
mask |= v;
1121
}
1122
1123
reg = &event->hw.branch_reg;
1124
reg->idx = EXTRA_REG_LBR;
1125
1126
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
1127
reg->config = mask;
1128
1129
/*
1130
* The Arch LBR HW can retrieve the common branch types
1131
* from the LBR_INFO. It doesn't require the high overhead
1132
* SW disassembly.
1133
* Enable the branch type by default for the Arch LBR.
1134
*/
1135
reg->reg |= X86_BR_TYPE_SAVE;
1136
return 0;
1137
}
1138
1139
/*
1140
* The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
1141
* in suppress mode. So LBR_SELECT should be set to
1142
* (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
1143
* But the 10th bit LBR_CALL_STACK does not operate
1144
* in suppress mode.
1145
*/
1146
reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
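/*
* The XOR above flips only the suppress-mode bits 0-8 of 'mask', which is
* exactly the (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) conversion
* described above, while LBR_CALL_STACK (bit 9) passes through unchanged.
*/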
1147
1148
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
1149
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
1150
x86_pmu.lbr_has_info)
1151
reg->config |= LBR_NO_INFO;
1152
1153
return 0;
1154
}
1155
1156
int intel_pmu_setup_lbr_filter(struct perf_event *event)
1157
{
1158
int ret = 0;
1159
1160
/*
1161
* no LBR on this PMU
1162
*/
1163
if (!x86_pmu.lbr_nr)
1164
return -EOPNOTSUPP;
1165
1166
/*
1167
* setup SW LBR filter
1168
*/
1169
ret = intel_pmu_setup_sw_lbr_filter(event);
1170
if (ret)
1171
return ret;
1172
1173
/*
1174
* setup HW LBR filter, if any
1175
*/
1176
if (x86_pmu.lbr_sel_map)
1177
ret = intel_pmu_setup_hw_lbr_filter(event);
1178
1179
return ret;
1180
}
1181
1182
enum {
1183
ARCH_LBR_BR_TYPE_JCC = 0,
1184
ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
1185
ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2,
1186
ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3,
1187
ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4,
1188
ARCH_LBR_BR_TYPE_NEAR_RET = 5,
1189
ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET,
1190
1191
ARCH_LBR_BR_TYPE_MAP_MAX = 16,
1192
};
1193
1194
static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
1195
[ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC,
1196
[ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP,
1197
[ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP,
1198
[ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL,
1199
[ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL,
1200
[ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET,
1201
};
1202
1203
/*
1204
* implement actual branch filter based on user demand.
1205
* Hardware may not exactly satisfy that request, thus
1206
* we need to inspect opcodes. Mismatched branches are
1207
* discarded. Therefore, the number of branches returned
1208
* in PERF_SAMPLE_BRANCH_STACK sample may vary.
1209
*/
1210
static void
1211
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
1212
{
1213
u64 from, to;
1214
int br_sel = cpuc->br_sel;
1215
int i, j, type, to_plm;
1216
bool compress = false;
1217
1218
/* if sampling all branches, then nothing to filter */
1219
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
1220
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
1221
return;
1222
1223
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
1224
1225
from = cpuc->lbr_entries[i].from;
1226
to = cpuc->lbr_entries[i].to;
1227
type = cpuc->lbr_entries[i].type;
1228
1229
/*
1230
* Parse the branch type recorded in LBR_x_INFO MSR.
1231
* Doesn't support OTHER_BRANCH decoding for now.
1232
* The OTHER_BRANCH branch type still relies on software decoding.
1233
*/
1234
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
1235
type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
1236
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
1237
type = arch_lbr_br_type_map[type] | to_plm;
1238
} else
1239
type = branch_type(from, to, cpuc->lbr_entries[i].abort);
1240
if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
1241
if (cpuc->lbr_entries[i].in_tx)
1242
type |= X86_BR_IN_TX;
1243
else
1244
type |= X86_BR_NO_TX;
1245
}
1246
1247
/* if type does not correspond, then discard */
1248
if (type == X86_BR_NONE || (br_sel & type) != type) {
1249
cpuc->lbr_entries[i].from = 0;
1250
compress = true;
1251
}
1252
1253
if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
1254
cpuc->lbr_entries[i].type = common_branch_type(type);
1255
}
1256
1257
if (!compress)
1258
return;
1259
1260
/* remove all entries with from=0 */
1261
for (i = 0; i < cpuc->lbr_stack.nr; ) {
1262
if (!cpuc->lbr_entries[i].from) {
1263
j = i;
1264
while (++j < cpuc->lbr_stack.nr) {
1265
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
1266
cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
1267
}
1268
cpuc->lbr_stack.nr--;
1269
if (!cpuc->lbr_entries[i].from)
1270
continue;
1271
}
1272
i++;
1273
}
1274
}
1275
1276
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
1277
{
1278
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1279
1280
/* Cannot get TOS for large PEBS and Arch LBR */
1281
if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
1282
(cpuc->n_pebs == cpuc->n_large_pebs))
1283
cpuc->lbr_stack.hw_idx = -1ULL;
1284
else
1285
cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
1286
1287
intel_pmu_store_lbr(cpuc, lbr);
1288
intel_pmu_lbr_filter(cpuc);
1289
}
1290
1291
/*
1292
* Map interface branch filters onto LBR filters
1293
*/
1294
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1295
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
1296
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
1297
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
1298
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1299
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP
1300
| LBR_IND_JMP | LBR_FAR,
1301
/*
1302
* NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
1303
*/
1304
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
1305
LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
1306
/*
1307
* NHM/WSM erratum: must include IND_JMP to capture IND_CALL
1308
*/
1309
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
1310
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1311
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1312
};
1313
1314
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1315
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
1316
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
1317
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
1318
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1319
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
1320
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1321
| LBR_FAR,
1322
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1323
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1324
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1325
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
1326
};
1327
1328
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1329
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
1330
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
1331
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
1332
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1333
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
1334
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1335
| LBR_FAR,
1336
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1337
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
1338
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1339
| LBR_RETURN | LBR_CALL_STACK,
1340
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1341
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
1342
};
1343
1344
static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1345
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY,
1346
[PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER,
1347
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL,
1348
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
1349
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN |
1350
ARCH_LBR_OTHER_BRANCH,
1351
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL |
1352
ARCH_LBR_IND_CALL |
1353
ARCH_LBR_OTHER_BRANCH,
1354
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL,
1355
[PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC,
1356
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL |
1357
ARCH_LBR_IND_CALL |
1358
ARCH_LBR_RETURN |
1359
ARCH_LBR_CALL_STACK,
1360
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP,
1361
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL,
1362
};
1363
1364
/* core */
1365
void __init intel_pmu_lbr_init_core(void)
1366
{
1367
x86_pmu.lbr_nr = 4;
1368
x86_pmu.lbr_tos = MSR_LBR_TOS;
1369
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1370
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1371
1372
/*
1373
* SW branch filter usage:
1374
* - compensate for lack of HW filter
1375
*/
1376
}
1377
1378
/* nehalem/westmere */
1379
void __init intel_pmu_lbr_init_nhm(void)
1380
{
1381
x86_pmu.lbr_nr = 16;
1382
x86_pmu.lbr_tos = MSR_LBR_TOS;
1383
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1384
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1385
1386
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1387
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
1388
1389
/*
1390
* SW branch filter usage:
1391
* - workaround LBR_SEL errata (see above)
1392
* - support syscall, sysret capture.
1393
* That requires LBR_FAR but that means far
1394
* jmps need to be filtered out
1395
*/
1396
}
1397
1398
/* sandy bridge */
1399
void __init intel_pmu_lbr_init_snb(void)
1400
{
1401
x86_pmu.lbr_nr = 16;
1402
x86_pmu.lbr_tos = MSR_LBR_TOS;
1403
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1404
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1405
1406
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1407
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
1408
1409
/*
1410
* SW branch filter usage:
1411
* - support syscall, sysret capture.
1412
* That requires LBR_FAR but that means far
1413
* jmps need to be filtered out
1414
*/
1415
}
1416
1417
static inline struct kmem_cache *
1418
create_lbr_kmem_cache(size_t size, size_t align)
1419
{
1420
return kmem_cache_create("x86_lbr", size, align, 0, NULL);
1421
}
1422
1423
/* haswell */
1424
void intel_pmu_lbr_init_hsw(void)
1425
{
1426
size_t size = sizeof(struct x86_perf_task_context);
1427
1428
x86_pmu.lbr_nr = 16;
1429
x86_pmu.lbr_tos = MSR_LBR_TOS;
1430
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1431
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1432
1433
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1434
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
1435
1436
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1437
}
1438
1439
/* skylake */
1440
__init void intel_pmu_lbr_init_skl(void)
1441
{
1442
size_t size = sizeof(struct x86_perf_task_context);
1443
1444
x86_pmu.lbr_nr = 32;
1445
x86_pmu.lbr_tos = MSR_LBR_TOS;
1446
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1447
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1448
x86_pmu.lbr_info = MSR_LBR_INFO_0;
1449
1450
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1451
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
1452
1453
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1454
1455
/*
1456
* SW branch filter usage:
1457
* - support syscall, sysret capture.
1458
* That requires LBR_FAR but that means far
1459
* jmps need to be filtered out
1460
*/
1461
}
1462
1463
/* atom */
1464
void __init intel_pmu_lbr_init_atom(void)
1465
{
1466
/*
1467
* only models starting at stepping 10 seems
1468
* to have an operational LBR which can freeze
1469
* on PMU interrupt
1470
*/
1471
if (boot_cpu_data.x86_vfm == INTEL_ATOM_BONNELL
1472
&& boot_cpu_data.x86_stepping < 10) {
1473
pr_cont("LBR disabled due to erratum");
1474
return;
1475
}
1476
1477
x86_pmu.lbr_nr = 8;
1478
x86_pmu.lbr_tos = MSR_LBR_TOS;
1479
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1480
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1481
1482
/*
1483
* SW branch filter usage:
1484
* - compensate for lack of HW filter
1485
*/
1486
}
1487
1488
/* slm */
1489
void __init intel_pmu_lbr_init_slm(void)
1490
{
1491
x86_pmu.lbr_nr = 8;
1492
x86_pmu.lbr_tos = MSR_LBR_TOS;
1493
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
1494
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
1495
1496
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1497
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
1498
1499
/*
1500
* SW branch filter usage:
1501
* - compensate for lack of HW filter
1502
*/
1503
pr_cont("8-deep LBR, ");
1504
}
1505
1506
/* Knights Landing */
1507
void intel_pmu_lbr_init_knl(void)
1508
{
1509
x86_pmu.lbr_nr = 8;
1510
x86_pmu.lbr_tos = MSR_LBR_TOS;
1511
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1512
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
1513
1514
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1515
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
1516
1517
/* Knights Landing does have MISPREDICT bit */
1518
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
1519
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
1520
}
1521
1522
void intel_pmu_lbr_init(void)
1523
{
1524
switch (x86_pmu.intel_cap.lbr_format) {
1525
case LBR_FORMAT_EIP_FLAGS2:
1526
x86_pmu.lbr_has_tsx = 1;
1527
x86_pmu.lbr_from_flags = 1;
1528
if (lbr_from_signext_quirk_needed())
1529
static_branch_enable(&lbr_from_quirk_key);
1530
break;
1531
1532
case LBR_FORMAT_EIP_FLAGS:
1533
x86_pmu.lbr_from_flags = 1;
1534
break;
1535
1536
case LBR_FORMAT_INFO:
1537
x86_pmu.lbr_has_tsx = 1;
1538
fallthrough;
1539
case LBR_FORMAT_INFO2:
1540
x86_pmu.lbr_has_info = 1;
1541
break;
1542
1543
case LBR_FORMAT_TIME:
1544
x86_pmu.lbr_from_flags = 1;
1545
x86_pmu.lbr_to_cycles = 1;
1546
break;
1547
}
1548
1549
if (x86_pmu.lbr_has_info) {
1550
/*
1551
* Only used in combination with baseline pebs.
1552
*/
1553
static_branch_enable(&x86_lbr_mispred);
1554
static_branch_enable(&x86_lbr_cycles);
1555
}
1556
}
1557
1558
/*
1559
* LBR state size is variable based on the max number of registers.
1560
* This calculates the expected state size, which should match
1561
* what the hardware enumerates for the size of XFEATURE_LBR.
1562
*/
1563
static inline unsigned int get_lbr_state_size(void)
1564
{
1565
return sizeof(struct arch_lbr_state) +
1566
x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1567
}
1568
1569
static bool is_arch_lbr_xsave_available(void)
1570
{
1571
if (!boot_cpu_has(X86_FEATURE_XSAVES))
1572
return false;
1573
1574
/*
1575
* Check the LBR state with the corresponding software structure.
1576
* Disable LBR XSAVES support if the size doesn't match.
1577
*/
1578
if (xfeature_size(XFEATURE_LBR) == 0)
1579
return false;
1580
1581
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
1582
return false;
1583
1584
return true;
1585
}
1586
1587
void __init intel_pmu_arch_lbr_init(void)
1588
{
1589
struct pmu *pmu = x86_get_pmu(smp_processor_id());
1590
union cpuid28_eax eax;
1591
union cpuid28_ebx ebx;
1592
union cpuid28_ecx ecx;
1593
unsigned int unused_edx;
1594
bool arch_lbr_xsave;
1595
size_t size;
1596
u64 lbr_nr;
1597
1598
/* Arch LBR Capabilities */
1599
cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);
1600
1601
lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
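/*
* Each set bit n in the CPUID depth mask advertises support for an LBR
* depth of 8 * (n + 1), so fls() * 8 selects the deepest supported depth
* (e.g. a mask of 0x7 yields lbr_nr == 24).
*/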
1602
if (!lbr_nr)
1603
goto clear_arch_lbr;
1604
1605
/* Apply the max depth of Arch LBR */
1606
if (wrmsrq_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
1607
goto clear_arch_lbr;
1608
1609
x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
1610
x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
1611
x86_pmu.lbr_lip = eax.split.lbr_lip;
1612
x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
1613
x86_pmu.lbr_filter = ebx.split.lbr_filter;
1614
x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
1615
x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
1616
x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
1617
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
1618
x86_pmu.lbr_counters = ecx.split.lbr_counters;
1619
x86_pmu.lbr_nr = lbr_nr;
1620
1621
if (!!x86_pmu.lbr_counters)
1622
x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT;
1623
1624
if (x86_pmu.lbr_mispred)
1625
static_branch_enable(&x86_lbr_mispred);
1626
if (x86_pmu.lbr_timed_lbr)
1627
static_branch_enable(&x86_lbr_cycles);
1628
if (x86_pmu.lbr_br_type)
1629
static_branch_enable(&x86_lbr_type);
1630
1631
arch_lbr_xsave = is_arch_lbr_xsave_available();
1632
if (arch_lbr_xsave) {
1633
size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
1634
get_lbr_state_size();
1635
pmu->task_ctx_cache = create_lbr_kmem_cache(size,
1636
XSAVE_ALIGNMENT);
1637
}
1638
1639
if (!pmu->task_ctx_cache) {
1640
arch_lbr_xsave = false;
1641
1642
size = sizeof(struct x86_perf_task_context_arch_lbr) +
1643
lbr_nr * sizeof(struct lbr_entry);
1644
pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1645
}
1646
1647
x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
1648
x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
1649
x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
1650
1651
/* LBR callstack requires both CPL and Branch Filtering support */
1652
if (!x86_pmu.lbr_cpl ||
1653
!x86_pmu.lbr_filter ||
1654
!x86_pmu.lbr_call_stack)
1655
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
1656
1657
if (!x86_pmu.lbr_cpl) {
1658
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
1659
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
1660
} else if (!x86_pmu.lbr_filter) {
1661
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
1662
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
1663
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
1664
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
1665
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
1666
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
1667
arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
1668
}
1669
1670
x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
1671
x86_pmu.lbr_ctl_map = arch_lbr_ctl_map;
1672
1673
if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
1674
x86_pmu.lbr_ctl_map = NULL;
1675
1676
x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
1677
if (arch_lbr_xsave) {
1678
x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
1679
x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
1680
x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
1681
pr_cont("XSAVE ");
1682
} else {
1683
x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
1684
x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
1685
x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
1686
}
1687
1688
pr_cont("Architectural LBR, ");
1689
1690
return;
1691
1692
clear_arch_lbr:
1693
setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
1694
}
1695
1696
/**
1697
* x86_perf_get_lbr - get the LBR records information
1698
*
1699
* @lbr: the caller's memory to store the LBR records information
1700
*/
1701
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
1702
{
1703
lbr->nr = x86_pmu.lbr_nr;
1704
lbr->from = x86_pmu.lbr_from;
1705
lbr->to = x86_pmu.lbr_to;
1706
lbr->info = x86_pmu.lbr_info;
1707
lbr->has_callstack = x86_pmu_has_lbr_callstack();
1708
}
1709
EXPORT_SYMBOL_FOR_KVM(x86_perf_get_lbr);
1710
1711
struct event_constraint vlbr_constraint =
1712
__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
1713
FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);
1714
1715