GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/intel/ds.c
1
// SPDX-License-Identifier: GPL-2.0
2
#include <linux/bitops.h>
3
#include <linux/types.h>
4
#include <linux/slab.h>
5
#include <linux/sched/clock.h>
6
7
#include <asm/cpu_entry_area.h>
8
#include <asm/debugreg.h>
9
#include <asm/perf_event.h>
10
#include <asm/tlbflush.h>
11
#include <asm/insn.h>
12
#include <asm/io.h>
13
#include <asm/msr.h>
14
#include <asm/timer.h>
15
16
#include "../perf_event.h"
17
18
/* Waste a full page so it can be mapped into the cpu_entry_area */
19
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
20
21
/* The size of a BTS record in bytes: */
22
#define BTS_RECORD_SIZE 24
23
24
#define PEBS_FIXUP_SIZE PAGE_SIZE
25
26
/*
27
* pebs_record_32 for p4 and core not supported
28
29
struct pebs_record_32 {
30
u32 flags, ip;
31
u32 ax, bx, cx, dx;
32
u32 si, di, bp, sp;
33
};
34
35
*/
36
37
union omr_encoding {
38
struct {
39
u8 omr_source : 4;
40
u8 omr_remote : 1;
41
u8 omr_hitm : 1;
42
u8 omr_snoop : 1;
43
u8 omr_promoted : 1;
44
};
45
u8 omr_full;
46
};
47
48
union intel_x86_pebs_dse {
49
u64 val;
50
struct {
51
unsigned int ld_dse:4;
52
unsigned int ld_stlb_miss:1;
53
unsigned int ld_locked:1;
54
unsigned int ld_data_blk:1;
55
unsigned int ld_addr_blk:1;
56
unsigned int ld_reserved:24;
57
};
58
struct {
59
unsigned int st_l1d_hit:1;
60
unsigned int st_reserved1:3;
61
unsigned int st_stlb_miss:1;
62
unsigned int st_locked:1;
63
unsigned int st_reserved2:26;
64
};
65
struct {
66
unsigned int st_lat_dse:4;
67
unsigned int st_lat_stlb_miss:1;
68
unsigned int st_lat_locked:1;
69
unsigned int ld_reserved3:26;
70
};
71
struct {
72
unsigned int mtl_dse:5;
73
unsigned int mtl_locked:1;
74
unsigned int mtl_stlb_miss:1;
75
unsigned int mtl_fwd_blk:1;
76
unsigned int ld_reserved4:24;
77
};
78
struct {
79
unsigned int lnc_dse:8;
80
unsigned int ld_reserved5:2;
81
unsigned int lnc_stlb_miss:1;
82
unsigned int lnc_locked:1;
83
unsigned int lnc_data_blk:1;
84
unsigned int lnc_addr_blk:1;
85
unsigned int ld_reserved6:18;
86
};
87
struct {
88
unsigned int pnc_dse: 8;
89
unsigned int pnc_l2_miss:1;
90
unsigned int pnc_stlb_clean_hit:1;
91
unsigned int pnc_stlb_any_hit:1;
92
unsigned int pnc_stlb_miss:1;
93
unsigned int pnc_locked:1;
94
unsigned int pnc_data_blk:1;
95
unsigned int pnc_addr_blk:1;
96
unsigned int pnc_fb_full:1;
97
unsigned int ld_reserved8:16;
98
};
99
struct {
100
unsigned int arw_dse:8;
101
unsigned int arw_l2_miss:1;
102
unsigned int arw_xq_promotion:1;
103
unsigned int arw_reissue:1;
104
unsigned int arw_stlb_miss:1;
105
unsigned int arw_locked:1;
106
unsigned int arw_data_blk:1;
107
unsigned int arw_addr_blk:1;
108
unsigned int arw_fb_full:1;
109
unsigned int ld_reserved9:16;
110
};
111
};
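/*
* All the structs above alias the same 64-bit data-source/status word of a
* PEBS record; only the view matching the reporting microarchitecture is
* meaningful. A decoder assigns the raw value once and reads the right
* fields, roughly (illustrative sketch, "status" stands for the raw value):
*
*	union intel_x86_pebs_dse dse;
*
*	dse.val = status;
*	if (dse.ld_stlb_miss)		-> generic load layout, bit 4
*		...
*
* The *_latency_data() helpers below pick the correct view per PMU.
*/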
112
113
114
/*
115
* Map PEBS Load Latency Data Source encodings to generic
116
* memory data source information
117
*/
118
#define P(a, b) PERF_MEM_S(a, b)
119
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
120
#define LEVEL(x) P(LVLNUM, x)
121
#define REM P(REMOTE, REMOTE)
122
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
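/*
* P(a, b) is shorthand for PERF_MEM_S(a, b), which shifts a
* PERF_MEM_<field>_<value> constant into its field of perf_mem_data_src.
* The 0x01 entry below, for example, is roughly equivalent to:
*
*	PERF_MEM_S(OP, LOAD) | PERF_MEM_S(LVL, HIT) | PERF_MEM_S(LVL, L1) |
*	PERF_MEM_S(LVLNUM, L1) | PERF_MEM_S(SNOOP, NONE)
*
* i.e. a load that hit the local L1 with no snoop involved, which is what
* userspace sees via PERF_SAMPLE_DATA_SRC.
*/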
123
124
/* Version for Sandy Bridge and later */
125
static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
126
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
127
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
128
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
129
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
130
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
131
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
132
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
133
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
134
OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
135
OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
136
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
137
OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
138
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
139
OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
140
OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
141
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
142
};
143
144
/* Patch up minor differences in the bits */
145
void __init intel_pmu_pebs_data_source_nhm(void)
146
{
147
pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
148
pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
149
pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
150
}
151
152
static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
153
{
154
u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
155
156
data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
157
data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
158
data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
159
data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
160
data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
161
}
162
163
void __init intel_pmu_pebs_data_source_skl(bool pmem)
164
{
165
__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
166
}
167
168
static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
169
{
170
data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
171
data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
172
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
173
}
174
175
void __init intel_pmu_pebs_data_source_grt(void)
176
{
177
__intel_pmu_pebs_data_source_grt(pebs_data_source);
178
}
179
180
void __init intel_pmu_pebs_data_source_adl(void)
181
{
182
u64 *data_source;
183
184
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
185
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
186
__intel_pmu_pebs_data_source_skl(false, data_source);
187
188
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
189
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
190
__intel_pmu_pebs_data_source_grt(data_source);
191
}
192
193
static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
194
{
195
data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
196
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
197
data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE);
198
data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
199
data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
200
data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
201
}
202
203
void __init intel_pmu_pebs_data_source_mtl(void)
204
{
205
u64 *data_source;
206
207
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
208
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
209
__intel_pmu_pebs_data_source_skl(false, data_source);
210
211
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
212
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
213
__intel_pmu_pebs_data_source_cmt(data_source);
214
}
215
216
void __init intel_pmu_pebs_data_source_arl_h(void)
217
{
218
u64 *data_source;
219
220
intel_pmu_pebs_data_source_lnl();
221
222
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
223
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
224
__intel_pmu_pebs_data_source_cmt(data_source);
225
}
226
227
void __init intel_pmu_pebs_data_source_cmt(void)
228
{
229
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
230
}
231
232
/* Version for Lion Cove and later */
233
static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
234
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
235
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
236
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
237
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */
238
0, /* 0x04: Reserved */
239
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */
240
OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */
241
0, /* 0x07: Reserved */
242
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */
243
0, /* 0x09: Reserved */
244
0, /* 0x0a: Reserved */
245
0, /* 0x0b: Reserved */
246
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */
247
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */
248
0, /* 0x0e: Reserved */
249
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */
250
OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */
251
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
252
};
253
254
void __init intel_pmu_pebs_data_source_lnl(void)
255
{
256
u64 *data_source;
257
258
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
259
memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
260
261
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
262
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
263
__intel_pmu_pebs_data_source_cmt(data_source);
264
}
265
266
/* Version for Panthercove and later */
267
268
/* L2 hit */
269
#define PNC_PEBS_DATA_SOURCE_MAX 16
270
static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = {
271
P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: non-cache access */
272
OP_LH | LEVEL(L0) | P(SNOOP, NONE), /* 0x01: L0 hit */
273
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
274
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: L1 Miss Handling Buffer hit */
275
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x04: L2 Hit Clean */
276
0, /* 0x05: Reserved */
277
0, /* 0x06: Reserved */
278
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT), /* 0x07: L2 Hit Snoop HIT */
279
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM), /* 0x08: L2 Hit Snoop Hit Modified */
280
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, MISS), /* 0x09: Prefetch Promotion */
281
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, MISS), /* 0x0a: Cross Core Prefetch Promotion */
282
0, /* 0x0b: Reserved */
283
0, /* 0x0c: Reserved */
284
0, /* 0x0d: Reserved */
285
0, /* 0x0e: Reserved */
286
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
287
};
288
289
/* Version for Arctic Wolf and later */
290
291
/* L2 hit */
292
#define ARW_PEBS_DATA_SOURCE_MAX 16
293
static u64 arw_pebs_l2_hit_data_source[ARW_PEBS_DATA_SOURCE_MAX] = {
294
P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: non-cache access */
295
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
296
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: WCB Hit */
297
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 Hit Clean */
298
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT), /* 0x04: L2 Hit Snoop HIT */
299
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM), /* 0x05: L2 Hit Snoop Hit Modified */
300
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x06: uncached */
301
0, /* 0x07: Reserved */
302
0, /* 0x08: Reserved */
303
0, /* 0x09: Reserved */
304
0, /* 0x0a: Reserved */
305
0, /* 0x0b: Reserved */
306
0, /* 0x0c: Reserved */
307
0, /* 0x0d: Reserved */
308
0, /* 0x0e: Reserved */
309
0, /* 0x0f: Reserved */
310
};
311
312
/* L2 miss */
313
#define OMR_DATA_SOURCE_MAX 16
314
static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = {
315
P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: invalid */
316
0, /* 0x01: Reserved */
317
OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_SHARE), /* 0x02: local CA shared cache */
318
OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_NON_SHARE),/* 0x03: local CA non-shared cache */
319
OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_IO), /* 0x04: other CA IO agent */
320
OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_SHARE), /* 0x05: other CA shared cache */
321
OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_NON_SHARE),/* 0x06: other CA non-shared cache */
322
OP_LH | LEVEL(RAM) | P(REGION, MMIO), /* 0x07: MMIO */
323
OP_LH | LEVEL(RAM) | P(REGION, MEM0), /* 0x08: Memory region 0 */
324
OP_LH | LEVEL(RAM) | P(REGION, MEM1), /* 0x09: Memory region 1 */
325
OP_LH | LEVEL(RAM) | P(REGION, MEM2), /* 0x0a: Memory region 2 */
326
OP_LH | LEVEL(RAM) | P(REGION, MEM3), /* 0x0b: Memory region 3 */
327
OP_LH | LEVEL(RAM) | P(REGION, MEM4), /* 0x0c: Memory region 4 */
328
OP_LH | LEVEL(RAM) | P(REGION, MEM5), /* 0x0d: Memory region 5 */
329
OP_LH | LEVEL(RAM) | P(REGION, MEM6), /* 0x0e: Memory region 6 */
330
OP_LH | LEVEL(RAM) | P(REGION, MEM7), /* 0x0f: Memory region 7 */
331
};
332
333
static u64 parse_omr_data_source(u8 dse)
334
{
335
union omr_encoding omr;
336
u64 val = 0;
337
338
omr.omr_full = dse;
339
val = omr_data_source[omr.omr_source];
340
if (omr.omr_source > 0x1 && omr.omr_source < 0x7)
341
val |= omr.omr_remote ? P(LVL, REM_CCE1) : 0;
342
else if (omr.omr_source > 0x7)
343
val |= omr.omr_remote ? P(LVL, REM_RAM1) : P(LVL, LOC_RAM);
344
345
if (omr.omr_remote)
346
val |= REM;
347
348
val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
349
350
if (omr.omr_source == 0x2) {
351
u8 snoop = omr.omr_snoop | omr.omr_promoted;
352
353
if (snoop == 0x0)
354
val |= P(SNOOP, NA);
355
else if (snoop == 0x1)
356
val |= P(SNOOP, MISS);
357
else if (snoop == 0x2)
358
val |= P(SNOOP, HIT);
359
else if (snoop == 0x3)
360
val |= P(SNOOP, NONE);
361
} else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
362
val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
363
}
364
365
return val;
366
}
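/*
* parse_omr_data_source() above decodes the OMR byte laid out by the
* omr_encoding union: the low nibble (omr_source) indexes omr_data_source[]
* for the base level/region, omr_remote selects the remote variant and sets
* the REMOTE bit, omr_hitm upgrades the snoop result from HIT to HITM, and
* for the cache sources the omr_snoop/omr_promoted bits further refine the
* snoop outcome (none/miss/hit or snoop-forward).
*/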
367
368
static u64 precise_store_data(u64 status)
369
{
370
union intel_x86_pebs_dse dse;
371
u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
372
373
dse.val = status;
374
375
/*
376
* bit 4: TLB access
377
* 1 = store missed 2nd level TLB
378
*
379
* so it either hit the walker or the OS
380
* otherwise hit 2nd level TLB
381
*/
382
if (dse.st_stlb_miss)
383
val |= P(TLB, MISS);
384
else
385
val |= P(TLB, HIT);
386
387
/*
388
* bit 0: hit L1 data cache
389
* if not set, then all we know is that
390
* it missed L1D
391
*/
392
if (dse.st_l1d_hit)
393
val |= P(LVL, HIT);
394
else
395
val |= P(LVL, MISS);
396
397
/*
398
* bit 5: Locked prefix
399
*/
400
if (dse.st_locked)
401
val |= P(LOCK, LOCKED);
402
403
return val;
404
}
405
406
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
407
{
408
union perf_mem_data_src dse;
409
410
dse.val = PERF_MEM_NA;
411
412
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
413
dse.mem_op = PERF_MEM_OP_STORE;
414
else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
415
dse.mem_op = PERF_MEM_OP_LOAD;
416
417
/*
418
* L1 info only valid for following events:
419
*
420
* MEM_UOPS_RETIRED.STLB_MISS_STORES
421
* MEM_UOPS_RETIRED.LOCK_STORES
422
* MEM_UOPS_RETIRED.SPLIT_STORES
423
* MEM_UOPS_RETIRED.ALL_STORES
424
*/
425
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
426
if (status & 1)
427
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
428
else
429
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
430
}
431
return dse.val;
432
}
433
434
static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
435
{
436
/*
437
* TLB access
438
* 0 = did not miss 2nd level TLB
439
* 1 = missed 2nd level TLB
440
*/
441
if (tlb)
442
*val |= P(TLB, MISS) | P(TLB, L2);
443
else
444
*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
445
446
/* locked prefix */
447
if (lock)
448
*val |= P(LOCK, LOCKED);
449
}
450
451
/* Retrieve the latency data for e-core of ADL */
452
static u64 __grt_latency_data(struct perf_event *event, u64 status,
453
u8 dse, bool tlb, bool lock, bool blk)
454
{
455
u64 val;
456
457
WARN_ON_ONCE(is_hybrid() &&
458
hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
459
460
dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
461
val = hybrid_var(event->pmu, pebs_data_source)[dse];
462
463
pebs_set_tlb_lock(&val, tlb, lock);
464
465
if (blk)
466
val |= P(BLK, DATA);
467
else
468
val |= P(BLK, NA);
469
470
return val;
471
}
472
473
u64 grt_latency_data(struct perf_event *event, u64 status)
474
{
475
union intel_x86_pebs_dse dse;
476
477
dse.val = status;
478
479
return __grt_latency_data(event, status, dse.ld_dse,
480
dse.ld_locked, dse.ld_stlb_miss,
481
dse.ld_data_blk);
482
}
483
484
/* Retrieve the latency data for e-core of MTL */
485
u64 cmt_latency_data(struct perf_event *event, u64 status)
486
{
487
union intel_x86_pebs_dse dse;
488
489
dse.val = status;
490
491
return __grt_latency_data(event, status, dse.mtl_dse,
492
dse.mtl_stlb_miss, dse.mtl_locked,
493
dse.mtl_fwd_blk);
494
}
495
496
static u64 arw_latency_data(struct perf_event *event, u64 status)
497
{
498
union intel_x86_pebs_dse dse;
499
union perf_mem_data_src src;
500
u64 val;
501
502
dse.val = status;
503
504
if (!dse.arw_l2_miss)
505
val = arw_pebs_l2_hit_data_source[dse.arw_dse & 0xf];
506
else
507
val = parse_omr_data_source(dse.arw_dse);
508
509
if (!val)
510
val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
511
512
if (dse.arw_stlb_miss)
513
val |= P(TLB, MISS) | P(TLB, L2);
514
else
515
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
516
517
if (dse.arw_locked)
518
val |= P(LOCK, LOCKED);
519
520
if (dse.arw_data_blk)
521
val |= P(BLK, DATA);
522
if (dse.arw_addr_blk)
523
val |= P(BLK, ADDR);
524
if (!dse.arw_data_blk && !dse.arw_addr_blk)
525
val |= P(BLK, NA);
526
527
src.val = val;
528
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
529
src.mem_op = P(OP, STORE);
530
531
return src.val;
532
}
533
534
static u64 lnc_latency_data(struct perf_event *event, u64 status)
535
{
536
union intel_x86_pebs_dse dse;
537
union perf_mem_data_src src;
538
u64 val;
539
540
dse.val = status;
541
542
/* LNC core latency data */
543
val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
544
if (!val)
545
val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
546
547
if (dse.lnc_stlb_miss)
548
val |= P(TLB, MISS) | P(TLB, L2);
549
else
550
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
551
552
if (dse.lnc_locked)
553
val |= P(LOCK, LOCKED);
554
555
if (dse.lnc_data_blk)
556
val |= P(BLK, DATA);
557
if (dse.lnc_addr_blk)
558
val |= P(BLK, ADDR);
559
if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
560
val |= P(BLK, NA);
561
562
src.val = val;
563
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
564
src.mem_op = P(OP, STORE);
565
566
return src.val;
567
}
568
569
u64 lnl_latency_data(struct perf_event *event, u64 status)
570
{
571
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
572
573
if (pmu->pmu_type == hybrid_small)
574
return cmt_latency_data(event, status);
575
576
return lnc_latency_data(event, status);
577
}
578
579
u64 arl_h_latency_data(struct perf_event *event, u64 status)
580
{
581
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
582
583
if (pmu->pmu_type == hybrid_tiny)
584
return cmt_latency_data(event, status);
585
586
return lnl_latency_data(event, status);
587
}
588
589
u64 pnc_latency_data(struct perf_event *event, u64 status)
590
{
591
union intel_x86_pebs_dse dse;
592
union perf_mem_data_src src;
593
u64 val;
594
595
dse.val = status;
596
597
if (!dse.pnc_l2_miss)
598
val = pnc_pebs_l2_hit_data_source[dse.pnc_dse & 0xf];
599
else
600
val = parse_omr_data_source(dse.pnc_dse);
601
602
if (!val)
603
val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
604
605
if (dse.pnc_stlb_miss)
606
val |= P(TLB, MISS) | P(TLB, L2);
607
else
608
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
609
610
if (dse.pnc_locked)
611
val |= P(LOCK, LOCKED);
612
613
if (dse.pnc_data_blk)
614
val |= P(BLK, DATA);
615
if (dse.pnc_addr_blk)
616
val |= P(BLK, ADDR);
617
if (!dse.pnc_data_blk && !dse.pnc_addr_blk)
618
val |= P(BLK, NA);
619
620
src.val = val;
621
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
622
src.mem_op = P(OP, STORE);
623
624
return src.val;
625
}
626
627
u64 nvl_latency_data(struct perf_event *event, u64 status)
628
{
629
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
630
631
if (pmu->pmu_type == hybrid_small)
632
return arw_latency_data(event, status);
633
634
return pnc_latency_data(event, status);
635
}
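/*
* The lnl/arl_h/nvl wrappers above only dispatch on the hybrid PMU type of
* the event's PMU: the small/tiny (e-core) PMUs get the CMT/ARW decoders,
* everything else falls through to the matching p-core decoder (LNC or PNC).
*/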
636
637
static u64 load_latency_data(struct perf_event *event, u64 status)
638
{
639
union intel_x86_pebs_dse dse;
640
u64 val;
641
642
dse.val = status;
643
644
/*
645
* use the mapping table for bits 0-3
646
*/
647
val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
648
649
/*
650
* Nehalem models do not support TLB or Lock info
651
*/
652
if (x86_pmu.pebs_no_tlb) {
653
val |= P(TLB, NA) | P(LOCK, NA);
654
return val;
655
}
656
657
pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
658
659
/*
660
* Ice Lake and earlier models do not support block infos.
661
*/
662
if (!x86_pmu.pebs_block) {
663
val |= P(BLK, NA);
664
return val;
665
}
666
/*
667
* bit 6: load was blocked since its data could not be forwarded
668
* from a preceding store
669
*/
670
if (dse.ld_data_blk)
671
val |= P(BLK, DATA);
672
673
/*
674
* bit 7: load was blocked due to potential address conflict with
675
* a preceding store
676
*/
677
if (dse.ld_addr_blk)
678
val |= P(BLK, ADDR);
679
680
if (!dse.ld_data_blk && !dse.ld_addr_blk)
681
val |= P(BLK, NA);
682
683
return val;
684
}
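/*
* Worked example for the generic load layout (illustrative value): a DSE
* status of 0x31 has ld_dse = 0x1, ld_stlb_miss = 1 and ld_locked = 1, so
* load_latency_data() reports an L1 hit (table entry 0x01) for a locked
* load that missed the second-level TLB; neither block bit is set, so the
* result carries BLK,NA.
*/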
685
686
static u64 store_latency_data(struct perf_event *event, u64 status)
687
{
688
union intel_x86_pebs_dse dse;
689
union perf_mem_data_src src;
690
u64 val;
691
692
dse.val = status;
693
694
/*
695
* use the mapping table for bits 0-3
696
*/
697
val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
698
699
pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
700
701
val |= P(BLK, NA);
702
703
/*
704
* the pebs_data_source table is only for loads
705
* so override the mem_op to say STORE instead
706
*/
707
src.val = val;
708
src.mem_op = P(OP, STORE);
709
710
return src.val;
711
}
712
713
struct pebs_record_core {
714
u64 flags, ip;
715
u64 ax, bx, cx, dx;
716
u64 si, di, bp, sp;
717
u64 r8, r9, r10, r11;
718
u64 r12, r13, r14, r15;
719
};
720
721
struct pebs_record_nhm {
722
u64 flags, ip;
723
u64 ax, bx, cx, dx;
724
u64 si, di, bp, sp;
725
u64 r8, r9, r10, r11;
726
u64 r12, r13, r14, r15;
727
u64 status, dla, dse, lat;
728
};
729
730
/*
731
* Same as pebs_record_nhm, with two additional fields.
732
*/
733
struct pebs_record_hsw {
734
u64 flags, ip;
735
u64 ax, bx, cx, dx;
736
u64 si, di, bp, sp;
737
u64 r8, r9, r10, r11;
738
u64 r12, r13, r14, r15;
739
u64 status, dla, dse, lat;
740
u64 real_ip, tsx_tuning;
741
};
742
743
union hsw_tsx_tuning {
744
struct {
745
u32 cycles_last_block : 32,
746
hle_abort : 1,
747
rtm_abort : 1,
748
instruction_abort : 1,
749
non_instruction_abort : 1,
750
retry : 1,
751
data_conflict : 1,
752
capacity_writes : 1,
753
capacity_reads : 1;
754
};
755
u64 value;
756
};
757
758
#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
759
760
/* Same as HSW, plus TSC */
761
762
struct pebs_record_skl {
763
u64 flags, ip;
764
u64 ax, bx, cx, dx;
765
u64 si, di, bp, sp;
766
u64 r8, r9, r10, r11;
767
u64 r12, r13, r14, r15;
768
u64 status, dla, dse, lat;
769
u64 real_ip, tsx_tuning;
770
u64 tsc;
771
};
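/*
* The raw DS PEBS record grew over time: pebs_record_core carries only the
* GP register state, pebs_record_nhm adds status/dla/dse/lat,
* pebs_record_hsw adds real_ip/tsx_tuning and pebs_record_skl adds the TSC.
* Which layout the hardware writes is indicated by the PEBS record format
* version consulted as x86_pmu.intel_cap.pebs_format elsewhere in this file.
*/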
772
773
void init_debug_store_on_cpu(int cpu)
774
{
775
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
776
777
if (!ds)
778
return;
779
780
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
781
(u32)((u64)(unsigned long)ds),
782
(u32)((u64)(unsigned long)ds >> 32));
783
}
784
785
void fini_debug_store_on_cpu(int cpu)
786
{
787
if (!per_cpu(cpu_hw_events, cpu).ds)
788
return;
789
790
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
791
}
792
793
static DEFINE_PER_CPU(void *, insn_buffer);
794
795
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
796
{
797
unsigned long start = (unsigned long)cea;
798
phys_addr_t pa;
799
size_t msz = 0;
800
801
pa = virt_to_phys(addr);
802
803
preempt_disable();
804
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
805
cea_set_pte(cea, pa, prot);
806
807
/*
808
* This is a cross-CPU update of the cpu_entry_area, we must shoot down
809
* all TLB entries for it.
810
*/
811
flush_tlb_kernel_range(start, start + size);
812
preempt_enable();
813
}
814
815
static void ds_clear_cea(void *cea, size_t size)
816
{
817
unsigned long start = (unsigned long)cea;
818
size_t msz = 0;
819
820
preempt_disable();
821
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
822
cea_set_pte(cea, 0, PAGE_NONE);
823
824
flush_tlb_kernel_range(start, start + size);
825
preempt_enable();
826
}
827
828
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
829
{
830
unsigned int order = get_order(size);
831
int node = cpu_to_node(cpu);
832
struct page *page;
833
834
page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
835
return page ? page_address(page) : NULL;
836
}
837
838
static void dsfree_pages(const void *buffer, size_t size)
839
{
840
if (buffer)
841
free_pages((unsigned long)buffer, get_order(size));
842
}
843
844
static int alloc_pebs_buffer(int cpu)
845
{
846
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
847
struct debug_store *ds = hwev->ds;
848
size_t bsiz = x86_pmu.pebs_buffer_size;
849
int max, node = cpu_to_node(cpu);
850
void *buffer, *insn_buff, *cea;
851
852
if (!intel_pmu_has_pebs())
853
return 0;
854
855
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
856
if (unlikely(!buffer))
857
return -ENOMEM;
858
859
if (x86_pmu.arch_pebs) {
860
hwev->pebs_vaddr = buffer;
861
return 0;
862
}
863
864
/*
865
* HSW+ already provides us the eventing ip; no need to allocate this
866
* buffer then.
867
*/
868
if (x86_pmu.intel_cap.pebs_format < 2) {
869
insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
870
if (!insn_buff) {
871
dsfree_pages(buffer, bsiz);
872
return -ENOMEM;
873
}
874
per_cpu(insn_buffer, cpu) = insn_buff;
875
}
876
hwev->pebs_vaddr = buffer;
877
/* Update the cpu entry area mapping */
878
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
879
ds->pebs_buffer_base = (unsigned long) cea;
880
ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
881
ds->pebs_index = ds->pebs_buffer_base;
882
max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
883
ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
884
return 0;
885
}
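/*
* For legacy DS-based PEBS the freshly allocated pages are also mapped into
* the per-CPU cpu_entry_area and the DS pointers use that alias, so the
* buffer stays visible to the CPU even with page-table isolation; the
* insn_buffer is only needed on pre-Haswell parts (pebs_format < 2) that
* don't report the eventing IP. With arch-PEBS the raw buffer is kept as-is
* and its physical address is later programmed into MSR_IA32_PEBS_BASE
* (see init_arch_pebs_on_cpu()).
*/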
886
887
static void release_pebs_buffer(int cpu)
888
{
889
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
890
void *cea;
891
892
if (!intel_pmu_has_pebs())
893
return;
894
895
if (x86_pmu.ds_pebs) {
896
kfree(per_cpu(insn_buffer, cpu));
897
per_cpu(insn_buffer, cpu) = NULL;
898
899
/* Clear the fixmap */
900
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
901
ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
902
}
903
904
dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
905
hwev->pebs_vaddr = NULL;
906
}
907
908
static int alloc_bts_buffer(int cpu)
909
{
910
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
911
struct debug_store *ds = hwev->ds;
912
void *buffer, *cea;
913
int max;
914
915
if (!x86_pmu.bts)
916
return 0;
917
918
buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
919
if (unlikely(!buffer)) {
920
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
921
return -ENOMEM;
922
}
923
hwev->ds_bts_vaddr = buffer;
924
/* Update the fixmap */
925
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
926
ds->bts_buffer_base = (unsigned long) cea;
927
ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
928
ds->bts_index = ds->bts_buffer_base;
929
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
930
ds->bts_absolute_maximum = ds->bts_buffer_base +
931
max * BTS_RECORD_SIZE;
932
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
933
(max / 16) * BTS_RECORD_SIZE;
934
return 0;
935
}
936
937
static void release_bts_buffer(int cpu)
938
{
939
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
940
void *cea;
941
942
if (!x86_pmu.bts)
943
return;
944
945
/* Clear the fixmap */
946
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
947
ds_clear_cea(cea, BTS_BUFFER_SIZE);
948
dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
949
hwev->ds_bts_vaddr = NULL;
950
}
951
952
static int alloc_ds_buffer(int cpu)
953
{
954
struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
955
956
memset(ds, 0, sizeof(*ds));
957
per_cpu(cpu_hw_events, cpu).ds = ds;
958
return 0;
959
}
960
961
static void release_ds_buffer(int cpu)
962
{
963
per_cpu(cpu_hw_events, cpu).ds = NULL;
964
}
965
966
void release_ds_buffers(void)
967
{
968
int cpu;
969
970
if (!x86_pmu.bts && !x86_pmu.ds_pebs)
971
return;
972
973
for_each_possible_cpu(cpu)
974
release_ds_buffer(cpu);
975
976
for_each_possible_cpu(cpu) {
977
/*
978
* Again, ignore errors from offline CPUs, they will no longer
979
* observe cpu_hw_events.ds and not program the DS_AREA when
980
* they come up.
981
*/
982
fini_debug_store_on_cpu(cpu);
983
}
984
985
for_each_possible_cpu(cpu) {
986
if (x86_pmu.ds_pebs)
987
release_pebs_buffer(cpu);
988
release_bts_buffer(cpu);
989
}
990
}
991
992
void reserve_ds_buffers(void)
993
{
994
int bts_err = 0, pebs_err = 0;
995
int cpu;
996
997
x86_pmu.bts_active = 0;
998
999
if (x86_pmu.ds_pebs)
1000
x86_pmu.pebs_active = 0;
1001
1002
if (!x86_pmu.bts && !x86_pmu.ds_pebs)
1003
return;
1004
1005
if (!x86_pmu.bts)
1006
bts_err = 1;
1007
1008
if (!x86_pmu.ds_pebs)
1009
pebs_err = 1;
1010
1011
for_each_possible_cpu(cpu) {
1012
if (alloc_ds_buffer(cpu)) {
1013
bts_err = 1;
1014
pebs_err = 1;
1015
}
1016
1017
if (!bts_err && alloc_bts_buffer(cpu))
1018
bts_err = 1;
1019
1020
if (x86_pmu.ds_pebs && !pebs_err &&
1021
alloc_pebs_buffer(cpu))
1022
pebs_err = 1;
1023
1024
if (bts_err && pebs_err)
1025
break;
1026
}
1027
1028
if (bts_err) {
1029
for_each_possible_cpu(cpu)
1030
release_bts_buffer(cpu);
1031
}
1032
1033
if (x86_pmu.ds_pebs && pebs_err) {
1034
for_each_possible_cpu(cpu)
1035
release_pebs_buffer(cpu);
1036
}
1037
1038
if (bts_err && pebs_err) {
1039
for_each_possible_cpu(cpu)
1040
release_ds_buffer(cpu);
1041
} else {
1042
if (x86_pmu.bts && !bts_err)
1043
x86_pmu.bts_active = 1;
1044
1045
if (x86_pmu.ds_pebs && !pebs_err)
1046
x86_pmu.pebs_active = 1;
1047
1048
for_each_possible_cpu(cpu) {
1049
/*
1050
* Ignores wrmsr_on_cpu() errors for offline CPUs; they
1051
* will get this call through intel_pmu_cpu_starting().
1052
*/
1053
init_debug_store_on_cpu(cpu);
1054
}
1055
}
1056
}
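/*
* Note the fallback structure above: BTS and PEBS buffers are allocated for
* every possible CPU, a failure in one facility only disables that facility
* (its buffers are released and its *_active flag stays 0), and the DS area
* itself is torn down only when both BTS and PEBS failed.
*/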
1057
1058
inline int alloc_arch_pebs_buf_on_cpu(int cpu)
1059
{
1060
if (!x86_pmu.arch_pebs)
1061
return 0;
1062
1063
return alloc_pebs_buffer(cpu);
1064
}
1065
1066
inline void release_arch_pebs_buf_on_cpu(int cpu)
1067
{
1068
if (!x86_pmu.arch_pebs)
1069
return;
1070
1071
release_pebs_buffer(cpu);
1072
}
1073
1074
void init_arch_pebs_on_cpu(int cpu)
1075
{
1076
struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
1077
u64 arch_pebs_base;
1078
1079
if (!x86_pmu.arch_pebs)
1080
return;
1081
1082
if (!cpuc->pebs_vaddr) {
1083
WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
1084
x86_pmu.pebs_active = 0;
1085
return;
1086
}
1087
1088
/*
1089
* 4KB-aligned pointer of the output buffer
1090
* (__alloc_pages_node() returns a page-aligned address)
1091
* Buffer Size = 4KB * 2^SIZE
1092
* contiguous physical buffer (__alloc_pages_node() with order)
1093
*/
1094
arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
1095
wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
1096
(u32)(arch_pebs_base >> 32));
1097
x86_pmu.pebs_active = 1;
1098
}
1099
1100
inline void fini_arch_pebs_on_cpu(int cpu)
1101
{
1102
if (!x86_pmu.arch_pebs)
1103
return;
1104
1105
wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
1106
}
1107
1108
/*
1109
* BTS
1110
*/
1111
1112
struct event_constraint bts_constraint =
1113
EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
1114
1115
void intel_pmu_enable_bts(u64 config)
1116
{
1117
unsigned long debugctlmsr;
1118
1119
debugctlmsr = get_debugctlmsr();
1120
1121
debugctlmsr |= DEBUGCTLMSR_TR;
1122
debugctlmsr |= DEBUGCTLMSR_BTS;
1123
if (config & ARCH_PERFMON_EVENTSEL_INT)
1124
debugctlmsr |= DEBUGCTLMSR_BTINT;
1125
1126
if (!(config & ARCH_PERFMON_EVENTSEL_OS))
1127
debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
1128
1129
if (!(config & ARCH_PERFMON_EVENTSEL_USR))
1130
debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
1131
1132
update_debugctlmsr(debugctlmsr);
1133
}
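/*
* DEBUGCTLMSR_TR and DEBUGCTLMSR_BTS together enable storing branch trace
* records into the DS area, DEBUGCTLMSR_BTINT raises an interrupt once the
* BTS interrupt threshold is reached, and the BTS_OFF_OS/BTS_OFF_USR bits
* suppress kernel- or user-mode branches when the event excludes that ring.
*/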
1134
1135
void intel_pmu_disable_bts(void)
1136
{
1137
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1138
unsigned long debugctlmsr;
1139
1140
if (!cpuc->ds)
1141
return;
1142
1143
debugctlmsr = get_debugctlmsr();
1144
1145
debugctlmsr &=
1146
~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
1147
DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
1148
1149
update_debugctlmsr(debugctlmsr);
1150
}
1151
1152
int intel_pmu_drain_bts_buffer(void)
1153
{
1154
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1155
struct debug_store *ds = cpuc->ds;
1156
struct bts_record {
1157
u64 from;
1158
u64 to;
1159
u64 flags;
1160
};
1161
struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
1162
struct bts_record *at, *base, *top;
1163
struct perf_output_handle handle;
1164
struct perf_event_header header;
1165
struct perf_sample_data data;
1166
unsigned long skip = 0;
1167
struct pt_regs regs;
1168
1169
if (!event)
1170
return 0;
1171
1172
if (!x86_pmu.bts_active)
1173
return 0;
1174
1175
base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1176
top = (struct bts_record *)(unsigned long)ds->bts_index;
1177
1178
if (top <= base)
1179
return 0;
1180
1181
memset(&regs, 0, sizeof(regs));
1182
1183
ds->bts_index = ds->bts_buffer_base;
1184
1185
perf_sample_data_init(&data, 0, event->hw.last_period);
1186
1187
/*
1188
* BTS leaks kernel addresses in branches across the cpl boundary,
1189
* such as traps or system calls, so unless the user is asking for
1190
* kernel tracing (and right now it's not possible), we'd need to
1191
* filter them out. But first we need to count how many of those we
1192
* have in the current batch. This is an extra O(n) pass, however,
1193
* it's much faster than the other one especially considering that
1194
* n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
1195
* alloc_bts_buffer()).
1196
*/
1197
for (at = base; at < top; at++) {
1198
/*
1199
* Note that right now *this* BTS code only works if
1200
* attr::exclude_kernel is set, but let's keep this extra
1201
* check here in case that changes.
1202
*/
1203
if (event->attr.exclude_kernel &&
1204
(kernel_ip(at->from) || kernel_ip(at->to)))
1205
skip++;
1206
}
1207
1208
/*
1209
* Prepare a generic sample, i.e. fill in the invariant fields.
1210
* We will overwrite the from and to address before we output
1211
* the sample.
1212
*/
1213
rcu_read_lock();
1214
perf_prepare_sample(&data, event, &regs);
1215
perf_prepare_header(&header, &data, event, &regs);
1216
1217
if (perf_output_begin(&handle, &data, event,
1218
header.size * (top - base - skip)))
1219
goto unlock;
1220
1221
for (at = base; at < top; at++) {
1222
/* Filter out any records that contain kernel addresses. */
1223
if (event->attr.exclude_kernel &&
1224
(kernel_ip(at->from) || kernel_ip(at->to)))
1225
continue;
1226
1227
data.ip = at->from;
1228
data.addr = at->to;
1229
1230
perf_output_sample(&handle, &header, &data, event);
1231
}
1232
1233
perf_output_end(&handle);
1234
1235
/* There's new data available. */
1236
event->hw.interrupts++;
1237
event->pending_kill = POLL_IN;
1238
unlock:
1239
rcu_read_unlock();
1240
return 1;
1241
}
1242
1243
void intel_pmu_drain_pebs_buffer(void)
1244
{
1245
struct perf_sample_data data;
1246
1247
static_call(x86_pmu_drain_pebs)(NULL, &data);
1248
}
1249
1250
/*
1251
* PEBS
1252
*/
1253
struct event_constraint intel_core2_pebs_event_constraints[] = {
1254
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
1255
INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
1256
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
1257
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
1258
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
1259
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1260
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
1261
EVENT_CONSTRAINT_END
1262
};
1263
1264
struct event_constraint intel_atom_pebs_event_constraints[] = {
1265
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
1266
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
1267
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
1268
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1269
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
1270
/* Allow all events as PEBS with no flags */
1271
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1272
EVENT_CONSTRAINT_END
1273
};
1274
1275
struct event_constraint intel_slm_pebs_event_constraints[] = {
1276
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1277
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
1278
/* Allow all events as PEBS with no flags */
1279
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1280
EVENT_CONSTRAINT_END
1281
};
1282
1283
struct event_constraint intel_glm_pebs_event_constraints[] = {
1284
/* Allow all events as PEBS with no flags */
1285
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1286
EVENT_CONSTRAINT_END
1287
};
1288
1289
struct event_constraint intel_grt_pebs_event_constraints[] = {
1290
/* Allow all events as PEBS with no flags */
1291
INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
1292
INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
1293
EVENT_CONSTRAINT_END
1294
};
1295
1296
struct event_constraint intel_arw_pebs_event_constraints[] = {
1297
/* Allow all events as PEBS with no flags */
1298
INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xff),
1299
INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xff),
1300
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01d4, 0x1),
1301
INTEL_FLAGS_UEVENT_CONSTRAINT(0x02d4, 0x2),
1302
INTEL_FLAGS_UEVENT_CONSTRAINT(0x04d4, 0x4),
1303
INTEL_FLAGS_UEVENT_CONSTRAINT(0x08d4, 0x8),
1304
EVENT_CONSTRAINT_END
1305
};
1306
1307
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
1308
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
1309
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
1310
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
1311
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
1312
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
1313
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
1314
INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
1315
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
1316
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
1317
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
1318
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
1319
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1320
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1321
EVENT_CONSTRAINT_END
1322
};
1323
1324
struct event_constraint intel_westmere_pebs_event_constraints[] = {
1325
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
1326
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
1327
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
1328
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
1329
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
1330
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
1331
INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
1332
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
1333
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
1334
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
1335
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
1336
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1337
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1338
EVENT_CONSTRAINT_END
1339
};
1340
1341
struct event_constraint intel_snb_pebs_event_constraints[] = {
1342
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1343
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
1344
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
1345
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1346
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1347
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
1348
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
1349
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
1350
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
1351
/* Allow all events as PEBS with no flags */
1352
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1353
EVENT_CONSTRAINT_END
1354
};
1355
1356
struct event_constraint intel_ivb_pebs_event_constraints[] = {
1357
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1358
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
1359
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
1360
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1361
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1362
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1363
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1364
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
1365
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
1366
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
1367
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
1368
/* Allow all events as PEBS with no flags */
1369
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1370
EVENT_CONSTRAINT_END
1371
};
1372
1373
struct event_constraint intel_hsw_pebs_event_constraints[] = {
1374
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1375
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
1376
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1377
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1378
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1379
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1380
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
1381
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
1382
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
1383
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
1384
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1385
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1386
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1387
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1388
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
1389
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1390
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1391
/* Allow all events as PEBS with no flags */
1392
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1393
EVENT_CONSTRAINT_END
1394
};
1395
1396
struct event_constraint intel_bdw_pebs_event_constraints[] = {
1397
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1398
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
1399
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1400
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1401
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1402
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1403
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
1404
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
1405
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
1406
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
1407
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1408
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1409
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1410
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1411
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
1412
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1413
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1414
/* Allow all events as PEBS with no flags */
1415
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1416
EVENT_CONSTRAINT_END
1417
};
1418
1419
1420
struct event_constraint intel_skl_pebs_event_constraints[] = {
1421
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
1422
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1423
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1424
/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
1425
INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1426
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
1427
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1428
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1429
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1430
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
1431
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1432
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1433
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1434
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1435
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
1436
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
1437
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
1438
/* Allow all events as PEBS with no flags */
1439
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1440
EVENT_CONSTRAINT_END
1441
};
1442
1443
struct event_constraint intel_icl_pebs_event_constraints[] = {
1444
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */
1445
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
1446
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
1447
1448
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
1449
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1450
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1451
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1452
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1453
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1454
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1455
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1456
1457
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
1458
1459
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
1460
1461
/*
1462
* Everything else is handled by PMU_FL_PEBS_ALL, because we
1463
* need the full constraints from the main table.
1464
*/
1465
1466
EVENT_CONSTRAINT_END
1467
};
1468
1469
struct event_constraint intel_glc_pebs_event_constraints[] = {
1470
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
1471
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1472
1473
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
1474
INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
1475
INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
1476
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1477
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1478
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1479
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1480
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1481
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1482
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1483
1484
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1485
1486
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1487
1488
/*
1489
* Everything else is handled by PMU_FL_PEBS_ALL, because we
1490
* need the full constraints from the main table.
1491
*/
1492
1493
EVENT_CONSTRAINT_END
1494
};
1495
1496
struct event_constraint intel_lnc_pebs_event_constraints[] = {
1497
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
1498
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1499
1500
INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc),
1501
INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
1502
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1503
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1504
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1505
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1506
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1507
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1508
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1509
1510
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1511
1512
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1513
1514
/*
1515
* Everything else is handled by PMU_FL_PEBS_ALL, because we
1516
* need the full constraints from the main table.
1517
*/
1518
1519
EVENT_CONSTRAINT_END
1520
};
1521
1522
struct event_constraint intel_pnc_pebs_event_constraints[] = {
1523
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
1524
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1525
1526
INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0xfc),
1527
INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
1528
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1529
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1530
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1531
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1532
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1533
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1534
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1535
1536
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1537
1538
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1539
INTEL_FLAGS_EVENT_CONSTRAINT(0xd6, 0xf),
1540
1541
/*
1542
* Everything else is handled by PMU_FL_PEBS_ALL, because we
1543
* need the full constraints from the main table.
1544
*/
1545
1546
EVENT_CONSTRAINT_END
1547
};
1548
1549
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
1550
{
1551
struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
1552
struct event_constraint *c;
1553
1554
if (!event->attr.precise_ip)
1555
return NULL;
1556
1557
if (pebs_constraints) {
1558
for_each_event_constraint(c, pebs_constraints) {
1559
if (constraint_match(c, event->hw.config)) {
1560
event->hw.flags |= c->flags;
1561
return c;
1562
}
1563
}
1564
}
1565
1566
/*
1567
* Extended PEBS support
1568
* Makes the PEBS code search the normal constraints.
1569
*/
1570
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1571
return NULL;
1572
1573
return &emptyconstraint;
1574
}
1575
1576
/*
1577
* We need the sched_task callback even for per-cpu events when we use
1578
* the large interrupt threshold, such that we can provide PID and TID
1579
* to PEBS samples.
1580
*/
1581
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1582
{
1583
if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1584
return false;
1585
1586
return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1587
}
1588
1589
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
1590
{
1591
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1592
1593
if (!sched_in && pebs_needs_sched_cb(cpuc))
1594
intel_pmu_drain_pebs_buffer();
1595
}
1596
1597
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1598
{
1599
struct debug_store *ds = cpuc->ds;
1600
int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
1601
u64 threshold;
1602
int reserved;
1603
1604
if (cpuc->n_pebs_via_pt)
1605
return;
1606
1607
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1608
reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
1609
else
1610
reserved = max_pebs_events;
1611
1612
if (cpuc->n_pebs == cpuc->n_large_pebs) {
1613
threshold = ds->pebs_absolute_maximum -
1614
reserved * cpuc->pebs_record_size;
1615
} else {
1616
threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1617
}
1618
1619
ds->pebs_interrupt_threshold = threshold;
1620
}
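/*
* The threshold placement above implements "large PEBS": when every PEBS
* event on this CPU uses large PEBS, the interrupt threshold sits near the
* end of the buffer, leaving room for one record per PEBS-capable counter,
* so many records are batched per PMI; otherwise it sits one record past
* the base so every record raises a PMI and is delivered immediately.
*/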
1621
1622
#define PEBS_DATACFG_CNTRS(x) \
1623
((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)
1624
1625
#define PEBS_DATACFG_CNTR_BIT(x) \
1626
(((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT)
1627
1628
#define PEBS_DATACFG_FIX(x) \
1629
((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)
1630
1631
#define PEBS_DATACFG_FIX_BIT(x) \
1632
(((1ULL << (x)) & PEBS_DATACFG_FIX_MASK) \
1633
<< PEBS_DATACFG_FIX_SHIFT)
1634
1635
static void adaptive_pebs_record_size_update(void)
1636
{
1637
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1638
u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1639
int sz = sizeof(struct pebs_basic);
1640
1641
if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1642
sz += sizeof(struct pebs_meminfo);
1643
if (pebs_data_cfg & PEBS_DATACFG_GP)
1644
sz += sizeof(struct pebs_gprs);
1645
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1646
sz += sizeof(struct pebs_xmm);
1647
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1648
sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1649
if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
1650
sz += sizeof(struct pebs_cntr_header);
1651
1652
/* Metrics base and Metrics Data */
1653
if (pebs_data_cfg & PEBS_DATACFG_METRICS)
1654
sz += 2 * sizeof(u64);
1655
1656
if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
1657
sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) +
1658
hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) *
1659
sizeof(u64);
1660
}
1661
}
1662
1663
cpuc->pebs_record_size = sz;
1664
}
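/*
* The adaptive PEBS record is assembled from optional groups, so its size
* is the sum of the groups selected in pebs_data_cfg. For example
* (illustrative), a config with PEBS_DATACFG_MEMINFO and PEBS_DATACFG_GP
* yields:
*
*	sizeof(struct pebs_basic) + sizeof(struct pebs_meminfo) +
*	sizeof(struct pebs_gprs)
*
* The LBR and counter groups additionally scale with lbr_nr and the number
* of requested counters.
*/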
1665
1666
static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
1667
int idx, u64 *pebs_data_cfg)
1668
{
1669
if (is_metric_event(event)) {
1670
*pebs_data_cfg |= PEBS_DATACFG_METRICS;
1671
return;
1672
}
1673
1674
*pebs_data_cfg |= PEBS_DATACFG_CNTR;
1675
1676
if (idx >= INTEL_PMC_IDX_FIXED)
1677
*pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED);
1678
else
1679
*pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx);
1680
}
1681
1682
1683
void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
1684
{
1685
struct perf_event *event;
1686
u64 pebs_data_cfg = 0;
1687
int i;
1688
1689
for (i = 0; i < cpuc->n_events; i++) {
1690
event = cpuc->event_list[i];
1691
if (!is_pebs_counter_event_group(event))
1692
continue;
1693
__intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg);
1694
}
1695
1696
if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1697
cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1698
}
1699
1700
#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
1701
PERF_SAMPLE_PHYS_ADDR | \
1702
PERF_SAMPLE_WEIGHT_TYPE | \
1703
PERF_SAMPLE_TRANSACTION | \
1704
PERF_SAMPLE_DATA_PAGE_SIZE)
1705
1706
static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1707
{
1708
struct perf_event_attr *attr = &event->attr;
1709
u64 sample_type = attr->sample_type;
1710
u64 pebs_data_cfg = 0;
1711
bool gprs, tsx_weight;
1712
1713
if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1714
attr->precise_ip > 1)
1715
return pebs_data_cfg;
1716
1717
if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1718
pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1719
1720
/*
1721
* We need GPRs when:
1722
* + user requested them
1723
* + precise_ip < 2 for the non-event IP
1724
* + For RTM TSX weight we need GPRs for the abort code.
1725
*/
1726
gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1727
(attr->sample_regs_intr & PEBS_GP_REGS)) ||
1728
((sample_type & PERF_SAMPLE_REGS_USER) &&
1729
(attr->sample_regs_user & PEBS_GP_REGS));
1730
1731
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1732
((attr->config & INTEL_ARCH_EVENT_MASK) ==
1733
x86_pmu.rtm_abort_event);
1734
1735
if (gprs || (attr->precise_ip < 2) || tsx_weight)
1736
pebs_data_cfg |= PEBS_DATACFG_GP;
1737
1738
if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1739
(attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1740
pebs_data_cfg |= PEBS_DATACFG_XMMS;
1741
1742
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1743
/*
1744
* For now always log all LBRs. Could configure this
1745
* later.
1746
*/
1747
pebs_data_cfg |= PEBS_DATACFG_LBRS |
1748
((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1749
}
1750
1751
return pebs_data_cfg;
1752
}
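/*
 * Summary of the mapping above: memory-style sample bits request
 * MEMINFO; requested GPRs, precise_ip < 2 or the RTM abort event
 * request GP; extended regs request XMMS; and a branch-stack request
 * enables LBRS with all lbr_nr entries logged for now.
 */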
1753
1754
static void
1755
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1756
struct perf_event *event, bool add)
1757
{
1758
struct pmu *pmu = event->pmu;
1759
1760
/*
1761
* Make sure we get updated with the first PEBS event.
1762
* During removal, ->pebs_data_cfg is still valid for
1763
* the last PEBS event. Don't clear it.
1764
*/
1765
if ((cpuc->n_pebs == 1) && add)
1766
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
1767
1768
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1769
if (!needed_cb)
1770
perf_sched_cb_inc(pmu);
1771
else
1772
perf_sched_cb_dec(pmu);
1773
1774
cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
1775
}
1776
1777
/*
1778
* The PEBS record doesn't shrink on pmu::del(). Doing so would require
1779
* iterating all remaining PEBS events to reconstruct the config.
1780
*/
1781
if (x86_pmu.intel_cap.pebs_baseline && add) {
1782
u64 pebs_data_cfg;
1783
1784
pebs_data_cfg = pebs_update_adaptive_cfg(event);
1785
/*
1786
* Be sure to update the thresholds when we change the record.
1787
*/
1788
if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1789
cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1790
}
1791
}
1792
1793
u64 intel_get_arch_pebs_data_config(struct perf_event *event)
1794
{
1795
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1796
u64 pebs_data_cfg = 0;
1797
u64 cntr_mask;
1798
1799
if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
1800
return 0;
1801
1802
pebs_data_cfg |= pebs_update_adaptive_cfg(event);
1803
1804
cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
1805
(PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
1806
PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
1807
pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
1808
1809
return pebs_data_cfg;
1810
}
1811
1812
void intel_pmu_pebs_add(struct perf_event *event)
1813
{
1814
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1815
struct hw_perf_event *hwc = &event->hw;
1816
bool needed_cb = pebs_needs_sched_cb(cpuc);
1817
1818
cpuc->n_pebs++;
1819
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1820
cpuc->n_large_pebs++;
1821
if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1822
cpuc->n_pebs_via_pt++;
1823
1824
pebs_update_state(needed_cb, cpuc, event, true);
1825
}
1826
1827
static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1828
{
1829
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1830
1831
if (!is_pebs_pt(event))
1832
return;
1833
1834
if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1835
cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1836
}
1837
1838
static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1839
{
1840
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1841
struct hw_perf_event *hwc = &event->hw;
1842
struct debug_store *ds = cpuc->ds;
1843
u64 value = ds->pebs_event_reset[hwc->idx];
1844
u32 base = MSR_RELOAD_PMC0;
1845
unsigned int idx = hwc->idx;
1846
1847
if (!is_pebs_pt(event))
1848
return;
1849
1850
if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1851
cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1852
1853
cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1854
1855
if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1856
base = MSR_RELOAD_FIXED_CTR0;
1857
idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1858
if (x86_pmu.intel_cap.pebs_format < 5)
1859
value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
1860
else
1861
value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1862
}
1863
wrmsrq(base + idx, value);
1864
}
1865
1866
static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
1867
{
1868
if (cpuc->n_pebs == cpuc->n_large_pebs &&
1869
cpuc->n_pebs != cpuc->n_pebs_via_pt)
1870
intel_pmu_drain_pebs_buffer();
1871
}
1872
1873
static void __intel_pmu_pebs_enable(struct perf_event *event)
1874
{
1875
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1876
struct hw_perf_event *hwc = &event->hw;
1877
1878
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1879
cpuc->pebs_enabled |= 1ULL << hwc->idx;
1880
}
1881
1882
void intel_pmu_pebs_enable(struct perf_event *event)
1883
{
1884
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1885
u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
1886
struct hw_perf_event *hwc = &event->hw;
1887
struct debug_store *ds = cpuc->ds;
1888
unsigned int idx = hwc->idx;
1889
1890
__intel_pmu_pebs_enable(event);
1891
1892
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1893
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1894
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1895
cpuc->pebs_enabled |= 1ULL << 63;
1896
1897
if (x86_pmu.intel_cap.pebs_baseline) {
1898
hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1899
if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1900
/*
1901
* drain_pebs() assumes uniform record size;
1902
* hence we need to drain when changing said
1903
* size.
1904
*/
1905
intel_pmu_drain_pebs_buffer();
1906
adaptive_pebs_record_size_update();
1907
wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg);
1908
cpuc->active_pebs_data_cfg = pebs_data_cfg;
1909
}
1910
}
1911
if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
1912
cpuc->pebs_data_cfg = pebs_data_cfg;
1913
pebs_update_threshold(cpuc);
1914
}
1915
1916
if (idx >= INTEL_PMC_IDX_FIXED) {
1917
if (x86_pmu.intel_cap.pebs_format < 5)
1918
idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
1919
else
1920
idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1921
}
1922
1923
/*
1924
* Use auto-reload if possible to save a MSR write in the PMI.
1925
* This must be done in pmu::start(), because of PERF_EVENT_IOC_PERIOD.
1926
*/
1927
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1928
ds->pebs_event_reset[idx] =
1929
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1930
} else {
1931
ds->pebs_event_reset[idx] = 0;
1932
}
1933
1934
intel_pmu_pebs_via_pt_enable(event);
1935
}
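/*
 * Note on the idx remapping above: ds->pebs_event_reset stores the
 * reset values for fixed counters behind the general ones, starting
 * at MAX_PEBS_EVENTS_FMT4 for PEBS format < 5 and at MAX_PEBS_EVENTS
 * otherwise, hence the rebase before writing the auto-reload value.
 */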
1936
1937
void intel_pmu_pebs_del(struct perf_event *event)
1938
{
1939
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1940
struct hw_perf_event *hwc = &event->hw;
1941
bool needed_cb = pebs_needs_sched_cb(cpuc);
1942
1943
cpuc->n_pebs--;
1944
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1945
cpuc->n_large_pebs--;
1946
if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1947
cpuc->n_pebs_via_pt--;
1948
1949
pebs_update_state(needed_cb, cpuc, event, false);
1950
}
1951
1952
static void __intel_pmu_pebs_disable(struct perf_event *event)
1953
{
1954
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1955
struct hw_perf_event *hwc = &event->hw;
1956
1957
intel_pmu_drain_large_pebs(cpuc);
1958
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1959
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1960
}
1961
1962
void intel_pmu_pebs_disable(struct perf_event *event)
1963
{
1964
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1965
struct hw_perf_event *hwc = &event->hw;
1966
1967
__intel_pmu_pebs_disable(event);
1968
1969
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1970
(x86_pmu.version < 5))
1971
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1972
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1973
cpuc->pebs_enabled &= ~(1ULL << 63);
1974
1975
intel_pmu_pebs_via_pt_disable(event);
1976
1977
if (cpuc->enabled)
1978
wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1979
}
1980
1981
void intel_pmu_pebs_enable_all(void)
1982
{
1983
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1984
1985
if (cpuc->pebs_enabled)
1986
wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1987
}
1988
1989
void intel_pmu_pebs_disable_all(void)
1990
{
1991
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1992
1993
if (cpuc->pebs_enabled)
1994
__intel_pmu_pebs_disable_all();
1995
}
1996
1997
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1998
{
1999
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2000
unsigned long from = cpuc->lbr_entries[0].from;
2001
unsigned long old_to, to = cpuc->lbr_entries[0].to;
2002
unsigned long ip = regs->ip;
2003
int is_64bit = 0;
2004
void *kaddr;
2005
int size;
2006
2007
/*
2008
* We don't need the fixup if the PEBS assist is fault-like
2009
*/
2010
if (!x86_pmu.intel_cap.pebs_trap)
2011
return 1;
2012
2013
/*
2014
* No LBR entry, no basic block, no rewinding
2015
*/
2016
if (!cpuc->lbr_stack.nr || !from || !to)
2017
return 0;
2018
2019
/*
2020
* Basic blocks should never cross user/kernel boundaries
2021
*/
2022
if (kernel_ip(ip) != kernel_ip(to))
2023
return 0;
2024
2025
/*
2026
* unsigned math, either ip is before the start (impossible) or
2027
* the basic block is larger than 1 page (sanity)
2028
*/
2029
if ((ip - to) > PEBS_FIXUP_SIZE)
2030
return 0;
2031
2032
/*
2033
* We sampled a branch insn, rewind using the LBR stack
2034
*/
2035
if (ip == to) {
2036
set_linear_ip(regs, from);
2037
return 1;
2038
}
2039
2040
size = ip - to;
2041
if (!kernel_ip(ip)) {
2042
int bytes;
2043
u8 *buf = this_cpu_read(insn_buffer);
2044
2045
/* 'size' must fit our buffer, see above */
2046
bytes = copy_from_user_nmi(buf, (void __user *)to, size);
2047
if (bytes != 0)
2048
return 0;
2049
2050
kaddr = buf;
2051
} else {
2052
kaddr = (void *)to;
2053
}
2054
2055
do {
2056
struct insn insn;
2057
2058
old_to = to;
2059
2060
#ifdef CONFIG_X86_64
2061
is_64bit = kernel_ip(to) || any_64bit_mode(regs);
2062
#endif
2063
insn_init(&insn, kaddr, size, is_64bit);
2064
2065
/*
2066
* Make sure there was not a problem decoding the instruction.
2067
* This is doubly important because we have an infinite loop if
2068
* insn.length=0.
2069
*/
2070
if (insn_get_length(&insn))
2071
break;
2072
2073
to += insn.length;
2074
kaddr += insn.length;
2075
size -= insn.length;
2076
} while (to < ip);
2077
2078
if (to == ip) {
2079
set_linear_ip(regs, old_to);
2080
return 1;
2081
}
2082
2083
/*
2084
* Even though we decoded the basic block, the instruction stream
2085
* never matched the given IP, either the TO or the IP got corrupted.
2086
*/
2087
return 0;
2088
}
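/*
 * Sketch of the fixup above with hypothetical addresses: if the LBR
 * says the last branch landed at 0x1000 and the trap-like PEBS IP is
 * 0x1008, the decoder walks 0x1000, 0x1003, 0x1006, ...; the last
 * address decoded before reaching 0x1008 is the instruction that
 * actually retired, so regs->ip is rewound to it.
 */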
2089
2090
static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
2091
{
2092
if (tsx_tuning) {
2093
union hsw_tsx_tuning tsx = { .value = tsx_tuning };
2094
return tsx.cycles_last_block;
2095
}
2096
return 0;
2097
}
2098
2099
static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
2100
{
2101
u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
2102
2103
/* For RTM XABORTs also log the abort code from AX */
2104
if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
2105
txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
2106
return txn;
2107
}
2108
2109
static inline u64 get_pebs_status(void *n)
2110
{
2111
if (x86_pmu.intel_cap.pebs_format < 4)
2112
return ((struct pebs_record_nhm *)n)->status;
2113
return ((struct pebs_basic *)n)->applicable_counters;
2114
}
2115
2116
#define PERF_X86_EVENT_PEBS_HSW_PREC \
2117
(PERF_X86_EVENT_PEBS_ST_HSW | \
2118
PERF_X86_EVENT_PEBS_LD_HSW | \
2119
PERF_X86_EVENT_PEBS_NA_HSW)
2120
2121
static u64 get_data_src(struct perf_event *event, u64 aux)
2122
{
2123
u64 val = PERF_MEM_NA;
2124
int fl = event->hw.flags;
2125
bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
2126
2127
if (fl & PERF_X86_EVENT_PEBS_LDLAT)
2128
val = load_latency_data(event, aux);
2129
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
2130
val = store_latency_data(event, aux);
2131
else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
2132
val = x86_pmu.pebs_latency_data(event, aux);
2133
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
2134
val = precise_datala_hsw(event, aux);
2135
else if (fst)
2136
val = precise_store_data(aux);
2137
return val;
2138
}
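/*
 * The checks above are ordered by specificity: load/store latency and
 * the hybrid latency callback win, then Haswell-style precise data-la
 * decoding, then plain precise store data; PERF_MEM_NA is returned
 * when no flag matches.
 */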
2139
2140
static void setup_pebs_time(struct perf_event *event,
2141
struct perf_sample_data *data,
2142
u64 tsc)
2143
{
2144
/* Converting to a user-defined clock is not supported yet. */
2145
if (event->attr.use_clockid != 0)
2146
return;
2147
2148
/*
2149
* The conversion is not supported when the TSC is unstable.
2150
* The TSC unstable case is a corner case and very unlikely to
2151
* happen. If it happens, the TSC in a PEBS record will be
2152
* dropped and we fall back to perf_event_clock().
2153
*/
2154
if (!using_native_sched_clock() || !sched_clock_stable())
2155
return;
2156
2157
data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
2158
data->sample_flags |= PERF_SAMPLE_TIME;
2159
}
2160
2161
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
2162
PERF_SAMPLE_PHYS_ADDR | \
2163
PERF_SAMPLE_DATA_PAGE_SIZE)
2164
2165
static void setup_pebs_fixed_sample_data(struct perf_event *event,
2166
struct pt_regs *iregs, void *__pebs,
2167
struct perf_sample_data *data,
2168
struct pt_regs *regs)
2169
{
2170
/*
2171
* We cast to the biggest pebs_record but are careful not to
2172
* unconditionally access the 'extra' entries.
2173
*/
2174
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2175
struct pebs_record_skl *pebs = __pebs;
2176
u64 sample_type;
2177
int fll;
2178
2179
if (pebs == NULL)
2180
return;
2181
2182
sample_type = event->attr.sample_type;
2183
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
2184
2185
perf_sample_data_init(data, 0, event->hw.last_period);
2186
2187
/*
2188
* Use latency for weight (only avail with PEBS-LL)
2189
*/
2190
if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
2191
data->weight.full = pebs->lat;
2192
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
2193
}
2194
2195
/*
2196
* data.data_src encodes the data source
2197
*/
2198
if (sample_type & PERF_SAMPLE_DATA_SRC) {
2199
data->data_src.val = get_data_src(event, pebs->dse);
2200
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
2201
}
2202
2203
/*
2204
* We must however always use iregs for the unwinder to stay sane; the
2205
* record BP,SP,IP can point into thin air when the record is from a
2206
* previous PMI context or an (I)RET happened between the record and
2207
* PMI.
2208
*/
2209
perf_sample_save_callchain(data, event, iregs);
2210
2211
/*
2212
* We use the interrupt regs as a base because the PEBS record does not
2213
* contain a full regs set, specifically it seems to lack segment
2214
* descriptors, which get used by things like user_mode().
2215
*
2216
* In the simple case fix up only the IP for PERF_SAMPLE_IP.
2217
*/
2218
*regs = *iregs;
2219
2220
/*
2221
* Initialize regs->flags from PEBS,
2222
* Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
2223
* i.e., do not rely on it being zero:
2224
*/
2225
regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
2226
2227
if (sample_type & PERF_SAMPLE_REGS_INTR) {
2228
regs->ax = pebs->ax;
2229
regs->bx = pebs->bx;
2230
regs->cx = pebs->cx;
2231
regs->dx = pebs->dx;
2232
regs->si = pebs->si;
2233
regs->di = pebs->di;
2234
2235
regs->bp = pebs->bp;
2236
regs->sp = pebs->sp;
2237
2238
#ifndef CONFIG_X86_32
2239
regs->r8 = pebs->r8;
2240
regs->r9 = pebs->r9;
2241
regs->r10 = pebs->r10;
2242
regs->r11 = pebs->r11;
2243
regs->r12 = pebs->r12;
2244
regs->r13 = pebs->r13;
2245
regs->r14 = pebs->r14;
2246
regs->r15 = pebs->r15;
2247
#endif
2248
}
2249
2250
if (event->attr.precise_ip > 1) {
2251
/*
2252
* Haswell and later processors have an 'eventing IP'
2253
* (real IP) which fixes the off-by-1 skid in hardware.
2254
* Use it when precise_ip >= 2 :
2255
*/
2256
if (x86_pmu.intel_cap.pebs_format >= 2) {
2257
set_linear_ip(regs, pebs->real_ip);
2258
regs->flags |= PERF_EFLAGS_EXACT;
2259
} else {
2260
/* Otherwise, use PEBS off-by-1 IP: */
2261
set_linear_ip(regs, pebs->ip);
2262
2263
/*
2264
* With precise_ip >= 2, try to fix up the off-by-1 IP
2265
* using the LBR. If successful, the fixup function
2266
* corrects regs->ip and calls set_linear_ip() on regs:
2267
*/
2268
if (intel_pmu_pebs_fixup_ip(regs))
2269
regs->flags |= PERF_EFLAGS_EXACT;
2270
}
2271
} else {
2272
/*
2273
* When precise_ip == 1, return the PEBS off-by-1 IP,
2274
* no fixup attempted:
2275
*/
2276
set_linear_ip(regs, pebs->ip);
2277
}
2278
2279
2280
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
2281
x86_pmu.intel_cap.pebs_format >= 1) {
2282
data->addr = pebs->dla;
2283
data->sample_flags |= PERF_SAMPLE_ADDR;
2284
}
2285
2286
if (x86_pmu.intel_cap.pebs_format >= 2) {
2287
/* Only set the TSX weight when there is no memory weight. */
2288
if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
2289
data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
2290
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
2291
}
2292
if (sample_type & PERF_SAMPLE_TRANSACTION) {
2293
data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
2294
pebs->ax);
2295
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
2296
}
2297
}
2298
2299
/*
2300
* v3 supplies an accurate time stamp, so we use that
2301
* for the time stamp.
2302
*
2303
* We can only do this for the default trace clock.
2304
*/
2305
if (x86_pmu.intel_cap.pebs_format >= 3)
2306
setup_pebs_time(event, data, pebs->tsc);
2307
2308
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
2309
}
2310
2311
static void adaptive_pebs_save_regs(struct pt_regs *regs,
2312
struct pebs_gprs *gprs)
2313
{
2314
regs->ax = gprs->ax;
2315
regs->bx = gprs->bx;
2316
regs->cx = gprs->cx;
2317
regs->dx = gprs->dx;
2318
regs->si = gprs->si;
2319
regs->di = gprs->di;
2320
regs->bp = gprs->bp;
2321
regs->sp = gprs->sp;
2322
#ifndef CONFIG_X86_32
2323
regs->r8 = gprs->r8;
2324
regs->r9 = gprs->r9;
2325
regs->r10 = gprs->r10;
2326
regs->r11 = gprs->r11;
2327
regs->r12 = gprs->r12;
2328
regs->r13 = gprs->r13;
2329
regs->r14 = gprs->r14;
2330
regs->r15 = gprs->r15;
2331
#endif
2332
}
2333
2334
static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc)
2335
{
2336
int shift = 64 - x86_pmu.cntval_bits;
2337
struct hw_perf_event *hwc;
2338
u64 delta, prev_pmc;
2339
2340
/*
2341
* A recorded counter may not have an assigned event in the
2342
* following cases. The value should be dropped.
2343
* - An event is deleted. There is still an active PEBS event.
2344
* The PEBS record doesn't shrink on pmu::del().
2345
* If the counter of the deleted event once occurred in a PEBS
2346
* record, PEBS still records the counter until the counter is
2347
* reassigned.
2348
* - An event is stopped for some reason, e.g., throttled.
2349
* During this period, another event is added and takes the
2350
* counter of the stopped event. The stopped event is assigned
2351
* to another new and uninitialized counter, since the
2352
* x86_pmu_start(RELOAD) is not invoked for a stopped event.
2353
* The PEBS_DATA_CFG is updated regardless of the event state.
2354
* The uninitialized counter can be recorded in a PEBS record.
2355
* But the cpuc->events[uninitialized_counter] is always NULL,
2356
* because the event is stopped. The uninitialized value is
2357
* safely dropped.
2358
*/
2359
if (!event)
2360
return;
2361
2362
hwc = &event->hw;
2363
prev_pmc = local64_read(&hwc->prev_count);
2364
2365
/* Only update the count when the PMU is disabled */
2366
WARN_ON(this_cpu_read(cpu_hw_events.enabled));
2367
local64_set(&hwc->prev_count, pmc);
2368
2369
delta = (pmc << shift) - (prev_pmc << shift);
2370
delta >>= shift;
2371
2372
local64_add(delta, &event->count);
2373
local64_sub(delta, &hwc->period_left);
2374
}
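/*
 * The shift pair above cancels the unimplemented high bits of a
 * partial-width counter before the subtraction.  Illustration with a
 * hypothetical 48-bit counter (shift = 16): prev_pmc = 0xffffffffffff
 * and pmc = 0x5 give ((0x5 << 16) - (0xffffffffffff << 16)) >> 16 = 6,
 * i.e. the six increments across the wrap rather than a bogus value.
 */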
2375
2376
static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
2377
struct perf_event *event,
2378
struct pebs_cntr_header *cntr,
2379
void *next_record)
2380
{
2381
int bit;
2382
2383
for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
2384
intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record);
2385
next_record += sizeof(u64);
2386
}
2387
2388
for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
2389
/* The slots event will be handled with perf_metric later */
2390
if ((cntr->metrics == INTEL_CNTR_METRICS) &&
2391
(bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) {
2392
next_record += sizeof(u64);
2393
continue;
2394
}
2395
intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED],
2396
*(u64 *)next_record);
2397
next_record += sizeof(u64);
2398
}
2399
2400
/* HW will reload the value right after the overflow. */
2401
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2402
local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);
2403
2404
if (cntr->metrics == INTEL_CNTR_METRICS) {
2405
static_call(intel_pmu_update_topdown_event)
2406
(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS],
2407
(u64 *)next_record);
2408
next_record += 2 * sizeof(u64);
2409
}
2410
}
2411
2412
#define PEBS_LATENCY_MASK 0xffff
2413
2414
static inline void __setup_perf_sample_data(struct perf_event *event,
2415
struct pt_regs *iregs,
2416
struct perf_sample_data *data)
2417
{
2418
perf_sample_data_init(data, 0, event->hw.last_period);
2419
2420
/*
2421
* We must however always use iregs for the unwinder to stay sane; the
2422
* record BP,SP,IP can point into thin air when the record is from a
2423
* previous PMI context or an (I)RET happened between the record and
2424
* PMI.
2425
*/
2426
perf_sample_save_callchain(data, event, iregs);
2427
}
2428
2429
static inline void __setup_pebs_basic_group(struct perf_event *event,
2430
struct pt_regs *regs,
2431
struct perf_sample_data *data,
2432
u64 sample_type, u64 ip,
2433
u64 tsc, u16 retire)
2434
{
2435
/* The ip in basic is EventingIP */
2436
set_linear_ip(regs, ip);
2437
regs->flags = PERF_EFLAGS_EXACT;
2438
setup_pebs_time(event, data, tsc);
2439
2440
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
2441
data->weight.var3_w = retire;
2442
}
2443
2444
static inline void __setup_pebs_gpr_group(struct perf_event *event,
2445
struct pt_regs *regs,
2446
struct pebs_gprs *gprs,
2447
u64 sample_type)
2448
{
2449
if (event->attr.precise_ip < 2) {
2450
set_linear_ip(regs, gprs->ip);
2451
regs->flags &= ~PERF_EFLAGS_EXACT;
2452
}
2453
2454
if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
2455
adaptive_pebs_save_regs(regs, gprs);
2456
}
2457
2458
static inline void __setup_pebs_meminfo_group(struct perf_event *event,
2459
struct perf_sample_data *data,
2460
u64 sample_type, u64 latency,
2461
u16 instr_latency, u64 address,
2462
u64 aux, u64 tsx_tuning, u64 ax)
2463
{
2464
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
2465
u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
2466
2467
data->weight.var2_w = instr_latency;
2468
2469
/*
2470
* Although meminfo::latency is defined as a u64,
2471
* only the lower 32 bits include the valid data
2472
* in practice on Ice Lake and earlier platforms.
2473
*/
2474
if (sample_type & PERF_SAMPLE_WEIGHT)
2475
data->weight.full = latency ?: tsx_latency;
2476
else
2477
data->weight.var1_dw = (u32)latency ?: tsx_latency;
2478
2479
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
2480
}
2481
2482
if (sample_type & PERF_SAMPLE_DATA_SRC) {
2483
data->data_src.val = get_data_src(event, aux);
2484
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
2485
}
2486
2487
if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
2488
data->addr = address;
2489
data->sample_flags |= PERF_SAMPLE_ADDR;
2490
}
2491
2492
if (sample_type & PERF_SAMPLE_TRANSACTION) {
2493
data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
2494
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
2495
}
2496
}
2497
2498
/*
2499
* With adaptive PEBS the layout depends on what fields are configured.
2500
*/
2501
static void setup_pebs_adaptive_sample_data(struct perf_event *event,
2502
struct pt_regs *iregs, void *__pebs,
2503
struct perf_sample_data *data,
2504
struct pt_regs *regs)
2505
{
2506
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2507
u64 sample_type = event->attr.sample_type;
2508
struct pebs_basic *basic = __pebs;
2509
void *next_record = basic + 1;
2510
struct pebs_meminfo *meminfo = NULL;
2511
struct pebs_gprs *gprs = NULL;
2512
struct x86_perf_regs *perf_regs;
2513
u64 format_group;
2514
u16 retire;
2515
2516
if (basic == NULL)
2517
return;
2518
2519
perf_regs = container_of(regs, struct x86_perf_regs, regs);
2520
perf_regs->xmm_regs = NULL;
2521
2522
format_group = basic->format_group;
2523
2524
__setup_perf_sample_data(event, iregs, data);
2525
2526
*regs = *iregs;
2527
2528
/* basic group */
2529
retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
2530
basic->retire_latency : 0;
2531
__setup_pebs_basic_group(event, regs, data, sample_type,
2532
basic->ip, basic->tsc, retire);
2533
2534
/*
2535
* The MEMINFO group is laid out in front of the GP group,
2536
* but PERF_SAMPLE_TRANSACTION needs gprs->ax.
2537
* Save the pointer here and process it later.
2538
*/
2539
if (format_group & PEBS_DATACFG_MEMINFO) {
2540
meminfo = next_record;
2541
next_record = meminfo + 1;
2542
}
2543
2544
if (format_group & PEBS_DATACFG_GP) {
2545
gprs = next_record;
2546
next_record = gprs + 1;
2547
2548
__setup_pebs_gpr_group(event, regs, gprs, sample_type);
2549
}
2550
2551
if (format_group & PEBS_DATACFG_MEMINFO) {
2552
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
2553
meminfo->cache_latency : meminfo->mem_latency;
2554
u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
2555
meminfo->instr_latency : 0;
2556
u64 ax = gprs ? gprs->ax : 0;
2557
2558
__setup_pebs_meminfo_group(event, data, sample_type, latency,
2559
instr_latency, meminfo->address,
2560
meminfo->aux, meminfo->tsx_tuning,
2561
ax);
2562
}
2563
2564
if (format_group & PEBS_DATACFG_XMMS) {
2565
struct pebs_xmm *xmm = next_record;
2566
2567
next_record = xmm + 1;
2568
perf_regs->xmm_regs = xmm->xmm;
2569
}
2570
2571
if (format_group & PEBS_DATACFG_LBRS) {
2572
struct lbr_entry *lbr = next_record;
2573
int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
2574
& 0xff) + 1;
2575
next_record = next_record + num_lbr * sizeof(struct lbr_entry);
2576
2577
if (has_branch_stack(event)) {
2578
intel_pmu_store_pebs_lbrs(lbr);
2579
intel_pmu_lbr_save_brstack(data, cpuc, event);
2580
}
2581
}
2582
2583
if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
2584
struct pebs_cntr_header *cntr = next_record;
2585
unsigned int nr;
2586
2587
next_record += sizeof(struct pebs_cntr_header);
2588
/*
2589
* The PEBS_DATA_CFG is a global register, which is the
2590
* superset configuration for all PEBS events.
2591
* For the PEBS record of a non-sample-read group, ignore
2592
* the counter snapshot fields.
2593
*/
2594
if (is_pebs_counter_event_group(event)) {
2595
__setup_pebs_counter_group(cpuc, event, cntr, next_record);
2596
data->sample_flags |= PERF_SAMPLE_READ;
2597
}
2598
2599
nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
2600
if (cntr->metrics == INTEL_CNTR_METRICS)
2601
nr += 2;
2602
next_record += nr * sizeof(u64);
2603
}
2604
2605
WARN_ONCE(next_record != __pebs + basic->format_size,
2606
"PEBS record size %u, expected %llu, config %llx\n",
2607
basic->format_size,
2608
(u64)(next_record - __pebs),
2609
format_group);
2610
}
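/*
 * The parsing order above mirrors the layout of an adaptive PEBS
 * record: basic group first, then (when configured) meminfo, GPRs,
 * XMMs, LBR entries and finally the counter/metrics block.  The
 * WARN_ONCE is a consistency check that the walked size matches
 * basic->format_size.
 */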
2611
2612
static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
2613
{
2614
/* Continue bit or null PEBS record indicates fragment follows. */
2615
return header->cont || !(header->format & GENMASK_ULL(63, 16));
2616
}
2617
2618
static void setup_arch_pebs_sample_data(struct perf_event *event,
2619
struct pt_regs *iregs,
2620
void *__pebs,
2621
struct perf_sample_data *data,
2622
struct pt_regs *regs)
2623
{
2624
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2625
u64 sample_type = event->attr.sample_type;
2626
struct arch_pebs_header *header = NULL;
2627
struct arch_pebs_aux *meminfo = NULL;
2628
struct arch_pebs_gprs *gprs = NULL;
2629
struct x86_perf_regs *perf_regs;
2630
void *next_record;
2631
void *at = __pebs;
2632
2633
if (at == NULL)
2634
return;
2635
2636
perf_regs = container_of(regs, struct x86_perf_regs, regs);
2637
perf_regs->xmm_regs = NULL;
2638
2639
__setup_perf_sample_data(event, iregs, data);
2640
2641
*regs = *iregs;
2642
2643
again:
2644
header = at;
2645
next_record = at + sizeof(struct arch_pebs_header);
2646
if (header->basic) {
2647
struct arch_pebs_basic *basic = next_record;
2648
u16 retire = 0;
2649
2650
next_record = basic + 1;
2651
2652
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
2653
retire = basic->valid ? basic->retire : 0;
2654
__setup_pebs_basic_group(event, regs, data, sample_type,
2655
basic->ip, basic->tsc, retire);
2656
}
2657
2658
/*
2659
* The MEMINFO group is laid out in front of the GP group,
2661
* but PERF_SAMPLE_TRANSACTION needs gprs->ax.
2662
* Save the pointer here and process it later.
2662
*/
2663
if (header->aux) {
2664
meminfo = next_record;
2665
next_record = meminfo + 1;
2666
}
2667
2668
if (header->gpr) {
2669
gprs = next_record;
2670
next_record = gprs + 1;
2671
2672
__setup_pebs_gpr_group(event, regs,
2673
(struct pebs_gprs *)gprs,
2674
sample_type);
2675
}
2676
2677
if (header->aux) {
2678
u64 ax = gprs ? gprs->ax : 0;
2679
2680
__setup_pebs_meminfo_group(event, data, sample_type,
2681
meminfo->cache_latency,
2682
meminfo->instr_latency,
2683
meminfo->address, meminfo->aux,
2684
meminfo->tsx_tuning, ax);
2685
}
2686
2687
if (header->xmm) {
2688
struct pebs_xmm *xmm;
2689
2690
next_record += sizeof(struct arch_pebs_xer_header);
2691
2692
xmm = next_record;
2693
perf_regs->xmm_regs = xmm->xmm;
2694
next_record = xmm + 1;
2695
}
2696
2697
if (header->lbr) {
2698
struct arch_pebs_lbr_header *lbr_header = next_record;
2699
struct lbr_entry *lbr;
2700
int num_lbr;
2701
2702
next_record = lbr_header + 1;
2703
lbr = next_record;
2704
2705
num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
2706
lbr_header->depth :
2707
header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
2708
next_record += num_lbr * sizeof(struct lbr_entry);
2709
2710
if (has_branch_stack(event)) {
2711
intel_pmu_store_pebs_lbrs(lbr);
2712
intel_pmu_lbr_save_brstack(data, cpuc, event);
2713
}
2714
}
2715
2716
if (header->cntr) {
2717
struct arch_pebs_cntr_header *cntr = next_record;
2718
unsigned int nr;
2719
2720
next_record += sizeof(struct arch_pebs_cntr_header);
2721
2722
if (is_pebs_counter_event_group(event)) {
2723
__setup_pebs_counter_group(cpuc, event,
2724
(struct pebs_cntr_header *)cntr, next_record);
2725
data->sample_flags |= PERF_SAMPLE_READ;
2726
}
2727
2728
nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
2729
if (cntr->metrics == INTEL_CNTR_METRICS)
2730
nr += 2;
2731
next_record += nr * sizeof(u64);
2732
}
2733
2734
/* Parse the following fragments, if any. */
2735
if (arch_pebs_record_continued(header)) {
2736
at = at + header->size;
2737
goto again;
2738
}
2739
}
2740
2741
static inline void *
2742
get_next_pebs_record_by_bit(void *base, void *top, int bit)
2743
{
2744
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2745
void *at;
2746
u64 pebs_status;
2747
2748
/*
2749
* fmt0 does not have a status bitfield (does not use
2750
* pebs_record_nhm format)
2751
*/
2752
if (x86_pmu.intel_cap.pebs_format < 1)
2753
return base;
2754
2755
if (base == NULL)
2756
return NULL;
2757
2758
for (at = base; at < top; at += cpuc->pebs_record_size) {
2759
unsigned long status = get_pebs_status(at);
2760
2761
if (test_bit(bit, (unsigned long *)&status)) {
2762
/* PEBS v3 has accurate status bits */
2763
if (x86_pmu.intel_cap.pebs_format >= 3)
2764
return at;
2765
2766
if (status == (1 << bit))
2767
return at;
2768
2769
/* clear non-PEBS bit and re-check */
2770
pebs_status = status & cpuc->pebs_enabled;
2771
pebs_status &= PEBS_COUNTER_MASK;
2772
if (pebs_status == (1 << bit))
2773
return at;
2774
}
2775
}
2776
return NULL;
2777
}
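/*
 * Before PEBS v3 the status bits can be stale, so a record is only
 * attributed to 'bit' when the status is exactly that single bit, or
 * when only that bit survives masking with the currently enabled PEBS
 * counters.
 */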
2778
2779
/*
2780
* Special variant of intel_pmu_save_and_restart() for auto-reload.
2781
*/
2782
static int
2783
intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
2784
{
2785
struct hw_perf_event *hwc = &event->hw;
2786
int shift = 64 - x86_pmu.cntval_bits;
2787
u64 period = hwc->sample_period;
2788
u64 prev_raw_count, new_raw_count;
2789
s64 new, old;
2790
2791
WARN_ON(!period);
2792
2793
/*
2794
* drain_pebs() only happens when the PMU is disabled.
2795
*/
2796
WARN_ON(this_cpu_read(cpu_hw_events.enabled));
2797
2798
prev_raw_count = local64_read(&hwc->prev_count);
2799
new_raw_count = rdpmc(hwc->event_base_rdpmc);
2800
local64_set(&hwc->prev_count, new_raw_count);
2801
2802
/*
2803
* Since the counter increments a negative counter value and
2804
* overflows on the sign switch, giving the interval:
2805
*
2806
* [-period, 0]
2807
*
2808
* the difference between two consecutive reads is:
2809
*
2810
* A) value2 - value1;
2811
* when no overflows have happened in between,
2812
*
2813
* B) (0 - value1) + (value2 - (-period));
2814
* when one overflow happened in between,
2815
*
2816
* C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
2817
* when @n overflows happened in between.
2818
*
2819
* Here A) is the obvious difference, B) is the extension to the
2820
* discrete interval, where the first term is to the top of the
2821
* interval and the second term is from the bottom of the next
2822
* interval and C) the extension to multiple intervals, where the
2823
* middle term is the whole intervals covered.
2824
*
2825
* An equivalent of C, by reduction, is:
2826
*
2827
* value2 - value1 + n * period
2828
*/
2829
new = ((s64)(new_raw_count << shift) >> shift);
2830
old = ((s64)(prev_raw_count << shift) >> shift);
2831
local64_add(new - old + count * period, &event->count);
2832
2833
local64_set(&hwc->period_left, -new);
2834
2835
perf_event_update_userpage(event);
2836
2837
return 0;
2838
}
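/*
 * Worked example for case C above, with hypothetical numbers:
 * period = 100, value1 = -30, value2 = -80 and count = 2 drained
 * records give (-80) - (-30) + 2 * 100 = 150 events since the last
 * read: 30 up to the first overflow, one full period of 100, and 20
 * into the current interval.
 */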
2839
2840
typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
2841
struct perf_sample_data *, struct pt_regs *);
2842
2843
static struct pt_regs dummy_iregs;
2844
2845
static __always_inline void
2846
__intel_pmu_pebs_event(struct perf_event *event,
2847
struct pt_regs *iregs,
2848
struct pt_regs *regs,
2849
struct perf_sample_data *data,
2850
void *at,
2851
setup_fn setup_sample)
2852
{
2853
setup_sample(event, iregs, at, data, regs);
2854
perf_event_output(event, data, regs);
2855
}
2856
2857
static __always_inline void
2858
__intel_pmu_pebs_last_event(struct perf_event *event,
2859
struct pt_regs *iregs,
2860
struct pt_regs *regs,
2861
struct perf_sample_data *data,
2862
void *at,
2863
int count,
2864
setup_fn setup_sample)
2865
{
2866
struct hw_perf_event *hwc = &event->hw;
2867
2868
setup_sample(event, iregs, at, data, regs);
2869
if (iregs == &dummy_iregs) {
2870
/*
2871
* The PEBS records may be drained in the non-overflow context,
2872
* e.g., large PEBS + context switch. Perf should treat the
2873
* last record the same as other PEBS records, and not
2874
* invoke the generic overflow handler.
2875
*/
2876
perf_event_output(event, data, regs);
2877
} else {
2878
/*
2879
* All but the last records are processed.
2880
* The last one is left to be able to call the overflow handler.
2881
*/
2882
perf_event_overflow(event, data, regs);
2883
}
2884
2885
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
2886
if ((is_pebs_counter_event_group(event))) {
2887
/*
2888
* The value of each sample has been updated when setting up
2889
* the corresponding sample data.
2890
*/
2891
perf_event_update_userpage(event);
2892
} else {
2893
/*
2894
* Now, auto-reload is only enabled in fixed period mode.
2895
* The reload value is always hwc->sample_period.
2896
* May need to change it, if auto-reload is enabled in
2897
* freq mode later.
2898
*/
2899
intel_pmu_save_and_restart_reload(event, count);
2900
}
2901
} else {
2902
/*
2903
* For a non-precise event, it's possible the
2904
* counters-snapshotting records a positive value for the
2905
* overflowed event. Then the HW auto-reload mechanism
2906
* resets the counter to 0 immediately, because the
2907
* pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD
2908
* is not set. The counter may appear to go backwards in a
2909
* PMI handler.
2910
*
2911
* Since the event value has been updated when processing the
2912
* counters-snapshotting record, we only need to set a new
2913
* period for the counter.
2914
*/
2915
if (is_pebs_counter_event_group(event))
2916
static_call(x86_pmu_set_period)(event);
2917
else
2918
intel_pmu_save_and_restart(event);
2919
}
2920
}
2921
2922
static __always_inline void
2923
__intel_pmu_pebs_events(struct perf_event *event,
2924
struct pt_regs *iregs,
2925
struct perf_sample_data *data,
2926
void *base, void *top,
2927
int bit, int count,
2928
setup_fn setup_sample)
2929
{
2930
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2931
struct x86_perf_regs perf_regs;
2932
struct pt_regs *regs = &perf_regs.regs;
2933
void *at = get_next_pebs_record_by_bit(base, top, bit);
2934
int cnt = count;
2935
2936
if (!iregs)
2937
iregs = &dummy_iregs;
2938
2939
while (cnt > 1) {
2940
__intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
2941
at += cpuc->pebs_record_size;
2942
at = get_next_pebs_record_by_bit(at, top, bit);
2943
cnt--;
2944
}
2945
2946
__intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
2947
}
2948
2949
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
2950
{
2951
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2952
struct debug_store *ds = cpuc->ds;
2953
struct perf_event *event = cpuc->events[0]; /* PMC0 only */
2954
struct pebs_record_core *at, *top;
2955
int n;
2956
2957
if (!x86_pmu.pebs_active)
2958
return;
2959
2960
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
2961
top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
2962
2963
/*
2964
* Whatever else happens, drain the thing
2965
*/
2966
ds->pebs_index = ds->pebs_buffer_base;
2967
2968
if (!test_bit(0, cpuc->active_mask))
2969
return;
2970
2971
WARN_ON_ONCE(!event);
2972
2973
if (!event->attr.precise_ip)
2974
return;
2975
2976
n = top - at;
2977
if (n <= 0) {
2978
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2979
intel_pmu_save_and_restart_reload(event, 0);
2980
return;
2981
}
2982
2983
__intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
2984
setup_pebs_fixed_sample_data);
2985
}
2986
2987
static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
2988
{
2989
u64 pebs_enabled = cpuc->pebs_enabled & mask;
2990
struct perf_event *event;
2991
int bit;
2992
2993
/*
2994
* The drain_pebs() could be called twice in a short period
2995
* for an auto-reload event in pmu::read(). No overflows
2996
* have happened in between.
2997
* It needs to call intel_pmu_save_and_restart_reload() to
2998
* update the event->count for this case.
2999
*/
3000
for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
3001
event = cpuc->events[bit];
3002
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
3003
intel_pmu_save_and_restart_reload(event, 0);
3004
}
3005
}
3006
3007
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
3008
{
3009
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3010
struct debug_store *ds = cpuc->ds;
3011
struct perf_event *event;
3012
void *base, *at, *top;
3013
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3014
short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3015
int max_pebs_events = intel_pmu_max_num_pebs(NULL);
3016
int bit, i, size;
3017
u64 mask;
3018
3019
if (!x86_pmu.pebs_active)
3020
return;
3021
3022
base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
3023
top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
3024
3025
ds->pebs_index = ds->pebs_buffer_base;
3026
3027
mask = x86_pmu.pebs_events_mask;
3028
size = max_pebs_events;
3029
if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
3030
mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
3031
size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
3032
}
3033
3034
if (unlikely(base >= top)) {
3035
intel_pmu_pebs_event_update_no_drain(cpuc, mask);
3036
return;
3037
}
3038
3039
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
3040
struct pebs_record_nhm *p = at;
3041
u64 pebs_status;
3042
3043
pebs_status = p->status & cpuc->pebs_enabled;
3044
pebs_status &= mask;
3045
3046
/* PEBS v3 has more accurate status bits */
3047
if (x86_pmu.intel_cap.pebs_format >= 3) {
3048
for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
3049
counts[bit]++;
3050
3051
continue;
3052
}
3053
3054
/*
3055
* On some CPUs the PEBS status can be zero when PEBS is
3056
* racing with clearing of GLOBAL_STATUS.
3057
*
3058
* Normally we would drop that record, but in the
3059
* case when there is only a single active PEBS event
3060
* we can assume it's for that event.
3061
*/
3062
if (!pebs_status && cpuc->pebs_enabled &&
3063
!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
3064
pebs_status = p->status = cpuc->pebs_enabled;
3065
3066
bit = find_first_bit((unsigned long *)&pebs_status,
3067
max_pebs_events);
3068
3069
if (!(x86_pmu.pebs_events_mask & (1 << bit)))
3070
continue;
3071
3072
/*
3073
* The PEBS hardware does not deal well with the situation
3074
* when events happen near to each other and multiple bits
3075
* are set. But it should happen rarely.
3076
*
3077
* If these events include one PEBS and multiple non-PEBS
3078
* events, it doesn't impact the PEBS record. The record will
3079
* be handled normally. (slow path)
3080
*
3081
* If these events include two or more PEBS events, the
3082
* records for the events can be collapsed into a single
3083
* one, and it's not possible to reconstruct all events
3084
* that caused the PEBS record. It's called collision.
3085
* If collision happened, the record will be dropped.
3086
*/
3087
if (pebs_status != (1ULL << bit)) {
3088
for_each_set_bit(i, (unsigned long *)&pebs_status, size)
3089
error[i]++;
3090
continue;
3091
}
3092
3093
counts[bit]++;
3094
}
3095
3096
for_each_set_bit(bit, (unsigned long *)&mask, size) {
3097
if ((counts[bit] == 0) && (error[bit] == 0))
3098
continue;
3099
3100
event = cpuc->events[bit];
3101
if (WARN_ON_ONCE(!event))
3102
continue;
3103
3104
if (WARN_ON_ONCE(!event->attr.precise_ip))
3105
continue;
3106
3107
/* log dropped samples number */
3108
if (error[bit]) {
3109
perf_log_lost_samples(event, error[bit]);
3110
3111
if (iregs)
3112
perf_event_account_interrupt(event);
3113
}
3114
3115
if (counts[bit]) {
3116
__intel_pmu_pebs_events(event, iregs, data, base,
3117
top, bit, counts[bit],
3118
setup_pebs_fixed_sample_data);
3119
}
3120
}
3121
}
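/*
 * The drain above is two-pass: the first loop only attributes records
 * to counters (or logs collisions), the second walks the per-counter
 * totals and emits the samples, keeping the newest record per event
 * for the overflow path.
 */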
3122
3123
static __always_inline void
3124
__intel_pmu_handle_pebs_record(struct pt_regs *iregs,
3125
struct pt_regs *regs,
3126
struct perf_sample_data *data,
3127
void *at, u64 pebs_status,
3128
short *counts, void **last,
3129
setup_fn setup_sample)
3130
{
3131
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3132
struct perf_event *event;
3133
int bit;
3134
3135
for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
3136
event = cpuc->events[bit];
3137
3138
if (WARN_ON_ONCE(!event) ||
3139
WARN_ON_ONCE(!event->attr.precise_ip))
3140
continue;
3141
3142
if (counts[bit]++) {
3143
__intel_pmu_pebs_event(event, iregs, regs, data,
3144
last[bit], setup_sample);
3145
}
3146
3147
last[bit] = at;
3148
}
3149
}
3150
3151
static __always_inline void
3152
__intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
3153
struct pt_regs *regs,
3154
struct perf_sample_data *data,
3155
u64 mask, short *counts, void **last,
3156
setup_fn setup_sample)
3157
{
3158
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3159
struct perf_event *event;
3160
int bit;
3161
3162
for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
3163
if (!counts[bit])
3164
continue;
3165
3166
event = cpuc->events[bit];
3167
3168
__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
3169
counts[bit], setup_sample);
3170
}
3171
3172
}
3173
3174
static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
3175
{
3176
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3177
void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
3178
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3179
struct debug_store *ds = cpuc->ds;
3180
struct x86_perf_regs perf_regs;
3181
struct pt_regs *regs = &perf_regs.regs;
3182
struct pebs_basic *basic;
3183
void *base, *at, *top;
3184
u64 mask;
3185
3186
if (!x86_pmu.pebs_active)
3187
return;
3188
3189
base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
3190
top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
3191
3192
ds->pebs_index = ds->pebs_buffer_base;
3193
3194
mask = hybrid(cpuc->pmu, pebs_events_mask) |
3195
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
3196
mask &= cpuc->pebs_enabled;
3197
3198
if (unlikely(base >= top)) {
3199
intel_pmu_pebs_event_update_no_drain(cpuc, mask);
3200
return;
3201
}
3202
3203
if (!iregs)
3204
iregs = &dummy_iregs;
3205
3206
/* Process all but the last record for each counter. */
3207
for (at = base; at < top; at += basic->format_size) {
3208
u64 pebs_status;
3209
3210
basic = at;
3211
if (basic->format_size != cpuc->pebs_record_size)
3212
continue;
3213
3214
pebs_status = mask & basic->applicable_counters;
3215
__intel_pmu_handle_pebs_record(iregs, regs, data, at,
3216
pebs_status, counts, last,
3217
setup_pebs_adaptive_sample_data);
3218
}
3219
3220
__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
3221
setup_pebs_adaptive_sample_data);
3222
}
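/*
 * Unlike the nhm drain, this variant streams records in a single pass:
 * every record but the newest one for a counter is emitted as it is
 * encountered, while last[bit] remembers the newest so it can be
 * handed to the overflow path afterwards.
 */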
3223
3224
static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
3225
struct perf_sample_data *data)
3226
{
3227
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3228
void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
3229
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3230
union arch_pebs_index index;
3231
struct x86_perf_regs perf_regs;
3232
struct pt_regs *regs = &perf_regs.regs;
3233
void *base, *at, *top;
3234
u64 mask;
3235
3236
rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
3237
3238
if (unlikely(!index.wr)) {
3239
intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
3240
return;
3241
}
3242
3243
base = cpuc->pebs_vaddr;
3244
top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT);
3245
3246
index.wr = 0;
3247
index.full = 0;
3248
index.en = 1;
3249
if (cpuc->n_pebs == cpuc->n_large_pebs)
3250
index.thresh = ARCH_PEBS_THRESH_MULTI;
3251
else
3252
index.thresh = ARCH_PEBS_THRESH_SINGLE;
3253
wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
3254
3255
mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
3256
3257
if (!iregs)
3258
iregs = &dummy_iregs;
3259
3260
/* Process all but the last event for each counter. */
3261
for (at = base; at < top;) {
3262
struct arch_pebs_header *header;
3263
struct arch_pebs_basic *basic;
3264
u64 pebs_status;
3265
3266
header = at;
3267
3268
if (WARN_ON_ONCE(!header->size))
3269
break;
3270
3271
/* 1st fragment or single record must have basic group */
3272
if (!header->basic) {
3273
at += header->size;
3274
continue;
3275
}
3276
3277
basic = at + sizeof(struct arch_pebs_header);
3278
pebs_status = mask & basic->applicable_counters;
3279
__intel_pmu_handle_pebs_record(iregs, regs, data, at,
3280
pebs_status, counts, last,
3281
setup_arch_pebs_sample_data);
3282
3283
/* Skip non-last fragments */
3284
while (arch_pebs_record_continued(header)) {
3285
if (!header->size)
3286
break;
3287
at += header->size;
3288
header = at;
3289
}
3290
3291
/* Skip last fragment or the single record */
3292
at += header->size;
3293
}
3294
3295
__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
3296
counts, last,
3297
setup_arch_pebs_sample_data);
3298
}
3299
3300
static void __init intel_arch_pebs_init(void)
3301
{
3302
/*
3303
* Current hybrid platforms either support arch-PEBS on all core
3304
* types or on none of them. So directly set the x86_pmu.arch_pebs
3305
* flag if the boot CPU supports arch-PEBS.
3306
*/
3307
x86_pmu.arch_pebs = 1;
3308
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
3309
x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
3310
x86_pmu.pebs_capable = ~0ULL;
3311
x86_pmu.flags |= PMU_FL_PEBS_ALL;
3312
3313
x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
3314
x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
3315
}
3316
3317
/*
3318
* PEBS probe and setup
3319
*/
3320
3321
static void __init intel_ds_pebs_init(void)
3322
{
3323
/*
3324
* No support for 32bit formats
3325
*/
3326
if (!boot_cpu_has(X86_FEATURE_DTES64))
3327
return;
3328
3329
x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
3330
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
3331
if (x86_pmu.version <= 4)
3332
x86_pmu.pebs_no_isolation = 1;
3333
3334
if (x86_pmu.ds_pebs) {
3335
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
3336
char *pebs_qual = "";
3337
int format = x86_pmu.intel_cap.pebs_format;
3338
3339
if (format < 4)
3340
x86_pmu.intel_cap.pebs_baseline = 0;
3341
3342
x86_pmu.pebs_enable = intel_pmu_pebs_enable;
3343
x86_pmu.pebs_disable = intel_pmu_pebs_disable;
3344
x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
3345
x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;
3346
3347
switch (format) {
3348
case 0:
3349
pr_cont("PEBS fmt0%c, ", pebs_type);
3350
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
3351
/*
3352
* Using >PAGE_SIZE buffers makes the WRMSR to
3353
* PERF_GLOBAL_CTRL in intel_pmu_enable_all()
3354
* mysteriously hang on Core2.
3355
*
3356
* As a workaround, we don't do this.
3357
*/
3358
x86_pmu.pebs_buffer_size = PAGE_SIZE;
3359
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
3360
break;
3361
3362
case 1:
3363
pr_cont("PEBS fmt1%c, ", pebs_type);
3364
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
3365
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
3366
break;
3367
3368
case 2:
3369
pr_cont("PEBS fmt2%c, ", pebs_type);
3370
x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
3371
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
3372
break;
3373
3374
case 3:
3375
pr_cont("PEBS fmt3%c, ", pebs_type);
3376
x86_pmu.pebs_record_size =
3377
sizeof(struct pebs_record_skl);
3378
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
3379
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
3380
break;
3381
3382
case 6:
3383
if (x86_pmu.intel_cap.pebs_baseline)
3384
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
3385
fallthrough;
3386
case 5:
3387
x86_pmu.pebs_ept = 1;
3388
fallthrough;
3389
case 4:
3390
x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
3391
x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
3392
if (x86_pmu.intel_cap.pebs_baseline) {
3393
x86_pmu.large_pebs_flags |=
3394
PERF_SAMPLE_BRANCH_STACK |
3395
PERF_SAMPLE_TIME;
3396
x86_pmu.flags |= PMU_FL_PEBS_ALL;
3397
x86_pmu.pebs_capable = ~0ULL;
3398
pebs_qual = "-baseline";
3399
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
3400
} else {
3401
/* Only basic record supported */
3402
x86_pmu.large_pebs_flags &=
3403
~(PERF_SAMPLE_ADDR |
3404
PERF_SAMPLE_TIME |
3405
PERF_SAMPLE_DATA_SRC |
3406
PERF_SAMPLE_TRANSACTION |
3407
PERF_SAMPLE_REGS_USER |
3408
PERF_SAMPLE_REGS_INTR);
3409
}
3410
pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);
3411
3412
/*
3413
* The PEBS-via-PT is not supported on hybrid platforms,
3414
* because not all CPUs of a hybrid machine support it.
3415
* The global x86_pmu.intel_cap, which only contains the
3416
* common capabilities, is used to check the availability
3417
* of the feature. The per-PMU pebs_output_pt_available
3418
* in a hybrid machine should be ignored.
3419
*/
3420
if (x86_pmu.intel_cap.pebs_output_pt_available) {
3421
pr_cont("PEBS-via-PT, ");
3422
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
3423
}
3424
3425
break;
3426
3427
default:
3428
pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
3429
x86_pmu.ds_pebs = 0;
3430
}
3431
}
3432
}
3433
3434
void __init intel_pebs_init(void)
3435
{
3436
if (x86_pmu.intel_cap.pebs_format == 0xf)
3437
intel_arch_pebs_init();
3438
else
3439
intel_ds_pebs_init();
3440
}
3441
3442
void perf_restore_debug_store(void)
3443
{
3444
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
3445
3446
if (!x86_pmu.bts && !x86_pmu.ds_pebs)
3447
return;
3448
3449
wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds);
3450
}
3451
3452