GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/cpu/perf_event_intel.c
#ifdef CONFIG_CPU_SUP_INTEL

#define MAX_EXTRA_REGS 2

/*
 * Per register state.
 */
struct er_account {
	int ref;			/* reference count */
	unsigned int extra_reg;		/* extra MSR number */
	u64 extra_config;		/* extra MSR config */
};

/*
 * Per core state.
 * This is used to coordinate shared registers between HT threads.
 */
struct intel_percore {
	raw_spinlock_t lock;		/* protect structure */
	struct er_account regs[MAX_EXTRA_REGS];
	int refcnt;			/* number of threads */
	unsigned core_id;
};

/*
 * Intel PerfMon, used on Core and later.
 */
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
};

static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/*
	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
	 * ratio between these counters.
	 */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	EVENT_EXTRA_END
};

static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0xb7, 0),
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
	EVENT_EXTRA_END
};

static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0xb7, 0),
	INTEL_EVENT_CONSTRAINT(0xbb, 0),
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
	return intel_perfmon_event_map[hw_event];
}

static __initconst const u64 snb_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static __initconst const u64 westmere_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

/*
 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
 * See IA32 SDM Vol 3B 30.6.1.3
 */

#define NHM_DMND_DATA_RD	(1 << 0)
#define NHM_DMND_RFO		(1 << 1)
#define NHM_DMND_IFETCH		(1 << 2)
#define NHM_DMND_WB		(1 << 3)
#define NHM_PF_DATA_RD		(1 << 4)
#define NHM_PF_DATA_RFO		(1 << 5)
#define NHM_PF_IFETCH		(1 << 6)
#define NHM_OFFCORE_OTHER	(1 << 7)
#define NHM_UNCORE_HIT		(1 << 8)
#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
#define NHM_OTHER_CORE_HITM	(1 << 10)
				/* reserved */
#define NHM_REMOTE_CACHE_FWD	(1 << 12)
#define NHM_REMOTE_DRAM		(1 << 13)
#define NHM_LOCAL_DRAM		(1 << 14)
#define NHM_NON_DRAM		(1 << 15)

#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)

#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)

#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)

static __initconst const u64 nehalem_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
	},
 }
};

static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static __initconst const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
		[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
		[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static void intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();

	intel_pmu_pebs_disable_all();
	intel_pmu_lbr_disable_all();
}

static void intel_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	intel_pmu_pebs_enable_all();
	intel_pmu_lbr_enable_all();
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	}
}

/*
 * Workaround for:
 *   Intel Errata AAK100 (model 26)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * The official story:
 *   These chips need to be 'reset' when adding counters by programming the
 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 *   in sequence on the same PMC or on different PMCs.
 *
 * In practice it appears some of these events do in fact count, and
 * we need to program all 4 events.
 */
static void intel_pmu_nhm_workaround(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

	/*
	 * The errata requires the following steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
	 */

	/*
	 * The real steps we take are a little different from the above:
	 * A) To reduce MSR operations, we don't run step 1), as the
	 *    registers are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with the magic number;
	 * C) With step 5), we clear only the PERFEVTSELx registers that are
	 *    not currently in use;
	 * D) Call x86_perf_event_set_period to restore PMCx;
	 */

	/* We always operate on 4 pairs of PERF counters */
	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];
		if (event)
			x86_perf_event_update(event);
	}

	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];

		if (event) {
			x86_perf_event_set_period(event);
			__x86_pmu_enable_event(&event->hw,
					ARCH_PERFMON_EVENTSEL_ENABLE);
		} else
			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
	}
}

static void intel_pmu_nhm_enable_all(int added)
{
	if (added)
		intel_pmu_nhm_workaround();
	intel_pmu_enable_all(added);
}

static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

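/*
 * Clear the 4-bit control field of this fixed counter in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL, leaving the other fixed counters untouched.
 */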
static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void intel_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
		intel_pmu_disable_bts();
		intel_pmu_drain_bts_buffer();
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_disable_fixed(hwc);
		return;
	}

	x86_pmu_disable_event(event);

	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_disable(event);
}

static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;

	/*
	 * ANY bit is supported in v3 and up
	 */
	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
		bits |= 0x4;

	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void intel_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
		if (!__this_cpu_read(cpu_hw_events.enabled))
			return;

		intel_pmu_enable_bts(hwc->config);
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc);
		return;
	}

	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_enable(event);

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * Save and restart an expired event. Called from NMI context,
 * so it has to be careful about preempting normal event ops:
 */
static int intel_pmu_save_and_restart(struct perf_event *event)
{
	x86_perf_event_update(event);
	return x86_perf_event_set_period(event);
}

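/*
 * Clear all programmable and fixed counters and reset the BTS index;
 * used as a last resort when the PMI handler appears to be stuck.
 */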
static void intel_pmu_reset(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
	unsigned long flags;
	int idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	printk("clearing PMU state on CPU#%d\n", smp_processor_id());

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
		checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);

	if (ds)
		ds->bts_index = ds->bts_buffer_base;

	local_irq_restore(flags);
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int bit, loops;
	u64 status;
	int handled;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	/*
	 * Some chipsets need to unmask the LVTPC in a particular spot
	 * inside the NMI handler. As a result, the unmasking was pushed
	 * into all the NMI handlers.
	 *
	 * This handler doesn't seem to have any issues with the unmasking,
	 * so it was left at the top.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	intel_pmu_disable_all();
	handled = intel_pmu_drain_bts_buffer();
	status = intel_pmu_get_status();
	if (!status) {
		intel_pmu_enable_all(0);
		return handled;
	}

	loops = 0;
again:
	intel_pmu_ack_status(status);
	if (++loops > 100) {
		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
		perf_event_print_debug();
		intel_pmu_reset();
		goto done;
	}

	inc_irq_stat(apic_perf_irqs);

	intel_pmu_lbr_read();

	/*
	 * PEBS overflow sets bit 62 in the global status register
	 */
	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
		handled++;
		x86_pmu.drain_pebs(regs);
	}

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		handled++;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		if (!intel_pmu_save_and_restart(event))
			continue;

		data.period = event->hw.last_period;

		if (perf_event_overflow(event, 1, &data, regs))
			x86_pmu_stop(event, 0);
	}

	/*
	 * Repeat if there is more work to be done:
	 */
	status = intel_pmu_get_status();
	if (status)
		goto again;

done:
	intel_pmu_enable_all(0);
	return handled;
}

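/*
 * Route branch-instruction events with a fixed sample period of 1 to the
 * BTS (Branch Trace Store) pseudo counter; frequency-based events stay on
 * the regular counters.
 */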
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int hw_event, bts_event;

	if (event->attr.freq)
		return NULL;

	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
		return &bts_constraint;

	return NULL;
}

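/*
 * Arbitrate the shared extra MSRs (e.g. MSR_OFFCORE_RSP_*) between the
 * hardware threads of a core: events that agree on the extra config may
 * share a slot, otherwise the event is constrained to the empty set.
 */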
static struct event_constraint *
intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
	struct event_constraint *c;
	struct intel_percore *pc;
	struct er_account *era;
	int i;
	int free_slot;
	int found;

	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
		return NULL;

	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
		if (e != c->code)
			continue;

		/*
		 * Allocate resource per core.
		 */
		pc = cpuc->per_core;
		if (!pc)
			break;
		c = &emptyconstraint;
		raw_spin_lock(&pc->lock);
		free_slot = -1;
		found = 0;
		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			era = &pc->regs[i];
			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
				/* Allow sharing same config */
				if (hwc->extra_config == era->extra_config) {
					era->ref++;
					cpuc->percore_used = 1;
					hwc->extra_alloc = 1;
					c = NULL;
				}
				/* else conflict */
				found = 1;
				break;
			} else if (era->ref == 0 && free_slot == -1)
				free_slot = i;
		}
		if (!found && free_slot != -1) {
			era = &pc->regs[free_slot];
			era->ref = 1;
			era->extra_reg = hwc->extra_reg;
			era->extra_config = hwc->extra_config;
			cpuc->percore_used = 1;
			hwc->extra_alloc = 1;
			c = NULL;
		}
		raw_spin_unlock(&pc->lock);
		return c;
	}

	return NULL;
}

static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	c = intel_bts_constraints(event);
	if (c)
		return c;

	c = intel_pebs_constraints(event);
	if (c)
		return c;

	c = intel_percore_constraints(cpuc, event);
	if (c)
		return c;

	return x86_get_event_constraints(cpuc, event);
}

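/*
 * Drop the per-core extra-register reference taken in
 * intel_percore_constraints() when the event is released.
 */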
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
					struct perf_event *event)
{
	struct extra_reg *er;
	struct intel_percore *pc;
	struct er_account *era;
	struct hw_perf_event *hwc = &event->hw;
	int i, allref;

	if (!cpuc->percore_used)
		return;

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (hwc->config & er->config_mask))
			continue;

		pc = cpuc->per_core;
		raw_spin_lock(&pc->lock);
		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			era = &pc->regs[i];
			if (era->ref > 0 &&
			    era->extra_config == hwc->extra_config &&
			    era->extra_reg == er->msr) {
				era->ref--;
				hwc->extra_alloc = 0;
				break;
			}
		}
		allref = 0;
		for (i = 0; i < MAX_EXTRA_REGS; i++)
			allref += pc->regs[i].ref;
		if (allref == 0)
			cpuc->percore_used = 0;
		raw_spin_unlock(&pc->lock);
		break;
	}
}

static int intel_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.precise_ip &&
	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
		/*
		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
		 * (0x003c) so that we can use it with PEBS.
		 *
		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
		 * PEBS capable. However we can use INST_RETIRED.ANY_P
		 * (0x00c0), which is a PEBS capable event, to get the same
		 * count.
		 *
		 * With a counter mask (CNTMASK), INST_RETIRED.ANY_P counts
		 * the cycles in which at least CNTMASK instructions retire.
		 * By setting CNTMASK to a value (16) larger than the maximum
		 * number of instructions that can be retired per cycle (4)
		 * and then inverting the condition, we count all cycles that
		 * retire fewer than 16 instructions, which is every cycle.
		 *
		 * Thereby we gain a PEBS capable cycle counter.
		 */
		u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */

		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
		event->hw.config = alt_config;
	}

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
		return 0;

	if (x86_pmu.version < 3)
		return -EINVAL;

	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;

	return 0;
}

static __initconst const struct x86_pmu core_pmu = {
	.name			= "core",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.event_constraints	= intel_core_event_constraints,
};

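/*
 * Allocate the per-core structure used to share extra registers between
 * HT siblings; only needed when hyper-threading is present.
 */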
static int intel_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	if (!cpu_has_ht_siblings())
		return NOTIFY_OK;

	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
				      GFP_KERNEL, cpu_to_node(cpu));
	if (!cpuc->per_core)
		return NOTIFY_BAD;

	raw_spin_lock_init(&cpuc->per_core->lock);
	cpuc->per_core->core_id = -1;
	return NOTIFY_OK;
}

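/*
 * If an HT sibling has already set up an intel_percore for this core,
 * free our copy and adopt the shared one; otherwise claim ours for the core.
 */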
static void intel_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int core_id = topology_core_id(cpu);
	int i;

	init_debug_store_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up.
	 */
	intel_pmu_lbr_reset();

	if (!cpu_has_ht_siblings())
		return;

	for_each_cpu(i, topology_thread_cpumask(cpu)) {
		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;

		if (pc && pc->core_id == core_id) {
			kfree(cpuc->per_core);
			cpuc->per_core = pc;
			break;
		}
	}

	cpuc->per_core->core_id = core_id;
	cpuc->per_core->refcnt++;
}

static void intel_pmu_cpu_dying(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct intel_percore *pc = cpuc->per_core;

	if (pc) {
		if (pc->core_id == -1 || --pc->refcnt == 0)
			kfree(pc);
		cpuc->per_core = NULL;
	}

	fini_debug_store_on_cpu(cpu);
}

static __initconst const struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.hw_config		= intel_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,

	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
};

static void intel_clovertown_quirks(void)
{
	/*
	 * PEBS is unreliable due to:
	 *
	 *   AJ67  - PEBS may experience CPL leaks
	 *   AJ68  - PEBS PMI may be delayed by one event
	 *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
	 *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
	 *
	 * AJ67 could be worked around by restricting the OS/USR flags.
	 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
	 *
	 * AJ106 could possibly be worked around by not allowing LBR
	 *       usage from PEBS, including the fixup.
	 * AJ68  could possibly be worked around by always programming
	 *       a pebs_event_reset[0] value and coping with the lost events.
	 *
	 * But taken together it might just make sense to not enable PEBS on
	 * these chips.
	 */
	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
	x86_pmu.pebs = 0;
	x86_pmu.pebs_constraints = NULL;
}

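/*
 * Detect the CPU model and wire up the matching event tables, constraints
 * and quirks for the Intel PMU.
 */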
static __init int intel_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int ebx;
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		switch (boot_cpu_data.x86) {
		case 0x6:
			return p6_pmu_init();
		case 0xf:
			return p4_pmu_init();
		}
		return -ENODEV;
	}

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return -ENODEV;

	version = eax.split.version_id;
	if (version < 2)
		x86_pmu = core_pmu;
	else
		x86_pmu = intel_pmu;

	x86_pmu.version = version;
	x86_pmu.num_counters = eax.split.num_counters;
	x86_pmu.cntval_bits = eax.split.bit_width;
	x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;

	/*
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events:
	 */
	if (version > 1)
		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);

	/*
	 * v2 and above have a perf capabilities MSR
	 */
	if (version > 1) {
		u64 capabilities;

		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
		x86_pmu.intel_cap.capabilities = capabilities;
	}

	intel_ds_init();

	/*
	 * Install the hw-cache-events table:
	 */
	switch (boot_cpu_data.x86_model) {
	case 14: /* 65 nm core solo/duo, "Yonah" */
		pr_cont("Core events, ");
		break;

	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
		x86_pmu.quirks = intel_clovertown_quirks;
	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 29: /* six-core 45 nm xeon "Dunnington" */
		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_core();

		x86_pmu.event_constraints = intel_core2_event_constraints;
		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
		pr_cont("Core2 events, ");
		break;

	case 26: /* 45 nm nehalem, "Bloomfield" */
	case 30: /* 45 nm nehalem, "Lynnfield" */
	case 46: /* 45 nm nehalem-ex, "Beckton" */
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_nehalem_event_constraints;
		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.extra_regs = intel_nehalem_extra_regs;

		/* UOPS_ISSUED.STALLED_CYCLES */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;

		if (ebx & 0x40) {
			/*
			 * Erratum AAJ80 detected, we work it around by using
			 * the BR_MISP_EXEC.ANY event. This will over-count
			 * branch-misses, but it's still much better than the
			 * architectural event which is often completely bogus:
			 */
			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;

			pr_cont("erratum AAJ80 worked around, ");
		}
		pr_cont("Nehalem events, ");
		break;

	case 28: /* Atom */
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_atom();

		x86_pmu.event_constraints = intel_gen_event_constraints;
		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
		pr_cont("Atom events, ");
		break;

	case 37: /* 32 nm nehalem, "Clarkdale" */
	case 44: /* 32 nm nehalem, "Gulftown" */
	case 47: /* 32 nm Xeon E7 */
		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_westmere_event_constraints;
		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
		x86_pmu.extra_regs = intel_westmere_extra_regs;

		/* UOPS_ISSUED.STALLED_CYCLES */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;

		pr_cont("Westmere events, ");
		break;

	case 42: /* SandyBridge */
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_snb_event_constraints;
		x86_pmu.pebs_constraints = intel_snb_pebs_events;

		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;

		pr_cont("SandyBridge events, ");
		break;

	default:
		/*
		 * default constraints for v2 and up
		 */
		x86_pmu.event_constraints = intel_gen_event_constraints;
		pr_cont("generic architected perfmon, ");
	}
	return 0;
}

#else /* CONFIG_CPU_SUP_INTEL */

static int intel_pmu_init(void)
{
	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */