GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/amd/core.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
#include <linux/perf_event.h>
3
#include <linux/jump_label.h>
4
#include <linux/export.h>
5
#include <linux/kvm_types.h>
6
#include <linux/types.h>
7
#include <linux/init.h>
8
#include <linux/slab.h>
9
#include <linux/delay.h>
10
#include <linux/jiffies.h>
11
#include <asm/apicdef.h>
12
#include <asm/apic.h>
13
#include <asm/msr.h>
14
#include <asm/nmi.h>
15
16
#include "../perf_event.h"
17
18
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
19
static unsigned long perf_nmi_window;
20
21
/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */
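/* Event code 0xFFF is encoded as event select bits [35:32] = 0xF and bits [7:0] = 0xFF */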
22
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
23
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
24
25
/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
26
static u64 amd_pmu_global_cntr_mask __read_mostly;
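/*
* In the cache event tables below, 0 means no suitable event is available
* and -1 marks an unsupported op/result combination for the generic x86
* cache-event lookup.
*/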
27
28
static __initconst const u64 amd_hw_cache_event_ids
29
[PERF_COUNT_HW_CACHE_MAX]
30
[PERF_COUNT_HW_CACHE_OP_MAX]
31
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
32
{
33
[ C(L1D) ] = {
34
[ C(OP_READ) ] = {
35
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
36
[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
37
},
38
[ C(OP_WRITE) ] = {
39
[ C(RESULT_ACCESS) ] = 0,
40
[ C(RESULT_MISS) ] = 0,
41
},
42
[ C(OP_PREFETCH) ] = {
43
[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
44
[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
45
},
46
},
47
[ C(L1I ) ] = {
48
[ C(OP_READ) ] = {
49
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
50
[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
51
},
52
[ C(OP_WRITE) ] = {
53
[ C(RESULT_ACCESS) ] = -1,
54
[ C(RESULT_MISS) ] = -1,
55
},
56
[ C(OP_PREFETCH) ] = {
57
[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
58
[ C(RESULT_MISS) ] = 0,
59
},
60
},
61
[ C(LL ) ] = {
62
[ C(OP_READ) ] = {
63
[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
64
[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
65
},
66
[ C(OP_WRITE) ] = {
67
[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
68
[ C(RESULT_MISS) ] = 0,
69
},
70
[ C(OP_PREFETCH) ] = {
71
[ C(RESULT_ACCESS) ] = 0,
72
[ C(RESULT_MISS) ] = 0,
73
},
74
},
75
[ C(DTLB) ] = {
76
[ C(OP_READ) ] = {
77
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
78
[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
79
},
80
[ C(OP_WRITE) ] = {
81
[ C(RESULT_ACCESS) ] = 0,
82
[ C(RESULT_MISS) ] = 0,
83
},
84
[ C(OP_PREFETCH) ] = {
85
[ C(RESULT_ACCESS) ] = 0,
86
[ C(RESULT_MISS) ] = 0,
87
},
88
},
89
[ C(ITLB) ] = {
90
[ C(OP_READ) ] = {
91
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
92
[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
93
},
94
[ C(OP_WRITE) ] = {
95
[ C(RESULT_ACCESS) ] = -1,
96
[ C(RESULT_MISS) ] = -1,
97
},
98
[ C(OP_PREFETCH) ] = {
99
[ C(RESULT_ACCESS) ] = -1,
100
[ C(RESULT_MISS) ] = -1,
101
},
102
},
103
[ C(BPU ) ] = {
104
[ C(OP_READ) ] = {
105
[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
106
[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
107
},
108
[ C(OP_WRITE) ] = {
109
[ C(RESULT_ACCESS) ] = -1,
110
[ C(RESULT_MISS) ] = -1,
111
},
112
[ C(OP_PREFETCH) ] = {
113
[ C(RESULT_ACCESS) ] = -1,
114
[ C(RESULT_MISS) ] = -1,
115
},
116
},
117
[ C(NODE) ] = {
118
[ C(OP_READ) ] = {
119
[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
120
[ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
121
},
122
[ C(OP_WRITE) ] = {
123
[ C(RESULT_ACCESS) ] = -1,
124
[ C(RESULT_MISS) ] = -1,
125
},
126
[ C(OP_PREFETCH) ] = {
127
[ C(RESULT_ACCESS) ] = -1,
128
[ C(RESULT_MISS) ] = -1,
129
},
130
},
131
};
132
133
static __initconst const u64 amd_hw_cache_event_ids_f17h
134
[PERF_COUNT_HW_CACHE_MAX]
135
[PERF_COUNT_HW_CACHE_OP_MAX]
136
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
137
[C(L1D)] = {
138
[C(OP_READ)] = {
139
[C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
140
[C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */
141
},
142
[C(OP_WRITE)] = {
143
[C(RESULT_ACCESS)] = 0,
144
[C(RESULT_MISS)] = 0,
145
},
146
[C(OP_PREFETCH)] = {
147
[C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
148
[C(RESULT_MISS)] = 0,
149
},
150
},
151
[C(L1I)] = {
152
[C(OP_READ)] = {
153
[C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */
154
[C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */
155
},
156
[C(OP_WRITE)] = {
157
[C(RESULT_ACCESS)] = -1,
158
[C(RESULT_MISS)] = -1,
159
},
160
[C(OP_PREFETCH)] = {
161
[C(RESULT_ACCESS)] = 0,
162
[C(RESULT_MISS)] = 0,
163
},
164
},
165
[C(LL)] = {
166
[C(OP_READ)] = {
167
[C(RESULT_ACCESS)] = 0,
168
[C(RESULT_MISS)] = 0,
169
},
170
[C(OP_WRITE)] = {
171
[C(RESULT_ACCESS)] = 0,
172
[C(RESULT_MISS)] = 0,
173
},
174
[C(OP_PREFETCH)] = {
175
[C(RESULT_ACCESS)] = 0,
176
[C(RESULT_MISS)] = 0,
177
},
178
},
179
[C(DTLB)] = {
180
[C(OP_READ)] = {
181
[C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
182
[C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */
183
},
184
[C(OP_WRITE)] = {
185
[C(RESULT_ACCESS)] = 0,
186
[C(RESULT_MISS)] = 0,
187
},
188
[C(OP_PREFETCH)] = {
189
[C(RESULT_ACCESS)] = 0,
190
[C(RESULT_MISS)] = 0,
191
},
192
},
193
[C(ITLB)] = {
194
[C(OP_READ)] = {
195
[C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
196
[C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */
197
},
198
[C(OP_WRITE)] = {
199
[C(RESULT_ACCESS)] = -1,
200
[C(RESULT_MISS)] = -1,
201
},
202
[C(OP_PREFETCH)] = {
203
[C(RESULT_ACCESS)] = -1,
204
[C(RESULT_MISS)] = -1,
205
},
206
},
207
[C(BPU)] = {
208
[C(OP_READ)] = {
209
[C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */
210
[C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */
211
},
212
[C(OP_WRITE)] = {
213
[C(RESULT_ACCESS)] = -1,
214
[C(RESULT_MISS)] = -1,
215
},
216
[C(OP_PREFETCH)] = {
217
[C(RESULT_ACCESS)] = -1,
218
[C(RESULT_MISS)] = -1,
219
},
220
},
221
[C(NODE)] = {
222
[C(OP_READ)] = {
223
[C(RESULT_ACCESS)] = 0,
224
[C(RESULT_MISS)] = 0,
225
},
226
[C(OP_WRITE)] = {
227
[C(RESULT_ACCESS)] = -1,
228
[C(RESULT_MISS)] = -1,
229
},
230
[C(OP_PREFETCH)] = {
231
[C(RESULT_ACCESS)] = -1,
232
[C(RESULT_MISS)] = -1,
233
},
234
},
235
};
236
237
/*
238
* AMD Performance Monitor K7 and later, up to and including Family 16h:
239
*/
240
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
241
{
242
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
243
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
244
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
245
[PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
246
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
247
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
248
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
249
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
250
};
251
252
/*
253
* AMD Performance Monitor Family 17h and later:
254
*/
255
static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
256
{
257
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
258
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
259
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
260
[PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
261
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
262
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
263
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
264
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
265
};
266
267
static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
268
{
269
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
270
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
271
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
272
[PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
273
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
274
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
275
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
276
};
277
278
static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
279
{
280
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
281
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
282
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
283
[PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
284
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
285
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
286
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
287
[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120,
288
};
289
290
static u64 amd_pmu_event_map(int hw_event)
291
{
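/*
* The family checks act as a fallback for parts that do not set the
* ZEN* feature bits: Family 1Ah and newer take the Zen4 map, and
* Family 19h parts without ZEN4 take the Zen2 map.
*/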
292
if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
293
return amd_zen4_perfmon_event_map[hw_event];
294
295
if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
296
return amd_zen2_perfmon_event_map[hw_event];
297
298
if (cpu_feature_enabled(X86_FEATURE_ZEN1))
299
return amd_zen1_perfmon_event_map[hw_event];
300
301
return amd_perfmon_event_map[hw_event];
302
}
303
304
/*
305
* Previously calculated offsets
306
*/
307
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
308
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
309
310
/*
311
* Legacy CPUs:
312
* 4 counters starting at 0xc0010000 each offset by 1
313
*
314
* CPUs with core performance counter extensions:
315
* 6 counters starting at 0xc0010200 each offset by 2
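*
* e.g. with core extensions, index 2 maps to offset 4, i.e. the third
* CTL/CTR pair at MSR_F15H_PERF_CTL + 4 and MSR_F15H_PERF_CTR + 4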
316
*/
317
static inline int amd_pmu_addr_offset(int index, bool eventsel)
318
{
319
int offset;
320
321
if (!index)
322
return index;
323
324
if (eventsel)
325
offset = event_offsets[index];
326
else
327
offset = count_offsets[index];
328
329
if (offset)
330
return offset;
331
332
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
333
offset = index;
334
else
335
offset = index << 1;
336
337
if (eventsel)
338
event_offsets[index] = offset;
339
else
340
count_offsets[index] = offset;
341
342
return offset;
343
}
344
345
/*
346
* AMD64 events are detected based on their event codes.
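* The 12-bit event code is reassembled below from event select bits
* [35:32] (code[11:8]) and bits [7:0] (code[7:0]).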
347
*/
348
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
349
{
350
return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
351
}
352
353
static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
354
{
355
if (!(x86_pmu.flags & PMU_FL_PAIR))
356
return false;
357
358
switch (amd_get_event_code(hwc)) {
359
case 0x003: return true; /* Retired SSE/AVX FLOPs */
360
default: return false;
361
}
362
}
363
364
DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
365
366
static int amd_core_hw_config(struct perf_event *event)
367
{
368
if (event->attr.exclude_host && event->attr.exclude_guest)
369
/*
370
* When HO == GO == 1 the hardware treats that as GO == HO == 0
371
* and will count in both modes. We don't want to count in that
372
* case so we emulate no-counting by setting US = OS = 0.
373
*/
374
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
375
ARCH_PERFMON_EVENTSEL_OS);
376
else if (event->attr.exclude_host)
377
event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
378
else if (event->attr.exclude_guest)
379
event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
380
381
if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
382
event->hw.flags |= PERF_X86_EVENT_PAIR;
383
384
if (has_branch_stack(event))
385
return static_call(amd_pmu_branch_hw_config)(event);
386
387
return 0;
388
}
389
390
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
391
{
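/* NB events have the top three bits of the low event byte set, i.e. event codes 0xEx and 0xFx */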
392
return (hwc->config & 0xe0) == 0xe0;
393
}
394
395
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
396
{
397
struct amd_nb *nb = cpuc->amd_nb;
398
399
return nb && nb->nb_id != -1;
400
}
401
402
static int amd_pmu_hw_config(struct perf_event *event)
403
{
404
int ret;
405
406
/* pass precise event sampling to ibs: */
407
if (event->attr.precise_ip && get_ibs_caps())
408
return forward_event_to_ibs(event);
409
410
if (has_branch_stack(event) && !x86_pmu.lbr_nr)
411
return -EOPNOTSUPP;
412
413
ret = x86_pmu_hw_config(event);
414
if (ret)
415
return ret;
416
417
if (event->attr.type == PERF_TYPE_RAW)
418
event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
419
420
return amd_core_hw_config(event);
421
}
422
423
static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
424
struct perf_event *event)
425
{
426
struct amd_nb *nb = cpuc->amd_nb;
427
int i;
428
429
/*
430
* need to scan whole list because event may not have
431
* been assigned during scheduling
432
*
433
* no race condition possible because event can only
434
* be removed on one CPU at a time AND PMU is disabled
435
* when we come here
436
*/
437
for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
438
struct perf_event *tmp = event;
439
440
if (try_cmpxchg(nb->owners + i, &tmp, NULL))
441
break;
442
}
443
}
444
445
/*
446
* AMD64 NorthBridge events need special treatment because
447
* counter access needs to be synchronized across all cores
448
* of a package. Refer to BKDG section 3.12
449
*
450
* NB events are events measuring L3 cache, Hypertransport
451
* traffic. They are identified by an event code >= 0xe00.
452
* They measure events on the NorthBridge which is shared
453
* by all cores on a package. NB events are counted on a
454
* shared set of counters. When a NB event is programmed
455
* in a counter, the data actually comes from a shared
456
* counter. Thus, access to those counters needs to be
457
* synchronized.
458
*
459
* We implement the synchronization such that no two cores
460
* can be measuring NB events using the same counters. Thus,
461
* we maintain a per-NB allocation table. The available slot
462
* is propagated using the event_constraint structure.
463
*
464
* We provide only one choice for each NB event based on
465
* the fact that only NB events have restrictions. Consequently,
466
* if a counter is available, there is a guarantee the NB event
467
* will be assigned to it. If no slot is available, an empty
468
* constraint is returned and scheduling will eventually fail
469
* for this event.
470
*
471
* Note that all cores attached to the same NB compete for the same
472
* counters to host NB events, this is why we use atomic ops. Some
473
* multi-chip CPUs may have more than one NB.
474
*
475
* Given that resources are allocated (cmpxchg), they must be
476
* eventually freed for others to use. This is accomplished by
477
* calling __amd_put_nb_event_constraints()
478
*
479
* Non NB events are not impacted by this restriction.
480
*/
481
static struct event_constraint *
482
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
483
struct event_constraint *c)
484
{
485
struct hw_perf_event *hwc = &event->hw;
486
struct amd_nb *nb = cpuc->amd_nb;
487
struct perf_event *old;
488
int idx, new = -1;
489
490
if (!c)
491
c = &unconstrained;
492
493
if (cpuc->is_fake)
494
return c;
495
496
/*
497
* detect if already present, if so reuse
498
*
499
* cannot merge with actual allocation
500
* because of possible holes
501
*
502
* event can already be present yet not assigned (in hwc->idx)
503
* because of successive calls to x86_schedule_events() from
504
* hw_perf_group_sched_in() without hw_perf_enable()
505
*/
506
for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
507
if (new == -1 || hwc->idx == idx)
508
/* assign free slot, prefer hwc->idx */
509
old = cmpxchg(nb->owners + idx, NULL, event);
510
else if (nb->owners[idx] == event)
511
/* event already present */
512
old = event;
513
else
514
continue;
515
516
if (old && old != event)
517
continue;
518
519
/* reassign to this slot */
520
if (new != -1)
521
cmpxchg(nb->owners + new, event, NULL);
522
new = idx;
523
524
/* already present, reuse */
525
if (old == event)
526
break;
527
}
528
529
if (new == -1)
530
return &emptyconstraint;
531
532
return &nb->event_constraints[new];
533
}
534
535
static struct amd_nb *amd_alloc_nb(int cpu)
536
{
537
struct amd_nb *nb;
538
int i;
539
540
nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
541
if (!nb)
542
return NULL;
543
544
nb->nb_id = -1;
545
546
/*
547
* initialize all possible NB constraints
548
*/
549
for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
550
__set_bit(i, nb->event_constraints[i].idxmsk);
551
nb->event_constraints[i].weight = 1;
552
}
553
return nb;
554
}
555
556
typedef void (amd_pmu_branch_reset_t)(void);
557
DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t);
558
559
static void amd_pmu_cpu_reset(int cpu)
560
{
561
if (x86_pmu.lbr_nr)
562
static_call(amd_pmu_branch_reset)();
563
564
if (x86_pmu.version < 2)
565
return;
566
567
/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
568
wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
569
570
/*
571
* Clear freeze and overflow bits i.e. PerfCntrGlobalStatus.LbrFreeze
572
* and PerfCntrGlobalStatus.PerfCntrOvfl
573
*/
574
wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
575
GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask);
576
}
577
578
static int amd_pmu_cpu_prepare(int cpu)
579
{
580
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
581
582
cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL,
583
cpu_to_node(cpu));
584
if (!cpuc->lbr_sel)
585
return -ENOMEM;
586
587
WARN_ON_ONCE(cpuc->amd_nb);
588
589
if (!x86_pmu.amd_nb_constraints)
590
return 0;
591
592
cpuc->amd_nb = amd_alloc_nb(cpu);
593
if (cpuc->amd_nb)
594
return 0;
595
596
kfree(cpuc->lbr_sel);
597
cpuc->lbr_sel = NULL;
598
599
return -ENOMEM;
600
}
601
602
static void amd_pmu_cpu_starting(int cpu)
603
{
604
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
605
void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
606
struct amd_nb *nb;
607
int i, nb_id;
608
609
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
610
amd_pmu_cpu_reset(cpu);
611
612
if (!x86_pmu.amd_nb_constraints)
613
return;
614
615
nb_id = topology_amd_node_id(cpu);
616
WARN_ON_ONCE(nb_id == BAD_APICID);
617
618
for_each_online_cpu(i) {
619
nb = per_cpu(cpu_hw_events, i).amd_nb;
620
if (WARN_ON_ONCE(!nb))
621
continue;
622
623
if (nb->nb_id == nb_id) {
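/*
* Another CPU on this node already registered an amd_nb: share it
* and queue the one allocated in cpu_prepare for later freeing.
*/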
624
*onln = cpuc->amd_nb;
625
cpuc->amd_nb = nb;
626
break;
627
}
628
}
629
630
cpuc->amd_nb->nb_id = nb_id;
631
cpuc->amd_nb->refcnt++;
632
}
633
634
static void amd_pmu_cpu_dead(int cpu)
635
{
636
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
637
638
kfree(cpuhw->lbr_sel);
639
cpuhw->lbr_sel = NULL;
640
641
if (!x86_pmu.amd_nb_constraints)
642
return;
643
644
if (cpuhw->amd_nb) {
645
struct amd_nb *nb = cpuhw->amd_nb;
646
647
if (nb->nb_id == -1 || --nb->refcnt == 0)
648
kfree(nb);
649
650
cpuhw->amd_nb = NULL;
651
}
652
}
653
654
static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
655
{
656
wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
657
}
658
659
static inline u64 amd_pmu_get_global_status(void)
660
{
661
u64 status;
662
663
/* PerfCntrGlobalStatus is read-only */
664
rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
665
666
return status;
667
}
668
669
static inline void amd_pmu_ack_global_status(u64 status)
670
{
671
/*
672
* PerfCntrGlobalStatus is read-only but an overflow acknowledgment
673
* mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
674
* clears the same bit in PerfCntrGlobalStatus
675
*/
676
677
wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
678
}
679
680
static bool amd_pmu_test_overflow_topbit(int idx)
681
{
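/*
* Counters are programmed with the negated sample period, so the top
* implemented bit stays set while counting and reads as clear once the
* counter has wrapped but has not yet been reloaded by the NMI handler.
*/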
682
u64 counter;
683
684
rdmsrq(x86_pmu_event_addr(idx), counter);
685
686
return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
687
}
688
689
static bool amd_pmu_test_overflow_status(int idx)
690
{
691
return amd_pmu_get_global_status() & BIT_ULL(idx);
692
}
693
694
DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
695
696
/*
697
* When a PMC counter overflows, an NMI is used to process the event and
698
* reset the counter. NMI latency can result in the counter being updated
699
* before the NMI can run, which can result in what appear to be spurious
700
* NMIs. This function is intended to wait for the NMI to run and reset
701
* the counter to avoid possible unhandled NMI messages.
702
*/
703
#define OVERFLOW_WAIT_COUNT 50
704
705
static void amd_pmu_wait_on_overflow(int idx)
706
{
707
unsigned int i;
708
709
/*
710
* Wait for the counter to be reset if it has overflowed. This loop
711
* should exit very, very quickly, but just in case, don't wait
712
* forever...
713
*/
714
for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
715
if (!static_call(amd_pmu_test_overflow)(idx))
716
break;
717
718
/* Might be in IRQ context, so can't sleep */
719
udelay(1);
720
}
721
}
722
723
static void amd_pmu_check_overflow(void)
724
{
725
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
726
int idx;
727
728
/*
729
* This shouldn't be called from NMI context, but add a safeguard here
730
* to return, since if we're in NMI context we can't wait for an NMI
731
* to reset an overflowed counter value.
732
*/
733
if (in_nmi())
734
return;
735
736
/*
737
* Check each counter for overflow and wait for it to be reset by the
738
* NMI if it has overflowed. This relies on the fact that all active
739
* counters are always enabled when this function is called and
740
* ARCH_PERFMON_EVENTSEL_INT is always set.
741
*/
742
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
743
if (!test_bit(idx, cpuc->active_mask))
744
continue;
745
746
amd_pmu_wait_on_overflow(idx);
747
}
748
}
749
750
static void amd_pmu_enable_event(struct perf_event *event)
751
{
752
x86_pmu_enable_event(event);
753
}
754
755
static void amd_pmu_enable_all(int added)
756
{
757
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
758
int idx;
759
760
amd_brs_enable_all();
761
762
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
763
/* only activate events which are marked as active */
764
if (!test_bit(idx, cpuc->active_mask))
765
continue;
766
767
/*
768
* FIXME: cpuc->events[idx] can become NULL in a subtle race
769
* condition with NMI->throttle->x86_pmu_stop().
770
*/
771
if (cpuc->events[idx])
772
amd_pmu_enable_event(cpuc->events[idx]);
773
}
774
}
775
776
static void amd_pmu_v2_enable_event(struct perf_event *event)
777
{
778
struct hw_perf_event *hwc = &event->hw;
779
780
/*
781
* Testing cpu_hw_events.enabled should be skipped in this case unlike
782
* in x86_pmu_enable_event().
783
*
784
* Since cpu_hw_events.enabled is set only after returning from
785
* x86_pmu_start(), the PMCs must be programmed and kept ready.
786
* Counting starts only after x86_pmu_enable_all() is called.
787
*/
788
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
789
}
790
791
static __always_inline void amd_pmu_core_enable_all(void)
792
{
793
amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
794
}
795
796
static void amd_pmu_v2_enable_all(int added)
797
{
798
amd_pmu_lbr_enable_all();
799
amd_pmu_core_enable_all();
800
}
801
802
static void amd_pmu_disable_event(struct perf_event *event)
803
{
804
x86_pmu_disable_event(event);
805
806
/*
807
* This can be called from NMI context (via x86_pmu_stop). The counter
808
* may have overflowed, but either way, we'll never see it get reset
809
* by the NMI if we're already in the NMI. And the NMI latency support
810
* below will take care of any pending NMI that might have been
811
* generated by the overflow.
812
*/
813
if (in_nmi())
814
return;
815
816
amd_pmu_wait_on_overflow(event->hw.idx);
817
}
818
819
static void amd_pmu_disable_all(void)
820
{
821
amd_brs_disable_all();
822
x86_pmu_disable_all();
823
amd_pmu_check_overflow();
824
}
825
826
static __always_inline void amd_pmu_core_disable_all(void)
827
{
828
amd_pmu_set_global_ctl(0);
829
}
830
831
static void amd_pmu_v2_disable_all(void)
832
{
833
amd_pmu_core_disable_all();
834
amd_pmu_lbr_disable_all();
835
amd_pmu_check_overflow();
836
}
837
838
DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add);
839
840
static void amd_pmu_add_event(struct perf_event *event)
841
{
842
if (needs_branch_stack(event))
843
static_call(amd_pmu_branch_add)(event);
844
}
845
846
DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del);
847
848
static void amd_pmu_del_event(struct perf_event *event)
849
{
850
if (needs_branch_stack(event))
851
static_call(amd_pmu_branch_del)(event);
852
}
853
854
/*
855
* Because of NMI latency, if multiple PMC counters are active or other sources
856
* of NMIs are received, the perf NMI handler can handle one or more overflowed
857
* PMC counters outside of the NMI associated with the PMC overflow. If the NMI
858
* doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
859
* back-to-back NMI support won't be active. This PMC handler needs to take into
860
* account that this can occur, otherwise this could result in unknown NMI
861
* messages being issued. Examples of this are PMC overflow while in the NMI
862
* handler when multiple PMCs are active or PMC overflow while handling some
863
* other source of an NMI.
864
*
865
* Attempt to mitigate this by creating an NMI window in which un-handled NMIs
866
* received during this window will be claimed. This prevents extending the
867
* window past when it is possible that latent NMIs should be received. The
868
* per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
869
* handled a counter. When an un-handled NMI is received, it will be claimed
870
* only if arriving within that window.
871
*/
872
static inline int amd_pmu_adjust_nmi_window(int handled)
873
{
874
/*
875
* If a counter was handled, record a timestamp such that un-handled
876
* NMIs will be claimed if arriving within that window.
877
*/
878
if (handled) {
879
this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
880
881
return handled;
882
}
883
884
if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
885
return NMI_DONE;
886
887
return NMI_HANDLED;
888
}
889
890
static int amd_pmu_handle_irq(struct pt_regs *regs)
891
{
892
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
893
int handled;
894
int pmu_enabled;
895
896
/*
897
* Save the PMU state.
898
* It needs to be restored when leaving the handler.
899
*/
900
pmu_enabled = cpuc->enabled;
901
cpuc->enabled = 0;
902
903
amd_brs_disable_all();
904
905
/* Drain BRS if in use (could be inactive) */
906
if (cpuc->lbr_users)
907
amd_brs_drain();
908
909
/* Process any counter overflows */
910
handled = x86_pmu_handle_irq(regs);
911
912
cpuc->enabled = pmu_enabled;
913
if (pmu_enabled)
914
amd_brs_enable_all();
915
916
return amd_pmu_adjust_nmi_window(handled);
917
}
918
919
/*
920
* AMD-specific callback invoked through perf_snapshot_branch_stack static
921
* call, defined in include/linux/perf_event.h. See its definition for API
922
* details. It's up to the caller to provide enough space in *entries* to fit all
923
* LBR records, otherwise returned result will be truncated to *cnt* entries.
924
*/
925
static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
926
{
927
struct cpu_hw_events *cpuc;
928
unsigned long flags;
929
930
/*
931
* The sequence of steps to freeze LBR should be completely inlined
932
* and contain no branches to minimize contamination of LBR snapshot
933
*/
934
local_irq_save(flags);
935
amd_pmu_core_disable_all();
936
__amd_pmu_lbr_disable();
937
938
cpuc = this_cpu_ptr(&cpu_hw_events);
939
940
amd_pmu_lbr_read();
941
cnt = min(cnt, x86_pmu.lbr_nr);
942
memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
943
944
amd_pmu_v2_enable_all(0);
945
local_irq_restore(flags);
946
947
return cnt;
948
}
949
950
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
951
{
952
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
953
static atomic64_t status_warned = ATOMIC64_INIT(0);
954
u64 reserved, status, mask, new_bits, prev_bits;
955
struct perf_sample_data data;
956
struct hw_perf_event *hwc;
957
struct perf_event *event;
958
int handled = 0, idx;
959
bool pmu_enabled;
960
961
/*
962
* Save the PMU state as it needs to be restored when leaving the
963
* handler
964
*/
965
pmu_enabled = cpuc->enabled;
966
cpuc->enabled = 0;
967
968
/* Stop counting but do not disable LBR */
969
amd_pmu_core_disable_all();
970
971
status = amd_pmu_get_global_status();
972
973
/* Check if any overflows are pending */
974
if (!status)
975
goto done;
976
977
/* Read branch records */
978
if (x86_pmu.lbr_nr) {
979
amd_pmu_lbr_read();
980
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
981
}
982
983
reserved = status & ~amd_pmu_global_cntr_mask;
984
if (reserved)
985
pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n",
986
reserved);
987
988
/* Clear any reserved bits set by buggy microcode */
989
status &= amd_pmu_global_cntr_mask;
990
991
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
992
if (!test_bit(idx, cpuc->active_mask))
993
continue;
994
995
event = cpuc->events[idx];
996
hwc = &event->hw;
997
x86_perf_event_update(event);
998
mask = BIT_ULL(idx);
999
1000
if (!(status & mask))
1001
continue;
1002
1003
/* Event overflow */
1004
handled++;
1005
status &= ~mask;
1006
perf_sample_data_init(&data, 0, hwc->last_period);
1007
1008
if (!x86_perf_event_set_period(event))
1009
continue;
1010
1011
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
1012
1013
perf_event_overflow(event, &data, regs);
1014
}
1015
1016
/*
1017
* It should never be the case that some overflows are not handled as
1018
* the corresponding PMCs are expected to be inactive according to the
1019
* active_mask
1020
*/
1021
if (status > 0) {
1022
prev_bits = atomic64_fetch_or(status, &status_warned);
1023
// A new bit was set for the very first time.
1024
new_bits = status & ~prev_bits;
1025
WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
1026
}
1027
1028
/* Clear overflow and freeze bits */
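/*
* ~status has a 1 for every bit serviced above (plus the LBR freeze bit),
* so the write below clears those while leaving any unexpected, unhandled
* overflow bits untouched.
*/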
1029
amd_pmu_ack_global_status(~status);
1030
1031
/*
1032
* Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
1033
* PMI entry is not set by the local APIC when a PMC overflow occurs
1034
*/
1035
inc_irq_stat(apic_perf_irqs);
1036
1037
done:
1038
cpuc->enabled = pmu_enabled;
1039
1040
/* Resume counting only if PMU is active */
1041
if (pmu_enabled)
1042
amd_pmu_core_enable_all();
1043
1044
return amd_pmu_adjust_nmi_window(handled);
1045
}
1046
1047
static struct event_constraint *
1048
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
1049
struct perf_event *event)
1050
{
1051
/*
1052
* if not NB event or no NB, then no constraints
1053
*/
1054
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
1055
return &unconstrained;
1056
1057
return __amd_get_nb_event_constraints(cpuc, event, NULL);
1058
}
1059
1060
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
1061
struct perf_event *event)
1062
{
1063
if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
1064
__amd_put_nb_event_constraints(cpuc, event);
1065
}
1066
1067
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
1068
PMU_FORMAT_ATTR(umask, "config:8-15" );
1069
PMU_FORMAT_ATTR(edge, "config:18" );
1070
PMU_FORMAT_ATTR(inv, "config:23" );
1071
PMU_FORMAT_ATTR(cmask, "config:24-31" );
1072
1073
static struct attribute *amd_format_attr[] = {
1074
&format_attr_event.attr,
1075
&format_attr_umask.attr,
1076
&format_attr_edge.attr,
1077
&format_attr_inv.attr,
1078
&format_attr_cmask.attr,
1079
NULL,
1080
};
1081
1082
/* AMD Family 15h */
1083
1084
#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
1085
1086
#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
1087
#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
1088
#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
1089
#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
1090
#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
1091
#define AMD_EVENT_EX_LS 0x000000C0ULL
1092
#define AMD_EVENT_DE 0x000000D0ULL
1093
#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
1094
1095
/*
1096
* AMD family 15h event code/PMC mappings:
1097
*
1098
* type = event_code & 0x0F0:
1099
*
1100
* 0x000 FP PERF_CTL[5:3]
1101
* 0x010 FP PERF_CTL[5:3]
1102
* 0x020 LS PERF_CTL[5:0]
1103
* 0x030 LS PERF_CTL[5:0]
1104
* 0x040 DC PERF_CTL[5:0]
1105
* 0x050 DC PERF_CTL[5:0]
1106
* 0x060 CU PERF_CTL[2:0]
1107
* 0x070 CU PERF_CTL[2:0]
1108
* 0x080 IC/DE PERF_CTL[2:0]
1109
* 0x090 IC/DE PERF_CTL[2:0]
1110
* 0x0A0 ---
1111
* 0x0B0 ---
1112
* 0x0C0 EX/LS PERF_CTL[5:0]
1113
* 0x0D0 DE PERF_CTL[2:0]
1114
* 0x0E0 NB NB_PERF_CTL[3:0]
1115
* 0x0F0 NB NB_PERF_CTL[3:0]
1116
*
1117
* Exceptions:
1118
*
1119
* 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
1120
* 0x003 FP PERF_CTL[3]
1121
* 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
1122
* 0x00B FP PERF_CTL[3]
1123
* 0x00D FP PERF_CTL[3]
1124
* 0x023 DE PERF_CTL[2:0]
1125
* 0x02D LS PERF_CTL[3]
1126
* 0x02E LS PERF_CTL[3,0]
1127
* 0x031 LS PERF_CTL[2:0] (**)
1128
* 0x043 CU PERF_CTL[2:0]
1129
* 0x045 CU PERF_CTL[2:0]
1130
* 0x046 CU PERF_CTL[2:0]
1131
* 0x054 CU PERF_CTL[2:0]
1132
* 0x055 CU PERF_CTL[2:0]
1133
* 0x08F IC PERF_CTL[0]
1134
* 0x187 DE PERF_CTL[0]
1135
* 0x188 DE PERF_CTL[0]
1136
* 0x0DB EX PERF_CTL[5:0]
1137
* 0x0DC LS PERF_CTL[5:0]
1138
* 0x0DD LS PERF_CTL[5:0]
1139
* 0x0DE LS PERF_CTL[5:0]
1140
* 0x0DF LS PERF_CTL[5:0]
1141
* 0x1C0 EX PERF_CTL[5:3]
1142
* 0x1D6 EX PERF_CTL[5:0]
1143
* 0x1D8 EX PERF_CTL[5:0]
1144
*
1145
* (*) depending on the umask all FPU counters may be used
1146
* (**) only one unitmask enabled at a time
1147
*/
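/*
* The second EVENT_CONSTRAINT() argument is the allowed counter bitmask:
* 0x01 = PMC0, 0x07 = PMC0-2, 0x08 = PMC3, 0x09 = PMC0 and PMC3,
* 0x38 = PMC3-5, 0x3F = PMC0-5.
*/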
1148
1149
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
1150
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
1151
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
1152
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
1153
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
1154
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
1155
1156
static struct event_constraint *
1157
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
1158
struct perf_event *event)
1159
{
1160
struct hw_perf_event *hwc = &event->hw;
1161
unsigned int event_code = amd_get_event_code(hwc);
1162
1163
switch (event_code & AMD_EVENT_TYPE_MASK) {
1164
case AMD_EVENT_FP:
1165
switch (event_code) {
1166
case 0x000:
1167
if (!(hwc->config & 0x0000F000ULL))
1168
break;
1169
if (!(hwc->config & 0x00000F00ULL))
1170
break;
1171
return &amd_f15_PMC3;
1172
case 0x004:
1173
if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
1174
break;
1175
return &amd_f15_PMC3;
1176
case 0x003:
1177
case 0x00B:
1178
case 0x00D:
1179
return &amd_f15_PMC3;
1180
}
1181
return &amd_f15_PMC53;
1182
case AMD_EVENT_LS:
1183
case AMD_EVENT_DC:
1184
case AMD_EVENT_EX_LS:
1185
switch (event_code) {
1186
case 0x023:
1187
case 0x043:
1188
case 0x045:
1189
case 0x046:
1190
case 0x054:
1191
case 0x055:
1192
return &amd_f15_PMC20;
1193
case 0x02D:
1194
return &amd_f15_PMC3;
1195
case 0x02E:
1196
return &amd_f15_PMC30;
1197
case 0x031:
1198
if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
1199
return &amd_f15_PMC20;
1200
return &emptyconstraint;
1201
case 0x1C0:
1202
return &amd_f15_PMC53;
1203
default:
1204
return &amd_f15_PMC50;
1205
}
1206
case AMD_EVENT_CU:
1207
case AMD_EVENT_IC_DE:
1208
case AMD_EVENT_DE:
1209
switch (event_code) {
1210
case 0x08F:
1211
case 0x187:
1212
case 0x188:
1213
return &amd_f15_PMC0;
1214
case 0x0DB ... 0x0DF:
1215
case 0x1D6:
1216
case 0x1D8:
1217
return &amd_f15_PMC50;
1218
default:
1219
return &amd_f15_PMC20;
1220
}
1221
case AMD_EVENT_NB:
1222
/* moved to uncore.c */
1223
return &emptyconstraint;
1224
default:
1225
return &emptyconstraint;
1226
}
1227
}
1228
1229
static struct event_constraint pair_constraint;
1230
1231
static struct event_constraint *
1232
amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
1233
struct perf_event *event)
1234
{
1235
struct hw_perf_event *hwc = &event->hw;
1236
1237
if (amd_is_pair_event_code(hwc))
1238
return &pair_constraint;
1239
1240
return &unconstrained;
1241
}
1242
1243
static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
1244
struct perf_event *event)
1245
{
1246
struct hw_perf_event *hwc = &event->hw;
1247
1248
if (is_counter_pair(hwc))
1249
--cpuc->n_pair;
1250
}
1251
1252
/*
1253
* Because of the way BRS operates with inactive and active phases, and
1254
* the link to one counter, it is not possible to have two events using BRS
1255
* scheduled at the same time. There would be an issue with enforcing the
1256
* period of each one and given that the BRS saturates, it would not be possible
1257
* to guarantee correlated content for all events. Therefore, in situations
1258
* where multiple events want to use BRS, the kernel enforces mutual exclusion.
1259
* Exclusion is enforced by choosing only one counter for events using BRS.
1260
* The event scheduling logic will then automatically multiplex the
1261
* events and ensure that at most one event is actively using BRS.
1262
*
1263
* The BRS counter could be any counter, but there is no constraint on Fam19h,
1264
* therefore all counters are equal and thus we pick the first one: PMC0
1265
*/
1266
static struct event_constraint amd_fam19h_brs_cntr0_constraint =
1267
EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
1268
1269
static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
1270
__EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
1271
1272
static struct event_constraint *
1273
amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
1274
struct perf_event *event)
1275
{
1276
struct hw_perf_event *hwc = &event->hw;
1277
bool has_brs = has_amd_brs(hwc);
1278
1279
/*
1280
* In case BRS is used with an event requiring a counter pair,
1281
* the kernel allows it, but only on counters 0 and 1, to enforce the
1282
* multiplexing required to protect BRS in case of multiple
1283
* BRS users
1284
*/
1285
if (amd_is_pair_event_code(hwc)) {
1286
return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
1287
: &pair_constraint;
1288
}
1289
1290
if (has_brs)
1291
return &amd_fam19h_brs_cntr0_constraint;
1292
1293
return &unconstrained;
1294
}
1295
1296
1297
static ssize_t amd_event_sysfs_show(char *page, u64 config)
1298
{
1299
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
1300
(config & AMD64_EVENTSEL_EVENT) >> 24;
1301
1302
return x86_event_sysfs_show(page, config, event);
1303
}
1304
1305
static void amd_pmu_limit_period(struct perf_event *event, s64 *left)
1306
{
1307
/*
1308
* Decrease period by the depth of the BRS feature to get the last N
1309
* taken branches and approximate the desired period
1310
*/
1311
if (has_branch_stack(event) && *left > x86_pmu.lbr_nr)
1312
*left -= x86_pmu.lbr_nr;
1313
}
1314
1315
static __initconst const struct x86_pmu amd_pmu = {
1316
.name = "AMD",
1317
.handle_irq = amd_pmu_handle_irq,
1318
.disable_all = amd_pmu_disable_all,
1319
.enable_all = amd_pmu_enable_all,
1320
.enable = amd_pmu_enable_event,
1321
.disable = amd_pmu_disable_event,
1322
.hw_config = amd_pmu_hw_config,
1323
.schedule_events = x86_schedule_events,
1324
.eventsel = MSR_K7_EVNTSEL0,
1325
.perfctr = MSR_K7_PERFCTR0,
1326
.addr_offset = amd_pmu_addr_offset,
1327
.event_map = amd_pmu_event_map,
1328
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
1329
.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
1330
.add = amd_pmu_add_event,
1331
.del = amd_pmu_del_event,
1332
.cntval_bits = 48,
1333
.cntval_mask = (1ULL << 48) - 1,
1334
.apic = 1,
1335
/* use highest bit to detect overflow */
1336
.max_period = (1ULL << 47) - 1,
1337
.get_event_constraints = amd_get_event_constraints,
1338
.put_event_constraints = amd_put_event_constraints,
1339
1340
.format_attrs = amd_format_attr,
1341
.events_sysfs_show = amd_event_sysfs_show,
1342
1343
.cpu_prepare = amd_pmu_cpu_prepare,
1344
.cpu_starting = amd_pmu_cpu_starting,
1345
.cpu_dead = amd_pmu_cpu_dead,
1346
1347
.amd_nb_constraints = 1,
1348
};
1349
1350
static ssize_t branches_show(struct device *cdev,
1351
struct device_attribute *attr,
1352
char *buf)
1353
{
1354
return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
1355
}
1356
1357
static DEVICE_ATTR_RO(branches);
1358
1359
static struct attribute *amd_pmu_branches_attrs[] = {
1360
&dev_attr_branches.attr,
1361
NULL,
1362
};
1363
1364
static umode_t
1365
amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i)
1366
{
1367
return x86_pmu.lbr_nr ? attr->mode : 0;
1368
}
1369
1370
static struct attribute_group group_caps_amd_branches = {
1371
.name = "caps",
1372
.attrs = amd_pmu_branches_attrs,
1373
.is_visible = amd_branches_is_visible,
1374
};
1375
1376
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
1377
1378
EVENT_ATTR_STR(branch-brs, amd_branch_brs,
1379
"event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
1380
1381
static struct attribute *amd_brs_events_attrs[] = {
1382
EVENT_PTR(amd_branch_brs),
1383
NULL,
1384
};
1385
1386
static umode_t
1387
amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
1388
{
1389
return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ?
1390
attr->mode : 0;
1391
}
1392
1393
static struct attribute_group group_events_amd_brs = {
1394
.name = "events",
1395
.attrs = amd_brs_events_attrs,
1396
.is_visible = amd_brs_is_visible,
1397
};
1398
1399
#endif /* CONFIG_PERF_EVENTS_AMD_BRS */
1400
1401
static const struct attribute_group *amd_attr_update[] = {
1402
&group_caps_amd_branches,
1403
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
1404
&group_events_amd_brs,
1405
#endif
1406
NULL,
1407
};
1408
1409
static int __init amd_core_pmu_init(void)
1410
{
1411
union cpuid_0x80000022_ebx ebx;
1412
u64 even_ctr_mask = 0ULL;
1413
int i;
1414
1415
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
1416
return 0;
1417
1418
/* Avoid calculating the value each time in the NMI handler */
1419
perf_nmi_window = msecs_to_jiffies(100);
1420
1421
/*
1422
* If core performance counter extensions exist, we must use
1423
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
1424
* amd_pmu_addr_offset().
1425
*/
1426
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
1427
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
1428
x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
1429
1430
/* Check for Performance Monitoring v2 support */
1431
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
1432
ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
1433
1434
/* Update PMU version for later usage */
1435
x86_pmu.version = 2;
1436
1437
/* Find the number of available Core PMCs */
1438
x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
1439
1440
amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
1441
1442
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_MEDIATED_VPMU;
1443
1444
/* Update PMC handling functions */
1445
x86_pmu.enable_all = amd_pmu_v2_enable_all;
1446
x86_pmu.disable_all = amd_pmu_v2_disable_all;
1447
x86_pmu.enable = amd_pmu_v2_enable_event;
1448
x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
1449
static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
1450
}
1451
1452
/*
1453
* AMD Core perfctr has separate MSRs for the NB events, see
1454
* the amd/uncore.c driver.
1455
*/
1456
x86_pmu.amd_nb_constraints = 0;
1457
1458
if (boot_cpu_data.x86 == 0x15) {
1459
pr_cont("Fam15h ");
1460
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
1461
}
1462
if (boot_cpu_data.x86 >= 0x17) {
1463
pr_cont("Fam17h+ ");
1464
/*
1465
* Family 17h and compatibles have constraints for Large
1466
* Increment per Cycle events: they may only be assigned an
1467
* even numbered counter that has a consecutive adjacent odd
1468
* numbered counter following it.
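*
* e.g. with six counters this makes even_ctr_mask 0x15 (PMC0/2/4), each
* paired with the odd counter above it.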
1469
*/
1470
for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
1471
even_ctr_mask |= BIT_ULL(i);
1472
1473
pair_constraint = (struct event_constraint)
1474
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
1475
x86_pmu_max_num_counters(NULL) / 2, 0,
1476
PERF_X86_EVENT_PAIR);
1477
1478
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
1479
x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
1480
x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
1481
x86_pmu.flags |= PMU_FL_PAIR;
1482
}
1483
1484
/* LBR and BRS are mutually exclusive features */
1485
if (!amd_pmu_lbr_init()) {
1486
/* LBR requires flushing on context switch */
1487
x86_pmu.sched_task = amd_pmu_lbr_sched_task;
1488
static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config);
1489
static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
1490
static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
1491
static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
1492
1493
/* Only support branch_stack snapshot on perfmon v2 */
1494
if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq)
1495
static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack);
1496
} else if (!amd_brs_init()) {
1497
/*
1498
* BRS requires special event constraints and flushing on ctxsw.
1499
*/
1500
x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
1501
x86_pmu.sched_task = amd_pmu_brs_sched_task;
1502
x86_pmu.limit_period = amd_pmu_limit_period;
1503
1504
static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config);
1505
static_call_update(amd_pmu_branch_reset, amd_brs_reset);
1506
static_call_update(amd_pmu_branch_add, amd_pmu_brs_add);
1507
static_call_update(amd_pmu_branch_del, amd_pmu_brs_del);
1508
1509
/*
1510
* put_event_constraints callback same as Fam17h, set above
1511
*/
1512
1513
/* branch sampling must be stopped when entering low power */
1514
amd_brs_lopwr_init();
1515
}
1516
1517
x86_pmu.attr_update = amd_attr_update;
1518
1519
pr_cont("core perfctr, ");
1520
return 0;
1521
}
1522
1523
__init int amd_pmu_init(void)
1524
{
1525
int ret;
1526
1527
/* Performance-monitoring supported from K7 and later: */
1528
if (boot_cpu_data.x86 < 6)
1529
return -ENODEV;
1530
1531
x86_pmu = amd_pmu;
1532
1533
ret = amd_core_pmu_init();
1534
if (ret)
1535
return ret;
1536
1537
if (num_possible_cpus() == 1) {
1538
/*
1539
* No point in allocating data structures to serialize
1540
* against other CPUs, when there is only the one CPU.
1541
*/
1542
x86_pmu.amd_nb_constraints = 0;
1543
}
1544
1545
if (boot_cpu_data.x86 >= 0x17)
1546
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
1547
else
1548
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
1549
1550
return 0;
1551
}
1552
1553
static inline void amd_pmu_reload_virt(void)
1554
{
1555
if (x86_pmu.version >= 2) {
1556
/*
1557
* Clear global enable bits, reprogram the PERF_CTL
1558
* registers with updated perf_ctr_virt_mask and then
1559
* set global enable bits once again
1560
*/
1561
amd_pmu_v2_disable_all();
1562
amd_pmu_enable_all(0);
1563
amd_pmu_v2_enable_all(0);
1564
return;
1565
}
1566
1567
amd_pmu_disable_all();
1568
amd_pmu_enable_all(0);
1569
}
1570
1571
void amd_pmu_enable_virt(void)
1572
{
1573
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1574
1575
cpuc->perf_ctr_virt_mask = 0;
1576
1577
/* Reload all events */
1578
amd_pmu_reload_virt();
1579
}
1580
EXPORT_SYMBOL_FOR_KVM(amd_pmu_enable_virt);
1581
1582
void amd_pmu_disable_virt(void)
1583
{
1584
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1585
1586
/*
1587
* We only mask out the Host-only bit so that host-only counting works
1588
* when SVM is disabled. If someone sets up a guest-only counter when
1589
* SVM is disabled, the Guest-only bits still get set and the counter
1590
* will not count anything.
1591
*/
1592
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
1593
1594
/* Reload all events */
1595
amd_pmu_reload_virt();
1596
}
1597
EXPORT_SYMBOL_FOR_KVM(amd_pmu_disable_virt);
1598
1599