GitHub Repository: torvalds/linux
Path: blob/master/tools/testing/selftests/kvm/x86/pmu_counters_test.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS 10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)

/*
 * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
 * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP 6

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter. 2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS 5

/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
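/*
 * For reference, with the values above the measured section retires
 * NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS = 10 * 6 + 5 = 65
 * instructions.
 */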

/* Track which architectural events are supported by hardware. */
static uint32_t hardware_pmu_arch_events;

static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;

#define X86_PMU_FEATURE_NULL \
({ \
        struct kvm_x86_pmu_feature feature = {}; \
\
        feature; \
})

static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
        return !(*(u64 *)&event);
}

struct kvm_intel_pmu_event {
        struct kvm_x86_pmu_feature gp_event;
        struct kvm_x86_pmu_feature fixed_event;
};

/*
 * Wrap the array to appease the compiler, as the macros used to construct each
 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
 * compiler often thinks the feature definitions aren't compile-time constants.
 */
static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
{
        const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
                [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
                [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
                /*
                 * Note, the fixed counter for reference cycles is NOT the same as the
                 * general purpose architectural event. The fixed counter explicitly
                 * counts at the same frequency as the TSC, whereas the GP event counts
                 * at a fixed, but uarch specific, frequency. Bundle them here for
                 * simplicity.
                 */
                [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
                [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
                [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
                [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
        };

        kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);

        return __intel_event_to_feature[idx];
}

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
                                                  void *guest_code,
                                                  uint8_t pmu_version,
                                                  uint64_t perf_capabilities)
{
        struct kvm_vm *vm;

        vm = vm_create_with_one_vcpu(vcpu, guest_code);
        sync_global_to_guest(vm, kvm_pmu_version);
        sync_global_to_guest(vm, hardware_pmu_arch_events);

        /*
         * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
         * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
         */
        if (kvm_has_perf_caps)
                vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

        vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
        return vm;
}

static void run_vcpu(struct kvm_vcpu *vcpu)
{
        struct ucall uc;

        do {
                vcpu_run(vcpu);
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_SYNC:
                        break;
                case UCALL_ABORT:
                        REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_PRINTF:
                        pr_info("%s", uc.buffer);
                        break;
                case UCALL_DONE:
                        break;
                default:
                        TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
                }
        } while (uc.cmd != UCALL_DONE);
}

static uint8_t guest_get_pmu_version(void)
{
        /*
         * Return the effective PMU version, i.e. the minimum between what KVM
         * supports and what is enumerated to the guest. The host deliberately
         * advertises a PMU version to the guest beyond what is actually
         * supported by KVM to verify KVM doesn't freak out and do something
         * bizarre with an architecturally valid, but unsupported, version.
         */
        return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero. If an event isn't supported or
 * the test can't guarantee the associated action will occur, then all bets are
 * off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
 */
static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
        uint64_t count;

        count = _rdpmc(pmc);
        if (!(hardware_pmu_arch_events & BIT(idx)))
                goto sanity_checks;

        switch (idx) {
        case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
                /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
                if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
                        GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
                else
                        GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
                break;
        case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
                /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
                if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
                        GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
                else
                        GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
                break;
        case INTEL_ARCH_LLC_REFERENCES_INDEX:
        case INTEL_ARCH_LLC_MISSES_INDEX:
                if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
                    !this_cpu_has(X86_FEATURE_CLFLUSH))
                        break;
                fallthrough;
        case INTEL_ARCH_CPU_CYCLES_INDEX:
        case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
        case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
        case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
                GUEST_ASSERT_NE(count, 0);
                break;
        case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
        case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
                __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
                               "Expected top-down slots >= %u, got count = %lu",
                               NUM_INSNS_RETIRED, count);
                break;
        default:
                break;
        }

sanity_checks:
        __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
        GUEST_ASSERT_EQ(_rdpmc(pmc), count);

        wrmsr(pmc_msr, 0xdead);
        GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence. Note, ECX
 * doesn't need to be clobbered as the input value, @pmc_msr, is restored
 * before the end of the sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
 * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
 * misses, i.e. to allow testing that those events actually count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
 */
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
do { \
        __asm__ __volatile__("wrmsr\n\t" \
                             " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
                             "1:\n\t" \
                             FEP "enter $0, $0\n\t" \
                             clflush "\n\t" \
                             "mfence\n\t" \
                             "mov %[m], %%eax\n\t" \
                             FEP "leave\n\t" \
                             FEP "loop 1b\n\t" \
                             FEP "mov %%edi, %%ecx\n\t" \
                             FEP "xor %%eax, %%eax\n\t" \
                             FEP "xor %%edx, %%edx\n\t" \
                             "wrmsr\n\t" \
                             :: "a"((uint32_t)_value), "d"(_value >> 32), \
                                "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
        ); \
} while (0)
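/*
 * For reference, the "2 MOV, 2 XOR, 1 WRMSR" counted as NUM_EXTRA_INSNS map to
 * the instructions outside the loop body of the blob above: the MOV that loads
 * the loop count, the MOV that restores ECX from EDI, the two XORs that zero
 * EAX and EDX, and the final WRMSR that disables the counter (the leading
 * WRMSR is what enables the counter, so it isn't part of the measured count).
 */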

#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
        wrmsr(_pmc_msr, 0); \
\
        if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
                GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
        else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
                GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
        else \
                GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
        guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)

static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
                                    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
        GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");

        if (is_forced_emulation_enabled)
                GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}

static void guest_test_arch_event(uint8_t idx)
{
        uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
        uint32_t pmu_version = guest_get_pmu_version();
        /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
        bool guest_has_perf_global_ctrl = pmu_version >= 2;
        struct kvm_x86_pmu_feature gp_event, fixed_event;
        uint32_t base_pmc_msr;
        unsigned int i;

        /* The host side shouldn't invoke this without a guest PMU. */
        GUEST_ASSERT(pmu_version);

        if (this_cpu_has(X86_FEATURE_PDCM) &&
            rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
                base_pmc_msr = MSR_IA32_PMC0;
        else
                base_pmc_msr = MSR_IA32_PERFCTR0;

        gp_event = intel_event_to_feature(idx).gp_event;
        GUEST_ASSERT_EQ(idx, gp_event.f.bit);

        GUEST_ASSERT(nr_gp_counters);

        for (i = 0; i < nr_gp_counters; i++) {
                uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
                                    ARCH_PERFMON_EVENTSEL_ENABLE |
                                    intel_pmu_arch_events[idx];

                wrmsr(MSR_P6_EVNTSEL0 + i, 0);
                if (guest_has_perf_global_ctrl)
                        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

                __guest_test_arch_event(idx, i, base_pmc_msr + i,
                                        MSR_P6_EVNTSEL0 + i, eventsel);
        }

        if (!guest_has_perf_global_ctrl)
                return;

        fixed_event = intel_event_to_feature(idx).fixed_event;
        if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
                return;

        i = fixed_event.f.bit;

        wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));

        __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
                                MSR_CORE_PERF_FIXED_CTR0 + i,
                                MSR_CORE_PERF_GLOBAL_CTRL,
                                FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}

static void guest_test_arch_events(void)
{
        uint8_t i;

        for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
                guest_test_arch_event(i);

        GUEST_DONE();
}

static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
                             uint8_t length, uint32_t unavailable_mask)
{
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;

        /* Testing arch events requires a vPMU (there are no negative tests). */
        if (!pmu_version)
                return;

        unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
                                    X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);

        vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
                                         pmu_version, perf_capabilities);

        vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
                                length);
        vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
                                unavailable_mask);

        run_vcpu(vcpu);

        kvm_vm_free(vm);
}

/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
 * that aren't defined as counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 */
#define MAX_NR_GP_COUNTERS 8
#define MAX_NR_FIXED_COUNTERS 3

#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
               "Expected %s on " #insn "(0x%x), got %s", \
               expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \

#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
__GUEST_ASSERT(val == expected, \
               "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
               msr, expected, val);

static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
                             uint64_t expected_val)
{
        uint8_t vector;
        uint64_t val;

        vector = rdpmc_safe(rdpmc_idx, &val);
        GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
        if (expect_success)
                GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);

        if (!is_forced_emulation_enabled)
                return;

        vector = rdpmc_safe_fep(rdpmc_idx, &val);
        GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
        if (expect_success)
                GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}

static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
                                 uint8_t nr_counters, uint32_t or_mask)
{
        const bool pmu_has_fast_mode = !guest_get_pmu_version();
        uint8_t i;

        for (i = 0; i < nr_possible_counters; i++) {
                /*
                 * TODO: Test a value that validates full-width writes and the
                 * width of the counters.
                 */
                const uint64_t test_val = 0xffff;
                const uint32_t msr = base_msr + i;

                /*
                 * Fixed counters are supported if the counter is less than the
                 * number of enumerated contiguous counters *or* the counter is
                 * explicitly enumerated in the supported counters mask.
                 */
                const bool expect_success = i < nr_counters || (or_mask & BIT(i));

                /*
                 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
                 * unsupported, i.e. doesn't #GP and reads back '0'.
                 */
                const uint64_t expected_val = expect_success ? test_val : 0;
                const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
                                       msr != MSR_P6_PERFCTR1;
                uint32_t rdpmc_idx;
                uint8_t vector;
                uint64_t val;

                vector = wrmsr_safe(msr, test_val);
                GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);

                vector = rdmsr_safe(msr, &val);
                GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);

                /* On #GP, the result of RDMSR is undefined. */
                if (!expect_gp)
                        GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);

                /*
                 * Redo the read tests with RDPMC, which has different indexing
                 * semantics and additional capabilities.
                 */
                rdpmc_idx = i;
                if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
                        rdpmc_idx |= INTEL_RDPMC_FIXED;

                guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);

                /*
                 * KVM doesn't support non-architectural PMUs, i.e. it should be
                 * impossible to have fast mode RDPMC. Verify that attempting
                 * to use fast RDPMC always #GPs.
                 */
                GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
                rdpmc_idx |= INTEL_RDPMC_FAST;
                guest_test_rdpmc(rdpmc_idx, false, -1ull);

                vector = wrmsr_safe(msr, 0);
                GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
        }
}

static void guest_test_gp_counters(void)
{
        uint8_t pmu_version = guest_get_pmu_version();
        uint8_t nr_gp_counters = 0;
        uint32_t base_msr;

        if (pmu_version)
                nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);

        /*
         * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
         * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
         * of GP counters. If there are no GP counters, require KVM to leave
         * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
         * follow the spirit of the architecture and only globally enable GP
         * counters, of which there are none.
         */
        if (pmu_version > 1) {
                uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);

                if (nr_gp_counters)
                        GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
                else
                        GUEST_ASSERT_EQ(global_ctrl, 0);
        }

        if (this_cpu_has(X86_FEATURE_PDCM) &&
            rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
                base_msr = MSR_IA32_PMC0;
        else
                base_msr = MSR_IA32_PERFCTR0;

        guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
        GUEST_DONE();
}

static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
                             uint8_t nr_gp_counters)
{
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;

        vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
                                         pmu_version, perf_capabilities);

        vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
                                nr_gp_counters);

        run_vcpu(vcpu);

        kvm_vm_free(vm);
}

static void guest_test_fixed_counters(void)
{
        uint64_t supported_bitmask = 0;
        uint8_t nr_fixed_counters = 0;
        uint8_t i;

        /* Fixed counters require Architectural vPMU Version 2+. */
        if (guest_get_pmu_version() >= 2)
                nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);

        /*
         * The supported bitmask for fixed counters was introduced in PMU
         * version 5.
         */
        if (guest_get_pmu_version() >= 5)
                supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);

        guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
                             nr_fixed_counters, supported_bitmask);

        for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
                uint8_t vector;
                uint64_t val;

                if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
                        vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
                                            FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
                        __GUEST_ASSERT(vector == GP_VECTOR,
                                       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);

                        vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
                                            FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
                        __GUEST_ASSERT(vector == GP_VECTOR,
                                       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
                        continue;
                }

                wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
                wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
                __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
                val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);

                GUEST_ASSERT_NE(val, 0);
        }
        GUEST_DONE();
}

static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
                                uint8_t nr_fixed_counters,
                                uint32_t supported_bitmask)
{
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;

        vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
                                         pmu_version, perf_capabilities);

        vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
                                supported_bitmask);
        vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
                                nr_fixed_counters);

        run_vcpu(vcpu);

        kvm_vm_free(vm);
}

static void test_intel_counters(void)
{
        uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
        uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
        uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
        unsigned int i;
        uint8_t v, j;
        uint32_t k;

        const uint64_t perf_caps[] = {
                0,
                PMU_CAP_FW_WRITES,
        };

        /*
         * To keep the total runtime reasonable, test only a handful of select,
         * semi-arbitrary values for the mask of unavailable PMU events. Test
         * 0 (all events available) and all ones (no events available) as well
         * as alternating bit sequences, e.g. to detect if KVM is checking the
         * wrong bit(s).
         */
        const uint32_t unavailable_masks[] = {
                0x0,
                0xffffffffu,
                0xaaaaaaaau,
                0x55555555u,
                0xf0f0f0f0u,
                0x0f0f0f0fu,
                0xa0a0a0a0u,
                0x0a0a0a0au,
                0x50505050u,
                0x05050505u,
        };

        /*
         * Test up to PMU v5, which is the current maximum version defined by
         * Intel, i.e. the last version that is guaranteed to be backwards
         * compatible with KVM's existing behavior.
         */
        uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

        /*
         * Detect the existence of events that aren't supported by selftests.
         * This will (obviously) fail any time hardware adds support for a new
         * event, but it's worth paying that price to keep the test fresh.
         */
        TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
                    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
                    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
                    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

        /*
         * Iterate over known arch events irrespective of KVM/hardware support
         * to verify that KVM doesn't reject programming of events just because
         * the *architectural* encoding is unsupported. Track which events are
         * supported in hardware; the guest side will validate supported events
         * count correctly, even if *enumeration* of the event is unsupported
         * by KVM and/or isn't exposed to the guest.
         */
        for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
                if (this_pmu_has(intel_event_to_feature(i).gp_event))
                        hardware_pmu_arch_events |= BIT(i);
        }

        for (v = 0; v <= max_pmu_version; v++) {
                for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
                        if (!kvm_has_perf_caps && perf_caps[i])
                                continue;

                        pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
                                v, perf_caps[i]);

                        /*
                         * Test single bits for all PMU versions and lengths up
                         * to the number of events + 1 (to verify KVM doesn't do
                         * weird things if the guest length is greater than the
                         * host length). Explicitly test a mask of '0' and all
                         * ones, i.e. all events being available and unavailable.
                         */
                        for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
                                for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
                                        test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
                        }

                        pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
                                v, perf_caps[i]);
                        for (j = 0; j <= nr_gp_counters; j++)
                                test_gp_counters(v, perf_caps[i], j);

                        pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
                                v, perf_caps[i]);
                        for (j = 0; j <= nr_fixed_counters; j++) {
                                for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
                                        test_fixed_counters(v, perf_caps[i], j, k);
                        }
                }
        }
}

int main(int argc, char *argv[])
{
        TEST_REQUIRE(kvm_is_pmu_enabled());

        TEST_REQUIRE(host_cpu_is_intel);
        TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
        TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

        kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
        kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

        test_intel_counters();

        return 0;
}
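
/*
 * Usage note: as a KVM selftest, this is normally built via the kselftests
 * makefile (e.g. "make -C tools/testing/selftests/kvm") and then run as a
 * standalone binary (./x86/pmu_counters_test); the exact build command and
 * output path depend on the kernel tree and selftests configuration.
 */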