GitHub Repository: torvalds/linux
Path: blob/master/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c

// SPDX-License-Identifier: GPL-2.0
/*
 * vgic_lpi_stress - Stress test for KVM's ITS emulation
 *
 * Copyright (c) 2024 Google LLC
 */

#include <linux/sizes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/sysinfo.h>

#include "kvm_util.h"
#include "gic.h"
#include "gic_v3.h"
#include "gic_v3_its.h"
#include "processor.h"
#include "ucall.h"
#include "vgic.h"

#define TEST_MEMSLOT_INDEX	1

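/* LPI INTIDs start at 8192 in the GICv3 architecture */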
#define GIC_LPI_OFFSET		8192

static size_t nr_iterations = 1000;
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int its_fd;

static struct test_data {
	bool	request_vcpus_stop;
	u32	nr_cpus;
	u32	nr_devices;
	u32	nr_event_ids;

	vm_paddr_t device_table;
	vm_paddr_t collection_table;
	vm_paddr_t cmdq_base;
	void *cmdq_base_va;
	vm_paddr_t itt_tables;

	vm_paddr_t lpi_prop_table;
	vm_paddr_t lpi_pend_tables;
} test_data = {
	.nr_cpus	= 1,
	.nr_devices	= 1,
	.nr_event_ids	= 16,
};

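/*
 * Ack the interrupt, check that it is an LPI (INTID >= GIC_LPI_OFFSET),
 * and EOI it. Spurious interrupts are silently ignored.
 */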
static void guest_irq_handler(struct ex_regs *regs)
{
	u32 intid = gic_get_and_ack_irq();

	if (intid == IAR_SPURIOUS)
		return;

	GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
	gic_set_eoi(intid);
}

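/*
 * Map one collection per vCPU, then assign each (device, event) pair a
 * unique LPI, spreading the LPIs round-robin across the collections.
 */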
static void guest_setup_its_mappings(void)
{
	u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
	u32 nr_events = test_data.nr_event_ids;
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;

	for (coll_id = 0; coll_id < nr_cpus; coll_id++)
		its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);

	/* Round-robin the LPIs to all of the vCPUs in the VM */
	coll_id = 0;
	for (device_id = 0; device_id < nr_devices; device_id++) {
		vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);

		its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
				  itt_base, SZ_64K, true);

		for (event_id = 0; event_id < nr_events; event_id++) {
			its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
					   event_id, coll_id, intid++);

			coll_id = (coll_id + 1) % test_data.nr_cpus;
		}
	}
}

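/* Invalidate any cached LPI configuration at every redistributor. */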
static void guest_invalidate_all_rdists(void)
{
	int i;

	for (i = 0; i < test_data.nr_cpus; i++)
		its_send_invall_cmd(test_data.cmdq_base_va, i);
}

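/*
 * Every vCPU enables LPIs at its own redistributor; vCPU 0 then waits for
 * all vCPUs to be ready before initializing the ITS and its mappings.
 */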
static void guest_setup_gic(void)
{
	static atomic_int nr_cpus_ready = 0;
	u32 cpuid = guest_get_vcpuid();

	gic_init(GIC_V3, test_data.nr_cpus);
	gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
			      test_data.lpi_pend_tables + (cpuid * SZ_64K));

	atomic_fetch_add(&nr_cpus_ready, 1);

	if (cpuid > 0)
		return;

	while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
		cpu_relax();

	its_init(test_data.collection_table, SZ_64K,
		 test_data.device_table, SZ_64K,
		 test_data.cmdq_base, SZ_64K);

	guest_setup_its_mappings();
	guest_invalidate_all_rdists();

	/* SYNC to ensure ITS setup is complete */
	for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
		its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}

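/*
 * Guest entry point: bring up the GIC/ITS, tell userspace setup is done,
 * then busy-wait until userspace requests a stop. LPIs are taken (and
 * EOI'd) in guest_irq_handler.
 */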
static void guest_code(size_t nr_lpis)
{
	guest_setup_gic();
	local_irq_enable();

	GUEST_SYNC(0);

	/*
	 * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
	 * never getting the stop signal.
	 */
	while (!READ_ONCE(test_data.request_vcpus_stop))
		cpu_relax();

	GUEST_DONE();
}

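/*
 * Carve a memslot out of the top of guest physical memory to hold the
 * ITS and redistributor tables.
 */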
static void setup_memslot(void)
{
	size_t pages;
	size_t sz;

	/*
	 * For the ITS:
	 * - A single level device table
	 * - A single level collection table
	 * - The command queue
	 * - An ITT for each device
	 */
	sz = (3 + test_data.nr_devices) * SZ_64K;

	/*
	 * For the redistributors:
	 * - A shared LPI configuration table
	 * - An LPI pending table for each vCPU
	 */
	sz += (1 + test_data.nr_cpus) * SZ_64K;

	pages = sz / vm->page_size;
	gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
				    TEST_MEMSLOT_INDEX, pages, 0);
}

#define LPI_PROP_DEFAULT_PRIO	0xa0

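/*
 * Fill in the LPI configuration table, enabling every LPI as Group 1 at
 * the default priority.
 */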
static void configure_lpis(void)
{
	size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
	u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
	size_t i;

	for (i = 0; i < nr_lpis; i++) {
		tbl[i] = LPI_PROP_DEFAULT_PRIO |
			 LPI_PROP_GROUP1 |
			 LPI_PROP_ENABLED;
	}
}

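/*
 * Allocate the ITS and redistributor tables from the test memslot and
 * publish their locations to the guest. The command queue is identity
 * mapped so the guest can write ITS commands into it directly.
 */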
static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	sync_global_to_guest(vm, test_data);
}

static void setup_gic(void)
{
	its_fd = vgic_its_setup(vm);
}

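/*
 * Inject an MSI at the ITS doorbell (GITS_TRANSLATER) for the given
 * (device, event) pair.
 */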
static void signal_lpi(u32 device_id, u32 event_id)
{
	vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;

	struct kvm_msi msi = {
		.address_lo	= db_addr,
		.address_hi	= db_addr >> 32,
		.data		= event_id,
		.devid		= device_id,
		.flags		= KVM_MSI_VALID_DEVID,
	};

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
		    "KVM_SIGNAL_MSI ioctl failed");
}

static pthread_barrier_t test_setup_barrier;

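/*
 * Each worker thread acts as a single device, signaling every one of its
 * event IDs once per iteration.
 */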
static void *lpi_worker_thread(void *data)
{
	u32 device_id = (size_t)data;
	u32 event_id;
	size_t i;

	pthread_barrier_wait(&test_setup_barrier);

	for (i = 0; i < nr_iterations; i++)
		for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
			signal_lpi(device_id, event_id);

	return NULL;
}

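/*
 * Run the vCPU until the guest signals completion, rendezvousing with the
 * LPI worker threads once guest setup is done.
 */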
static void *vcpu_worker_thread(void *data)
{
	struct kvm_vcpu *vcpu = data;
	struct ucall uc;

	while (true) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			pthread_barrier_wait(&test_setup_barrier);
			continue;
		case UCALL_DONE:
			return NULL;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		default:
			TEST_FAIL("Unknown ucall: %lu", uc.cmd);
		}
	}

	return NULL;
}

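/* Report the aggregate rate at which LPIs were injected into the VM. */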
static void report_stats(struct timespec delta)
{
	double nr_lpis;
	double time;

	nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;

	time = delta.tv_sec;
	time += ((double)delta.tv_nsec) / NSEC_PER_SEC;

	pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
}

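/*
 * Start the vCPU and device threads, time how long the device threads
 * take to finish injecting LPIs, then stop the vCPUs.
 */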
static void run_test(void)
{
	u32 nr_devices = test_data.nr_devices;
	u32 nr_vcpus = test_data.nr_cpus;
	pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
	pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
	struct timespec start, delta;
	size_t i;

	TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");

	pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);

	for (i = 0; i < nr_vcpus; i++)
		pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);

	for (i = 0; i < nr_devices; i++)
		pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);

	pthread_barrier_wait(&test_setup_barrier);

	clock_gettime(CLOCK_MONOTONIC, &start);

	for (i = 0; i < nr_devices; i++)
		pthread_join(lpi_threads[i], NULL);

	delta = timespec_elapsed(start);
	write_guest_global(vm, test_data.request_vcpus_stop, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(vcpu_threads[i], NULL);

	report_stats(delta);
}

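/*
 * Create the VM and vCPUs, install the IRQ handler, and set up the
 * memslot, the vGIC/ITS, and the shared test data.
 */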
static void setup_vm(void)
{
	int i;

	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");

	vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);

	vm_init_descriptor_tables(vm);
	for (i = 0; i < test_data.nr_cpus; i++)
		vcpu_init_descriptor_tables(vcpus[i]);

	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

	setup_memslot();

	setup_gic();

	setup_test_data();
}

static void destroy_vm(void)
{
	close(its_fd);
	kvm_vm_free(vm);
	free(vcpus);
}

static void pr_usage(const char *name)
{
	pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
	pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
	pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
	pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
	pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
}

int main(int argc, char **argv)
{
	u32 nr_threads;
	int c;

	TEST_REQUIRE(kvm_supports_vgic_v3());

	while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
		switch (c) {
		case 'v':
			test_data.nr_cpus = atoi(optarg);
			break;
		case 'd':
			test_data.nr_devices = atoi(optarg);
			break;
		case 'e':
			test_data.nr_event_ids = atoi(optarg);
			break;
		case 'i':
			nr_iterations = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			pr_usage(argv[0]);
			return 1;
		}
	}

	nr_threads = test_data.nr_cpus + test_data.nr_devices;
	if (nr_threads > get_nprocs())
		pr_info("WARNING: running %u threads on %d CPUs; performance may be degraded.\n",
			nr_threads, get_nprocs());

	setup_vm();

	run_test();

	destroy_vm();

	return 0;
}