Path: blob/master/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
38237 views
// SPDX-License-Identifier: GPL-2.01/*2* vgic_lpi_stress - Stress test for KVM's ITS emulation3*4* Copyright (c) 2024 Google LLC5*/67#include <linux/sizes.h>8#include <pthread.h>9#include <stdatomic.h>10#include <sys/sysinfo.h>1112#include "kvm_util.h"13#include "gic.h"14#include "gic_v3.h"15#include "gic_v3_its.h"16#include "processor.h"17#include "ucall.h"18#include "vgic.h"1920#define TEST_MEMSLOT_INDEX 12122#define GIC_LPI_OFFSET 81922324static size_t nr_iterations = 1000;25static vm_paddr_t gpa_base;2627static struct kvm_vm *vm;28static struct kvm_vcpu **vcpus;29static int its_fd;3031static struct test_data {32bool request_vcpus_stop;33u32 nr_cpus;34u32 nr_devices;35u32 nr_event_ids;3637vm_paddr_t device_table;38vm_paddr_t collection_table;39vm_paddr_t cmdq_base;40void *cmdq_base_va;41vm_paddr_t itt_tables;4243vm_paddr_t lpi_prop_table;44vm_paddr_t lpi_pend_tables;45} test_data = {46.nr_cpus = 1,47.nr_devices = 1,48.nr_event_ids = 16,49};5051static void guest_irq_handler(struct ex_regs *regs)52{53u32 intid = gic_get_and_ack_irq();5455if (intid == IAR_SPURIOUS)56return;5758GUEST_ASSERT(intid >= GIC_LPI_OFFSET);59gic_set_eoi(intid);60}6162static void guest_setup_its_mappings(void)63{64u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;65u32 nr_events = test_data.nr_event_ids;66u32 nr_devices = test_data.nr_devices;67u32 nr_cpus = test_data.nr_cpus;6869for (coll_id = 0; coll_id < nr_cpus; coll_id++)70its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);7172/* Round-robin the LPIs to all of the vCPUs in the VM */73coll_id = 0;74for (device_id = 0; device_id < nr_devices; device_id++) {75vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);7677its_send_mapd_cmd(test_data.cmdq_base_va, device_id,78itt_base, SZ_64K, true);7980for (event_id = 0; event_id < nr_events; event_id++) {81its_send_mapti_cmd(test_data.cmdq_base_va, device_id,82event_id, coll_id, intid++);8384coll_id = (coll_id + 1) % test_data.nr_cpus;85}86}87}8889static void guest_invalidate_all_rdists(void)90{91int i;9293for (i = 0; i < test_data.nr_cpus; i++)94its_send_invall_cmd(test_data.cmdq_base_va, i);95}9697static void guest_setup_gic(void)98{99static atomic_int nr_cpus_ready = 0;100u32 cpuid = guest_get_vcpuid();101102gic_init(GIC_V3, test_data.nr_cpus);103gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,104test_data.lpi_pend_tables + (cpuid * SZ_64K));105106atomic_fetch_add(&nr_cpus_ready, 1);107108if (cpuid > 0)109return;110111while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)112cpu_relax();113114its_init(test_data.collection_table, SZ_64K,115test_data.device_table, SZ_64K,116test_data.cmdq_base, SZ_64K);117118guest_setup_its_mappings();119guest_invalidate_all_rdists();120121/* SYNC to ensure ITS setup is complete */122for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)123its_send_sync_cmd(test_data.cmdq_base_va, cpuid);124}125126static void guest_code(size_t nr_lpis)127{128guest_setup_gic();129local_irq_enable();130131GUEST_SYNC(0);132133/*134* Don't use WFI here to avoid blocking the vCPU thread indefinitely and135* never getting the stop signal.136*/137while (!READ_ONCE(test_data.request_vcpus_stop))138cpu_relax();139140GUEST_DONE();141}142143static void setup_memslot(void)144{145size_t pages;146size_t sz;147148/*149* For the ITS:150* - A single level device table151* - A single level collection table152* - The command queue153* - An ITT for each device154*/155sz = (3 + test_data.nr_devices) * SZ_64K;156157/*158* For the redistributors:159* - A shared LPI configuration table160* - An LPI pending table for each vCPU161*/162sz += (1 + test_data.nr_cpus) * SZ_64K;163164pages = sz / vm->page_size;165gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;166vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,167TEST_MEMSLOT_INDEX, pages, 0);168}169170#define LPI_PROP_DEFAULT_PRIO 0xa0171172static void configure_lpis(void)173{174size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;175u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);176size_t i;177178for (i = 0; i < nr_lpis; i++) {179tbl[i] = LPI_PROP_DEFAULT_PRIO |180LPI_PROP_GROUP1 |181LPI_PROP_ENABLED;182}183}184185static void setup_test_data(void)186{187size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);188u32 nr_devices = test_data.nr_devices;189u32 nr_cpus = test_data.nr_cpus;190vm_paddr_t cmdq_base;191192test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,193gpa_base,194TEST_MEMSLOT_INDEX);195196test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,197gpa_base,198TEST_MEMSLOT_INDEX);199200cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,201TEST_MEMSLOT_INDEX);202virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);203test_data.cmdq_base = cmdq_base;204test_data.cmdq_base_va = (void *)cmdq_base;205206test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,207gpa_base, TEST_MEMSLOT_INDEX);208209test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,210gpa_base, TEST_MEMSLOT_INDEX);211configure_lpis();212213test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,214gpa_base, TEST_MEMSLOT_INDEX);215216sync_global_to_guest(vm, test_data);217}218219static void setup_gic(void)220{221its_fd = vgic_its_setup(vm);222}223224static void signal_lpi(u32 device_id, u32 event_id)225{226vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;227228struct kvm_msi msi = {229.address_lo = db_addr,230.address_hi = db_addr >> 32,231.data = event_id,232.devid = device_id,233.flags = KVM_MSI_VALID_DEVID,234};235236/*237* KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,238* which for arm64 implies having a valid translation in the ITS.239*/240TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,241"KVM_SIGNAL_MSI ioctl failed");242}243244static pthread_barrier_t test_setup_barrier;245246static void *lpi_worker_thread(void *data)247{248u32 device_id = (size_t)data;249u32 event_id;250size_t i;251252pthread_barrier_wait(&test_setup_barrier);253254for (i = 0; i < nr_iterations; i++)255for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)256signal_lpi(device_id, event_id);257258return NULL;259}260261static void *vcpu_worker_thread(void *data)262{263struct kvm_vcpu *vcpu = data;264struct ucall uc;265266while (true) {267vcpu_run(vcpu);268269switch (get_ucall(vcpu, &uc)) {270case UCALL_SYNC:271pthread_barrier_wait(&test_setup_barrier);272continue;273case UCALL_DONE:274return NULL;275case UCALL_ABORT:276REPORT_GUEST_ASSERT(uc);277break;278default:279TEST_FAIL("Unknown ucall: %lu", uc.cmd);280}281}282283return NULL;284}285286static void report_stats(struct timespec delta)287{288double nr_lpis;289double time;290291nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;292293time = delta.tv_sec;294time += ((double)delta.tv_nsec) / NSEC_PER_SEC;295296pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);297}298299static void run_test(void)300{301u32 nr_devices = test_data.nr_devices;302u32 nr_vcpus = test_data.nr_cpus;303pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));304pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));305struct timespec start, delta;306size_t i;307308TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");309310pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);311312for (i = 0; i < nr_vcpus; i++)313pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);314315for (i = 0; i < nr_devices; i++)316pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);317318pthread_barrier_wait(&test_setup_barrier);319320clock_gettime(CLOCK_MONOTONIC, &start);321322for (i = 0; i < nr_devices; i++)323pthread_join(lpi_threads[i], NULL);324325delta = timespec_elapsed(start);326write_guest_global(vm, test_data.request_vcpus_stop, true);327328for (i = 0; i < nr_vcpus; i++)329pthread_join(vcpu_threads[i], NULL);330331report_stats(delta);332}333334static void setup_vm(void)335{336int i;337338vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));339TEST_ASSERT(vcpus, "Failed to allocate vCPU array");340341vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);342343vm_init_descriptor_tables(vm);344for (i = 0; i < test_data.nr_cpus; i++)345vcpu_init_descriptor_tables(vcpus[i]);346347vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);348349setup_memslot();350351setup_gic();352353setup_test_data();354}355356static void destroy_vm(void)357{358close(its_fd);359kvm_vm_free(vm);360free(vcpus);361}362363static void pr_usage(const char *name)364{365pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);366pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);367pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);368pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);369pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);370}371372int main(int argc, char **argv)373{374u32 nr_threads;375int c;376377TEST_REQUIRE(kvm_supports_vgic_v3());378379while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {380switch (c) {381case 'v':382test_data.nr_cpus = atoi(optarg);383break;384case 'd':385test_data.nr_devices = atoi(optarg);386break;387case 'e':388test_data.nr_event_ids = atoi(optarg);389break;390case 'i':391nr_iterations = strtoul(optarg, NULL, 0);392break;393case 'h':394default:395pr_usage(argv[0]);396return 1;397}398}399400nr_threads = test_data.nr_cpus + test_data.nr_devices;401if (nr_threads > get_nprocs())402pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",403nr_threads, get_nprocs());404405setup_vm();406407run_test();408409destroy_vm();410411return 0;412}413414415