Path: blob/master/tools/testing/selftests/kvm/lib/kvm_util.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"

#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

uint32_t guest_random_seed;
struct guest_random_state guest_rng;
static uint32_t last_guest_seed;

static size_t vcpu_mmap_sz(void);

int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
{
	int fd;

	fd = open(path, flags);
	if (fd < 0)
		goto error;

	return fd;

error:
	if (errno == EACCES || errno == ENOENT)
		ksft_exit_skip("- Cannot open '%s': %s. %s\n",
			       path, strerror(errno),
			       errno == EACCES ? "Root required?" : enoent_help);
	TEST_FAIL("Failed to open '%s'", path);
}

int open_path_or_exit(const char *path, int flags)
{
	return __open_path_or_exit(path, flags, "");
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static ssize_t get_module_param(const char *module_name, const char *param,
				void *buffer, size_t buffer_size)
{
	const int path_size = 128;
	char path[path_size];
	ssize_t bytes_read;
	int fd, r;

	/* Verify KVM is loaded, to provide a more helpful SKIP message. */
	close(open_kvm_dev_path_or_exit());

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	bytes_read = read(fd, buffer, buffer_size);
	TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
		    path, bytes_read, buffer_size);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);
	return bytes_read;
}

int kvm_get_module_param_integer(const char *module_name, const char *param)
{
	/*
	 * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
	 * NUL char, and 1 byte because the kernel sucks and inserts a newline
	 * at the end.
	 */
	char value[16 + 1 + 1];
	ssize_t r;

	memset(value, '\0', sizeof(value));

	r = get_module_param(module_name, param, value, sizeof(value));
	TEST_ASSERT(value[r - 1] == '\n',
		    "Expected trailing newline, got char '%c'", value[r - 1]);

	/*
	 * Squash the newline, otherwise atoi_paranoid() will complain about
	 * trailing non-NUL characters in the string.
	 */
	value[r - 1] = '\0';
	return atoi_paranoid(value);
}

bool kvm_get_module_param_bool(const char *module_name, const char *param)
{
	char value;
	ssize_t r;

	r = get_module_param(module_name, param, &value, sizeof(value));
	TEST_ASSERT_EQ(r, 1);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
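/*
 * Example (illustrative, not part of the library): a test can gate itself on
 * a module parameter. The "kvm"/"enable_pmu" and "halt_poll_ns" names are
 * assumed examples; any module/param exposed under /sys/module works.
 *
 *	if (!kvm_get_module_param_bool("kvm", "enable_pmu"))
 *		ksft_exit_skip("PMU disabled\n");
 *	int halt_poll = kvm_get_module_param_integer("kvm", "halt_poll_ns");
 */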
/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap. On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));

	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
		vm->stats.fd = vm_get_stats_fd(vm);
	else
		vm->stats.fd = -1;
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_16K]	= "PA-bits:52, VA-bits:48, 16K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXVYY_4K]	= "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P47V47_16K]	= "PA-bits:47, VA-bits:47, 16K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48, 0x1000, 12 },
	[VM_MODE_P52V48_16K]	= { 52, 48, 0x4000, 14 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48, 0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48, 0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48, 0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48, 0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXVYY_4K]	= { 0, 0, 0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64, 0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64, 0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48, 0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48, 0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P47V47_16K]	= { 47, 47, 0x4000, 14 },
	[VM_MODE_P36V47_16K]	= { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
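/*
 * Note (editorial): each initializer above is, in order, { pa_bits, va_bits,
 * page_size, page_shift }, matching the vm_guest_mode_params struct in the
 * selftests headers; e.g. 0x4000/14 describes 16K pages.
 */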
/*
 * Initializes vm->vpages_valid to match the canonical VA space of the
 * architecture.
 *
 * The default implementation is valid for architectures which split the
 * range addressed by a single page table into a low and high region
 * based on the MSB of the VA. On architectures with this behavior
 * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1)), -1].
 */
__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
}
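/*
 * Worked example (illustrative): with va_bits = 48 and page_shift = 12, the
 * low half covers virtual page numbers [0, 0x800000000) and the high half
 * covers [0xffff800000000, 0xfffffffffffff], i.e. VAs 0xffff800000000000
 * and up, each half spanning 2^47 bytes.
 */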
struct kvm_vm *____vm_create(struct vm_shape shape)
{
	struct kvm_vm *vm;

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = shape.mode;
	vm->type = shape.type;

	vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
	vm->page_size = vm_guest_mode_params[vm->mode].page_size;
	vm->page_shift = vm_guest_mode_params[vm->mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P52V48_16K:
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P47V47_16K:
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		kvm_init_vm_address_properties(vm);

		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		pr_debug("Guest virtual address width detected: %d\n",
			 vm->va_bits);

		if (vm->va_bits == 57) {
			vm->pgtable_levels = 5;
		} else {
			TEST_ASSERT(vm->va_bits == 48,
				    "Unexpected guest virtual address width: %d",
				    vm->va_bits);
			vm->pgtable_levels = 4;
		}
#else
		TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
	}

#ifdef __aarch64__
	TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types");
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	vm_vaddr_populate_bitmap(vm);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t page_size = vm_guest_mode_params[mode].page_size;
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
	 * test code and other per-VM assets that will be loaded into memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables. The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used. Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	/* Account for the number of pages needed by ucall. */
	nr_pages += ucall_nr_pages_required(page_size);

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

void kvm_set_files_rlimit(uint32_t nr_vcpus)
{
	/*
	 * Each vCPU will open two file descriptors: the vCPU itself and the
	 * vCPU's binary stats file descriptor. Add an arbitrary amount of
	 * buffer for all other files a test may open.
	 */
	int nr_fds_wanted = nr_vcpus * 2 + 100;
	struct rlimit rl;

	/*
	 * Check that we're allowed to open nr_fds_wanted file descriptors and
	 * try raising the limits if needed.
	 */
	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");

	if (rl.rlim_cur < nr_fds_wanted) {
		rl.rlim_cur = nr_fds_wanted;
		if (rl.rlim_max < nr_fds_wanted) {
			int old_rlim_max = rl.rlim_max;

			rl.rlim_max = nr_fds_wanted;
			__TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
				       "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
				       old_rlim_max, nr_fds_wanted);
		} else {
			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
		}
	}
}

static bool is_guest_memfd_required(struct vm_shape shape)
{
#ifdef __x86_64__
	return shape.type == KVM_X86_SNP_VM;
#else
	return false;
#endif
}

struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct userspace_mem_region *slot0;
	struct kvm_vm *vm;
	int i, flags;

	kvm_set_files_rlimit(nr_runnable_vcpus);

	pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
		 vm_guest_mode_string(shape.mode), shape.type, nr_pages);

	vm = ____vm_create(shape);

	/*
	 * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
	 * for CoCo VMs that require GUEST_MEMFD backed private memory.
	 */
	flags = 0;
	if (is_guest_memfd_required(shape))
		flags |= KVM_MEM_GUEST_MEMFD;

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
	for (i = 0; i < NR_MEM_REGIONS; i++)
		vm->memslots[i] = 0;

	kvm_vm_elf_load(vm, program_invocation_name);

	/*
	 * TODO: Add proper defines to protect the library's memslots, and then
	 * carve out memslot1 for the ucall MMIO address. KVM treats writes to
	 * read-only memslots as MMIO, and creating a read-only memslot for the
	 * MMIO region would prevent silently clobbering the MMIO region.
	 */
	slot0 = memslot2region(vm, 0);
	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);

	if (guest_random_seed != last_guest_seed) {
		pr_info("Random seed: 0x%x\n", guest_random_seed);
		last_guest_seed = guest_random_seed;
	}
	guest_rng = new_guest_random_state(guest_random_seed);
	sync_global_to_guest(vm, guest_rng);

	kvm_arch_vm_post_create(vm, nr_runnable_vcpus);

	return vm;
}
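/*
 * Example (illustrative): most tests do not call __vm_create() directly but
 * use the one-vCPU wrapper declared in kvm_util.h:
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 */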
/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   shape - VM shape (mode, e.g. VM_MODE_P52V48_4K, plus VM type)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array that will be populated with the created vCPUs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode and type specified by shape.
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no real memory is allocated for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(shape, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	kvm_arch_vm_finalize_vcpus(vm);
	return vm;
}

struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
					       struct kvm_vcpu **vcpu,
					       uint64_t extra_mem_pages,
					       void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, &region->region);

		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

int __pin_task_to_cpu(pthread_t task, int cpu)
{
	cpu_set_t cpuset;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
	return pcpu;
}
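/*
 * Example (illustrative): pinning three vCPUs and the main task, mirroring
 * the "-c" help text below:
 *
 *	uint32_t vcpu_to_pcpu[3];
 *
 *	kvm_parse_vcpu_pinning("22,23,24,50", vcpu_to_pcpu, 3);
 */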
void kvm_print_vcpu_pinning_help(void)
{
	const char *name = program_invocation_name;

	printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n"
	       "     values (target pCPU), one for each vCPU, plus an optional\n"
	       "     entry for the main application task (specified via entry\n"
	       "     <nr_vcpus + 1>). If used, entries must be provided for all\n"
	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
	       "     E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
	       "     vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
	       "         %s -v 3 -c 22,23,24,50\n\n"
	       "     To leave the application task unpinned, drop the final entry:\n\n"
	       "         %s -v 3 -c 22,23,24\n\n"
	       "     (default: no pinning)\n", name, name);
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive. If multiple overlapping regions exist, a pointer to any
 * of the regions is returned. Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}
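/*
 * Note (editorial): two inclusive ranges [start, end] and [e_start, e_end]
 * overlap iff start <= e_end && end >= e_start, the predicate used above;
 * e.g. [0x1000, 0x1fff] overlaps [0x1800, 0x27ff] but not [0x2000, 0x2fff].
 */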
static void kvm_stats_release(struct kvm_binary_stats *stats)
{
	if (stats->fd < 0)
		return;

	if (stats->desc) {
		free(stats->desc);
		stats->desc = NULL;
	}

	kvm_close(stats->fd);
	stats->fd = -1;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	if (vcpu->dirty_gfns) {
		kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		vcpu->dirty_gfns = NULL;
	}

	kvm_munmap(vcpu->run, vcpu_mmap_sz());

	kvm_close(vcpu->fd);
	kvm_stats_release(&vcpu->stats);

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	kvm_close(vmp->fd);
	kvm_close(vmp->kvm_fd);

	/* Free cached stats metadata and close FD */
	kvm_stats_release(&vmp->stats);

	kvm_arch_vm_release(vmp);
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region)
{
	rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
	rb_erase(&region->hva_node, &vm->regions.hva_tree);
	hash_del(&region->slot_node);

	sparsebit_free(&region->unused_phy_pages);
	sparsebit_free(&region->protected_phy_pages);
	kvm_munmap(region->mmap_start, region->mmap_size);
	if (region->fd >= 0) {
		/* There's an extra map when using shared memory. */
		kvm_munmap(region->mmap_alias, region->mmap_size);
		close(region->fd);
	}
	if (region->region.guest_memfd >= 0)
		close(region->region.guest_memfd);

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}
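/*
 * Example (illustrative): a typical test ends with a single kvm_vm_free().
 * kvm_vm_release() on its own is for tests that close the VM's descriptors
 * but keep the tracking structures so the VM can be recreated, e.g. via
 * kvm_vm_restart() above.
 */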
int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	kvm_ftruncate(fd, size);
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);

	return fd;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}

int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

#define TEST_REQUIRE_SET_USER_MEMORY_REGION2()			\
	__TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2),	\
		       "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")

int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				 uint64_t gpa, uint64_t size, void *hva,
				 uint32_t guest_memfd, uint64_t guest_memfd_offset)
{
	struct kvm_userspace_memory_region2 region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
		.guest_memfd = guest_memfd,
		.guest_memfd_offset = guest_memfd_offset,
	};

	TEST_REQUIRE_SET_USER_MEMORY_REGION2();

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
}
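/*
 * Example (illustrative): creating a guest_memfd-backed memslot with the
 * wrapper below; slot/gpa/size/hva are assumed to be valid for the VM.
 *
 *	int gmem = vm_create_guest_memfd(vm, size, 0);
 *
 *	vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size,
 *				   hva, gmem, 0);
 */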
void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva,
				uint32_t guest_memfd, uint64_t guest_memfd_offset)
{
	int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
					       guest_memfd, guest_memfd_offset);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
		    errno, strerror(errno));
}

/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
		uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
		int guest_memfd, uint64_t guest_memfd_offset)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t mem_size = npages * vm->page_size;
	size_t alignment;

	TEST_REQUIRE_SET_USER_MEMORY_REGION2();

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		    "Number of guest pages is not compatible with the host. "
		    "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
		    "address not on a page boundary.\n"
		    "  gpa: 0x%lx vm->page_size: 0x%x",
		    gpa, vm->page_size);
	TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
		    <= vm->max_gfn, "Physical range beyond maximum "
		    "supported physical address,\n"
		    "  gpa: 0x%lx npages: 0x%lx\n"
		    "  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		    gpa, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, gpa, (gpa + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			  "exists\n"
			  "  requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
			  "  existing gpa: 0x%lx size: 0x%lx",
			  gpa, npages, vm->page_size,
			  (uint64_t) region->region.guest_phys_addr,
			  (uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			  "already exists.\n"
			  "  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			  "  existing slot: %u paddr: 0x%lx size: 0x%lx",
			  slot, gpa, npages, region->region.slot,
			  (uint64_t) region->region.guest_phys_addr,
			  (uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = mem_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP, mmap is not guaranteed to return a hugepage aligned
	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
	 * because mmap will always return an address aligned to the HugeTLB
	 * page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);
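	/*
	 * Worked example (illustrative): if alignment is 2MB and mmap()
	 * returns an address A with only page alignment, some address in
	 * [A, A + 2MB) is 2MB-aligned, and because mmap_size is padded by
	 * alignment just below, the aligned address still has mem_size
	 * usable bytes behind it.
	 */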
	TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
				      vm_mem_backing_src_alias(src_type)->flag,
				      region->fd);

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, mem_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, mem_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;

	if (flags & KVM_MEM_GUEST_MEMFD) {
		if (guest_memfd < 0) {
			uint32_t guest_memfd_flags = 0;

			TEST_ASSERT(!guest_memfd_offset,
				    "Offset must be zero when creating new guest_memfd");
			guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
		} else {
			/*
			 * Install a unique fd for each memslot so that the fd
			 * can be closed when the region is deleted without
			 * needing to track if the fd is owned by the framework
			 * or by the caller.
			 */
			guest_memfd = kvm_dup(guest_memfd);
		}

		region->region.guest_memfd = guest_memfd;
		region->region.guest_memfd_offset = guest_memfd_offset;
	} else {
		region->region.guest_memfd = -1;
	}

	region->unused_phy_pages = sparsebit_alloc();
	if (vm_arch_has_protected_memory(vm))
		region->protected_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = gpa;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
		    "  rc: %i errno: %i\n"
		    "  slot: %u flags: 0x%x\n"
		    "  guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
		    ret, errno, slot, flags, gpa, region->region.memory_size,
		    region->region.guest_memfd);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);
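	/*
	 * Note (editorial): the alias mapping created below gives the host a
	 * second HVA for the same pages, so tests can touch guest memory
	 * without going through the primary mapping (see addr_gpa2alias()).
	 */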
	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = kvm_mmap(region->mmap_size,
					      PROT_READ | PROT_WRITE,
					      vm_mem_backing_src_alias(src_type)->flag,
					      region->fd);

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

void vm_userspace_mem_region_add(struct kvm_vm *vm,
				 enum vm_mem_backing_src_type src_type,
				 uint64_t gpa, uint32_t slot, uint64_t npages,
				 uint32_t flags)
{
	vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to the memory region structure that describes the memory region
 *   using the KVM memory slot ID given by memslot. TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to set for the memory region
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
		    "  rc: %i errno: %i slot: %u flags: 0x%x",
		    ret, errno, slot, flags);
}

void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *region = memslot2region(vm, slot);
	struct kvm_userspace_memory_region2 tmp = region->region;

	tmp.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}
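/*
 * Note (editorial): KVM deletes a memslot when a set-region ioctl is issued
 * with memory_size = 0, which is how vm_mem_region_reload() above and
 * vm_mem_region_delete() below remove a slot.
 */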
/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *region = memslot2region(vm, slot);

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	__vm_mem_region_delete(vm, region);
}

void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
			    bool punch_hole)
{
	const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
	struct userspace_mem_region *region;
	uint64_t end = base + size;
	uint64_t gpa, len;
	off_t fd_offset;
	int ret;

	for (gpa = base; gpa < end; gpa += len) {
		uint64_t offset;

		region = userspace_mem_region_find(vm, gpa, gpa);
		TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
			    "Private memory region not found for GPA 0x%lx", gpa);

		offset = gpa - region->region.guest_phys_addr;
		fd_offset = region->region.guest_memfd_offset + offset;
		len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);

		ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
		TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
			    punch_hole ? "punch hole" : "allocate", gpa, len,
			    region->region.guest_memfd, mode, fd_offset);
	}
}

/* Returns the size of a vCPU's kvm_run structure. */
static size_t vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done. Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		    "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
		    vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
			     MAP_SHARED, vcpu->fd);

	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
		vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
	else
		vcpu->stats.fd = -1;

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}
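/*
 * Example (illustrative): tests normally go through vm_vcpu_add(), which is
 * implemented per architecture on top of __vm_vcpu_add() and also points the
 * vCPU at guest_code:
 *
 *	struct kvm_vcpu *vcpu = vm_vcpu_add(vm, 0, guest_code);
 */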
/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes. TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes. A
 * TEST_ASSERT failure occurs for invalid input or if no area of at least
 * sz unallocated bytes at or above vaddr_min is available.
 */
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
			       vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
				     vm_vaddr_t vaddr_min,
				     enum kvm_mem_region_type type,
				     bool protected)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
						KVM_UTIL_MIN_PFN * vm->page_size,
						vm->memslots[type], protected);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
	     pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);
	}

	return vaddr_start;
}
vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			    enum kvm_mem_region_type type)
{
	return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
				  vm_arch_has_protected_memory(vm));
}

vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
				 vm_vaddr_t vaddr_min,
				 enum kvm_mem_region_type type)
{
	return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm. The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min. Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page. The allocated physical space comes from the TEST_DATA memory region.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
}

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *   nr_pages - Number of system pages to allocate
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}

/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);

		vaddr += page_size;
		paddr += page_size;
	}
}
/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm. When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	gpa = vm_untag_gpa(vm, gpa);

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm. When found, the equivalent
 * VM physical address is returned. A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}

/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space. And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}
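/*
 * Example (illustrative): host code commonly pokes guest memory through the
 * GPA->HVA translation; gpa is assumed to lie in a registered region.
 *
 *	memset(addr_gpa2hva(vm, gpa), 0, vm->page_size);
 */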
/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	int r;

	/*
	 * Allocate a fully in-kernel IRQ chip by default, but fall back to a
	 * split model (x86 only) if that fails (KVM x86 allows compiling out
	 * support for KVM_CREATE_IRQCHIP).
	 */
	r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
	if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
		vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
	else
		TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	if (!rc)
		assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Assert if KVM returns an error (other than -EINTR).
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer,
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = getpagesize();
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		/* The dirty ring must only be mappable shared, read+write. */
		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
				  page_size * KVM_DIRTY_LOG_PAGE_OFFSET);

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq = irq,
		.level = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
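/*
 * Example (illustrative): routing GSI 5 to IRQCHIP pin 5. Note that
 * kvm_gsi_routing_write() frees the routing table passed to it.
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	kvm_gsi_routing_irqchip_add(routing, 5, 5);
 *	kvm_gsi_routing_write(vm, routing);
 */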
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
		if (region->protected_phy_pages) {
			fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
			sparsebit_dump(stream, region->protected_phy_pages, 0);
		}
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	KVM_EXIT_STRING(UNKNOWN),
	KVM_EXIT_STRING(EXCEPTION),
	KVM_EXIT_STRING(IO),
	KVM_EXIT_STRING(HYPERCALL),
	KVM_EXIT_STRING(DEBUG),
	KVM_EXIT_STRING(HLT),
	KVM_EXIT_STRING(MMIO),
	KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
	KVM_EXIT_STRING(SHUTDOWN),
	KVM_EXIT_STRING(FAIL_ENTRY),
	KVM_EXIT_STRING(INTR),
	KVM_EXIT_STRING(SET_TPR),
	KVM_EXIT_STRING(TPR_ACCESS),
	KVM_EXIT_STRING(S390_SIEIC),
	KVM_EXIT_STRING(S390_RESET),
	KVM_EXIT_STRING(DCR),
	KVM_EXIT_STRING(NMI),
	KVM_EXIT_STRING(INTERNAL_ERROR),
	KVM_EXIT_STRING(OSI),
	KVM_EXIT_STRING(PAPR_HCALL),
	KVM_EXIT_STRING(S390_UCONTROL),
	KVM_EXIT_STRING(WATCHDOG),
	KVM_EXIT_STRING(S390_TSCH),
	KVM_EXIT_STRING(EPR),
	KVM_EXIT_STRING(SYSTEM_EVENT),
	KVM_EXIT_STRING(S390_STSI),
	KVM_EXIT_STRING(IOAPIC_EOI),
	KVM_EXIT_STRING(HYPERV),
	KVM_EXIT_STRING(ARM_NISV),
	KVM_EXIT_STRING(X86_RDMSR),
	KVM_EXIT_STRING(X86_WRMSR),
	KVM_EXIT_STRING(DIRTY_RING_FULL),
	KVM_EXIT_STRING(AP_RESET_HOLD),
	KVM_EXIT_STRING(X86_BUS_LOCK),
	KVM_EXIT_STRING(XEN),
	KVM_EXIT_STRING(RISCV_SBI),
	KVM_EXIT_STRING(RISCV_CSR),
	KVM_EXIT_STRING(NOTIFY),
	KVM_EXIT_STRING(LOONGARCH_IOCSR),
	KVM_EXIT_STRING(MEMORY_FAULT),
	KVM_EXIT_STRING(ARM_SEA),
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason. If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}
/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *   protected - True if the pages will be used as protected/private memory
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min. If found, the pages are marked as in use
 * and their base address is returned. If not enough pages are available
 * at or above paddr_min, the VM state is dumped to stderr and the test
 * aborts.
 */
vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
				vm_paddr_t paddr_min, uint32_t memslot,
				bool protected)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0,
		    "Min physical address not divisible by page size.\n"
		    "  paddr_min: 0x%lx page_size: 0x%x",
		    paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	TEST_ASSERT(!protected || region->protected_phy_pages,
		    "Region doesn't support protected memory");

	base = pg = paddr_min >> vm->page_shift;
	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg) {
		sparsebit_clear(region->unused_phy_pages, pg);
		if (protected)
			sparsebit_set(region->protected_phy_pages, pg);
	}

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
				 vm->memslots[MEM_REGION_PT]);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	/*
	 * Compute the divisor only on the shrinking path; the unsigned
	 * subtraction would wrap (and the shift would be undefined) if
	 * page_shift were larger than new_page_shift.
	 */
	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;

	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}
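/*
 * Illustrative sanity check (not called by the harness, hence marked
 * unused), assuming a 4KiB host page size: in a mode with 64KiB guest
 * pages (e.g. VM_MODE_P40V48_64K), one guest page spans
 * 1 << (16 - 12) = 16 host pages, so 3 guest pages map to 48 host
 * pages. In the other direction the conversion rounds down, so 17 host
 * pages hold only 1 complete guest page.
 */
static void __attribute__((unused)) example_page_conversions(void)
{
	TEST_ASSERT_EQ(vm_num_host_pages(VM_MODE_P40V48_64K, 3), 48);
	TEST_ASSERT_EQ(vm_num_guest_pages(VM_MODE_P40V48_64K, 17), 1);
}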
/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
		  uint64_t *data, size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!stats->desc) {
		read_stats_header(stats->fd, &stats->header);
		stats->desc = read_stats_descriptors(stats->fd, &stats->header);
	}

	size_desc = get_stats_descriptor_size(&stats->header);

	for (i = 0; i < stats->header.num_desc; ++i) {
		desc = (void *)stats->desc + (i * size_desc);

		if (strcmp(desc->name, name))
			continue;

		read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
		return;
	}

	TEST_FAIL("Unable to find stat '%s'", name);
}

__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
}

__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
{
}

__weak void kvm_arch_vm_release(struct kvm_vm *vm)
{
}

__weak void kvm_selftest_arch_init(void)
{
}

static void report_unexpected_signal(int signum)
{
#define KVM_CASE_SIGNUM(sig) \
	case sig: TEST_FAIL("Unexpected " #sig " (%d)", signum)

	switch (signum) {
	KVM_CASE_SIGNUM(SIGBUS);
	KVM_CASE_SIGNUM(SIGSEGV);
	KVM_CASE_SIGNUM(SIGILL);
	KVM_CASE_SIGNUM(SIGFPE);
	default:
		TEST_FAIL("Unexpected signal %d", signum);
	}
}
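/*
 * Illustrative sketch (not called by the harness, hence marked unused):
 * reading a single scalar stat via kvm_get_stat() above. This assumes
 * struct kvm_vm caches its binary stats state in a 'stats' field, per
 * the kvm_get_stat() signature; "remote_tlb_flush" is just an example
 * stat name, and its availability depends on the architecture.
 */
static void __attribute__((unused)) example_read_vm_stat(struct kvm_vm *vm)
{
	uint64_t val;

	kvm_get_stat(&vm->stats, "remote_tlb_flush", &val, 1);
	pr_info("remote_tlb_flush: %lu\n", val);
}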
void __attribute__((constructor)) kvm_selftest_init(void)
{
	struct sigaction sig_sa = {
		.sa_handler = report_unexpected_signal,
	};

	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	sigaction(SIGBUS, &sig_sa, NULL);
	sigaction(SIGSEGV, &sig_sa, NULL);
	sigaction(SIGILL, &sig_sa, NULL);
	sigaction(SIGFPE, &sig_sa, NULL);

	guest_random_seed = last_guest_seed = random();
	pr_info("Random seed: 0x%x\n", guest_random_seed);

	kvm_selftest_arch_init();
}

bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
{
	sparsebit_idx_t pg = 0;
	struct userspace_mem_region *region;

	if (!vm_arch_has_protected_memory(vm))
		return false;

	region = userspace_mem_region_find(vm, paddr, paddr);
	TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);

	pg = paddr >> vm->page_shift;
	return sparsebit_is_set(region->protected_phy_pages, pg);
}

__weak bool kvm_arch_has_default_irqchip(void)
{
	return false;
}
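/*
 * Note on the __weak hooks above: architectures override them from
 * their own compilation units, and the strong definition wins at link
 * time. For example, an architecture whose VMs come with a default
 * in-kernel irqchip would provide (sketch):
 *
 *	bool kvm_arch_has_default_irqchip(void)
 *	{
 *		return true;
 *	}
 *
 * Common code can then call the hook unconditionally and gets the
 * arch-specific answer, or the stub's 'false' by default.
 */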