Path: blob/master/tools/testing/selftests/kvm/kvm_page_table_test.c
38189 views
// SPDX-License-Identifier: GPL-2.01/*2* KVM page table test3*4* Copyright (C) 2021, Huawei, Inc.5*6* Make sure that THP has been enabled or enough HUGETLB pages with specific7* page size have been pre-allocated on your system, if you are planning to8* use hugepages to back the guest memory for testing.9*/10#include <stdio.h>11#include <stdlib.h>12#include <time.h>13#include <pthread.h>14#include <semaphore.h>1516#include "test_util.h"17#include "kvm_util.h"18#include "processor.h"19#include "guest_modes.h"20#include "ucall_common.h"2122#define TEST_MEM_SLOT_INDEX 12324/* Default size(1GB) of the memory for testing */25#define DEFAULT_TEST_MEM_SIZE (1 << 30)2627/* Default guest test virtual memory offset */28#define DEFAULT_GUEST_TEST_MEM 0xc00000002930/* Different guest memory accessing stages */31enum test_stage {32KVM_BEFORE_MAPPINGS,33KVM_CREATE_MAPPINGS,34KVM_UPDATE_MAPPINGS,35KVM_ADJUST_MAPPINGS,36NUM_TEST_STAGES,37};3839static const char * const test_stage_string[] = {40"KVM_BEFORE_MAPPINGS",41"KVM_CREATE_MAPPINGS",42"KVM_UPDATE_MAPPINGS",43"KVM_ADJUST_MAPPINGS",44};4546struct test_args {47struct kvm_vm *vm;48uint64_t guest_test_virt_mem;49uint64_t host_page_size;50uint64_t host_num_pages;51uint64_t large_page_size;52uint64_t large_num_pages;53uint64_t host_pages_per_lpage;54enum vm_mem_backing_src_type src_type;55struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];56};5758/*59* Guest variables. Use addr_gva2hva() if these variables need60* to be changed in host.61*/62static enum test_stage guest_test_stage;6364/* Host variables */65static uint32_t nr_vcpus = 1;66static struct test_args test_args;67static enum test_stage *current_stage;68static bool host_quit;6970/* Whether the test stage is updated, or completed */71static sem_t test_stage_updated;72static sem_t test_stage_completed;7374/*75* Guest physical memory offset of the testing memory slot.76* This will be set to the topmost valid physical address minus77* the test memory size.78*/79static uint64_t guest_test_phys_mem;8081/*82* Guest virtual memory offset of the testing memory slot.83* Must not conflict with identity mapped test code.84*/85static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;8687static void guest_code(bool do_write)88{89struct test_args *p = &test_args;90enum test_stage *current_stage = &guest_test_stage;91uint64_t addr;92int i, j;9394while (true) {95addr = p->guest_test_virt_mem;9697switch (READ_ONCE(*current_stage)) {98/*99* All vCPU threads will be started in this stage,100* where guest code of each vCPU will do nothing.101*/102case KVM_BEFORE_MAPPINGS:103break;104105/*106* Before dirty logging, vCPUs concurrently access the first107* 8 bytes of each page (host page/large page) within the same108* memory region with different accessing types (read/write).109* Then KVM will create normal page mappings or huge block110* mappings for them.111*/112case KVM_CREATE_MAPPINGS:113for (i = 0; i < p->large_num_pages; i++) {114if (do_write)115*(uint64_t *)addr = 0x0123456789ABCDEF;116else117READ_ONCE(*(uint64_t *)addr);118119addr += p->large_page_size;120}121break;122123/*124* During dirty logging, KVM will only update attributes of the125* normal page mappings from RO to RW if memory backing src type126* is anonymous. In other cases, KVM will split the huge block127* mappings into normal page mappings if memory backing src type128* is THP or HUGETLB.129*/130case KVM_UPDATE_MAPPINGS:131if (p->src_type == VM_MEM_SRC_ANONYMOUS) {132for (i = 0; i < p->host_num_pages; i++) {133*(uint64_t *)addr = 0x0123456789ABCDEF;134addr += p->host_page_size;135}136break;137}138139for (i = 0; i < p->large_num_pages; i++) {140/*141* Write to the first host page in each large142* page region, and triger break of large pages.143*/144*(uint64_t *)addr = 0x0123456789ABCDEF;145146/*147* Access the middle host pages in each large148* page region. Since dirty logging is enabled,149* this will create new mappings at the smallest150* granularity.151*/152addr += p->large_page_size / 2;153for (j = 0; j < p->host_pages_per_lpage / 2; j++) {154READ_ONCE(*(uint64_t *)addr);155addr += p->host_page_size;156}157}158break;159160/*161* After dirty logging is stopped, vCPUs concurrently read162* from every single host page. Then KVM will coalesce the163* split page mappings back to block mappings. And a TLB164* conflict abort could occur here if TLB entries of the165* page mappings are not fully invalidated.166*/167case KVM_ADJUST_MAPPINGS:168for (i = 0; i < p->host_num_pages; i++) {169READ_ONCE(*(uint64_t *)addr);170addr += p->host_page_size;171}172break;173174default:175GUEST_ASSERT(0);176}177178GUEST_SYNC(1);179}180}181182static void *vcpu_worker(void *data)183{184struct kvm_vcpu *vcpu = data;185bool do_write = !(vcpu->id % 2);186struct timespec start;187struct timespec ts_diff;188enum test_stage stage;189int ret;190191vcpu_args_set(vcpu, 1, do_write);192193while (!READ_ONCE(host_quit)) {194ret = sem_wait(&test_stage_updated);195TEST_ASSERT(ret == 0, "Error in sem_wait");196197if (READ_ONCE(host_quit))198return NULL;199200clock_gettime(CLOCK_MONOTONIC, &start);201ret = _vcpu_run(vcpu);202ts_diff = timespec_elapsed(start);203204TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);205TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,206"Invalid guest sync status: exit_reason=%s",207exit_reason_str(vcpu->run->exit_reason));208209pr_debug("Got sync event from vCPU %d\n", vcpu->id);210stage = READ_ONCE(*current_stage);211212/*213* Here we can know the execution time of every214* single vcpu running in different test stages.215*/216pr_debug("vCPU %d has completed stage %s\n"217"execution time is: %ld.%.9lds\n\n",218vcpu->id, test_stage_string[stage],219ts_diff.tv_sec, ts_diff.tv_nsec);220221ret = sem_post(&test_stage_completed);222TEST_ASSERT(ret == 0, "Error in sem_post");223}224225return NULL;226}227228struct test_params {229uint64_t phys_offset;230uint64_t test_mem_size;231enum vm_mem_backing_src_type src_type;232};233234static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)235{236int ret;237struct test_params *p = arg;238enum vm_mem_backing_src_type src_type = p->src_type;239uint64_t large_page_size = get_backing_src_pagesz(src_type);240uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;241uint64_t host_page_size = getpagesize();242uint64_t test_mem_size = p->test_mem_size;243uint64_t guest_num_pages;244uint64_t alignment;245void *host_test_mem;246struct kvm_vm *vm;247248/* Align up the test memory size */249alignment = max(large_page_size, guest_page_size);250test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1);251252/* Create a VM with enough guest pages */253guest_num_pages = test_mem_size / guest_page_size;254vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages,255guest_code, test_args.vcpus);256257/* Align down GPA of the testing memslot */258if (!p->phys_offset)259guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *260guest_page_size;261else262guest_test_phys_mem = p->phys_offset;263#ifdef __s390x__264alignment = max(0x100000UL, alignment);265#endif266guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);267268/* Set up the shared data structure test_args */269test_args.vm = vm;270test_args.guest_test_virt_mem = guest_test_virt_mem;271test_args.host_page_size = host_page_size;272test_args.host_num_pages = test_mem_size / host_page_size;273test_args.large_page_size = large_page_size;274test_args.large_num_pages = test_mem_size / large_page_size;275test_args.host_pages_per_lpage = large_page_size / host_page_size;276test_args.src_type = src_type;277278/* Add an extra memory slot with specified backing src type */279vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,280TEST_MEM_SLOT_INDEX, guest_num_pages, 0);281282/* Do mapping(GVA->GPA) for the testing memory slot */283virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);284285/* Cache the HVA pointer of the region */286host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);287288/* Export shared structure test_args to guest */289sync_global_to_guest(vm, test_args);290291ret = sem_init(&test_stage_updated, 0, 0);292TEST_ASSERT(ret == 0, "Error in sem_init");293294ret = sem_init(&test_stage_completed, 0, 0);295TEST_ASSERT(ret == 0, "Error in sem_init");296297current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage));298*current_stage = NUM_TEST_STAGES;299300pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));301pr_info("Testing memory backing src type: %s\n",302vm_mem_backing_src_alias(src_type)->name);303pr_info("Testing memory backing src granularity: 0x%lx\n",304large_page_size);305pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size);306pr_info("Guest physical test memory offset: 0x%lx\n",307guest_test_phys_mem);308pr_info("Host virtual test memory offset: 0x%lx\n",309(uint64_t)host_test_mem);310pr_info("Number of testing vCPUs: %d\n", nr_vcpus);311312return vm;313}314315static void vcpus_complete_new_stage(enum test_stage stage)316{317int ret;318int vcpus;319320/* Wake up all the vcpus to run new test stage */321for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {322ret = sem_post(&test_stage_updated);323TEST_ASSERT(ret == 0, "Error in sem_post");324}325pr_debug("All vcpus have been notified to continue\n");326327/* Wait for all the vcpus to complete new test stage */328for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {329ret = sem_wait(&test_stage_completed);330TEST_ASSERT(ret == 0, "Error in sem_wait");331332pr_debug("%d vcpus have completed stage %s\n",333vcpus + 1, test_stage_string[stage]);334}335336pr_debug("All vcpus have completed stage %s\n",337test_stage_string[stage]);338}339340static void run_test(enum vm_guest_mode mode, void *arg)341{342pthread_t *vcpu_threads;343struct kvm_vm *vm;344struct timespec start;345struct timespec ts_diff;346int ret, i;347348/* Create VM with vCPUs and make some pre-initialization */349vm = pre_init_before_test(mode, arg);350351vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));352TEST_ASSERT(vcpu_threads, "Memory allocation failed");353354host_quit = false;355*current_stage = KVM_BEFORE_MAPPINGS;356357for (i = 0; i < nr_vcpus; i++)358pthread_create(&vcpu_threads[i], NULL, vcpu_worker,359test_args.vcpus[i]);360361vcpus_complete_new_stage(*current_stage);362pr_info("Started all vCPUs successfully\n");363364/* Test the stage of KVM creating mappings */365*current_stage = KVM_CREATE_MAPPINGS;366367clock_gettime(CLOCK_MONOTONIC, &start);368vcpus_complete_new_stage(*current_stage);369ts_diff = timespec_elapsed(start);370371pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",372ts_diff.tv_sec, ts_diff.tv_nsec);373374/* Test the stage of KVM updating mappings */375vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,376KVM_MEM_LOG_DIRTY_PAGES);377378*current_stage = KVM_UPDATE_MAPPINGS;379380clock_gettime(CLOCK_MONOTONIC, &start);381vcpus_complete_new_stage(*current_stage);382ts_diff = timespec_elapsed(start);383384pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",385ts_diff.tv_sec, ts_diff.tv_nsec);386387/* Test the stage of KVM adjusting mappings */388vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);389390*current_stage = KVM_ADJUST_MAPPINGS;391392clock_gettime(CLOCK_MONOTONIC, &start);393vcpus_complete_new_stage(*current_stage);394ts_diff = timespec_elapsed(start);395396pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n",397ts_diff.tv_sec, ts_diff.tv_nsec);398399/* Tell the vcpu thread to quit */400host_quit = true;401for (i = 0; i < nr_vcpus; i++) {402ret = sem_post(&test_stage_updated);403TEST_ASSERT(ret == 0, "Error in sem_post");404}405406for (i = 0; i < nr_vcpus; i++)407pthread_join(vcpu_threads[i], NULL);408409ret = sem_destroy(&test_stage_updated);410TEST_ASSERT(ret == 0, "Error in sem_destroy");411412ret = sem_destroy(&test_stage_completed);413TEST_ASSERT(ret == 0, "Error in sem_destroy");414415free(vcpu_threads);416kvm_vm_free(vm);417}418419static void help(char *name)420{421puts("");422printf("usage: %s [-h] [-p offset] [-m mode] "423"[-b mem-size] [-v vcpus] [-s mem-type]\n", name);424puts("");425printf(" -p: specify guest physical test memory offset\n"426" Warning: a low offset can conflict with the loaded test code.\n");427guest_modes_help();428printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n"429" (default: 1G)\n");430printf(" -v: specify the number of vCPUs to run\n"431" (default: 1)\n");432backing_src_help("-s");433puts("");434}435436int main(int argc, char *argv[])437{438int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);439struct test_params p = {440.test_mem_size = DEFAULT_TEST_MEM_SIZE,441.src_type = DEFAULT_VM_MEM_SRC,442};443int opt;444445guest_modes_append_default();446447while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {448switch (opt) {449case 'p':450p.phys_offset = strtoull(optarg, NULL, 0);451break;452case 'm':453guest_modes_cmdline(optarg);454break;455case 'b':456p.test_mem_size = parse_size(optarg);457break;458case 'v':459nr_vcpus = atoi_positive("Number of vCPUs", optarg);460TEST_ASSERT(nr_vcpus <= max_vcpus,461"Invalid number of vcpus, must be between 1 and %d", max_vcpus);462break;463case 's':464p.src_type = parse_backing_src_type(optarg);465break;466case 'h':467default:468help(argv[0]);469exit(0);470}471}472473for_each_guest_mode(run_test, &p);474475return 0;476}477478479