/* Path: blob/master/tools/testing/selftests/kvm/arm64/page_fault_test.c */
// SPDX-License-Identifier: GPL-2.01/*2* page_fault_test.c - Test stage 2 faults.3*4* This test tries different combinations of guest accesses (e.g., write,5* S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on6* hugetlbfs with a hole). It checks that the expected handling method is7* called (e.g., uffd faults with the right address and write/read flag).8*/9#include <linux/bitmap.h>10#include <fcntl.h>11#include <test_util.h>12#include <kvm_util.h>13#include <processor.h>14#include <asm/sysreg.h>15#include <linux/bitfield.h>16#include "guest_modes.h"17#include "userfaultfd_util.h"1819/* Guest virtual addresses that point to the test page and its PTE. */20#define TEST_GVA 0xc000000021#define TEST_EXEC_GVA (TEST_GVA + 0x8)22#define TEST_PTE_GVA 0xb000000023#define TEST_DATA 0x0123456789ABCDEF2425static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;2627#define CMD_NONE (0)28#define CMD_SKIP_TEST (1ULL << 1)29#define CMD_HOLE_PT (1ULL << 2)30#define CMD_HOLE_DATA (1ULL << 3)31#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)32#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)33#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)34#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)35#define CMD_SET_PTE_AF (1ULL << 8)3637#define PREPARE_FN_NR 1038#define CHECK_FN_NR 103940static struct event_cnt {41int mmio_exits;42int fail_vcpu_runs;43int uffd_faults;44/* uffd_faults is incremented from multiple threads. 
*/45pthread_mutex_t uffd_faults_mutex;46} events;4748struct test_desc {49const char *name;50uint64_t mem_mark_cmd;51/* Skip the test if any prepare function returns false */52bool (*guest_prepare[PREPARE_FN_NR])(void);53void (*guest_test)(void);54void (*guest_test_check[CHECK_FN_NR])(void);55uffd_handler_t uffd_pt_handler;56uffd_handler_t uffd_data_handler;57void (*dabt_handler)(struct ex_regs *regs);58void (*iabt_handler)(struct ex_regs *regs);59void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);60void (*fail_vcpu_run_handler)(int ret);61uint32_t pt_memslot_flags;62uint32_t data_memslot_flags;63bool skip;64struct event_cnt expected_events;65};6667struct test_params {68enum vm_mem_backing_src_type src_type;69struct test_desc *test_desc;70};7172static inline void flush_tlb_page(uint64_t vaddr)73{74uint64_t page = vaddr >> 12;7576dsb(ishst);77asm volatile("tlbi vaae1is, %0" :: "r" (page));78dsb(ish);79isb();80}8182static void guest_write64(void)83{84uint64_t val;8586WRITE_ONCE(*guest_test_memory, TEST_DATA);87val = READ_ONCE(*guest_test_memory);88GUEST_ASSERT_EQ(val, TEST_DATA);89}9091/* Check the system for atomic instructions. */92static bool guest_check_lse(void)93{94uint64_t isar0 = read_sysreg(id_aa64isar0_el1);95uint64_t atomic;9697atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0);98return atomic >= 2;99}100101static bool guest_check_dc_zva(void)102{103uint64_t dczid = read_sysreg(dczid_el0);104uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid);105106return dzp == 0;107}108109/* Compare and swap instruction. 
*/110static void guest_cas(void)111{112uint64_t val;113114GUEST_ASSERT(guest_check_lse());115asm volatile(".arch_extension lse\n"116"casal %0, %1, [%2]\n"117:: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));118val = READ_ONCE(*guest_test_memory);119GUEST_ASSERT_EQ(val, TEST_DATA);120}121122static void guest_read64(void)123{124uint64_t val;125126val = READ_ONCE(*guest_test_memory);127GUEST_ASSERT_EQ(val, 0);128}129130/* Address translation instruction */131static void guest_at(void)132{133uint64_t par;134135asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));136isb();137par = read_sysreg(par_el1);138139/* Bit 1 indicates whether the AT was successful */140GUEST_ASSERT_EQ(par & 1, 0);141}142143/*144* The size of the block written by "dc zva" is guaranteed to be between (2 <<145* 0) and (2 << 9), which is safe in our case as we need the write to happen146* for at least a word, and not more than a page.147*/148static void guest_dc_zva(void)149{150uint16_t val;151152asm volatile("dc zva, %0" :: "r" (guest_test_memory));153dsb(ish);154val = READ_ONCE(*guest_test_memory);155GUEST_ASSERT_EQ(val, 0);156}157158/*159* Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).160* And that's special because KVM must take special care with those: they161* should still count as accesses for dirty logging or user-faulting, but162* should be handled differently on mmio.163*/164static void guest_ld_preidx(void)165{166uint64_t val;167uint64_t addr = TEST_GVA - 8;168169/*170* This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is171* in a gap between memslots not backing by anything.172*/173asm volatile("ldr %0, [%1, #8]!"174: "=r" (val), "+r" (addr));175GUEST_ASSERT_EQ(val, 0);176GUEST_ASSERT_EQ(addr, TEST_GVA);177}178179static void guest_st_preidx(void)180{181uint64_t val = TEST_DATA;182uint64_t addr = TEST_GVA - 8;183184asm volatile("str %0, [%1, #8]!"185: "+r" (val), "+r" (addr));186187GUEST_ASSERT_EQ(addr, TEST_GVA);188val = 
READ_ONCE(*guest_test_memory);189}190191static bool guest_set_ha(void)192{193uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);194uint64_t hadbs, tcr;195196/* Skip if HA is not supported. */197hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1);198if (hadbs == 0)199return false;200201tcr = read_sysreg(tcr_el1) | TCR_HA;202write_sysreg(tcr, tcr_el1);203isb();204205return true;206}207208static bool guest_clear_pte_af(void)209{210*((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;211flush_tlb_page(TEST_GVA);212213return true;214}215216static void guest_check_pte_af(void)217{218dsb(ish);219GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);220}221222static void guest_check_write_in_dirty_log(void)223{224GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);225}226227static void guest_check_no_write_in_dirty_log(void)228{229GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);230}231232static void guest_check_s1ptw_wr_in_dirty_log(void)233{234GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);235}236237static void guest_check_no_s1ptw_wr_in_dirty_log(void)238{239GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);240}241242static void guest_exec(void)243{244int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;245int ret;246247ret = code();248GUEST_ASSERT_EQ(ret, 0x77);249}250251static bool guest_prepare(struct test_desc *test)252{253bool (*prepare_fn)(void);254int i;255256for (i = 0; i < PREPARE_FN_NR; i++) {257prepare_fn = test->guest_prepare[i];258if (prepare_fn && !prepare_fn())259return false;260}261262return true;263}264265static void guest_test_check(struct test_desc *test)266{267void (*check_fn)(void);268int i;269270for (i = 0; i < CHECK_FN_NR; i++) {271check_fn = test->guest_test_check[i];272if (check_fn)273check_fn();274}275}276277static void guest_code(struct test_desc *test)278{279if (!guest_prepare(test))280GUEST_SYNC(CMD_SKIP_TEST);281282GUEST_SYNC(test->mem_mark_cmd);283284if (test->guest_test)285test->guest_test();286287guest_test_check(test);288GUEST_DONE();289}290291static void 
no_dabt_handler(struct ex_regs *regs)292{293GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));294}295296static void no_iabt_handler(struct ex_regs *regs)297{298GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);299}300301static struct uffd_args {302char *copy;303void *hva;304uint64_t paging_size;305} pt_args, data_args;306307/* Returns true to continue the test, and false if it should be skipped. */308static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,309struct uffd_args *args)310{311uint64_t addr = msg->arg.pagefault.address;312uint64_t flags = msg->arg.pagefault.flags;313struct uffdio_copy copy;314int ret;315316TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,317"The only expected UFFD mode is MISSING");318TEST_ASSERT_EQ(addr, (uint64_t)args->hva);319320pr_debug("uffd fault: addr=%p write=%d\n",321(void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));322323copy.src = (uint64_t)args->copy;324copy.dst = addr;325copy.len = args->paging_size;326copy.mode = 0;327328ret = ioctl(uffd, UFFDIO_COPY, ©);329if (ret == -1) {330pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",331addr, errno);332return ret;333}334335pthread_mutex_lock(&events.uffd_faults_mutex);336events.uffd_faults += 1;337pthread_mutex_unlock(&events.uffd_faults_mutex);338return 0;339}340341static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)342{343return uffd_generic_handler(mode, uffd, msg, &pt_args);344}345346static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)347{348return uffd_generic_handler(mode, uffd, msg, &data_args);349}350351static void setup_uffd_args(struct userspace_mem_region *region,352struct uffd_args *args)353{354args->hva = (void *)region->region.userspace_addr;355args->paging_size = region->region.memory_size;356357args->copy = malloc(args->paging_size);358TEST_ASSERT(args->copy, "Failed to allocate data copy.");359memcpy(args->copy, args->hva, args->paging_size);360}361362static void 
setup_uffd(struct kvm_vm *vm, struct test_params *p,363struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)364{365struct test_desc *test = p->test_desc;366int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;367368setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);369setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);370371*pt_uffd = NULL;372if (test->uffd_pt_handler)373*pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,374pt_args.hva,375pt_args.paging_size,3761, test->uffd_pt_handler);377378*data_uffd = NULL;379if (test->uffd_data_handler)380*data_uffd = uffd_setup_demand_paging(uffd_mode, 0,381data_args.hva,382data_args.paging_size,3831, test->uffd_data_handler);384}385386static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,387struct uffd_desc *data_uffd)388{389if (test->uffd_pt_handler)390uffd_stop_demand_paging(pt_uffd);391if (test->uffd_data_handler)392uffd_stop_demand_paging(data_uffd);393394free(pt_args.copy);395free(data_args.copy);396}397398static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)399{400TEST_FAIL("There was no UFFD fault expected.");401return -1;402}403404/* Returns false if the test should be skipped. 
*/405static bool punch_hole_in_backing_store(struct kvm_vm *vm,406struct userspace_mem_region *region)407{408void *hva = (void *)region->region.userspace_addr;409uint64_t paging_size = region->region.memory_size;410int ret, fd = region->fd;411412if (fd != -1) {413ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,4140, paging_size);415TEST_ASSERT(ret == 0, "fallocate failed");416} else {417ret = madvise(hva, paging_size, MADV_DONTNEED);418TEST_ASSERT(ret == 0, "madvise failed");419}420421return true;422}423424static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)425{426struct userspace_mem_region *region;427void *hva;428429region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);430hva = (void *)region->region.userspace_addr;431432TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);433434memcpy(hva, run->mmio.data, run->mmio.len);435events.mmio_exits += 1;436}437438static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)439{440uint64_t data;441442memcpy(&data, run->mmio.data, sizeof(data));443pr_debug("addr=%lld len=%d w=%d data=%lx\n",444run->mmio.phys_addr, run->mmio.len,445run->mmio.is_write, data);446TEST_FAIL("There was no MMIO exit expected.");447}448449static bool check_write_in_dirty_log(struct kvm_vm *vm,450struct userspace_mem_region *region,451uint64_t host_pg_nr)452{453unsigned long *bmap;454bool first_page_dirty;455uint64_t size = region->region.memory_size;456457/* getpage_size() is not always equal to vm->page_size */458bmap = bitmap_zalloc(size / getpagesize());459kvm_vm_get_dirty_log(vm, region->region.slot, bmap);460first_page_dirty = test_bit(host_pg_nr, bmap);461free(bmap);462return first_page_dirty;463}464465/* Returns true to continue the test, and false if it should be skipped. 
*/466static bool handle_cmd(struct kvm_vm *vm, int cmd)467{468struct userspace_mem_region *data_region, *pt_region;469bool continue_test = true;470uint64_t pte_gpa, pte_pg;471472data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);473pt_region = vm_get_mem_region(vm, MEM_REGION_PT);474pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));475pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();476477if (cmd == CMD_SKIP_TEST)478continue_test = false;479480if (cmd & CMD_HOLE_PT)481continue_test = punch_hole_in_backing_store(vm, pt_region);482if (cmd & CMD_HOLE_DATA)483continue_test = punch_hole_in_backing_store(vm, data_region);484if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)485TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),486"Missing write in dirty log");487if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)488TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),489"Missing s1ptw write in dirty log");490if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)491TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),492"Unexpected write in dirty log");493if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)494TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),495"Unexpected s1ptw write in dirty log");496497return continue_test;498}499500void fail_vcpu_run_no_handler(int ret)501{502TEST_FAIL("Unexpected vcpu run failure");503}504505void fail_vcpu_run_mmio_no_syndrome_handler(int ret)506{507TEST_ASSERT(errno == ENOSYS,508"The mmio handler should have returned not implemented.");509events.fail_vcpu_runs += 1;510}511512typedef uint32_t aarch64_insn_t;513extern aarch64_insn_t __exec_test[2];514515noinline void __return_0x77(void)516{517asm volatile("__exec_test: mov x0, #0x77\n"518"ret\n");519}520521/*522* Note that this function runs on the host before the test VM starts: there's523* no need to sync the D$ and I$ caches.524*/525static void load_exec_code_for_test(struct kvm_vm *vm)526{527uint64_t *code;528struct userspace_mem_region *region;529void 
*hva;530531region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);532hva = (void *)region->region.userspace_addr;533534assert(TEST_EXEC_GVA > TEST_GVA);535code = hva + TEST_EXEC_GVA - TEST_GVA;536memcpy(code, __exec_test, sizeof(__exec_test));537}538539static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,540struct test_desc *test)541{542vm_init_descriptor_tables(vm);543vcpu_init_descriptor_tables(vcpu);544545vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,546ESR_ELx_EC_DABT_CUR, no_dabt_handler);547vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,548ESR_ELx_EC_IABT_CUR, no_iabt_handler);549}550551static void setup_gva_maps(struct kvm_vm *vm)552{553struct userspace_mem_region *region;554uint64_t pte_gpa;555556region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);557/* Map TEST_GVA first. This will install a new PTE. */558virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);559/* Then map TEST_PTE_GVA to the above PTE. */560pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));561virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);562}563564enum pf_test_memslots {565CODE_AND_DATA_MEMSLOT,566PAGE_TABLE_MEMSLOT,567TEST_DATA_MEMSLOT,568};569570/*571* Create a memslot for code and data at pfn=0, and test-data and PT ones572* at max_gfn.573*/574static void setup_memslots(struct kvm_vm *vm, struct test_params *p)575{576uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);577uint64_t guest_page_size = vm->page_size;578uint64_t max_gfn = vm_compute_max_gfn(vm);579/* Enough for 2M of code when using 4K guest pages. */580uint64_t code_npages = 512;581uint64_t pt_size, data_size, data_gpa;582583/*584* This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using585* VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13586* pages. 
VM_MODE_P48V48_4K is the mode with most PT pages; let's use587* twice that just in case.588*/589pt_size = 26 * guest_page_size;590591/* memslot sizes and gpa's must be aligned to the backing page size */592pt_size = align_up(pt_size, backing_src_pagesz);593data_size = align_up(guest_page_size, backing_src_pagesz);594data_gpa = (max_gfn * guest_page_size) - data_size;595data_gpa = align_down(data_gpa, backing_src_pagesz);596597vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,598CODE_AND_DATA_MEMSLOT, code_npages, 0);599vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;600vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;601602vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,603PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,604p->test_desc->pt_memslot_flags);605vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;606607vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,608data_size / guest_page_size,609p->test_desc->data_memslot_flags);610vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;611}612613static void setup_ucall(struct kvm_vm *vm)614{615struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);616617ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);618}619620static void setup_default_handlers(struct test_desc *test)621{622if (!test->mmio_handler)623test->mmio_handler = mmio_no_handler;624625if (!test->fail_vcpu_run_handler)626test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;627}628629static void check_event_counts(struct test_desc *test)630{631TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);632TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);633TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);634}635636static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)637{638struct test_desc *test = p->test_desc;639640pr_debug("Test: %s\n", test->name);641pr_debug("Testing 
guest mode: %s\n", vm_guest_mode_string(mode));642pr_debug("Testing memory backing src type: %s\n",643vm_mem_backing_src_alias(p->src_type)->name);644}645646static void reset_event_counts(void)647{648memset(&events, 0, sizeof(events));649}650651/*652* This function either succeeds, skips the test (after setting test->skip), or653* fails with a TEST_FAIL that aborts all tests.654*/655static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,656struct test_desc *test)657{658struct kvm_run *run;659struct ucall uc;660int ret;661662run = vcpu->run;663664for (;;) {665ret = _vcpu_run(vcpu);666if (ret) {667test->fail_vcpu_run_handler(ret);668goto done;669}670671switch (get_ucall(vcpu, &uc)) {672case UCALL_SYNC:673if (!handle_cmd(vm, uc.args[1])) {674test->skip = true;675goto done;676}677break;678case UCALL_ABORT:679REPORT_GUEST_ASSERT(uc);680break;681case UCALL_DONE:682goto done;683case UCALL_NONE:684if (run->exit_reason == KVM_EXIT_MMIO)685test->mmio_handler(vm, run);686break;687default:688TEST_FAIL("Unknown ucall %lu", uc.cmd);689}690}691692done:693pr_debug(test->skip ? "Skipped.\n" : "Done.\n");694}695696static void run_test(enum vm_guest_mode mode, void *arg)697{698struct test_params *p = (struct test_params *)arg;699struct test_desc *test = p->test_desc;700struct kvm_vm *vm;701struct kvm_vcpu *vcpu;702struct uffd_desc *pt_uffd, *data_uffd;703704print_test_banner(mode, p);705706vm = ____vm_create(VM_SHAPE(mode));707setup_memslots(vm, p);708kvm_vm_elf_load(vm, program_invocation_name);709setup_ucall(vm);710vcpu = vm_vcpu_add(vm, 0, guest_code);711712setup_gva_maps(vm);713714reset_event_counts();715716/*717* Set some code in the data memslot for the guest to execute (only718* applicable to the EXEC tests). 
This has to be done before719* setup_uffd() as that function copies the memslot data for the uffd720* handler.721*/722load_exec_code_for_test(vm);723setup_uffd(vm, p, &pt_uffd, &data_uffd);724setup_abort_handlers(vm, vcpu, test);725setup_default_handlers(test);726vcpu_args_set(vcpu, 1, test);727728vcpu_run_loop(vm, vcpu, test);729730kvm_vm_free(vm);731free_uffd(test, pt_uffd, data_uffd);732733/*734* Make sure we check the events after the uffd threads have exited,735* which means they updated their respective event counters.736*/737if (!test->skip)738check_event_counts(test);739}740741static void help(char *name)742{743puts("");744printf("usage: %s [-h] [-s mem-type]\n", name);745puts("");746guest_modes_help();747backing_src_help("-s");748puts("");749}750751#define SNAME(s) #s752#define SCAT2(a, b) SNAME(a ## _ ## b)753#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))754#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))755756#define _CHECK(_test) _CHECK_##_test757#define _PREPARE(_test) _PREPARE_##_test758#define _PREPARE_guest_read64 NULL759#define _PREPARE_guest_ld_preidx NULL760#define _PREPARE_guest_write64 NULL761#define _PREPARE_guest_st_preidx NULL762#define _PREPARE_guest_exec NULL763#define _PREPARE_guest_at NULL764#define _PREPARE_guest_dc_zva guest_check_dc_zva765#define _PREPARE_guest_cas guest_check_lse766767/* With or without access flag checks */768#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af769#define _PREPARE_no_af NULL770#define _CHECK_with_af guest_check_pte_af771#define _CHECK_no_af NULL772773/* Performs an access and checks that no faults were triggered. 
*/774#define TEST_ACCESS(_access, _with_af, _mark_cmd) \775{ \776.name = SCAT3(_access, _with_af, #_mark_cmd), \777.guest_prepare = { _PREPARE(_with_af), \778_PREPARE(_access) }, \779.mem_mark_cmd = _mark_cmd, \780.guest_test = _access, \781.guest_test_check = { _CHECK(_with_af) }, \782.expected_events = { 0 }, \783}784785#define TEST_UFFD(_access, _with_af, _mark_cmd, \786_uffd_data_handler, _uffd_pt_handler, _uffd_faults) \787{ \788.name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \789.guest_prepare = { _PREPARE(_with_af), \790_PREPARE(_access) }, \791.guest_test = _access, \792.mem_mark_cmd = _mark_cmd, \793.guest_test_check = { _CHECK(_with_af) }, \794.uffd_data_handler = _uffd_data_handler, \795.uffd_pt_handler = _uffd_pt_handler, \796.expected_events = { .uffd_faults = _uffd_faults, }, \797}798799#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \800{ \801.name = SCAT3(dirty_log, _access, _with_af), \802.data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \803.pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \804.guest_prepare = { _PREPARE(_with_af), \805_PREPARE(_access) }, \806.guest_test = _access, \807.guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \808.expected_events = { 0 }, \809}810811#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \812_uffd_faults, _test_check, _pt_check) \813{ \814.name = SCAT3(uffd_and_dirty_log, _access, _with_af), \815.data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \816.pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \817.guest_prepare = { _PREPARE(_with_af), \818_PREPARE(_access) }, \819.guest_test = _access, \820.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \821.guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \822.uffd_data_handler = _uffd_data_handler, \823.uffd_pt_handler = uffd_pt_handler, \824.expected_events = { .uffd_faults = _uffd_faults, }, \825}826827#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \828{ \829.name = SCAT2(ro_memslot, _access), 
\830.data_memslot_flags = KVM_MEM_READONLY, \831.pt_memslot_flags = KVM_MEM_READONLY, \832.guest_prepare = { _PREPARE(_access) }, \833.guest_test = _access, \834.mmio_handler = _mmio_handler, \835.expected_events = { .mmio_exits = _mmio_exits }, \836}837838#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \839{ \840.name = SCAT2(ro_memslot_no_syndrome, _access), \841.data_memslot_flags = KVM_MEM_READONLY, \842.pt_memslot_flags = KVM_MEM_READONLY, \843.guest_prepare = { _PREPARE(_access) }, \844.guest_test = _access, \845.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \846.expected_events = { .fail_vcpu_runs = 1 }, \847}848849#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \850_test_check) \851{ \852.name = SCAT2(ro_memslot, _access), \853.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \854.pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \855.guest_prepare = { _PREPARE(_access) }, \856.guest_test = _access, \857.guest_test_check = { _test_check }, \858.mmio_handler = _mmio_handler, \859.expected_events = { .mmio_exits = _mmio_exits}, \860}861862#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \863{ \864.name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \865.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \866.pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \867.guest_prepare = { _PREPARE(_access) }, \868.guest_test = _access, \869.guest_test_check = { _test_check }, \870.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \871.expected_events = { .fail_vcpu_runs = 1 }, \872}873874#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \875_uffd_data_handler, _uffd_faults) \876{ \877.name = SCAT2(ro_memslot_uffd, _access), \878.data_memslot_flags = KVM_MEM_READONLY, \879.pt_memslot_flags = KVM_MEM_READONLY, \880.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \881.guest_prepare = { _PREPARE(_access) }, \882.guest_test = 
_access, \883.uffd_data_handler = _uffd_data_handler, \884.uffd_pt_handler = uffd_pt_handler, \885.mmio_handler = _mmio_handler, \886.expected_events = { .mmio_exits = _mmio_exits, \887.uffd_faults = _uffd_faults }, \888}889890#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \891_uffd_faults) \892{ \893.name = SCAT2(ro_memslot_no_syndrome, _access), \894.data_memslot_flags = KVM_MEM_READONLY, \895.pt_memslot_flags = KVM_MEM_READONLY, \896.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \897.guest_prepare = { _PREPARE(_access) }, \898.guest_test = _access, \899.uffd_data_handler = _uffd_data_handler, \900.uffd_pt_handler = uffd_pt_handler, \901.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \902.expected_events = { .fail_vcpu_runs = 1, \903.uffd_faults = _uffd_faults }, \904}905906static struct test_desc tests[] = {907908/* Check that HW is setting the Access Flag (AF) (sanity checks). */909TEST_ACCESS(guest_read64, with_af, CMD_NONE),910TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),911TEST_ACCESS(guest_cas, with_af, CMD_NONE),912TEST_ACCESS(guest_write64, with_af, CMD_NONE),913TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),914TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),915TEST_ACCESS(guest_exec, with_af, CMD_NONE),916917/*918* Punch a hole in the data backing store, and then try multiple919* accesses: reads should rturn zeroes, and writes should920* re-populate the page. Moreover, the test also check that no921* exception was generated in the guest. 
Note that this922* reading/writing behavior is the same as reading/writing a923* punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from924* userspace.925*/926TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),927TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),928TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),929TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),930TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),931TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),932TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),933934/*935* Punch holes in the data and PT backing stores and mark them for936* userfaultfd handling. This should result in 2 faults: the access937* on the data backing store, and its respective S1 page table walk938* (S1PTW).939*/940TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,941uffd_data_handler, uffd_pt_handler, 2),942TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,943uffd_data_handler, uffd_pt_handler, 2),944TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,945uffd_data_handler, uffd_pt_handler, 2),946/*947* Can't test guest_at with_af as it's IMPDEF whether the AF is set.948* The S1PTW fault should still be marked as a write.949*/950TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,951uffd_no_handler, uffd_pt_handler, 1),952TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,953uffd_data_handler, uffd_pt_handler, 2),954TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,955uffd_data_handler, uffd_pt_handler, 2),956TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,957uffd_data_handler, uffd_pt_handler, 2),958TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,959uffd_data_handler, uffd_pt_handler, 2),960TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,961uffd_data_handler, uffd_pt_handler, 2),962963/*964* Try accesses when the data and PT memory regions are both965* tracked for dirty logging.966*/967TEST_DIRTY_LOG(guest_read64, with_af, 
guest_check_no_write_in_dirty_log,968guest_check_s1ptw_wr_in_dirty_log),969TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,970guest_check_no_s1ptw_wr_in_dirty_log),971TEST_DIRTY_LOG(guest_ld_preidx, with_af,972guest_check_no_write_in_dirty_log,973guest_check_s1ptw_wr_in_dirty_log),974TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,975guest_check_no_s1ptw_wr_in_dirty_log),976TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,977guest_check_s1ptw_wr_in_dirty_log),978TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,979guest_check_s1ptw_wr_in_dirty_log),980TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,981guest_check_s1ptw_wr_in_dirty_log),982TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,983guest_check_s1ptw_wr_in_dirty_log),984TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,985guest_check_s1ptw_wr_in_dirty_log),986987/*988* Access when the data and PT memory regions are both marked for989* dirty logging and UFFD at the same time. The expected result is990* that writes should mark the dirty log and trigger a userfaultfd991* write fault. Reads/execs should result in a read userfaultfd992* fault, and nothing in the dirty log. 
Any S1PTW should result in993* a write in the dirty log and a userfaultfd write.994*/995TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,996uffd_data_handler, 2,997guest_check_no_write_in_dirty_log,998guest_check_s1ptw_wr_in_dirty_log),999TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,1000uffd_data_handler, 2,1001guest_check_no_write_in_dirty_log,1002guest_check_no_s1ptw_wr_in_dirty_log),1003TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,1004uffd_data_handler,10052, guest_check_no_write_in_dirty_log,1006guest_check_s1ptw_wr_in_dirty_log),1007TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,1008guest_check_no_write_in_dirty_log,1009guest_check_s1ptw_wr_in_dirty_log),1010TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,1011uffd_data_handler, 2,1012guest_check_no_write_in_dirty_log,1013guest_check_s1ptw_wr_in_dirty_log),1014TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,1015uffd_data_handler,10162, guest_check_write_in_dirty_log,1017guest_check_s1ptw_wr_in_dirty_log),1018TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,1019uffd_data_handler, 2,1020guest_check_write_in_dirty_log,1021guest_check_s1ptw_wr_in_dirty_log),1022TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,1023uffd_data_handler,10242, guest_check_write_in_dirty_log,1025guest_check_s1ptw_wr_in_dirty_log),1026TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,1027uffd_data_handler, 2,1028guest_check_write_in_dirty_log,1029guest_check_s1ptw_wr_in_dirty_log),1030/*1031* Access when both the PT and data regions are marked read-only1032* (with KVM_MEM_READONLY). 
Writes with a syndrome result in an1033* MMIO exit, writes with no syndrome (e.g., CAS) result in a1034* failed vcpu run, and reads/execs with and without syndroms do1035* not fault.1036*/1037TEST_RO_MEMSLOT(guest_read64, 0, 0),1038TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),1039TEST_RO_MEMSLOT(guest_at, 0, 0),1040TEST_RO_MEMSLOT(guest_exec, 0, 0),1041TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),1042TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),1043TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),1044TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),10451046/*1047* The PT and data regions are both read-only and marked1048* for dirty logging at the same time. The expected result is that1049* for writes there should be no write in the dirty log. The1050* readonly handling is the same as if the memslot was not marked1051* for dirty logging: writes with a syndrome result in an MMIO1052* exit, and writes with no syndrome result in a failed vcpu run.1053*/1054TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,1055guest_check_no_write_in_dirty_log),1056TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,1057guest_check_no_write_in_dirty_log),1058TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,1059guest_check_no_write_in_dirty_log),1060TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,1061guest_check_no_write_in_dirty_log),1062TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,10631, guest_check_no_write_in_dirty_log),1064TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,1065guest_check_no_write_in_dirty_log),1066TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,1067guest_check_no_write_in_dirty_log),1068TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,1069guest_check_no_write_in_dirty_log),10701071/*1072* The PT and data regions are both read-only and punched with1073* holes tracked with userfaultfd. The expected result is the1074* union of both userfaultfd and read-only behaviors. 
For example,1075* write accesses result in a userfaultfd write fault and an MMIO1076* exit. Writes with no syndrome result in a failed vcpu run and1077* no userfaultfd write fault. Reads result in userfaultfd getting1078* triggered.1079*/1080TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),1081TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),1082TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),1083TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),1084TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,1085uffd_data_handler, 2),1086TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),1087TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),1088TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),10891090{ 0 }1091};10921093static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)1094{1095struct test_desc *t;10961097for (t = &tests[0]; t->name; t++) {1098if (t->skip)1099continue;11001101struct test_params p = {1102.src_type = src_type,1103.test_desc = t,1104};11051106for_each_guest_mode(run_test, &p);1107}1108}11091110int main(int argc, char *argv[])1111{1112enum vm_mem_backing_src_type src_type;1113int opt;11141115src_type = DEFAULT_VM_MEM_SRC;11161117while ((opt = getopt(argc, argv, "hm:s:")) != -1) {1118switch (opt) {1119case 'm':1120guest_modes_cmdline(optarg);1121break;1122case 's':1123src_type = parse_backing_src_type(optarg);1124break;1125case 'h':1126default:1127help(argv[0]);1128exit(0);1129}1130}11311132for_each_test_and_guest_mode(src_type);1133return 0;1134}113511361137