Path: blob/master/tools/testing/selftests/kvm/guest_memfd_test.c
38189 views
// SPDX-License-Identifier: GPL-2.01/*2* Copyright Intel Corporation, 20233*4* Author: Chao Peng <[email protected]>5*/6#include <stdlib.h>7#include <string.h>8#include <unistd.h>9#include <errno.h>10#include <stdio.h>11#include <fcntl.h>1213#include <linux/bitmap.h>14#include <linux/falloc.h>15#include <linux/sizes.h>16#include <sys/mman.h>17#include <sys/types.h>18#include <sys/stat.h>1920#include "kvm_util.h"21#include "numaif.h"22#include "test_util.h"23#include "ucall_common.h"2425static size_t page_size;2627static void test_file_read_write(int fd, size_t total_size)28{29char buf[64];3031TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,32"read on a guest_mem fd should fail");33TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,34"write on a guest_mem fd should fail");35TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,36"pread on a guest_mem fd should fail");37TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,38"pwrite on a guest_mem fd should fail");39}4041static void test_mmap_cow(int fd, size_t size)42{43void *mem;4445mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);46TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");47}4849static void test_mmap_supported(int fd, size_t total_size)50{51const char val = 0xaa;52char *mem;53size_t i;54int ret;5556mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);5758memset(mem, val, total_size);59for (i = 0; i < total_size; i++)60TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);6162ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,63page_size);64TEST_ASSERT(!ret, "fallocate the first page should succeed.");6566for (i = 0; i < page_size; i++)67TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);68for (; i < total_size; i++)69TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);7071memset(mem, val, page_size);72for (i = 0; i < total_size; i++)73TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);7475kvm_munmap(mem, total_size);76}7778static void test_mbind(int fd, size_t total_size)79{80const unsigned long nodemask_0 = 1; /* nid: 0 */81unsigned long nodemask = 0;82unsigned long maxnode = 8;83int policy;84char *mem;85int ret;8687if (!is_multi_numa_node_system())88return;8990mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);9192/* Test MPOL_INTERLEAVE policy */93kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);94kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);95TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,96"Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",97MPOL_INTERLEAVE, nodemask_0, policy, nodemask);9899/* Test basic MPOL_BIND policy */100kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);101kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);102TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,103"Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",104MPOL_BIND, nodemask_0, policy, nodemask);105106/* Test MPOL_DEFAULT policy */107kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);108kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);109TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,110"Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",111MPOL_DEFAULT, policy, nodemask);112113/* Test with invalid policy */114ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);115TEST_ASSERT(ret == -1 && errno == EINVAL,116"mbind with invalid policy should fail with EINVAL");117118kvm_munmap(mem, total_size);119}120121static void test_numa_allocation(int fd, size_t total_size)122{123unsigned long node0_mask = 1; /* Node 0 */124unsigned long node1_mask = 2; /* Node 1 */125unsigned long maxnode = 8;126void *pages[4];127int status[4];128char *mem;129int i;130131if (!is_multi_numa_node_system())132return;133134mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);135136for (i = 0; i < 4; i++)137pages[i] = (char *)mem + page_size * i;138139/* Set NUMA policy after allocation */140memset(mem, 0xaa, page_size);141kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);142kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);143144/* Set NUMA policy before allocation */145kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);146kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);147memset(mem, 0xaa, total_size);148149/* Validate if pages are allocated on specified NUMA nodes */150kvm_move_pages(0, 4, pages, NULL, status, 0);151TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);152TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);153TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);154TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);155156/* Punch hole for all pages */157kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);158159/* Change NUMA policy nodes and reallocate */160kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);161kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);162memset(mem, 0xaa, total_size);163164kvm_move_pages(0, 4, pages, NULL, status, 0);165TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);166TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);167TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);168TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);169170kvm_munmap(mem, total_size);171}172173static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)174{175const char val = 0xaa;176char *mem;177size_t i;178179mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);180181TEST_EXPECT_SIGBUS(memset(mem, val, map_size));182TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));183184for (i = 0; i < accessible_size; i++)185TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);186187kvm_munmap(mem, map_size);188}189190static void test_fault_overflow(int fd, size_t total_size)191{192test_fault_sigbus(fd, total_size, total_size * 4);193}194195static void test_fault_private(int fd, size_t total_size)196{197test_fault_sigbus(fd, 0, total_size);198}199200static void test_mmap_not_supported(int fd, size_t total_size)201{202char *mem;203204mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);205TEST_ASSERT_EQ(mem, MAP_FAILED);206207mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);208TEST_ASSERT_EQ(mem, MAP_FAILED);209}210211static void test_file_size(int fd, size_t total_size)212{213struct stat sb;214int ret;215216ret = fstat(fd, &sb);217TEST_ASSERT(!ret, "fstat should succeed");218TEST_ASSERT_EQ(sb.st_size, total_size);219TEST_ASSERT_EQ(sb.st_blksize, page_size);220}221222static void test_fallocate(int fd, size_t total_size)223{224int ret;225226ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);227TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");228229ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,230page_size - 1, page_size);231TEST_ASSERT(ret, "fallocate with unaligned offset should fail");232233ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);234TEST_ASSERT(ret, "fallocate beginning at total_size should fail");235236ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);237TEST_ASSERT(ret, "fallocate beginning after total_size should fail");238239ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,240total_size, page_size);241TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");242243ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,244total_size + page_size, page_size);245TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");246247ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,248page_size, page_size - 1);249TEST_ASSERT(ret, "fallocate with unaligned size should fail");250251ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,252page_size, page_size);253TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");254255ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);256TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");257}258259static void test_invalid_punch_hole(int fd, size_t total_size)260{261struct {262off_t offset;263off_t len;264} testcases[] = {265{0, 1},266{0, page_size - 1},267{0, page_size + 1},268269{1, 1},270{1, page_size - 1},271{1, page_size},272{1, page_size + 1},273274{page_size, 1},275{page_size, page_size - 1},276{page_size, page_size + 1},277};278int ret, i;279280for (i = 0; i < ARRAY_SIZE(testcases); i++) {281ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,282testcases[i].offset, testcases[i].len);283TEST_ASSERT(ret == -1 && errno == EINVAL,284"PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",285testcases[i].offset, testcases[i].len);286}287}288289static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,290uint64_t guest_memfd_flags)291{292size_t size;293int fd;294295for (size = 1; size < page_size; size++) {296fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);297TEST_ASSERT(fd < 0 && errno == EINVAL,298"guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",299size);300}301}302303static void test_create_guest_memfd_multiple(struct kvm_vm *vm)304{305int fd1, fd2, ret;306struct stat st1, st2;307308fd1 = __vm_create_guest_memfd(vm, page_size, 0);309TEST_ASSERT(fd1 != -1, "memfd creation should succeed");310311ret = fstat(fd1, &st1);312TEST_ASSERT(ret != -1, "memfd fstat should succeed");313TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");314315fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);316TEST_ASSERT(fd2 != -1, "memfd creation should succeed");317318ret = fstat(fd2, &st2);319TEST_ASSERT(ret != -1, "memfd fstat should succeed");320TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");321322ret = fstat(fd1, &st1);323TEST_ASSERT(ret != -1, "memfd fstat should succeed");324TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");325TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");326327close(fd2);328close(fd1);329}330331static void test_guest_memfd_flags(struct kvm_vm *vm)332{333uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);334uint64_t flag;335int fd;336337for (flag = BIT(0); flag; flag <<= 1) {338fd = __vm_create_guest_memfd(vm, page_size, flag);339if (flag & valid_flags) {340TEST_ASSERT(fd >= 0,341"guest_memfd() with flag '0x%lx' should succeed",342flag);343close(fd);344} else {345TEST_ASSERT(fd < 0 && errno == EINVAL,346"guest_memfd() with flag '0x%lx' should fail with EINVAL",347flag);348}349}350}351352#define gmem_test(__test, __vm, __flags) \353do { \354int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \355\356test_##__test(fd, page_size * 4); \357close(fd); \358} while (0)359360static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)361{362test_create_guest_memfd_multiple(vm);363test_create_guest_memfd_invalid_sizes(vm, flags);364365gmem_test(file_read_write, vm, flags);366367if (flags & GUEST_MEMFD_FLAG_MMAP) {368if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {369gmem_test(mmap_supported, vm, flags);370gmem_test(fault_overflow, vm, flags);371gmem_test(numa_allocation, vm, flags);372} else {373gmem_test(fault_private, vm, flags);374}375376gmem_test(mmap_cow, vm, flags);377gmem_test(mbind, vm, flags);378} else {379gmem_test(mmap_not_supported, vm, flags);380}381382gmem_test(file_size, vm, flags);383gmem_test(fallocate, vm, flags);384gmem_test(invalid_punch_hole, vm, flags);385}386387static void test_guest_memfd(unsigned long vm_type)388{389struct kvm_vm *vm = vm_create_barebones_type(vm_type);390uint64_t flags;391392test_guest_memfd_flags(vm);393394__test_guest_memfd(vm, 0);395396flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);397if (flags & GUEST_MEMFD_FLAG_MMAP)398__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);399400/* MMAP should always be supported if INIT_SHARED is supported. */401if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)402__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |403GUEST_MEMFD_FLAG_INIT_SHARED);404405kvm_vm_free(vm);406}407408static void guest_code(uint8_t *mem, uint64_t size)409{410size_t i;411412for (i = 0; i < size; i++)413__GUEST_ASSERT(mem[i] == 0xaa,414"Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);415416memset(mem, 0xff, size);417GUEST_DONE();418}419420static void test_guest_memfd_guest(void)421{422/*423* Skip the first 4gb and slot0. slot0 maps <1gb and is used to back424* the guest's code, stack, and page tables, and low memory contains425* the PCI hole and other MMIO regions that need to be avoided.426*/427const uint64_t gpa = SZ_4G;428const int slot = 1;429430struct kvm_vcpu *vcpu;431struct kvm_vm *vm;432uint8_t *mem;433size_t size;434int fd, i;435436if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))437return;438439vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);440441TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,442"Default VM type should support MMAP, supported flags = 0x%x",443vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));444TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,445"Default VM type should support INIT_SHARED, supported flags = 0x%x",446vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));447448size = vm->page_size;449fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |450GUEST_MEMFD_FLAG_INIT_SHARED);451vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);452453mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);454memset(mem, 0xaa, size);455kvm_munmap(mem, size);456457virt_pg_map(vm, gpa, gpa);458vcpu_args_set(vcpu, 2, gpa, size);459vcpu_run(vcpu);460461TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);462463mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);464for (i = 0; i < size; i++)465TEST_ASSERT_EQ(mem[i], 0xff);466467close(fd);468kvm_vm_free(vm);469}470471int main(int argc, char *argv[])472{473unsigned long vm_types, vm_type;474475TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));476477page_size = getpagesize();478479/*480* Not all architectures support KVM_CAP_VM_TYPES. However, those that481* support guest_memfd have that support for the default VM type.482*/483vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);484if (!vm_types)485vm_types = BIT(VM_TYPE_DEFAULT);486487for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))488test_guest_memfd(vm_type);489490test_guest_memfd_guest();491}492493494