Path: blob/master/tools/testing/selftests/kvm/x86/state_test.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include "svm_util.h"

#define L2_GUEST_STACK_SIZE 256

void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	GUEST_SYNC(6);
	/* Done, exit to L1 and never come back. */
	vmcall();
}

static void svm_l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	GUEST_ASSERT(svm->vmcb_gpa);
	/* Prepare for L2 execution. */
	generic_svm_setup(svm, svm_l2_guest_code,
			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(3);
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(5);
	vmcb->save.rip += 3;
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(7);
}

void vmx_l2_guest_code(void)
{
	GUEST_SYNC(6);

	/* Exit to L1 */
	vmcall();

	/* L1 has now set up a shadow VMCS for us. */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_SYNC(10);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
	GUEST_SYNC(11);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
	GUEST_SYNC(12);

	/* Done, exit to L1 and never come back. */
	vmcall();
}

static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	GUEST_SYNC(4);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* Check that the launched state is preserved. */
	GUEST_ASSERT(vmlaunch());

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_SYNC(7);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);

	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmlaunch());
	GUEST_SYNC(8);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());

	vmwrite(GUEST_RIP, 0xc0ffee);
	GUEST_SYNC(9);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);

	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
	GUEST_SYNC(13);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
}

static void __attribute__((__flatten__)) guest_code(void *arg)
{
	GUEST_SYNC(1);

	if (this_cpu_has(X86_FEATURE_XSAVE)) {
		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
		uint8_t buffer[PAGE_SIZE];

		memset(buffer, 0xcc, sizeof(buffer));

		/*
		 * Modify state for all supported xfeatures to take them out of
		 * their "init" state, i.e. to make them show up in XSTATE_BV.
		 *
		 * Note off-by-default features, e.g. AMX, are out of scope for
		 * this particular testcase as they have a different ABI.
		 */
		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
		asm volatile ("fincstp");

		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_YMM)
			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
		}

		if (this_cpu_has(X86_FEATURE_MPX)) {
			uint64_t bounds[2] = { 10, 0xffffffffull };
			uint64_t output[2] = { };

			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);

			/*
			 * Don't bother trying to get BNDCSR into the INUSE
			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
			 * is simpler than doing XRSTOR here in the guest.
			 *
			 * However, temporarily enable MPX in BNDCFGS so that
			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
			 * enabled, all MPX instructions are treated as NOPs.
			 *
			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
			 * mnemonics/registers has been removed from gcc and
			 * clang (and was never fully supported by clang).
			 */
			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
			/*
			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
			 * that BND1 actually got loaded.
			 */
			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
			wrmsr(MSR_IA32_BNDCFGS, 0);

			GUEST_ASSERT_EQ(bounds[0], output[0]);
			GUEST_ASSERT_EQ(bounds[1], output[1]);
		}
		if (this_cpu_has(X86_FEATURE_PKU)) {
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
			set_cr4(get_cr4() | X86_CR4_PKE);
			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));

			wrpkru(-1u);
		}
	}

	GUEST_SYNC(2);

	if (arg) {
		if (this_cpu_has(X86_FEATURE_SVM))
			svm_l1_guest_code(arg);
		else
			vmx_l1_guest_code(arg);
	}

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	uint64_t *xstate_bv, saved_xstate_bv;
	vm_vaddr_t nested_gva = 0;
	struct kvm_cpuid2 empty_cpuid = {};
	struct kvm_regs regs1, regs2;
	struct kvm_vcpu *vcpu, *vcpuN;
	struct kvm_vm *vm;
	struct kvm_x86_state *state;
	struct ucall uc;
	int stage;

	/* Create VM */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vcpu_regs_get(vcpu, &regs1);

	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
		if (kvm_cpu_has(X86_FEATURE_SVM))
			vcpu_alloc_svm(vm, &nested_gva);
		else if (kvm_cpu_has(X86_FEATURE_VMX))
			vcpu_alloc_vmx(vm, &nested_gva);
	}

	if (!nested_gva)
		pr_info("will skip nested state checks\n");

	vcpu_args_set(vcpu, 1, nested_gva);

	for (stage = 1;; stage++) {
		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC:
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* UCALL_SYNC is handled here. */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
			    stage, (ulong)uc.args[1]);

		state = vcpu_save_state(vcpu);
		memset(&regs1, 0, sizeof(regs1));
		vcpu_regs_get(vcpu, &regs1);

		kvm_vm_release(vm);

		/* Restore state in a new VM. */
		vcpu = vm_recreate_with_one_vcpu(vm);
		vcpu_load_state(vcpu, state);

		/*
		 * Restore XSAVE state in a dummy vCPU, first without doing
		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
		 * load only XSAVE state, MSRs in particular have a much more
		 * convoluted ABI.
		 *
		 * Load two versions of XSAVE state: one with the actual guest
		 * XSAVE state, and one with all supported features forced "on"
		 * in xstate_bv, e.g. to ensure that KVM allows loading all
		 * supported features, even if something goes awry in saving
		 * the original snapshot.
		 */
		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
		saved_xstate_bv = *xstate_bv;

		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = kvm_cpu_supported_xcr0();
		vcpu_xsave_set(vcpuN, state->xsave);

		vcpu_init_cpuid(vcpuN, &empty_cpuid);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = saved_xstate_bv;
		vcpu_xsave_set(vcpuN, state->xsave);

		kvm_x86_state_cleanup(state);

		memset(&regs2, 0, sizeof(regs2));
		vcpu_regs_get(vcpu, &regs2);
		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
			    (ulong) regs2.rdi, (ulong) regs2.rsi);
	}

done:
	kvm_vm_free(vm);
}