/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
#include "kvm-asm-offsets.h"
#include "run_flags.h"

#define WORD_SIZE (BITS_PER_LONG / 8)

#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE

#ifdef CONFIG_X86_64
#define VCPU_R8		__VCPU_REGS_R8  * WORD_SIZE
#define VCPU_R9		__VCPU_REGS_R9  * WORD_SIZE
#define VCPU_R10	__VCPU_REGS_R10 * WORD_SIZE
#define VCPU_R11	__VCPU_REGS_R11 * WORD_SIZE
#define VCPU_R12	__VCPU_REGS_R12 * WORD_SIZE
#define VCPU_R13	__VCPU_REGS_R13 * WORD_SIZE
#define VCPU_R14	__VCPU_REGS_R14 * WORD_SIZE
#define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
#endif

.macro VMX_DO_EVENT_IRQOFF call_insn call_target
	/*
	 * Unconditionally create a stack frame, getting the correct RSP on the
	 * stack (for x86-64) would take two instructions anyways, and RBP can
	 * be used to restore RSP to make objtool happy (see below).
	 */
	push %_ASM_BP
	mov %_ASM_SP, %_ASM_BP

#ifdef CONFIG_X86_64
	/*
	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
	 */
	and  $-16, %rsp
	push $__KERNEL_DS
	push %rbp
#endif
	pushf
	push $__KERNEL_CS
	\call_insn \call_target

	/*
	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
	 * the correct value.  objtool doesn't know the callee will IRET and,
	 * without the explicit restore, thinks the stack is getting walloped.
	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
	 */
	leave
	RET
.endm

.section .noinstr.text, "ax"

/**
 * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
 * @vmx:	struct vcpu_vmx *
 * @regs:	unsigned long * (to guest registers)
 * @flags:	VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
 *		VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
 *		VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO: vCPU can access host MMIO
 *
 * Returns:
 *	0 on VM-Exit, 1 on VM-Fail
 */
SYM_FUNC_START(__vmx_vcpu_run)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX

	/* Save @vmx for SPEC_CTRL handling */
	push %_ASM_ARG1

	/* Save @flags (used for VMLAUNCH vs. VMRESUME and mitigations). */
	push %_ASM_ARG3

	/*
	 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
	 * @regs is needed after VM-Exit to save the guest's register values.
	 */
	push %_ASM_ARG2
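
	/*
	 * The stack now holds, top down: @regs at (%_ASM_SP), @flags at
	 * WORD_SIZE(%_ASM_SP), and @vmx at 2*WORD_SIZE(%_ASM_SP).  The
	 * SPEC_CTRL, VERW, and VMRESUME checks below rely on these offsets.
	 */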

	lea (%_ASM_SP), %_ASM_ARG2
	call vmx_update_host_rsp

	ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

	/*
	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
	 * host's, write the MSR.
	 *
	 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
	 * there must not be any returns or indirect branches between this code
	 * and vmentry.
	 */
	mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VMX_spec_ctrl(%rdi), %rdx
	cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx
	je .Lspec_ctrl_done
	movl %edx, %eax
	shr $32, %rdx
#else
	mov VMX_spec_ctrl(%edi), %eax
	mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx
	xor %eax, %ecx
	mov VMX_spec_ctrl + 4(%edi), %edx
	mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edi
	xor %edx, %edi
	or %edi, %ecx
	je .Lspec_ctrl_done
#endif
	mov $MSR_IA32_SPEC_CTRL, %ecx
	wrmsr

.Lspec_ctrl_done:

	/*
	 * Since vmentry is serializing on affected CPUs, there's no need for
	 * an LFENCE to stop speculation from skipping the wrmsr.
	 */

	/* Load @regs to RAX. */
	mov (%_ASM_SP), %_ASM_AX

	/* Load guest registers.  Don't clobber flags. */
	mov VCPU_RCX(%_ASM_AX), %_ASM_CX
	mov VCPU_RDX(%_ASM_AX), %_ASM_DX
	mov VCPU_RBX(%_ASM_AX), %_ASM_BX
	mov VCPU_RBP(%_ASM_AX), %_ASM_BP
	mov VCPU_RSI(%_ASM_AX), %_ASM_SI
	mov VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VCPU_R8 (%_ASM_AX), %r8
	mov VCPU_R9 (%_ASM_AX), %r9
	mov VCPU_R10(%_ASM_AX), %r10
	mov VCPU_R11(%_ASM_AX), %r11
	mov VCPU_R12(%_ASM_AX), %r12
	mov VCPU_R13(%_ASM_AX), %r13
	mov VCPU_R14(%_ASM_AX), %r14
	mov VCPU_R15(%_ASM_AX), %r15
#endif
	/* Load guest RAX.  This kills the @regs pointer! */
	mov VCPU_RAX(%_ASM_AX), %_ASM_AX

	/*
	 * Note, ALTERNATIVE_2 works in reverse order.  If CLEAR_CPU_BUF_VM is
	 * enabled, do VERW unconditionally.  If CPU_BUF_VM_MMIO is enabled,
	 * check @flags to see if the vCPU has access to host MMIO, and if so,
	 * do VERW.  Else, do nothing (no mitigations needed/enabled).
	 */
	ALTERNATIVE_2 "",							\
		__stringify(testl $VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO, WORD_SIZE(%_ASM_SP);	\
			    jz .Lskip_mmio_verw;				\
			    VERW;						\
			    .Lskip_mmio_verw:),					\
		X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO,				\
		__stringify(VERW), X86_FEATURE_CLEAR_CPU_BUF_VM

	/* Check @flags to see if VMLAUNCH or VMRESUME is needed. */
	testl $VMX_RUN_VMRESUME, WORD_SIZE(%_ASM_SP)
	jz .Lvmlaunch

	/*
	 * After a successful VMRESUME/VMLAUNCH, control flow "magically"
	 * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
	 * So this isn't a typical function and objtool needs to be told to
	 * save the unwind state here and restore it below.
	 */
	UNWIND_HINT_SAVE

	/*
	 * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution
	 * resumes at the 'vmx_vmexit' label below.
	 */
.Lvmresume:
	vmresume
	jmp .Lvmfail

.Lvmlaunch:
	vmlaunch
	jmp .Lvmfail

	_ASM_EXTABLE(.Lvmresume, .Lfixup)
	_ASM_EXTABLE(.Lvmlaunch, .Lfixup)

SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

	/* Restore unwind state from before the VMRESUME/VMLAUNCH. */
	UNWIND_HINT_RESTORE
	ENDBR

	/* Temporarily save guest's RAX. */
	push %_ASM_AX
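
	/*
	 * Guest RAX now sits on top of the stack, so @regs is at
	 * WORD_SIZE(%_ASM_SP), with @flags and @vmx in the two slots below it.
	 */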

	/* Reload @regs to RAX. */
	mov WORD_SIZE(%_ASM_SP), %_ASM_AX

	/* Save all guest registers, including RAX from the stack */
	pop           VCPU_RAX(%_ASM_AX)
	mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
	mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
	mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
	mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
	mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
	mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov %r8,  VCPU_R8 (%_ASM_AX)
	mov %r9,  VCPU_R9 (%_ASM_AX)
	mov %r10, VCPU_R10(%_ASM_AX)
	mov %r11, VCPU_R11(%_ASM_AX)
	mov %r12, VCPU_R12(%_ASM_AX)
	mov %r13, VCPU_R13(%_ASM_AX)
	mov %r14, VCPU_R14(%_ASM_AX)
	mov %r15, VCPU_R15(%_ASM_AX)
#endif

	/* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
	xor %ebx, %ebx

.Lclear_regs:
	/* Discard @regs.  The register is irrelevant, it just can't be RBX. */
	pop %_ASM_AX

	/*
	 * Clear all general purpose registers except RSP and RBX to prevent
	 * speculative use of the guest's values, even those that are reloaded
	 * via the stack.  In theory, an L1 cache miss when restoring registers
	 * could lead to speculative execution with the guest's values.
	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
	 * free.  RSP and RBX are exempt as RSP is restored by hardware during
	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
	 * value.
	 */
	xor %eax, %eax
	xor %ecx, %ecx
	xor %edx, %edx
	xor %ebp, %ebp
	xor %esi, %esi
	xor %edi, %edi
#ifdef CONFIG_X86_64
	xor %r8d,  %r8d
	xor %r9d,  %r9d
	xor %r10d, %r10d
	xor %r11d, %r11d
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#endif

	/*
	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
	 * the first unbalanced RET after vmexit!
	 *
	 * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
	 * entries and (in some cases) RSB underflow.
	 *
	 * eIBRS has its own protection against poisoned RSB, so it doesn't
	 * need the RSB filling sequence.  But it does need to be enabled, and a
	 * single call to retire, before the first unbalanced RET.
	 */
	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
			   X86_FEATURE_RSB_VMEXIT_LITE

	pop %_ASM_ARG2	/* @flags */
	pop %_ASM_ARG1	/* @vmx */

	call vmx_spec_ctrl_restore_host

	CLEAR_BRANCH_HISTORY_VMEXIT

	/* Put return value in AX */
	mov %_ASM_BX, %_ASM_AX

	pop %_ASM_BX
#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET

.Lfixup:
	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne .Lvmfail
	ud2
.Lvmfail:
	/* VM-Fail: set return value to 1 */
	mov $1, %_ASM_BX
	jmp .Lclear_regs

SYM_FUNC_END(__vmx_vcpu_run)

SYM_FUNC_START(vmx_do_nmi_irqoff)
	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)

#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

/**
 * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
 * @field:	VMCS field encoding that failed
 * @fault:	%true if the VMREAD faulted, %false if it failed
 *
 * Save and restore volatile registers across a call to vmread_error().  Note,
 * all parameters are passed on the stack.
 */
SYM_FUNC_START(vmread_error_trampoline)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP

	push %_ASM_AX
	push %_ASM_CX
	push %_ASM_DX
#ifdef CONFIG_X86_64
	push %rdi
	push %rsi
	push %r8
	push %r9
	push %r10
	push %r11
#endif
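
	/*
	 * Relative to the frame pointer, the caller's stack holds the return
	 * address at WORD_SIZE(%_ASM_BP), @field at 2*WORD_SIZE(%_ASM_BP),
	 * and @fault at 3*WORD_SIZE(%_ASM_BP).
	 */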

	/* Load @field and @fault to arg1 and arg2 respectively. */
	mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
	mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1

	call vmread_error_trampoline2

	/* Zero out @fault, which will be popped into the result register. */
	_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

#ifdef CONFIG_X86_64
	pop %r11
	pop %r10
	pop %r9
	pop %r8
	pop %rsi
	pop %rdi
#endif
	pop %_ASM_DX
	pop %_ASM_CX
	pop %_ASM_AX
	pop %_ASM_BP

	RET
SYM_FUNC_END(vmread_error_trampoline)
#endif

.section .text, "ax"

#ifndef CONFIG_X86_FRED

SYM_FUNC_START(vmx_do_interrupt_irqoff)
	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)

#endif