/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/boot/head.S
 *
 *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
 */

/*
 *  head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc elsewhere on a compressed
 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there. This is also
 * useful for future device drivers that need to access the BIOS via VM86
 * mode.
 */

/*
 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
 */
	.code32
	.text

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
#include <asm/trapnr.h>

/*
 * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT would result
 * in assembly errors, because the excessive alignment would force .org to
 * move backward.
 */
#undef __ALIGN
#define __ALIGN		.balign	16, 0x90

/*
 * Locally defined symbols should be marked hidden:
 */
	.hidden _bss
	.hidden _ebss
	.hidden _end

	__HEAD

/*
 * This macro gives the relative virtual address of X, i.e. the offset of X
 * from startup_32. This is the same as the link-time virtual address of X,
 * since startup_32 is at 0, but defining it this way tells the
 * assembler/linker that we do not want the actual run-time address of X. This
 * prevents the linker from trying to create unwanted run-time relocation
 * entries for the reference when the compressed kernel is linked as PIE.
 *
 * A reference X(%reg) will result in the link-time VA of X being stored with
 * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
 * adds the 64-bit base address where the kernel is loaded.
 *
 * Replacing it with (X - startup_32)(%reg) results in the offset being
 * stored, and no run-time relocation.
 *
 * The macro should be used as a displacement with a base register containing
 * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
 * [$ rva(X)].
 *
 * This macro can only be used from within the .head.text section, since the
 * expression requires startup_32 to be in the same section as the code being
 * assembled.
 */
#define rva(X) ((X) - startup_32)

	.code32
SYM_FUNC_START(startup_32)
	/*
	 * 32bit entry is 0 and it is ABI so immutable!
	 * If we come here directly from a bootloader,
	 * kernel(text+data+bss+brk), ramdisk, zero_page, command line
	 * all need to be under the 4G limit.
	 */
	cld
	cli

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at. This can only be done
 * with a short local call on x86. Nothing else will tell us what
 * address we are running at. The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) is used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp
	subl	$ rva(1b), %ebp
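/*
 * A worked example of the call/pop sequence above, with a purely
 * illustrative load address: if the image is linked to run at 0 but the
 * bootloader placed startup_32 at 0x1000000, the call pushes the run-time
 * address of label 1, i.e. 0x1000000 + rva(1b). Popping that value and
 * subtracting the link-time offset $ rva(1b) leaves %ebp = 0x1000000, the
 * run-time address of startup_32.
 */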
	/* Load new GDT with the 64bit segments using 32bit descriptor */
	leal	rva(gdt)(%ebp), %eax
	movl	%eax, 2(%eax)
	lgdt	(%eax)

	/* Load segment registers with our descriptors */
	movl	$__BOOT_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	movl	%eax, %ss

	/* Set up a stack and load CS from the current GDT */
	leal	rva(boot_stack_end)(%ebp), %esp

	pushl	$__KERNEL32_CS
	leal	rva(1f)(%ebp), %eax
	pushl	%eax
	lretl
1:

	/* Set up exception handling for SEV-ES */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	call	startup32_load_idt
#endif

	/* Make sure the CPU supports long mode. */
	call	verify_cpu
	testl	%eax, %eax
	jnz	.Lno_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:
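/*
 * The decl/addl/notl/andl sequence above is the usual
 * (addr + align - 1) & ~(align - 1) round-up idiom, relying on
 * kernel_alignment being a power of two. With illustrative values,
 * kernel_alignment = 0x200000 (2M) and %ebp = 0x1234000:
 *
 *	%eax = 0x200000 - 1            = 0x001fffff
 *	%ebx = 0x1234000 + 0x1fffff    = 0x01433fff
 *	%eax = ~0x1fffff               = 0xffe00000
 *	%ebx = 0x1433fff & 0xffe00000  = 0x01400000
 *
 * i.e. the load address rounded up to the next alignment boundary.
 */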
	/* Target address to relocate to for decompression */
	addl	BP_init_size(%esi), %ebx
	subl	$ rva(_end), %ebx

/*
 * Prepare for entering 64 bit mode
 */

	/* Enable PAE mode */
	movl	%cr4, %eax
	orl	$X86_CR4_PAE, %eax
	movl	%eax, %cr4

/*
 * Build early 4G boot pagetable
 */
	/*
	 * If SEV is active then set the encryption mask in the page tables.
	 * This ensures that the kernel is copied and decompressed with
	 * encryption enabled.
	 */
	xorl	%edx, %edx
#ifdef	CONFIG_AMD_MEM_ENCRYPT
	call	get_sev_encryption_bit
	xorl	%edx, %edx
	testl	%eax, %eax
	jz	1f
	subl	$32, %eax	/* Encryption bit is always above bit 31 */
	bts	%eax, %edx	/* Set encryption mask for page tables */
	/*
	 * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
	 * startup32_check_sev_cbit() will do a check. sev_enable() will
	 * initialize sev_status with all the bits reported by the
	 * SEV_STATUS MSR (MSR_AMD64_SEV) later, but only
	 * MSR_AMD64_SEV_ENABLED_BIT needs to be set for now.
	 */
	movl	$1, rva(sev_status)(%ebp)
1:
#endif

	/* Initialize Page tables to 0 */
	leal	rva(pgtable)(%ebx), %edi
	xorl	%eax, %eax
	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
	rep	stosl

	/* Build Level 4 */
	leal	rva(pgtable + 0)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)

	/* Build Level 3 */
	leal	rva(pgtable + 0x1000)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	$4, %ecx
1:	movl	%eax, 0x00(%edi)
	addl	%edx, 0x04(%edi)
	addl	$0x00001000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Build Level 2 */
	leal	rva(pgtable + 0x2000)(%ebx), %edi
	movl	$0x00000183, %eax
	movl	$2048, %ecx
1:	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)
	addl	$0x00200000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b
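/*
 * For reference, the constants used in the loops above decode as standard
 * x86 page-table entry bits:
 *
 *	0x1007 = address of the next-lower table (0x1000 bytes further into
 *		 pgtable) | PRESENT | RW | USER
 *	0x0183 = PRESENT | RW | PSE (2M page) | GLOBAL
 *
 * Four level-2 tables of 512 entries each give 2048 * 2M = 4G of identity
 * mapping. %edx carries the SEV encryption mask (if any), which the addl
 * into offset 4 folds into the upper 32 bits of each 64-bit entry.
 */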
	/* Enable the boot page tables */
	leal	rva(pgtable)(%ebx), %eax
	movl	%eax, %cr3

	/* Enable Long mode in EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* After gdt is loaded */
	xorl	%eax, %eax
	lldt	%ax
	movl	$__BOOT_TSS, %eax
	ltr	%ax

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/* Check if the C-bit position is correct when SEV is active */
	call	startup32_check_sev_cbit
#endif

	/*
	 * Set up for the jump to 64bit mode
	 *
	 * When the jump is performed we will be in long mode but
	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
	 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
	 * We place all of the values on our mini stack so lret can be
	 * used to perform that far jump.
	 */
	leal	rva(startup_64)(%ebp), %eax
	pushl	$__KERNEL_CS
	pushl	%eax

	/* Enter paged protected Mode, activating Long Mode */
	movl	$CR0_STATE, %eax
	movl	%eax, %cr0

	/* Jump from 32bit compatibility mode into 64bit mode. */
	lret
SYM_FUNC_END(startup_32)

	.code64
	.org 0x200
SYM_CODE_START(startup_64)
	/*
	 * 64bit entry is 0x200 and it is ABI so immutable!
	 * We come here either from startup_32 or directly from a
	 * 64bit bootloader.
	 * If we come here from a bootloader, kernel(text+data+bss+brk),
	 * ramdisk, zero_page, command line could be above 4G.
	 * We depend on an identity mapped page table being provided
	 * that maps our entire kernel(text+data+bss+brk), zero page
	 * and command line.
	 */

	cld
	cli

	/* Set up data segments. */
	xorl	%eax, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/*
	 * Compute the decompressed kernel start address. It is the address
	 * we were loaded at, aligned to a 2M boundary. %rbp contains the
	 * decompressed kernel start address.
	 *
	 * If it is a relocatable kernel then decompress and run the kernel
	 * from the load address aligned to a 2MB boundary, otherwise
	 * decompress and run the kernel from LOAD_PHYSICAL_ADDR.
	 *
	 * We cannot rely on the calculation done in 32-bit mode, since we
	 * may have been invoked via the 64-bit entry point.
	 */

	/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
	movl	BP_kernel_alignment(%rsi), %eax
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:

	/* Target address to relocate to for decompression */
	movl	BP_init_size(%rsi), %ebx
	subl	$ rva(_end), %ebx
	addq	%rbp, %rbx

	/* Set up the stack */
	leaq	rva(boot_stack_end)(%rbx), %rsp

	/*
	 * At this point we are in long mode with 4-level paging enabled,
	 * but we might want to enable 5-level paging or vice versa.
	 *
	 * The problem is that we cannot do it directly. Setting or clearing
	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
	 * long mode and paging first.
	 *
	 * We also need a trampoline in lower memory to switch over from
	 * 4- to 5-level paging for cases when the bootloader puts the kernel
	 * above 4G, but didn't enable 5-level paging for us.
	 *
	 * The same trampoline can be used to switch from 5- to 4-level paging
	 * mode, such as when starting a 4-level paging kernel via kexec()
	 * when the original kernel ran in 5-level paging mode.
	 *
	 * For the trampoline, we need the top page table to reside in lower
	 * memory as we don't have a way to load 64-bit values into CR3 in
	 * 32-bit mode.
	 */

	/* Make sure we have a GDT with a 32-bit code segment */
	leaq	gdt64(%rip), %rax
	addq	%rax, 2(%rax)
	lgdt	(%rax)

	/* Reload CS so IRET returns to a CS actually in the GDT */
	pushq	$__KERNEL_CS
	leaq	.Lon_kernel_cs(%rip), %rax
	pushq	%rax
	lretq

.Lon_kernel_cs:
	/*
	 * RSI holds a pointer to a boot_params structure provided by the
	 * loader, and this needs to be preserved across C function calls. So
	 * move it into a callee saved register.
	 */
	movq	%rsi, %r15

	call	load_stage1_idt

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * Now that the stage1 interrupt handlers are set up, #VC exceptions from
	 * CPUID instructions can be properly handled for SEV-ES guests.
	 *
	 * For SEV-SNP, the CPUID table also needs to be set up in advance of any
	 * CPUID instructions being issued, so go ahead and do that now via
	 * sev_enable(), which will also handle the rest of the SEV-related
	 * detection/setup to ensure that has been done in advance of any dependent
	 * code. Pass the boot_params pointer as the first argument.
	 */
	movq	%r15, %rdi
	call	sev_enable
#endif

	/* Preserve only the CR4 bits that must be preserved, and clear the rest */
	movq	%cr4, %rax
	andl	$(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
	movq	%rax, %cr4

	/*
	 * configure_5level_paging() updates the number of paging levels using
	 * a trampoline in 32-bit addressable memory if the current number does
	 * not match the desired number.
	 *
	 * Pass the boot_params pointer as the first argument. The second
	 * argument is the relocated address of the page table to use instead
	 * of the page table in trampoline memory (if required).
	 */
	movq	%r15, %rdi
	leaq	rva(top_pgtable)(%rbx), %rsi
	call	configure_5level_paging

	/* Zero EFLAGS */
	pushq	$0
	popfq

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 */
	leaq	(_bss-8)(%rip), %rsi
	leaq	rva(_bss-8)(%rbx), %rdi
	movl	$(_bss - startup_32), %ecx
	shrl	$3, %ecx
	std
	rep	movsq
	cld
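/*
 * A note on the copy above: the source and destination ranges may overlap,
 * with the destination at the higher address, so the copy is done backwards.
 * DF is set with std, %rsi and %rdi point at the last qword of each range
 * (_bss - 8), and rep movsq walks downwards, so no source qword is
 * overwritten before it has been read. DF is cleared again with cld, as the
 * C code called later expects.
 */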
	/*
	 * The GDT may get overwritten either during the copy we just did or
	 * during extract_kernel below. To avoid any issues, repoint the GDTR
	 * to the new copy of the GDT.
	 */
	leaq	rva(gdt64)(%rbx), %rax
	leaq	rva(gdt)(%rbx), %rdx
	movq	%rdx, 2(%rax)
	lgdt	(%rax)

/*
 * Jump to the relocated address.
 */
	leaq	rva(.Lrelocated)(%rbx), %rax
	jmp	*%rax
SYM_CODE_END(startup_64)

	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

/*
 * Clear BSS (stack is currently empty)
 */
	xorl	%eax, %eax
	leaq	_bss(%rip), %rdi
	leaq	_ebss(%rip), %rcx
	subq	%rdi, %rcx
	shrq	$3, %rcx
	rep	stosq

	call	load_stage2_idt

	/* Pass boot_params to initialize_identity_maps() */
	movq	%r15, %rdi
	call	initialize_identity_maps

/*
 * Do the extraction, and jump to the new kernel.
 */
	/* pass struct boot_params pointer and output target address */
	movq	%r15, %rdi
	movq	%rbp, %rsi
	call	extract_kernel		/* returns kernel entry point in %rax */

/*
 * Jump to the decompressed kernel.
 */
	movq	%r15, %rsi
	jmp	*%rax
SYM_FUNC_END(.Lrelocated)

	.code32
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
	/* This isn't an x86-64 CPU, so hang intentionally; we cannot continue */
1:
	hlt
	jmp	1b
SYM_FUNC_END(.Lno_longmode)

	.globl	verify_cpu
#include "../../kernel/verify_cpu.S"

	.data
SYM_DATA_START_LOCAL(gdt64)
	.word	gdt_end - gdt - 1
	.quad	gdt - gdt64
SYM_DATA_END(gdt64)
	.balign	8
SYM_DATA_START_LOCAL(gdt)
	.word	gdt_end - gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
	.quad	0x0080890000000000	/* TS descriptor */
	.quad	0x0000000000000000	/* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)

SYM_DATA_START(boot_idt_desc)
	.word	boot_idt_end - boot_idt - 1
	.quad	0
SYM_DATA_END(boot_idt_desc)
	.balign 8
SYM_DATA_START(boot_idt)
	.rept	BOOT_IDT_ENTRIES
	.quad	0
	.quad	0
	.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)

/*
 * Stack and heap for decompression
 */
	.bss
	.balign 4
SYM_DATA_START_LOCAL(boot_stack)
	.fill BOOT_STACK_SIZE, 1, 0
	.balign 16
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)

/*
 * Space for page tables (not in .bss so not zeroed)
 */
	.section ".pgtable","aw",@nobits
	.balign 4096
SYM_DATA_LOCAL(pgtable,		.fill BOOT_PGT_SIZE, 1, 0)

/*
 * This page table is used instead of the page table in trampoline memory.
 */
SYM_DATA_LOCAL(top_pgtable,	.fill PAGE_SIZE, 1, 0)