/*
 *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
 *
 *  Copyright (C) 2000 Andrea Arcangeli <[email protected]> SuSE
 *  Copyright (C) 2000 Pavel Machek <[email protected]>
 *  Copyright (C) 2000 Karsten Keil <[email protected]>
 *  Copyright (C) 2001,2002 Andi Kleen <[email protected]>
 *  Copyright (C) 2005 Eric Biederman <[email protected]>
 */


#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>

#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
#else
#define GET_CR2_INTO_RCX movq %cr2, %rcx
#endif

/* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
 * because we need identity-mapped pages.
 */

#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)

	.text
	__HEAD
	.code64
	.globl startup_64
startup_64:

	/*
	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
	 * and someone has loaded an identity mapped page table
	 * for us.  These identity mapped page tables map all of the
	 * kernel pages and possibly all of memory.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 *
	 * We come here either directly from a 64bit bootloader, or from
	 * arch/x86_64/boot/compressed/head.S.
	 *
	 * We only come here initially at boot; nothing else comes here.
	 *
	 * Since we may be loaded at an address different from what we were
	 * compiled to run at, we first fix up the physical addresses in our
	 * page tables and then reload them.
	 */

	/* Compute the delta between the address I am compiled to run at and
	 * the address I am actually running at.
	 */
	leaq	_text(%rip), %rbp
	subq	$_text - __START_KERNEL_map, %rbp

	/* Is the address not 2M aligned? */
	movq	%rbp, %rax
	andl	$~PMD_PAGE_MASK, %eax
	testl	%eax, %eax
	jnz	bad_address

	/* Is the address too large? */
	leaq	_text(%rip), %rdx
	movq	$PGDIR_SIZE, %rax
	cmpq	%rax, %rdx
	jae	bad_address

	/* Fixup the physical addresses in the page table */
	addq	%rbp, init_level4_pgt + 0(%rip)
	addq	%rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
	addq	%rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)

	addq	%rbp, level3_ident_pgt + 0(%rip)

	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)

	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)

	/* Add an identity mapping if I am above 1G */
	leaq	_text(%rip), %rdi
	andq	$PMD_PAGE_MASK, %rdi

	movq	%rdi, %rax
	shrq	$PUD_SHIFT, %rax
	andq	$(PTRS_PER_PUD - 1), %rax
	jz	ident_complete

	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
	leaq	level3_ident_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)

	movq	%rdi, %rax
	shrq	$PMD_SHIFT, %rax
	andq	$(PTRS_PER_PMD - 1), %rax
	leaq	__PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
	leaq	level2_spare_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)
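
	/*
	 * Worked example of the index arithmetic above: if the kernel is
	 * loaded at 0x80000000 (2G), the 2M-aligned base in %rdi gives
	 * (base >> PUD_SHIFT) & 511 = 2, so that level3_ident_pgt slot is
	 * pointed at level2_spare_pgt, and (base >> PMD_SHIFT) & 511 = 0,
	 * so the first level2_spare_pgt entry maps the base with a 2M
	 * executable page.
	 */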
ident_complete:

	/*
	 * Fixup the kernel text+data virtual addresses.  Note that
	 * we might write invalid pmds when the kernel is relocated;
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	leaq	level2_kernel_pgt(%rip), %rdi
	leaq	4096(%rdi), %r8
	/* See if it is a valid page table entry */
1:	testq	$1, 0(%rdi)
	jz	2f
	addq	%rbp, 0(%rdi)
	/* Go to the next page */
2:	addq	$8, %rdi
	cmp	%r8, %rdi
	jne	1b

	/* Fixup phys_base */
	addq	%rbp, phys_base(%rip)

	/* Fixup trampoline */
	addq	%rbp, trampoline_level4_pgt + 0(%rip)
	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip)

	/* Due to ENTRY(), sometimes the empty space gets filled with
	 * zeros.  Better to take a jmp than rely on the empty space
	 * being filled with 0x90 (nop).
	 */
	jmp	secondary_startup_64
ENTRY(secondary_startup_64)
	/*
	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
	 * and someone has loaded a mapped page table.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 *
	 * We come here either from startup_64 (using physical addresses)
	 * or from trampoline.S (using virtual addresses).
	 *
	 * Using virtual addresses from trampoline.S removes the need
	 * to have any identity mapped pages in the kernel page table
	 * after the boot processor executes this code.
	 */

	/* Enable PAE mode and PGE */
	movl	$(X86_CR4_PAE | X86_CR4_PGE), %eax
	movq	%rax, %cr4

	/* Set up the early boot 4-level pagetables. */
	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
	addq	phys_base(%rip), %rax
	movq	%rax, %cr3

	/* Ensure I am executing from virtual addresses */
	movq	$1f, %rax
	jmp	*%rax
1:

	/* Check if NX is implemented */
	movl	$0x80000001, %eax
	cpuid
	movl	%edx,%edi

	/* Set up EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_SCE, %eax	/* Enable System Call */
	btl	$20,%edi		/* No Execute supported? */
	jnc	1f
	btsl	$_EFER_NX, %eax
1:	wrmsr				/* Make changes effective */

	/* Set up cr0 */
#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
			 X86_CR0_PG)
	movl	$CR0_STATE, %eax
	/* Make changes effective */
	movq	%rax, %cr0

	/* Set up a boot time stack */
	movq	stack_start(%rip),%rsp

	/* zero EFLAGS after setting rsp */
	pushq	$0
	popfq

	/*
	 * We must switch to a new descriptor in kernel space for the GDT
	 * because soon the kernel won't have access anymore to the userspace
	 * addresses where we're currently running.  We have to do that here
	 * because in 32bit we couldn't load a 64bit linear address.
	 */
	lgdt	early_gdt_descr(%rip)

	/* set up data segments */
	xorl	%eax,%eax
	movl	%eax,%ds
	movl	%eax,%ss
	movl	%eax,%es

	/*
	 * We don't really need to load %fs or %gs, but load them anyway
	 * to kill any stale realmode selectors.  This allows execution
	 * under VT hardware.
	 */
	movl	%eax,%fs
	movl	%eax,%gs

	/* Set up %gs.
	 *
	 * The base of %gs always points to the bottom of the irqstack
	 * union.  If the stack protector canary is enabled, it is
	 * located at %gs:40.  Note that, on SMP, the boot cpu uses
	 * the init data section until the per-cpu areas are set up.
	 */
	movl	$MSR_GS_BASE,%ecx
	movl	initial_gs(%rip),%eax
	movl	initial_gs+4(%rip),%edx
	wrmsr

	/* %esi is a pointer to the real mode structure with interesting
	 * info; pass it to C.
	 */
	movl	%esi, %edi

	/* Finally jump to run C code and to be on a real kernel address.
	 * Since we are running on identity-mapped space we have to jump
	 * to the full 64bit address; this is only possible with an
	 * indirect jump.  In addition we need to ensure %cs is set, so we
	 * make this a far return.
	 */
	movq	initial_code(%rip),%rax
	pushq	$0		# fake return address to stop unwinder
	pushq	$__KERNEL_CS	# set correct cs
	pushq	%rax		# target address in negative space
	lretq

	/* SMP bootup changes these two */
	__REFDATA
	.align	8
	ENTRY(initial_code)
	.quad	x86_64_start_kernel
	ENTRY(initial_gs)
	.quad	INIT_PER_CPU_VAR(irq_stack_union)

	ENTRY(stack_start)
	.quad	init_thread_union+THREAD_SIZE-8
	.word	0
	__FINITDATA

bad_address:
	jmp bad_address

	.section ".init.text","ax"
#ifdef CONFIG_EARLY_PRINTK
	.globl early_idt_handlers
early_idt_handlers:
	i = 0
	.rept NUM_EXCEPTION_VECTORS
	movl $i, %esi
	jmp early_idt_handler
	i = i + 1
	.endr
#endif
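
	/*
	 * Of the exception vectors below 32, only 8 (#DF), 10-14 (#TS,
	 * #NP, #SS, #GP, #PF) and 17 (#AC) push an error code; OR-ing
	 * their bits together gives the 0x27d00 mask tested below before
	 * the error code is popped into %r8.
	 */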

ENTRY(early_idt_handler)
#ifdef CONFIG_EARLY_PRINTK
	cmpl $2,early_recursion_flag(%rip)
	jz  1f
	incl early_recursion_flag(%rip)
	GET_CR2_INTO_RCX
	movq %rcx,%r9
	xorl %r8d,%r8d		# zero for error code
	movl %esi,%ecx		# get vector number
	# Test %ecx against mask of vectors that push error code.
	cmpl $31,%ecx
	ja 0f
	movl $1,%eax
	salq %cl,%rax
	testl $0x27d00,%eax
	je 0f
	popq %r8		# get error code
0:	movq 0(%rsp),%rcx	# get ip
	movq 8(%rsp),%rdx	# get cs
	xorl %eax,%eax
	leaq early_idt_msg(%rip),%rdi
	call early_printk
	cmpl $2,early_recursion_flag(%rip)
	jz 1f
	call dump_stack
#ifdef CONFIG_KALLSYMS
	leaq early_idt_ripmsg(%rip),%rdi
	movq 0(%rsp),%rsi	# get rip again
	call __print_symbol
#endif
#endif /* EARLY_PRINTK */
1:	hlt
	jmp 1b

#ifdef CONFIG_EARLY_PRINTK
early_recursion_flag:
	.long 0

early_idt_msg:
	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
early_idt_ripmsg:
	.asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
	.previous

#define NEXT_PAGE(name) \
	.balign	PAGE_SIZE; \
ENTRY(name)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT)			\
	i = 0 ;						\
	.rept (COUNT) ;					\
	.quad	(START) + (i << PMD_SHIFT) + (PERM) ;	\
	i = i + 1 ;					\
	.endr
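
	/*
	 * For example, PMDS(0, __PAGE_KERNEL_LARGE_EXEC, 3) expands to
	 * three consecutive 2M pmd entries:
	 *	.quad 0x000000 + __PAGE_KERNEL_LARGE_EXEC
	 *	.quad 0x200000 + __PAGE_KERNEL_LARGE_EXEC
	 *	.quad 0x400000 + __PAGE_KERNEL_LARGE_EXEC
	 */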

	.data
	/*
	 * This default setting generates an ident mapping at address 0x100000
	 * and a mapping for the kernel that precisely maps virtual address
	 * 0xffffffff80000000 to physical address 0x000000 (always using
	 * 2Mbyte large pages provided by PAE mode).
	 */
NEXT_PAGE(init_level4_pgt)
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_PAGE_OFFSET*8, 0
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level3_ident_pgt)
	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.fill	511,8,0

NEXT_PAGE(level3_kernel_pgt)
	.fill	L3_START_KERNEL,8,0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level2_fixmap_pgt)
	.fill	506,8,0
	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
	.fill	5,8,0

NEXT_PAGE(level1_fixmap_pgt)
	.fill	512,8,0

NEXT_PAGE(level2_ident_pgt)
	/* Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)

NEXT_PAGE(level2_kernel_pgt)
	/*
	 * 512 MB kernel mapping. We spend a full page on this pagetable
	 * anyway.
	 *
	 * The kernel code+data+bss must not be bigger than that.
	 *
	 * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
	 *  If you want to increase this then increase MODULES_VADDR
	 *  too.)
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
		KERNEL_IMAGE_SIZE/PMD_SIZE)

NEXT_PAGE(level2_spare_pgt)
	.fill	512, 8, 0

#undef PMDS
#undef NEXT_PAGE

	.data
	.align 16
	.globl early_gdt_descr
early_gdt_descr:
	.word	GDT_ENTRIES*8-1
early_gdt_descr_base:
	.quad	INIT_PER_CPU_VAR(gdt_page)

ENTRY(phys_base)
	/* This must match the first entry in level2_kernel_pgt */
	.quad	0x0000000000000000

#include "../../x86/xen/xen-head.S"

	.section .bss, "aw", @nobits
	.align L1_CACHE_BYTES
ENTRY(idt_table)
	.skip IDT_ENTRIES * 16

	__PAGE_ALIGNED_BSS
	.align PAGE_SIZE
ENTRY(empty_zero_page)
	.skip PAGE_SIZE