// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
	/*
	 * 5-level paging is detected and enabled at kernel decompression
	 * stage. Only check if it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

	__pgtable_l5_enabled = 1;
	pgdir_shift = 48;
	ptrs_per_p4d = 512;

	return true;
}

static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	va_text = physaddr - p2v_offset;
	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (la57) {
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover. These
	 * entries should *NOT* have the global bit set! This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}