Path: arch/powerpc/include/asm/book3s/64/radix.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_PGTABLE_RADIX_H
#define _ASM_POWERPC_PGTABLE_RADIX_H

#include <asm/asm-const.h>

#ifndef __ASSEMBLY__
#include <asm/cmpxchg.h>
#endif

#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/radix-64k.h>
#else
#include <asm/book3s/64/radix-4k.h>
#endif

#ifndef __ASSEMBLY__
#include <asm/book3s/64/tlbflush-radix.h>
#include <asm/cpu_has_feature.h>
#endif

/* An empty PTE can still have an R or C writeback */
#define RADIX_PTE_NONE_MASK		(_PAGE_DIRTY | _PAGE_ACCESSED)

/* Bits to set in an RPMD/RPUD/RPGD */
#define RADIX_PMD_VAL_BITS		(0x8000000000000000UL | RADIX_PTE_INDEX_SIZE)
#define RADIX_PUD_VAL_BITS		(0x8000000000000000UL | RADIX_PMD_INDEX_SIZE)
#define RADIX_PGD_VAL_BITS		(0x8000000000000000UL | RADIX_PUD_INDEX_SIZE)

/* Don't have anything in the reserved bits and leaf bits */
#define RADIX_PMD_BAD_BITS		0x60000000000000e0UL
#define RADIX_PUD_BAD_BITS		0x60000000000000e0UL
#define RADIX_P4D_BAD_BITS		0x60000000000000e0UL

#define RADIX_PMD_SHIFT		(PAGE_SHIFT + RADIX_PTE_INDEX_SIZE)
#define RADIX_PUD_SHIFT		(RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE)
#define RADIX_PGD_SHIFT		(RADIX_PUD_SHIFT + RADIX_PUD_INDEX_SIZE)

#define R_PTRS_PER_PTE		(1 << RADIX_PTE_INDEX_SIZE)
#define R_PTRS_PER_PMD		(1 << RADIX_PMD_INDEX_SIZE)
#define R_PTRS_PER_PUD		(1 << RADIX_PUD_INDEX_SIZE)

/*
 * Size of EA range mapped by our pagetables.
 */
#define RADIX_PGTABLE_EADDR_SIZE	(RADIX_PTE_INDEX_SIZE + RADIX_PMD_INDEX_SIZE + \
					 RADIX_PUD_INDEX_SIZE + RADIX_PGD_INDEX_SIZE + PAGE_SHIFT)
#define RADIX_PGTABLE_RANGE	(ASM_CONST(1) << RADIX_PGTABLE_EADDR_SIZE)
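/*
 * A worked example of the EA-size arithmetic above, assuming the index
 * sizes defined in radix-4k.h and radix-64k.h:
 *
 *	4K pages:  9 (PTE) + 9 (PMD) + 9 (PUD) + 13 (PGD) + 12 (PAGE_SHIFT) = 52
 *	64K pages: 5 (PTE) + 9 (PMD) + 9 (PUD) + 13 (PGD) + 16 (PAGE_SHIFT) = 52
 *
 * Either way, RADIX_PGTABLE_RANGE works out to 1UL << 52, i.e. a 4PB
 * effective address range.
 */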
/*
 * We support a 52-bit address space. Use the top bit for the kernel
 * virtual mapping, and make sure the kernel fits in the top quadrant.
 *
 *           +------------------+
 *           +------------------+  Kernel virtual map (0xc008000000000000)
 *           |                  |
 *           |                  |
 *           |                  |
 * 0b11......+------------------+  Kernel linear map (0xc....)
 *           |                  |
 *           |    2 quadrant    |
 *           |                  |
 * 0b10......+------------------+
 *           |                  |
 *           |    1 quadrant    |
 *           |                  |
 * 0b01......+------------------+
 *           |                  |
 *           |    0 quadrant    |
 *           |                  |
 * 0b00......+------------------+
 *
 *
 * 3rd quadrant expanded:
 * +------------------------------+  Highest address (0xc010000000000000)
 * +------------------------------+  KASAN shadow end (0xc00fc00000000000)
 * |                              |
 * |                              |
 * +------------------------------+  Kernel vmemmap end/shadow start (0xc00e000000000000)
 * |                              |
 * |            512TB             |
 * |                              |
 * +------------------------------+  Kernel IO map end/vmemmap start
 * |                              |
 * |            512TB             |
 * |                              |
 * +------------------------------+  Kernel vmap end/IO map start
 * |                              |
 * |            512TB             |
 * |                              |
 * +------------------------------+  Kernel virt start (0xc008000000000000)
 * |                              |
 * |                              |
 * |                              |
 * +------------------------------+  Kernel linear (0xc.....)
 */

/* For the sizes of the shadow area, see kasan.h */

/*
 * If we store section details in page->flags, we can't increase
 * MAX_PHYSMEM_BITS: if we increase SECTIONS_WIDTH, we will not store node
 * details in page->flags, and page_to_nid does a page->section->node
 * lookup. Hence only increase it for VMEMMAP. Further, depending on
 * SPARSEMEM_EXTREME, reduce the memory requirements of a large number of
 * sections.
 * 51 bits is the max physical real address on POWER9.
 */

#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
#define R_MAX_PHYSMEM_BITS	51
#else
#define R_MAX_PHYSMEM_BITS	46
#endif

#define RADIX_KERN_VIRT_START	ASM_CONST(0xc008000000000000)
/*
 * 49 == MAX_EA_BITS_PER_CONTEXT (hash specific), to make sure we pick
 * the same value as hash.
 */
#define RADIX_KERN_MAP_SIZE	(1UL << 49)

#define RADIX_VMALLOC_START	RADIX_KERN_VIRT_START
#define RADIX_VMALLOC_SIZE	RADIX_KERN_MAP_SIZE
#define RADIX_VMALLOC_END	(RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)

#define RADIX_KERN_IO_START	RADIX_VMALLOC_END
#define RADIX_KERN_IO_SIZE	RADIX_KERN_MAP_SIZE
#define RADIX_KERN_IO_END	(RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)

#define RADIX_VMEMMAP_START	RADIX_KERN_IO_END
#define RADIX_VMEMMAP_SIZE	RADIX_KERN_MAP_SIZE
#define RADIX_VMEMMAP_END	(RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
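/*
 * Spelling out the arithmetic behind the defines above: each region is
 * RADIX_KERN_MAP_SIZE = 1UL << 49 = 512TB = 0x0002000000000000, so the
 * three regions tile the start of the top quadrant back to back:
 *
 *	vmalloc: 0xc008000000000000 - 0xc00a000000000000
 *	IO map:  0xc00a000000000000 - 0xc00c000000000000
 *	vmemmap: 0xc00c000000000000 - 0xc00e000000000000
 *
 * which matches the "3rd quadrant expanded" diagram above.
 */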
#ifndef __ASSEMBLY__
#define RADIX_PTE_TABLE_SIZE	(sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
#define RADIX_PMD_TABLE_SIZE	(sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE)
#define RADIX_PUD_TABLE_SIZE	(sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
#define RADIX_PGD_TABLE_SIZE	(sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)

#ifdef CONFIG_STRICT_KERNEL_RWX
extern void radix__mark_rodata_ro(void);
extern void radix__mark_initmem_nx(void);
#endif

extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
					 pte_t entry, unsigned long address,
					 int psize);

extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
					   unsigned long addr, pte_t *ptep,
					   pte_t old_pte, pte_t pte);

static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
					       unsigned long set)
{
	__be64 old_be, tmp_be;

	__asm__ __volatile__(
		"1:	ldarx	%0,0,%3		# pte_update\n"
		"	andc	%1,%0,%5	\n"
		"	or	%1,%1,%4	\n"
		"	stdcx.	%1,0,%3		\n"
		"	bne-	1b"
		: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
		: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
		: "cc" );

	return be64_to_cpu(old_be);
}
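/*
 * A minimal C sketch of what the ldarx/stdcx. loop above does, for
 * readers unfamiliar with the asm. radix_pte_update_sketch() is a
 * hypothetical name, not kernel API; the real code keeps the asm so the
 * clear-then-set is a single atomic read-modify-write on the big-endian
 * PTE image:
 *
 *	static inline unsigned long radix_pte_update_sketch(__be64 *p,
 *				unsigned long clr, unsigned long set)
 *	{
 *		__be64 old = READ_ONCE(*p), new;
 *
 *		do {
 *			// apply the update in the CPU-endian domain ...
 *			new = cpu_to_be64((be64_to_cpu(old) & ~clr) | set);
 *			// ... and retry if *p changed under us (the bne- 1b)
 *		} while (!try_cmpxchg(p, &old, new));
 *
 *		return be64_to_cpu(old);
 *	}
 */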
static inline unsigned long radix__pte_update(struct mm_struct *mm,
					      unsigned long addr,
					      pte_t *ptep, unsigned long clr,
					      unsigned long set,
					      int huge)
{
	unsigned long old_pte;

	old_pte = __radix_pte_update(ptep, clr, set);
	if (!huge)
		assert_pte_locked(mm, addr);

	return old_pte;
}

static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
						   unsigned long addr,
						   pte_t *ptep, int full)
{
	unsigned long old_pte;

	if (full) {
		old_pte = pte_val(*ptep);
		*ptep = __pte(0);
	} else
		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);

	return __pte(old_pte);
}

static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
{
	return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0);
}

static inline int radix__pte_none(pte_t pte)
{
	return (pte_val(pte) & ~RADIX_PTE_NONE_MASK) == 0;
}

static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep, pte_t pte, int percpu)
{
	*ptep = pte;

	/*
	 * The architecture suggests a ptesync after setting the pte, which
	 * orders the store that updates the pte with subsequent page table
	 * walk accesses which may load the pte. Without this, it may be
	 * possible for a subsequent access to result in a spurious fault.
	 *
	 * This is not necessary for correctness, because a spurious fault
	 * is tolerated by the page fault handler, and this store will
	 * eventually be seen. In testing, there was no noticeable increase
	 * in user faults on POWER9. Avoiding ptesync here is a significant
	 * win for things like fork. If a future microarchitecture benefits
	 * from ptesync, it should probably go into update_mmu_cache, rather
	 * than set_pte_at (which is used to set ptes unrelated to faults).
	 *
	 * Spurious faults from kernel memory are not tolerated, so there
	 * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows
	 * the pte update sequence from ISA Book III 6.10 Translation Table
	 * Update Synchronization Requirements.
	 */
}

static inline int radix__pmd_bad(pmd_t pmd)
{
	return !!(pmd_val(pmd) & RADIX_PMD_BAD_BITS);
}

static inline int radix__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return ((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) == 0);
}

static inline int radix__pud_bad(pud_t pud)
{
	return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
}

static inline int radix__pud_same(pud_t pud_a, pud_t pud_b)
{
	return ((pud_raw(pud_a) ^ pud_raw(pud_b)) == 0);
}

static inline int radix__p4d_bad(p4d_t p4d)
{
	return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

static inline int radix__pmd_trans_huge(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_PTE) == _PAGE_PTE;
}

static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | _PAGE_PTE);
}

static inline int radix__pud_trans_huge(pud_t pud)
{
	return (pud_val(pud) & _PAGE_PTE) == _PAGE_PTE;
}

static inline pud_t radix__pud_mkhuge(pud_t pud)
{
	return __pud(pud_val(pud) | _PAGE_PTE);
}

extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
						pmd_t *pmdp, unsigned long clr,
						unsigned long set);
extern unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
						pud_t *pudp, unsigned long clr,
						unsigned long set);
extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp);
extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
					      pgtable_t pgtable);
extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long addr, pmd_t *pmdp);
pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pud_t *pudp);

static inline int radix__has_transparent_hugepage(void)
{
	/* For radix, 2M at the PMD level means THP */
	if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
		return 1;
	return 0;
}

static inline int radix__has_transparent_pud_hugepage(void)
{
	/* For radix, 1G at the PUD level means PUD hugepage support */
	if (mmu_psize_defs[MMU_PAGE_1G].shift == PUD_SHIFT)
		return 1;
	return 0;
}
#endif

struct vmem_altmap;
struct dev_pagemap;
extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
						   unsigned long page_size,
						   unsigned long phys);
int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
				      int node, struct vmem_altmap *altmap);
void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
			       struct vmem_altmap *altmap);
extern void radix__vmemmap_remove_mapping(unsigned long start,
					  unsigned long page_size);

extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
				  pgprot_t flags, unsigned int psz);

static inline unsigned long radix__get_tree_size(void)
{
	unsigned long rts_field;
	/*
	 * We support 52 bits, hence:
	 * 52 - 31 = 21, i.e. 0b10101
	 * RTS encoding details:
	 * bits 0 - 3 of rts -> bits 6 - 8 of unsigned long
	 * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long
	 */
	rts_field = (0x5UL << 5); /* 6 - 8 bits */
	rts_field |= (0x2UL << 61);

	return rts_field;
}

#ifdef CONFIG_MEMORY_HOTPLUG
int radix__create_section_mapping(unsigned long start, unsigned long end,
				  int nid, pgprot_t prot);
int radix__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
#define vmemmap_can_optimize vmemmap_can_optimize
bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
#endif

#define vmemmap_populate_compound_pages vmemmap_populate_compound_pages
int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
					      unsigned long start,
					      unsigned long end, int node,
					      struct dev_pagemap *pgmap);
#endif /* __ASSEMBLY__ */
#endif
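/*
 * A worked example of the RTS encoding in radix__get_tree_size() above:
 * the radix tree size field encodes EA bits - 31 = 52 - 31 = 21 = 0b10101.
 * That value is split across two fields of the double word: the low three
 * bits, 0b101 = 0x5, go into the field at the shift-by-5 position, and the
 * high two bits, 0b10 = 0x2, go into the field at the shift-by-61 position,
 * giving rts_field = (0x5UL << 5) | (0x2UL << 61).
 */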