Path: blob/master/arch/arm64/kvm/hyp/nvhe/mem_protect.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <[email protected]>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

#define for_each_hyp_page(__p, __st, __sz)				\
	for (struct hyp_page *__p = hyp_phys_to_page(__st),		\
	     *__e = __p + ((__sz) >> PAGE_SHIFT);			\
	     __p < __e; __p++)

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
	 * so there should be no need to free any of the tail pages to make the
	 * allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void __apply_guest_page(void *va, size_t size,
			       void (*func)(void *addr, size_t size))
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		size_t map_size = PAGE_SIZE;
		void *map;

		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
		else
			map = hyp_fixmap_map(__hyp_pa(va));

		func(map, map_size);

		if (map_size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();

		size -= map_size;
		va += map_size;
	}
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __clean_dcache_guest_page);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __invalidate_icache_guest_page);
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg_hcr(params->hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	 })

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u64 granule;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte) {
		WARN_ON(addr_is_memory(addr) &&
			get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
		return -EPERM;
	}

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
		if (!kvm_level_supports_block_mapping(level))
			continue;
		granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		if (!range_included(&cur, range))
			continue;
		*range = cur;
		return 0;
	}

	WARN_ON(1);

	return -EINVAL;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, addr, size)
		set_host_state(page, state);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret;

	if (!range_is_memory(addr, addr + size))
		return -EPERM;

	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			      addr, size, &host_s2_pool, owner_id);
	if (ret)
		return ret;

	/* Don't forget to update the vmemmap tracking for the host */
	if (owner_id == PKVM_ID_HOST)
		__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
	else
		__host_update_page_state(addr, size, PKVM_NOPAGE);

	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding to lose the state because of side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}


	/*
	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
	 * abort into the host when we figure out how to do that.
	 */
	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

struct check_walk_data {
	enum pkvm_page_state	desired;
	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __check_page_state_visitor,
		.arg	= data,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	int ret;

	ret = check_range_allowed_memory(addr, addr + size);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);

	for_each_hyp_page(page, addr, size) {
		if (get_host_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size)
		set_hyp_state(page, state);
}

static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size) {
		if (get_hyp_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 phys = __hyp_pa(start);
	u64 size = end - start;
	struct hyp_page *p;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		hyp_page_ref_inc(p);
		if (p->refcount == 1)
			WARN_ON(pkvm_create_mappings_locked((void *)cur,
							    (void *)cur + PAGE_SIZE,
							    PAGE_HYP));
	}

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	struct hyp_page *p;

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		if (p->refcount == 1)
			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
		hyp_page_ref_dec(p);
	}

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
{
	size_t block_size;

	if (nr_pages == 1) {
		*size = PAGE_SIZE;
		return 0;
	}

	/* We solely support second to last level huge mapping */
	block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);

	if (nr_pages != block_size >> PAGE_SHIFT)
		return -EINVAL;

	if (!IS_ALIGNED(phys | ipa, block_size))
		return -EINVAL;

	*size = block_size;
	return 0;
}

int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
	if (ret)
		return ret;

	ret = check_range_allowed_memory(phys, phys + size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		switch (get_host_state(page)) {
		case PKVM_PAGE_OWNED:
			continue;
		case PKVM_PAGE_SHARED_OWNED:
			if (page->host_share_guest_count == U32_MAX) {
				ret = -EBUSY;
				goto unlock;
			}

			/* Only host to np-guest multi-sharing is tolerated */
			if (page->host_share_guest_count)
				continue;

			fallthrough;
		default:
			ret = -EPERM;
			goto unlock;
		}
	}

	for_each_hyp_page(page, phys, size) {
		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
		page->host_share_guest_count++;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (kvm_granule_size(level) != size)
		return -E2BIG;

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + size);
	if (WARN_ON(ret))
		return ret;

	for_each_hyp_page(page, phys, size) {
		if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
			return -EPERM;
		if (WARN_ON(!page->host_share_guest_count))
			return -EINVAL;
	}

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size, phys;
	int ret;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		/* __check_host_shared_guest() protects against underflow */
		page->host_share_guest_count--;
		if (!page->host_share_guest_count)
			set_host_state(page, PKVM_PAGE_OWNED);
	}

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa, PAGE_SIZE);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa, PAGE_SIZE);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}

#ifdef CONFIG_NVHE_EL2_DEBUG
struct pkvm_expected_state {
	enum pkvm_page_state host;
	enum pkvm_page_state hyp;
	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
};

static struct pkvm_expected_state selftest_state;
static struct hyp_page *selftest_page;

static struct pkvm_hyp_vm selftest_vm = {
	.kvm = {
		.arch = {
			.mmu = {
				.arch = &selftest_vm.kvm.arch,
				.pgt = &selftest_vm.pgt,
			},
		},
	},
};

static struct pkvm_hyp_vcpu selftest_vcpu = {
	.vcpu = {
		.arch = {
			.hw_mmu = &selftest_vm.kvm.arch.mmu,
		},
		.kvm = &selftest_vm.kvm,
	},
};

static void init_selftest_vm(void *virt)
{
	struct hyp_page *p = hyp_virt_to_page(virt);
	int i;

	selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
	WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));

	for (i = 0; i < pkvm_selftest_pages(); i++) {
		if (p[i].refcount)
			continue;
		p[i].refcount = 1;
		hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
	}
}

static u64 selftest_ipa(void)
{
	return BIT(selftest_vm.pgt.ia_bits - 1);
}

static void assert_page_state(void)
{
	void *virt = hyp_page_to_virt(selftest_page);
	u64 size = PAGE_SIZE << selftest_page->order;
	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
	u64 phys = hyp_virt_to_phys(virt);
	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
	struct pkvm_hyp_vm *vm;

	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	host_lock_component();
	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
	host_unlock_component();

	hyp_lock_component();
	WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
	hyp_unlock_component();

	guest_lock_component(&selftest_vm);
	WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
	WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
	guest_unlock_component(&selftest_vm);
}

#define assert_transition_res(res, fn, ...)				\
	do {								\
		WARN_ON(fn(__VA_ARGS__) != res);			\
		assert_page_state();					\
	} while (0)

void pkvm_ownership_selftest(void *base)
{
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
	struct pkvm_hyp_vm *vm = &selftest_vm;
	u64 phys, size, pfn, gfn;

	WARN_ON(!virt);
	selftest_page = hyp_virt_to_page(virt);
	selftest_page->refcount = 0;
	init_selftest_vm(base);

	size = PAGE_SIZE << selftest_page->order;
	phys = hyp_virt_to_phys(virt);
	pfn = hyp_phys_to_pfn(phys);
	gfn = hyp_phys_to_pfn(selftest_ipa());

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
	assert_page_state();
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);

	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	hyp_unpin_shared_mem(virt, virt + size);
	WARN_ON(hyp_page_count(virt) != 1);
	assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);

	hyp_unpin_shared_mem(virt, virt + size);
	assert_page_state();
	WARN_ON(hyp_page_count(virt));

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);

	selftest_state.guest[0] = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.guest[1] = PKVM_NOPAGE;
	selftest_state.host = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);

	selftest_page->refcount = 1;
	hyp_put_page(&host_s2_pool, virt);
}
#endif