Path: blob/master/arch/arm64/kvm/hyp/nvhe/mem_protect.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <[email protected]>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

#define for_each_hyp_page(__p, __st, __sz)				\
	for (struct hyp_page *__p = hyp_phys_to_page(__st),		\
			     *__e = __p + ((__sz) >> PAGE_SHIFT);	\
	     __p < __e; __p++)
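
/*
 * Example usage (illustrative): the iterator is declared by the macro itself,
 * so a caller only names it and walks the struct hyp_page entries backing the
 * physical range [__st, __st + __sz):
 *
 *	for_each_hyp_page(page, phys, size)
 *		set_host_state(page, PKVM_NOPAGE);
 */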

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
	 * so there should be no need to free any of the tail pages to make the
	 * allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void __apply_guest_page(void *va, size_t size,
			       void (*func)(void *addr, size_t size))
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		size_t map_size = PAGE_SIZE;
		void *map;

		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
		else
			map = hyp_fixmap_map(__hyp_pa(va));

		func(map, map_size);

		if (map_size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();

		size -= map_size;
		va += map_size;
	}
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __clean_dcache_guest_page);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __invalidate_icache_guest_page);
}
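
/*
 * Note on __apply_guest_page() (illustrative): the target pages may not be
 * mapped at EL2, so the range is accessed through temporary fixmap slots.
 * PMD_SIZE-aligned chunks of at least PMD_SIZE (2MiB with a 4KiB granule) go
 * through the block-sized slot, everything else one PAGE_SIZE mapping at a
 * time; e.g. a PMD-aligned 3MiB range is handled as one 2MiB block followed
 * by 256 individual pages.
 */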

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;
	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		params->hcr_el2 |= HCR_FWB;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg_hcr(params->hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

/*
 * Ensure the PFN range is contained within PA-range.
 *
 * This check is also robust to overflows and is therefore a requirement before
 * using a pfn/nr_pages pair from an untrusted source.
 */
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
	u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);

	return pfn < limit && ((limit - pfn) >= nr_pages);
}
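
/*
 * Example (illustrative): with a 40-bit PA range and a 4KiB granule, limit is
 * 2^28 pfns. A pair such as pfn = 2^28 - 1, nr_pages = U64_MAX is rejected
 * because (limit - pfn) == 1 < nr_pages; checking pfn < limit first also means
 * the subtraction cannot underflow, whereas testing pfn + nr_pages <= limit
 * could wrap around and wrongly pass.
 */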

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}
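
/*
 * Example (illustrative): with a single memblock covering
 * [0x80000000, 0xc0000000), find_mem_range(0x90000000, &r) returns the region
 * and sets r = { 0x80000000, 0xc0000000 }, while find_mem_range(0x40000000, &r)
 * returns NULL with r = { 0, 0x80000000 }, i.e. the gap between neighbouring
 * memblocks that contains the address.
 */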

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u64 granule;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte) {
		WARN_ON(addr_is_memory(addr) &&
			get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
		return -EPERM;
	}

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
		if (!kvm_level_supports_block_mapping(level))
			continue;
		granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		if (!range_included(&cur, range))
			continue;
		*range = cur;
		return 0;
	}

	WARN_ON(1);

	return -EINVAL;
}
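
/*
 * Example (illustrative, assuming a 4KiB granule): for a fault at 0x41234567
 * with find_mem_range() returning the hole [0, 0x80000000), the loop above
 * settles on the largest block-aligned granule still contained in that hole,
 * here the 1GiB range [0x40000000, 0x80000000) at level 1, so a single
 * mapping can cover the faulting address and its surroundings.
 */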

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, addr, size)
		set_host_state(page, state);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret;

	if (!range_is_memory(addr, addr + size))
		return -EPERM;

	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			      addr, size, &host_s2_pool, owner_id);
	if (ret)
		return ret;

	/* Don't forget to update the vmemmap tracking for the host */
	if (owner_id == PKVM_ID_HOST)
		__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
	else
		__host_update_page_state(addr, size, PKVM_NOPAGE);

	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding to lose the state because of side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}


	/*
	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
	 * abort into the host when we figure out how to do that.
	 */
	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}
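
/*
 * Summary (illustrative): host stage-2 mappings are built lazily. The first
 * host access to an address with no mapping (including MMIO mappings torn
 * down by host_stage2_unmap_dev_all()) traps here, and host_stage2_idmap()
 * installs the largest identity mapping permitted by
 * host_stage2_adjust_range(), using PKVM_HOST_MEM_PROT or PKVM_HOST_MMIO_PROT
 * depending on whether the address falls in a memblock region.
 */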

struct check_walk_data {
	enum pkvm_page_state	desired;
	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __check_page_state_visitor,
		.arg	= data,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	int ret;

	ret = check_range_allowed_memory(addr, addr + size);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);

	for_each_hyp_page(page, addr, size) {
		if (get_host_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size)
		set_hyp_state(page, state);
}

static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size) {
		if (get_hyp_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 phys = __hyp_pa(start);
	u64 size = end - start;
	struct hyp_page *p;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		hyp_page_ref_inc(p);
		if (p->refcount == 1)
			WARN_ON(pkvm_create_mappings_locked((void *)cur,
							    (void *)cur + PAGE_SIZE,
							    PAGE_HYP));
	}

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	struct hyp_page *p;

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		if (p->refcount == 1)
			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
		hyp_page_ref_dec(p);
	}

	hyp_unlock_component();
	host_unlock_component();
}
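
/*
 * Note (illustrative): hyp_pin_shared_mem()/hyp_unpin_shared_mem() reference
 * count host pages that are already shared with the hypervisor. The first pin
 * of a page creates its EL2 mapping and the last unpin removes it; a non-zero
 * refcount is also what makes __pkvm_host_unshare_hyp() return -EBUSY while
 * the hypervisor still relies on the page.
 */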

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
{
	size_t block_size;

	if (nr_pages == 1) {
		*size = PAGE_SIZE;
		return 0;
	}

	/* We solely support second to last level huge mapping */
	block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);

	if (nr_pages != block_size >> PAGE_SHIFT)
		return -EINVAL;

	if (!IS_ALIGNED(phys | ipa, block_size))
		return -EINVAL;

	*size = block_size;
	return 0;
}
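
/*
 * Example (illustrative, assuming a 4KiB granule): KVM_PGTABLE_LAST_LEVEL is
 * 3, so the only supported huge mapping is a level-2 block of 2MiB. A
 * transition with nr_pages != 1 is therefore accepted only when
 * nr_pages == 512 and both phys and ipa are 2MiB-aligned; anything else has
 * to be done page by page.
 */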

int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
	if (ret)
		return ret;

	ret = check_range_allowed_memory(phys, phys + size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		switch (get_host_state(page)) {
		case PKVM_PAGE_OWNED:
			continue;
		case PKVM_PAGE_SHARED_OWNED:
			if (page->host_share_guest_count == U32_MAX) {
				ret = -EBUSY;
				goto unlock;
			}

			/* Only host to np-guest multi-sharing is tolerated */
			if (page->host_share_guest_count)
				continue;

			fallthrough;
		default:
			ret = -EPERM;
			goto unlock;
		}
	}

	for_each_hyp_page(page, phys, size) {
		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
		page->host_share_guest_count++;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (size && kvm_granule_size(level) != size)
		return -E2BIG;

	if (!size)
		size = kvm_granule_size(level);

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + size);
	if (WARN_ON(ret))
		return ret;

	for_each_hyp_page(page, phys, size) {
		if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
			return -EPERM;
		if (WARN_ON(!page->host_share_guest_count))
			return -EINVAL;
	}

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size, phys;
	int ret;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		/* __check_host_shared_guest() protects against underflow */
		page->host_share_guest_count--;
		if (!page->host_share_guest_count)
			set_host_state(page, PKVM_PAGE_OWNED);
	}

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}

#ifdef CONFIG_NVHE_EL2_DEBUG
struct pkvm_expected_state {
	enum pkvm_page_state host;
	enum pkvm_page_state hyp;
	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
};

static struct pkvm_expected_state selftest_state;
static struct hyp_page *selftest_page;

static struct pkvm_hyp_vm selftest_vm = {
	.kvm = {
		.arch = {
			.mmu = {
				.arch = &selftest_vm.kvm.arch,
				.pgt = &selftest_vm.pgt,
			},
		},
	},
};

static struct pkvm_hyp_vcpu selftest_vcpu = {
	.vcpu = {
		.arch = {
			.hw_mmu = &selftest_vm.kvm.arch.mmu,
		},
		.kvm = &selftest_vm.kvm,
	},
};

static void init_selftest_vm(void *virt)
{
	struct hyp_page *p = hyp_virt_to_page(virt);
	int i;

	selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
	WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));

	for (i = 0; i < pkvm_selftest_pages(); i++) {
		if (p[i].refcount)
			continue;
		p[i].refcount = 1;
		hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
	}
}

static u64 selftest_ipa(void)
{
	return BIT(selftest_vm.pgt.ia_bits - 1);
}

static void assert_page_state(void)
{
	void *virt = hyp_page_to_virt(selftest_page);
	u64 size = PAGE_SIZE << selftest_page->order;
	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
	u64 phys = hyp_virt_to_phys(virt);
	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
	struct pkvm_hyp_vm *vm;

	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	host_lock_component();
	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
	host_unlock_component();

	hyp_lock_component();
	WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
	hyp_unlock_component();

	guest_lock_component(&selftest_vm);
	WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
	WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
	guest_unlock_component(&selftest_vm);
}

#define assert_transition_res(res, fn, ...)		\
	do {						\
		WARN_ON(fn(__VA_ARGS__) != res);	\
		assert_page_state();			\
	} while (0)

void pkvm_ownership_selftest(void *base)
{
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
	struct pkvm_hyp_vm *vm = &selftest_vm;
	u64 phys, size, pfn, gfn;

	WARN_ON(!virt);
	selftest_page = hyp_virt_to_page(virt);
	selftest_page->refcount = 0;
	init_selftest_vm(base);

	size = PAGE_SIZE << selftest_page->order;
	phys = hyp_virt_to_phys(virt);
	pfn = hyp_phys_to_pfn(phys);
	gfn = hyp_phys_to_pfn(selftest_ipa());

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
	assert_page_state();
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);

	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	hyp_unpin_shared_mem(virt, virt + size);
	WARN_ON(hyp_page_count(virt) != 1);
	assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);

	hyp_unpin_shared_mem(virt, virt + size);
	assert_page_state();
	WARN_ON(hyp_page_count(virt));

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);

	selftest_state.guest[0] = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.guest[1] = PKVM_NOPAGE;
	selftest_state.host = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);

	selftest_page->refcount = 1;
	hyp_put_page(&host_s2_pool, virt);
}
#endif