// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/leafops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
#include <asm/pgtable.h>

/**
 * ptep_zap_softleaf_entry() - discard a software leaf entry.
 * @mm: the mm
 * @entry: the software leaf entry that needs to be zapped
 *
 * Discards the given software leaf entry. If the leaf entry was an actual
 * swap entry (and not a migration entry, for example), the swapped page
 * is also discarded from swap.
 */
static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
{
	if (softleaf_is_swap(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (softleaf_is_migration(entry))
		dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
	swap_put_entries_direct(entry, 1);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep)) {
		preempt_disable();
		pgste = pgste_get_lock(ptep);
		pgstev = pgste_val(pgste);

		/* Only discard entries the guest marked as unused or zero */
		if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
		    (pgstev & _PGSTE_GPS_ZERO)) {
			ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
			pte_clear(mm, vmaddr, ptep);
		}

		pgste_set_unlock(ptep, pgste);
		preempt_enable();
	}
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and
 * unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr,
					      min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
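/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * of the two exported helpers above. The function and its parameters are
 * invented for illustration; real users of these exports live in the s390
 * KVM code. Kept inside #if 0 so it is never built.
 */
#if 0
static void example_discard_guest_backing(struct mm_struct *mm,
					  unsigned long start,
					  unsigned long end)
{
	/* Both helpers expect the mmap lock to be held. */
	mmap_read_lock(mm);
	if (end - start == PAGE_SIZE)
		/* Single page: only zapped if it maps to a swap entry. */
		gmap_helper_zap_one_page(mm, start);
	else
		/* Whole range: unmap and discard all non-hugetlb pages. */
		gmap_helper_discard(mm, start, end);
	mmap_read_unlock(mm);
}
#endif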
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them with anonymous pages. Note
 * that we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table,
 * racing with walk_page_range_vma() and causing pte_offset_map_lock() to
 * fail, it will never insert a page table containing empty zero pages once
 * mm_forbids_zeropage(mm), i.e. mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages with anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
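/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * of gmap_helper_disable_cow_sharing(). The function name is invented; in
 * practice the helper would be driven from s390 KVM code when a guest
 * configuration becomes incompatible with COW sharing. Kept inside #if 0
 * so it is never built.
 */
#if 0
static int example_disable_cow_sharing_for_guest(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	/* The helper asserts that the mmap lock is held for writing. */
	mmap_write_lock(mm);
	rc = gmap_helper_disable_cow_sharing();
	mmap_write_unlock(mm);
	return rc;
}
#endif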