// SPDX-License-Identifier: GPL-2.0
/*
 *  Helper functions for KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep))
		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
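
/*
 * Usage sketch (illustrative only, not part of the original file): callers of
 * gmap_helper_zap_one_page() and gmap_helper_discard() are expected to take
 * the mmap lock themselves; holding it in read mode is sufficient for the
 * asserts above. The wrapper name below is hypothetical.
 *
 *	static void example_discard_range(struct mm_struct *mm,
 *					  unsigned long start, unsigned long end)
 *	{
 *		mmap_read_lock(mm);
 *		gmap_helper_discard(mm, start, end);
 *		mmap_read_unlock(mm);
 *	}
 */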

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * The core mm contracts with s390 that, even if it were to remove a page
 * table (so that a racing walk_page_range_vma() call to
 * pte_offset_map_lock() would fail), it will never insert a page table
 * containing empty zero pages once mm_forbids_zeropage(mm), i.e.
 * mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
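
/*
 * Usage sketch (illustrative only, not part of the original file):
 * gmap_helper_disable_cow_sharing() operates on current->mm and must be
 * called with the mmap lock held in write mode, as asserted above. The
 * wrapper name below is hypothetical.
 *
 *	static int example_disable_cow_sharing(void)
 *	{
 *		int rc;
 *
 *		mmap_write_lock(current->mm);
 *		rc = gmap_helper_disable_cow_sharing();
 *		mmap_write_unlock(current->mm);
 *		return rc;
 *	}
 */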