/* Path: blob/main/tests/sys/vm/shared_shadow_inval_test.c (39507 views) */
/*1* Copyright (c) 2021 Dell Inc. or its subsidiaries. All Rights Reserved.2* Copyright (c) 2022 The FreeBSD Foundation3*4* Portions of this software were developed by Mark Johnston under sponsorship5* from the FreeBSD Foundation.6*7* Redistribution and use in source and binary forms, with or without8* modification, are permitted provided that the following conditions9* are met:10* 1. Redistributions of source code must retain the above copyright11* notice, this list of conditions and the following disclaimer.12* 2. Redistributions in binary form must reproduce the above copyright13* notice, this list of conditions and the following disclaimer in the14* documentation and/or other materials provided with the distribution.15*16* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND17* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE18* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE19* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE20* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL21* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS22* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)23* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT24* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY25* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF26* SUCH DAMAGE.27*/2829/*30* Test behavior when a mapping of a shared shadow vm object is31* invalidated by COW from another mapping. In particular, when32* minherit(INHERT_SHARE) is applied to a COW mapping, a subsequently33* forked child process will share the parent's shadow object. Thus,34* pages already mapped into one sharing process may be written from35* another, triggering a copy into the shadow object. 
The VM system36* expects that a fully shadowed page is unmapped, but at one point the37* use of a shared shadow object could break this invariant.38*39* This is a regression test for an issue isolated by [email protected]40* from an issue detected by stress2's collapse.sh by [email protected].41* The issue became CVE-2021-29626.42*43* This file is written as an ATF test suite but may be compiled as a44* standalone program with -DSTANDALONE (and optionally -DDEBUG).45*/4647#include <sys/param.h>48#include <sys/mman.h>49#include <sys/procctl.h>50#include <sys/resource.h>51#include <sys/sysctl.h>52#include <sys/wait.h>5354#include <machine/atomic.h>5556#include <err.h>57#include <errno.h>58#include <stdbool.h>59#include <stddef.h>60#include <stdio.h>61#include <stdlib.h>62#include <unistd.h>6364#ifdef STANDALONE65#define ATF_REQUIRE(x) do { \66if (!(x)) \67errx(1, "%s", #x); \68} while (0)69#else70#include <atf-c.h>71#endif7273#ifdef DEBUG74#define dprintf(...) printf(__VA_ARGS__)75#else76#define dprintf(...)77#endif7879#define DEPTH 58081#define FLAG_COLLAPSE 0x182#define FLAG_BLOCK_XFER 0x283#define FLAG_FULLMOD 0x484#define FLAG_MASK (FLAG_COLLAPSE | FLAG_BLOCK_XFER | FLAG_FULLMOD)8586struct shared_state {87void *p;88size_t len;89size_t modlen;90size_t pagesize;91bool collapse;92bool block_xfer;93bool lazy_cow;94bool okay;95volatile bool exiting[DEPTH];96volatile bool exit;97volatile bool p3_did_write;98};99100/*101* Program flow. There are three or four processes that are descendants102* of the process running the test (P0), where arrows go from parents to103* children, and thicker arrows indicate sharing a certain memory region104* without COW semantics:105* P0 -> P1 -> P2 => P3106* \=> P4107* The main idea is that P1 maps a memory region, and that region is108* shared with P2/P3, but with COW semantics. When P3 modifies the109* memory, P2 ought to see that modification. P4 optionally exists to110* defeat a COW optimization.111*/112113#define child_err(...) 
do { \114ss->exit = true; \115err(1, __VA_ARGS__); \116} while (0)117118#define child_errx(...) do { \119ss->exit = true; \120errx(1, __VA_ARGS__); \121} while (0)122123#define SLEEP_TIME_US 1000124125static void child(struct shared_state *ss, int depth);126127static pid_t128child_fork(struct shared_state *ss, int depth)129{130pid_t pid = fork();131if (pid == -1)132child_err("fork");133else if (pid == 0)134child(ss, depth);135return pid;136}137138static void139child_fault(struct shared_state *ss)140{141size_t i;142143for (i = 0; i < ss->len; i += ss->pagesize)144(void)((volatile char *)ss->p)[i];145}146147static void148child_write(struct shared_state *ss, int val, size_t len)149{150size_t i;151152for (i = 0; i < len; i += ss->pagesize)153((int *)ss->p)[i / sizeof(int)] = val;154atomic_thread_fence_rel();155}156157static void158child_wait_p3_write(struct shared_state *ss)159{160while (!ss->p3_did_write) {161if (ss->exit)162exit(1);163usleep(SLEEP_TIME_US);164}165atomic_thread_fence_acq();166}167168static void169child_verify(struct shared_state *ss, int depth, int newval, int oldval)170{171size_t i;172int expectval, foundval;173174for (i = 0; i < ss->len; i += ss->pagesize) {175expectval = i < ss->modlen ? newval : oldval;176foundval = ((int *)ss->p)[i / sizeof(int)];177if (foundval == expectval)178continue;179child_errx("P%d saw %d but expected %d, %d was the old value",180depth, foundval, expectval, oldval);181}182}183184static void185child(struct shared_state *ss, int depth)186{187pid_t mypid, oldval, pid;188189if (depth < 1 || depth >= DEPTH)190child_errx("Bad depth %d", depth);191mypid = getpid();192dprintf("P%d (pid %d) started\n", depth, mypid);193switch (depth) {194case 1:195/* Shared memory undergoing test. */196ss->p = mmap(NULL, ss->len, PROT_READ | PROT_WRITE,197MAP_SHARED | MAP_ANON, -1, 0);198if (ss->p == MAP_FAILED)199child_err("mmap");200201/* P1 stamps the shared memory. 
*/202child_write(ss, mypid, ss->len);203if (!ss->lazy_cow) {204if (mlock(ss->p, ss->len) == -1)205child_err("mlock");206if (mprotect(ss->p, ss->len, PROT_READ) == -1)207child_err("mprotect");208}209if (ss->block_xfer) {210/*211* P4 is forked so that its existence blocks a page COW212* path where the page is simply transferred between213* objects, rather than being copied.214*/215child_fork(ss, 4);216}217/*218* P1 specifies that modifications from its child processes not219* be shared with P1. Child process reads can be serviced from220* pages in P1's object, but writes must be COW'd.221*/222if (minherit(ss->p, ss->len, INHERIT_COPY) != 0)223child_err("minherit");224/* Fork P2. */225child_fork(ss, depth + 1);226/* P1 and P4 wait for P3's writes before exiting. */227child_wait_p3_write(ss);228child_verify(ss, depth, mypid, mypid);229if (!ss->collapse) {230/* Hang around to prevent collapse. */231while (!ss->exit)232usleep(SLEEP_TIME_US);233}234/* Exit so the P2 -> P1/P4 shadow chain can collapse. */235break;236case 2:237/*238* P2 now specifies that modifications from its child processes239* be shared. P2 and P3 will share a shadow object.240*/241if (minherit(ss->p, ss->len, INHERIT_SHARE) != 0)242child_err("minherit");243244/*245* P2 faults a page in P1's object before P1 exits and the246* shadow chain is collapsed. This may be redundant if the247* (read-only) mappings were copied by fork(), but it doesn't248* hurt.249*/250child_fault(ss);251oldval = atomic_load_acq_int(ss->p);252253/* Fork P3. */254pid = child_fork(ss, depth + 1);255if (ss->collapse) {256/* Wait for P1 and P4 to exit, triggering collapse. */257while (!ss->exiting[1] ||258(ss->block_xfer && !ss->exiting[4]))259usleep(SLEEP_TIME_US);260/*261* This is racy, just guess at how long it may take262* them to finish exiting.263*/264usleep(100 * 1000);265}266/* P2 waits for P3's modification. 
*/267child_wait_p3_write(ss);268child_verify(ss, depth, pid, oldval);269ss->okay = true;270ss->exit = true;271break;272case 3:273/*274* Use mlock()+mprotect() to trigger the COW. This275* exercises a different COW handler than the one used276* for lazy faults.277*/278if (!ss->lazy_cow) {279if (mlock(ss->p, ss->len) == -1)280child_err("mlock");281if (mprotect(ss->p, ss->len, PROT_READ | PROT_WRITE) ==282-1)283child_err("mprotect");284}285286/*287* P3 writes the memory. A page is faulted into the shared288* P2/P3 shadow object. P2's mapping of the page in P1's289* object must now be shot down, or else P2 will wrongly290* continue to have that page mapped.291*/292child_write(ss, mypid, ss->modlen);293ss->p3_did_write = true;294dprintf("P3 (pid %d) wrote its pid\n", mypid);295break;296case 4:297/* Just hang around until P3 is done writing. */298oldval = atomic_load_acq_int(ss->p);299child_wait_p3_write(ss);300child_verify(ss, depth, oldval, oldval);301break;302default:303child_errx("Bad depth %d", depth);304}305306dprintf("P%d (pid %d) exiting\n", depth, mypid);307ss->exiting[depth] = true;308exit(0);309}310311static void312do_one_shared_shadow_inval(bool lazy_cow, size_t pagesize, size_t len,313unsigned int flags)314{315struct shared_state *ss;316pid_t pid;317int status;318319pid = getpid();320321dprintf("P0 (pid %d) %s(collapse=%d, block_xfer=%d, full_mod=%d)\n",322pid, __func__, (int)collapse, (int)block_xfer, (int)full_mod);323324ATF_REQUIRE(procctl(P_PID, pid, PROC_REAP_ACQUIRE, NULL) == 0);325326/* Shared memory for coordination. */327ss = mmap(NULL, sizeof(*ss), PROT_READ | PROT_WRITE,328MAP_SHARED | MAP_ANON, -1, 0);329ATF_REQUIRE(ss != MAP_FAILED);330331ss->len = len;332ss->modlen = (flags & FLAG_FULLMOD) ? 
ss->len : ss->len / 2;333ss->pagesize = pagesize;334ss->collapse = (flags & FLAG_COLLAPSE) != 0;335ss->block_xfer = (flags & FLAG_BLOCK_XFER) != 0;336ss->lazy_cow = lazy_cow;337338pid = fork();339ATF_REQUIRE(pid != -1);340if (pid == 0)341child(ss, 1);342343/* Wait for all descendants to exit. */344do {345pid = wait(&status);346ATF_REQUIRE(WIFEXITED(status));347} while (pid != -1 || errno != ECHILD);348349atomic_thread_fence_acq();350ATF_REQUIRE(ss->okay);351352ATF_REQUIRE(munmap(ss, sizeof(*ss)) == 0);353ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_RELEASE, NULL) == 0);354}355356static void357do_shared_shadow_inval(bool lazy_cow)358{359size_t largepagesize, pagesize, pagesizes[MAXPAGESIZES], sysctllen;360361sysctllen = sizeof(pagesizes);362ATF_REQUIRE(sysctlbyname("hw.pagesizes", pagesizes, &sysctllen, NULL,3630) == 0);364ATF_REQUIRE(sysctllen >= sizeof(size_t));365366pagesize = pagesizes[0];367largepagesize = MAXPAGESIZES >= 2 &&368sysctllen >= 2 * sizeof(size_t) && pagesizes[1] != 0 ?369pagesizes[1] : 2 * 1024 * 1024;370371for (unsigned int i = 0; i <= FLAG_MASK; i++) {372do_one_shared_shadow_inval(lazy_cow, pagesize,373pagesize, i);374do_one_shared_shadow_inval(lazy_cow, pagesize,3752 * pagesize, i);376do_one_shared_shadow_inval(lazy_cow, pagesize,377largepagesize - pagesize, i);378do_one_shared_shadow_inval(lazy_cow, pagesize,379largepagesize, i);380do_one_shared_shadow_inval(lazy_cow, pagesize,381largepagesize + pagesize, i);382}383}384385static void386do_shared_shadow_inval_eager(void)387{388struct rlimit rl;389390rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;391ATF_REQUIRE(setrlimit(RLIMIT_MEMLOCK, &rl) == 0);392393do_shared_shadow_inval(false);394}395396static void397do_shared_shadow_inval_lazy(void)398{399do_shared_shadow_inval(true);400}401402#ifdef 
STANDALONE403int404main(void)405{406do_shared_shadow_inval_lazy();407do_shared_shadow_inval_eager();408printf("pass\n");409}410#else411ATF_TC_WITHOUT_HEAD(shared_shadow_inval__lazy_cow);412ATF_TC_BODY(shared_shadow_inval__lazy_cow, tc)413{414do_shared_shadow_inval_lazy();415}416417ATF_TC(shared_shadow_inval__eager_cow);418ATF_TC_HEAD(shared_shadow_inval__eager_cow, tc)419{420/* Needed to raise the mlock() limit. */421atf_tc_set_md_var(tc, "require.user", "root");422}423ATF_TC_BODY(shared_shadow_inval__eager_cow, tc)424{425do_shared_shadow_inval_eager();426}427428ATF_TP_ADD_TCS(tp)429{430ATF_TP_ADD_TC(tp, shared_shadow_inval__lazy_cow);431ATF_TP_ADD_TC(tp, shared_shadow_inval__eager_cow);432return (atf_no_error());433}434#endif /* !STANDALONE */435436437