#include <sys/cdefs.h>
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/domainset.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sleepqueue.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_domainset.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
#include <vm/vm_dumpset.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <machine/md_var.h>
struct vm_domain vm_dom[MAXMEMDOM];
DPCPU_DEFINE_STATIC(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]);
struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
domainset_t __exclusive_cache_line vm_min_domains;
domainset_t __exclusive_cache_line vm_severe_domains;
static int vm_min_waiters;
static int vm_severe_waiters;
static int vm_pageproc_waiters;
static SYSCTL_NODE(_vm_stats, OID_AUTO, page, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"VM page statistics");
static COUNTER_U64_DEFINE_EARLY(pqstate_commit_retries);
SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, pqstate_commit_retries,
CTLFLAG_RD, &pqstate_commit_retries,
"Number of failed per-page atomic queue state updates");
static COUNTER_U64_DEFINE_EARLY(queue_ops);
SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_ops,
CTLFLAG_RD, &queue_ops,
"Number of batched queue operations");
static COUNTER_U64_DEFINE_EARLY(queue_nops);
SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_nops,
CTLFLAG_RD, &queue_nops,
"Number of batched queue operations with no effects");
static unsigned long nofreeq_size;
SYSCTL_ULONG(_vm_stats_page, OID_AUTO, nofreeq_size, CTLFLAG_RD,
&nofreeq_size, 0,
"Size of the nofree queue");
vm_page_t bogus_page;
vm_page_t vm_page_array;
long vm_page_array_size;
long first_page;
struct bitset *vm_page_dump;
long vm_page_dump_pages;
static TAILQ_HEAD(, vm_page) blacklist_head;
static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
static uma_zone_t fakepg_zone;
static void vm_page_alloc_check(vm_page_t m);
static vm_page_t vm_page_alloc_nofree_domain(int domain, int req);
static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_enqueue(vm_page_t m, uint8_t queue);
static bool vm_page_free_prep(vm_page_t m);
static void vm_page_free_toq(vm_page_t m);
static void vm_page_init(void *dummy);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object);
static void vm_page_mvqueue(vm_page_t m, const uint8_t queue,
const uint16_t nflag);
static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
vm_page_t m_run, vm_paddr_t high);
static void vm_page_release_toq(vm_page_t m, uint8_t nqueue, bool noreuse);
static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
int req);
static int vm_page_zone_import(void *arg, void **store, int cnt, int domain,
int flags);
static void vm_page_zone_release(void *arg, void **store, int cnt);
SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
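/*
 * Create the fake page zone and allocate bogus_page, a permanently wired,
 * never-freed placeholder page.
 */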
static void
vm_page_init(void *dummy)
{
fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
bogus_page = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_NOFREE);
}
static int pgcache_zone_max_pcpu;
SYSCTL_INT(_vm, OID_AUTO, pgcache_zone_max_pcpu,
CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pgcache_zone_max_pcpu, 0,
"Per-CPU page cache size");
static void
vm_page_init_cache_zones(void *dummy __unused)
{
struct vm_domain *vmd;
struct vm_pgcache *pgcache;
int cache, domain, maxcache, pool;
TUNABLE_INT_FETCH("vm.pgcache_zone_max_pcpu", &pgcache_zone_max_pcpu);
maxcache = pgcache_zone_max_pcpu * mp_ncpus;
for (domain = 0; domain < vm_ndomains; domain++) {
vmd = VM_DOMAIN(domain);
for (pool = 0; pool < VM_NFREEPOOL; pool++) {
#ifdef VM_FREEPOOL_LAZYINIT
if (pool == VM_FREEPOOL_LAZYINIT)
continue;
#endif
pgcache = &vmd->vmd_pgcache[pool];
pgcache->domain = domain;
pgcache->pool = pool;
pgcache->zone = uma_zcache_create("vm pgcache",
PAGE_SIZE, NULL, NULL, NULL, NULL,
vm_page_zone_import, vm_page_zone_release, pgcache,
UMA_ZONE_VM);
cache = maxcache != 0 ? maxcache :
vmd->vmd_page_count / 1000;
uma_zone_set_maxcache(pgcache->zone, cache);
}
}
}
SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL);
#if PAGE_SIZE == 32768
#ifdef CTASSERT
CTASSERT(sizeof(u_long) >= 8);
#endif
#endif
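/*
 * Initialize the system page size if it has not already been set and verify
 * that it is a power of two.
 */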
void
vm_set_page_size(void)
{
if (vm_cnt.v_page_size == 0)
vm_cnt.v_page_size = PAGE_SIZE;
if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0)
panic("vm_set_page_size: page size not a power of two");
}
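/*
 * Parse the next physical address from a RAM blacklist string.  Entries are
 * separated by spaces, commas, or newlines.  Returns 0 and clears *list when
 * the string is exhausted or malformed.
 */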
static vm_paddr_t
vm_page_blacklist_next(char **list, char *end)
{
vm_paddr_t bad;
char *cp, *pos;
if (list == NULL || *list == NULL)
return (0);
	if (**list == '\0') {
*list = NULL;
return (0);
}
if (end == NULL)
end = *list + strlen(*list);
if (*end != '\0') {
if (*end == '\n' || *end == ' ' || *end == ',')
*end = '\0';
else {
printf("Blacklist not terminated, skipping\n");
*list = NULL;
return (0);
}
}
for (pos = *list; *pos != '\0'; pos = cp) {
bad = strtoq(pos, &cp, 0);
if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') {
if (bad == 0) {
if (++cp < end)
continue;
else
break;
}
} else
break;
if (*cp == '\0' || ++cp >= end)
*list = NULL;
else
*list = cp;
return (trunc_page(bad));
}
printf("Garbage in RAM blacklist, skipping\n");
*list = NULL;
return (0);
}
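/*
 * Pull the page backing the given physical address out of the free lists and
 * record it on the blacklist so that it is never allocated.  Returns true if
 * the page was captured or does not exist, and false if it could not be
 * removed from the free lists.
 */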
bool
vm_page_blacklist_add(vm_paddr_t pa, bool verbose)
{
struct vm_domain *vmd;
vm_page_t m;
bool found;
m = vm_phys_paddr_to_vm_page(pa);
if (m == NULL)
return (true);
vmd = VM_DOMAIN(vm_phys_domain(pa));
vm_domain_free_lock(vmd);
found = vm_phys_unfree_page(pa);
vm_domain_free_unlock(vmd);
if (found) {
vm_domain_freecnt_inc(vmd, -1);
TAILQ_INSERT_TAIL(&blacklist_head, m, plinks.q);
if (verbose)
printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa);
}
return (found);
}
static void
vm_page_blacklist_check(char *list, char *end)
{
vm_paddr_t pa;
char *next;
next = list;
while (next != NULL) {
if ((pa = vm_page_blacklist_next(&next, end)) == 0)
continue;
vm_page_blacklist_add(pa, bootverbose);
}
}
static void
vm_page_blacklist_load(char **list, char **end)
{
void *mod;
u_char *ptr;
u_int len;
mod = NULL;
ptr = NULL;
mod = preload_search_by_type("ram_blacklist");
if (mod != NULL) {
ptr = preload_fetch_addr(mod);
len = preload_fetch_size(mod);
}
if (ptr != NULL && len > 0) {
*list = ptr;
*end = ptr + len - 1;
} else {
*list = NULL;
*end = NULL;
}
return;
}
static int
sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
{
vm_page_t m;
struct sbuf sbuf;
int error, first;
first = 1;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
TAILQ_FOREACH(m, &blacklist_head, plinks.q) {
sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
(uintmax_t)m->phys_addr);
first = 0;
}
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
return (error);
}
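/*
 * Initialize a dummy page used to mark a position in a page queue.  Such
 * pages carry PG_MARKER and are skipped by queue scans.
 */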
void
vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags)
{
bzero(marker, sizeof(*marker));
marker->flags = PG_MARKER;
marker->a.flags = aflags;
marker->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
marker->a.queue = queue;
}
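/*
 * Initialize a per-domain structure: page queue names, mutexes, queue
 * markers, and the active queue's clock hands.
 */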
static void
vm_page_domain_init(int domain)
{
struct vm_domain *vmd;
struct vm_pagequeue *pq;
int i;
vmd = VM_DOMAIN(domain);
bzero(vmd, sizeof(*vmd));
*__DECONST(const char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
"vm inactive pagequeue";
*__DECONST(const char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
"vm active pagequeue";
*__DECONST(const char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
"vm laundry pagequeue";
*__DECONST(const char **,
&vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) =
"vm unswappable pagequeue";
vmd->vmd_domain = domain;
vmd->vmd_page_count = 0;
vmd->vmd_free_count = 0;
vmd->vmd_segs = 0;
vmd->vmd_oom = false;
vmd->vmd_helper_threads_enabled = true;
for (i = 0; i < PQ_COUNT; i++) {
pq = &vmd->vmd_pagequeues[i];
TAILQ_INIT(&pq->pq_pl);
mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
MTX_DEF | MTX_DUPOK);
pq->pq_pdpages = 0;
vm_page_init_marker(&vmd->vmd_markers[i], i, 0);
}
mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
mtx_init(&vmd->vmd_pageout_mtx, "vm pageout lock", NULL, MTX_DEF);
snprintf(vmd->vmd_name, sizeof(vmd->vmd_name), "%d", domain);
vm_page_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE, PGA_ENQUEUED);
TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
&vmd->vmd_inacthead, plinks.q);
vm_page_init_marker(&vmd->vmd_clock[0], PQ_ACTIVE, PGA_ENQUEUED);
vm_page_init_marker(&vmd->vmd_clock[1], PQ_ACTIVE, PGA_ENQUEUED);
TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl,
&vmd->vmd_clock[0], plinks.q);
TAILQ_INSERT_TAIL(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl,
&vmd->vmd_clock[1], plinks.q);
}
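/*
 * Initialize a vm_page structure for the given physical address, physical
 * segment index, and free pool.
 */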
void
vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind, int pool)
{
m->object = NULL;
m->ref_count = 0;
m->busy_lock = VPB_FREED;
m->flags = m->a.flags = 0;
m->phys_addr = pa;
m->a.queue = PQ_NONE;
m->psind = 0;
m->segind = segind;
m->order = VM_NFREEORDER;
m->pool = pool;
m->valid = m->dirty = 0;
pmap_page_init(m);
}
#ifndef PMAP_HAS_PAGE_ARRAY
static vm_paddr_t
vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
{
vm_paddr_t new_end;
*vaddr += PAGE_SIZE;
new_end = trunc_page(end - page_range * sizeof(struct vm_page));
vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end,
VM_PROT_READ | VM_PROT_WRITE);
vm_page_array_size = page_range;
return (new_end);
}
#endif
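/*
 * Initialize the resident memory module: set up per-domain state, allocate
 * the vm_page array and minidump bitset from the largest chunk of available
 * physical memory, hand the remaining free pages to the physical memory
 * allocator, and process any RAM blacklist.  Returns the updated kernel
 * virtual address available for early allocations.
 */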
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
struct vm_phys_seg *seg;
struct vm_domain *vmd;
vm_page_t m;
char *list, *listend;
vm_paddr_t end, high_avail, low_avail, new_end, size;
vm_paddr_t page_range __unused;
vm_paddr_t last_pa, pa, startp, endp;
u_long pagecount;
#if MINIDUMP_PAGE_TRACKING
u_long vm_page_dump_size;
#endif
int biggestone, i, segind;
#ifdef WITNESS
vm_offset_t mapped;
int witness_size;
#endif
#if defined(__i386__) && defined(VM_PHYSSEG_DENSE)
long ii;
#endif
int pool;
#ifdef VM_FREEPOOL_LAZYINIT
int lazyinit;
#endif
vaddr = round_page(vaddr);
vm_phys_early_startup();
biggestone = vm_phys_avail_largest();
end = phys_avail[biggestone+1];
mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF);
for (i = 0; i < vm_ndomains; i++)
vm_page_domain_init(i);
new_end = end;
#ifdef WITNESS
witness_size = round_page(witness_startup_count());
new_end -= witness_size;
mapped = pmap_map(&vaddr, new_end, new_end + witness_size,
VM_PROT_READ | VM_PROT_WRITE);
bzero((void *)mapped, witness_size);
witness_startup((void *)mapped);
#endif
#if MINIDUMP_PAGE_TRACKING
last_pa = 0;
vm_page_dump_pages = 0;
for (i = 0; dump_avail[i + 1] != 0; i += 2) {
vm_page_dump_pages += howmany(dump_avail[i + 1], PAGE_SIZE) -
dump_avail[i] / PAGE_SIZE;
if (dump_avail[i + 1] > last_pa)
last_pa = dump_avail[i + 1];
}
vm_page_dump_size = round_page(BITSET_SIZE(vm_page_dump_pages));
new_end -= vm_page_dump_size;
vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
bzero((void *)vm_page_dump, vm_page_dump_size);
#if MINIDUMP_STARTUP_PAGE_TRACKING
for (pa = new_end; pa < end; pa += PAGE_SIZE)
dump_add_page(pa);
#endif
#else
(void)last_pa;
#endif
phys_avail[biggestone + 1] = new_end;
#ifdef __amd64__
pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
last_pa = pa + round_page(msgbufsize);
while (pa < last_pa) {
dump_add_page(pa);
pa += PAGE_SIZE;
}
#else
(void)pa;
#endif
#ifdef VM_PHYSSEG_SPARSE
size = phys_avail[1] - phys_avail[0];
#endif
low_avail = phys_avail[0];
high_avail = phys_avail[1];
for (i = 2; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_PHYSSEG_SPARSE
size += phys_avail[i + 1] - phys_avail[i];
#endif
if (phys_avail[i] < low_avail)
low_avail = phys_avail[i];
if (phys_avail[i + 1] > high_avail)
high_avail = phys_avail[i + 1];
}
for (i = 0; i < vm_phys_nsegs; i++) {
#ifdef VM_PHYSSEG_SPARSE
size += vm_phys_segs[i].end - vm_phys_segs[i].start;
#endif
if (vm_phys_segs[i].start < low_avail)
low_avail = vm_phys_segs[i].start;
if (vm_phys_segs[i].end > high_avail)
high_avail = vm_phys_segs[i].end;
}
first_page = low_avail / PAGE_SIZE;
#ifdef VM_PHYSSEG_DENSE
size = high_avail - low_avail;
#endif
#ifdef PMAP_HAS_PAGE_ARRAY
pmap_page_array_startup(size / PAGE_SIZE);
biggestone = vm_phys_avail_largest();
end = new_end = phys_avail[biggestone + 1];
#else
#ifdef VM_PHYSSEG_DENSE
if (new_end != high_avail)
page_range = size / PAGE_SIZE;
else
#endif
{
page_range = size / (PAGE_SIZE + sizeof(struct vm_page));
if (size % (PAGE_SIZE + sizeof(struct vm_page)) >= PAGE_SIZE) {
if (new_end == high_avail)
high_avail -= PAGE_SIZE;
new_end -= PAGE_SIZE;
}
}
end = new_end;
new_end = vm_page_array_alloc(&vaddr, end, page_range);
#endif
#if VM_NRESERVLEVEL > 0
new_end = vm_reserv_startup(&vaddr, new_end);
#endif
#if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING
for (pa = new_end; pa < end; pa += PAGE_SIZE)
dump_add_page(pa);
#endif
phys_avail[biggestone + 1] = new_end;
for (i = 0; phys_avail[i + 1] != 0; i += 2)
vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
vm_phys_init();
pool = VM_FREEPOOL_DEFAULT;
#ifdef VM_FREEPOOL_LAZYINIT
lazyinit = 1;
TUNABLE_INT_FETCH("debug.vm.lazy_page_init", &lazyinit);
if (lazyinit)
pool = VM_FREEPOOL_LAZYINIT;
#endif
#if defined(__i386__) && defined(VM_PHYSSEG_DENSE)
for (ii = 0; ii < vm_page_array_size; ii++) {
m = &vm_page_array[ii];
vm_page_init_page(m, (first_page + ii) << PAGE_SHIFT, 0,
VM_FREEPOOL_DEFAULT);
m->flags = PG_FICTITIOUS;
}
#endif
vm_cnt.v_page_count = 0;
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
startp = seg->start;
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
if (startp >= seg->end)
break;
if (phys_avail[i + 1] < startp)
continue;
if (phys_avail[i] <= startp) {
startp = phys_avail[i + 1];
continue;
}
m = vm_phys_seg_paddr_to_vm_page(seg, startp);
for (endp = MIN(phys_avail[i], seg->end);
startp < endp; startp += PAGE_SIZE, m++) {
vm_page_init_page(m, startp, segind,
VM_FREEPOOL_DEFAULT);
}
}
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
if (seg->end <= phys_avail[i] ||
seg->start >= phys_avail[i + 1])
continue;
startp = MAX(seg->start, phys_avail[i]);
endp = MIN(seg->end, phys_avail[i + 1]);
pagecount = (u_long)atop(endp - startp);
if (pagecount == 0)
continue;
m = vm_phys_seg_paddr_to_vm_page(seg, startp);
vm_page_init_page(m, startp, segind, pool);
if (pool == VM_FREEPOOL_DEFAULT) {
for (u_long j = 1; j < pagecount; j++) {
vm_page_init_page(&m[j],
startp + ptoa((vm_paddr_t)j),
segind, pool);
}
}
vmd = VM_DOMAIN(seg->domain);
vm_domain_free_lock(vmd);
vm_phys_enqueue_contig(m, pool, pagecount);
vm_domain_free_unlock(vmd);
vm_domain_freecnt_inc(vmd, pagecount);
vm_cnt.v_page_count += (u_int)pagecount;
vmd->vmd_page_count += (u_int)pagecount;
vmd->vmd_segs |= 1UL << segind;
}
}
TAILQ_INIT(&blacklist_head);
vm_page_blacklist_load(&list, &listend);
vm_page_blacklist_check(list, listend);
list = kern_getenv("vm.blacklist");
vm_page_blacklist_check(list, NULL);
freeenv(list);
#if VM_NRESERVLEVEL > 0
vm_reserv_init();
#endif
return (vaddr);
}
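/*
 * Record that the page has been referenced by setting PGA_REFERENCED.
 */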
void
vm_page_reference(vm_page_t m)
{
vm_page_aflag_set(m, PGA_REFERENCED);
}
static bool
vm_page_trybusy(vm_page_t m, int allocflags)
{
if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0)
return (vm_page_trysbusy(m));
else
return (vm_page_tryxbusy(m));
}
static inline bool
vm_page_tryacquire(vm_page_t m, int allocflags)
{
bool locked;
locked = vm_page_trybusy(m, allocflags);
if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
return (locked);
}
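/*
 * Busy the page according to the allocation flags, sleeping as needed until
 * the busy lock is obtained.  Returns false only when VM_ALLOC_NOWAIT or
 * VM_ALLOC_WAITFAIL prevents waiting any further.
 */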
bool
vm_page_busy_acquire(vm_page_t m, int allocflags)
{
vm_object_t obj;
bool locked;
obj = atomic_load_ptr(&m->object);
for (;;) {
if (vm_page_tryacquire(m, allocflags))
return (true);
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (false);
if (obj != NULL)
locked = VM_OBJECT_WOWNED(obj);
else
locked = false;
MPASS(locked || vm_page_wired(m));
if (_vm_page_busy_sleep(obj, m, m->pindex, "vmpba", allocflags,
locked) && locked)
VM_OBJECT_WLOCK(obj);
if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
return (false);
KASSERT(m->object == obj || m->object == NULL,
("vm_page_busy_acquire: page %p does not belong to %p",
m, obj));
}
}
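/*
 * Downgrade an exclusive busy lock to a single shared busy reference, waking
 * any waiters.
 */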
void
vm_page_busy_downgrade(vm_page_t m)
{
u_int x;
vm_page_assert_xbusied(m);
x = vm_page_busy_fetch(m);
for (;;) {
if (atomic_fcmpset_rel_int(&m->busy_lock,
&x, VPB_SHARERS_WORD(1)))
break;
}
if ((x & VPB_BIT_WAITERS) != 0)
wakeup(m);
}
int
vm_page_busy_tryupgrade(vm_page_t m)
{
u_int ce, x;
vm_page_assert_sbusied(m);
x = vm_page_busy_fetch(m);
ce = VPB_CURTHREAD_EXCLUSIVE;
for (;;) {
if (VPB_SHARERS(x) > 1)
return (0);
KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
("vm_page_busy_tryupgrade: invalid lock state"));
if (!atomic_fcmpset_acq_int(&m->busy_lock, &x,
ce | (x & VPB_BIT_WAITERS)))
continue;
return (1);
}
}
int
vm_page_sbusied(vm_page_t m)
{
u_int x;
x = vm_page_busy_fetch(m);
return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED);
}
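/*
 * Release a shared busy reference, waking waiters when the last sharer drops
 * the lock.
 */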
void
vm_page_sunbusy(vm_page_t m)
{
u_int x;
vm_page_assert_sbusied(m);
x = vm_page_busy_fetch(m);
for (;;) {
KASSERT(x != VPB_FREED,
("vm_page_sunbusy: Unlocking freed page."));
if (VPB_SHARERS(x) > 1) {
if (atomic_fcmpset_int(&m->busy_lock, &x,
x - VPB_ONE_SHARER))
break;
continue;
}
KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
("vm_page_sunbusy: invalid lock state"));
if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
continue;
if ((x & VPB_BIT_WAITERS) == 0)
break;
wakeup(m);
break;
}
}
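/*
 * Sleep until the page is no longer busy.  The object lock is held on entry
 * and is dropped if the thread actually sleeps; the return value indicates
 * whether a sleep occurred.
 */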
bool
vm_page_busy_sleep(vm_page_t m, const char *wmesg, int allocflags)
{
vm_object_t obj;
obj = m->object;
VM_OBJECT_ASSERT_LOCKED(obj);
return (_vm_page_busy_sleep(obj, m, m->pindex, wmesg, allocflags,
true));
}
void
vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m, vm_pindex_t pindex,
const char *wmesg, int allocflags)
{
VM_OBJECT_ASSERT_UNLOCKED(obj);
(void)_vm_page_busy_sleep(obj, m, pindex, wmesg, allocflags, false);
}
static bool
_vm_page_busy_sleep(vm_object_t obj, vm_page_t m, vm_pindex_t pindex,
const char *wmesg, int allocflags, bool locked)
{
bool xsleep;
u_int x;
if (obj != NULL && vm_object_busied(obj)) {
if (locked)
VM_OBJECT_DROP(obj);
vm_object_busy_wait(obj, wmesg);
return (true);
}
if (!vm_page_busied(m))
return (false);
xsleep = (allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0;
sleepq_lock(m);
x = vm_page_busy_fetch(m);
do {
if (x == VPB_UNBUSIED ||
(xsleep && (x & VPB_BIT_SHARED) != 0) ||
m->object != obj || m->pindex != pindex) {
sleepq_release(m);
return (false);
}
if ((x & VPB_BIT_WAITERS) != 0)
break;
} while (!atomic_fcmpset_int(&m->busy_lock, &x, x | VPB_BIT_WAITERS));
if (locked)
VM_OBJECT_DROP(obj);
DROP_GIANT();
sleepq_add(m, NULL, wmesg, 0, 0);
sleepq_wait(m, PVM);
PICKUP_GIANT();
return (true);
}
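/*
 * Try to acquire a shared busy reference without sleeping.  Fails if the
 * page is exclusively busied, freed, or if its object is busied.
 */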
int
vm_page_trysbusy(vm_page_t m)
{
vm_object_t obj;
u_int x;
obj = m->object;
x = vm_page_busy_fetch(m);
for (;;) {
if ((x & VPB_BIT_SHARED) == 0)
return (0);
if (obj != NULL && vm_object_busied(obj))
return (0);
if (atomic_fcmpset_acq_int(&m->busy_lock, &x,
x + VPB_ONE_SHARER))
break;
}
obj = m->object;
if (obj != NULL && vm_object_busied(obj)) {
vm_page_sunbusy(m);
return (0);
}
return (1);
}
int
vm_page_tryxbusy(vm_page_t m)
{
vm_object_t obj;
if (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED,
VPB_CURTHREAD_EXCLUSIVE) == 0)
return (0);
obj = m->object;
if (obj != NULL && vm_object_busied(obj)) {
vm_page_xunbusy(m);
return (0);
}
return (1);
}
static void
vm_page_xunbusy_hard_tail(vm_page_t m)
{
atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
wakeup(m);
}
void
vm_page_xunbusy_hard(vm_page_t m)
{
vm_page_assert_xbusied(m);
vm_page_xunbusy_hard_tail(m);
}
void
vm_page_xunbusy_hard_unchecked(vm_page_t m)
{
vm_page_assert_xbusied_unchecked(m);
vm_page_xunbusy_hard_tail(m);
}
static void
vm_page_busy_free(vm_page_t m)
{
u_int x;
atomic_thread_fence_rel();
x = atomic_swap_int(&m->busy_lock, VPB_FREED);
if ((x & VPB_BIT_WAITERS) != 0)
wakeup(m);
}
void
vm_page_unhold_pages(vm_page_t *ma, int count)
{
for (; count != 0; count--) {
vm_page_unwire(*ma, PQ_ACTIVE);
ma++;
}
}
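/*
 * Return the vm_page corresponding to a physical address, falling back to
 * the fictitious page lookup for addresses outside the managed physical
 * segments.
 */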
vm_page_t
PHYS_TO_VM_PAGE(vm_paddr_t pa)
{
vm_page_t m;
#ifdef VM_PHYSSEG_SPARSE
m = vm_phys_paddr_to_vm_page(pa);
if (m == NULL)
m = vm_phys_fictitious_to_vm_page(pa);
return (m);
#elif defined(VM_PHYSSEG_DENSE)
long pi;
pi = atop(pa);
if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
m = &vm_page_array[pi - first_page];
return (m);
}
return (vm_phys_fictitious_to_vm_page(pa));
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
}
vm_page_t
vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
{
vm_page_t m;
m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
vm_page_initfake(m, paddr, memattr);
return (m);
}
void
vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{
if ((m->flags & PG_FICTITIOUS) != 0) {
goto memattr;
}
m->phys_addr = paddr;
m->a.queue = PQ_NONE;
m->flags = PG_FICTITIOUS;
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
m->ref_count = 1;
pmap_page_init(m);
memattr:
pmap_page_set_memattr(m, memattr);
}
void
vm_page_putfake(vm_page_t m)
{
KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
KASSERT((m->flags & PG_FICTITIOUS) != 0,
("vm_page_putfake: bad page %p", m));
vm_page_assert_xbusied(m);
vm_page_busy_free(m);
uma_zfree(fakepg_zone, m);
}
void
vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{
KASSERT((m->flags & PG_FICTITIOUS) != 0,
("vm_page_updatefake: bad page %p", m));
m->phys_addr = paddr;
pmap_page_set_memattr(m, memattr);
}
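/*
 * Free a page, clearing PG_ZERO since its contents are not known to be
 * zeroed.  vm_page_free_zero() is the variant for known-zero pages.
 */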
void
vm_page_free(vm_page_t m)
{
m->flags &= ~PG_ZERO;
vm_page_free_toq(m);
}
void
vm_page_free_zero(vm_page_t m)
{
m->flags |= PG_ZERO;
vm_page_free_toq(m);
}
void
vm_page_readahead_finish(vm_page_t m)
{
KASSERT(!vm_page_none_valid(m), ("%s: %p is invalid", __func__, m));
if ((vm_page_busy_fetch(m) & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
vm_page_xunbusy_unchecked(m);
}
void
vm_page_free_invalid(vm_page_t m)
{
KASSERT(vm_page_none_valid(m), ("page %p is valid", m));
KASSERT(!pmap_page_is_mapped(m), ("page %p is mapped", m));
KASSERT(m->object != NULL, ("page %p has no object", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
vm_page_xbusy_claim(m);
if (vm_page_remove(m))
vm_page_free(m);
}
void
vm_page_dirty_KBI(vm_page_t m)
{
KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!"));
m->dirty = VM_PAGE_BITS_ALL;
}
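/*
 * Common implementation of page insertion into an object's radix trie,
 * either directly or through an iterator.  Returns 0 on success and 1 if
 * the trie insertion failed.
 *
 * The object must be write locked.
 */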
static __always_inline int
vm_page_insert_lookup(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
bool iter, struct pctrie_iter *pages)
{
int error;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(m->object == NULL,
("vm_page_insert: page %p already inserted", m));
m->object = object;
m->pindex = pindex;
m->ref_count |= VPRC_OBJREF;
if (iter)
error = vm_radix_iter_insert(pages, m);
else
error = vm_radix_insert(&object->rtree, m);
if (__predict_false(error != 0)) {
m->object = NULL;
m->pindex = 0;
m->ref_count &= ~VPRC_OBJREF;
return (1);
}
vm_page_insert_radixdone(m, object);
vm_pager_page_inserted(object, m);
return (0);
}
int
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
return (vm_page_insert_lookup(m, object, pindex, false, NULL));
}
int
vm_page_iter_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
struct pctrie_iter *pages)
{
return (vm_page_insert_lookup(m, object, pindex, true, pages));
}
static void
vm_page_insert_radixdone(vm_page_t m, vm_object_t object)
{
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object != NULL && m->object == object,
("vm_page_insert_radixdone: page %p has inconsistent object", m));
KASSERT((m->ref_count & VPRC_OBJREF) != 0,
("vm_page_insert_radixdone: page %p is missing object ref", m));
object->resident_page_count++;
if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
vhold(object->handle);
if (pmap_page_is_write_mapped(m))
vm_object_set_writeable_dirty(object);
}
static void
vm_page_remove_radixdone(vm_page_t m)
{
vm_object_t object;
vm_page_assert_xbusied(m);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((m->ref_count & VPRC_OBJREF) != 0,
("page %p is missing its object ref", m));
if ((m->a.flags & PGA_SWAP_FREE) != 0)
vm_pager_page_unswapped(m);
vm_pager_page_removed(object, m);
m->object = NULL;
object->resident_page_count--;
if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
vdrop(object->handle);
}
static void
vm_page_free_object_prep(vm_page_t m)
{
KASSERT(((m->oflags & VPO_UNMANAGED) != 0) ==
((m->object->flags & OBJ_UNMANAGED) != 0),
("%s: managed flag mismatch for page %p",
__func__, m));
vm_page_assert_xbusied(m);
KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
m->ref_count == VPRC_OBJREF,
("%s: page %p has unexpected ref_count %u",
__func__, m, m->ref_count));
vm_page_remove_radixdone(m);
m->ref_count -= VPRC_OBJREF;
}
void
vm_page_iter_free(struct pctrie_iter *pages, vm_page_t m)
{
vm_radix_iter_remove(pages);
vm_page_free_object_prep(m);
vm_page_xunbusy(m);
m->flags &= ~PG_ZERO;
vm_page_free_toq(m);
}
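/*
 * Remove the page from its object, unbusy it, and release the object
 * reference.  Returns true if that reference was the last reference to the
 * page, in which case the caller is responsible for freeing it.
 */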
bool
vm_page_remove(vm_page_t m)
{
bool dropped;
dropped = vm_page_remove_xbusy(m);
vm_page_xunbusy(m);
return (dropped);
}
bool
vm_page_iter_remove(struct pctrie_iter *pages, vm_page_t m)
{
bool dropped;
vm_radix_iter_remove(pages);
vm_page_remove_radixdone(m);
dropped = (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
vm_page_xunbusy(m);
return (dropped);
}
static void
vm_page_radix_remove(vm_page_t m)
{
vm_page_t mrem __diagused;
mrem = vm_radix_remove(&m->object->rtree, m->pindex);
KASSERT(mrem == m,
("removed page %p, expected page %p", mrem, m));
}
bool
vm_page_remove_xbusy(vm_page_t m)
{
vm_page_radix_remove(m);
vm_page_remove_radixdone(m);
return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
}
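/*
 * Return the page at the given index in the object's radix trie, or NULL if
 * there is none.  The object must be locked.
 */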
vm_page_t
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
VM_OBJECT_ASSERT_LOCKED(object);
return (vm_radix_lookup(&object->rtree, pindex));
}
void
vm_page_iter_init(struct pctrie_iter *pages, vm_object_t object)
{
vm_radix_iter_init(pages, &object->rtree);
}
void
vm_page_iter_limit_init(struct pctrie_iter *pages, vm_object_t object,
vm_pindex_t limit)
{
vm_radix_iter_limit_init(pages, &object->rtree, limit);
}
vm_page_t
vm_page_lookup_unlocked(vm_object_t object, vm_pindex_t pindex)
{
return (vm_radix_lookup_unlocked(&object->rtree, pindex));
}
vm_page_t
vm_page_relookup(vm_object_t object, vm_pindex_t pindex)
{
vm_page_t m;
m = vm_page_lookup_unlocked(object, pindex);
KASSERT(m != NULL && (vm_page_busied(m) || vm_page_wired(m)) &&
m->object == object && m->pindex == pindex,
("vm_page_relookup: Invalid page %p", m));
return (m);
}
static void
vm_page_busy_release(vm_page_t m)
{
u_int x;
x = vm_page_busy_fetch(m);
for (;;) {
if (x == VPB_FREED)
break;
if ((x & VPB_BIT_SHARED) != 0 && VPB_SHARERS(x) > 1) {
if (atomic_fcmpset_int(&m->busy_lock, &x,
x - VPB_ONE_SHARER))
break;
continue;
}
KASSERT((x & VPB_BIT_SHARED) != 0 ||
(x & ~VPB_BIT_WAITERS) == VPB_CURTHREAD_EXCLUSIVE,
("vm_page_busy_release: %p xbusy not owned.", m));
if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
continue;
if ((x & VPB_BIT_WAITERS) != 0)
wakeup(m);
break;
}
}
static bool
vm_page_replace_hold(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
vm_page_t mold)
{
vm_page_t mret __diagused;
bool dropped;
VM_OBJECT_ASSERT_WLOCKED(object);
vm_page_assert_xbusied(mold);
KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0,
("vm_page_replace: page %p already in object", mnew));
mnew->object = object;
mnew->pindex = pindex;
atomic_set_int(&mnew->ref_count, VPRC_OBJREF);
mret = vm_radix_replace(&object->rtree, mnew);
KASSERT(mret == mold,
("invalid page replacement, mold=%p, mret=%p", mold, mret));
KASSERT((mold->oflags & VPO_UNMANAGED) ==
(mnew->oflags & VPO_UNMANAGED),
("vm_page_replace: mismatched VPO_UNMANAGED"));
mold->object = NULL;
if (pmap_page_is_write_mapped(mnew))
vm_object_set_writeable_dirty(object);
dropped = vm_page_drop(mold, VPRC_OBJREF) == VPRC_OBJREF;
vm_page_xunbusy(mold);
return (dropped);
}
void
vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
vm_page_t mold)
{
vm_page_assert_xbusied(mnew);
if (vm_page_replace_hold(mnew, object, pindex, mold))
vm_page_free(mold);
}
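/*
 * Move a page from its current object to a new object and pindex, using an
 * iterator positioned at the page to remove it from the old trie.  Returns
 * false if insertion into the new trie fails.  Both objects must be write
 * locked.
 */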
bool
vm_page_iter_rename(struct pctrie_iter *old_pages, vm_page_t m,
vm_object_t new_object, vm_pindex_t new_pindex)
{
vm_pindex_t opidx;
KASSERT((m->ref_count & VPRC_OBJREF) != 0,
("%s: page %p is missing object ref", __func__, m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
VM_OBJECT_ASSERT_WLOCKED(new_object);
opidx = m->pindex;
m->pindex = new_pindex;
if (vm_radix_insert(&new_object->rtree, m) != 0) {
m->pindex = opidx;
return (false);
}
m->pindex = opidx;
vm_radix_iter_remove(old_pages);
vm_page_remove_radixdone(m);
m->pindex = new_pindex;
m->object = new_object;
vm_page_insert_radixdone(m, new_object);
if (vm_page_any_valid(m))
vm_page_dirty(m);
vm_pager_page_inserted(new_object, m);
return (true);
}
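/*
 * Allocate a page and insert it into the object at the given index,
 * honoring the object's domain selection policy.  This is a wrapper around
 * vm_page_alloc_iter() with a freshly initialized iterator.
 */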
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
struct pctrie_iter pages;
vm_page_iter_init(&pages, object);
return (vm_page_alloc_iter(object, pindex, req, &pages));
}
vm_page_t
vm_page_alloc_iter(vm_object_t object, vm_pindex_t pindex, int req,
struct pctrie_iter *pages)
{
struct vm_domainset_iter di;
vm_page_t m;
int domain;
if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0)
return (NULL);
do {
m = vm_page_alloc_domain_iter(object, pindex, domain, req,
pages);
if (m != NULL)
break;
} while (vm_domainset_iter_page(&di, object, &domain, pages) == 0);
return (m);
}
static int
_vm_domain_allocate(struct vm_domain *vmd, int req_class, int npages)
{
u_int limit, old, new;
if (req_class == VM_ALLOC_INTERRUPT)
limit = 0;
else if (req_class == VM_ALLOC_SYSTEM)
limit = vmd->vmd_interrupt_free_min;
else
limit = vmd->vmd_free_reserved;
limit += npages;
old = atomic_load_int(&vmd->vmd_free_count);
do {
if (old < limit)
return (0);
new = old - npages;
} while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0);
if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old))
pagedaemon_wakeup(vmd->vmd_domain);
if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) ||
(old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe))
vm_domain_set(vmd);
return (1);
}
int
vm_domain_allocate(struct vm_domain *vmd, int req, int npages)
{
int req_class;
req_class = req & VM_ALLOC_CLASS_MASK;
if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
req_class = VM_ALLOC_SYSTEM;
return (_vm_domain_allocate(vmd, req_class, npages));
}
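/*
 * Allocate a page from the given domain and insert it into the object.  The
 * page is taken, in order of preference, from the NOFREE queue (for
 * VM_ALLOC_NOFREE requests), a reservation, the per-CPU page cache, or the
 * physical allocator's free lists.  Returns NULL on failure, possibly after
 * waiting when VM_ALLOC_WAITFAIL is set.
 *
 * The object must be write locked.
 */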
vm_page_t
vm_page_alloc_domain_iter(vm_object_t object, vm_pindex_t pindex, int domain,
int req, struct pctrie_iter *pages)
{
struct vm_domain *vmd;
vm_page_t m;
int flags;
#define VM_ALLOC_COMMON (VM_ALLOC_CLASS_MASK | VM_ALLOC_NODUMP | \
VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | \
VM_ALLOC_WIRED | VM_ALLOC_ZERO)
#define VPA_FLAGS (VM_ALLOC_COMMON | VM_ALLOC_COUNT_MASK | \
VM_ALLOC_NOBUSY | VM_ALLOC_NOFREE | \
VM_ALLOC_SBUSY)
KASSERT((req & ~VPA_FLAGS) == 0,
("invalid request %#x", req));
KASSERT(((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
("invalid request %#x", req));
VM_OBJECT_ASSERT_WLOCKED(object);
flags = 0;
m = NULL;
if (!vm_pager_can_alloc_page(object, pindex))
return (NULL);
#if VM_NRESERVLEVEL > 0
again:
#endif
if (__predict_false((req & VM_ALLOC_NOFREE) != 0)) {
m = vm_page_alloc_nofree_domain(domain, req);
if (m != NULL)
goto found;
}
#if VM_NRESERVLEVEL > 0
if (vm_object_reserv(object) &&
(m = vm_reserv_alloc_page(object, pindex, domain, req, pages)) !=
NULL) {
goto found;
}
#endif
vmd = VM_DOMAIN(domain);
if (vmd->vmd_pgcache[VM_FREEPOOL_DEFAULT].zone != NULL) {
m = uma_zalloc(vmd->vmd_pgcache[VM_FREEPOOL_DEFAULT].zone,
M_NOWAIT | M_NOVM);
if (m != NULL) {
flags |= PG_PCPU_CACHE;
goto found;
}
}
if (vm_domain_allocate(vmd, req, 1)) {
vm_domain_free_lock(vmd);
m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, 0);
vm_domain_free_unlock(vmd);
if (m == NULL) {
vm_domain_freecnt_inc(vmd, 1);
#if VM_NRESERVLEVEL > 0
if (vm_reserv_reclaim_inactive(domain))
goto again;
#endif
}
}
if (m == NULL) {
(void)vm_domain_alloc_fail(vmd, object, req);
if ((req & VM_ALLOC_WAITFAIL) != 0)
pctrie_iter_reset(pages);
return (NULL);
}
found:
vm_page_dequeue(m);
vm_page_alloc_check(m);
flags |= m->flags & PG_ZERO;
if ((req & VM_ALLOC_NODUMP) != 0)
flags |= PG_NODUMP;
if ((req & VM_ALLOC_NOFREE) != 0)
flags |= PG_NOFREE;
m->flags = flags;
m->a.flags = 0;
m->oflags = (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0;
m->pool = VM_FREEPOOL_DEFAULT;
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
else if ((req & VM_ALLOC_SBUSY) != 0)
m->busy_lock = VPB_SHARERS_WORD(1);
else
m->busy_lock = VPB_UNBUSIED;
if (req & VM_ALLOC_WIRED) {
vm_wire_add(1);
m->ref_count = 1;
}
m->a.act_count = 0;
if (vm_page_iter_insert(m, object, pindex, pages)) {
if (req & VM_ALLOC_WIRED) {
vm_wire_sub(1);
m->ref_count = 0;
}
KASSERT(m->object == NULL, ("page %p has object", m));
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_UNBUSIED;
vm_page_free_toq(m);
if (req & VM_ALLOC_WAITFAIL) {
VM_OBJECT_WUNLOCK(object);
vm_radix_wait();
pctrie_iter_reset(pages);
VM_OBJECT_WLOCK(object);
}
return (NULL);
}
if (object->memattr != VM_MEMATTR_DEFAULT &&
(object->flags & OBJ_FICTITIOUS) == 0)
pmap_page_set_memattr(m, object->memattr);
return (m);
}
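/*
 * Allocate a run of npages contiguous pages satisfying the given physical
 * address, alignment, and boundary constraints and insert them into the
 * object, iterating over the allowed domains and skipping domains that have
 * no suitable physical range.
 */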
vm_page_t
vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
struct vm_domainset_iter di;
vm_page_t bounds[2];
vm_page_t m;
int domain;
int start_segind;
start_segind = -1;
if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0)
return (NULL);
do {
m = vm_page_alloc_contig_domain(object, pindex, domain, req,
npages, low, high, alignment, boundary, memattr);
if (m != NULL)
break;
if (start_segind == -1)
start_segind = vm_phys_lookup_segind(low);
if (vm_phys_find_range(bounds, start_segind, domain,
npages, low, high) == -1) {
vm_domainset_iter_ignore(&di, domain);
}
} while (vm_domainset_iter_page(&di, object, &domain, NULL) == 0);
return (m);
}
static vm_page_t
vm_page_find_contig_domain(int domain, int req, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
struct vm_domain *vmd;
vm_page_t m_ret;
vmd = VM_DOMAIN(domain);
if (!vm_domain_allocate(vmd, req, npages))
return (NULL);
vm_domain_free_lock(vmd);
m_ret = vm_phys_alloc_contig(domain, npages, low, high,
alignment, boundary);
vm_domain_free_unlock(vmd);
if (m_ret != NULL)
return (m_ret);
#if VM_NRESERVLEVEL > 0
if ((req & VM_ALLOC_NORECLAIM) == 0) {
m_ret = vm_reserv_reclaim_contig(domain, npages, low,
high, alignment, boundary);
if (m_ret != NULL)
return (m_ret);
}
#endif
vm_domain_freecnt_inc(vmd, npages);
return (NULL);
}
vm_page_t
vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
struct pctrie_iter pages;
vm_page_t m, m_ret, mpred;
u_int busy_lock, flags, oflags;
#define VPAC_FLAGS (VM_ALLOC_COMMON | VM_ALLOC_COUNT_MASK | \
VM_ALLOC_NOBUSY | VM_ALLOC_NORECLAIM | \
VM_ALLOC_SBUSY)
KASSERT((req & ~VPAC_FLAGS) == 0,
("invalid request %#x", req));
KASSERT(((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
("invalid request %#x", req));
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_FICTITIOUS) == 0,
("vm_page_alloc_contig: object %p has fictitious pages",
object));
KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
vm_page_iter_init(&pages, object);
m_ret = NULL;
#if VM_NRESERVLEVEL > 0
if (vm_object_reserv(object)) {
m_ret = vm_reserv_alloc_contig(object, pindex, domain,
req, npages, low, high, alignment, boundary, &pages);
}
#endif
if (m_ret == NULL) {
m_ret = vm_page_find_contig_domain(domain, req, npages,
low, high, alignment, boundary);
}
if (m_ret == NULL) {
(void)vm_domain_alloc_fail(VM_DOMAIN(domain), object, req);
return (NULL);
}
flags = PG_ZERO;
if ((req & VM_ALLOC_NODUMP) != 0)
flags |= PG_NODUMP;
oflags = (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0;
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
busy_lock = VPB_CURTHREAD_EXCLUSIVE;
else if ((req & VM_ALLOC_SBUSY) != 0)
busy_lock = VPB_SHARERS_WORD(1);
else
busy_lock = VPB_UNBUSIED;
if ((req & VM_ALLOC_WIRED) != 0)
vm_wire_add(npages);
if (object->memattr != VM_MEMATTR_DEFAULT &&
memattr == VM_MEMATTR_DEFAULT)
memattr = object->memattr;
for (m = m_ret; m < &m_ret[npages]; m++) {
vm_page_dequeue(m);
vm_page_alloc_check(m);
m->a.flags = 0;
m->flags = (m->flags | PG_NODUMP) & flags;
m->busy_lock = busy_lock;
if ((req & VM_ALLOC_WIRED) != 0)
m->ref_count = 1;
m->a.act_count = 0;
m->oflags = oflags;
m->pool = VM_FREEPOOL_DEFAULT;
if (vm_page_iter_insert(m, object, pindex, &pages)) {
if ((req & VM_ALLOC_WIRED) != 0)
vm_wire_sub(npages);
KASSERT(m->object == NULL,
("page %p has object", m));
mpred = m;
for (m = m_ret; m < &m_ret[npages]; m++) {
if (m <= mpred &&
(req & VM_ALLOC_WIRED) != 0)
m->ref_count = 0;
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_UNBUSIED;
vm_page_free_toq(m);
}
if (req & VM_ALLOC_WAITFAIL) {
VM_OBJECT_WUNLOCK(object);
vm_radix_wait();
VM_OBJECT_WLOCK(object);
}
return (NULL);
}
if (memattr != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, memattr);
pindex++;
}
return (m_ret);
}
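/*
 * Allocate an unmanaged page from the given domain without inserting it into
 * any object.  The page is zeroed if VM_ALLOC_ZERO is requested and it is
 * not already known to be zero-filled; the allocation is retried after
 * waiting when VM_ALLOC_WAITOK is set.
 */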
vm_page_t
vm_page_alloc_noobj_domain(int domain, int req)
{
struct vm_domain *vmd;
vm_page_t m;
int flags;
#define VPAN_FLAGS (VM_ALLOC_COMMON | VM_ALLOC_COUNT_MASK | \
VM_ALLOC_NOFREE | VM_ALLOC_WAITOK)
KASSERT((req & ~VPAN_FLAGS) == 0,
("invalid request %#x", req));
flags = ((req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0) |
((req & VM_ALLOC_NOFREE) != 0 ? PG_NOFREE : 0);
vmd = VM_DOMAIN(domain);
again:
if (__predict_false((req & VM_ALLOC_NOFREE) != 0)) {
m = vm_page_alloc_nofree_domain(domain, req);
if (m != NULL)
goto found;
}
if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) {
m = uma_zalloc(vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone,
M_NOWAIT | M_NOVM);
if (m != NULL) {
flags |= PG_PCPU_CACHE;
goto found;
}
}
if (vm_domain_allocate(vmd, req, 1)) {
vm_domain_free_lock(vmd);
m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DIRECT, 0);
vm_domain_free_unlock(vmd);
if (m == NULL) {
vm_domain_freecnt_inc(vmd, 1);
#if VM_NRESERVLEVEL > 0
if (vm_reserv_reclaim_inactive(domain))
goto again;
#endif
}
}
if (m == NULL) {
if (!vm_domain_alloc_fail(vmd, NULL, req))
return (NULL);
goto again;
}
found:
vm_page_dequeue(m);
vm_page_alloc_check(m);
m->pindex = 0xdeadc0dedeadc0de;
m->flags = (m->flags & PG_ZERO) | flags;
m->a.flags = 0;
m->oflags = VPO_UNMANAGED;
m->pool = VM_FREEPOOL_DIRECT;
m->busy_lock = VPB_UNBUSIED;
if ((req & VM_ALLOC_WIRED) != 0) {
vm_wire_add(1);
m->ref_count = 1;
}
if ((req & VM_ALLOC_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
return (m);
}
#if VM_NRESERVLEVEL > 1
#define VM_NOFREE_IMPORT_ORDER (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER)
#elif VM_NRESERVLEVEL > 0
#define VM_NOFREE_IMPORT_ORDER VM_LEVEL_0_ORDER
#else
#define VM_NOFREE_IMPORT_ORDER 8
#endif
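/*
 * Allocate a single page that will never be freed back to the system,
 * carving it from a per-domain queue that is replenished one chunk of
 * 1 << VM_NOFREE_IMPORT_ORDER pages at a time so that such allocations stay
 * physically clustered.
 */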
static vm_page_t __noinline
vm_page_alloc_nofree_domain(int domain, int req)
{
vm_page_t m;
struct vm_domain *vmd;
KASSERT((req & VM_ALLOC_NOFREE) != 0, ("invalid request %#x", req));
vmd = VM_DOMAIN(domain);
vm_domain_free_lock(vmd);
if (TAILQ_EMPTY(&vmd->vmd_nofreeq)) {
int count;
count = 1 << VM_NOFREE_IMPORT_ORDER;
if (!vm_domain_allocate(vmd, req, count)) {
vm_domain_free_unlock(vmd);
return (NULL);
}
m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
VM_NOFREE_IMPORT_ORDER);
if (m == NULL) {
vm_domain_freecnt_inc(vmd, count);
vm_domain_free_unlock(vmd);
return (NULL);
}
m->ref_count = count - 1;
TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
atomic_add_long(&nofreeq_size, count);
}
m = TAILQ_FIRST(&vmd->vmd_nofreeq);
TAILQ_REMOVE(&vmd->vmd_nofreeq, m, plinks.q);
if (m->ref_count > 0) {
vm_page_t m_next;
m_next = &m[1];
vm_page_dequeue(m_next);
m_next->ref_count = m->ref_count - 1;
TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, plinks.q);
m->ref_count = 0;
}
vm_domain_free_unlock(vmd);
atomic_add_long(&nofreeq_size, -1);
VM_CNT_INC(v_nofree_count);
return (m);
}
static void __noinline
vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
{
VM_CNT_ADD(v_nofree_count, -1);
atomic_add_long(&nofreeq_size, 1);
vm_domain_free_lock(vmd);
MPASS(m->ref_count == 0);
TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
vm_domain_free_unlock(vmd);
}
vm_page_t
vm_page_alloc_noobj(int req)
{
struct vm_domainset_iter di;
vm_page_t m;
int domain;
if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
return (NULL);
do {
m = vm_page_alloc_noobj_domain(domain, req);
if (m != NULL)
break;
} while (vm_domainset_iter_page(&di, NULL, &domain, NULL) == 0);
return (m);
}
vm_page_t
vm_page_alloc_noobj_contig(int req, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr)
{
struct vm_domainset_iter di;
vm_page_t m;
int domain;
if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
return (NULL);
do {
m = vm_page_alloc_noobj_contig_domain(domain, req, npages, low,
high, alignment, boundary, memattr);
if (m != NULL)
break;
} while (vm_domainset_iter_page(&di, NULL, &domain, NULL) == 0);
return (m);
}
vm_page_t
vm_page_alloc_noobj_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr)
{
vm_page_t m, m_ret;
u_int flags;
#define VPANC_FLAGS (VM_ALLOC_COMMON | VM_ALLOC_COUNT_MASK | \
VM_ALLOC_NORECLAIM | VM_ALLOC_WAITOK)
KASSERT((req & ~VPANC_FLAGS) == 0,
("invalid request %#x", req));
KASSERT((req & (VM_ALLOC_WAITOK | VM_ALLOC_NORECLAIM)) !=
(VM_ALLOC_WAITOK | VM_ALLOC_NORECLAIM),
("invalid request %#x", req));
KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
while ((m_ret = vm_page_find_contig_domain(domain, req, npages,
low, high, alignment, boundary)) == NULL) {
if (!vm_domain_alloc_fail(VM_DOMAIN(domain), NULL, req))
return (NULL);
}
flags = PG_ZERO;
if ((req & VM_ALLOC_NODUMP) != 0)
flags |= PG_NODUMP;
if ((req & VM_ALLOC_WIRED) != 0)
vm_wire_add(npages);
for (m = m_ret; m < &m_ret[npages]; m++) {
vm_page_dequeue(m);
vm_page_alloc_check(m);
m->pindex = 0xdeadc0dedeadc0de;
m->a.flags = 0;
m->flags = (m->flags | PG_NODUMP) & flags;
m->busy_lock = VPB_UNBUSIED;
if ((req & VM_ALLOC_WIRED) != 0)
m->ref_count = 1;
m->a.act_count = 0;
m->oflags = VPO_UNMANAGED;
m->pool = VM_FREEPOOL_DIRECT;
if ((req & VM_ALLOC_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
if (memattr != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, memattr);
}
return (m_ret);
}
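/*
 * Assert that a page taken from the free lists is in the expected clean,
 * unreferenced, unqueued state before it is handed out.
 */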
static void
vm_page_alloc_check(vm_page_t m)
{
KASSERT(m->object == NULL, ("page %p has object", m));
KASSERT(m->a.queue == PQ_NONE &&
(m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
("page %p has unexpected queue %d, flags %#x",
m, m->a.queue, (m->a.flags & PGA_QUEUE_STATE_MASK)));
KASSERT(m->ref_count == 0, ("page %p has references", m));
KASSERT(vm_page_busy_freed(m), ("page %p is not freed", m));
KASSERT(m->dirty == 0, ("page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
("page %p has unexpected memattr %d",
m, pmap_page_get_memattr(m)));
KASSERT(vm_page_none_valid(m), ("free page %p is valid", m));
pmap_vm_page_alloc_check(m);
}
static int
vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags)
{
struct vm_domain *vmd;
struct vm_pgcache *pgcache;
int i;
pgcache = arg;
vmd = VM_DOMAIN(pgcache->domain);
if (vmd->vmd_severeset || curproc == pageproc ||
!_vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt))
return (0);
domain = vmd->vmd_domain;
vm_domain_free_lock(vmd);
i = vm_phys_alloc_npages(domain, pgcache->pool, cnt,
(vm_page_t *)store);
vm_domain_free_unlock(vmd);
if (cnt != i)
vm_domain_freecnt_inc(vmd, cnt - i);
return (i);
}
static void
vm_page_zone_release(void *arg, void **store, int cnt)
{
struct vm_domain *vmd;
struct vm_pgcache *pgcache;
vm_page_t m;
int i;
pgcache = arg;
vmd = VM_DOMAIN(pgcache->domain);
vm_domain_free_lock(vmd);
for (i = 0; i < cnt; i++) {
m = (vm_page_t)store[i];
vm_phys_free_pages(m, pgcache->pool, 0);
}
vm_domain_free_unlock(vmd);
vm_domain_freecnt_inc(vmd, cnt);
}
#define VPSC_ANY 0
#define VPSC_NORESERV 1
#define VPSC_NOSUPER 2
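/*
 * Scan the page range [m_start, m_end) for a run of npages pages that
 * satisfies the alignment and boundary constraints and whose pages are
 * either free or plausibly relocatable.  Returns the first page of such a
 * run, or NULL if none is found.
 */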
static vm_page_t
vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
u_long alignment, vm_paddr_t boundary, int options)
{
vm_object_t object;
vm_paddr_t pa;
vm_page_t m, m_run;
#if VM_NRESERVLEVEL > 0
int level;
#endif
int m_inc, order, run_ext, run_len;
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
m_run = NULL;
run_len = 0;
for (m = m_start; m < m_end && run_len < npages; m += m_inc) {
KASSERT((m->flags & PG_MARKER) == 0,
("page %p is PG_MARKER", m));
KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1,
("fictitious page %p has invalid ref count", m));
if (run_len == 0) {
KASSERT(m_run == NULL, ("m_run != NULL"));
if (m + npages > m_end)
break;
pa = VM_PAGE_TO_PHYS(m);
if (!vm_addr_align_ok(pa, alignment)) {
m_inc = atop(roundup2(pa, alignment) - pa);
continue;
}
if (!vm_addr_bound_ok(pa, ptoa(npages), boundary)) {
m_inc = atop(roundup2(pa, boundary) - pa);
continue;
}
} else
KASSERT(m_run != NULL, ("m_run == NULL"));
retry:
m_inc = 1;
if (vm_page_wired(m))
run_ext = 0;
#if VM_NRESERVLEVEL > 0
else if ((level = vm_reserv_level(m)) >= 0 &&
(options & VPSC_NORESERV) != 0) {
run_ext = 0;
pa = VM_PAGE_TO_PHYS(m);
m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) -
pa);
}
#endif
else if ((object = atomic_load_ptr(&m->object)) != NULL) {
VM_OBJECT_RLOCK(object);
if (object != m->object) {
VM_OBJECT_RUNLOCK(object);
goto retry;
}
if ((object->flags & OBJ_SWAP) == 0 &&
object->type != OBJT_VNODE) {
run_ext = 0;
#if VM_NRESERVLEVEL > 0
} else if ((options & VPSC_NOSUPER) != 0 &&
(level = vm_reserv_level_iffullpop(m)) >= 0) {
run_ext = 0;
pa = VM_PAGE_TO_PHYS(m);
m_inc = atop(roundup2(pa + 1,
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
KASSERT((m->oflags & (VPO_SWAPINPROG |
VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0,
("page %p has unexpected oflags", m));
run_ext = 1;
} else
run_ext = 0;
VM_OBJECT_RUNLOCK(object);
#if VM_NRESERVLEVEL > 0
} else if (level >= 0) {
run_ext = 1;
#endif
} else if ((order = m->order) < VM_NFREEORDER) {
run_ext = 1 << order;
m_inc = 1 << order;
} else {
run_ext = 0;
}
if (run_ext > 0) {
if (run_len == 0)
m_run = m;
run_len += run_ext;
} else {
if (run_len > 0) {
m_run = NULL;
run_len = 0;
}
}
}
if (run_len >= npages)
return (m_run);
return (NULL);
}
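/*
 * Attempt to relocate or free every page in the run identified by
 * vm_page_scan_contig().  Returns 0 on success, or EBUSY, EINVAL, or ENOMEM
 * when a page cannot be reclaimed.
 */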
static int
vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
vm_paddr_t high)
{
struct vm_domain *vmd;
struct spglist free;
vm_object_t object;
vm_paddr_t pa;
vm_page_t m, m_end, m_new;
int error, order, req;
KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class,
("req_class is not an allocation class"));
SLIST_INIT(&free);
error = 0;
m = m_run;
m_end = m_run + npages;
for (; error == 0 && m < m_end; m++) {
KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0,
("page %p is PG_FICTITIOUS or PG_MARKER", m));
if (vm_page_wired(m))
error = EBUSY;
else if ((object = atomic_load_ptr(&m->object)) != NULL) {
VM_OBJECT_WLOCK(object);
if (m->object != object ||
((object->flags & OBJ_SWAP) == 0 &&
object->type != OBJT_VNODE))
error = EINVAL;
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
else if (vm_page_queue(m) != PQ_NONE &&
vm_page_tryxbusy(m) != 0) {
if (vm_page_wired(m)) {
vm_page_xunbusy(m);
error = EBUSY;
goto unlock;
}
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
KASSERT(m->oflags == 0,
("page %p has unexpected oflags", m));
if (!vm_page_none_valid(m)) {
req = req_class;
if ((m->flags & PG_NODUMP) != 0)
req |= VM_ALLOC_NODUMP;
if (trunc_page(high) !=
~(vm_paddr_t)PAGE_MASK) {
m_new =
vm_page_alloc_noobj_contig(
req, 1, round_page(high),
~(vm_paddr_t)0, PAGE_SIZE,
0, VM_MEMATTR_DEFAULT);
} else
m_new = NULL;
if (m_new == NULL) {
pa = VM_PAGE_TO_PHYS(m_run);
m_new =
vm_page_alloc_noobj_contig(
req, 1, 0, pa - 1,
PAGE_SIZE, 0,
VM_MEMATTR_DEFAULT);
}
if (m_new == NULL) {
pa += ptoa(npages);
m_new =
vm_page_alloc_noobj_contig(
req, 1, pa, high, PAGE_SIZE,
0, VM_MEMATTR_DEFAULT);
}
if (m_new == NULL) {
vm_page_xunbusy(m);
error = ENOMEM;
goto unlock;
}
if (object->ref_count != 0 &&
!vm_page_try_remove_all(m)) {
vm_page_xunbusy(m);
vm_page_free(m_new);
error = EBUSY;
goto unlock;
}
m_new->a.flags = m->a.flags &
~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
("page %p is managed", m_new));
m_new->oflags = 0;
pmap_copy_page(m, m_new);
m_new->valid = m->valid;
m_new->dirty = m->dirty;
m->flags &= ~PG_ZERO;
vm_page_dequeue(m);
if (vm_page_replace_hold(m_new, object,
m->pindex, m) &&
vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
vm_page_deactivate(m_new);
} else {
m->flags &= ~PG_ZERO;
vm_page_dequeue(m);
if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
KASSERT(m->dirty == 0,
("page %p is dirty", m));
}
} else
error = EBUSY;
unlock:
VM_OBJECT_WUNLOCK(object);
} else {
MPASS(vm_page_domain(m) == domain);
vmd = VM_DOMAIN(domain);
vm_domain_free_lock(vmd);
order = m->order;
if (order < VM_NFREEORDER) {
m += (1 << order) - 1;
}
#if VM_NRESERVLEVEL > 0
else if (vm_reserv_is_page_free(m))
order = 0;
#endif
vm_domain_free_unlock(vmd);
if (order == VM_NFREEORDER)
error = EINVAL;
}
}
if ((m = SLIST_FIRST(&free)) != NULL) {
int cnt;
vmd = VM_DOMAIN(domain);
cnt = 0;
vm_domain_free_lock(vmd);
do {
MPASS(vm_page_domain(m) == domain);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
vm_phys_free_pages(m, m->pool, 0);
cnt++;
} while ((m = SLIST_FIRST(&free)) != NULL);
vm_domain_free_unlock(vmd);
vm_domain_freecnt_inc(vmd, cnt);
}
return (error);
}
#define NRUNS 16
#define RUN_INDEX(count, nruns) ((count) % (nruns))
#define MIN_RECLAIM 8
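/*
 * Try to reclaim enough contiguous runs of npages pages in the given domain
 * to satisfy desired_runs allocations, relaxing the scan options
 * (reservations, then superpages) on each pass.  Returns 0 on success,
 * ERANGE if no suitable physical range exists in the domain, and ENOMEM
 * otherwise.
 */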
int
vm_page_reclaim_contig_domain_ext(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
int desired_runs)
{
struct vm_domain *vmd;
vm_page_t bounds[2], m_run, _m_runs[NRUNS], *m_runs;
u_long count, minalign, reclaimed;
int error, i, min_reclaim, nruns, options, req_class;
int segind, start_segind;
int ret;
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
ret = ENOMEM;
if (desired_runs > 1)
m_runs = malloc((NRUNS + desired_runs) * sizeof(*m_runs),
M_TEMP, M_NOWAIT);
else
m_runs = NULL;
if (m_runs == NULL) {
m_runs = _m_runs;
nruns = NRUNS;
} else {
nruns = NRUNS + desired_runs - 1;
}
min_reclaim = MAX(desired_runs * npages, MIN_RECLAIM);
minalign = 1ul << imin(flsl(npages - 1), VM_NFREEORDER - 1);
npages = roundup2(npages, minalign);
if (alignment < ptoa(minalign))
alignment = ptoa(minalign);
req_class = req & VM_ALLOC_CLASS_MASK;
if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
req_class = VM_ALLOC_SYSTEM;
start_segind = vm_phys_lookup_segind(low);
vmd = VM_DOMAIN(domain);
count = vmd->vmd_free_count;
if (count < npages + vmd->vmd_free_reserved || (count < npages +
vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
(count < npages && req_class == VM_ALLOC_INTERRUPT))
goto done;
for (options = VPSC_NORESERV;;) {
bool phys_range_exists = false;
count = 0;
segind = start_segind;
while ((segind = vm_phys_find_range(bounds, segind, domain,
npages, low, high)) != -1) {
phys_range_exists = true;
while ((m_run = vm_page_scan_contig(npages, bounds[0],
bounds[1], alignment, boundary, options))) {
bounds[0] = m_run + npages;
m_runs[RUN_INDEX(count, nruns)] = m_run;
count++;
}
segind++;
}
if (!phys_range_exists) {
ret = ERANGE;
goto done;
}
reclaimed = 0;
for (i = 0; count > 0 && i < nruns; i++) {
count--;
m_run = m_runs[RUN_INDEX(count, nruns)];
error = vm_page_reclaim_run(req_class, domain, npages,
m_run, high);
if (error == 0) {
reclaimed += npages;
if (reclaimed >= min_reclaim) {
ret = 0;
goto done;
}
}
}
if (options == VPSC_NORESERV)
options = VPSC_NOSUPER;
else if (options == VPSC_NOSUPER)
options = VPSC_ANY;
else if (options == VPSC_ANY) {
if (reclaimed != 0)
ret = 0;
goto done;
}
}
done:
if (m_runs != _m_runs)
free(m_runs, M_TEMP);
return (ret);
}
int
vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
return (vm_page_reclaim_contig_domain_ext(domain, req, npages, low,
high, alignment, boundary, 1));
}
int
vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary)
{
struct vm_domainset_iter di;
int domain, ret, status;
ret = ERANGE;
if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
return (ret);
do {
status = vm_page_reclaim_contig_domain(domain, req, npages, low,
high, alignment, boundary);
if (status == 0)
return (0);
else if (status == ERANGE)
vm_domainset_iter_ignore(&di, domain);
else {
KASSERT(status == ENOMEM, ("Unrecognized error %d "
"from vm_page_reclaim_contig_domain()", status));
ret = ENOMEM;
}
} while (vm_domainset_iter_page(&di, NULL, &domain, NULL) == 0);
return (ret);
}
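/*
 * Mark the domain as being below the free page minimum and/or severe
 * thresholds in the global domain sets consulted by waiters.
 */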
void
vm_domain_set(struct vm_domain *vmd)
{
mtx_lock(&vm_domainset_lock);
if (!vmd->vmd_minset && vm_paging_min(vmd)) {
vmd->vmd_minset = 1;
DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains);
}
if (!vmd->vmd_severeset && vm_paging_severe(vmd)) {
vmd->vmd_severeset = 1;
DOMAINSET_SET(vmd->vmd_domain, &vm_severe_domains);
}
mtx_unlock(&vm_domainset_lock);
}
void
vm_domain_clear(struct vm_domain *vmd)
{
mtx_lock(&vm_domainset_lock);
if (vmd->vmd_minset && !vm_paging_min(vmd)) {
vmd->vmd_minset = 0;
DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains);
if (vm_min_waiters != 0) {
vm_min_waiters = 0;
wakeup(&vm_min_domains);
}
}
if (vmd->vmd_severeset && !vm_paging_severe(vmd)) {
vmd->vmd_severeset = 0;
DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
if (vm_severe_waiters != 0) {
vm_severe_waiters = 0;
wakeup(&vm_severe_domains);
}
}
if (vmd->vmd_pageout_pages_needed &&
vmd->vmd_free_count >= vmd->vmd_pageout_free_min) {
wakeup(&vmd->vmd_pageout_pages_needed);
vmd->vmd_pageout_pages_needed = 0;
}
if (vm_pageproc_waiters) {
vm_pageproc_waiters = 0;
wakeup(&vm_pageproc_waiters);
}
mtx_unlock(&vm_domainset_lock);
}
void
vm_wait_min(void)
{
mtx_lock(&vm_domainset_lock);
while (vm_page_count_min()) {
vm_min_waiters++;
msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
}
mtx_unlock(&vm_domainset_lock);
}
void
vm_wait_severe(void)
{
mtx_lock(&vm_domainset_lock);
while (vm_page_count_severe()) {
vm_severe_waiters++;
msleep(&vm_severe_domains, &vm_domainset_lock, PVM,
"vmwait", 0);
}
mtx_unlock(&vm_domainset_lock);
}
u_int
vm_wait_count(void)
{
return (vm_severe_waiters + vm_min_waiters + vm_pageproc_waiters);
}
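/*
 * Sleep until the page daemon signals that free pages may be available in
 * one of the given domains; the page daemon itself only pauses for a single
 * tick.  Returns the msleep() error, if any.
 */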
int
vm_wait_doms(const domainset_t *wdoms, int mflags)
{
int error;
error = 0;
if (curproc == pageproc) {
mtx_lock(&vm_domainset_lock);
vm_pageproc_waiters++;
error = msleep(&vm_pageproc_waiters, &vm_domainset_lock,
PVM | PDROP | mflags, "pageprocwait", 1);
} else {
mtx_lock(&vm_domainset_lock);
if (vm_page_count_min_set(wdoms)) {
if (pageproc == NULL)
panic("vm_wait in early boot");
vm_min_waiters++;
error = msleep(&vm_min_domains, &vm_domainset_lock,
PVM | PDROP | mflags, "vmwait", 0);
} else
mtx_unlock(&vm_domainset_lock);
}
return (error);
}
void
vm_wait_domain(int domain)
{
struct vm_domain *vmd;
domainset_t wdom;
vmd = VM_DOMAIN(domain);
vm_domain_free_assert_unlocked(vmd);
if (curproc == pageproc) {
mtx_lock(&vm_domainset_lock);
if (vmd->vmd_free_count < vmd->vmd_pageout_free_min) {
vmd->vmd_pageout_pages_needed = 1;
msleep(&vmd->vmd_pageout_pages_needed,
&vm_domainset_lock, PDROP | PSWP, "VMWait", 0);
} else
mtx_unlock(&vm_domainset_lock);
} else {
DOMAINSET_ZERO(&wdom);
DOMAINSET_SET(vmd->vmd_domain, &wdom);
vm_wait_doms(&wdom, 0);
}
}
static int
vm_wait_flags(vm_object_t obj, int mflags)
{
struct domainset *d;
d = NULL;
if (obj != NULL)
d = obj->domain.dr_policy;
if (d == NULL)
d = curthread->td_domain.dr_policy;
return (vm_wait_doms(&d->ds_mask, mflags));
}
void
vm_wait(vm_object_t obj)
{
(void)vm_wait_flags(obj, 0);
}
int
vm_wait_intr(vm_object_t obj)
{
return (vm_wait_flags(obj, PCATCH));
}
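/*
 * vm_domain_alloc_fail:
 *
 * Handle a failed page allocation from the given domain: record the
 * pageout deficit and, for VM_ALLOC_WAITOK/WAITFAIL requests, drop
 * the object lock and sleep for free pages. Returns EAGAIN when a
 * VM_ALLOC_WAITOK caller should retry the allocation.
 */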
static int
vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
{
vm_domain_free_assert_unlocked(vmd);
atomic_add_int(&vmd->vmd_pageout_deficit,
max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
vm_wait_domain(vmd->vmd_domain);
if (object != NULL)
VM_OBJECT_WLOCK(object);
if (req & VM_ALLOC_WAITOK)
return (EAGAIN);
}
return (0);
}
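/*
 * vm_waitpfault:
 *
 * Sleep in the page fault handler until free pages become available
 * in the faulting thread's domain set, or until the timeout expires.
 */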
void
vm_waitpfault(struct domainset *dset, int timo)
{
mtx_lock(&vm_domainset_lock);
if (vm_page_count_min_set(&dset->ds_mask)) {
vm_min_waiters++;
msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
"pfault", timo);
} else
mtx_unlock(&vm_domainset_lock);
}
static struct vm_pagequeue *
_vm_page_pagequeue(vm_page_t m, uint8_t queue)
{
return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]);
}
#ifdef INVARIANTS
static struct vm_pagequeue *
vm_page_pagequeue(vm_page_t m)
{
return (_vm_page_pagequeue(m, vm_page_astate_load(m).queue));
}
#endif
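/*
 * vm_page_pqstate_fcmpset:
 *
 * Attempt to atomically change the page's queue state from *old to
 * new, retrying spurious compare-and-set failures. Returns false,
 * with *old updated to the current state, if another thread changed
 * the state concurrently.
 */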
static __always_inline bool
vm_page_pqstate_fcmpset(vm_page_t m, vm_page_astate_t *old,
vm_page_astate_t new)
{
vm_page_astate_t tmp;
tmp = *old;
do {
if (__predict_true(vm_page_astate_fcmpset(m, old, new)))
return (true);
counter_u64_add(pqstate_commit_retries, 1);
} while (old->_bits == tmp._bits);
return (false);
}
static bool
_vm_page_pqstate_commit_dequeue(struct vm_pagequeue *pq, vm_page_t m,
vm_page_astate_t *old, vm_page_astate_t new)
{
vm_page_t next;
vm_pagequeue_assert_locked(pq);
KASSERT(vm_page_pagequeue(m) == pq,
("%s: queue %p does not match page %p", __func__, pq, m));
KASSERT(old->queue != PQ_NONE && new.queue != old->queue,
("%s: invalid queue indices %d %d",
__func__, old->queue, new.queue));
if ((old->flags & PGA_ENQUEUED) != 0) {
new.flags &= ~PGA_ENQUEUED;
next = TAILQ_NEXT(m, plinks.q);
TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
vm_pagequeue_cnt_dec(pq);
if (!vm_page_pqstate_fcmpset(m, old, new)) {
if (next == NULL)
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
else
TAILQ_INSERT_BEFORE(next, m, plinks.q);
vm_pagequeue_cnt_inc(pq);
return (false);
} else {
return (true);
}
} else {
return (vm_page_pqstate_fcmpset(m, old, new));
}
}
static bool
vm_page_pqstate_commit_dequeue(vm_page_t m, vm_page_astate_t *old,
vm_page_astate_t new)
{
struct vm_pagequeue *pq;
vm_page_astate_t as;
bool ret;
pq = _vm_page_pagequeue(m, old->queue);
vm_pagequeue_lock(pq);
as = vm_page_astate_load(m);
if (__predict_false(as._bits != old->_bits)) {
*old = as;
ret = false;
} else {
ret = _vm_page_pqstate_commit_dequeue(pq, m, old, new);
}
vm_pagequeue_unlock(pq);
return (ret);
}
static bool
_vm_page_pqstate_commit_requeue(struct vm_pagequeue *pq, vm_page_t m,
vm_page_astate_t *old, vm_page_astate_t new)
{
struct vm_domain *vmd;
vm_pagequeue_assert_locked(pq);
KASSERT(old->queue != PQ_NONE && new.queue == old->queue,
("%s: invalid queue indices %d %d",
__func__, old->queue, new.queue));
new.flags |= PGA_ENQUEUED;
if (!vm_page_pqstate_fcmpset(m, old, new))
return (false);
if ((old->flags & PGA_ENQUEUED) != 0)
TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
else
vm_pagequeue_cnt_inc(pq);
if ((old->flags & PGA_REQUEUE_HEAD) != 0) {
vmd = vm_pagequeue_domain(m);
KASSERT(pq == &vmd->vmd_pagequeues[PQ_INACTIVE],
("%s: invalid page queue for page %p", __func__, m));
TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
} else {
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
}
return (true);
}
static bool
vm_page_pqstate_commit_request(vm_page_t m, vm_page_astate_t *old,
vm_page_astate_t new)
{
KASSERT(old->queue == new.queue || new.queue != PQ_NONE,
("%s: invalid state, queue %d flags %x",
__func__, new.queue, new.flags));
if (old->_bits != new._bits &&
!vm_page_pqstate_fcmpset(m, old, new))
return (false);
vm_page_pqbatch_submit(m, new.queue);
return (true);
}
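/*
 * vm_page_pqstate_commit:
 *
 * Commit an arbitrary queue state transition. A change of queue is
 * performed as a locked dequeue followed by a deferred enqueue via
 * the batch queues; other transitions are applied with a single
 * atomic update, notifying the batch queues when a new queue
 * operation was requested.
 */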
bool
vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new)
{
if (old->_bits == new._bits)
return (true);
if (old->queue != PQ_NONE && new.queue != old->queue) {
if (!vm_page_pqstate_commit_dequeue(m, old, new))
return (false);
if (new.queue != PQ_NONE)
vm_page_pqbatch_submit(m, new.queue);
} else {
if (!vm_page_pqstate_fcmpset(m, old, new))
return (false);
if (new.queue != PQ_NONE &&
((new.flags & ~old->flags) & PGA_QUEUE_OP_MASK) != 0)
vm_page_pqbatch_submit(m, new.queue);
}
return (true);
}
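/*
 * vm_pqbatch_process_page:
 *
 * Apply the pending deferred dequeue or requeue for a single page
 * while the page queue lock is held. Requests that were already
 * satisfied or cancelled are counted as no-ops.
 */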
static inline void
vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m, uint8_t queue)
{
vm_page_astate_t new, old;
CRITICAL_ASSERT(curthread);
vm_pagequeue_assert_locked(pq);
KASSERT(queue < PQ_COUNT,
("%s: invalid queue index %d", __func__, queue));
KASSERT(pq == _vm_page_pagequeue(m, queue),
("%s: page %p does not belong to queue %p", __func__, m, pq));
for (old = vm_page_astate_load(m);;) {
if (__predict_false(old.queue != queue ||
(old.flags & PGA_QUEUE_OP_MASK) == 0)) {
counter_u64_add(queue_nops, 1);
break;
}
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("%s: page %p is unmanaged", __func__, m));
new = old;
if ((old.flags & PGA_DEQUEUE) != 0) {
new.flags &= ~PGA_QUEUE_OP_MASK;
new.queue = PQ_NONE;
if (__predict_true(_vm_page_pqstate_commit_dequeue(pq,
m, &old, new))) {
counter_u64_add(queue_ops, 1);
break;
}
} else {
new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD);
if (__predict_true(_vm_page_pqstate_commit_requeue(pq,
m, &old, new))) {
counter_u64_add(queue_ops, 1);
break;
}
}
}
}
static void
vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
uint8_t queue)
{
int i;
for (i = 0; i < bq->bq_cnt; i++)
vm_pqbatch_process_page(pq, bq->bq_pa[i], queue);
vm_batchqueue_init(bq);
}
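/*
 * vm_page_pqbatch_submit:
 *
 * Record a deferred queue operation for the page in the per-CPU
 * batch queue. Once the batch is more than half full it is flushed
 * opportunistically if the page queue lock can be taken without
 * blocking; once it is completely full, the lock is acquired and both
 * the batch and the submitted page are processed.
 */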
void
vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
{
struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
int domain, slots_remaining;
KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
domain = vm_page_domain(m);
critical_enter();
bq = DPCPU_PTR(pqbatch[domain][queue]);
slots_remaining = vm_batchqueue_insert(bq, m);
if (slots_remaining > (VM_BATCHQUEUE_SIZE >> 1)) {
critical_exit();
return;
} else if (slots_remaining > 0) {
pq = &VM_DOMAIN(domain)->vmd_pagequeues[queue];
if (vm_pagequeue_trylock(pq)) {
vm_pqbatch_process(pq, bq, queue);
vm_pagequeue_unlock(pq);
}
critical_exit();
return;
}
critical_exit();
pq = &VM_DOMAIN(domain)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
critical_enter();
bq = DPCPU_PTR(pqbatch[domain][queue]);
vm_pqbatch_process(pq, bq, queue);
vm_pqbatch_process_page(pq, m, queue);
vm_pagequeue_unlock(pq);
critical_exit();
}
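/*
 * vm_page_pqbatch_drain:
 *
 * Flush every per-CPU batch queue by binding the current thread to
 * each CPU in turn and processing its batches for all domains and
 * queues.
 */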
void
vm_page_pqbatch_drain(void)
{
struct thread *td;
struct vm_domain *vmd;
struct vm_pagequeue *pq;
int cpu, domain, queue;
td = curthread;
CPU_FOREACH(cpu) {
thread_lock(td);
sched_bind(td, cpu);
thread_unlock(td);
for (domain = 0; domain < vm_ndomains; domain++) {
vmd = VM_DOMAIN(domain);
for (queue = 0; queue < PQ_COUNT; queue++) {
pq = &vmd->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
critical_enter();
vm_pqbatch_process(pq,
DPCPU_PTR(pqbatch[domain][queue]), queue);
critical_exit();
vm_pagequeue_unlock(pq);
}
}
}
thread_lock(td);
sched_unbind(td);
thread_unlock(td);
}
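/*
 * vm_page_dequeue_deferred:
 *
 * Request removal of the page from its queue; the physical unlinking
 * is deferred to the batch queues.
 */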
void
vm_page_dequeue_deferred(vm_page_t m)
{
vm_page_astate_t new, old;
old = vm_page_astate_load(m);
do {
if (old.queue == PQ_NONE) {
KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
("%s: page %p has unexpected queue state",
__func__, m));
break;
}
new = old;
new.flags |= PGA_DEQUEUE;
} while (!vm_page_pqstate_commit_request(m, &old, new));
}
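/*
 * vm_page_dequeue:
 *
 * Remove the page from its queue immediately, committing the dequeue
 * before returning.
 */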
void
vm_page_dequeue(vm_page_t m)
{
vm_page_astate_t new, old;
old = vm_page_astate_load(m);
do {
if (__predict_true(old.queue == PQ_NONE)) {
KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
("%s: page %p has unexpected queue state",
__func__, m));
break;
}
new = old;
new.flags &= ~PGA_QUEUE_OP_MASK;
new.queue = PQ_NONE;
} while (!vm_page_pqstate_commit_dequeue(m, &old, new));
}
static void
vm_page_enqueue(vm_page_t m, uint8_t queue)
{
KASSERT(m->a.queue == PQ_NONE &&
(m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
("%s: page %p is already enqueued", __func__, m));
KASSERT(m->ref_count > 0,
("%s: page %p does not carry any references", __func__, m));
m->a.queue = queue;
if ((m->a.flags & PGA_REQUEUE) == 0)
vm_page_aflag_set(m, PGA_REQUEUE);
vm_page_pqbatch_submit(m, queue);
}
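/*
 * vm_page_free_prep:
 *
 * Prepare the page for return to the physical allocator: verify that
 * it is unmapped and unreferenced, detach it from its object, release
 * the busy lock, and schedule a deferred dequeue. Returns false when
 * the page should not be freed to the physical allocator, as for
 * fictitious pages or pages absorbed by a reservation.
 */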
static bool
vm_page_free_prep(vm_page_t m)
{
atomic_thread_fence_acq();
#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) {
uint64_t *p;
int i;
p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++)
KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx",
m, i, (uintmax_t)*p));
}
#endif
if ((m->oflags & VPO_UNMANAGED) == 0) {
KASSERT(!pmap_page_is_mapped(m),
("vm_page_free_prep: freeing mapped page %p", m));
KASSERT((m->a.flags & (PGA_EXECUTABLE | PGA_WRITEABLE)) == 0,
("vm_page_free_prep: mapping flags set in page %p", m));
} else {
KASSERT(m->a.queue == PQ_NONE,
("vm_page_free_prep: unmanaged page %p is queued", m));
}
VM_CNT_INC(v_tfree);
if (m->object != NULL) {
vm_page_radix_remove(m);
vm_page_free_object_prep(m);
} else
vm_page_assert_unbusied(m);
vm_page_busy_free(m);
if ((m->flags & PG_FICTITIOUS) != 0) {
KASSERT(m->ref_count == 1,
("fictitious page %p is referenced", m));
KASSERT(m->a.queue == PQ_NONE,
("fictitious page %p is queued", m));
return (false);
}
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_dequeue_deferred(m);
m->valid = 0;
vm_page_undirty(m);
if (m->ref_count != 0)
panic("vm_page_free_prep: page %p has references", m);
if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
#if VM_NRESERVLEVEL > 0
if ((m->flags & PG_PCPU_CACHE) == 0 && vm_reserv_free_page(m))
return (false);
#endif
return (true);
}
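/*
 * vm_page_free_toq:
 *
 * Return the page to the per-CPU page cache zone, the nofree queue,
 * or the domain's free lists, as appropriate.
 */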
static void
vm_page_free_toq(vm_page_t m)
{
struct vm_domain *vmd;
uma_zone_t zone;
if (!vm_page_free_prep(m))
return;
vmd = vm_pagequeue_domain(m);
if (__predict_false((m->flags & PG_NOFREE) != 0)) {
vm_page_free_nofree(vmd, m);
return;
}
zone = vmd->vmd_pgcache[m->pool].zone;
if ((m->flags & PG_PCPU_CACHE) != 0 && zone != NULL) {
uma_zfree(zone, m);
return;
}
vm_domain_free_lock(vmd);
vm_phys_free_pages(m, m->pool, 0);
vm_domain_free_unlock(vmd);
vm_domain_freecnt_inc(vmd, 1);
}
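/*
 * vm_page_free_pages_toq:
 *
 * Free every page on the given list and return the count, optionally
 * subtracting it from the global wired page count.
 */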
int
vm_page_free_pages_toq(struct spglist *free, bool update_wire_count)
{
vm_page_t m;
int count;
if (SLIST_EMPTY(free))
return (0);
count = 0;
while ((m = SLIST_FIRST(free)) != NULL) {
count++;
SLIST_REMOVE_HEAD(free, plinks.s.ss);
vm_page_free_toq(m);
}
if (update_wire_count)
vm_wire_sub(count);
return (count);
}
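/*
 * vm_page_wire:
 *
 * Add a wiring to the page, preventing it from being reclaimed. A
 * managed page receiving its first wiring is marked for dequeue from
 * the paging queues.
 */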
void
vm_page_wire(vm_page_t m)
{
u_int old;
#ifdef INVARIANTS
if (m->object != NULL && !vm_page_busied(m) &&
!vm_object_busied(m->object))
VM_OBJECT_ASSERT_LOCKED(m->object);
#endif
KASSERT((m->flags & PG_FICTITIOUS) == 0 ||
VPRC_WIRE_COUNT(m->ref_count) >= 1,
("vm_page_wire: fictitious page %p has zero wirings", m));
old = atomic_fetchadd_int(&m->ref_count, 1);
KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX,
("vm_page_wire: counter overflow for page %p", m));
if (VPRC_WIRE_COUNT(old) == 0) {
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_aflag_set(m, PGA_DEQUEUE);
vm_wire_add(1);
}
}
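/*
 * vm_page_wire_mapped:
 *
 * Attempt to wire a page found through a pmap lookup. Fails if a
 * concurrent blocked operation (VPRC_BLOCKED) is removing the page's
 * mappings.
 */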
bool
vm_page_wire_mapped(vm_page_t m)
{
u_int old;
old = atomic_load_int(&m->ref_count);
do {
KASSERT(old > 0,
("vm_page_wire_mapped: wiring unreferenced page %p", m));
if ((old & VPRC_BLOCKED) != 0)
return (false);
} while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1));
if (VPRC_WIRE_COUNT(old) == 0) {
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_aflag_set(m, PGA_DEQUEUE);
vm_wire_add(1);
}
return (true);
}
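/*
 * vm_page_unwire_managed:
 *
 * Release a wiring of a managed page. When the last wiring is
 * dropped, the page is requeued onto nqueue if its object still
 * references it, or freed once the reference count reaches zero.
 */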
static void
vm_page_unwire_managed(vm_page_t m, uint8_t nqueue, bool noreuse)
{
u_int old;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("%s: page %p is unmanaged", __func__, m));
old = atomic_load_int(&m->ref_count);
do {
u_int count;
KASSERT(VPRC_WIRE_COUNT(old) > 0,
("vm_page_unwire: wire count underflow for page %p", m));
count = old & ~VPRC_BLOCKED;
if (count > VPRC_OBJREF + 1) {
if ((vm_page_astate_load(m).flags & PGA_DEQUEUE) == 0)
vm_page_aflag_set(m, PGA_DEQUEUE);
} else if (count == VPRC_OBJREF + 1) {
vm_page_release_toq(m, nqueue, noreuse);
} else if (count == 1) {
vm_page_aflag_clear(m, PGA_DEQUEUE);
}
} while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
if (VPRC_WIRE_COUNT(old) == 1) {
vm_wire_sub(1);
if (old == 1)
vm_page_free(m);
}
}
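/*
 * vm_page_unwire:
 *
 * Release a wiring of the page. A managed page whose last wiring is
 * dropped is requeued onto the given paging queue; an unmanaged page
 * with no remaining references is freed.
 */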
void
vm_page_unwire(vm_page_t m, uint8_t nqueue)
{
KASSERT(nqueue < PQ_COUNT,
("vm_page_unwire: invalid queue %u request for page %p",
nqueue, m));
if ((m->oflags & VPO_UNMANAGED) != 0) {
if (vm_page_unwire_noq(m) && m->ref_count == 0)
vm_page_free(m);
return;
}
vm_page_unwire_managed(m, nqueue, false);
}
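/*
 * vm_page_unwire_noq:
 *
 * Drop a wiring without touching the paging queues. Returns true if
 * the last wiring was released.
 */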
bool
vm_page_unwire_noq(vm_page_t m)
{
u_int old;
old = vm_page_drop(m, 1);
KASSERT(VPRC_WIRE_COUNT(old) != 0,
("%s: counter underflow for page %p", __func__, m));
KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1,
("%s: missing ref on fictitious page %p", __func__, m));
if (VPRC_WIRE_COUNT(old) > 1)
return (false);
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_aflag_clear(m, PGA_DEQUEUE);
vm_wire_sub(1);
return (true);
}
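/*
 * vm_page_mvqueue:
 *
 * Common helper for moving a managed, unwired page toward the given
 * queue by requesting a deferred requeue (or head insertion) through
 * the batch queue machinery.
 */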
static __always_inline void
vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, const uint16_t nflag)
{
vm_page_astate_t old, new;
KASSERT(m->ref_count > 0,
("%s: page %p does not carry any references", __func__, m));
KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD,
("%s: invalid flags %x", __func__, nflag));
if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
return;
old = vm_page_astate_load(m);
do {
if ((old.flags & PGA_DEQUEUE) != 0)
break;
new = old;
new.flags &= ~PGA_QUEUE_OP_MASK;
if (nqueue == PQ_ACTIVE)
new.act_count = max(old.act_count, ACT_INIT);
if (old.queue == nqueue) {
if (nqueue != PQ_ACTIVE ||
(old.flags & PGA_ENQUEUED) == 0)
new.flags |= nflag;
} else {
new.flags |= nflag;
new.queue = nqueue;
}
} while (!vm_page_pqstate_commit(m, &old, new));
}
void
vm_page_activate(vm_page_t m)
{
vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE);
}
void
vm_page_deactivate(vm_page_t m)
{
vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE);
}
void
vm_page_deactivate_noreuse(vm_page_t m)
{
vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD);
}
void
vm_page_launder(vm_page_t m)
{
vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE);
}
void
vm_page_unswappable(vm_page_t m)
{
VM_OBJECT_ASSERT_LOCKED(m->object);
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("page %p already unswappable", m));
vm_page_dequeue(m);
vm_page_enqueue(m, PQ_UNSWAPPABLE);
}
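/*
 * vm_page_release_toq:
 *
 * Requeue a page whose last wiring is being released. Invalid pages
 * and "no reuse" releases are placed at the head of the inactive
 * queue; an active page that is already enqueued is merely marked
 * referenced, and other pages are requeued at the tail of nqueue.
 */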
static void
vm_page_release_toq(vm_page_t m, uint8_t nqueue, const bool noreuse)
{
vm_page_astate_t old, new;
uint16_t nflag;
if (noreuse || vm_page_none_valid(m)) {
nqueue = PQ_INACTIVE;
nflag = PGA_REQUEUE_HEAD;
} else {
nflag = PGA_REQUEUE;
}
old = vm_page_astate_load(m);
do {
new = old;
new.flags &= ~PGA_QUEUE_OP_MASK;
if (nflag != PGA_REQUEUE_HEAD && old.queue == PQ_ACTIVE &&
(old.flags & PGA_ENQUEUED) != 0)
new.flags |= PGA_REFERENCED;
else {
new.flags |= nflag;
new.queue = nqueue;
}
} while (!vm_page_pqstate_commit(m, &old, new));
}
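/*
 * vm_page_release:
 *
 * Release a transient wiring such as one taken for I/O. With
 * VPR_TRYFREE, attempt to free the page outright by taking the
 * object lock; otherwise unwire it toward the inactive queue.
 */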
void
vm_page_release(vm_page_t m, int flags)
{
vm_object_t object;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_page_release: page %p is unmanaged", m));
if ((flags & VPR_TRYFREE) != 0) {
for (;;) {
object = atomic_load_ptr(&m->object);
if (object == NULL)
break;
if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object))
break;
if (object == m->object) {
vm_page_release_locked(m, flags);
VM_OBJECT_WUNLOCK(object);
return;
}
VM_OBJECT_WUNLOCK(object);
}
}
vm_page_unwire_managed(m, PQ_INACTIVE, flags != 0);
}
void
vm_page_release_locked(vm_page_t m, int flags)
{
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_page_release_locked: page %p is unmanaged", m));
if (vm_page_unwire_noq(m)) {
if ((flags & VPR_TRYFREE) != 0 &&
(m->object->ref_count == 0 || !pmap_page_is_mapped(m)) &&
m->dirty == 0 && vm_page_tryxbusy(m)) {
if (__predict_true(!vm_page_wired(m))) {
vm_page_free(m);
return;
}
vm_page_xunbusy(m);
} else {
vm_page_release_toq(m, PQ_INACTIVE, flags != 0);
}
}
}
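/*
 * vm_page_try_blocked_op:
 *
 * Atomically verify that the page is unwired, block new wirings by
 * setting VPRC_BLOCKED, apply the given pmap operation, and then
 * unblock. Returns false if the page is wired.
 */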
static bool
vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t))
{
u_int old;
KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0,
("vm_page_try_blocked_op: page %p has no object", m));
KASSERT(vm_page_busied(m),
("vm_page_try_blocked_op: page %p is not busy", m));
VM_OBJECT_ASSERT_LOCKED(m->object);
old = atomic_load_int(&m->ref_count);
do {
KASSERT(old != 0,
("vm_page_try_blocked_op: page %p has no references", m));
KASSERT((old & VPRC_BLOCKED) == 0,
("vm_page_try_blocked_op: page %p blocks wirings", m));
if (VPRC_WIRE_COUNT(old) != 0)
return (false);
} while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED));
(op)(m);
old = vm_page_drop(m, VPRC_BLOCKED);
KASSERT(!VM_OBJECT_WOWNED(m->object) ||
old == (VPRC_BLOCKED | VPRC_OBJREF),
("vm_page_try_blocked_op: unexpected refcount value %u for %p",
old, m));
return (true);
}
bool
vm_page_try_remove_all(vm_page_t m)
{
return (vm_page_try_blocked_op(m, pmap_remove_all));
}
bool
vm_page_try_remove_write(vm_page_t m)
{
return (vm_page_try_blocked_op(m, pmap_remove_write));
}
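/*
 * vm_page_advise:
 *
 * Apply madvise() policy to the page. MADV_WILLNEED activates the
 * page; MADV_FREE and MADV_DONTNEED push it toward reclamation,
 * either to the head of the inactive queue when clean or to the
 * laundry queue when dirty.
 */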
void
vm_page_advise(vm_page_t m, int advice)
{
VM_OBJECT_ASSERT_WLOCKED(m->object);
vm_page_assert_xbusied(m);
if (advice == MADV_FREE)
vm_page_undirty(m);
else if (advice != MADV_DONTNEED) {
if (advice == MADV_WILLNEED)
vm_page_activate(m);
return;
}
if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
vm_page_dirty(m);
vm_page_aflag_clear(m, PGA_REFERENCED);
if (m->dirty == 0)
vm_page_deactivate_noreuse(m);
else if (!vm_page_in_laundry(m))
vm_page_launder(m);
}
static inline void
vm_page_grab_release(vm_page_t m, int allocflags)
{
if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
vm_page_sunbusy(m);
else
vm_page_xunbusy(m);
}
}
static bool
vm_page_grab_sleep(vm_object_t object, vm_page_t m, vm_pindex_t pindex,
const char *wmesg, int allocflags, bool locked)
{
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (false);
if (locked && (allocflags & VM_ALLOC_NOCREAT) == 0)
vm_page_reference(m);
if (_vm_page_busy_sleep(object, m, pindex, wmesg, allocflags, locked) &&
locked)
VM_OBJECT_WLOCK(object);
if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
return (false);
return (true);
}
static inline void
vm_page_grab_check(int allocflags)
{
KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 ||
(allocflags & VM_ALLOC_WIRED) != 0,
("vm_page_grab*: the pages must be busied or wired"));
KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
(allocflags & VM_ALLOC_IGN_SBUSY) != 0,
("vm_page_grab*: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
}
static inline int
vm_page_grab_pflags(int allocflags)
{
int pflags;
pflags = allocflags &
~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL |
VM_ALLOC_NOBUSY | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOCREAT);
if ((allocflags & VM_ALLOC_NOWAIT) == 0)
pflags |= VM_ALLOC_WAITFAIL;
if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
pflags |= VM_ALLOC_SBUSY;
return (pflags);
}
static inline vm_page_t
vm_page_grab_lookup(vm_object_t object, vm_pindex_t pindex, int allocflags,
bool *found, struct pctrie_iter *pages)
{
vm_page_t m;
while ((*found = (m = vm_radix_iter_lookup(pages, pindex)) != NULL) &&
!vm_page_tryacquire(m, allocflags)) {
if (!vm_page_grab_sleep(object, m, pindex, "pgrbwt",
allocflags, true))
return (NULL);
pctrie_iter_reset(pages);
}
return (m);
}
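/*
 * vm_page_grab_iter:
 *
 * Look up the page at the given index and busy it according to
 * allocflags, allocating it if it is not resident and
 * VM_ALLOC_NOCREAT was not specified. Sleeps and retries as the
 * flags allow and may return NULL.
 */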
vm_page_t
vm_page_grab_iter(vm_object_t object, vm_pindex_t pindex, int allocflags,
struct pctrie_iter *pages)
{
vm_page_t m;
bool found;
VM_OBJECT_ASSERT_WLOCKED(object);
vm_page_grab_check(allocflags);
while ((m = vm_page_grab_lookup(
object, pindex, allocflags, &found, pages)) == NULL) {
if ((allocflags & VM_ALLOC_NOCREAT) != 0)
return (NULL);
if (found &&
(allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
return (NULL);
m = vm_page_alloc_iter(object, pindex,
vm_page_grab_pflags(allocflags), pages);
if (m != NULL) {
if ((allocflags & VM_ALLOC_ZERO) != 0 &&
(m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
break;
}
if ((allocflags &
(VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
return (NULL);
}
vm_page_grab_release(m, allocflags);
return (m);
}
vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
struct pctrie_iter pages;
VM_OBJECT_ASSERT_WLOCKED(object);
vm_page_iter_init(&pages, object);
return (vm_page_grab_iter(object, pindex, allocflags, &pages));
}
#define PAGE_NOT_ACQUIRED ((vm_page_t)1)
static vm_page_t
vm_page_acquire_unlocked(vm_object_t object, vm_pindex_t pindex, vm_page_t m,
int allocflags)
{
if (m == NULL)
m = vm_page_lookup_unlocked(object, pindex);
for (; m != NULL; m = vm_page_lookup_unlocked(object, pindex)) {
if (vm_page_trybusy(m, allocflags)) {
if (m->object == object && m->pindex == pindex) {
if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
vm_page_grab_release(m, allocflags);
break;
}
vm_page_busy_release(m);
cpu_spinwait();
continue;
}
if (!vm_page_grab_sleep(object, m, pindex, "pgnslp",
allocflags, false))
return (PAGE_NOT_ACQUIRED);
}
return (m);
}
vm_page_t
vm_page_grab_unlocked(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
vm_page_t m;
vm_page_grab_check(allocflags);
m = vm_page_acquire_unlocked(object, pindex, NULL, allocflags);
if (m == PAGE_NOT_ACQUIRED)
return (NULL);
if (m != NULL)
return (m);
if ((allocflags & VM_ALLOC_NOCREAT) != 0)
return (NULL);
VM_OBJECT_WLOCK(object);
m = vm_page_grab(object, pindex, allocflags);
VM_OBJECT_WUNLOCK(object);
return (m);
}
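/*
 * vm_page_grab_valid_iter:
 *
 * Grab the page at the given index and ensure that it is fully
 * valid, reading it from the pager (along with a bounded amount of
 * readahead) when necessary. Returns a VM_PAGER_* status and stores
 * the page in *mp on success.
 */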
int
vm_page_grab_valid_iter(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
int allocflags, struct pctrie_iter *pages)
{
vm_page_t m;
vm_page_t ma[VM_INITIAL_PAGEIN];
int after, ahead, i, pflags, rv;
KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
(allocflags & VM_ALLOC_IGN_SBUSY) != 0,
("vm_page_grab_valid: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
KASSERT((allocflags &
(VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0,
("vm_page_grab_valid: Invalid flags 0x%X", allocflags));
VM_OBJECT_ASSERT_WLOCKED(object);
pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY |
VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY);
pflags |= VM_ALLOC_WAITFAIL;
retrylookup:
if ((m = vm_radix_iter_lookup(pages, pindex)) != NULL) {
if (!vm_page_trybusy(m,
vm_page_all_valid(m) ? allocflags : 0)) {
(void)vm_page_grab_sleep(object, m, pindex, "pgrbwt",
allocflags, true);
pctrie_iter_reset(pages);
goto retrylookup;
}
if (vm_page_all_valid(m))
goto out;
if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
vm_page_busy_release(m);
*mp = NULL;
return (VM_PAGER_FAIL);
}
} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
*mp = NULL;
return (VM_PAGER_FAIL);
} else {
m = vm_page_alloc_iter(object, pindex, pflags, pages);
if (m == NULL) {
if (!vm_pager_can_alloc_page(object, pindex)) {
*mp = NULL;
return (VM_PAGER_AGAIN);
}
goto retrylookup;
}
}
vm_page_assert_xbusied(m);
if (vm_pager_has_page(object, pindex, NULL, &after)) {
after = MIN(after, VM_INITIAL_PAGEIN);
after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT);
after = MAX(after, 1);
ma[0] = m;
pctrie_iter_reset(pages);
for (i = 1; i < after; i++) {
m = vm_radix_iter_lookup_ge(pages, pindex + i);
ahead = after;
if (m != NULL)
ahead = MIN(ahead, m->pindex - pindex);
for (; i < ahead; i++) {
ma[i] = vm_page_alloc_iter(object, pindex + i,
VM_ALLOC_NORMAL, pages);
if (ma[i] == NULL)
break;
}
if (m == NULL || m->pindex != pindex + i ||
vm_page_any_valid(m) || !vm_page_tryxbusy(m))
break;
ma[i] = m;
}
after = i;
vm_object_pip_add(object, after);
VM_OBJECT_WUNLOCK(object);
rv = vm_pager_get_pages(object, ma, after, NULL, NULL);
pctrie_iter_reset(pages);
VM_OBJECT_WLOCK(object);
vm_object_pip_wakeupn(object, after);
m = ma[0];
if (rv != VM_PAGER_OK) {
for (i = 0; i < after; i++) {
if (!vm_page_wired(ma[i]))
vm_page_free(ma[i]);
else
vm_page_xunbusy(ma[i]);
}
*mp = NULL;
return (rv);
}
for (i = 1; i < after; i++)
vm_page_readahead_finish(ma[i]);
MPASS(vm_page_all_valid(m));
} else {
vm_page_zero_invalid(m, TRUE);
pctrie_iter_reset(pages);
}
out:
if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
if ((allocflags & VM_ALLOC_SBUSY) != 0 && vm_page_xbusied(m))
vm_page_busy_downgrade(m);
else if ((allocflags & VM_ALLOC_NOBUSY) != 0)
vm_page_busy_release(m);
*mp = m;
return (VM_PAGER_OK);
}
int
vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
int allocflags)
{
struct pctrie_iter pages;
VM_OBJECT_ASSERT_WLOCKED(object);
vm_page_iter_init(&pages, object);
return (vm_page_grab_valid_iter(mp, object, pindex, allocflags,
&pages));
}
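/*
 * vm_page_grab_zero_partial:
 *
 * If the page at the given index is resident or can be paged in,
 * zero the byte range [base, end) and mark the page dirty; pages
 * that do not exist are left unallocated.
 */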
int
vm_page_grab_zero_partial(vm_object_t object, vm_pindex_t pindex, int base,
int end)
{
struct pctrie_iter pages;
vm_page_t m;
int allocflags, rv;
bool found;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(base >= 0, ("%s: base %d", __func__, base));
KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
end));
allocflags = VM_ALLOC_NOCREAT | VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL;
vm_page_iter_init(&pages, object);
while ((m = vm_page_grab_lookup(
object, pindex, allocflags, &found, &pages)) == NULL) {
if (!vm_pager_has_page(object, pindex, NULL, NULL))
return (0);
m = vm_page_alloc_iter(object, pindex,
vm_page_grab_pflags(allocflags), &pages);
if (m != NULL) {
vm_object_pip_add(object, 1);
VM_OBJECT_WUNLOCK(object);
rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
VM_OBJECT_WLOCK(object);
vm_object_pip_wakeup(object);
if (rv != VM_PAGER_OK) {
vm_page_free(m);
return (EIO);
}
vm_page_launder(m);
break;
}
}
pmap_zero_page_area(m, base, end - base);
KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid", __func__, m));
vm_page_set_dirty(m);
vm_page_xunbusy(m);
return (0);
}
int
vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t object,
vm_pindex_t pindex, int allocflags)
{
vm_page_t m;
int flags;
int error;
KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
(allocflags & VM_ALLOC_IGN_SBUSY) != 0,
("vm_page_grab_valid_unlocked: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY "
"mismatch"));
KASSERT((allocflags &
(VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0,
("vm_page_grab_valid_unlocked: Invalid flags 0x%X", allocflags));
flags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
vm_page_grab_check(flags);
m = vm_page_acquire_unlocked(object, pindex, NULL, flags);
if (m == PAGE_NOT_ACQUIRED)
return (VM_PAGER_FAIL);
if (m != NULL) {
if (vm_page_all_valid(m)) {
if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
vm_page_grab_release(m, allocflags);
*mp = m;
return (VM_PAGER_OK);
}
vm_page_busy_release(m);
}
if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
*mp = NULL;
return (VM_PAGER_FAIL);
}
VM_OBJECT_WLOCK(object);
error = vm_page_grab_valid(mp, object, pindex, allocflags);
VM_OBJECT_WUNLOCK(object);
return (error);
}
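/*
 * vm_page_grab_pages:
 *
 * Grab a run of consecutive pages, allocating the missing ones as
 * allocflags permits. Returns the number of pages grabbed, which may
 * be less than the requested count.
 */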
int
vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
vm_page_t *ma, int count)
{
struct pctrie_iter pages;
vm_page_t m;
int pflags;
int ahead, i;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0,
("vm_page_grap_pages: VM_ALLOC_COUNT() is not allowed"));
KASSERT(count > 0,
("vm_page_grab_pages: invalid page count %d", count));
vm_page_grab_check(allocflags);
pflags = vm_page_grab_pflags(allocflags);
i = 0;
vm_page_iter_init(&pages, object);
retrylookup:
ahead = -1;
for (; i < count; i++) {
if (ahead < 0) {
ahead = vm_radix_iter_lookup_range(
&pages, pindex + i, &ma[i], count - i);
}
if (ahead-- > 0) {
m = ma[i];
if (!vm_page_tryacquire(m, allocflags)) {
if (vm_page_grab_sleep(object, m, pindex + i,
"grbmaw", allocflags, true)) {
pctrie_iter_reset(&pages);
goto retrylookup;
}
break;
}
} else {
if ((allocflags & VM_ALLOC_NOCREAT) != 0)
break;
m = vm_page_alloc_iter(object, pindex + i,
pflags | VM_ALLOC_COUNT(count - i), &pages);
if (m == NULL) {
if ((allocflags & (VM_ALLOC_NOWAIT |
VM_ALLOC_WAITFAIL)) != 0)
break;
goto retrylookup;
}
ma[i] = m;
}
if (vm_page_none_valid(m) &&
(allocflags & VM_ALLOC_ZERO) != 0) {
if ((m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
vm_page_valid(m);
}
vm_page_grab_release(m, allocflags);
}
return (i);
}
int
vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
int allocflags, vm_page_t *ma, int count)
{
vm_page_t m;
int flags;
int i, num_fetched;
KASSERT(count > 0,
("vm_page_grab_pages_unlocked: invalid page count %d", count));
vm_page_grab_check(allocflags);
flags = allocflags & ~VM_ALLOC_NOBUSY;
vm_page_grab_check(flags);
num_fetched = vm_radix_lookup_range_unlocked(&object->rtree, pindex,
ma, count);
for (i = 0; i < num_fetched; i++, pindex++) {
m = vm_page_acquire_unlocked(object, pindex, ma[i], flags);
if (m == PAGE_NOT_ACQUIRED)
return (i);
if (m == NULL)
break;
if ((flags & VM_ALLOC_ZERO) != 0 && vm_page_none_valid(m)) {
if ((m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
vm_page_valid(m);
}
vm_page_grab_release(m, allocflags);
ma[i] = m;
}
if (i == count || (allocflags & VM_ALLOC_NOCREAT) != 0)
return (i);
count -= i;
VM_OBJECT_WLOCK(object);
i += vm_page_grab_pages(object, pindex, allocflags, &ma[i], count);
VM_OBJECT_WUNLOCK(object);
return (i);
}
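/*
 * vm_page_bits:
 *
 * Compute the valid/dirty bit mask for the DEV_BSIZE blocks spanned
 * by the byte range [base, base + size).
 */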
vm_page_bits_t
vm_page_bits(int base, int size)
{
int first_bit;
int last_bit;
KASSERT(base + size <= PAGE_SIZE,
("vm_page_bits: illegal base/size %d/%d", base, size));
if (size == 0)
return (0);
first_bit = base >> DEV_BSHIFT;
last_bit = (base + size - 1) >> DEV_BSHIFT;
return (((vm_page_bits_t)2 << last_bit) -
((vm_page_bits_t)1 << first_bit));
}
void
vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set)
{
#if PAGE_SIZE == 32768
atomic_set_64((uint64_t *)bits, set);
#elif PAGE_SIZE == 16384
atomic_set_32((uint32_t *)bits, set);
#elif (PAGE_SIZE == 8192) && defined(atomic_set_16)
atomic_set_16((uint16_t *)bits, set);
#elif (PAGE_SIZE == 4096) && defined(atomic_set_8)
atomic_set_8((uint8_t *)bits, set);
#else
uintptr_t addr;
int shift;
addr = (uintptr_t)bits;
shift = addr & (sizeof(uint32_t) - 1);
#if BYTE_ORDER == BIG_ENDIAN
shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
#else
shift *= NBBY;
#endif
addr &= ~(sizeof(uint32_t) - 1);
atomic_set_32((uint32_t *)addr, set << shift);
#endif
}
static inline void
vm_page_bits_clear(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t clear)
{
#if PAGE_SIZE == 32768
atomic_clear_64((uint64_t *)bits, clear);
#elif PAGE_SIZE == 16384
atomic_clear_32((uint32_t *)bits, clear);
#elif (PAGE_SIZE == 8192) && defined(atomic_clear_16)
atomic_clear_16((uint16_t *)bits, clear);
#elif (PAGE_SIZE == 4096) && defined(atomic_clear_8)
atomic_clear_8((uint8_t *)bits, clear);
#else
uintptr_t addr;
int shift;
addr = (uintptr_t)bits;
shift = addr & (sizeof(uint32_t) - 1);
#if BYTE_ORDER == BIG_ENDIAN
shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
#else
shift *= NBBY;
#endif
addr &= ~(sizeof(uint32_t) - 1);
atomic_clear_32((uint32_t *)addr, clear << shift);
#endif
}
static inline vm_page_bits_t
vm_page_bits_swap(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t newbits)
{
#if PAGE_SIZE == 32768
uint64_t old;
old = *bits;
while (atomic_fcmpset_64(bits, &old, newbits) == 0);
return (old);
#elif PAGE_SIZE == 16384
uint32_t old;
old = *bits;
while (atomic_fcmpset_32(bits, &old, newbits) == 0);
return (old);
#elif (PAGE_SIZE == 8192) && defined(atomic_fcmpset_16)
uint16_t old;
old = *bits;
while (atomic_fcmpset_16(bits, &old, newbits) == 0);
return (old);
#elif (PAGE_SIZE == 4096) && defined(atomic_fcmpset_8)
uint8_t old;
old = *bits;
while (atomic_fcmpset_8(bits, &old, newbits) == 0);
return (old);
#else
uintptr_t addr;
uint32_t old, new, mask;
int shift;
addr = (uintptr_t)bits;
shift = addr & (sizeof(uint32_t) - 1);
#if BYTE_ORDER == BIG_ENDIAN
shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
#else
shift *= NBBY;
#endif
addr &= ~(sizeof(uint32_t) - 1);
mask = VM_PAGE_BITS_ALL << shift;
old = *bits;
do {
new = old & ~mask;
new |= newbits << shift;
} while (atomic_fcmpset_32((uint32_t *)addr, &old, new) == 0);
return (old >> shift);
#endif
}
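/*
 * vm_page_set_valid_range:
 *
 * Mark the blocks covering [base, base + size) valid, first zeroing
 * any partially covered edge blocks that were not already valid.
 */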
void
vm_page_set_valid_range(vm_page_t m, int base, int size)
{
int endoff, frag;
vm_page_bits_t pagebits;
vm_page_assert_busied(m);
if (size == 0)
return;
if ((frag = rounddown2(base, DEV_BSIZE)) != base &&
(m->valid & (1 << (base >> DEV_BSHIFT))) == 0)
pmap_zero_page_area(m, frag, base - frag);
endoff = base + size;
if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff &&
(m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0)
pmap_zero_page_area(m, endoff,
DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0,
("vm_page_set_valid_range: page %p is dirty", m));
pagebits = vm_page_bits(base, size);
if (vm_page_xbusied(m))
m->valid |= pagebits;
else
vm_page_bits_set(m, &m->valid, pagebits);
}
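/*
 * vm_page_set_dirty:
 *
 * Mark the entire page dirty, returning the previous dirty bits and
 * releasing any swap space assigned to a previously clean page.
 */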
vm_page_bits_t
vm_page_set_dirty(vm_page_t m)
{
vm_page_bits_t old;
VM_PAGE_OBJECT_BUSY_ASSERT(m);
if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) {
old = m->dirty;
m->dirty = VM_PAGE_BITS_ALL;
} else
old = vm_page_bits_swap(m, &m->dirty, VM_PAGE_BITS_ALL);
if (old == 0 && (m->a.flags & PGA_SWAP_SPACE) != 0)
vm_pager_page_unswapped(m);
return (old);
}
static __inline void
vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
{
vm_page_assert_busied(m);
if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m))
m->dirty &= ~pagebits;
else
vm_page_bits_clear(m, &m->dirty, pagebits);
}
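/*
 * vm_page_set_validclean:
 *
 * Mark the blocks covering [base, base + size) valid and clean,
 * zeroing partially covered edge blocks and clearing the pmap
 * modified bit when the whole page is covered.
 */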
void
vm_page_set_validclean(vm_page_t m, int base, int size)
{
vm_page_bits_t oldvalid, pagebits;
int endoff, frag;
vm_page_assert_busied(m);
if (size == 0)
return;
if ((frag = rounddown2(base, DEV_BSIZE)) != base &&
(m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
pmap_zero_page_area(m, frag, base - frag);
endoff = base + size;
if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff &&
(m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
pmap_zero_page_area(m, endoff,
DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
oldvalid = m->valid;
pagebits = vm_page_bits(base, size);
if (vm_page_xbusied(m))
m->valid |= pagebits;
else
vm_page_bits_set(m, &m->valid, pagebits);
#if 0
if ((frag = base & (DEV_BSIZE - 1)) != 0) {
frag = DEV_BSIZE - frag;
base += frag;
size -= frag;
if (size < 0)
size = 0;
}
pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
#endif
if (base == 0 && size == PAGE_SIZE) {
if (oldvalid == VM_PAGE_BITS_ALL)
pmap_clear_modify(m);
m->dirty = 0;
vm_page_aflag_clear(m, PGA_NOSYNC);
} else if (oldvalid != VM_PAGE_BITS_ALL && vm_page_xbusied(m))
m->dirty &= ~pagebits;
else
vm_page_clear_dirty_mask(m, pagebits);
}
void
vm_page_clear_dirty(vm_page_t m, int base, int size)
{
vm_page_clear_dirty_mask(m, vm_page_bits(base, size));
}
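/*
 * vm_page_set_invalid:
 *
 * Invalidate the blocks covering [base, base + size), first removing
 * all mappings of a fully valid page so that stale contents cannot
 * be written back.
 */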
void
vm_page_set_invalid(vm_page_t m, int base, int size)
{
vm_page_bits_t bits;
vm_object_t object;
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
vm_page_assert_busied(m);
if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) +
size >= object->un_pager.vnp.vnp_size)
bits = VM_PAGE_BITS_ALL;
else
bits = vm_page_bits(base, size);
if (object->ref_count != 0 && vm_page_all_valid(m) && bits != 0)
pmap_remove_all(m);
KASSERT((bits == 0 && vm_page_all_valid(m)) ||
!pmap_page_is_mapped(m),
("vm_page_set_invalid: page %p is mapped", m));
if (vm_page_xbusied(m)) {
m->valid &= ~bits;
m->dirty &= ~bits;
} else {
vm_page_bits_clear(m, &m->valid, bits);
vm_page_bits_clear(m, &m->dirty, bits);
}
}
void
vm_page_invalid(vm_page_t m)
{
vm_page_assert_busied(m);
VM_OBJECT_ASSERT_WLOCKED(m->object);
MPASS(!pmap_page_is_mapped(m));
if (vm_page_xbusied(m))
m->valid = 0;
else
vm_page_bits_clear(m, &m->valid, VM_PAGE_BITS_ALL);
}
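/*
 * vm_page_zero_invalid:
 *
 * Zero every invalid DEV_BSIZE block of the page and optionally mark
 * the whole page valid afterwards.
 */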
void
vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
{
int b;
int i;
for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) {
if (i == (PAGE_SIZE / DEV_BSIZE) ||
(m->valid & ((vm_page_bits_t)1 << i))) {
if (i > b) {
pmap_zero_page_area(m,
b << DEV_BSHIFT, (i - b) << DEV_BSHIFT);
}
b = i + 1;
}
}
if (setvalid)
vm_page_valid(m);
}
int
vm_page_is_valid(vm_page_t m, int base, int size)
{
vm_page_bits_t bits;
bits = vm_page_bits(base, size);
return (vm_page_any_valid(m) && (m->valid & bits) == bits);
}
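/*
 * vm_page_ps_test:
 *
 * Return true if every base page of the superpage run starting at m
 * satisfies the given PS_* predicates, optionally skipping skip_m.
 */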
bool
vm_page_ps_test(vm_page_t m, int psind, int flags, vm_page_t skip_m)
{
vm_object_t object;
int i, npages;
object = m->object;
if (skip_m != NULL && skip_m->object != object)
return (false);
VM_OBJECT_ASSERT_LOCKED(object);
KASSERT(psind <= m->psind,
("psind %d > psind %d of m %p", psind, m->psind, m));
npages = atop(pagesizes[psind]);
for (i = 0; i < npages; i++) {
if (m[i].object != object)
return (false);
if (&m[i] == skip_m)
continue;
if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i]))
return (false);
if ((flags & PS_ALL_DIRTY) != 0) {
if (m[i].dirty != VM_PAGE_BITS_ALL)
return (false);
}
if ((flags & PS_ALL_VALID) != 0 &&
m[i].valid != VM_PAGE_BITS_ALL)
return (false);
}
return (true);
}
void
vm_page_test_dirty(vm_page_t m)
{
vm_page_assert_busied(m);
if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m))
vm_page_dirty(m);
}
void
vm_page_valid(vm_page_t m)
{
vm_page_assert_busied(m);
if (vm_page_xbusied(m))
m->valid = VM_PAGE_BITS_ALL;
else
vm_page_bits_set(m, &m->valid, VM_PAGE_BITS_ALL);
}
#ifdef INVARIANTS
void
vm_page_object_busy_assert(vm_page_t m)
{
if (m->object != NULL && !vm_page_busied(m))
VM_OBJECT_ASSERT_BUSY(m->object);
}
void
vm_page_assert_pga_writeable(vm_page_t m, uint16_t bits)
{
if ((bits & PGA_WRITEABLE) == 0)
return;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("PGA_WRITEABLE on unmanaged page"));
if (!vm_page_xbusied(m))
VM_OBJECT_ASSERT_BUSY(m->object);
}
#endif
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
#include <ddb/ddb.h>
DB_SHOW_COMMAND_FLAGS(page, vm_page_print_page_info, DB_CMD_MEMSAFE)
{
db_printf("vm_cnt.v_free_count: %d\n", vm_free_count());
db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count());
db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
db_printf("vm_cnt.v_wire_count: %d\n", vm_wire_count());
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target);
db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target);
}
DB_SHOW_COMMAND_FLAGS(pageq, vm_page_print_pageq_info, DB_CMD_MEMSAFE)
{
int dom;
db_printf("pq_free %d\n", vm_free_count());
for (dom = 0; dom < vm_ndomains; dom++) {
db_printf(
"dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n",
dom,
vm_dom[dom].vmd_page_count,
vm_dom[dom].vmd_free_count,
vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt,
vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt);
}
}
DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
{
vm_page_t m;
boolean_t phys, virt;
if (!have_addr) {
db_printf("show pginfo addr\n");
return;
}
phys = strchr(modif, 'p') != NULL;
virt = strchr(modif, 'v') != NULL;
if (virt)
m = PHYS_TO_VM_PAGE(pmap_kextract(addr));
else if (phys)
m = PHYS_TO_VM_PAGE(addr);
else
m = (vm_page_t)addr;
db_printf(
"page %p obj %p pidx 0x%jx phys 0x%jx q %d ref 0x%x\n"
" af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n",
m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
m->a.queue, m->ref_count, m->a.flags, m->oflags,
m->flags, m->a.act_count, m->busy_lock, m->valid, m->dirty);
}
#endif