#include <sys/cdefs.h>
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/bitstring.h>
#include <sys/counter.h>
#include <sys/ktr.h>
#include <sys/vmmeter.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#if VM_NRESERVLEVEL > 0

/*
 * With a two-level configuration, fold both levels into a single level-0
 * reservation whose order is the sum of the configured orders, and record
 * the smaller contained superpage size as VM_SUBLEVEL_0_NPAGES.
 */
#if VM_NRESERVLEVEL == 2
#undef VM_NRESERVLEVEL
#define VM_NRESERVLEVEL 1
#if VM_LEVEL_0_ORDER == 4
#undef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER (4 + VM_LEVEL_1_ORDER)
#define VM_SUBLEVEL_0_NPAGES (1 << 4)
#elif VM_LEVEL_0_ORDER == 7
#undef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER (7 + VM_LEVEL_1_ORDER)
#define VM_SUBLEVEL_0_NPAGES (1 << 7)
#else
#error "Unsupported level 0 reservation size"
#endif
/* psind of a page backing a fully populated level-0 reservation. */
#define VM_LEVEL_0_PSIND 2
#else
#define VM_LEVEL_0_PSIND 1
#endif

/* Maximum level-0 order across supported configurations; sizes popmap. */
#ifndef VM_LEVEL_0_ORDER_MAX
#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER
#endif

/* The number of small pages in a level-0 reservation. */
#define VM_LEVEL_0_NPAGES (1 << VM_LEVEL_0_ORDER)
#define VM_LEVEL_0_NPAGES_MAX (1 << VM_LEVEL_0_ORDER_MAX)

/* log2 of the size (in bytes) of a level-0 reservation, and that size. */
#define VM_LEVEL_0_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#define VM_LEVEL_0_SIZE (1 << VM_LEVEL_0_SHIFT)

/*
 * Computes the index of the small page underlying the given (object, pindex)
 * within the reservation's array of small pages; pg_color biases the
 * placement of the object within reservations.
 */
#define VM_RESERV_INDEX(object, pindex) \
    (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1))

/* Number of elapsed ticks required before updating partpopq LRU position. */
#define PARTPOPSLOP 1
/*
 * The reservation structure:  a run of VM_LEVEL_0_NPAGES physically
 * contiguous small pages held for promotion to a superpage.  The popmap
 * records which of those pages are currently in use; popcnt caches the
 * number of set bits.
 */
struct vm_reserv {
	struct mtx lock;			/* reservation lock */
	TAILQ_ENTRY(vm_reserv) partpopq;	/* per-domain partpop queue link */
	LIST_ENTRY(vm_reserv) objq;		/* object's reservation list link */
	vm_object_t object;			/* containing object, or NULL if free */
	vm_pindex_t pindex;			/* offset of first page within object */
	vm_page_t pages;			/* first page of the reservation */
	uint16_t popcnt;			/* # of pages in use */
	uint8_t domain;				/* NUMA domain of the pages */
	char inpartpopq;			/* on the partpop queue? */
	int lasttick;				/* tick of last popcnt change */
	bitstr_t bit_decl(popmap, VM_LEVEL_0_NPAGES_MAX); /* in-use bitmap */
};
TAILQ_HEAD(vm_reserv_queue, vm_reserv);

/* Per-reservation lock helpers. */
#define vm_reserv_lockptr(rv) (&(rv)->lock)
#define vm_reserv_assert_locked(rv) \
    mtx_assert(vm_reserv_lockptr(rv), MA_OWNED)
#define vm_reserv_lock(rv) mtx_lock(vm_reserv_lockptr(rv))
#define vm_reserv_trylock(rv) mtx_trylock(vm_reserv_lockptr(rv))
#define vm_reserv_unlock(rv) mtx_unlock(vm_reserv_lockptr(rv))

/*
 * The reservation array: one element per level-0 reservation of physical
 * memory; sized and mapped by vm_reserv_startup().
 */
static vm_reserv_t vm_reserv_array;

/*
 * Per-NUMA-domain state: the queue of partially populated reservations and
 * a marker used to hold a scan position across lock drops.
 */
struct vm_reserv_domain {
	struct mtx lock;
	struct vm_reserv_queue partpop;		/* partially populated reservations */
	struct vm_reserv marker;		/* scan marker (never a real reserv) */
} __aligned(CACHE_LINE_SIZE);

static struct vm_reserv_domain vm_rvd[MAXMEMDOM];

/* Per-domain lock helpers; marker.lock serializes whole-queue scans. */
#define vm_reserv_domain_lockptr(d) (&vm_rvd[(d)].lock)
#define vm_reserv_domain_assert_locked(d) \
    mtx_assert(vm_reserv_domain_lockptr(d), MA_OWNED)
#define vm_reserv_domain_lock(d) mtx_lock(vm_reserv_domain_lockptr(d))
#define vm_reserv_domain_unlock(d) mtx_unlock(vm_reserv_domain_lockptr(d))
#define vm_reserv_domain_scan_lock(d) mtx_lock(&vm_rvd[(d)].marker.lock)
#define vm_reserv_domain_scan_unlock(d) mtx_unlock(&vm_rvd[(d)].marker.lock)
/* sysctl vm.reserv subtree and statistics counters. */
static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Reservation Info");

static COUNTER_U64_DEFINE_EARLY(vm_reserv_broken);
SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
    &vm_reserv_broken, "Cumulative number of broken reservations");

static COUNTER_U64_DEFINE_EARLY(vm_reserv_freed);
SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
    &vm_reserv_freed, "Cumulative number of freed reservations");

static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm_reserv, OID_AUTO, fullpop, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD,
    NULL, 0, sysctl_vm_reserv_fullpop, "I", "Current number of full reservations");

static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_reserv_partpopq, "A",
    "Partially populated reservation queues");

static COUNTER_U64_DEFINE_EARLY(vm_reserv_reclaimed);
SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
    &vm_reserv_reclaimed, "Cumulative number of reclaimed reservations");
/*
 * A hashed pool of mutexes protecting each object's reservation list
 * (object->rvq) and rv->object membership; the lock is selected from the
 * object's address.
 */
#define VM_RESERV_OBJ_LOCK_COUNT MAXCPU

struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];

#define vm_reserv_object_lock_idx(object) \
    (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
#define vm_reserv_object_lock_ptr(object) \
    &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
#define vm_reserv_object_lock(object) \
    mtx_lock(vm_reserv_object_lock_ptr((object)))
#define vm_reserv_object_unlock(object) \
    mtx_unlock(vm_reserv_object_lock_ptr((object)))

/* Forward declarations of file-local helpers. */
static void vm_reserv_break(vm_reserv_t rv);
static void vm_reserv_depopulate(vm_reserv_t rv, int index);
static vm_reserv_t vm_reserv_from_page(vm_page_t m);
static boolean_t vm_reserv_has_pindex(vm_reserv_t rv,
    vm_pindex_t pindex);
static void vm_reserv_populate(vm_reserv_t rv, int index);
static void vm_reserv_reclaim(vm_reserv_t rv);
/*
 * Returns the current number of full (completely populated) reservations.
 */
static int
sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS)
{
	vm_paddr_t paddr;
	struct vm_phys_seg *seg;
	vm_reserv_t rv;
	int fullpop, segind;

	fullpop = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		/* Start at the first reservation boundary within the segment. */
		paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
#ifdef VM_PHYSSEG_SPARSE
		rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) -
		    (seg->start >> VM_LEVEL_0_SHIFT);
#else
		rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
#endif
		/*
		 * The first comparison guards against paddr overflowing and
		 * wrapping around when VM_LEVEL_0_SIZE is added.
		 */
		while (paddr + VM_LEVEL_0_SIZE > paddr && paddr +
		    VM_LEVEL_0_SIZE <= seg->end) {
			fullpop += rv->popcnt == VM_LEVEL_0_NPAGES;
			paddr += VM_LEVEL_0_SIZE;
			rv++;
		}
	}
	return (sysctl_handle_int(oidp, &fullpop, 0, req));
}
/*
 * Describes the current state of the partially populated reservation queue
 * of each domain as a human-readable table.
 */
static int
sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	vm_reserv_t rv;
	int counter, error, domain, level, unused_pages;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	sbuf_printf(&sbuf, "\nDOMAIN    LEVEL     SIZE  NUMBER\n\n");
	for (domain = 0; domain < vm_ndomains; domain++) {
		for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
			counter = 0;
			unused_pages = 0;
			vm_reserv_domain_lock(domain);
			TAILQ_FOREACH(rv, &vm_rvd[domain].partpop, partpopq) {
				/* Skip a scan marker left by a concurrent scan. */
				if (rv == &vm_rvd[domain].marker)
					continue;
				counter++;
				unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
			}
			vm_reserv_domain_unlock(domain);
			sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
			    domain, level,
			    unused_pages * ((int)PAGE_SIZE / 1024), counter);
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
/*
 * Remove a reservation from its object's list of reservations and clear
 * its object association.
 *
 * The reservation must be locked.
 */
static void
vm_reserv_remove(vm_reserv_t rv)
{
	vm_object_t object;

	vm_reserv_assert_locked(rv);
	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
	    __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
	KASSERT(rv->object != NULL,
	    ("vm_reserv_remove: reserv %p is free", rv));
	KASSERT(!rv->inpartpopq,
	    ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv));
	object = rv->object;
	vm_reserv_object_lock(object);
	LIST_REMOVE(rv, objq);
	rv->object = NULL;
	vm_reserv_object_unlock(object);
}
/*
 * Associate a free reservation with the given object and page index, and
 * insert it at the head of the object's reservation list.
 *
 * The reservation must be locked.
 */
static void
vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
{
	vm_reserv_assert_locked(rv);
	CTR6(KTR_VM,
	    "%s: rv %p(%p) object %p new %p popcnt %d",
	    __FUNCTION__, rv, rv->pages, rv->object, object,
	    rv->popcnt);
	KASSERT(rv->object == NULL,
	    ("vm_reserv_insert: reserv %p isn't free", rv));
	KASSERT(rv->popcnt == 0,
	    ("vm_reserv_insert: reserv %p's popcnt is corrupted", rv));
	KASSERT(!rv->inpartpopq,
	    ("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv));
	KASSERT(bit_ntest(rv->popmap, 0, VM_LEVEL_0_NPAGES - 1, 0),
	    ("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
	vm_reserv_object_lock(object);
	rv->pindex = pindex;
	rv->object = object;
	rv->lasttick = ticks;
	LIST_INSERT_HEAD(&object->rvq, rv, objq);
	vm_reserv_object_unlock(object);
}
#ifdef VM_SUBLEVEL_0_NPAGES
/*
 * Returns true if the sublevel superpage containing popmap position
 * "index" is fully populated, i.e. every bit of its slice of the
 * popmap is set.
 */
static inline bool
vm_reserv_is_sublevel_full(vm_reserv_t rv, int index)
{
	_Static_assert(VM_SUBLEVEL_0_NPAGES == 16 ||
	    VM_SUBLEVEL_0_NPAGES == 128,
	    "vm_reserv_is_sublevel_full: unsupported VM_SUBLEVEL_0_NPAGES");
	if (VM_SUBLEVEL_0_NPAGES == 16) {
		/* A 16-page sublevel occupies one 16-bit popmap word. */
		return (((uint16_t *)rv->popmap)[index / 16] == UINT16_MAX);
	} else {
		/* A 128-page sublevel occupies two adjacent 64-bit words. */
		int word = rounddown2(index, 128) / 64;

		return (((uint64_t *)rv->popmap)[word] == UINT64_MAX &&
		    ((uint64_t *)rv->popmap)[word + 1] == UINT64_MAX);
	}
}
#endif
/*
 * Reduces the given reservation's population count.  If the population count
 * becomes zero, the reservation is destroyed and its pages are returned to
 * the physical memory allocator.  Otherwise, the reservation's position in
 * the partially populated queue is refreshed at most once per PARTPOPSLOP
 * ticks.
 *
 * The reservation must be locked.
 */
static void
vm_reserv_depopulate(vm_reserv_t rv, int index)
{
	struct vm_domain *vmd;

	vm_reserv_assert_locked(rv);
	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
	    __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
	KASSERT(rv->object != NULL,
	    ("vm_reserv_depopulate: reserv %p is free", rv));
	KASSERT(bit_test(rv->popmap, index),
	    ("vm_reserv_depopulate: reserv %p's popmap[%d] is clear", rv,
	    index));
	KASSERT(rv->popcnt > 0,
	    ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
	KASSERT(rv->domain < vm_ndomains,
	    ("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
	    rv, rv->domain));
	if (rv->popcnt == VM_LEVEL_0_NPAGES) {
		/* The reservation was full: demote the superpage mapping. */
		KASSERT(rv->pages->psind == VM_LEVEL_0_PSIND,
		    ("vm_reserv_depopulate: reserv %p is already demoted",
		    rv));
		rv->pages->psind = VM_LEVEL_0_PSIND - 1;
	}
#ifdef VM_SUBLEVEL_0_NPAGES
	/* Demote the containing sublevel superpage if it was full. */
	if (vm_reserv_is_sublevel_full(rv, index))
		rv->pages[rounddown2(index, VM_SUBLEVEL_0_NPAGES)].psind = 0;
#endif
	bit_clear(rv->popmap, index);
	rv->popcnt--;
	/*
	 * Requeue at the tail (LRU) only if enough ticks have elapsed or
	 * the reservation has been emptied.
	 */
	if ((unsigned)(ticks - rv->lasttick) >= PARTPOPSLOP ||
	    rv->popcnt == 0) {
		vm_reserv_domain_lock(rv->domain);
		if (rv->inpartpopq) {
			TAILQ_REMOVE(&vm_rvd[rv->domain].partpop, rv, partpopq);
			rv->inpartpopq = FALSE;
		}
		if (rv->popcnt != 0) {
			rv->inpartpopq = TRUE;
			TAILQ_INSERT_TAIL(&vm_rvd[rv->domain].partpop, rv,
			    partpopq);
		}
		vm_reserv_domain_unlock(rv->domain);
		rv->lasttick = ticks;
	}
	vmd = VM_DOMAIN(rv->domain);
	if (rv->popcnt == 0) {
		/* Free the now-empty reservation back to the allocator. */
		vm_reserv_remove(rv);
		vm_domain_free_lock(vmd);
		vm_phys_free_pages(rv->pages, VM_FREEPOOL_DEFAULT,
		    VM_LEVEL_0_ORDER);
		vm_domain_free_unlock(vmd);
		counter_u64_add(vm_reserv_freed, 1);
	}
	vm_domain_freecnt_inc(vmd, 1);
}
/*
 * Returns the reservation to which the given page might belong.  The
 * mapping is by physical address; the returned reservation may be free.
 */
static __inline vm_reserv_t
vm_reserv_from_page(vm_page_t m)
{
#ifdef VM_PHYSSEG_SPARSE
	struct vm_phys_seg *seg;

	/* Index relative to the page's segment's first reservation. */
	seg = &vm_phys_segs[m->segind];
	return (seg->first_reserv + (VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT) -
	    (seg->start >> VM_LEVEL_0_SHIFT));
#else
	return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
#endif
}
/*
 * Returns an existing reservation containing (object, pindex), or NULL.
 * On a NULL return, *mpredp and *msuccp are set to the object's resident
 * pages immediately before and at-or-after pindex, respectively, for use
 * by the caller in sizing a new reservation.
 */
static vm_reserv_t
vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex,
    vm_page_t *mpredp, vm_page_t *msuccp, struct pctrie_iter *pages)
{
	vm_reserv_t rv;
	vm_page_t mpred, msucc;

	/* The predecessor's reservation may already cover pindex. */
	mpred = vm_radix_iter_lookup_lt(pages, pindex);
	if (mpred != NULL) {
		KASSERT(mpred->object == object,
		    ("vm_reserv_from_object: object doesn't contain mpred"));
		KASSERT(mpred->pindex < pindex,
		    ("vm_reserv_from_object: mpred doesn't precede pindex"));
		rv = vm_reserv_from_page(mpred);
		if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
			return (rv);
	}
	/* Otherwise, the successor's reservation may cover pindex. */
	msucc = vm_radix_iter_lookup_ge(pages, pindex);
	if (msucc != NULL) {
		KASSERT(msucc->pindex > pindex,
		    ("vm_reserv_from_object: msucc doesn't succeed pindex"));
		rv = vm_reserv_from_page(msucc);
		if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
			return (rv);
	}
	*mpredp = mpred;
	*msuccp = msucc;
	return (NULL);
}
/*
 * Returns TRUE if the given reservation contains the given page index and
 * FALSE otherwise.
 */
static __inline boolean_t
vm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex)
{
	/*
	 * Because VM_LEVEL_0_NPAGES is a power of two, this unsigned
	 * comparison is equivalent to testing that no bits above the
	 * reservation index field are set in the difference.
	 */
	return (pindex - rv->pindex < VM_LEVEL_0_NPAGES);
}
/*
 * Returns the number of pages that a new reservation-backed allocation of
 * "minpages" pages starting at index "first" should request: "minpages"
 * rounded up to a multiple of the reservation size when the neighboring
 * resident pages/reservations leave room, "minpages" itself when only the
 * right neighbor constrains it, or 0 when the left neighbor overlaps
 * "first".  The caller rejects results below VM_LEVEL_0_NPAGES.
 */
static u_long
vm_reserv_num_alloc_pages(vm_object_t object, vm_pindex_t first,
    u_long minpages, vm_page_t mpred, vm_page_t msucc)
{
	vm_pindex_t leftcap, rightcap;
	vm_reserv_t rv;
	u_int allocpages;

	allocpages = roundup2(minpages, VM_LEVEL_0_NPAGES);

	vm_reserv_object_lock(object);
	if (mpred != NULL) {
		/*
		 * The first usable index to the left: just past mpred, or
		 * past mpred's reservation if that reservation is ours.
		 */
		if ((rv = vm_reserv_from_page(mpred))->object != object)
			leftcap = mpred->pindex + 1;
		else
			leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
		if (leftcap > first)
			allocpages = 0;
	}
	if (minpages < allocpages) {
		/*
		 * The first unusable index to the right: the object's end
		 * (or OBJ_MAX_SIZE for anonymous objects) when there is no
		 * successor, else the successor page or its reservation.
		 */
		if (msucc == NULL) {
			if ((object->flags & OBJ_ANON) == 0)
				rightcap = object->size;
			else
				rightcap = OBJ_MAX_SIZE;
		} else {
			if ((rv = vm_reserv_from_page(msucc))->object != object)
				rightcap = msucc->pindex;
			else
				rightcap = rv->pindex;
		}
		if (first + allocpages > rightcap) {
			/* Fall back to the unrounded request size. */
			allocpages = minpages;
		}
	}
	vm_reserv_object_unlock(object);
	return (allocpages);
}
/*
 * Increases the given reservation's population count.  Moves the reservation
 * to the tail of the partially populated reservation queue (at most once per
 * PARTPOPSLOP ticks) or, if the reservation becomes full, promotes its first
 * page to superpage size.
 *
 * The reservation must be locked.
 */
static void
vm_reserv_populate(vm_reserv_t rv, int index)
{
	vm_reserv_assert_locked(rv);
	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
	    __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
	KASSERT(rv->object != NULL,
	    ("vm_reserv_populate: reserv %p is free", rv));
	KASSERT(!bit_test(rv->popmap, index),
	    ("vm_reserv_populate: reserv %p's popmap[%d] is set", rv,
	    index));
	KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
	    ("vm_reserv_populate: reserv %p is already full", rv));
	KASSERT(rv->pages->psind >= 0 &&
	    rv->pages->psind < VM_LEVEL_0_PSIND,
	    ("vm_reserv_populate: reserv %p is already promoted", rv));
	KASSERT(rv->domain < vm_ndomains,
	    ("vm_reserv_populate: reserv %p's domain is corrupted %d",
	    rv, rv->domain));
	bit_set(rv->popmap, index);
#ifdef VM_SUBLEVEL_0_NPAGES
	/* Promote the containing sublevel superpage if it became full. */
	if (vm_reserv_is_sublevel_full(rv, index))
		rv->pages[rounddown2(index, VM_SUBLEVEL_0_NPAGES)].psind = 1;
#endif
	rv->popcnt++;
	/* Defer requeueing unless enough ticks elapsed or it became full. */
	if ((unsigned)(ticks - rv->lasttick) < PARTPOPSLOP &&
	    rv->inpartpopq && rv->popcnt != VM_LEVEL_0_NPAGES)
		return;
	rv->lasttick = ticks;
	vm_reserv_domain_lock(rv->domain);
	if (rv->inpartpopq) {
		TAILQ_REMOVE(&vm_rvd[rv->domain].partpop, rv, partpopq);
		rv->inpartpopq = FALSE;
	}
	if (rv->popcnt < VM_LEVEL_0_NPAGES) {
		rv->inpartpopq = TRUE;
		TAILQ_INSERT_TAIL(&vm_rvd[rv->domain].partpop, rv, partpopq);
	} else {
		/* The reservation is now full: promote to a superpage. */
		KASSERT(rv->pages->psind == VM_LEVEL_0_PSIND - 1,
		    ("vm_reserv_populate: reserv %p is already promoted",
		    rv));
		rv->pages->psind = VM_LEVEL_0_PSIND;
	}
	vm_reserv_domain_unlock(rv->domain);
}
/*
 * Allocates a contiguous set of "npages" physical pages from an existing
 * or newly created reservation.  All of the physical pages must be at or
 * above "low" and below "high"; "alignment" constrains the first page's
 * physical address, and a non-zero "boundary" may not be crossed by the set.
 * Returns the first page of the set, or NULL if the request cannot be
 * satisfied from reservations.
 *
 * The object must be write-locked.
 */
vm_page_t
vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
    int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary, struct pctrie_iter *pages)
{
	struct vm_domain *vmd;
	vm_paddr_t pa, size;
	vm_page_t m, m_ret, mpred, msucc;
	vm_pindex_t first;
	vm_reserv_t rv;
	u_long allocpages;
	int i, index, n;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));

	/*
	 * Is a reservation fundamentally impossible?
	 */
	if (pindex < VM_RESERV_INDEX(object, pindex) ||
	    pindex + npages > object->size)
		return (NULL);

	/*
	 * All reservations of a particular size have the same alignment.
	 * Assuming that the first page is allocated from a reservation, the
	 * least significant bits of its physical address can be determined
	 * from its offset from the beginning of the reservation and the size
	 * of the reservation.
	 */
	pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
	size = npages << PAGE_SHIFT;
	if (!vm_addr_ok(pa, size, alignment, boundary))
		return (NULL);

	/*
	 * Look for an existing reservation.
	 */
	rv = vm_reserv_from_object(object, pindex, &mpred, &msucc, pages);
	if (rv != NULL) {
		KASSERT(object != kernel_object || rv->domain == domain,
		    ("vm_reserv_alloc_contig: domain mismatch"));
		index = VM_RESERV_INDEX(object, pindex);
		/* Does the allocation fit within the reservation? */
		if (index + npages > VM_LEVEL_0_NPAGES)
			return (NULL);
		domain = rv->domain;
		vmd = VM_DOMAIN(domain);
		vm_reserv_lock(rv);
		/* Handle reclamation race: the reservation may have moved. */
		if (rv->object != object)
			goto out;
		m = &rv->pages[index];
		pa = VM_PAGE_TO_PHYS(m);
		if (pa < low || pa + size > high ||
		    !vm_addr_ok(pa, size, alignment, boundary))
			goto out;
		/* Fail if any of the requested pages are already in use. */
		if (!bit_ntest(rv->popmap, index, index + npages - 1, 0))
			goto out;
		if (!vm_domain_allocate(vmd, req, npages))
			goto out;
		for (i = 0; i < npages; i++)
			vm_reserv_populate(rv, index + i);
		vm_reserv_unlock(rv);
		return (m);
out:
		vm_reserv_unlock(rv);
		return (NULL);
	}

	/*
	 * Would at least one whole reservation fit between the neighboring
	 * pages/reservations around pindex?
	 */
	first = pindex - VM_RESERV_INDEX(object, pindex);
	allocpages = vm_reserv_num_alloc_pages(object, first,
	    VM_RESERV_INDEX(object, pindex) + npages, mpred, msucc);
	if (allocpages < VM_LEVEL_0_NPAGES)
		return (NULL);

	/*
	 * Allocate the physical pages.  The alignment and boundary specified
	 * for this allocation may be different from the alignment and
	 * boundary specified for the requested pages.
	 */
	m = NULL;
	vmd = VM_DOMAIN(domain);
	if (vm_domain_allocate(vmd, req, npages)) {
		vm_domain_free_lock(vmd);
		m = vm_phys_alloc_contig(domain, allocpages, low, high,
		    ulmax(alignment, VM_LEVEL_0_SIZE),
		    boundary > VM_LEVEL_0_SIZE ? boundary : 0);
		vm_domain_free_unlock(vmd);
		if (m == NULL) {
			/* Undo the free-count charge on failure. */
			vm_domain_freecnt_inc(vmd, npages);
			return (NULL);
		}
	} else
		return (NULL);
	KASSERT(vm_page_domain(m) == domain,
	    ("vm_reserv_alloc_contig: Page domain does not match requested."));

	/*
	 * The allocated physical pages begin at a reservation boundary.
	 * Initialize every reservation that is completely covered by them,
	 * populating only the requested range.
	 */
	m_ret = NULL;
	index = VM_RESERV_INDEX(object, pindex);
	do {
		rv = vm_reserv_from_page(m);
		KASSERT(rv->pages == m,
		    ("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
		    rv));
		vm_reserv_lock(rv);
		vm_reserv_insert(rv, object, first);
		n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
		for (i = 0; i < n; i++)
			vm_reserv_populate(rv, index + i);
		npages -= n;
		if (m_ret == NULL) {
			/* Remember the first requested page. */
			m_ret = &rv->pages[index];
			index = 0;
		}
		vm_reserv_unlock(rv);
		m += VM_LEVEL_0_NPAGES;
		first += VM_LEVEL_0_NPAGES;
		allocpages -= VM_LEVEL_0_NPAGES;
	} while (allocpages >= VM_LEVEL_0_NPAGES);
	return (m_ret);
}
/*
 * Allocates a single physical page from an existing or newly created
 * reservation for (object, pindex).  Returns the page, or NULL if the
 * request cannot be satisfied from a reservation.
 *
 * The object must be write-locked.
 */
vm_page_t
vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
    int req, struct pctrie_iter *pages)
{
	struct vm_domain *vmd;
	vm_page_t m, mpred, msucc;
	vm_pindex_t first;
	vm_reserv_t rv;
	int index;

	VM_OBJECT_ASSERT_WLOCKED(object);

	/*
	 * Is a reservation fundamentally impossible?
	 */
	if (pindex < VM_RESERV_INDEX(object, pindex) ||
	    pindex >= object->size)
		return (NULL);

	/*
	 * Look for an existing reservation.
	 */
	rv = vm_reserv_from_object(object, pindex, &mpred, &msucc, pages);
	if (rv != NULL) {
		KASSERT(object != kernel_object || rv->domain == domain,
		    ("vm_reserv_alloc_page: domain mismatch"));
		domain = rv->domain;
		vmd = VM_DOMAIN(domain);
		index = VM_RESERV_INDEX(object, pindex);
		m = &rv->pages[index];
		vm_reserv_lock(rv);
		/* Handle reclamation race, or an already-populated page. */
		if (rv->object != object ||
		    bit_test(rv->popmap, index)) {
			m = NULL;
			goto out;
		}
		if (vm_domain_allocate(vmd, req, 1) == 0)
			m = NULL;
		else
			vm_reserv_populate(rv, index);
out:
		vm_reserv_unlock(rv);
		return (m);
	}

	/*
	 * Would a whole reservation fit between the neighboring
	 * pages/reservations around pindex?
	 */
	first = pindex - VM_RESERV_INDEX(object, pindex);
	if (vm_reserv_num_alloc_pages(object, first, 1, mpred, msucc) <
	    VM_LEVEL_0_NPAGES)
		return (NULL);

	/*
	 * Allocate and populate the new reservation.
	 */
	m = NULL;
	vmd = VM_DOMAIN(domain);
	if (vm_domain_allocate(vmd, req, 1)) {
		vm_domain_free_lock(vmd);
		m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
		    VM_LEVEL_0_ORDER);
		vm_domain_free_unlock(vmd);
		if (m == NULL) {
			/* Undo the free-count charge on failure. */
			vm_domain_freecnt_inc(vmd, 1);
			return (NULL);
		}
	} else
		return (NULL);
	rv = vm_reserv_from_page(m);
	vm_reserv_lock(rv);
	KASSERT(rv->pages == m,
	    ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
	vm_reserv_insert(rv, object, first);
	index = VM_RESERV_INDEX(object, pindex);
	vm_reserv_populate(rv, index);
	vm_reserv_unlock(rv);
	return (&rv->pages[index]);
}
/*
 * Breaks the given reservation.  All free (unpopulated) pages in the
 * reservation are returned to the physical memory allocator.  The
 * reservation's population count and map are reset to their initial state.
 *
 * The reservation must be locked.
 */
static void
vm_reserv_break(vm_reserv_t rv)
{
	vm_page_t m;
	int pos, pos0, pos1;

	vm_reserv_assert_locked(rv);
	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
	    __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
	vm_reserv_remove(rv);
	/* Demote the superpage(s): clear psind on each (sub)level head. */
	m = rv->pages;
#ifdef VM_SUBLEVEL_0_NPAGES
	for (; m < rv->pages + VM_LEVEL_0_NPAGES; m += VM_SUBLEVEL_0_NPAGES)
#endif
		m->psind = 0;
	/*
	 * Scan the popmap, tracking each maximal run of clear bits with
	 * [pos0, pos1).  When pos1 < pos0, the scan is inside a clear run
	 * and searches for a set bit (its end); otherwise it searches for
	 * a clear bit (the next run's start).  Each completed clear run is
	 * handed back to the physical memory allocator.
	 */
	pos0 = bit_test(rv->popmap, 0) ? -1 : 0;
	pos1 = -1 - pos0;
	for (pos = 0; pos < VM_LEVEL_0_NPAGES; ) {
		/* Find the first clear or set bit, as appropriate, after pos. */
		bit_ff_at(rv->popmap, pos + 1, VM_LEVEL_0_NPAGES,
		    pos1 < pos0, &pos);
		if (pos == -1)
			pos = VM_LEVEL_0_NPAGES;
		if (pos0 < pos1) {
			/* A new clear run starts here. */
			pos0 = pos;
			continue;
		}
		/* The clear run [pos0, pos) is complete; free it. */
		pos1 = pos;
		vm_domain_free_lock(VM_DOMAIN(rv->domain));
		vm_phys_enqueue_contig(&rv->pages[pos0], VM_FREEPOOL_DEFAULT,
		    pos1 - pos0);
		vm_domain_free_unlock(VM_DOMAIN(rv->domain));
	}
	bit_nclear(rv->popmap, 0, VM_LEVEL_0_NPAGES - 1);
	rv->popcnt = 0;
	counter_u64_add(vm_reserv_broken, 1);
}
/*
 * Breaks all reservations belonging to the given object.
 */
void
vm_reserv_break_all(vm_object_t object)
{
	vm_reserv_t rv;

	/*
	 * This access of object->rvq is unsynchronized; membership is
	 * re-verified under the reservation lock below, and a reservation
	 * that has moved to another object is simply skipped.
	 */
	while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
		vm_reserv_lock(rv);
		/* Reclaim race. */
		if (rv->object != object) {
			vm_reserv_unlock(rv);
			continue;
		}
		vm_reserv_domain_lock(rv->domain);
		if (rv->inpartpopq) {
			TAILQ_REMOVE(&vm_rvd[rv->domain].partpop, rv, partpopq);
			rv->inpartpopq = FALSE;
		}
		vm_reserv_domain_unlock(rv->domain);
		vm_reserv_break(rv);
		vm_reserv_unlock(rv);
	}
}
/*
 * Frees the given page if it belongs to a reservation, returning TRUE on
 * success and FALSE otherwise.  The reservation's object pointer is
 * re-checked under the reservation lock to close the race with a
 * concurrent reclaim.
 */
boolean_t
vm_reserv_free_page(vm_page_t m)
{
	vm_reserv_t rv;
	boolean_t freed;

	rv = vm_reserv_from_page(m);
	/* Unlocked fast path: the reservation is not in use. */
	if (rv->object == NULL)
		return (FALSE);
	vm_reserv_lock(rv);
	freed = (rv->object != NULL);
	if (freed)
		vm_reserv_depopulate(rv, m - rv->pages);
	vm_reserv_unlock(rv);
	return (freed);
}
/*
 * Initializes the reservation management system.  Specifically, initializes
 * the reservation array, the per-domain partially populated queues and
 * markers, and the object-lock pool.
 *
 * Requires that vm_phys_segs and the reservation array are initialized.
 */
void
vm_reserv_init(void)
{
	vm_paddr_t paddr;
	struct vm_phys_seg *seg;
	struct vm_reserv *rv;
	struct vm_reserv_domain *rvd;
#ifdef VM_PHYSSEG_SPARSE
	vm_pindex_t used;
#endif
	int i, segind;

	/*
	 * Initialize the reservation array.  Specifically, initialize the
	 * "pages", "domain", and "lock" fields for every element whose
	 * reservation lies entirely within a physical segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	used = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		/* Pack each segment's reservations densely into the array. */
		seg->first_reserv = &vm_reserv_array[used];
		used += howmany(seg->end, VM_LEVEL_0_SIZE) -
		    seg->start / VM_LEVEL_0_SIZE;
#else
		seg->first_reserv =
		    &vm_reserv_array[seg->start >> VM_LEVEL_0_SHIFT];
#endif
		paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
		rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) -
		    (seg->start >> VM_LEVEL_0_SHIFT);
		/* The first comparison guards against paddr overflow. */
		while (paddr + VM_LEVEL_0_SIZE > paddr && paddr +
		    VM_LEVEL_0_SIZE <= seg->end) {
			rv->pages = PHYS_TO_VM_PAGE(paddr);
			rv->domain = seg->domain;
			mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF);
			paddr += VM_LEVEL_0_SIZE;
			rv++;
		}
	}
	for (i = 0; i < MAXMEMDOM; i++) {
		rvd = &vm_rvd[i];
		mtx_init(&rvd->lock, "vm reserv domain", NULL, MTX_DEF);
		TAILQ_INIT(&rvd->partpop);
		mtx_init(&rvd->marker.lock, "vm reserv marker", NULL, MTX_DEF);

		/*
		 * Fully populate the marker's popcnt and popmap so that
		 * scans over the partpop queue find no free pages in it.
		 */
		rvd->marker.popcnt = VM_LEVEL_0_NPAGES;
		bit_nset(rvd->marker.popmap, 0, VM_LEVEL_0_NPAGES - 1);
	}

	for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
		mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
		    MTX_DEF);
}
/*
 * Returns true if the given page belongs to a reservation and that page is
 * free (unpopulated) within the reservation; false otherwise.
 */
bool
vm_reserv_is_page_free(vm_page_t m)
{
	vm_reserv_t rv = vm_reserv_from_page(m);

	/* A page in a free reservation is not tracked here. */
	return (rv->object != NULL &&
	    !bit_test(rv->popmap, m - rv->pages));
}
/*
 * Returns true if the given page is part of a block of npages, aligned to a
 * multiple of npages, that are all populated within the page's reservation;
 * false otherwise.  npages must be a power of 2 no larger than the
 * reservation size.
 */
bool
vm_reserv_is_populated(vm_page_t m, int npages)
{
	vm_reserv_t rv;
	int index;

	KASSERT(npages <= VM_LEVEL_0_NPAGES,
	    ("%s: npages %d exceeds VM_LEVEL_0_NPAGES", __func__, npages));
	KASSERT(powerof2(npages),
	    ("%s: npages %d is not a power of 2", __func__, npages));
	rv = vm_reserv_from_page(m);
	if (rv->object == NULL)
		return (false);
	/* Check that every bit in the aligned block is set. */
	index = rounddown2(m - rv->pages, npages);
	return (bit_ntest(rv->popmap, index, index + npages - 1, 1));
}
/*
 * Returns the reservation level of the reservation that might contain the
 * given page, or -1 if the page does not belong to an active reservation.
 */
int
vm_reserv_level(vm_page_t m)
{
	vm_reserv_t rv;

	rv = vm_reserv_from_page(m);
	if (rv->object == NULL)
		return (-1);
#ifdef VM_SUBLEVEL_0_NPAGES
	/* With sublevels, the whole reservation is level 1. */
	return (1);
#else
	return (0);
#endif
}
/*
 * Returns a reservation level if the given page belongs to a fully populated
 * reservation (or, with sublevels, a fully populated sublevel superpage) and
 * -1 otherwise.
 */
int
vm_reserv_level_iffullpop(vm_page_t m)
{
	vm_reserv_t rv;

	rv = vm_reserv_from_page(m);
	if (rv->popcnt == VM_LEVEL_0_NPAGES) {
#ifdef VM_SUBLEVEL_0_NPAGES
		/* The whole (level-1) reservation is full. */
		return (1);
	} else if (rv->pages != NULL &&
	    vm_reserv_is_sublevel_full(rv, m - rv->pages)) {
#endif
		/* Without sublevels: full level-0; with: a full sublevel. */
		return (0);
	}
	return (-1);
}
/*
 * Remove a partially populated reservation from its domain's queue.
 *
 * The reservation and its domain's queue must both be locked.
 */
static void
vm_reserv_dequeue(vm_reserv_t rv)
{
	vm_reserv_domain_assert_locked(rv->domain);
	vm_reserv_assert_locked(rv);
	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
	    __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
	KASSERT(rv->inpartpopq,
	    ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));

	TAILQ_REMOVE(&vm_rvd[rv->domain].partpop, rv, partpopq);
	rv->inpartpopq = FALSE;
}
/*
 * Breaks the given partially populated reservation, releasing its free pages
 * to the physical memory allocator.
 *
 * The reservation must be locked.
 */
static void
vm_reserv_reclaim(vm_reserv_t rv)
{
	vm_reserv_assert_locked(rv);
	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
	    __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
	if (rv->inpartpopq) {
		vm_reserv_domain_lock(rv->domain);
		vm_reserv_dequeue(rv);
		vm_reserv_domain_unlock(rv->domain);
	}
	vm_reserv_break(rv);
	counter_u64_add(vm_reserv_reclaimed, 1);
}
/*
 * Breaks the reservation at the head of the partially populated reservation
 * queue, releasing its free pages to the physical memory allocator.  Returns
 * true if a reservation is broken and false otherwise.
 */
bool
vm_reserv_reclaim_inactive(int domain)
{
	vm_reserv_t rv;

	vm_reserv_domain_lock(domain);
	TAILQ_FOREACH(rv, &vm_rvd[domain].partpop, partpopq) {
		/*
		 * A locked reservation is likely being updated or reclaimed,
		 * so just skip ahead.
		 */
		if (rv != &vm_rvd[domain].marker && vm_reserv_trylock(rv)) {
			vm_reserv_dequeue(rv);
			break;
		}
	}
	vm_reserv_domain_unlock(domain);
	/* TAILQ_FOREACH leaves rv NULL when the queue was exhausted. */
	if (rv != NULL) {
		vm_reserv_reclaim(rv);
		vm_reserv_unlock(rv);
		return (true);
	}
	return (false);
}
/*
 * Determine whether this reservation has free pages that satisfy the given
 * request for contiguous physical memory.  Start searching from the lower
 * bound, "lo", and stop the search at the upper bound, "hi".  Return the
 * index of the first satisfactory free page, or -1 if none is found.
 *
 * The reservation must be locked.
 */
static int
vm_reserv_find_contig(vm_reserv_t rv, int npages, int lo,
    int hi, int ppn_align, int ppn_bound)
{
	vm_reserv_assert_locked(rv);
	KASSERT(npages <= VM_LEVEL_0_NPAGES - 1,
	    ("%s: Too many pages", __func__));
	KASSERT(ppn_bound <= VM_LEVEL_0_NPAGES,
	    ("%s: Too big a boundary for reservation size", __func__));
	KASSERT(npages <= ppn_bound,
	    ("%s: Too many pages for given boundary", __func__));
	KASSERT(ppn_align != 0 && powerof2(ppn_align),
	    ("ppn_align is not a positive power of 2"));
	KASSERT(ppn_bound != 0 && powerof2(ppn_bound),
	    ("ppn_bound is not a positive power of 2"));
	/* Find the next run of npages clear bits at or after lo. */
	while (bit_ffc_area_at(rv->popmap, lo, hi, npages, &lo), lo != -1) {
		if (lo < roundup2(lo, ppn_align)) {
			/* Skip to the first aligned position. */
			lo = roundup2(lo, ppn_align);
		} else if (roundup2(lo + 1, ppn_bound) >= lo + npages)
			/* Aligned and within one boundary span: success. */
			return (lo);
		if (roundup2(lo + 1, ppn_bound) < lo + npages) {
			/* The run crosses a boundary; skip past it. */
			lo = roundup2(lo + 1, ppn_bound);
		}
	}
	return (-1);
}
/*
 * Searches the partially populated reservation queue for the least recently
 * changed reservation with free pages that satisfy the given request for
 * contiguous physical memory.  If such a reservation exists, its free pages
 * in that range are claimed, the reservation is broken, and the first
 * claimed page is returned; otherwise NULL is returned.
 */
vm_page_t
vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_reserv_queue *queue;
	vm_paddr_t pa, size;
	vm_page_t m_ret;
	vm_reserv_t marker, rv, rvn;
	int hi, lo, posn, ppn_align, ppn_bound;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	/* A request this large can never be satisfied from one reservation. */
	if (npages > VM_LEVEL_0_NPAGES - 1)
		return (NULL);
	size = npages << PAGE_SHIFT;
	/*
	 * Ensure that a free range starting at a boundary-multiple
	 * doesn't include a boundary-multiple within it.
	 */
	if (!vm_addr_bound_ok(0, size, boundary))
		return (NULL);
	marker = &vm_rvd[domain].marker;
	queue = &vm_rvd[domain].partpop;
	/*
	 * Convert alignment and boundary from physical addresses to
	 * page-number units, clamped to the reservation size.
	 */
	ppn_align = (int)(ulmin(ulmax(PAGE_SIZE, alignment),
	    VM_LEVEL_0_SIZE) >> PAGE_SHIFT);
	ppn_bound = boundary == 0 ? VM_LEVEL_0_NPAGES :
	    (int)(MIN(MAX(PAGE_SIZE, boundary),
	    VM_LEVEL_0_SIZE) >> PAGE_SHIFT);

	vm_reserv_domain_scan_lock(domain);
	vm_reserv_domain_lock(domain);
	TAILQ_FOREACH_SAFE(rv, queue, partpopq, rvn) {
		/* Cheap physical-range rejections before taking the lock. */
		pa = VM_PAGE_TO_PHYS(&rv->pages[0]);
		if (pa + VM_LEVEL_0_SIZE - size < low) {
			/* This entire reservation is too low; go to next. */
			continue;
		}
		if (pa + size > high) {
			/* This entire reservation is too high; go to next. */
			continue;
		}
		if (!vm_addr_align_ok(pa, alignment)) {
			/* This entire reservation is unusable; go to next. */
			continue;
		}

		if (vm_reserv_trylock(rv) == 0) {
			/*
			 * Leave the marker behind rv so the scan can resume
			 * here after dropping the domain lock for a blocking
			 * acquire of the reservation lock.
			 */
			TAILQ_INSERT_AFTER(queue, rv, marker, partpopq);
			vm_reserv_domain_unlock(domain);
			vm_reserv_lock(rv);
			if (TAILQ_PREV(marker, vm_reserv_queue, partpopq) !=
			    rv) {
				/* rv was dequeued meanwhile; resume scan. */
				vm_reserv_unlock(rv);
				vm_reserv_domain_lock(domain);
				rvn = TAILQ_NEXT(marker, partpopq);
				TAILQ_REMOVE(queue, marker, partpopq);
				continue;
			}
			vm_reserv_domain_lock(domain);
			TAILQ_REMOVE(queue, marker, partpopq);
		}
		vm_reserv_domain_unlock(domain);
		/* Constrain the search range to [low, high) in page units. */
		lo = (pa >= low) ? 0 :
		    (int)((low + PAGE_MASK - pa) >> PAGE_SHIFT);
		hi = (pa + VM_LEVEL_0_SIZE <= high) ? VM_LEVEL_0_NPAGES :
		    (int)((high - pa) >> PAGE_SHIFT);
		posn = vm_reserv_find_contig(rv, (int)npages, lo, hi,
		    ppn_align, ppn_bound);
		if (posn >= 0) {
			vm_reserv_domain_scan_unlock(domain);
			/* Claim the requested range before breaking rv. */
			rv->popcnt += npages;
			bit_nset(rv->popmap, posn, posn + npages - 1);
			vm_reserv_reclaim(rv);
			vm_reserv_unlock(rv);
			m_ret = &rv->pages[posn];
			pa = VM_PAGE_TO_PHYS(m_ret);
			KASSERT(vm_addr_ok(pa, size, alignment, boundary),
			    ("%s: adjusted address not aligned/bounded to "
			    "%lx/%jx",
			    __func__, alignment, (uintmax_t)boundary));
			return (m_ret);
		}
		vm_reserv_domain_lock(domain);
		rvn = TAILQ_NEXT(rv, partpopq);
		vm_reserv_unlock(rv);
	}
	vm_reserv_domain_unlock(domain);
	vm_reserv_domain_scan_unlock(domain);
	return (NULL);
}
/*
 * Transfers the reservation underlying the given page to a new object.
 *
 * The object must be locked.
 */
void
vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
    vm_pindex_t old_object_offset)
{
	vm_reserv_t rv;

	VM_OBJECT_ASSERT_WLOCKED(new_object);
	rv = vm_reserv_from_page(m);
	if (rv->object == old_object) {
		vm_reserv_lock(rv);
		CTR6(KTR_VM,
		    "%s: rv %p object %p new %p popcnt %d inpartpop %d",
		    __FUNCTION__, rv, rv->object, new_object, rv->popcnt,
		    rv->inpartpopq);
		/* Re-check under the reservation lock. */
		if (rv->object == old_object) {
			vm_reserv_object_lock(old_object);
			rv->object = NULL;
			LIST_REMOVE(rv, objq);
			vm_reserv_object_unlock(old_object);
			vm_reserv_object_lock(new_object);
			rv->object = new_object;
			/* Rebase pindex into the new object's index space. */
			rv->pindex -= old_object_offset;
			LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
			vm_reserv_object_unlock(new_object);
		}
		vm_reserv_unlock(rv);
	}
}
/*
 * Returns the size (in bytes) of a reservation of the given level, the
 * base page size for level -1, or 0 for an unknown level.
 */
int
vm_reserv_size(int level)
{
	if (level == 0)
#ifdef VM_SUBLEVEL_0_NPAGES
		/* Level 0 is the smaller, contained superpage size. */
		return (VM_SUBLEVEL_0_NPAGES * PAGE_SIZE);
	if (level == 1)
#endif
		return (VM_LEVEL_0_SIZE);
	if (level == -1)
		return (PAGE_SIZE);
	return (0);
}
/*
 * Allocates the virtual and physical memory required by the reservation
 * management system's data structures, in particular, the reservation array.
 * Returns the new physical end of usable memory after carving out the array.
 */
vm_paddr_t
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
{
	vm_paddr_t new_end;
	vm_pindex_t count;
	size_t size;
	int i;

	/*
	 * Count the level-0 reservations spanned by the physical segments
	 * and by the available physical memory.  In the sparse case, the
	 * counts accumulate; otherwise, the array is indexed by physical
	 * address, so only the maximum extent matters.
	 */
	count = 0;
	for (i = 0; i < vm_phys_nsegs; i++) {
#ifdef VM_PHYSSEG_SPARSE
		count += howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE) -
		    vm_phys_segs[i].start / VM_LEVEL_0_SIZE;
#else
		count = MAX(count,
		    howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE));
#endif
	}

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_PHYSSEG_SPARSE
		count += howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE) -
		    phys_avail[i] / VM_LEVEL_0_SIZE;
#else
		count = MAX(count,
		    howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE));
#endif
	}

	size = count * sizeof(struct vm_reserv);

	/*
	 * Carve the array out of the top of usable memory, map it, and
	 * zero it; *vaddr is advanced past the mapping by pmap_map().
	 */
	new_end = end - round_page(size);
	vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	bzero(vm_reserv_array, size);

	/*
	 * Return the next available physical address.
	 */
	return (new_end);
}
/*
 * Returns the first of the base pages of the superpage containing the given
 * page, or NULL if the page does not belong to a fully populated reservation
 * (or sublevel superpage) within its object.
 *
 * The page's object must be locked.
 */
vm_page_t
vm_reserv_to_superpage(vm_page_t m)
{
	vm_reserv_t rv;

	VM_OBJECT_ASSERT_LOCKED(m->object);
	rv = vm_reserv_from_page(m);
	if (rv->object == m->object) {
		if (rv->popcnt == VM_LEVEL_0_NPAGES)
			return (rv->pages);
#ifdef VM_SUBLEVEL_0_NPAGES
		/* Check the containing sublevel superpage instead. */
		if (vm_reserv_is_sublevel_full(rv, m - rv->pages))
			return (rv->pages + rounddown2(m - rv->pages,
			    VM_SUBLEVEL_0_NPAGES));
#endif
	}
	return (NULL);
}
#endif