/*
 * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
 *	The President and Fellows of Harvard College.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <types.h>
#include <lib.h>
#include <spinlock.h>
#include <vm.h>

/*
 * Kernel malloc.
 */


static
void
fill_deadbeef(void *vptr, size_t len)
{
	uint32_t *ptr = vptr;
	size_t i;

	for (i=0; i<len/sizeof(uint32_t); i++) {
		ptr[i] = 0xdeadbeef;
	}
}

////////////////////////////////////////////////////////////
//
// Pool-based subpage allocator.
//
// It works like this:
//
//    We allocate one page at a time and fill it with objects of size k,
//    for various k. Each page has its own freelist, maintained by a
//    linked list in the first word of each object. Each page also has a
//    freecount, so we know when the page is completely free and can
//    release it.
//
//    No assumptions are made about the sizes k; they need not be
//    powers of two. Note, however, that malloc must always return
//    pointers aligned to the maximum alignment requirements of the
//    platform; thus block sizes must at least be multiples of 4,
//    preferably 8. They must also be at least sizeof(struct
//    freelist). It is only worth defining an additional block size if
//    more blocks would fit on a page than with the existing block
//    sizes, and large numbers of items of the new size are allocated.
//
//    The free counts and addresses of the pages are maintained in
//    another list. Maintaining this table is a nuisance, because it
//    cannot recursively use the subpage allocator. (We could probably
//    make that work, but it would be painful.)
//
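/*
 * Illustrative sketch (not part of the build): what "a linked list in
 * the first word of each object" means in practice. Every free block
 * doubles as a list node, so the freelist costs no extra memory, and
 * an allocation is just a pop from the head. The names here are
 * hypothetical; the real code below tracks the head as a byte offset
 * in struct pageref rather than as a bare pointer.
 */
#if 0 /* example only, never compiled */
struct freelist *head;			/* freelist head of one page */

void *
example_pop(void)
{
	struct freelist *block;

	if (head == NULL) {
		return NULL;		/* page fully allocated */
	}
	block = head;
	head = head->next;		/* next ptr lives in the block itself */
	return block;			/* entire block now belongs to caller */
}
#endif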
#undef  SLOW	/* consistency checks */
#undef SLOWER	/* lots of consistency checks */

////////////////////////////////////////

#if PAGE_SIZE == 4096

#define NSIZES 8
static const size_t sizes[NSIZES] = { 16, 32, 64, 128, 256, 512, 1024, 2048 };

#define SMALLEST_SUBPAGE_SIZE 16
#define LARGEST_SUBPAGE_SIZE 2048

#elif PAGE_SIZE == 8192
#error "No support for 8k pages (yet?)"
#else
#error "Odd page size"
#endif

////////////////////////////////////////

struct freelist {
	struct freelist *next;
};

struct pageref {
	struct pageref *next_samesize;
	struct pageref *next_all;
	vaddr_t pageaddr_and_blocktype;
	uint16_t freelist_offset;
	uint16_t nfree;
};

#define INVALID_OFFSET		(0xffff)

#define PR_PAGEADDR(pr)		((pr)->pageaddr_and_blocktype & PAGE_FRAME)
#define PR_BLOCKTYPE(pr)	((pr)->pageaddr_and_blocktype & ~PAGE_FRAME)
#define MKPAB(pa, blk)		(((pa)&PAGE_FRAME) | ((blk) & ~PAGE_FRAME))

////////////////////////////////////////

/*
 * This is cheesy.
 *
 * The problem is not that it's wasteful - we can only allocate whole
 * pages of pageref structures at a time anyway. The problem is that
 * we really ought to be able to have more than one of these pages.
 *
 * However, for the time being, one page worth of pagerefs gives us
 * 256 pagerefs; this lets us manage 256 * 4k = 1M of kernel heap.
 * That would be twice as much memory as we get for *everything*.
 * Thus, we will cheat and not allow any mechanism for having a second
 * page of pageref structs.
 *
 * Then, since the size is fixed at one page, we'll simplify a bit
 * further by allocating the page in the kernel BSS instead of calling
 * alloc_kpages to get it.
 */

#define NPAGEREFS (PAGE_SIZE / sizeof(struct pageref))
static struct pageref pagerefs[NPAGEREFS];

#define INUSE_WORDS (NPAGEREFS/32)
static uint32_t pagerefs_inuse[INUSE_WORDS];

static
struct pageref *
allocpageref(void)
{
	unsigned i,j;
	uint32_t k;

	for (i=0; i<INUSE_WORDS; i++) {
		if (pagerefs_inuse[i]==0xffffffff) {
			/* full */
			continue;
		}
		for (k=1,j=0; k!=0; k<<=1,j++) {
			if ((pagerefs_inuse[i] & k)==0) {
				pagerefs_inuse[i] |= k;
				return &pagerefs[i*32 + j];
			}
		}
		KASSERT(0);
	}

	/* ran out */
	return NULL;
}

static
void
freepageref(struct pageref *p)
{
	size_t i, j;
	uint32_t k;

	j = p-pagerefs;
	KASSERT(j < NPAGEREFS);	/* note: j is unsigned, don't test < 0 */
	i = j/32;
	k = ((uint32_t)1) << (j%32);
	KASSERT((pagerefs_inuse[i] & k) != 0);
	pagerefs_inuse[i] &= ~k;
}
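/*
 * A minimal sketch of the bitmap scheme allocpageref/freepageref use,
 * reduced to a single 32-bit word (hypothetical names, example only).
 * Shifting a one-bit mask with k<<=1 visits bits 0..31 and terminates
 * when the bit shifts out the top; the first clear bit is a free slot.
 */
#if 0 /* example only, never compiled */
static uint32_t inuse;			/* one bit per slot, 1 = taken */

int
example_alloc_slot(void)
{
	uint32_t k;
	int j;

	for (k=1, j=0; k!=0; k<<=1, j++) {
		if ((inuse & k) == 0) {
			inuse |= k;	/* claim slot j */
			return j;
		}
	}
	return -1;			/* all 32 slots in use */
}

void
example_free_slot(int j)
{
	inuse &= ~(((uint32_t)1) << j);	/* bit must currently be set */
}
#endif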
////////////////////////////////////////

static struct pageref *sizebases[NSIZES];
static struct pageref *allbase;

////////////////////////////////////////

/*
 * Use one spinlock for the whole thing. Making parts of the kmalloc
 * logic per-cpu is worthwhile for scalability; however, for the time
 * being at least we won't, because it adds a lot of complexity and in
 * OS/161 performance and scalability aren't super-critical.
 */

static struct spinlock kmalloc_spinlock = SPINLOCK_INITIALIZER;

////////////////////////////////////////

/* SLOWER implies SLOW */
#ifdef SLOWER
#ifndef SLOW
#define SLOW
#endif
#endif

#ifdef SLOW
static
void
checksubpage(struct pageref *pr)
{
	vaddr_t prpage, fla;
	struct freelist *fl;
	int blktype;
	int nfree=0;

	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	if (pr->freelist_offset == INVALID_OFFSET) {
		KASSERT(pr->nfree==0);
		return;
	}

	prpage = PR_PAGEADDR(pr);
	blktype = PR_BLOCKTYPE(pr);

	KASSERT(pr->freelist_offset < PAGE_SIZE);
	KASSERT(pr->freelist_offset % sizes[blktype] == 0);

	fla = prpage + pr->freelist_offset;
	fl = (struct freelist *)fla;

	for (; fl != NULL; fl = fl->next) {
		fla = (vaddr_t)fl;
		KASSERT(fla >= prpage && fla < prpage + PAGE_SIZE);
		KASSERT((fla-prpage) % sizes[blktype] == 0);
		KASSERT(fla >= MIPS_KSEG0);
		KASSERT(fla < MIPS_KSEG1);
		nfree++;
	}
	KASSERT(nfree==pr->nfree);
}
#else
#define checksubpage(pr) ((void)(pr))
#endif

#ifdef SLOWER
static
void
checksubpages(void)
{
	struct pageref *pr;
	int i;
	unsigned sc=0, ac=0;

	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	for (i=0; i<NSIZES; i++) {
		for (pr = sizebases[i]; pr != NULL; pr = pr->next_samesize) {
			checksubpage(pr);
			KASSERT(sc < NPAGEREFS);
			sc++;
		}
	}

	for (pr = allbase; pr != NULL; pr = pr->next_all) {
		checksubpage(pr);
		KASSERT(ac < NPAGEREFS);
		ac++;
	}

	KASSERT(sc==ac);
}
#else
#define checksubpages()
#endif

////////////////////////////////////////

static
void
dumpsubpage(struct pageref *pr)
{
	vaddr_t prpage, fla;
	struct freelist *fl;
	int blktype;
	unsigned i, n, index;
	uint32_t freemap[PAGE_SIZE / (SMALLEST_SUBPAGE_SIZE*32)];

	checksubpage(pr);
	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	/* clear freemap[] */
	for (i=0; i<sizeof(freemap)/sizeof(freemap[0]); i++) {
		freemap[i] = 0;
	}

	prpage = PR_PAGEADDR(pr);
	blktype = PR_BLOCKTYPE(pr);

	/* compute how many bits we need in freemap and assert we fit */
	n = PAGE_SIZE / sizes[blktype];
	KASSERT(n <= 32*sizeof(freemap)/sizeof(freemap[0]));

	if (pr->freelist_offset != INVALID_OFFSET) {
		fla = prpage + pr->freelist_offset;
		fl = (struct freelist *)fla;

		for (; fl != NULL; fl = fl->next) {
			fla = (vaddr_t)fl;
			index = (fla-prpage) / sizes[blktype];
			KASSERT(index<n);
			freemap[index/32] |= (1<<(index%32));
		}
	}

	kprintf("at 0x%08lx: size %-4lu  %u/%u free\n",
		(unsigned long)prpage, (unsigned long) sizes[blktype],
		(unsigned) pr->nfree, n);
	kprintf("   ");
	for (i=0; i<n; i++) {
		int val = (freemap[i/32] & (1<<(i%32)))!=0;
		kprintf("%c", val ? '.' : '*');
		if (i%64==63 && i<n-1) {
			kprintf("\n   ");
		}
	}
	kprintf("\n");
}

void
kheap_printstats(void)
{
	struct pageref *pr;

	/* print the whole thing with interrupts off */
	spinlock_acquire(&kmalloc_spinlock);

	kprintf("Subpage allocator status:\n");

	for (pr = allbase; pr != NULL; pr = pr->next_all) {
		dumpsubpage(pr);
	}

	spinlock_release(&kmalloc_spinlock);
}
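/*
 * For reference, kheap_printstats output looks roughly like this (the
 * address and counts below are made up for illustration):
 *
 *    Subpage allocator status:
 *    at 0x80044000: size 1024  1/4 free
 *       .***
 *
 * Each page gets one line from dumpsubpage: its base address, its
 * block size, and nfree out of PAGE_SIZE/size blocks, followed by one
 * character per block ('.' = free, '*' = in use, 64 per row).
 */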
////////////////////////////////////////

static
void
remove_lists(struct pageref *pr, int blktype)
{
	struct pageref **guy;

	KASSERT(blktype>=0 && blktype<NSIZES);

	for (guy = &sizebases[blktype]; *guy; guy = &(*guy)->next_samesize) {
		checksubpage(*guy);
		if (*guy == pr) {
			*guy = pr->next_samesize;
			break;
		}
	}

	for (guy = &allbase; *guy; guy = &(*guy)->next_all) {
		checksubpage(*guy);
		if (*guy == pr) {
			*guy = pr->next_all;
			break;
		}
	}
}

static
inline
int blocktype(size_t sz)
{
	unsigned i;
	for (i=0; i<NSIZES; i++) {
		if (sz <= sizes[i]) {
			return i;
		}
	}

	panic("Subpage allocator cannot handle allocation of size %lu\n",
	      (unsigned long)sz);

	// keep compiler happy
	return 0;
}
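/*
 * A worked example of the size-class mapping (values follow from
 * sizes[] above): blocktype() picks the first class that fits, and
 * subpage_kmalloc then rounds the request up to that class, so the
 * slack is internal fragmentation.
 *
 *    blocktype(1)   == 0   -> 16-byte blocks
 *    blocktype(100) == 3   -> 128-byte blocks (28 bytes of slack)
 *    blocktype(512) == 5   -> 512-byte blocks (exact fit)
 */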
static
void *
subpage_kmalloc(size_t sz)
{
	unsigned blktype;	// index into sizes[] that we're using
	struct pageref *pr;	// pageref for page we're allocating from
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *volatile fl;	// free list entry
	void *retptr;		// our result

	volatile int i;


	blktype = blocktype(sz);
	sz = sizes[blktype];

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	for (pr = sizebases[blktype]; pr != NULL; pr = pr->next_samesize) {

		/* check for corruption */
		KASSERT(PR_BLOCKTYPE(pr) == blktype);
		checksubpage(pr);

		if (pr->nfree > 0) {

		doalloc: /* comes here after getting a whole fresh page */

			KASSERT(pr->freelist_offset < PAGE_SIZE);
			prpage = PR_PAGEADDR(pr);
			fla = prpage + pr->freelist_offset;
			fl = (struct freelist *)fla;

			retptr = fl;
			fl = fl->next;
			pr->nfree--;

			if (fl != NULL) {
				KASSERT(pr->nfree > 0);
				fla = (vaddr_t)fl;
				KASSERT(fla - prpage < PAGE_SIZE);
				pr->freelist_offset = fla - prpage;
			}
			else {
				KASSERT(pr->nfree == 0);
				pr->freelist_offset = INVALID_OFFSET;
			}

			checksubpages();

			spinlock_release(&kmalloc_spinlock);
			return retptr;
		}
	}

	/*
	 * No page of the right size available.
	 * Make a new one.
	 *
	 * We release the spinlock while calling alloc_kpages. This
	 * avoids deadlock if alloc_kpages needs to come back here.
	 * Note that this means things can change behind our back...
	 */

	spinlock_release(&kmalloc_spinlock);
	prpage = alloc_kpages(1);
	if (prpage==0) {
		/* Out of memory. */
		kprintf("kmalloc: Subpage allocator couldn't get a page\n");
		return NULL;
	}
	spinlock_acquire(&kmalloc_spinlock);

	pr = allocpageref();
	if (pr==NULL) {
		/* Couldn't allocate accounting space for the new page. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
		kprintf("kmalloc: Subpage allocator couldn't get pageref\n");
		return NULL;
	}

	pr->pageaddr_and_blocktype = MKPAB(prpage, blktype);
	pr->nfree = PAGE_SIZE / sizes[blktype];

	/*
	 * Note: fl is volatile because the MIPS toolchain we were
	 * using in spring 2001 attempted to optimize this loop and
	 * blew it. Making fl volatile inhibits the optimization.
	 */

	fla = prpage;
	fl = (struct freelist *)fla;
	fl->next = NULL;
	for (i=1; i<pr->nfree; i++) {
		fl = (struct freelist *)(fla + i*sizes[blktype]);
		fl->next = (struct freelist *)(fla + (i-1)*sizes[blktype]);
		KASSERT(fl != fl->next);
	}
	fla = (vaddr_t) fl;
	pr->freelist_offset = fla - prpage;
	KASSERT(pr->freelist_offset == (pr->nfree-1)*sizes[blktype]);

	pr->next_samesize = sizebases[blktype];
	sizebases[blktype] = pr;

	pr->next_all = allbase;
	allbase = pr;

	/* This is kind of cheesy, but avoids duplicating the alloc code. */
	goto doalloc;
}

static
int
subpage_kfree(void *ptr)
{
	int blktype;		// index into sizes[] that we're using
	vaddr_t ptraddr;	// same as ptr
	struct pageref *pr;	// pageref for page we're freeing in
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *fl;	// free list entry
	vaddr_t offset;		// offset into page

	ptraddr = (vaddr_t)ptr;

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	for (pr = allbase; pr; pr = pr->next_all) {
		prpage = PR_PAGEADDR(pr);
		blktype = PR_BLOCKTYPE(pr);

		/* check for corruption */
		KASSERT(blktype>=0 && blktype<NSIZES);
		checksubpage(pr);

		if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) {
			break;
		}
	}

	if (pr==NULL) {
		/* Not on any of our pages - not a subpage allocation */
		spinlock_release(&kmalloc_spinlock);
		return -1;
	}

	offset = ptraddr - prpage;

	/* Check for proper positioning and alignment */
	if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) {
		panic("kfree: subpage free of invalid addr %p\n", ptr);
	}

	/*
	 * Clear the block to 0xdeadbeef to make it easier to detect
	 * uses of dangling pointers.
	 */
	fill_deadbeef(ptr, sizes[blktype]);

	/*
	 * We probably ought to check for free twice by seeing if the block
	 * is already on the free list. But that's expensive, so we don't.
	 */

	fla = prpage + offset;
	fl = (struct freelist *)fla;
	if (pr->freelist_offset == INVALID_OFFSET) {
		fl->next = NULL;
	} else {
		fl->next = (struct freelist *)(prpage + pr->freelist_offset);
	}
	pr->freelist_offset = offset;
	pr->nfree++;

	KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]);
	if (pr->nfree == PAGE_SIZE / sizes[blktype]) {
		/* Whole page is free. */
		remove_lists(pr, blktype);
		freepageref(pr);
		/* Call free_kpages without kmalloc_spinlock. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
	}
	else {
		spinlock_release(&kmalloc_spinlock);
	}

#ifdef SLOWER /* Don't get the lock unless checksubpages does something. */
	spinlock_acquire(&kmalloc_spinlock);
	checksubpages();
	spinlock_release(&kmalloc_spinlock);
#endif

	return 0;
}

//
////////////////////////////////////////////////////////////

void *
kmalloc(size_t sz)
{
	if (sz>=LARGEST_SUBPAGE_SIZE) {
		unsigned long npages;
		vaddr_t address;

		/* Round up to a whole number of pages. */
		npages = (sz + PAGE_SIZE - 1)/PAGE_SIZE;
		address = alloc_kpages(npages);
		if (address==0) {
			return NULL;
		}

		return (void *)address;
	}

	return subpage_kmalloc(sz);
}

void
kfree(void *ptr)
{
	/*
	 * Try subpage first; if that fails, assume it's a big allocation.
	 */
	if (ptr == NULL) {
		return;
	} else if (subpage_kfree(ptr)) {
		KASSERT((vaddr_t)ptr%PAGE_SIZE==0);
		free_kpages((vaddr_t)ptr);
	}
}
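/*
 * Typical usage, for reference (a sketch; real callers' error
 * handling will vary). Requests smaller than LARGEST_SUBPAGE_SIZE are
 * served from the subpage pools above; anything bigger goes straight
 * to alloc_kpages and is freed page-by-page in kfree.
 */
#if 0 /* example only, never compiled */
	char *buf;

	buf = kmalloc(100);	/* rounded up to the 128-byte pool */
	if (buf == NULL) {
		return ENOMEM;	/* kmalloc returns NULL on failure */
	}
	/* ... use buf ... */
	kfree(buf);		/* locates the page and returns the block */
#endif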