/* Source: drivers/infiniband/hw/mthca/mthca_memfree.c */
/*1* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.2* Copyright (c) 2005 Cisco Systems. All rights reserved.3* Copyright (c) 2005 Mellanox Technologies. All rights reserved.4*5* This software is available to you under a choice of one of two6* licenses. You may choose to be licensed under the terms of the GNU7* General Public License (GPL) Version 2, available from the file8* COPYING in the main directory of this source tree, or the9* OpenIB.org BSD license below:10*11* Redistribution and use in source and binary forms, with or12* without modification, are permitted provided that the following13* conditions are met:14*15* - Redistributions of source code must retain the above16* copyright notice, this list of conditions and the following17* disclaimer.18*19* - Redistributions in binary form must reproduce the above20* copyright notice, this list of conditions and the following21* disclaimer in the documentation and/or other materials22* provided with the distribution.23*24* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,25* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF26* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND27* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS28* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN29* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN30* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE31* SOFTWARE.32*/3334#include <linux/mm.h>35#include <linux/scatterlist.h>36#include <linux/sched.h>37#include <linux/slab.h>3839#include <asm/page.h>4041#include "mthca_memfree.h"42#include "mthca_dev.h"43#include "mthca_cmd.h"4445/*46* We allocate in as big chunks as we can, up to a maximum of 256 KB47* per chunk.48*/49enum {50MTHCA_ICM_ALLOC_SIZE = 1 << 18,51MTHCA_TABLE_CHUNK_SIZE = 1 << 1852};5354struct mthca_user_db_table {55struct mutex mutex;56struct {57u64 uvirt;58struct scatterlist mem;59int refcount;60} page[0];61};6263static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)64{65int i;6667if (chunk->nsg > 0)68pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,69PCI_DMA_BIDIRECTIONAL);7071for (i = 0; i < chunk->npages; ++i)72__free_pages(sg_page(&chunk->mem[i]),73get_order(chunk->mem[i].length));74}7576static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)77{78int i;7980for (i = 0; i < chunk->npages; ++i) {81dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,82lowmem_page_address(sg_page(&chunk->mem[i])),83sg_dma_address(&chunk->mem[i]));84}85}8687void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)88{89struct mthca_icm_chunk *chunk, *tmp;9091if (!icm)92return;9394list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {95if (coherent)96mthca_free_icm_coherent(dev, chunk);97else98mthca_free_icm_pages(dev, chunk);99100kfree(chunk);101}102103kfree(icm);104}105106static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)107{108struct page *page;109110/*111* Use __GFP_ZERO because buggy firmware assumes ICM pages are112* cleared, and subtle failures are seen if they 
aren't.113*/114page = alloc_pages(gfp_mask | __GFP_ZERO, order);115if (!page)116return -ENOMEM;117118sg_set_page(mem, page, PAGE_SIZE << order, 0);119return 0;120}121122static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,123int order, gfp_t gfp_mask)124{125void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),126gfp_mask);127if (!buf)128return -ENOMEM;129130sg_set_buf(mem, buf, PAGE_SIZE << order);131BUG_ON(mem->offset);132sg_dma_len(mem) = PAGE_SIZE << order;133return 0;134}135136struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,137gfp_t gfp_mask, int coherent)138{139struct mthca_icm *icm;140struct mthca_icm_chunk *chunk = NULL;141int cur_order;142int ret;143144/* We use sg_set_buf for coherent allocs, which assumes low memory */145BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));146147icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));148if (!icm)149return icm;150151icm->refcount = 0;152INIT_LIST_HEAD(&icm->chunk_list);153154cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);155156while (npages > 0) {157if (!chunk) {158chunk = kmalloc(sizeof *chunk,159gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));160if (!chunk)161goto fail;162163sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);164chunk->npages = 0;165chunk->nsg = 0;166list_add_tail(&chunk->list, &icm->chunk_list);167}168169while (1 << cur_order > npages)170--cur_order;171172if (coherent)173ret = mthca_alloc_icm_coherent(&dev->pdev->dev,174&chunk->mem[chunk->npages],175cur_order, gfp_mask);176else177ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],178cur_order, gfp_mask);179180if (!ret) {181++chunk->npages;182183if (coherent)184++chunk->nsg;185else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {186chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,187chunk->npages,188PCI_DMA_BIDIRECTIONAL);189190if (chunk->nsg <= 0)191goto fail;192}193194if (chunk->npages == MTHCA_ICM_CHUNK_LEN)195chunk = NULL;196197npages -= 1 << cur_order;198} else 
{199--cur_order;200if (cur_order < 0)201goto fail;202}203}204205if (!coherent && chunk) {206chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,207chunk->npages,208PCI_DMA_BIDIRECTIONAL);209210if (chunk->nsg <= 0)211goto fail;212}213214return icm;215216fail:217mthca_free_icm(dev, icm, coherent);218return NULL;219}220221int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)222{223int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;224int ret = 0;225u8 status;226227mutex_lock(&table->mutex);228229if (table->icm[i]) {230++table->icm[i]->refcount;231goto out;232}233234table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,235(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |236__GFP_NOWARN, table->coherent);237if (!table->icm[i]) {238ret = -ENOMEM;239goto out;240}241242if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,243&status) || status) {244mthca_free_icm(dev, table->icm[i], table->coherent);245table->icm[i] = NULL;246ret = -ENOMEM;247goto out;248}249250++table->icm[i]->refcount;251252out:253mutex_unlock(&table->mutex);254return ret;255}256257void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)258{259int i;260u8 status;261262if (!mthca_is_memfree(dev))263return;264265i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;266267mutex_lock(&table->mutex);268269if (--table->icm[i]->refcount == 0) {270mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,271MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,272&status);273mthca_free_icm(dev, table->icm[i], table->coherent);274table->icm[i] = NULL;275}276277mutex_unlock(&table->mutex);278}279280void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)281{282int idx, offset, dma_offset, i;283struct mthca_icm_chunk *chunk;284struct mthca_icm *icm;285struct page *page = NULL;286287if (!table->lowmem)288return 
NULL;289290mutex_lock(&table->mutex);291292idx = (obj & (table->num_obj - 1)) * table->obj_size;293icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];294dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;295296if (!icm)297goto out;298299list_for_each_entry(chunk, &icm->chunk_list, list) {300for (i = 0; i < chunk->npages; ++i) {301if (dma_handle && dma_offset >= 0) {302if (sg_dma_len(&chunk->mem[i]) > dma_offset)303*dma_handle = sg_dma_address(&chunk->mem[i]) +304dma_offset;305dma_offset -= sg_dma_len(&chunk->mem[i]);306}307/* DMA mapping can merge pages but not split them,308* so if we found the page, dma_handle has already309* been assigned to. */310if (chunk->mem[i].length > offset) {311page = sg_page(&chunk->mem[i]);312goto out;313}314offset -= chunk->mem[i].length;315}316}317318out:319mutex_unlock(&table->mutex);320return page ? lowmem_page_address(page) + offset : NULL;321}322323int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,324int start, int end)325{326int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;327int i, err;328329for (i = start; i <= end; i += inc) {330err = mthca_table_get(dev, table, i);331if (err)332goto fail;333}334335return 0;336337fail:338while (i > start) {339i -= inc;340mthca_table_put(dev, table, i);341}342343return err;344}345346void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,347int start, int end)348{349int i;350351if (!mthca_is_memfree(dev))352return;353354for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)355mthca_table_put(dev, table, i);356}357358struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,359u64 virt, int obj_size,360int nobj, int reserved,361int use_lowmem, int use_coherent)362{363struct mthca_icm_table *table;364int obj_per_chunk;365int num_icm;366unsigned chunk_size;367int i;368u8 status;369370obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;371num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);372373table = kmalloc(sizeof *table + 
num_icm * sizeof *table->icm, GFP_KERNEL);374if (!table)375return NULL;376377table->virt = virt;378table->num_icm = num_icm;379table->num_obj = nobj;380table->obj_size = obj_size;381table->lowmem = use_lowmem;382table->coherent = use_coherent;383mutex_init(&table->mutex);384385for (i = 0; i < num_icm; ++i)386table->icm[i] = NULL;387388for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {389chunk_size = MTHCA_TABLE_CHUNK_SIZE;390if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)391chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;392393table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,394(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |395__GFP_NOWARN, use_coherent);396if (!table->icm[i])397goto err;398if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,399&status) || status) {400mthca_free_icm(dev, table->icm[i], table->coherent);401table->icm[i] = NULL;402goto err;403}404405/*406* Add a reference to this ICM chunk so that it never407* gets freed (since it contains reserved firmware objects).408*/409++table->icm[i]->refcount;410}411412return table;413414err:415for (i = 0; i < num_icm; ++i)416if (table->icm[i]) {417mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,418MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,419&status);420mthca_free_icm(dev, table->icm[i], table->coherent);421}422423kfree(table);424425return NULL;426}427428void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)429{430int i;431u8 status;432433for (i = 0; i < table->num_icm; ++i)434if (table->icm[i]) {435mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,436MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,437&status);438mthca_free_icm(dev, table->icm[i], table->coherent);439}440441kfree(table);442}443444static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)445{446return dev->uar_table.uarc_base +447uar->index * dev->uar_table.uarc_size +448page * MTHCA_ICM_PAGE_SIZE;449}450451int 
mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,452struct mthca_user_db_table *db_tab, int index, u64 uaddr)453{454struct page *pages[1];455int ret = 0;456u8 status;457int i;458459if (!mthca_is_memfree(dev))460return 0;461462if (index < 0 || index > dev->uar_table.uarc_size / 8)463return -EINVAL;464465mutex_lock(&db_tab->mutex);466467i = index / MTHCA_DB_REC_PER_PAGE;468469if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||470(db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||471(uaddr & 4095)) {472ret = -EINVAL;473goto out;474}475476if (db_tab->page[i].refcount) {477++db_tab->page[i].refcount;478goto out;479}480481ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,482pages, NULL);483if (ret < 0)484goto out;485486sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,487uaddr & ~PAGE_MASK);488489ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);490if (ret < 0) {491put_page(pages[0]);492goto out;493}494495ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),496mthca_uarc_virt(dev, uar, i), &status);497if (!ret && status)498ret = -EINVAL;499if (ret) {500pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);501put_page(sg_page(&db_tab->page[i].mem));502goto out;503}504505db_tab->page[i].uvirt = uaddr;506db_tab->page[i].refcount = 1;507508out:509mutex_unlock(&db_tab->mutex);510return ret;511}512513void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,514struct mthca_user_db_table *db_tab, int index)515{516if (!mthca_is_memfree(dev))517return;518519/*520* To make our bookkeeping simpler, we don't unmap DB521* pages until we clean up the whole db table.522*/523524mutex_lock(&db_tab->mutex);525526--db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;527528mutex_unlock(&db_tab->mutex);529}530531struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)532{533struct mthca_user_db_table *db_tab;534int npages;535int i;536537if 
(!mthca_is_memfree(dev))538return NULL;539540npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;541db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);542if (!db_tab)543return ERR_PTR(-ENOMEM);544545mutex_init(&db_tab->mutex);546for (i = 0; i < npages; ++i) {547db_tab->page[i].refcount = 0;548db_tab->page[i].uvirt = 0;549sg_init_table(&db_tab->page[i].mem, 1);550}551552return db_tab;553}554555void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,556struct mthca_user_db_table *db_tab)557{558int i;559u8 status;560561if (!mthca_is_memfree(dev))562return;563564for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {565if (db_tab->page[i].uvirt) {566mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);567pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);568put_page(sg_page(&db_tab->page[i].mem));569}570}571572kfree(db_tab);573}574575int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,576u32 qn, __be32 **db)577{578int group;579int start, end, dir;580int i, j;581struct mthca_db_page *page;582int ret = 0;583u8 status;584585mutex_lock(&dev->db_tab->mutex);586587switch (type) {588case MTHCA_DB_TYPE_CQ_ARM:589case MTHCA_DB_TYPE_SQ:590group = 0;591start = 0;592end = dev->db_tab->max_group1;593dir = 1;594break;595596case MTHCA_DB_TYPE_CQ_SET_CI:597case MTHCA_DB_TYPE_RQ:598case MTHCA_DB_TYPE_SRQ:599group = 1;600start = dev->db_tab->npages - 1;601end = dev->db_tab->min_group2;602dir = -1;603break;604605default:606ret = -EINVAL;607goto out;608}609610for (i = start; i != end; i += dir)611if (dev->db_tab->page[i].db_rec &&612!bitmap_full(dev->db_tab->page[i].used,613MTHCA_DB_REC_PER_PAGE)) {614page = dev->db_tab->page + i;615goto found;616}617618for (i = start; i != end; i += dir)619if (!dev->db_tab->page[i].db_rec) {620page = dev->db_tab->page + i;621goto alloc;622}623624if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {625ret = -ENOMEM;626goto out;627}628629if 
(group == 0)630++dev->db_tab->max_group1;631else632--dev->db_tab->min_group2;633634page = dev->db_tab->page + end;635636alloc:637page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,638&page->mapping, GFP_KERNEL);639if (!page->db_rec) {640ret = -ENOMEM;641goto out;642}643memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);644645ret = mthca_MAP_ICM_page(dev, page->mapping,646mthca_uarc_virt(dev, &dev->driver_uar, i), &status);647if (!ret && status)648ret = -EINVAL;649if (ret) {650dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,651page->db_rec, page->mapping);652goto out;653}654655bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);656657found:658j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);659set_bit(j, page->used);660661if (group == 1)662j = MTHCA_DB_REC_PER_PAGE - 1 - j;663664ret = i * MTHCA_DB_REC_PER_PAGE + j;665666page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));667668*db = (__be32 *) &page->db_rec[j];669670out:671mutex_unlock(&dev->db_tab->mutex);672673return ret;674}675676void mthca_free_db(struct mthca_dev *dev, int type, int db_index)677{678int i, j;679struct mthca_db_page *page;680u8 status;681682i = db_index / MTHCA_DB_REC_PER_PAGE;683j = db_index % MTHCA_DB_REC_PER_PAGE;684685page = dev->db_tab->page + i;686687mutex_lock(&dev->db_tab->mutex);688689page->db_rec[j] = 0;690if (i >= dev->db_tab->min_group2)691j = MTHCA_DB_REC_PER_PAGE - 1 - j;692clear_bit(j, page->used);693694if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&695i >= dev->db_tab->max_group1 - 1) {696mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);697698dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,699page->db_rec, page->mapping);700page->db_rec = NULL;701702if (i == dev->db_tab->max_group1) {703--dev->db_tab->max_group1;704/* XXX may be able to unmap more pages now */705}706if (i == dev->db_tab->min_group2)707++dev->db_tab->min_group2;708}709710mutex_unlock(&dev->db_tab->mutex);711}712713int mthca_init_db_tab(struct 
mthca_dev *dev)714{715int i;716717if (!mthca_is_memfree(dev))718return 0;719720dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);721if (!dev->db_tab)722return -ENOMEM;723724mutex_init(&dev->db_tab->mutex);725726dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;727dev->db_tab->max_group1 = 0;728dev->db_tab->min_group2 = dev->db_tab->npages - 1;729730dev->db_tab->page = kmalloc(dev->db_tab->npages *731sizeof *dev->db_tab->page,732GFP_KERNEL);733if (!dev->db_tab->page) {734kfree(dev->db_tab);735return -ENOMEM;736}737738for (i = 0; i < dev->db_tab->npages; ++i)739dev->db_tab->page[i].db_rec = NULL;740741return 0;742}743744void mthca_cleanup_db_tab(struct mthca_dev *dev)745{746int i;747u8 status;748749if (!mthca_is_memfree(dev))750return;751752/*753* Because we don't always free our UARC pages when they754* become empty to make mthca_free_db() simpler we need to755* make a sweep through the doorbell pages and free any756* leftover pages now.757*/758for (i = 0; i < dev->db_tab->npages; ++i) {759if (!dev->db_tab->page[i].db_rec)760continue;761762if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))763mthca_warn(dev, "Kernel UARC page %d not empty\n", i);764765mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);766767dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,768dev->db_tab->page[i].db_rec,769dev->db_tab->page[i].mapping);770}771772kfree(dev->db_tab->page);773kfree(dev->db_tab);774}775776777