/* drivers/infiniband/core/fmr_pool.c */
/*1* Copyright (c) 2004 Topspin Communications. All rights reserved.2* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.3*4* This software is available to you under a choice of one of two5* licenses. You may choose to be licensed under the terms of the GNU6* General Public License (GPL) Version 2, available from the file7* COPYING in the main directory of this source tree, or the8* OpenIB.org BSD license below:9*10* Redistribution and use in source and binary forms, with or11* without modification, are permitted provided that the following12* conditions are met:13*14* - Redistributions of source code must retain the above15* copyright notice, this list of conditions and the following16* disclaimer.17*18* - Redistributions in binary form must reproduce the above19* copyright notice, this list of conditions and the following20* disclaimer in the documentation and/or other materials21* provided with the distribution.22*23* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,24* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF25* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND26* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS27* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN28* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN29* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE30* SOFTWARE.31*/3233#include <linux/errno.h>34#include <linux/spinlock.h>35#include <linux/slab.h>36#include <linux/jhash.h>37#include <linux/kthread.h>3839#include <rdma/ib_fmr_pool.h>4041#include "core_priv.h"4243#define PFX "fmr_pool: "4445enum {46IB_FMR_MAX_REMAPS = 32,4748IB_FMR_HASH_BITS = 8,49IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,50IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 151};5253/*54* If an FMR is not in use, then the list member will point to either55* its pool's free_list (if the FMR can be mapped again; that is,56* remap_count < pool->max_remaps) or its pool's dirty_list (if the57* FMR needs to be unmapped before being remapped). In either of58* these cases it is a bug if the ref_count is not 0. In other words,59* if ref_count is > 0, then the list member must not be linked into60* either free_list or dirty_list.61*62* The cache_node member is used to link the FMR into a cache bucket63* (if caching is enabled). This is independent of the reference64* count of the FMR. When a valid FMR is released, its ref_count is65* decremented, and if ref_count reaches 0, the FMR is placed in66* either free_list or dirty_list as appropriate. However, it is not67* removed from the cache and may be "revived" if a call to68* ib_fmr_register_physical() occurs before the FMR is remapped. In69* this case we just increment the ref_count and remove the FMR from70* free_list/dirty_list.71*72* Before we remap an FMR from free_list, we remove it from the cache73* (to prevent another user from obtaining a stale FMR). 
When an FMR74* is released, we add it to the tail of the free list, so that our75* cache eviction policy is "least recently used."76*77* All manipulation of ref_count, list and cache_node is protected by78* pool_lock to maintain consistency.79*/8081struct ib_fmr_pool {82spinlock_t pool_lock;8384int pool_size;85int max_pages;86int max_remaps;87int dirty_watermark;88int dirty_len;89struct list_head free_list;90struct list_head dirty_list;91struct hlist_head *cache_bucket;9293void (*flush_function)(struct ib_fmr_pool *pool,94void * arg);95void *flush_arg;9697struct task_struct *thread;9899atomic_t req_ser;100atomic_t flush_ser;101102wait_queue_head_t force_wait;103};104105static inline u32 ib_fmr_hash(u64 first_page)106{107return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &108(IB_FMR_HASH_SIZE - 1);109}110111/* Caller must hold pool_lock */112static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,113u64 *page_list,114int page_list_len,115u64 io_virtual_address)116{117struct hlist_head *bucket;118struct ib_pool_fmr *fmr;119struct hlist_node *pos;120121if (!pool->cache_bucket)122return NULL;123124bucket = pool->cache_bucket + ib_fmr_hash(*page_list);125126hlist_for_each_entry(fmr, pos, bucket, cache_node)127if (io_virtual_address == fmr->io_virtual_address &&128page_list_len == fmr->page_list_len &&129!memcmp(page_list, fmr->page_list,130page_list_len * sizeof *page_list))131return fmr;132133return NULL;134}135136static void ib_fmr_batch_release(struct ib_fmr_pool *pool)137{138int ret;139struct ib_pool_fmr *fmr;140LIST_HEAD(unmap_list);141LIST_HEAD(fmr_list);142143spin_lock_irq(&pool->pool_lock);144145list_for_each_entry(fmr, &pool->dirty_list, list) {146hlist_del_init(&fmr->cache_node);147fmr->remap_count = 0;148list_add_tail(&fmr->fmr->list, &fmr_list);149150#ifdef DEBUG151if (fmr->ref_count !=0) {152printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",153fmr, 
fmr->ref_count);154}155#endif156}157158list_splice_init(&pool->dirty_list, &unmap_list);159pool->dirty_len = 0;160161spin_unlock_irq(&pool->pool_lock);162163if (list_empty(&unmap_list)) {164return;165}166167ret = ib_unmap_fmr(&fmr_list);168if (ret)169printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);170171spin_lock_irq(&pool->pool_lock);172list_splice(&unmap_list, &pool->free_list);173spin_unlock_irq(&pool->pool_lock);174}175176static int ib_fmr_cleanup_thread(void *pool_ptr)177{178struct ib_fmr_pool *pool = pool_ptr;179180do {181if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {182ib_fmr_batch_release(pool);183184atomic_inc(&pool->flush_ser);185wake_up_interruptible(&pool->force_wait);186187if (pool->flush_function)188pool->flush_function(pool, pool->flush_arg);189}190191set_current_state(TASK_INTERRUPTIBLE);192if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&193!kthread_should_stop())194schedule();195__set_current_state(TASK_RUNNING);196} while (!kthread_should_stop());197198return 0;199}200201/**202* ib_create_fmr_pool - Create an FMR pool203* @pd:Protection domain for FMRs204* @params:FMR pool parameters205*206* Create a pool of FMRs. 
Return value is pointer to new pool or207* error code if creation failed.208*/209struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,210struct ib_fmr_pool_param *params)211{212struct ib_device *device;213struct ib_fmr_pool *pool;214struct ib_device_attr *attr;215int i;216int ret;217int max_remaps;218219if (!params)220return ERR_PTR(-EINVAL);221222device = pd->device;223if (!device->alloc_fmr || !device->dealloc_fmr ||224!device->map_phys_fmr || !device->unmap_fmr) {225printk(KERN_INFO PFX "Device %s does not support FMRs\n",226device->name);227return ERR_PTR(-ENOSYS);228}229230attr = kmalloc(sizeof *attr, GFP_KERNEL);231if (!attr) {232printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");233return ERR_PTR(-ENOMEM);234}235236ret = ib_query_device(device, attr);237if (ret) {238printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);239kfree(attr);240return ERR_PTR(ret);241}242243if (!attr->max_map_per_fmr)244max_remaps = IB_FMR_MAX_REMAPS;245else246max_remaps = attr->max_map_per_fmr;247248kfree(attr);249250pool = kmalloc(sizeof *pool, GFP_KERNEL);251if (!pool) {252printk(KERN_WARNING PFX "couldn't allocate pool struct\n");253return ERR_PTR(-ENOMEM);254}255256pool->cache_bucket = NULL;257258pool->flush_function = params->flush_function;259pool->flush_arg = params->flush_arg;260261INIT_LIST_HEAD(&pool->free_list);262INIT_LIST_HEAD(&pool->dirty_list);263264if (params->cache) {265pool->cache_bucket =266kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,267GFP_KERNEL);268if (!pool->cache_bucket) {269printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");270ret = -ENOMEM;271goto out_free_pool;272}273274for (i = 0; i < IB_FMR_HASH_SIZE; ++i)275INIT_HLIST_HEAD(pool->cache_bucket + i);276}277278pool->pool_size = 0;279pool->max_pages = params->max_pages_per_fmr;280pool->max_remaps = max_remaps;281pool->dirty_watermark = params->dirty_watermark;282pool->dirty_len = 0;283spin_lock_init(&pool->pool_lock);284atomic_set(&pool->req_ser, 
0);285atomic_set(&pool->flush_ser, 0);286init_waitqueue_head(&pool->force_wait);287288pool->thread = kthread_run(ib_fmr_cleanup_thread,289pool,290"ib_fmr(%s)",291device->name);292if (IS_ERR(pool->thread)) {293printk(KERN_WARNING PFX "couldn't start cleanup thread\n");294ret = PTR_ERR(pool->thread);295goto out_free_pool;296}297298{299struct ib_pool_fmr *fmr;300struct ib_fmr_attr fmr_attr = {301.max_pages = params->max_pages_per_fmr,302.max_maps = pool->max_remaps,303.page_shift = params->page_shift304};305int bytes_per_fmr = sizeof *fmr;306307if (pool->cache_bucket)308bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);309310for (i = 0; i < params->pool_size; ++i) {311fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);312if (!fmr) {313printk(KERN_WARNING PFX "failed to allocate fmr "314"struct for FMR %d\n", i);315goto out_fail;316}317318fmr->pool = pool;319fmr->remap_count = 0;320fmr->ref_count = 0;321INIT_HLIST_NODE(&fmr->cache_node);322323fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);324if (IS_ERR(fmr->fmr)) {325printk(KERN_WARNING PFX "fmr_create failed "326"for FMR %d\n", i);327kfree(fmr);328goto out_fail;329}330331list_add_tail(&fmr->list, &pool->free_list);332++pool->pool_size;333}334}335336return pool;337338out_free_pool:339kfree(pool->cache_bucket);340kfree(pool);341342return ERR_PTR(ret);343344out_fail:345ib_destroy_fmr_pool(pool);346347return ERR_PTR(-ENOMEM);348}349EXPORT_SYMBOL(ib_create_fmr_pool);350351/**352* ib_destroy_fmr_pool - Free FMR pool353* @pool:FMR pool to free354*355* Destroy an FMR pool and free all associated resources.356*/357void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)358{359struct ib_pool_fmr *fmr;360struct ib_pool_fmr *tmp;361LIST_HEAD(fmr_list);362int i;363364kthread_stop(pool->thread);365ib_fmr_batch_release(pool);366367i = 0;368list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {369if (fmr->remap_count) {370INIT_LIST_HEAD(&fmr_list);371list_add_tail(&fmr->fmr->list, 
&fmr_list);372ib_unmap_fmr(&fmr_list);373}374ib_dealloc_fmr(fmr->fmr);375list_del(&fmr->list);376kfree(fmr);377++i;378}379380if (i < pool->pool_size)381printk(KERN_WARNING PFX "pool still has %d regions registered\n",382pool->pool_size - i);383384kfree(pool->cache_bucket);385kfree(pool);386}387EXPORT_SYMBOL(ib_destroy_fmr_pool);388389/**390* ib_flush_fmr_pool - Invalidate all unmapped FMRs391* @pool:FMR pool to flush392*393* Ensure that all unmapped FMRs are fully invalidated.394*/395int ib_flush_fmr_pool(struct ib_fmr_pool *pool)396{397int serial;398struct ib_pool_fmr *fmr, *next;399400/*401* The free_list holds FMRs that may have been used402* but have not been remapped enough times to be dirty.403* Put them on the dirty list now so that the cleanup404* thread will reap them too.405*/406spin_lock_irq(&pool->pool_lock);407list_for_each_entry_safe(fmr, next, &pool->free_list, list) {408if (fmr->remap_count > 0)409list_move(&fmr->list, &pool->dirty_list);410}411spin_unlock_irq(&pool->pool_lock);412413serial = atomic_inc_return(&pool->req_ser);414wake_up_process(pool->thread);415416if (wait_event_interruptible(pool->force_wait,417atomic_read(&pool->flush_ser) - serial >= 0))418return -EINTR;419420return 0;421}422EXPORT_SYMBOL(ib_flush_fmr_pool);423424/**425* ib_fmr_pool_map_phys -426* @pool:FMR pool to allocate FMR from427* @page_list:List of pages to map428* @list_len:Number of pages in @page_list429* @io_virtual_address:I/O virtual address for new FMR430*431* Map an FMR from an FMR pool.432*/433struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,434u64 *page_list,435int list_len,436u64 io_virtual_address)437{438struct ib_fmr_pool *pool = pool_handle;439struct ib_pool_fmr *fmr;440unsigned long flags;441int result;442443if (list_len < 1 || list_len > pool->max_pages)444return ERR_PTR(-EINVAL);445446spin_lock_irqsave(&pool->pool_lock, flags);447fmr = ib_fmr_cache_lookup(pool,448page_list,449list_len,450io_virtual_address);451if (fmr) {452/* found 
in cache */453++fmr->ref_count;454if (fmr->ref_count == 1) {455list_del(&fmr->list);456}457458spin_unlock_irqrestore(&pool->pool_lock, flags);459460return fmr;461}462463if (list_empty(&pool->free_list)) {464spin_unlock_irqrestore(&pool->pool_lock, flags);465return ERR_PTR(-EAGAIN);466}467468fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);469list_del(&fmr->list);470hlist_del_init(&fmr->cache_node);471spin_unlock_irqrestore(&pool->pool_lock, flags);472473result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,474io_virtual_address);475476if (result) {477spin_lock_irqsave(&pool->pool_lock, flags);478list_add(&fmr->list, &pool->free_list);479spin_unlock_irqrestore(&pool->pool_lock, flags);480481printk(KERN_WARNING PFX "fmr_map returns %d\n", result);482483return ERR_PTR(result);484}485486++fmr->remap_count;487fmr->ref_count = 1;488489if (pool->cache_bucket) {490fmr->io_virtual_address = io_virtual_address;491fmr->page_list_len = list_len;492memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));493494spin_lock_irqsave(&pool->pool_lock, flags);495hlist_add_head(&fmr->cache_node,496pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));497spin_unlock_irqrestore(&pool->pool_lock, flags);498}499500return fmr;501}502EXPORT_SYMBOL(ib_fmr_pool_map_phys);503504/**505* ib_fmr_pool_unmap - Unmap FMR506* @fmr:FMR to unmap507*508* Unmap an FMR. 
The FMR mapping may remain valid until the FMR is509* reused (or until ib_flush_fmr_pool() is called).510*/511int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)512{513struct ib_fmr_pool *pool;514unsigned long flags;515516pool = fmr->pool;517518spin_lock_irqsave(&pool->pool_lock, flags);519520--fmr->ref_count;521if (!fmr->ref_count) {522if (fmr->remap_count < pool->max_remaps) {523list_add_tail(&fmr->list, &pool->free_list);524} else {525list_add_tail(&fmr->list, &pool->dirty_list);526if (++pool->dirty_len >= pool->dirty_watermark) {527atomic_inc(&pool->req_ser);528wake_up_process(pool->thread);529}530}531}532533#ifdef DEBUG534if (fmr->ref_count < 0)535printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",536fmr, fmr->ref_count);537#endif538539spin_unlock_irqrestore(&pool->pool_lock, flags);540541return 0;542}543EXPORT_SYMBOL(ib_fmr_pool_unmap);544545546