Path: blob/master/drivers/infiniband/ulp/iser/iser_memory.c
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */

/**
 * iser_start_rdma_unaligned_sg - Allocate a contiguous copy (bounce)
 * buffer for a task whose scatterlist is not RDMA-aligned; for writes,
 * copy the scatterlist contents into it, then DMA map the single-entry
 * copy.
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                        enum iser_data_dir cmd_dir)
{
        int dma_nents;
        struct ib_device *dev;
        char *mem = NULL;
        struct iser_data_buf *data = &iser_task->data[cmd_dir];
        unsigned long cmd_data_len = data->data_len;

        if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
                mem = (void *)__get_free_pages(GFP_ATOMIC,
                      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
        else
                mem = kmalloc(cmd_data_len, GFP_ATOMIC);

        if (mem == NULL) {
                iser_err("Failed to allocate mem size %d %d for copying sglist\n",
                         data->size, (int)cmd_data_len);
                return -ENOMEM;
        }

        if (cmd_dir == ISER_DIR_OUT) {
                /* copy the unaligned sg into the buffer which is used for RDMA */
                struct scatterlist *sgl = (struct scatterlist *)data->buf;
                struct scatterlist *sg;
                int i;
                char *p, *from;

                p = mem;
                for_each_sg(sgl, sg, data->size, i) {
                        from = kmap_atomic(sg_page(sg), KM_USER0);
                        memcpy(p,
                               from + sg->offset,
                               sg->length);
                        kunmap_atomic(from, KM_USER0);
                        p += sg->length;
                }
        }

        sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len);
        iser_task->data_copy[cmd_dir].buf =
                &iser_task->data_copy[cmd_dir].sg_single;
        iser_task->data_copy[cmd_dir].size = 1;

        iser_task->data_copy[cmd_dir].copy_buf = mem;

        dev = iser_task->iser_conn->ib_conn->device->ib_device;
        dma_nents = ib_dma_map_sg(dev,
                                  &iser_task->data_copy[cmd_dir].sg_single,
                                  1,
                                  (cmd_dir == ISER_DIR_OUT) ?
                                  DMA_TO_DEVICE : DMA_FROM_DEVICE);
        BUG_ON(dma_nents == 0);

        iser_task->data_copy[cmd_dir].dma_nents = dma_nents;
        return 0;
}
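/*
 * Illustrative note (added commentary, not in the original source):
 * the allocator choice above is driven by ISER_KMALLOC_THRESHOLD.
 * For a 256 KB transfer, cmd_data_len = 0x40000 > 0x20000, so the
 * copy buffer comes from __get_free_pages() with order
 *
 *      ilog2(roundup_pow_of_two(0x40000)) - PAGE_SHIFT = 18 - 12 = 6
 *
 * (assuming 4 KB pages), i.e. 64 contiguous pages. A 64 KB transfer
 * (0x10000 <= 0x20000) would be served by kmalloc() instead. The same
 * order expression must be repeated at free time, as done in
 * iser_finalize_rdma_unaligned_sg() below.
 */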
/**
 * iser_finalize_rdma_unaligned_sg - Unmap the copy (bounce) buffer; for
 * reads, copy the RDMA result back into the task's unaligned
 * scatterlist, then free the buffer.
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     enum iser_data_dir cmd_dir)
{
        struct ib_device *dev;
        struct iser_data_buf *mem_copy;
        unsigned long cmd_data_len;

        dev = iser_task->iser_conn->ib_conn->device->ib_device;
        mem_copy = &iser_task->data_copy[cmd_dir];

        ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1,
                        (cmd_dir == ISER_DIR_OUT) ?
                        DMA_TO_DEVICE : DMA_FROM_DEVICE);

        if (cmd_dir == ISER_DIR_IN) {
                char *mem;
                struct scatterlist *sgl, *sg;
                unsigned char *p, *to;
                unsigned int sg_size;
                int i;

                /* copy back read RDMA to unaligned sg */
                mem = mem_copy->copy_buf;

                sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf;
                sg_size = iser_task->data[ISER_DIR_IN].size;

                p = mem;
                for_each_sg(sgl, sg, sg_size, i) {
                        to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
                        memcpy(to + sg->offset,
                               p,
                               sg->length);
                        kunmap_atomic(to, KM_SOFTIRQ0);
                        p += sg->length;
                }
        }

        cmd_data_len = iser_task->data[cmd_dir].data_len;

        if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
                free_pages((unsigned long)mem_copy->copy_buf,
                           ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
        else
                kfree(mem_copy->copy_buf);

        mem_copy->copy_buf = NULL;
}

#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be
 * less than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the weird case
 * where several fragments of the same page appear in the SG as consecutive
 * elements, and it handles a single-entry SG.
 */
static int iser_sg_to_page_vec(struct iser_data_buf *data,
                               struct iser_page_vec *page_vec,
                               struct ib_device *ibdev)
{
        struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
        u64 start_addr, end_addr, page, chunk_start = 0;
        unsigned long total_sz = 0;
        unsigned int dma_len;
        int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

        /* compute the offset of first element */
        page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;

        new_chunk = 1;
        cur_page = 0;
        for_each_sg(sgl, sg, data->dma_nents, i) {
                start_addr = ib_sg_dma_address(ibdev, sg);
                if (new_chunk)
                        chunk_start = start_addr;
                dma_len = ib_sg_dma_len(ibdev, sg);
                end_addr = start_addr + dma_len;
                total_sz += dma_len;

                /* collect page fragments until aligned or end of SG list */
                if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
                        new_chunk = 0;
                        continue;
                }
                new_chunk = 1;

                /* address of the first page in the contiguous chunk;
                 * masking relevant for the very first SG entry,
                 * which might be unaligned */
                page = chunk_start & MASK_4K;
                do {
                        page_vec->pages[cur_page++] = page;
                        page += SIZE_4K;
                } while (page < end_addr);
        }

        page_vec->data_size = total_sz;
        iser_dbg("page_vec->data_size:%d cur_page %d\n",
                 page_vec->data_size, cur_page);
        return cur_page;
}
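/*
 * Worked example (added commentary with made-up addresses): given three
 * DMA-mapped SG entries that satisfy the alignment rule,
 *
 *      sg[0]: dma_addr 0x10000800, dma_len 0x0800  (ends 0x10001000)
 *      sg[1]: dma_addr 0x10001000, dma_len 0x2000  (ends 0x10003000)
 *      sg[2]: dma_addr 0x10003000, dma_len 0x1000  (ends 0x10004000)
 *
 * every element either ends 4K aligned or is the last one, so each
 * closes a chunk, and iser_sg_to_page_vec() produces
 *
 *      offset    = 0x800 (intra-page offset of the first element)
 *      pages[]   = { 0x10000000, 0x10001000, 0x10002000, 0x10003000 }
 *      data_size = 0x3800
 */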
/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of correctly aligned entries. Supports the case
 * where consecutive SG elements are actually fragments of the same physical
 * page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
                                     struct ib_device *ibdev)
{
        struct scatterlist *sgl, *sg, *next_sg = NULL;
        u64 start_addr, end_addr;
        int i, ret_len, start_check = 0;

        if (data->dma_nents == 1)
                return 1;

        sgl = (struct scatterlist *)data->buf;
        start_addr = ib_sg_dma_address(ibdev, sgl);

        for_each_sg(sgl, sg, data->dma_nents, i) {
                if (start_check && !IS_4K_ALIGNED(start_addr))
                        break;

                next_sg = sg_next(sg);
                if (!next_sg)
                        break;

                end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
                start_addr = ib_sg_dma_address(ibdev, next_sg);

                if (end_addr == start_addr) {
                        start_check = 0;
                        continue;
                } else
                        start_check = 1;

                if (!IS_4K_ALIGNED(end_addr))
                        break;
        }
        ret_len = (next_sg) ? i : i + 1;
        iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
                 ret_len, data->dma_nents, data);
        return ret_len;
}

static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
{
        struct scatterlist *sgl = (struct scatterlist *)data->buf;
        struct scatterlist *sg;
        int i;

        if (iser_debug_level == 0)
                return;

        for_each_sg(sgl, sg, data->dma_nents, i)
                iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
                          "off:0x%x sz:0x%x dma_len:0x%x\n",
                          i, (unsigned long)ib_sg_dma_address(ibdev, sg),
                          sg_page(sg), sg->offset,
                          sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
        int i;

        iser_err("page vec length %d data size %d\n",
                 page_vec->length, page_vec->data_size);
        for (i = 0; i < page_vec->length; i++)
                iser_err("%d %lx\n", i, (unsigned long)page_vec->pages[i]);
}

static void iser_page_vec_build(struct iser_data_buf *data,
                                struct iser_page_vec *page_vec,
                                struct ib_device *ibdev)
{
        int page_vec_len = 0;

        page_vec->length = 0;
        page_vec->offset = 0;

        iser_dbg("Translating sg sz: %d\n", data->dma_nents);
        page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev);
        iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);

        page_vec->length = page_vec_len;

        if (page_vec_len * SIZE_4K < page_vec->data_size) {
                iser_err("page_vec too short to hold this SG\n");
                iser_data_buf_dump(data, ibdev);
                iser_dump_page_vec(page_vec);
                BUG();
        }
}

int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
                           struct iser_data_buf *data,
                           enum iser_data_dir iser_dir,
                           enum dma_data_direction dma_dir)
{
        struct ib_device *dev;

        iser_task->dir[iser_dir] = 1;
        dev = iser_task->iser_conn->ib_conn->device->ib_device;

        data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
        if (data->dma_nents == 0) {
                iser_err("dma_map_sg failed!!!\n");
                return -EINVAL;
        }
        return 0;
}

void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
{
        struct ib_device *dev;
        struct iser_data_buf *data;

        dev = iser_task->iser_conn->ib_conn->device->ib_device;

        if (iser_task->dir[ISER_DIR_IN]) {
                data = &iser_task->data[ISER_DIR_IN];
                ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
        }

        if (iser_task->dir[ISER_DIR_OUT]) {
                data = &iser_task->data[ISER_DIR_OUT];
                ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE);
        }
}
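/*
 * Illustrative note (added commentary with made-up addresses): the
 * alignment rule enforced by iser_data_buf_aligned_len() above fails
 * when a non-first element starts, or a non-last element ends, off a
 * 4K boundary. For example,
 *
 *      sg[0]: dma_addr 0x20000000, dma_len 0x1000
 *      sg[1]: dma_addr 0x30000800, dma_len 0x1000
 *
 * sg[1] starts at intra-page offset 0x800, so only 1 of the 2 entries
 * counts as aligned, and iser_reg_rdma_mem() below takes the
 * copy-buffer fallback via iser_start_rdma_unaligned_sg().
 */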
/**
 * iser_reg_rdma_mem - Registers memory intended for RDMA,
 * obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
                      enum iser_data_dir cmd_dir)
{
        struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
        struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
        struct iser_regd_buf *regd_buf;
        int aligned_len;
        int err;
        int i;
        struct scatterlist *sg;

        regd_buf = &iser_task->rdma_regd[cmd_dir];

        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
                iscsi_conn->fmr_unalign_cnt++;
                iser_warn("rdma alignment violation %d/%d aligned\n",
                          aligned_len, mem->size);
                iser_data_buf_dump(mem, ibdev);

                /* unmap the command data before accessing it */
                iser_dma_unmap_task_data(iser_task);

                /* allocate copy buf; if we are writing, copy the
                 * unaligned scatterlist, then dma map the copy */
                if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
                        return -ENOMEM;
                mem = &iser_task->data_copy[cmd_dir];
        }

        /* if there is a single dma entry, FMR is not needed */
        if (mem->dma_nents == 1) {
                sg = (struct scatterlist *)mem->buf;

                regd_buf->reg.lkey = device->mr->lkey;
                regd_buf->reg.rkey = device->mr->rkey;
                regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
                regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
                regd_buf->reg.is_fmr = 0;

                iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
                         "va: 0x%08lX sz: %ld\n",
                         (unsigned int)regd_buf->reg.lkey,
                         (unsigned int)regd_buf->reg.rkey,
                         (unsigned long)regd_buf->reg.va,
                         (unsigned long)regd_buf->reg.len);
        } else { /* use FMR for multiple dma entries */
                iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
                err = iser_reg_page_vec(ib_conn, ib_conn->page_vec,
                                        &regd_buf->reg);
                if (err) {
                        iser_data_buf_dump(mem, ibdev);
                        iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
                                 mem->dma_nents,
                                 ntoh24(iser_task->desc.iscsi_header.dlength));
                        iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
                                 ib_conn->page_vec->data_size,
                                 ib_conn->page_vec->length,
                                 ib_conn->page_vec->offset);
                        for (i = 0; i < ib_conn->page_vec->length; i++)
                                iser_err("page_vec[%d] = 0x%llx\n", i,
                                         (unsigned long long)ib_conn->page_vec->pages[i]);
                        return err;
                }
        }
        return 0;
}
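/*
 * Usage sketch (added commentary; the exact call sites live in the iSER
 * initiator code, and the sequence here is abbreviated): for a write,
 * the expected flow over this file's API is roughly
 *
 *      iser_dma_map_task_data(task, &task->data[ISER_DIR_OUT],
 *                             ISER_DIR_OUT, DMA_TO_DEVICE);
 *      err = iser_reg_rdma_mem(task, ISER_DIR_OUT);
 *      ... post the RDMA using task->rdma_regd[ISER_DIR_OUT].reg ...
 *      iser_dma_unmap_task_data(task);
 *
 * with iser_finalize_rdma_unaligned_sg() invoked at task completion
 * whenever the copy-buffer path was taken (copy_buf != NULL).
 */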