/* drivers/infiniband/hw/qib/qib_cq.c */
/*1* Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.2* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.3*4* This software is available to you under a choice of one of two5* licenses. You may choose to be licensed under the terms of the GNU6* General Public License (GPL) Version 2, available from the file7* COPYING in the main directory of this source tree, or the8* OpenIB.org BSD license below:9*10* Redistribution and use in source and binary forms, with or11* without modification, are permitted provided that the following12* conditions are met:13*14* - Redistributions of source code must retain the above15* copyright notice, this list of conditions and the following16* disclaimer.17*18* - Redistributions in binary form must reproduce the above19* copyright notice, this list of conditions and the following20* disclaimer in the documentation and/or other materials21* provided with the distribution.22*23* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,24* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF25* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND26* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS27* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN28* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN29* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE30* SOFTWARE.31*/3233#include <linux/err.h>34#include <linux/slab.h>35#include <linux/vmalloc.h>3637#include "qib_verbs.h"3839/**40* qib_cq_enter - add a new entry to the completion queue41* @cq: completion queue42* @entry: work completion entry to add43* @sig: true if @entry is a solicitated entry44*45* This may be called with qp->s_lock held.46*/47void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)48{49struct qib_cq_wc *wc;50unsigned long flags;51u32 head;52u32 next;5354spin_lock_irqsave(&cq->lock, flags);5556/*57* Note that the head pointer might be writable by user processes.58* Take care to verify it is a sane value.59*/60wc = cq->queue;61head = wc->head;62if (head >= (unsigned) cq->ibcq.cqe) {63head = cq->ibcq.cqe;64next = 0;65} else66next = head + 1;67if (unlikely(next == wc->tail)) {68spin_unlock_irqrestore(&cq->lock, flags);69if (cq->ibcq.event_handler) {70struct ib_event ev;7172ev.device = cq->ibcq.device;73ev.element.cq = &cq->ibcq;74ev.event = IB_EVENT_CQ_ERR;75cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);76}77return;78}79if (cq->ip) {80wc->uqueue[head].wr_id = entry->wr_id;81wc->uqueue[head].status = entry->status;82wc->uqueue[head].opcode = entry->opcode;83wc->uqueue[head].vendor_err = entry->vendor_err;84wc->uqueue[head].byte_len = entry->byte_len;85wc->uqueue[head].ex.imm_data =86(__u32 __force)entry->ex.imm_data;87wc->uqueue[head].qp_num = entry->qp->qp_num;88wc->uqueue[head].src_qp = entry->src_qp;89wc->uqueue[head].wc_flags = entry->wc_flags;90wc->uqueue[head].pkey_index = entry->pkey_index;91wc->uqueue[head].slid = entry->slid;92wc->uqueue[head].sl = entry->sl;93wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;94wc->uqueue[head].port_num = entry->port_num;95/* 
Make sure entry is written before the head index. */96smp_wmb();97} else98wc->kqueue[head] = *entry;99wc->head = next;100101if (cq->notify == IB_CQ_NEXT_COMP ||102(cq->notify == IB_CQ_SOLICITED &&103(solicited || entry->status != IB_WC_SUCCESS))) {104cq->notify = IB_CQ_NONE;105cq->triggered++;106/*107* This will cause send_complete() to be called in108* another thread.109*/110queue_work(qib_cq_wq, &cq->comptask);111}112113spin_unlock_irqrestore(&cq->lock, flags);114}115116/**117* qib_poll_cq - poll for work completion entries118* @ibcq: the completion queue to poll119* @num_entries: the maximum number of entries to return120* @entry: pointer to array where work completions are placed121*122* Returns the number of completion entries polled.123*124* This may be called from interrupt context. Also called by ib_poll_cq()125* in the generic verbs code.126*/127int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)128{129struct qib_cq *cq = to_icq(ibcq);130struct qib_cq_wc *wc;131unsigned long flags;132int npolled;133u32 tail;134135/* The kernel can only poll a kernel completion queue */136if (cq->ip) {137npolled = -EINVAL;138goto bail;139}140141spin_lock_irqsave(&cq->lock, flags);142143wc = cq->queue;144tail = wc->tail;145if (tail > (u32) cq->ibcq.cqe)146tail = (u32) cq->ibcq.cqe;147for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {148if (tail == wc->head)149break;150/* The kernel doesn't need a RMB since it has the lock. */151*entry = wc->kqueue[tail];152if (tail >= cq->ibcq.cqe)153tail = 0;154else155tail++;156}157wc->tail = tail;158159spin_unlock_irqrestore(&cq->lock, flags);160161bail:162return npolled;163}164165static void send_complete(struct work_struct *work)166{167struct qib_cq *cq = container_of(work, struct qib_cq, comptask);168169/*170* The completion handler will most likely rearm the notification171* and poll for all pending entries. 
If a new completion entry172* is added while we are in this routine, queue_work()173* won't call us again until we return so we check triggered to174* see if we need to call the handler again.175*/176for (;;) {177u8 triggered = cq->triggered;178179/*180* IPoIB connected mode assumes the callback is from a181* soft IRQ. We simulate this by blocking "bottom halves".182* See the implementation for ipoib_cm_handle_tx_wc(),183* netif_tx_lock_bh() and netif_tx_lock().184*/185local_bh_disable();186cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);187local_bh_enable();188189if (cq->triggered == triggered)190return;191}192}193194/**195* qib_create_cq - create a completion queue196* @ibdev: the device this completion queue is attached to197* @entries: the minimum size of the completion queue198* @context: unused by the QLogic_IB driver199* @udata: user data for libibverbs.so200*201* Returns a pointer to the completion queue or negative errno values202* for failure.203*204* Called by ib_create_cq() in the generic verbs code.205*/206struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,207int comp_vector, struct ib_ucontext *context,208struct ib_udata *udata)209{210struct qib_ibdev *dev = to_idev(ibdev);211struct qib_cq *cq;212struct qib_cq_wc *wc;213struct ib_cq *ret;214u32 sz;215216if (entries < 1 || entries > ib_qib_max_cqes) {217ret = ERR_PTR(-EINVAL);218goto done;219}220221/* Allocate the completion queue structure. 
*/222cq = kmalloc(sizeof(*cq), GFP_KERNEL);223if (!cq) {224ret = ERR_PTR(-ENOMEM);225goto done;226}227228/*229* Allocate the completion queue entries and head/tail pointers.230* This is allocated separately so that it can be resized and231* also mapped into user space.232* We need to use vmalloc() in order to support mmap and large233* numbers of entries.234*/235sz = sizeof(*wc);236if (udata && udata->outlen >= sizeof(__u64))237sz += sizeof(struct ib_uverbs_wc) * (entries + 1);238else239sz += sizeof(struct ib_wc) * (entries + 1);240wc = vmalloc_user(sz);241if (!wc) {242ret = ERR_PTR(-ENOMEM);243goto bail_cq;244}245246/*247* Return the address of the WC as the offset to mmap.248* See qib_mmap() for details.249*/250if (udata && udata->outlen >= sizeof(__u64)) {251int err;252253cq->ip = qib_create_mmap_info(dev, sz, context, wc);254if (!cq->ip) {255ret = ERR_PTR(-ENOMEM);256goto bail_wc;257}258259err = ib_copy_to_udata(udata, &cq->ip->offset,260sizeof(cq->ip->offset));261if (err) {262ret = ERR_PTR(err);263goto bail_ip;264}265} else266cq->ip = NULL;267268spin_lock(&dev->n_cqs_lock);269if (dev->n_cqs_allocated == ib_qib_max_cqs) {270spin_unlock(&dev->n_cqs_lock);271ret = ERR_PTR(-ENOMEM);272goto bail_ip;273}274275dev->n_cqs_allocated++;276spin_unlock(&dev->n_cqs_lock);277278if (cq->ip) {279spin_lock_irq(&dev->pending_lock);280list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);281spin_unlock_irq(&dev->pending_lock);282}283284/*285* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.286* The number of entries should be >= the number requested or return287* an error.288*/289cq->ibcq.cqe = entries;290cq->notify = IB_CQ_NONE;291cq->triggered = 0;292spin_lock_init(&cq->lock);293INIT_WORK(&cq->comptask, send_complete);294wc->head = 0;295wc->tail = 0;296cq->queue = wc;297298ret = &cq->ibcq;299300goto done;301302bail_ip:303kfree(cq->ip);304bail_wc:305vfree(wc);306bail_cq:307kfree(cq);308done:309return ret;310}311312/**313* qib_destroy_cq - destroy a completion 
queue314* @ibcq: the completion queue to destroy.315*316* Returns 0 for success.317*318* Called by ib_destroy_cq() in the generic verbs code.319*/320int qib_destroy_cq(struct ib_cq *ibcq)321{322struct qib_ibdev *dev = to_idev(ibcq->device);323struct qib_cq *cq = to_icq(ibcq);324325flush_work(&cq->comptask);326spin_lock(&dev->n_cqs_lock);327dev->n_cqs_allocated--;328spin_unlock(&dev->n_cqs_lock);329if (cq->ip)330kref_put(&cq->ip->ref, qib_release_mmap_info);331else332vfree(cq->queue);333kfree(cq);334335return 0;336}337338/**339* qib_req_notify_cq - change the notification type for a completion queue340* @ibcq: the completion queue341* @notify_flags: the type of notification to request342*343* Returns 0 for success.344*345* This may be called from interrupt context. Also called by346* ib_req_notify_cq() in the generic verbs code.347*/348int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)349{350struct qib_cq *cq = to_icq(ibcq);351unsigned long flags;352int ret = 0;353354spin_lock_irqsave(&cq->lock, flags);355/*356* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow357* any other transitions (see C11-31 and C11-32 in ch. 
11.4.2.2).358*/359if (cq->notify != IB_CQ_NEXT_COMP)360cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;361362if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&363cq->queue->head != cq->queue->tail)364ret = 1;365366spin_unlock_irqrestore(&cq->lock, flags);367368return ret;369}370371/**372* qib_resize_cq - change the size of the CQ373* @ibcq: the completion queue374*375* Returns 0 for success.376*/377int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)378{379struct qib_cq *cq = to_icq(ibcq);380struct qib_cq_wc *old_wc;381struct qib_cq_wc *wc;382u32 head, tail, n;383int ret;384u32 sz;385386if (cqe < 1 || cqe > ib_qib_max_cqes) {387ret = -EINVAL;388goto bail;389}390391/*392* Need to use vmalloc() if we want to support large #s of entries.393*/394sz = sizeof(*wc);395if (udata && udata->outlen >= sizeof(__u64))396sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);397else398sz += sizeof(struct ib_wc) * (cqe + 1);399wc = vmalloc_user(sz);400if (!wc) {401ret = -ENOMEM;402goto bail;403}404405/* Check that we can write the offset to mmap. 
*/406if (udata && udata->outlen >= sizeof(__u64)) {407__u64 offset = 0;408409ret = ib_copy_to_udata(udata, &offset, sizeof(offset));410if (ret)411goto bail_free;412}413414spin_lock_irq(&cq->lock);415/*416* Make sure head and tail are sane since they417* might be user writable.418*/419old_wc = cq->queue;420head = old_wc->head;421if (head > (u32) cq->ibcq.cqe)422head = (u32) cq->ibcq.cqe;423tail = old_wc->tail;424if (tail > (u32) cq->ibcq.cqe)425tail = (u32) cq->ibcq.cqe;426if (head < tail)427n = cq->ibcq.cqe + 1 + head - tail;428else429n = head - tail;430if (unlikely((u32)cqe < n)) {431ret = -EINVAL;432goto bail_unlock;433}434for (n = 0; tail != head; n++) {435if (cq->ip)436wc->uqueue[n] = old_wc->uqueue[tail];437else438wc->kqueue[n] = old_wc->kqueue[tail];439if (tail == (u32) cq->ibcq.cqe)440tail = 0;441else442tail++;443}444cq->ibcq.cqe = cqe;445wc->head = n;446wc->tail = 0;447cq->queue = wc;448spin_unlock_irq(&cq->lock);449450vfree(old_wc);451452if (cq->ip) {453struct qib_ibdev *dev = to_idev(ibcq->device);454struct qib_mmap_info *ip = cq->ip;455456qib_update_mmap_info(dev, ip, sz, wc);457458/*459* Return the offset to mmap.460* See qib_mmap() for details.461*/462if (udata && udata->outlen >= sizeof(__u64)) {463ret = ib_copy_to_udata(udata, &ip->offset,464sizeof(ip->offset));465if (ret)466goto bail;467}468469spin_lock_irq(&dev->pending_lock);470if (list_empty(&ip->pending_mmaps))471list_add(&ip->pending_mmaps, &dev->pending_mmaps);472spin_unlock_irq(&dev->pending_lock);473}474475ret = 0;476goto bail;477478bail_unlock:479spin_unlock_irq(&cq->lock);480bail_free:481vfree(wc);482bail:483return ret;484}485486487