Path: blob/master/drivers/infiniband/hw/qib/qib_qp.c
/*
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 * All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>

#include "qib.h"

#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)

static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
			      struct qpn_map *map, unsigned off)
{
	return (map - qpt->map) * BITS_PER_PAGE + off;
}

static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
					struct qpn_map *map, unsigned off,
					unsigned n)
{
	if (qpt->mask) {
		off++;
		if (((off & qpt->mask) >> 1) >= n)
			off = (off | qpt->mask) + 2;
	} else
		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
	return off;
}

/*
 * Convert the AETH credit code into the number of credits.
 */
static u32 credit_table[31] = {
	0,		/* 0 */
	1,		/* 1 */
	2,		/* 2 */
	3,		/* 3 */
	4,		/* 4 */
	6,		/* 5 */
	8,		/* 6 */
	12,		/* 7 */
	16,		/* 8 */
	24,		/* 9 */
	32,		/* A */
	48,		/* B */
	64,		/* C */
	96,		/* D */
	128,		/* E */
	192,		/* F */
	256,		/* 10 */
	384,		/* 11 */
	512,		/* 12 */
	768,		/* 13 */
	1024,		/* 14 */
	1536,		/* 15 */
	2048,		/* 16 */
	3072,		/* 17 */
	4096,		/* 18 */
	6144,		/* 19 */
	8192,		/* 1A */
	12288,		/* 1B */
	16384,		/* 1C */
	24576,		/* 1D */
	32768		/* 1E */
};

static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	/*
	 * Free the page if someone raced with us installing it.
	 */

	spin_lock(&qpt->lock);
	if (map->page)
		free_page(page);
	else
		map->page = (void *)page;
	spin_unlock(&qpt->lock);
}
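/*
 * QPN-to-bitmap arithmetic, as an illustrative worked example (assuming
 * PAGE_SIZE is 4096, so BITS_PER_PAGE is 32768): QPN 40000 lives in
 * qpt->map[1] (40000 / 32768 == 1) at bit offset 7232
 * (40000 & BITS_PER_PAGE_MASK), and mk_qpn(qpt, &qpt->map[1], 7232)
 * reconstructs 40000.
 */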
/*
 * Allocate the next available QPN or
 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
 */
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
		     enum ib_qp_type type, u8 port)
{
	u32 i, offset, max_scan, qpn;
	struct qpn_map *map;
	u32 ret;

	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
		unsigned n;

		ret = type == IB_QPT_GSI;
		n = 1 << (ret + 2 * (port - 1));
		spin_lock(&qpt->lock);
		if (qpt->flags & n)
			ret = -EINVAL;
		else
			qpt->flags |= n;
		spin_unlock(&qpt->lock);
		goto bail;
	}

	qpn = qpt->last + 2;
	if (qpn >= QPN_MAX)
		qpn = 2;
	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
		qpn = (qpn | qpt->mask) + 2;
	offset = qpn & BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		if (unlikely(!map->page)) {
			get_map_page(qpt, map);
			if (unlikely(!map->page))
				break;
		}
		do {
			if (!test_and_set_bit(offset, map->page)) {
				qpt->last = qpn;
				ret = qpn;
				goto bail;
			}
			offset = find_next_offset(qpt, map, offset,
						  dd->n_krcv_queues);
			qpn = mk_qpn(qpt, map, offset);
			/*
			 * This test differs from alloc_pidmap().
			 * If find_next_offset() does find a zero
			 * bit, we don't need to check for QPN
			 * wrapping around past our starting QPN.
			 * We just need to be sure we don't loop
			 * forever.
			 */
		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan all the existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			offset = 0;
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			offset = 0;
		} else {
			map = &qpt->map[0];
			offset = 2;
		}
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = -ENOMEM;

bail:
	return ret;
}

static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
{
	struct qpn_map *map;

	map = qpt->map + qpn / BITS_PER_PAGE;
	if (map->page)
		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
}

/*
 * Put the QP into the hash table.
 * The hash table holds a reference to the QP.
 */
static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	unsigned n = qp->ibqp.qp_num % dev->qp_table_size;
	unsigned long flags;

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (qp->ibqp.qp_num == 0)
		ibp->qp0 = qp;
	else if (qp->ibqp.qp_num == 1)
		ibp->qp1 = qp;
	else {
		qp->next = dev->qp_table[n];
		dev->qp_table[n] = qp;
	}
	atomic_inc(&qp->refcount);

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
}

/*
 * Remove the QP from the table so it can't be found asynchronously by
 * the receive interrupt routine.
 */
static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_qp *q, **qpp;
	unsigned long flags;

	qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size];

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (ibp->qp0 == qp) {
		ibp->qp0 = NULL;
		atomic_dec(&qp->refcount);
	} else if (ibp->qp1 == qp) {
		ibp->qp1 = NULL;
		atomic_dec(&qp->refcount);
	} else
		for (; (q = *qpp) != NULL; qpp = &q->next)
			if (q == qp) {
				*qpp = qp->next;
				qp->next = NULL;
				atomic_dec(&qp->refcount);
				break;
			}

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
}
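/*
 * Note on reference counting: insert_qp() takes a reference on behalf of
 * the hash table (or the per-port qp0/qp1 pointer) and remove_qp() drops
 * it.  A lookup through qib_lookup_qpn() takes its own reference under
 * dev->qpt_lock, which the caller is responsible for releasing.
 */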
/**
 * qib_free_all_qps - check for QPs still in use
 * @dd: the qlogic_ib device
 *
 * There should not be any QPs still in use.
 * Free memory for table.
 */
unsigned qib_free_all_qps(struct qib_devdata *dd)
{
	struct qib_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	struct qib_qp *qp;
	unsigned n, qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct qib_ibport *ibp = &dd->pport[n].ibport_data;

		if (!qib_mcast_tree_empty(ibp))
			qp_inuse++;
		if (ibp->qp0)
			qp_inuse++;
		if (ibp->qp1)
			qp_inuse++;
	}

	spin_lock_irqsave(&dev->qpt_lock, flags);
	for (n = 0; n < dev->qp_table_size; n++) {
		qp = dev->qp_table[n];
		dev->qp_table[n] = NULL;

		for (; qp; qp = qp->next)
			qp_inuse++;
	}
	spin_unlock_irqrestore(&dev->qpt_lock, flags);

	return qp_inuse;
}

/**
 * qib_lookup_qpn - return the QP with the given QPN
 * @ibp: the IB port
 * @qpn: the QP number to look up
 *
 * The caller is responsible for decrementing the QP reference count
 * when done.
 */
struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
{
	struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
	unsigned long flags;
	struct qib_qp *qp;

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (qpn == 0)
		qp = ibp->qp0;
	else if (qpn == 1)
		qp = ibp->qp1;
	else
		for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp;
		     qp = qp->next)
			if (qp->ibqp.qp_num == qpn)
				break;
	if (qp)
		atomic_inc(&qp->refcount);

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
	return qp;
}

/**
 * qib_reset_qp - initialize the QP state to the reset state
 * @qp: the QP to reset
 * @type: the QP type
 */
static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
{
	qp->remote_qpn = 0;
	qp->qkey = 0;
	qp->qp_access_flags = 0;
	atomic_set(&qp->s_dma_busy, 0);
	qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
	qp->s_hdrwords = 0;
	qp->s_wqe = NULL;
	qp->s_draining = 0;
	qp->s_next_psn = 0;
	qp->s_last_psn = 0;
	qp->s_sending_psn = 0;
	qp->s_sending_hpsn = 0;
	qp->s_psn = 0;
	qp->r_psn = 0;
	qp->r_msn = 0;
	if (type == IB_QPT_RC) {
		qp->s_state = IB_OPCODE_RC_SEND_LAST;
		qp->r_state = IB_OPCODE_RC_SEND_LAST;
	} else {
		qp->s_state = IB_OPCODE_UC_SEND_LAST;
		qp->r_state = IB_OPCODE_UC_SEND_LAST;
	}
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
	qp->r_nak_state = 0;
	qp->r_aflags = 0;
	qp->r_flags = 0;
	qp->s_head = 0;
	qp->s_tail = 0;
	qp->s_cur = 0;
	qp->s_acked = 0;
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;
	qp->s_mig_state = IB_MIG_MIGRATED;
	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
	qp->r_head_ack_queue = 0;
	qp->s_tail_ack_queue = 0;
	qp->s_num_rd_atomic = 0;
	if (qp->r_rq.wq) {
		qp->r_rq.wq->head = 0;
		qp->r_rq.wq->tail = 0;
	}
	qp->r_sge.num_sge = 0;
}

static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
{
	unsigned n;

	if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
		while (qp->s_rdma_read_sge.num_sge) {
			atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
			if (--qp->s_rdma_read_sge.num_sge)
				qp->s_rdma_read_sge.sge =
					*qp->s_rdma_read_sge.sg_list++;
		}

	while (qp->r_sge.num_sge) {
		atomic_dec(&qp->r_sge.sge.mr->refcount);
		if (--qp->r_sge.num_sge)
			qp->r_sge.sge = *qp->r_sge.sg_list++;
	}

	if (clr_sends) {
		while (qp->s_last != qp->s_head) {
			struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
			unsigned i;

			for (i = 0; i < wqe->wr.num_sge; i++) {
				struct qib_sge *sge = &wqe->sg_list[i];

				atomic_dec(&sge->mr->refcount);
			}
			if (qp->ibqp.qp_type == IB_QPT_UD ||
			    qp->ibqp.qp_type == IB_QPT_SMI ||
			    qp->ibqp.qp_type == IB_QPT_GSI)
				atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
		}
		if (qp->s_rdma_mr) {
			atomic_dec(&qp->s_rdma_mr->refcount);
			qp->s_rdma_mr = NULL;
		}
	}

	if (qp->ibqp.qp_type != IB_QPT_RC)
		return;

	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
		struct qib_ack_entry *e = &qp->s_ack_queue[n];

		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
		    e->rdma_sge.mr) {
			atomic_dec(&e->rdma_sge.mr->refcount);
			e->rdma_sge.mr = NULL;
		}
	}
}
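/*
 * clear_mr_refs() drops the memory region references still held by the
 * receive-side (and, optionally, send-side) SGEs.  It is called with
 * clr_sends == 0 from qib_error_qp() below, where the send queue may
 * still be draining, and with clr_sends == 1 only once the QP has been
 * quiesced (reset or destroy).
 */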
/**
 * qib_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 * Returns true if last WQE event should be generated.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 */
int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
{
	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	int ret = 0;

	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
		goto bail;

	qp->state = IB_QPS_ERR;

	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
		del_timer(&qp->s_timer);
	}

	if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
		qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;

	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
		list_del_init(&qp->iowait);
	}
	spin_unlock(&dev->pending_lock);

	if (!(qp->s_flags & QIB_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			atomic_dec(&qp->s_rdma_mr->refcount);
			qp->s_rdma_mr = NULL;
		}
		if (qp->s_tx) {
			qib_put_txreq(qp->s_tx);
			qp->s_tx = NULL;
		}
	}

	/* Schedule the sending tasklet to drain the send work queue. */
	if (qp->s_last != qp->s_head)
		qib_schedule_send(qp);

	clear_mr_refs(qp, 0);

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.opcode = IB_WC_RECV;

	if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
	wc.status = IB_WC_WR_FLUSH_ERR;

	if (qp->r_rq.wq) {
		struct qib_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);

		/* sanity check pointers before trusting them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		while (tail != head) {
			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	} else if (qp->ibqp.event_handler)
		ret = 1;

bail:
	return ret;
}
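/*
 * Lock ordering note: qib_modify_qp() below takes r_lock with interrupts
 * disabled and then s_lock; qib_error_qp() above expects to be entered
 * with both already held, and nests dev->pending_lock inside them.
 */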
/**
 * qib_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		  int attr_mask, struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibqp->device);
	struct qib_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int ret;
	u32 pmtu = 0; /* for gcc warning only */

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
			goto inval;
		if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
			goto inval;
		if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > QIB_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  OK to set greater
	 * than the active mtu (or even the max_cap, if we have tuned
	 * that to a small mtu).  We'll set qp->path_mtu
	 * to the lesser of the requested attribute mtu and the active mtu,
	 * for packetizing messages.
	 * Note that the QP port has to be set in INIT and MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		struct qib_devdata *dd = dd_from_dev(dev);
		int mtu, pidx = qp->port_num - 1;

		mtu = ib_mtu_enum_to_int(attr->path_mtu);
		if (mtu == -1)
			goto inval;
		if (mtu > dd->pport[pidx].ibmtu) {
			switch (dd->pport[pidx].ibmtu) {
			case 4096:
				pmtu = IB_MTU_4096;
				break;
			case 2048:
				pmtu = IB_MTU_2048;
				break;
			case 1024:
				pmtu = IB_MTU_1024;
				break;
			case 512:
				pmtu = IB_MTU_512;
				break;
			case 256:
				pmtu = IB_MTU_256;
				break;
			default:
				pmtu = IB_MTU_2048;
			}
		} else
			pmtu = attr->path_mtu;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else
			goto inval;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			spin_lock(&dev->pending_lock);
			if (!list_empty(&qp->iowait))
				list_del_init(&qp->iowait);
			spin_unlock(&dev->pending_lock);
			qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
			spin_unlock(&qp->s_lock);
			spin_unlock_irq(&qp->r_lock);
			/* Stop the sending work queue and retry timer */
			cancel_work_sync(&qp->s_work);
			del_timer_sync(&qp->s_timer);
			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
			if (qp->s_tx) {
				qib_put_txreq(qp->s_tx);
				qp->s_tx = NULL;
			}
			remove_qp(dev, qp);
			wait_event(qp->wait, !atomic_read(&qp->refcount));
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_lock);
			clear_mr_refs(qp, 1);
			qib_reset_qp(qp, ibqp->qp_type);
		}
		break;
	case IB_QPS_RTR:
		/* Allow event to retrigger if QP set to RTR more than once */
		qp->r_flags &= ~QIB_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & QIB_PSN_MASK;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU)
		qp->path_mtu = pmtu;

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT)
		qp->timeout = attr->timeout;

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		insert_qp(dev, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);
	ret = -EINVAL;

bail:
	return ret;
}
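/*
 * Example of the path_mtu clamping above: if the caller asks for
 * IB_MTU_4096 but the port's active ibmtu is 2048, qp->path_mtu is set
 * to IB_MTU_2048; a request at or below the port MTU is stored as given.
 */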
int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		 int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct qib_qp *qp = to_iqp(ibqp);

	attr->qp_state = qp->state;
	attr->cur_qp_state = attr->qp_state;
	attr->path_mtu = qp->path_mtu;
	attr->path_mig_state = qp->s_mig_state;
	attr->qkey = qp->qkey;
	attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
	attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
	attr->dest_qp_num = qp->remote_qpn;
	attr->qp_access_flags = qp->qp_access_flags;
	attr->cap.max_send_wr = qp->s_size - 1;
	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
	attr->cap.max_send_sge = qp->s_max_sge;
	attr->cap.max_recv_sge = qp->r_rq.max_sge;
	attr->cap.max_inline_data = 0;
	attr->ah_attr = qp->remote_ah_attr;
	attr->alt_ah_attr = qp->alt_ah_attr;
	attr->pkey_index = qp->s_pkey_index;
	attr->alt_pkey_index = qp->s_alt_pkey_index;
	attr->en_sqd_async_notify = 0;
	attr->sq_draining = qp->s_draining;
	attr->max_rd_atomic = qp->s_max_rd_atomic;
	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
	attr->min_rnr_timer = qp->r_min_rnr_timer;
	attr->port_num = qp->port_num;
	attr->timeout = qp->timeout;
	attr->retry_cnt = qp->s_retry_cnt;
	attr->rnr_retry = qp->s_rnr_retry_cnt;
	attr->alt_port_num = qp->alt_ah_attr.port_num;
	attr->alt_timeout = qp->alt_timeout;

	init_attr->event_handler = qp->ibqp.event_handler;
	init_attr->qp_context = qp->ibqp.qp_context;
	init_attr->send_cq = qp->ibqp.send_cq;
	init_attr->recv_cq = qp->ibqp.recv_cq;
	init_attr->srq = qp->ibqp.srq;
	init_attr->cap = attr->cap;
	if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
	else
		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr->qp_type = qp->ibqp.qp_type;
	init_attr->port_num = qp->port_num;
	return 0;
}

/**
 * qib_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 qib_compute_aeth(struct qib_qp *qp)
{
	u32 aeth = qp->r_msn & QIB_MSN_MASK;

	if (qp->ibqp.srq) {
		/*
		 * Shared receive queues don't generate credits.
		 * Set the credit field to the invalid value.
		 */
		aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT;
	} else {
		u32 min, max, x;
		u32 credits;
		struct qib_rwq *wq = qp->r_rq.wq;
		u32 head;
		u32 tail;

		/* sanity check pointers before trusting them */
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		/*
		 * Compute the number of credits available (RWQEs).
		 * XXX Not holding the r_rq.lock here so there is a small
		 * chance that the pair of reads are not atomic.
		 */
		credits = head - tail;
		if ((int)credits < 0)
			credits += qp->r_rq.size;
		/*
		 * Binary search the credit table to find the code to
		 * use.
		 */
		min = 0;
		max = 31;
		for (;;) {
			x = (min + max) / 2;
			if (credit_table[x] == credits)
				break;
			if (credit_table[x] > credits)
				max = x;
			else if (min == x)
				break;
			else
				min = x;
		}
		aeth |= x << QIB_AETH_CREDIT_SHIFT;
	}
	return cpu_to_be32(aeth);
}
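/*
 * Worked example for qib_compute_aeth(): with 100 RWQEs available, the
 * binary search above settles on index 0xD (96 credits), the largest
 * credit_table entry that does not exceed the actual count, so the AETH
 * advertises 96 credits alongside the MSN (masked with QIB_MSN_MASK).
 */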
/**
 * qib_create_qp - create a queue pair for a device
 * @ibpd: the protection domain whose device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Returns the queue pair on success, otherwise returns an errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct qib_qp *qp;
	int err;
	struct qib_swqe *swq = NULL;
	struct qib_ibdev *dev;
	struct qib_devdata *dd;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret;

	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	/* Check receive queue parameters if no SRQ is specified. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
		    init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
	}

	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > ibpd->device->phys_port_cnt) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
		sz = sizeof(struct qib_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct qib_swqe);
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (swq == NULL) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct qib_srq *srq = to_isrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
		if (!qp) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_swq;
		}
		if (init_attr->srq)
			sz = 0;
		else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct qib_rwqe);
			qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
						   qp->r_rq.size * sz);
			if (!qp->r_rq.wq) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qp;
			}
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->r_lock);
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_waitqueue_head(&qp->wait_dma);
		init_timer(&qp->s_timer);
		qp->s_timer.data = (unsigned long)qp;
		INIT_WORK(&qp->s_work, qib_do_send);
		INIT_LIST_HEAD(&qp->iowait);
		INIT_LIST_HEAD(&qp->rspwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = QIB_S_SIGNAL_REQ_WR;
		dev = to_idev(ibpd->device);
		dd = dd_from_dev(dev);
		err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
				init_attr->port_num);
		if (err < 0) {
			ret = ERR_PTR(err);
			vfree(qp->r_rq.wq);
			goto bail_qp;
		}
		qp->ibqp.qp_num = err;
		qp->port_num = init_attr->port_num;
		qib_reset_qp(qp, init_attr->qp_type);
		break;

	default:
		/* Don't support raw QPs */
		ret = ERR_PTR(-ENOSYS);
		goto bail;
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		} else {
			u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;

			qp->ip = qib_create_mmap_info(dev, s,
						      ibpd->uobject->context,
						      qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_ip;
			}

			err = ib_copy_to_udata(udata, &(qp->ip->offset),
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
	}

	spin_lock(&dev->n_qps_lock);
	if (dev->n_qps_allocated == ib_qib_max_qps) {
		spin_unlock(&dev->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_qps_allocated++;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = &qp->ibqp;
	goto bail;

bail_ip:
	if (qp->ip)
		kref_put(&qp->ip->ref, qib_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
bail_qp:
	kfree(qp);
bail_swq:
	vfree(swq);
bail:
	return ret;
}
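/*
 * The offset copied to udata above is meant to be handed back to mmap()
 * on the verbs device file; qib_mmap() matches it against the mmap info
 * created here and maps the receive work queue, the intent being that
 * the user library can manipulate the RWQ directly.  An offset of zero
 * indicates there is no RWQ to map (an SRQ is in use).
 */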
/**
 * qib_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int qib_destroy_qp(struct ib_qp *ibqp)
{
	struct qib_qp *qp = to_iqp(ibqp);
	struct qib_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		spin_lock(&dev->pending_lock);
		if (!list_empty(&qp->iowait))
			list_del_init(&qp->iowait);
		spin_unlock(&dev->pending_lock);
		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
		spin_unlock_irq(&qp->s_lock);
		cancel_work_sync(&qp->s_work);
		del_timer_sync(&qp->s_timer);
		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
		if (qp->s_tx) {
			qib_put_txreq(qp->s_tx);
			qp->s_tx = NULL;
		}
		remove_qp(dev, qp);
		wait_event(qp->wait, !atomic_read(&qp->refcount));
		clear_mr_refs(qp, 1);
	} else
		spin_unlock_irq(&qp->s_lock);

	/* all user references cleaned up, mark the QPN available */
	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip)
		kref_put(&qp->ip->ref, qib_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp);
	return 0;
}

/**
 * qib_init_qpn_table - initialize the QP number table for a device
 * @dd: the qlogic_ib device
 * @qpt: the QPN table
 */
void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
{
	spin_lock_init(&qpt->lock);
	qpt->last = 1;		/* start with QPN 2 */
	qpt->nmaps = 1;
	qpt->mask = dd->qpn_mask;
}

/**
 * qib_free_qpn_table - free the QP number table for a device
 * @qpt: the QPN table
 */
void qib_free_qpn_table(struct qib_qpn_table *qpt)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
		if (qpt->map[i].page)
			free_page((unsigned long) qpt->map[i].page);
}
/**
 * qib_get_credit - flush the send work queue of a QP
 * @qp: the qp whose send work queue to flush
 * @aeth: the Acknowledge Extended Transport Header
 *
 * The QP s_lock should be held.
 */
void qib_get_credit(struct qib_qp *qp, u32 aeth)
{
	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;

	/*
	 * If the credit is invalid, we can send
	 * as many packets as we like.  Otherwise, we have to
	 * honor the credit field.
	 */
	if (credit == QIB_AETH_CREDIT_INVAL) {
		if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
			qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
				qib_schedule_send(qp);
			}
		}
	} else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
		/* Compute new LSN (i.e., MSN + credit) */
		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
		if (qib_cmp24(credit, qp->s_lsn) > 0) {
			qp->s_lsn = credit;
			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
				qib_schedule_send(qp);
			}
		}
	}
}
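/*
 * Example of the credit handling above: a credit code of
 * QIB_AETH_CREDIT_INVAL switches the QP to unlimited credit; a code of
 * 0x8 grants credit_table[0x8] == 16 RWQEs, so the new limit (s_lsn)
 * becomes the AETH's MSN plus 16, and a sender blocked on
 * QIB_S_WAIT_SSN_CREDIT is rescheduled if that moves the limit forward.
 */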