Path: blob/master/drivers/infiniband/hw/ipath/ipath_verbs.c
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/utsname.h>
#include <linux/rculist.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"

static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int ib_ipath_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_ipath_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int ib_ipath_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_ipath_max_qps = 16384;
module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_ipath_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int ib_ipath_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int ib_ipath_max_srqs = 1024;
module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_ipath_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
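
/*
 * Illustrative note (not in the original source): these module parameters
 * feed the limits used later in this file.  For example,
 * ipath_register_ib_device() sizes the LKEY table as
 * 1 << ib_ipath_lkey_table_size, so the default of 12 yields a 4096-entry
 * table, and ipath_query_device() reports ib_ipath_max_qps,
 * ib_ipath_max_cqes, etc. as the device attributes seen by consumers.
 */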

/*
 * Note that it is OK to post send work requests in the SQE and ERR
 * states; ipath_do_send() will process them and generate error
 * completions as per IB 1.2 C10-96.
 */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
	    IPATH_PROCESS_NEXT_SEND_OK,
	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};
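
/*
 * Example (added for clarity, not in the original source): code in this
 * file tests a QP's current capabilities by indexing this table with the
 * QP state, e.g.
 *
 *	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
 *		goto bail_inval;
 *
 * so a QP in IB_QPS_RTS accepts and processes both sends and receives,
 * while IB_QPS_RESET (0) rejects every operation.
 */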

struct ipath_ucontext {
	struct ib_ucontext ibucontext;
};

static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
						  *ibucontext)
{
	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

/*
 * System image GUID.
 */
static __be64 sys_image_guid;

/**
 * ipath_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 */
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}

/**
 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
 * @ss: the SGE state
 * @length: the number of bytes to skip
 */
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}

/*
 * Count the number of DMA descriptors needed to send length bytes of data.
 * Don't modify the ipath_sge_state to get the count.
 * Return zero if any of the segments is not aligned.
 */
static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sg_list = ss->sg_list;
	struct ipath_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 ndesc = 1;	/* count the header */

	while (length) {
		u32 len = sge.length;

		if (len > length)
			len = length;
		if (len > sge.sge_length)
			len = sge.sge_length;
		BUG_ON(len == 0);
		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
		    (len != length && (len & (sizeof(u32) - 1)))) {
			ndesc = 0;
			break;
		}
		ndesc++;
		sge.vaddr += len;
		sge.length -= len;
		sge.sge_length -= len;
		if (sge.sge_length == 0) {
			if (--num_sge)
				sge = *sg_list++;
		} else if (sge.length == 0 && sge.mr != NULL) {
			if (++sge.n >= IPATH_SEGSZ) {
				if (++sge.m >= sge.mr->mapsz)
					break;
				sge.n = 0;
			}
			sge.vaddr =
				sge.mr->map[sge.m]->segs[sge.n].vaddr;
			sge.length =
				sge.mr->map[sge.m]->segs[sge.n].length;
		}
		length -= len;
	}
	return ndesc;
}

/*
 * Copy from the SGEs to the data buffer.
 */
static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
				u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(data, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}

/**
 * ipath_post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	u32 next;
	int i;
	int j;
	int acc;
	int ret;
	unsigned long flags;
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;

	spin_lock_irqsave(&qp->s_lock, flags);

	if (qp->ibqp.qp_type != IB_QPT_SMI &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		ret = -ENETDOWN;
		goto bail;
	}

	/* Check that state is OK to post send. */
	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
		goto bail_inval;

	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge)
		goto bail_inval;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			goto bail_inval;
	} else if (qp->ibqp.qp_type == IB_QPT_UD) {
		/* Check UD opcode */
		if (wr->opcode != IB_WR_SEND &&
		    wr->opcode != IB_WR_SEND_WITH_IMM)
			goto bail_inval;
		/* Check UD destination address PD */
		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
			goto bail_inval;
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
		goto bail_inval;
	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		 (wr->num_sge == 0 ||
		  wr->sg_list[0].length < sizeof(u64) ||
		  wr->sg_list[0].addr & (sizeof(u64) - 1)))
		goto bail_inval;
	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
		goto bail_inval;

	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		ret = -ENOMEM;
		goto bail;
	}

	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->length = 0;
	if (wr->num_sge) {
		acc = wr->opcode >= IB_WR_RDMA_READ ?
			IB_ACCESS_LOCAL_WRITE : 0;
		for (i = 0, j = 0; i < wr->num_sge; i++) {
			u32 length = wr->sg_list[i].length;
			int ok;

			if (length == 0)
				continue;
			ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
					   &wr->sg_list[i], acc);
			if (!ok)
				goto bail_inval;
			wqe->length += length;
			j++;
		}
		wqe->wr.num_sge = j;
	}
	if (qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_RC) {
		if (wqe->length > 0x80000000U)
			goto bail_inval;
	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
		goto bail_inval;
	wqe->ssn = qp->s_ssn++;
	qp->s_head = next;

	ret = 0;
	goto bail;

bail_inval:
	ret = -EINVAL;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

/**
 * ipath_post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			   struct ib_send_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	int err = 0;

	for (; wr; wr = wr->next) {
		err = ipath_post_one_send(qp, wr);
		if (err) {
			*bad_wr = wr;
			goto bail;
		}
	}

	/* Try to do the send work in the caller's context. */
	ipath_do_send((unsigned long) qp);

bail:
	return err;
}
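
/*
 * Illustrative sketch (not part of the original file): a kernel verbs
 * consumer reaches ipath_post_send() indirectly through ib_post_send().
 * The buffer, MR and QP setup below are assumed to already exist.
 *
 *	struct ib_sge sge = {
 *		.addr	= dma_addr,
 *		.length	= len,
 *		.lkey	= mr->lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id		= 1,
 *		.sg_list	= &sge,
 *		.num_sge	= 1,
 *		.opcode		= IB_WR_SEND,
 *		.send_flags	= IB_SEND_SIGNALED,
 *	};
 *	struct ib_send_wr *bad_wr;
 *	int err = ib_post_send(qp, &wr, &bad_wr);
 *
 * Each chained ib_send_wr is validated and queued by
 * ipath_post_one_send() above before the send engine runs.
 */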

/**
 * ipath_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			      struct ib_recv_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct ipath_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto bail;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
		wqe->wr_id = wr->wr_id;
		wqe->num_sge = wr->num_sge;
		for (i = 0; i < wr->num_sge; i++)
			wqe->sg_list[i] = wr->sg_list[i];
		/* Make sure queue entry is written before the head index. */
		smp_wmb();
		wq->head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}

/**
 * ipath_qp_rcv - process an incoming packet on a QP
 * @dev: the device the packet came on
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from ipath_ib_rcv() to process an incoming packet
 * for the given QP.
 * Called at interrupt level.
 */
static void ipath_qp_rcv(struct ipath_ibdev *dev,
			 struct ipath_ib_header *hdr, int has_grh,
			 void *data, u32 tlen, struct ipath_qp *qp)
{
	/* Check for valid receive state. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		return;
	}

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (ib_ipath_disable_sma)
			break;
		/* FALLTHROUGH */
	case IB_QPT_UD:
		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_RC:
		ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_UC:
		ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	default:
		break;
	}
}

/**
 * ipath_ib_rcv - process an incoming packet
 * @dev: the device pointer
 * @rhdr: the header of the packet
 * @data: the packet data
 * @tlen: the packet length
 *
 * This is called from ipath_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
		  u32 tlen)
{
	struct ipath_ib_header *hdr = rhdr;
	struct ipath_other_headers *ohdr;
	struct ipath_qp *qp;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	if (unlikely(dev == NULL))
		goto bail;

	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
		dev->rcv_errors++;
		goto bail;
	}

	/* Check for a valid destination LID (see ch. 7.11.1). */
	lid = be16_to_cpu(hdr->lrh[1]);
	if (lid < IPATH_MULTICAST_LID_BASE) {
		lid &= ~((1 << dev->dd->ipath_lmc) - 1);
		if (unlikely(lid != dev->dd->ipath_lid)) {
			dev->rcv_errors++;
			goto bail;
		}
	}

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == IPATH_LRH_BTH)
		ohdr = &hdr->u.oth;
	else if (lnh == IPATH_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else {
		dev->rcv_errors++;
		goto bail;
	}

	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	dev->opstats[opcode].n_bytes += tlen;
	dev->opstats[opcode].n_packets++;

	/* Get the destination QP number.
*/626qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;627if (qp_num == IPATH_MULTICAST_QPN) {628struct ipath_mcast *mcast;629struct ipath_mcast_qp *p;630631if (lnh != IPATH_LRH_GRH) {632dev->n_pkt_drops++;633goto bail;634}635mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);636if (mcast == NULL) {637dev->n_pkt_drops++;638goto bail;639}640dev->n_multicast_rcv++;641list_for_each_entry_rcu(p, &mcast->qp_list, list)642ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);643/*644* Notify ipath_multicast_detach() if it is waiting for us645* to finish.646*/647if (atomic_dec_return(&mcast->refcount) <= 1)648wake_up(&mcast->wait);649} else {650qp = ipath_lookup_qpn(&dev->qp_table, qp_num);651if (qp) {652dev->n_unicast_rcv++;653ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,654tlen, qp);655/*656* Notify ipath_destroy_qp() if it is waiting657* for us to finish.658*/659if (atomic_dec_and_test(&qp->refcount))660wake_up(&qp->wait);661} else662dev->n_pkt_drops++;663}664665bail:;666}667668/**669* ipath_ib_timer - verbs timer670* @arg: the device pointer671*672* This is called from ipath_do_rcv_timer() at interrupt level to check for673* QPs which need retransmits and to collect performance numbers.674*/675static void ipath_ib_timer(struct ipath_ibdev *dev)676{677struct ipath_qp *resend = NULL;678struct ipath_qp *rnr = NULL;679struct list_head *last;680struct ipath_qp *qp;681unsigned long flags;682683if (dev == NULL)684return;685686spin_lock_irqsave(&dev->pending_lock, flags);687/* Start filling the next pending queue. */688if (++dev->pending_index >= ARRAY_SIZE(dev->pending))689dev->pending_index = 0;690/* Save any requests still in the new queue, they have timed out. */691last = &dev->pending[dev->pending_index];692while (!list_empty(last)) {693qp = list_entry(last->next, struct ipath_qp, timerwait);694list_del_init(&qp->timerwait);695qp->timer_next = resend;696resend = qp;697atomic_inc(&qp->refcount);698}699last = &dev->rnrwait;700if (!list_empty(last)) {701qp = list_entry(last->next, struct ipath_qp, timerwait);702if (--qp->s_rnr_timeout == 0) {703do {704list_del_init(&qp->timerwait);705qp->timer_next = rnr;706rnr = qp;707atomic_inc(&qp->refcount);708if (list_empty(last))709break;710qp = list_entry(last->next, struct ipath_qp,711timerwait);712} while (qp->s_rnr_timeout == 0);713}714}715/*716* We should only be in the started state if pma_sample_start != 0717*/718if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&719--dev->pma_sample_start == 0) {720dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;721ipath_snapshot_counters(dev->dd, &dev->ipath_sword,722&dev->ipath_rword,723&dev->ipath_spkts,724&dev->ipath_rpkts,725&dev->ipath_xmit_wait);726}727if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {728if (dev->pma_sample_interval == 0) {729u64 ta, tb, tc, td, te;730731dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;732ipath_snapshot_counters(dev->dd, &ta, &tb,733&tc, &td, &te);734735dev->ipath_sword = ta - dev->ipath_sword;736dev->ipath_rword = tb - dev->ipath_rword;737dev->ipath_spkts = tc - dev->ipath_spkts;738dev->ipath_rpkts = td - dev->ipath_rpkts;739dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;740}741else742dev->pma_sample_interval--;743}744spin_unlock_irqrestore(&dev->pending_lock, flags);745746/* XXX What if timer fires again while this is running? 
*/747while (resend != NULL) {748qp = resend;749resend = qp->timer_next;750751spin_lock_irqsave(&qp->s_lock, flags);752if (qp->s_last != qp->s_tail &&753ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {754dev->n_timeouts++;755ipath_restart_rc(qp, qp->s_last_psn + 1);756}757spin_unlock_irqrestore(&qp->s_lock, flags);758759/* Notify ipath_destroy_qp() if it is waiting. */760if (atomic_dec_and_test(&qp->refcount))761wake_up(&qp->wait);762}763while (rnr != NULL) {764qp = rnr;765rnr = qp->timer_next;766767spin_lock_irqsave(&qp->s_lock, flags);768if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)769ipath_schedule_send(qp);770spin_unlock_irqrestore(&qp->s_lock, flags);771772/* Notify ipath_destroy_qp() if it is waiting. */773if (atomic_dec_and_test(&qp->refcount))774wake_up(&qp->wait);775}776}777778static void update_sge(struct ipath_sge_state *ss, u32 length)779{780struct ipath_sge *sge = &ss->sge;781782sge->vaddr += length;783sge->length -= length;784sge->sge_length -= length;785if (sge->sge_length == 0) {786if (--ss->num_sge)787*sge = *ss->sg_list++;788} else if (sge->length == 0 && sge->mr != NULL) {789if (++sge->n >= IPATH_SEGSZ) {790if (++sge->m >= sge->mr->mapsz)791return;792sge->n = 0;793}794sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;795sge->length = sge->mr->map[sge->m]->segs[sge->n].length;796}797}798799#ifdef __LITTLE_ENDIAN800static inline u32 get_upper_bits(u32 data, u32 shift)801{802return data >> shift;803}804805static inline u32 set_upper_bits(u32 data, u32 shift)806{807return data << shift;808}809810static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)811{812data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);813data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);814return data;815}816#else817static inline u32 get_upper_bits(u32 data, u32 shift)818{819return data << shift;820}821822static inline u32 set_upper_bits(u32 data, u32 shift)823{824return data >> shift;825}826827static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)828{829data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);830data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);831return data;832}833#endif834835static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,836u32 length, unsigned flush_wc)837{838u32 extra = 0;839u32 data = 0;840u32 last;841842while (1) {843u32 len = ss->sge.length;844u32 off;845846if (len > length)847len = length;848if (len > ss->sge.sge_length)849len = ss->sge.sge_length;850BUG_ON(len == 0);851/* If the source address is not aligned, try to align it. */852off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);853if (off) {854u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &855~(sizeof(u32) - 1));856u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);857u32 y;858859y = sizeof(u32) - off;860if (len > y)861len = y;862if (len + extra >= sizeof(u32)) {863data |= set_upper_bits(v, extra *864BITS_PER_BYTE);865len = sizeof(u32) - extra;866if (len == length) {867last = data;868break;869}870__raw_writel(data, piobuf);871piobuf++;872extra = 0;873data = 0;874} else {875/* Clear unused upper bytes */876data |= clear_upper_bytes(v, len, extra);877if (len == length) {878last = data;879break;880}881extra += len;882}883} else if (extra) {884/* Source address is aligned. 
*/885u32 *addr = (u32 *) ss->sge.vaddr;886int shift = extra * BITS_PER_BYTE;887int ushift = 32 - shift;888u32 l = len;889890while (l >= sizeof(u32)) {891u32 v = *addr;892893data |= set_upper_bits(v, shift);894__raw_writel(data, piobuf);895data = get_upper_bits(v, ushift);896piobuf++;897addr++;898l -= sizeof(u32);899}900/*901* We still have 'extra' number of bytes leftover.902*/903if (l) {904u32 v = *addr;905906if (l + extra >= sizeof(u32)) {907data |= set_upper_bits(v, shift);908len -= l + extra - sizeof(u32);909if (len == length) {910last = data;911break;912}913__raw_writel(data, piobuf);914piobuf++;915extra = 0;916data = 0;917} else {918/* Clear unused upper bytes */919data |= clear_upper_bytes(v, l,920extra);921if (len == length) {922last = data;923break;924}925extra += l;926}927} else if (len == length) {928last = data;929break;930}931} else if (len == length) {932u32 w;933934/*935* Need to round up for the last dword in the936* packet.937*/938w = (len + 3) >> 2;939__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);940piobuf += w - 1;941last = ((u32 *) ss->sge.vaddr)[w - 1];942break;943} else {944u32 w = len >> 2;945946__iowrite32_copy(piobuf, ss->sge.vaddr, w);947piobuf += w;948949extra = len & (sizeof(u32) - 1);950if (extra) {951u32 v = ((u32 *) ss->sge.vaddr)[w];952953/* Clear unused upper bytes */954data = clear_upper_bytes(v, extra, 0);955}956}957update_sge(ss, len);958length -= len;959}960/* Update address before sending packet. */961update_sge(ss, length);962if (flush_wc) {963/* must flush early everything before trigger word */964ipath_flush_wc();965__raw_writel(last, piobuf);966/* be sure trigger word is written */967ipath_flush_wc();968} else969__raw_writel(last, piobuf);970}971972/*973* Convert IB rate to delay multiplier.974*/975unsigned ipath_ib_rate_to_mult(enum ib_rate rate)976{977switch (rate) {978case IB_RATE_2_5_GBPS: return 8;979case IB_RATE_5_GBPS: return 4;980case IB_RATE_10_GBPS: return 2;981case IB_RATE_20_GBPS: return 1;982default: return 0;983}984}985986/*987* Convert delay multiplier to IB rate988*/989static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)990{991switch (mult) {992case 8: return IB_RATE_2_5_GBPS;993case 4: return IB_RATE_5_GBPS;994case 2: return IB_RATE_10_GBPS;995case 1: return IB_RATE_20_GBPS;996default: return IB_RATE_PORT_CURRENT;997}998}9991000static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)1001{1002struct ipath_verbs_txreq *tx = NULL;1003unsigned long flags;10041005spin_lock_irqsave(&dev->pending_lock, flags);1006if (!list_empty(&dev->txreq_free)) {1007struct list_head *l = dev->txreq_free.next;10081009list_del(l);1010tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);1011}1012spin_unlock_irqrestore(&dev->pending_lock, flags);1013return tx;1014}10151016static inline void put_txreq(struct ipath_ibdev *dev,1017struct ipath_verbs_txreq *tx)1018{1019unsigned long flags;10201021spin_lock_irqsave(&dev->pending_lock, flags);1022list_add(&tx->txreq.list, &dev->txreq_free);1023spin_unlock_irqrestore(&dev->pending_lock, flags);1024}10251026static void sdma_complete(void *cookie, int status)1027{1028struct ipath_verbs_txreq *tx = cookie;1029struct ipath_qp *qp = tx->qp;1030struct ipath_ibdev *dev = to_idev(qp->ibqp.device);1031unsigned long flags;1032enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?1033IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;10341035if (atomic_dec_and_test(&qp->s_dma_busy)) {1036spin_lock_irqsave(&qp->s_lock, flags);1037if (tx->wqe)1038ipath_send_complete(qp, tx->wqe, ibs);1039if 
((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&1040qp->s_last != qp->s_head) ||1041(qp->s_flags & IPATH_S_WAIT_DMA))1042ipath_schedule_send(qp);1043spin_unlock_irqrestore(&qp->s_lock, flags);1044wake_up(&qp->wait_dma);1045} else if (tx->wqe) {1046spin_lock_irqsave(&qp->s_lock, flags);1047ipath_send_complete(qp, tx->wqe, ibs);1048spin_unlock_irqrestore(&qp->s_lock, flags);1049}10501051if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)1052kfree(tx->txreq.map_addr);1053put_txreq(dev, tx);10541055if (atomic_dec_and_test(&qp->refcount))1056wake_up(&qp->wait);1057}10581059static void decrement_dma_busy(struct ipath_qp *qp)1060{1061unsigned long flags;10621063if (atomic_dec_and_test(&qp->s_dma_busy)) {1064spin_lock_irqsave(&qp->s_lock, flags);1065if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&1066qp->s_last != qp->s_head) ||1067(qp->s_flags & IPATH_S_WAIT_DMA))1068ipath_schedule_send(qp);1069spin_unlock_irqrestore(&qp->s_lock, flags);1070wake_up(&qp->wait_dma);1071}1072}10731074/*1075* Compute the number of clock cycles of delay before sending the next packet.1076* The multipliers reflect the number of clocks for the fastest rate so1077* one tick at 4xDDR is 8 ticks at 1xSDR.1078* If the destination port will take longer to receive a packet than1079* the outgoing link can send it, we need to delay sending the next packet1080* by the difference in time it takes the receiver to receive and the sender1081* to send this packet.1082* Note that this delay is always correct for UC and RC but not always1083* optimal for UD. For UD, the destination HCA can be different for each1084* packet, in which case, we could send packets to a different destination1085* while "waiting" for the delay. The overhead for doing this without1086* HW support is more than just paying the cost of delaying some packets1087* unnecessarily.1088*/1089static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)1090{1091return (rcv_mult > snd_mult) ?1092(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;1093}10941095static int ipath_verbs_send_dma(struct ipath_qp *qp,1096struct ipath_ib_header *hdr, u32 hdrwords,1097struct ipath_sge_state *ss, u32 len,1098u32 plen, u32 dwords)1099{1100struct ipath_ibdev *dev = to_idev(qp->ibqp.device);1101struct ipath_devdata *dd = dev->dd;1102struct ipath_verbs_txreq *tx;1103u32 *piobuf;1104u32 control;1105u32 ndesc;1106int ret;11071108tx = qp->s_tx;1109if (tx) {1110qp->s_tx = NULL;1111/* resend previously constructed packet */1112atomic_inc(&qp->s_dma_busy);1113ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);1114if (ret) {1115qp->s_tx = tx;1116decrement_dma_busy(qp);1117}1118goto bail;1119}11201121tx = get_txreq(dev);1122if (!tx) {1123ret = -EBUSY;1124goto bail;1125}11261127/*1128* Get the saved delay count we computed for the previous packet1129* and save the delay count for this packet to be used next time1130* we get here.1131*/1132control = qp->s_pkt_delay;1133qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);11341135tx->qp = qp;1136atomic_inc(&qp->refcount);1137tx->wqe = qp->s_wqe;1138tx->txreq.callback = sdma_complete;1139tx->txreq.callback_cookie = tx;1140tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |1141IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;1142if (plen + 1 >= IPATH_SMALLBUF_DWORDS)1143tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;11441145/* VL15 packets bypass credit check */1146if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {1147control |= 1ULL << 31;1148tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;1149}11501151if (len) 
{1152/*1153* Don't try to DMA if it takes more descriptors than1154* the queue holds.1155*/1156ndesc = ipath_count_sge(ss, len);1157if (ndesc >= dd->ipath_sdma_descq_cnt)1158ndesc = 0;1159} else1160ndesc = 1;1161if (ndesc) {1162tx->hdr.pbc[0] = cpu_to_le32(plen);1163tx->hdr.pbc[1] = cpu_to_le32(control);1164memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);1165tx->txreq.sg_count = ndesc;1166tx->map_len = (hdrwords + 2) << 2;1167tx->txreq.map_addr = &tx->hdr;1168atomic_inc(&qp->s_dma_busy);1169ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);1170if (ret) {1171/* save ss and length in dwords */1172tx->ss = ss;1173tx->len = dwords;1174qp->s_tx = tx;1175decrement_dma_busy(qp);1176}1177goto bail;1178}11791180/* Allocate a buffer and copy the header and payload to it. */1181tx->map_len = (plen + 1) << 2;1182piobuf = kmalloc(tx->map_len, GFP_ATOMIC);1183if (unlikely(piobuf == NULL)) {1184ret = -EBUSY;1185goto err_tx;1186}1187tx->txreq.map_addr = piobuf;1188tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;1189tx->txreq.sg_count = 1;11901191*piobuf++ = (__force u32) cpu_to_le32(plen);1192*piobuf++ = (__force u32) cpu_to_le32(control);1193memcpy(piobuf, hdr, hdrwords << 2);1194ipath_copy_from_sge(piobuf + hdrwords, ss, len);11951196atomic_inc(&qp->s_dma_busy);1197ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);1198/*1199* If we couldn't queue the DMA request, save the info1200* and try again later rather than destroying the1201* buffer and undoing the side effects of the copy.1202*/1203if (ret) {1204tx->ss = NULL;1205tx->len = 0;1206qp->s_tx = tx;1207decrement_dma_busy(qp);1208}1209dev->n_unaligned++;1210goto bail;12111212err_tx:1213if (atomic_dec_and_test(&qp->refcount))1214wake_up(&qp->wait);1215put_txreq(dev, tx);1216bail:1217return ret;1218}12191220static int ipath_verbs_send_pio(struct ipath_qp *qp,1221struct ipath_ib_header *ibhdr, u32 hdrwords,1222struct ipath_sge_state *ss, u32 len,1223u32 plen, u32 dwords)1224{1225struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;1226u32 *hdr = (u32 *) ibhdr;1227u32 __iomem *piobuf;1228unsigned flush_wc;1229u32 control;1230int ret;1231unsigned long flags;12321233piobuf = ipath_getpiobuf(dd, plen, NULL);1234if (unlikely(piobuf == NULL)) {1235ret = -EBUSY;1236goto bail;1237}12381239/*1240* Get the saved delay count we computed for the previous packet1241* and save the delay count for this packet to be used next time1242* we get here.1243*/1244control = qp->s_pkt_delay;1245qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);12461247/* VL15 packets bypass credit check */1248if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)1249control |= 1ULL << 31;12501251/*1252* Write the length to the control qword plus any needed flags.1253* We have to flush after the PBC for correctness on some cpus1254* or WC buffer can be written out of order.1255*/1256writeq(((u64) control << 32) | plen, piobuf);1257piobuf += 2;12581259flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;1260if (len == 0) {1261/*1262* If there is just the header portion, must flush before1263* writing last word of header for correctness, and after1264* the last header word (trigger word).1265*/1266if (flush_wc) {1267ipath_flush_wc();1268__iowrite32_copy(piobuf, hdr, hdrwords - 1);1269ipath_flush_wc();1270__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);1271ipath_flush_wc();1272} else1273__iowrite32_copy(piobuf, hdr, hdrwords);1274goto done;1275}12761277if (flush_wc)1278ipath_flush_wc();1279__iowrite32_copy(piobuf, hdr, hdrwords);1280piobuf += hdrwords;12811282/* The common case is aligned and 
contained in one segment. */1283if (likely(ss->num_sge == 1 && len <= ss->sge.length &&1284!((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {1285u32 *addr = (u32 *) ss->sge.vaddr;12861287/* Update address before sending packet. */1288update_sge(ss, len);1289if (flush_wc) {1290__iowrite32_copy(piobuf, addr, dwords - 1);1291/* must flush early everything before trigger word */1292ipath_flush_wc();1293__raw_writel(addr[dwords - 1], piobuf + dwords - 1);1294/* be sure trigger word is written */1295ipath_flush_wc();1296} else1297__iowrite32_copy(piobuf, addr, dwords);1298goto done;1299}1300copy_io(piobuf, ss, len, flush_wc);1301done:1302if (qp->s_wqe) {1303spin_lock_irqsave(&qp->s_lock, flags);1304ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);1305spin_unlock_irqrestore(&qp->s_lock, flags);1306}1307ret = 0;1308bail:1309return ret;1310}13111312/**1313* ipath_verbs_send - send a packet1314* @qp: the QP to send on1315* @hdr: the packet header1316* @hdrwords: the number of 32-bit words in the header1317* @ss: the SGE to send1318* @len: the length of the packet in bytes1319*/1320int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,1321u32 hdrwords, struct ipath_sge_state *ss, u32 len)1322{1323struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;1324u32 plen;1325int ret;1326u32 dwords = (len + 3) >> 2;13271328/*1329* Calculate the send buffer trigger address.1330* The +1 counts for the pbc control dword following the pbc length.1331*/1332plen = hdrwords + dwords + 1;13331334/*1335* VL15 packets (IB_QPT_SMI) will always use PIO, so we1336* can defer SDMA restart until link goes ACTIVE without1337* worrying about just how we got there.1338*/1339if (qp->ibqp.qp_type == IB_QPT_SMI ||1340!(dd->ipath_flags & IPATH_HAS_SEND_DMA))1341ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,1342plen, dwords);1343else1344ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,1345plen, dwords);13461347return ret;1348}13491350int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,1351u64 *rwords, u64 *spkts, u64 *rpkts,1352u64 *xmit_wait)1353{1354int ret;13551356if (!(dd->ipath_flags & IPATH_INITTED)) {1357/* no hardware, freeze, etc. */1358ret = -EINVAL;1359goto bail;1360}1361*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);1362*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);1363*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);1364*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);1365*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);13661367ret = 0;13681369bail:1370return ret;1371}13721373/**1374* ipath_get_counters - get various chip counters1375* @dd: the infinipath device1376* @cntrs: counters are placed here1377*1378* Return the counters needed by recv_pma_get_portcounters().1379*/1380int ipath_get_counters(struct ipath_devdata *dd,1381struct ipath_verbs_counters *cntrs)1382{1383struct ipath_cregs const *crp = dd->ipath_cregs;1384int ret;13851386if (!(dd->ipath_flags & IPATH_INITTED)) {1387/* no hardware, freeze, etc. */1388ret = -EINVAL;1389goto bail;1390}1391cntrs->symbol_error_counter =1392ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);1393cntrs->link_error_recovery_counter =1394ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);1395/*1396* The link downed counter counts when the other side downs the1397* connection. 
We add in the number of times we downed the link1398* due to local link integrity errors to compensate.1399*/1400cntrs->link_downed_counter =1401ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);1402cntrs->port_rcv_errors =1403ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +1404ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +1405ipath_snap_cntr(dd, crp->cr_portovflcnt) +1406ipath_snap_cntr(dd, crp->cr_err_rlencnt) +1407ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +1408ipath_snap_cntr(dd, crp->cr_errlinkcnt) +1409ipath_snap_cntr(dd, crp->cr_erricrccnt) +1410ipath_snap_cntr(dd, crp->cr_errvcrccnt) +1411ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +1412ipath_snap_cntr(dd, crp->cr_badformatcnt) +1413dd->ipath_rxfc_unsupvl_errs;1414if (crp->cr_rxotherlocalphyerrcnt)1415cntrs->port_rcv_errors +=1416ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);1417if (crp->cr_rxvlerrcnt)1418cntrs->port_rcv_errors +=1419ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);1420cntrs->port_rcv_remphys_errors =1421ipath_snap_cntr(dd, crp->cr_rcvebpcnt);1422cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);1423cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);1424cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);1425cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);1426cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);1427cntrs->local_link_integrity_errors =1428crp->cr_locallinkintegrityerrcnt ?1429ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :1430((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?1431dd->ipath_lli_errs : dd->ipath_lli_errors);1432cntrs->excessive_buffer_overrun_errors =1433crp->cr_excessbufferovflcnt ?1434ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :1435dd->ipath_overrun_thresh_errs;1436cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?1437ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;14381439ret = 0;14401441bail:1442return ret;1443}14441445/**1446* ipath_ib_piobufavail - callback when a PIO buffer is available1447* @arg: the device pointer1448*1449* This is called from ipath_intr() at interrupt level when a PIO buffer is1450* available after ipath_verbs_send() returned an error that no buffers were1451* available. Return 1 if we consumed all the PIO buffers and we still have1452* QPs waiting for buffers (for now, just restart the send tasklet and1453* return zero).1454*/1455int ipath_ib_piobufavail(struct ipath_ibdev *dev)1456{1457struct list_head *list;1458struct ipath_qp *qplist;1459struct ipath_qp *qp;1460unsigned long flags;14611462if (dev == NULL)1463goto bail;14641465list = &dev->piowait;1466qplist = NULL;14671468spin_lock_irqsave(&dev->pending_lock, flags);1469while (!list_empty(list)) {1470qp = list_entry(list->next, struct ipath_qp, piowait);1471list_del_init(&qp->piowait);1472qp->pio_next = qplist;1473qplist = qp;1474atomic_inc(&qp->refcount);1475}1476spin_unlock_irqrestore(&dev->pending_lock, flags);14771478while (qplist != NULL) {1479qp = qplist;1480qplist = qp->pio_next;14811482spin_lock_irqsave(&qp->s_lock, flags);1483if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)1484ipath_schedule_send(qp);1485spin_unlock_irqrestore(&qp->s_lock, flags);14861487/* Notify ipath_destroy_qp() if it is waiting. 
*/1488if (atomic_dec_and_test(&qp->refcount))1489wake_up(&qp->wait);1490}14911492bail:1493return 0;1494}14951496static int ipath_query_device(struct ib_device *ibdev,1497struct ib_device_attr *props)1498{1499struct ipath_ibdev *dev = to_idev(ibdev);15001501memset(props, 0, sizeof(*props));15021503props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |1504IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |1505IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |1506IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;1507props->page_size_cap = PAGE_SIZE;1508props->vendor_id =1509IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;1510props->vendor_part_id = dev->dd->ipath_deviceid;1511props->hw_ver = dev->dd->ipath_pcirev;15121513props->sys_image_guid = dev->sys_image_guid;15141515props->max_mr_size = ~0ull;1516props->max_qp = ib_ipath_max_qps;1517props->max_qp_wr = ib_ipath_max_qp_wrs;1518props->max_sge = ib_ipath_max_sges;1519props->max_cq = ib_ipath_max_cqs;1520props->max_ah = ib_ipath_max_ahs;1521props->max_cqe = ib_ipath_max_cqes;1522props->max_mr = dev->lk_table.max;1523props->max_fmr = dev->lk_table.max;1524props->max_map_per_fmr = 32767;1525props->max_pd = ib_ipath_max_pds;1526props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;1527props->max_qp_init_rd_atom = 255;1528/* props->max_res_rd_atom */1529props->max_srq = ib_ipath_max_srqs;1530props->max_srq_wr = ib_ipath_max_srq_wrs;1531props->max_srq_sge = ib_ipath_max_srq_sges;1532/* props->local_ca_ack_delay */1533props->atomic_cap = IB_ATOMIC_GLOB;1534props->max_pkeys = ipath_get_npkeys(dev->dd);1535props->max_mcast_grp = ib_ipath_max_mcast_grps;1536props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;1537props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *1538props->max_mcast_grp;15391540return 0;1541}15421543const u8 ipath_cvt_physportstate[32] = {1544[INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,1545[INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,1546[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,1547[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,1548[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,1549[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,1550[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =1551IB_PHYSPORTSTATE_CFG_TRAIN,1552[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =1553IB_PHYSPORTSTATE_CFG_TRAIN,1554[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =1555IB_PHYSPORTSTATE_CFG_TRAIN,1556[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,1557[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =1558IB_PHYSPORTSTATE_LINK_ERR_RECOVER,1559[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =1560IB_PHYSPORTSTATE_LINK_ERR_RECOVER,1561[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =1562IB_PHYSPORTSTATE_LINK_ERR_RECOVER,1563[0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,1564[0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,1565[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,1566[0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,1567[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,1568[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,1569[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,1570[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN1571};15721573u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)1574{1575return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);1576}15771578static int ipath_query_port(struct ib_device *ibdev,1579u8 port, struct ib_port_attr *props)1580{1581struct ipath_ibdev *dev = to_idev(ibdev);1582struct ipath_devdata *dd = dev->dd;1583enum ib_mtu mtu;1584u16 lid = dd->ipath_lid;1585u64 ibcstat;15861587memset(props, 0, 
sizeof(*props));1588props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);1589props->lmc = dd->ipath_lmc;1590props->sm_lid = dev->sm_lid;1591props->sm_sl = dev->sm_sl;1592ibcstat = dd->ipath_lastibcstat;1593/* map LinkState to IB portinfo values. */1594props->state = ipath_ib_linkstate(dd, ibcstat) + 1;15951596/* See phys_state_show() */1597props->phys_state = /* MEA: assumes shift == 0 */1598ipath_cvt_physportstate[dd->ipath_lastibcstat &1599dd->ibcs_lts_mask];1600props->port_cap_flags = dev->port_cap_flags;1601props->gid_tbl_len = 1;1602props->max_msg_sz = 0x80000000;1603props->pkey_tbl_len = ipath_get_npkeys(dd);1604props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -1605dev->z_pkey_violations;1606props->qkey_viol_cntr = dev->qkey_violations;1607props->active_width = dd->ipath_link_width_active;1608/* See rate_show() */1609props->active_speed = dd->ipath_link_speed_active;1610props->max_vl_num = 1; /* VLCap = VL0 */1611props->init_type_reply = 0;16121613props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;1614switch (dd->ipath_ibmtu) {1615case 4096:1616mtu = IB_MTU_4096;1617break;1618case 2048:1619mtu = IB_MTU_2048;1620break;1621case 1024:1622mtu = IB_MTU_1024;1623break;1624case 512:1625mtu = IB_MTU_512;1626break;1627case 256:1628mtu = IB_MTU_256;1629break;1630default:1631mtu = IB_MTU_2048;1632}1633props->active_mtu = mtu;1634props->subnet_timeout = dev->subnet_timeout;16351636return 0;1637}16381639static int ipath_modify_device(struct ib_device *device,1640int device_modify_mask,1641struct ib_device_modify *device_modify)1642{1643int ret;16441645if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |1646IB_DEVICE_MODIFY_NODE_DESC)) {1647ret = -EOPNOTSUPP;1648goto bail;1649}16501651if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)1652memcpy(device->node_desc, device_modify->node_desc, 64);16531654if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)1655to_idev(device)->sys_image_guid =1656cpu_to_be64(device_modify->sys_image_guid);16571658ret = 0;16591660bail:1661return ret;1662}16631664static int ipath_modify_port(struct ib_device *ibdev,1665u8 port, int port_modify_mask,1666struct ib_port_modify *props)1667{1668struct ipath_ibdev *dev = to_idev(ibdev);16691670dev->port_cap_flags |= props->set_port_cap_mask;1671dev->port_cap_flags &= ~props->clr_port_cap_mask;1672if (port_modify_mask & IB_PORT_SHUTDOWN)1673ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);1674if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)1675dev->qkey_violations = 0;1676return 0;1677}16781679static int ipath_query_gid(struct ib_device *ibdev, u8 port,1680int index, union ib_gid *gid)1681{1682struct ipath_ibdev *dev = to_idev(ibdev);1683int ret;16841685if (index >= 1) {1686ret = -EINVAL;1687goto bail;1688}1689gid->global.subnet_prefix = dev->gid_prefix;1690gid->global.interface_id = dev->dd->ipath_guid;16911692ret = 0;16931694bail:1695return ret;1696}16971698static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,1699struct ib_ucontext *context,1700struct ib_udata *udata)1701{1702struct ipath_ibdev *dev = to_idev(ibdev);1703struct ipath_pd *pd;1704struct ib_pd *ret;17051706/*1707* This is actually totally arbitrary. 
Some correctness tests1708* assume there's a maximum number of PDs that can be allocated.1709* We don't actually have this limit, but we fail the test if1710* we allow allocations of more than we report for this value.1711*/17121713pd = kmalloc(sizeof *pd, GFP_KERNEL);1714if (!pd) {1715ret = ERR_PTR(-ENOMEM);1716goto bail;1717}17181719spin_lock(&dev->n_pds_lock);1720if (dev->n_pds_allocated == ib_ipath_max_pds) {1721spin_unlock(&dev->n_pds_lock);1722kfree(pd);1723ret = ERR_PTR(-ENOMEM);1724goto bail;1725}17261727dev->n_pds_allocated++;1728spin_unlock(&dev->n_pds_lock);17291730/* ib_alloc_pd() will initialize pd->ibpd. */1731pd->user = udata != NULL;17321733ret = &pd->ibpd;17341735bail:1736return ret;1737}17381739static int ipath_dealloc_pd(struct ib_pd *ibpd)1740{1741struct ipath_pd *pd = to_ipd(ibpd);1742struct ipath_ibdev *dev = to_idev(ibpd->device);17431744spin_lock(&dev->n_pds_lock);1745dev->n_pds_allocated--;1746spin_unlock(&dev->n_pds_lock);17471748kfree(pd);17491750return 0;1751}17521753/**1754* ipath_create_ah - create an address handle1755* @pd: the protection domain1756* @ah_attr: the attributes of the AH1757*1758* This may be called from interrupt context.1759*/1760static struct ib_ah *ipath_create_ah(struct ib_pd *pd,1761struct ib_ah_attr *ah_attr)1762{1763struct ipath_ah *ah;1764struct ib_ah *ret;1765struct ipath_ibdev *dev = to_idev(pd->device);1766unsigned long flags;17671768/* A multicast address requires a GRH (see ch. 8.4.1). */1769if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&1770ah_attr->dlid != IPATH_PERMISSIVE_LID &&1771!(ah_attr->ah_flags & IB_AH_GRH)) {1772ret = ERR_PTR(-EINVAL);1773goto bail;1774}17751776if (ah_attr->dlid == 0) {1777ret = ERR_PTR(-EINVAL);1778goto bail;1779}17801781if (ah_attr->port_num < 1 ||1782ah_attr->port_num > pd->device->phys_port_cnt) {1783ret = ERR_PTR(-EINVAL);1784goto bail;1785}17861787ah = kmalloc(sizeof *ah, GFP_ATOMIC);1788if (!ah) {1789ret = ERR_PTR(-ENOMEM);1790goto bail;1791}17921793spin_lock_irqsave(&dev->n_ahs_lock, flags);1794if (dev->n_ahs_allocated == ib_ipath_max_ahs) {1795spin_unlock_irqrestore(&dev->n_ahs_lock, flags);1796kfree(ah);1797ret = ERR_PTR(-ENOMEM);1798goto bail;1799}18001801dev->n_ahs_allocated++;1802spin_unlock_irqrestore(&dev->n_ahs_lock, flags);18031804/* ib_create_ah() will initialize ah->ibah. 
*/1805ah->attr = *ah_attr;1806ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);18071808ret = &ah->ibah;18091810bail:1811return ret;1812}18131814/**1815* ipath_destroy_ah - destroy an address handle1816* @ibah: the AH to destroy1817*1818* This may be called from interrupt context.1819*/1820static int ipath_destroy_ah(struct ib_ah *ibah)1821{1822struct ipath_ibdev *dev = to_idev(ibah->device);1823struct ipath_ah *ah = to_iah(ibah);1824unsigned long flags;18251826spin_lock_irqsave(&dev->n_ahs_lock, flags);1827dev->n_ahs_allocated--;1828spin_unlock_irqrestore(&dev->n_ahs_lock, flags);18291830kfree(ah);18311832return 0;1833}18341835static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)1836{1837struct ipath_ah *ah = to_iah(ibah);18381839*ah_attr = ah->attr;1840ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);18411842return 0;1843}18441845/**1846* ipath_get_npkeys - return the size of the PKEY table for port 01847* @dd: the infinipath device1848*/1849unsigned ipath_get_npkeys(struct ipath_devdata *dd)1850{1851return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);1852}18531854/**1855* ipath_get_pkey - return the indexed PKEY from the port PKEY table1856* @dd: the infinipath device1857* @index: the PKEY index1858*/1859unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)1860{1861unsigned ret;18621863/* always a kernel port, no locking needed */1864if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))1865ret = 0;1866else1867ret = dd->ipath_pd[0]->port_pkeys[index];18681869return ret;1870}18711872static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,1873u16 *pkey)1874{1875struct ipath_ibdev *dev = to_idev(ibdev);1876int ret;18771878if (index >= ipath_get_npkeys(dev->dd)) {1879ret = -EINVAL;1880goto bail;1881}18821883*pkey = ipath_get_pkey(dev->dd, index);1884ret = 0;18851886bail:1887return ret;1888}18891890/**1891* ipath_alloc_ucontext - allocate a ucontest1892* @ibdev: the infiniband device1893* @udata: not used by the InfiniPath driver1894*/18951896static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,1897struct ib_udata *udata)1898{1899struct ipath_ucontext *context;1900struct ib_ucontext *ret;19011902context = kmalloc(sizeof *context, GFP_KERNEL);1903if (!context) {1904ret = ERR_PTR(-ENOMEM);1905goto bail;1906}19071908ret = &context->ibucontext;19091910bail:1911return ret;1912}19131914static int ipath_dealloc_ucontext(struct ib_ucontext *context)1915{1916kfree(to_iucontext(context));1917return 0;1918}19191920static int ipath_verbs_register_sysfs(struct ib_device *dev);19211922static void __verbs_timer(unsigned long arg)1923{1924struct ipath_devdata *dd = (struct ipath_devdata *) arg;19251926/* Handle verbs layer timeouts. 
*/1927ipath_ib_timer(dd->verbs_dev);19281929mod_timer(&dd->verbs_timer, jiffies + 1);1930}19311932static int enable_timer(struct ipath_devdata *dd)1933{1934/*1935* Early chips had a design flaw where the chip and kernel idea1936* of the tail register don't always agree, and therefore we won't1937* get an interrupt on the next packet received.1938* If the board supports per packet receive interrupts, use it.1939* Otherwise, the timer function periodically checks for packets1940* to cover this case.1941* Either way, the timer is needed for verbs layer related1942* processing.1943*/1944if (dd->ipath_flags & IPATH_GPIO_INTR) {1945ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,19460x2074076542310ULL);1947/* Enable GPIO bit 2 interrupt */1948dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);1949ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,1950dd->ipath_gpio_mask);1951}19521953init_timer(&dd->verbs_timer);1954dd->verbs_timer.function = __verbs_timer;1955dd->verbs_timer.data = (unsigned long)dd;1956dd->verbs_timer.expires = jiffies + 1;1957add_timer(&dd->verbs_timer);19581959return 0;1960}19611962static int disable_timer(struct ipath_devdata *dd)1963{1964/* Disable GPIO bit 2 interrupt */1965if (dd->ipath_flags & IPATH_GPIO_INTR) {1966/* Disable GPIO bit 2 interrupt */1967dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));1968ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,1969dd->ipath_gpio_mask);1970/*1971* We might want to undo changes to debugportselect,1972* but how?1973*/1974}19751976del_timer_sync(&dd->verbs_timer);19771978return 0;1979}19801981/**1982* ipath_register_ib_device - register our device with the infiniband core1983* @dd: the device data structure1984* Return the allocated ipath_ibdev pointer or NULL on error.1985*/1986int ipath_register_ib_device(struct ipath_devdata *dd)1987{1988struct ipath_verbs_counters cntrs;1989struct ipath_ibdev *idev;1990struct ib_device *dev;1991struct ipath_verbs_txreq *tx;1992unsigned i;1993int ret;19941995idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);1996if (idev == NULL) {1997ret = -ENOMEM;1998goto bail;1999}20002001dev = &idev->ibdev;20022003if (dd->ipath_sdma_descq_cnt) {2004tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,2005GFP_KERNEL);2006if (tx == NULL) {2007ret = -ENOMEM;2008goto err_tx;2009}2010} else2011tx = NULL;2012idev->txreq_bufs = tx;20132014/* Only need to initialize non-zero fields. */2015spin_lock_init(&idev->n_pds_lock);2016spin_lock_init(&idev->n_ahs_lock);2017spin_lock_init(&idev->n_cqs_lock);2018spin_lock_init(&idev->n_qps_lock);2019spin_lock_init(&idev->n_srqs_lock);2020spin_lock_init(&idev->n_mcast_grps_lock);20212022spin_lock_init(&idev->qp_table.lock);2023spin_lock_init(&idev->lk_table.lock);2024idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);2025/* Set the prefix to the default value (see ch. 4.1.1) */2026idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);20272028ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);2029if (ret)2030goto err_qp;20312032/*2033* The top ib_ipath_lkey_table_size bits are used to index the2034* table. The lower 8 bits can be owned by the user (copied from2035* the LKEY). 
The remaining bits act as a generation number or tag.2036*/2037idev->lk_table.max = 1 << ib_ipath_lkey_table_size;2038idev->lk_table.table = kzalloc(idev->lk_table.max *2039sizeof(*idev->lk_table.table),2040GFP_KERNEL);2041if (idev->lk_table.table == NULL) {2042ret = -ENOMEM;2043goto err_lk;2044}2045INIT_LIST_HEAD(&idev->pending_mmaps);2046spin_lock_init(&idev->pending_lock);2047idev->mmap_offset = PAGE_SIZE;2048spin_lock_init(&idev->mmap_offset_lock);2049INIT_LIST_HEAD(&idev->pending[0]);2050INIT_LIST_HEAD(&idev->pending[1]);2051INIT_LIST_HEAD(&idev->pending[2]);2052INIT_LIST_HEAD(&idev->piowait);2053INIT_LIST_HEAD(&idev->rnrwait);2054INIT_LIST_HEAD(&idev->txreq_free);2055idev->pending_index = 0;2056idev->port_cap_flags =2057IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;2058if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)2059idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;2060idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;2061idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;2062idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;2063idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;2064idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;20652066/* Snapshot current HW counters to "clear" them. */2067ipath_get_counters(dd, &cntrs);2068idev->z_symbol_error_counter = cntrs.symbol_error_counter;2069idev->z_link_error_recovery_counter =2070cntrs.link_error_recovery_counter;2071idev->z_link_downed_counter = cntrs.link_downed_counter;2072idev->z_port_rcv_errors = cntrs.port_rcv_errors;2073idev->z_port_rcv_remphys_errors =2074cntrs.port_rcv_remphys_errors;2075idev->z_port_xmit_discards = cntrs.port_xmit_discards;2076idev->z_port_xmit_data = cntrs.port_xmit_data;2077idev->z_port_rcv_data = cntrs.port_rcv_data;2078idev->z_port_xmit_packets = cntrs.port_xmit_packets;2079idev->z_port_rcv_packets = cntrs.port_rcv_packets;2080idev->z_local_link_integrity_errors =2081cntrs.local_link_integrity_errors;2082idev->z_excessive_buffer_overrun_errors =2083cntrs.excessive_buffer_overrun_errors;2084idev->z_vl15_dropped = cntrs.vl15_dropped;20852086for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)2087list_add(&tx->txreq.list, &idev->txreq_free);20882089/*2090* The system image GUID is supposed to be the same for all2091* IB HCAs in a single system but since there can be other2092* device types in the system, we can't be sure this is unique.2093*/2094if (!sys_image_guid)2095sys_image_guid = dd->ipath_guid;2096idev->sys_image_guid = sys_image_guid;2097idev->ib_unit = dd->ipath_unit;2098idev->dd = dd;20992100strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);2101dev->owner = THIS_MODULE;2102dev->node_guid = dd->ipath_guid;2103dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;2104dev->uverbs_cmd_mask =2105(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |2106(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |2107(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |2108(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |2109(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |2110(1ull << IB_USER_VERBS_CMD_CREATE_AH) |2111(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |2112(1ull << IB_USER_VERBS_CMD_QUERY_AH) |2113(1ull << IB_USER_VERBS_CMD_REG_MR) |2114(1ull << IB_USER_VERBS_CMD_DEREG_MR) |2115(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |2116(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |2117(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |2118(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |2119(1ull << IB_USER_VERBS_CMD_POLL_CQ) |2120(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |2121(1ull << IB_USER_VERBS_CMD_CREATE_QP) |2122(1ull << IB_USER_VERBS_CMD_QUERY_QP) |2123(1ull << 
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = 1;
	dev->dma_device = &dd->pcidev->dev;
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_phys_mr = ipath_reg_phys_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;
	dev->mmap = ipath_mmap;
	dev->dma_ops = &ipath_dma_mapping_ops;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s", init_utsname()->nodename);

	ret = ib_register_device(dev, NULL);
	if (ret)
		goto err_reg;

	ret = ipath_verbs_register_sysfs(dev);
	if (ret)
		goto err_class;

	enable_timer(dd);

	goto bail;

err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	kfree(idev->txreq_bufs);
err_tx:
	ib_dealloc_device(dev);
	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	idev = NULL;

bail:
	dd->verbs_dev = idev;
	return ret;
}

void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
	struct ib_device *ibdev = &dev->ibdev;
	u32 qps_inuse;

	ib_unregister_device(ibdev);

	disable_timer(dev->dd);
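	/*
	 * The checks below are teardown sanity checks: they only report
	 * leftover state via ipath_dev_err() and do not drain or free
	 * anything themselves, so a non-empty list here points at a leak
	 * elsewhere in the driver.
	 */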
leak!\n");2232/*2233* Note that ipath_unregister_ib_device() can be called before all2234* the QPs are destroyed!2235*/2236qps_inuse = ipath_free_all_qps(&dev->qp_table);2237if (qps_inuse)2238ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",2239qps_inuse);2240kfree(dev->qp_table.table);2241kfree(dev->lk_table.table);2242kfree(dev->txreq_bufs);2243ib_dealloc_device(ibdev);2244}22452246static ssize_t show_rev(struct device *device, struct device_attribute *attr,2247char *buf)2248{2249struct ipath_ibdev *dev =2250container_of(device, struct ipath_ibdev, ibdev.dev);22512252return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);2253}22542255static ssize_t show_hca(struct device *device, struct device_attribute *attr,2256char *buf)2257{2258struct ipath_ibdev *dev =2259container_of(device, struct ipath_ibdev, ibdev.dev);2260int ret;22612262ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);2263if (ret < 0)2264goto bail;2265strcat(buf, "\n");2266ret = strlen(buf);22672268bail:2269return ret;2270}22712272static ssize_t show_stats(struct device *device, struct device_attribute *attr,2273char *buf)2274{2275struct ipath_ibdev *dev =2276container_of(device, struct ipath_ibdev, ibdev.dev);2277int i;2278int len;22792280len = sprintf(buf,2281"RC resends %d\n"2282"RC no QACK %d\n"2283"RC ACKs %d\n"2284"RC SEQ NAKs %d\n"2285"RC RDMA seq %d\n"2286"RC RNR NAKs %d\n"2287"RC OTH NAKs %d\n"2288"RC timeouts %d\n"2289"RC RDMA dup %d\n"2290"piobuf wait %d\n"2291"unaligned %d\n"2292"PKT drops %d\n"2293"WQE errs %d\n",2294dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,2295dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,2296dev->n_other_naks, dev->n_timeouts,2297dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,2298dev->n_pkt_drops, dev->n_wqe_errs);2299for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {2300const struct ipath_opcode_stats *si = &dev->opstats[i];23012302if (!si->n_packets && !si->n_bytes)2303continue;2304len += sprintf(buf + len, "%02x %llu/%llu\n", i,2305(unsigned long long) si->n_packets,2306(unsigned long long) si->n_bytes);2307}2308return len;2309}23102311static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);2312static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);2313static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);2314static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);23152316static struct device_attribute *ipath_class_attributes[] = {2317&dev_attr_hw_rev,2318&dev_attr_hca_type,2319&dev_attr_board_id,2320&dev_attr_stats2321};23222323static int ipath_verbs_register_sysfs(struct ib_device *dev)2324{2325int i;2326int ret;23272328for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)2329if (device_create_file(&dev->dev,2330ipath_class_attributes[i])) {2331ret = 1;2332goto bail;2333}23342335ret = 0;23362337bail:2338return ret;2339}234023412342