Path: drivers/infiniband/hw/mthca/mthca_qp.c
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/string.h>
#include <linux/slab.h>
#include <linux/sched.h>

#include <asm/io.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
#include "mthca_wqe.h"

enum {
	MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
	MTHCA_ACK_REQ_FREQ       = 10,
	MTHCA_FLIGHT_LIMIT       = 9,
	MTHCA_UD_HEADER_SIZE     = 72, /* largest UD header possible */
	MTHCA_INLINE_HEADER_SIZE = 4,  /* data segment overhead for inline */
	MTHCA_INLINE_CHUNK_SIZE  = 16  /* inline data segment chunk */
};

enum {
	MTHCA_QP_STATE_RST      = 0,
	MTHCA_QP_STATE_INIT     = 1,
	MTHCA_QP_STATE_RTR      = 2,
	MTHCA_QP_STATE_RTS      = 3,
	MTHCA_QP_STATE_SQE      = 4,
	MTHCA_QP_STATE_SQD      = 5,
	MTHCA_QP_STATE_ERR      = 6,
	MTHCA_QP_STATE_DRAINING = 7
};

enum {
	MTHCA_QP_ST_RC  = 0x0,
	MTHCA_QP_ST_UC  = 0x1,
	MTHCA_QP_ST_RD  = 0x2,
	MTHCA_QP_ST_UD  = 0x3,
	MTHCA_QP_ST_MLX = 0x7
};

enum {
	MTHCA_QP_PM_MIGRATED = 0x3,
	MTHCA_QP_PM_ARMED    = 0x0,
	MTHCA_QP_PM_REARM    = 0x1
};

enum {
	/* qp_context flags */
	MTHCA_QP_BIT_DE  = 1 << 8,
	/* params1 */
	MTHCA_QP_BIT_SRE = 1 << 15,
	MTHCA_QP_BIT_SWE = 1 << 14,
	MTHCA_QP_BIT_SAE = 1 << 13,
	MTHCA_QP_BIT_SIC = 1 << 4,
	MTHCA_QP_BIT_SSC = 1 << 3,
	/* params2 */
	MTHCA_QP_BIT_RRE = 1 << 15,
	MTHCA_QP_BIT_RWE = 1 << 14,
	MTHCA_QP_BIT_RAE = 1 << 13,
	MTHCA_QP_BIT_RIC = 1 << 4,
	MTHCA_QP_BIT_RSC = 1 << 3
};

enum {
	MTHCA_SEND_DOORBELL_FENCE = 1 << 5
};

struct mthca_qp_path {
	__be32 port_pkey;
	u8     rnr_retry;
	u8     g_mylmc;
	__be16 rlid;
	u8     ackto;
	u8     mgid_index;
	u8     static_rate;
	u8     hop_limit;
	__be32 sl_tclass_flowlabel;
	u8     rgid[16];
} __attribute__((packed));

struct mthca_qp_context {
	__be32 flags;
	__be32 tavor_sched_queue;	/* Reserved on Arbel */
	u8     mtu_msgmax;
	u8     rq_size_stride;		/* Reserved on Tavor */
	u8     sq_size_stride;		/* Reserved on Tavor */
	u8     rlkey_arbel_sched_queue;	/* Reserved on Tavor */
	__be32 usr_page;
	__be32 local_qpn;
	__be32 remote_qpn;
	u32    reserved1[2];
	struct mthca_qp_path pri_path;
	struct mthca_qp_path alt_path;
	__be32 rdd;
	__be32 pd;
	__be32 wqe_base;
	__be32 wqe_lkey;
	__be32 params1;
	__be32 reserved2;
	__be32 next_send_psn;
	__be32 cqn_snd;
	__be32 snd_wqe_base_l;	/* Next send WQE on Tavor */
	__be32 snd_db_index;	/* (debugging only entries) */
	__be32 last_acked_psn;
	__be32 ssn;
	__be32 params2;
	__be32 rnr_nextrecvpsn;
	__be32 ra_buff_indx;
	__be32 cqn_rcv;
	__be32 rcv_wqe_base_l;	/* Next recv WQE on Tavor */
	__be32 rcv_db_index;	/* (debugging only entries) */
	__be32 qkey;
	__be32 srqn;
	__be32 rmsn;
	__be16 rq_wqe_counter;	/* reserved on Tavor */
	__be16 sq_wqe_counter;	/* reserved on Tavor */
	u32    reserved3[18];
} __attribute__((packed));

struct mthca_qp_param {
	__be32 opt_param_mask;
	u32    reserved1;
	struct mthca_qp_context context;
	u32    reserved2[62];
} __attribute__((packed));

enum {
	MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
	MTHCA_QP_OPTPAR_RRE               = 1 << 1,
	MTHCA_QP_OPTPAR_RAE               = 1 << 2,
	MTHCA_QP_OPTPAR_RWE               = 1 << 3,
	MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
	MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
	MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
	MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
	MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
	MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
	MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
	MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
	MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
	MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
	MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
	MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
	MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
};

static const u8 mthca_opcode[] = {
	[IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
	[IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
	[IB_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
	[IB_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
};

static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
{
	return qp->qpn >= dev->qp_table.sqp_start &&
		qp->qpn <= dev->qp_table.sqp_start + 3;
}

static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
{
	return qp->qpn >= dev->qp_table.sqp_start &&
		qp->qpn <= dev->qp_table.sqp_start + 1;
}

static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
	if (qp->is_direct)
		return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
	else
		return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
			((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
}

static void *get_send_wqe(struct mthca_qp *qp, int n)
{
	if (qp->is_direct)
		return qp->queue.direct.buf + qp->send_wqe_offset +
			(n << qp->sq.wqe_shift);
	else
		return qp->queue.page_list[(qp->send_wqe_offset +
					    (n << qp->sq.wqe_shift)) >>
					   PAGE_SHIFT].buf +
			((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
			 (PAGE_SIZE - 1));
}

static void mthca_wq_reset(struct mthca_wq *wq)
{
	wq->next_ind  = 0;
	wq->last_comp = wq->max - 1;
	wq->head      = 0;
	wq->tail      = 0;
}

void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
		    enum ib_event_type event_type)
{
	struct mthca_qp *qp;
	struct ib_event event;

	spin_lock(&dev->qp_table.lock);
	qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
	if (qp)
		++qp->refcount;
	spin_unlock(&dev->qp_table.lock);

	if (!qp) {
		mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
		return;
	}

	if (event_type == IB_EVENT_PATH_MIG)
		qp->port = qp->alt_port;
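	/* Deliver the asynchronous event to the consumer's QP event handler,
	 * if one is registered. */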
	event.device      = &dev->ib_dev;
	event.event       = event_type;
	event.element.qp  = &qp->ibqp;
	if (qp->ibqp.event_handler)
		qp->ibqp.event_handler(&event, qp->ibqp.qp_context);

	spin_lock(&dev->qp_table.lock);
	if (!--qp->refcount)
		wake_up(&qp->wait);
	spin_unlock(&dev->qp_table.lock);
}

static int to_mthca_state(enum ib_qp_state ib_state)
{
	switch (ib_state) {
	case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
	case IB_QPS_INIT:  return MTHCA_QP_STATE_INIT;
	case IB_QPS_RTR:   return MTHCA_QP_STATE_RTR;
	case IB_QPS_RTS:   return MTHCA_QP_STATE_RTS;
	case IB_QPS_SQD:   return MTHCA_QP_STATE_SQD;
	case IB_QPS_SQE:   return MTHCA_QP_STATE_SQE;
	case IB_QPS_ERR:   return MTHCA_QP_STATE_ERR;
	default:           return -1;
	}
}

enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };

static int to_mthca_st(int transport)
{
	switch (transport) {
	case RC:  return MTHCA_QP_ST_RC;
	case UC:  return MTHCA_QP_ST_UC;
	case UD:  return MTHCA_QP_ST_UD;
	case RD:  return MTHCA_QP_ST_RD;
	case MLX: return MTHCA_QP_ST_MLX;
	default:  return -1;
	}
}

static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
			int attr_mask)
{
	if (attr_mask & IB_QP_PKEY_INDEX)
		sqp->pkey_index = attr->pkey_index;
	if (attr_mask & IB_QP_QKEY)
		sqp->qkey = attr->qkey;
	if (attr_mask & IB_QP_SQ_PSN)
		sqp->send_psn = attr->sq_psn;
}

static void init_port(struct mthca_dev *dev, int port)
{
	int err;
	u8 status;
	struct mthca_init_ib_param param;

	memset(&param, 0, sizeof param);

	param.port_width = dev->limits.port_width_cap;
	param.vl_cap     = dev->limits.vl_cap;
	param.mtu_cap    = dev->limits.mtu_cap;
	param.gid_cap    = dev->limits.gid_table_len;
	param.pkey_cap   = dev->limits.pkey_table_len;

	err = mthca_INIT_IB(dev, &param, port, &status);
	if (err)
		mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
	if (status)
		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
}

static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
				  int attr_mask)
{
	u8 dest_rd_atomic;
	u32 access_flags;
	u32 hw_access_flags = 0;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		dest_rd_atomic = attr->max_dest_rd_atomic;
	else
		dest_rd_atomic = qp->resp_depth;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		access_flags = attr->qp_access_flags;
	else
		access_flags = qp->atomic_rd_en;

	if (!dest_rd_atomic)
		access_flags &= IB_ACCESS_REMOTE_WRITE;

	if (access_flags & IB_ACCESS_REMOTE_READ)
		hw_access_flags |= MTHCA_QP_BIT_RRE;
	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
		hw_access_flags |= MTHCA_QP_BIT_RAE;
	if (access_flags & IB_ACCESS_REMOTE_WRITE)
		hw_access_flags |= MTHCA_QP_BIT_RWE;

	return cpu_to_be32(hw_access_flags);
}

static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
{
	switch (mthca_state) {
	case MTHCA_QP_STATE_RST:      return IB_QPS_RESET;
	case MTHCA_QP_STATE_INIT:     return IB_QPS_INIT;
	case MTHCA_QP_STATE_RTR:      return IB_QPS_RTR;
	case MTHCA_QP_STATE_RTS:      return IB_QPS_RTS;
	case MTHCA_QP_STATE_DRAINING:
	case MTHCA_QP_STATE_SQD:      return IB_QPS_SQD;
	case MTHCA_QP_STATE_SQE:      return IB_QPS_SQE;
	case MTHCA_QP_STATE_ERR:      return IB_QPS_ERR;
	default:                      return -1;
	}
}

static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
{
	switch (mthca_mig_state) {
	case 0:  return IB_MIG_ARMED;
	case 1:  return IB_MIG_REARM;
	case 3:  return IB_MIG_MIGRATED;
	default: return -1;
	}
}

static int to_ib_qp_access_flags(int mthca_flags)
{
	int ib_flags = 0;
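	/* Translate the hardware RRE/RWE/RAE bits back into IB remote access flags. */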

	if (mthca_flags & MTHCA_QP_BIT_RRE)
		ib_flags |= IB_ACCESS_REMOTE_READ;
	if (mthca_flags & MTHCA_QP_BIT_RWE)
		ib_flags |= IB_ACCESS_REMOTE_WRITE;
	if (mthca_flags & MTHCA_QP_BIT_RAE)
		ib_flags |= IB_ACCESS_REMOTE_ATOMIC;

	return ib_flags;
}

static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
			  struct mthca_qp_path *path)
{
	memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
	ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;

	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
		return;

	ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
	ib_ah_attr->sl            = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
	ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
						     path->static_rate & 0xf,
						     ib_ah_attr->port_num);
	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
	if (ib_ah_attr->ah_flags) {
		ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
		ib_ah_attr->grh.hop_limit  = path->hop_limit;
		ib_ah_attr->grh.traffic_class =
			(be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
		ib_ah_attr->grh.flow_label =
			be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
		memcpy(ib_ah_attr->grh.dgid.raw,
		       path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
	}
}

int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	int err = 0;
	struct mthca_mailbox *mailbox = NULL;
	struct mthca_qp_param *qp_param;
	struct mthca_qp_context *context;
	int mthca_state;
	u8 status;

	mutex_lock(&qp->mutex);

	if (qp->state == IB_QPS_RESET) {
		qp_attr->qp_state = IB_QPS_RESET;
		goto done;
	}

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto out;
	}

	err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
	if (err)
		goto out_mailbox;
	if (status) {
		mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
		err = -EINVAL;
		goto out_mailbox;
	}

	qp_param    = mailbox->buf;
	context     = &qp_param->context;
	mthca_state = be32_to_cpu(context->flags) >> 28;

	qp->state            = to_ib_qp_state(mthca_state);
	qp_attr->qp_state    = qp->state;
	qp_attr->path_mtu    = context->mtu_msgmax >> 5;
	qp_attr->path_mig_state =
		to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
	qp_attr->qkey        = be32_to_cpu(context->qkey);
	qp_attr->rq_psn      = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
	qp_attr->sq_psn      = be32_to_cpu(context->next_send_psn) & 0xffffff;
	qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff;
	qp_attr->qp_access_flags =
		to_ib_qp_access_flags(be32_to_cpu(context->params2));

	if (qp->transport == RC || qp->transport == UC) {
		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
		qp_attr->alt_pkey_index =
			be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
		qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
	}

	qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
	qp_attr->port_num   =
		(be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;

	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
	qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;

	qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
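	/* The HCA stores the initiator/responder RDMA-atomic depths as log2
	 * values in bits [23:21] of params1/params2. */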

	qp_attr->max_dest_rd_atomic =
		1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
	qp_attr->min_rnr_timer 	    =
		(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
	qp_attr->timeout 	    = context->pri_path.ackto >> 3;
	qp_attr->retry_cnt 	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
	qp_attr->rnr_retry 	    = context->pri_path.rnr_retry >> 5;
	qp_attr->alt_timeout 	    = context->alt_path.ackto >> 3;

done:
	qp_attr->cur_qp_state	     = qp_attr->qp_state;
	qp_attr->cap.max_send_wr     = qp->sq.max;
	qp_attr->cap.max_recv_wr     = qp->rq.max;
	qp_attr->cap.max_send_sge    = qp->sq.max_gs;
	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->cap	     = qp_attr->cap;

out_mailbox:
	mthca_free_mailbox(dev, mailbox);

out:
	mutex_unlock(&qp->mutex);
	return err;
}

static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
			  struct mthca_qp_path *path, u8 port)
{
	path->g_mylmc     = ah->src_path_bits & 0x7f;
	path->rlid        = cpu_to_be16(ah->dlid);
	path->static_rate = mthca_get_rate(dev, ah->static_rate, port);

	if (ah->ah_flags & IB_AH_GRH) {
		if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
			mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
				  ah->grh.sgid_index, dev->limits.gid_table_len-1);
			return -1;
		}

		path->g_mylmc   |= 1 << 7;
		path->mgid_index = ah->grh.sgid_index;
		path->hop_limit  = ah->grh.hop_limit;
		path->sl_tclass_flowlabel =
			cpu_to_be32((ah->sl << 28)                |
				    (ah->grh.traffic_class << 20) |
				    (ah->grh.flow_label));
		memcpy(path->rgid, ah->grh.dgid.raw, 16);
	} else
		path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);

	return 0;
}

static int __mthca_modify_qp(struct ib_qp *ibqp,
			     const struct ib_qp_attr *attr, int attr_mask,
			     enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	struct mthca_mailbox *mailbox;
	struct mthca_qp_param *qp_param;
	struct mthca_qp_context *qp_context;
	u32 sqd_event = 0;
	u8 status;
	int err = -EINVAL;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto out;
	}
	qp_param = mailbox->buf;
	qp_context = &qp_param->context;
	memset(qp_param, 0, sizeof *qp_param);

	qp_context->flags      = cpu_to_be32((to_mthca_state(new_state) << 28) |
					     (to_mthca_st(qp->transport) << 16));
	qp_context->flags     |= cpu_to_be32(MTHCA_QP_BIT_DE);
	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
		qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
	else {
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
		switch (attr->path_mig_state) {
		case IB_MIG_MIGRATED:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
			break;
		case IB_MIG_REARM:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
			break;
		case IB_MIG_ARMED:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
			break;
		}
	}

	/* leave tavor_sched_queue as 0 */

	if (qp->transport == MLX || qp->transport == UD)
		qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
	else if (attr_mask & IB_QP_PATH_MTU) {
		if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
			mthca_dbg(dev, "path MTU (%u) is invalid\n",
				  attr->path_mtu);
			goto out_mailbox;
		}
		qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
	}

	if (mthca_is_memfree(dev)) {
		if (qp->rq.max)
			qp_context->rq_size_stride = ilog2(qp->rq.max) << 3;
		qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
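		/* The size/stride byte packs log2(queue size) in the high bits
		 * and (wqe_shift - 4) in the low 3 bits; the SQ below is
		 * encoded the same way. */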

		if (qp->sq.max)
			qp_context->sq_size_stride = ilog2(qp->sq.max) << 3;
		qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
	}

	/* leave arbel_sched_queue as 0 */

	if (qp->ibqp.uobject)
		qp_context->usr_page =
			cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
	else
		qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
	qp_context->local_qpn  = cpu_to_be32(qp->qpn);
	if (attr_mask & IB_QP_DEST_QPN) {
		qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
	}

	if (qp->transport == MLX)
		qp_context->pri_path.port_pkey |=
			cpu_to_be32(qp->port << 24);
	else {
		if (attr_mask & IB_QP_PORT) {
			qp_context->pri_path.port_pkey |=
				cpu_to_be32(attr->port_num << 24);
			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
		}
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		qp_context->pri_path.port_pkey |=
			cpu_to_be32(attr->pkey_index);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
			attr->rnr_retry << 5;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
							MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
	}

	if (attr_mask & IB_QP_AV) {
		if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
				   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
			goto out_mailbox;

		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
	}

	if (ibqp->qp_type == IB_QPT_RC &&
	    cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
		u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;

		if (mthca_is_memfree(dev))
			qp_context->rlkey_arbel_sched_queue |= sched_queue;
		else
			qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);

		qp_param->opt_param_mask |=
			cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
	}

	if (attr_mask & IB_QP_TIMEOUT) {
		qp_context->pri_path.ackto = attr->timeout << 3;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
			mthca_dbg(dev, "Alternate P_Key index (%u) too large. "
				  "max is %d\n",
				  attr->alt_pkey_index, dev->limits.pkey_table_len-1);
			goto out_mailbox;
		}

		if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
			mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
				  attr->alt_port_num);
			goto out_mailbox;
		}

		if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
				   attr->alt_ah_attr.port_num))
			goto out_mailbox;

		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
							      attr->alt_port_num << 24);
		qp_context->alt_path.ackto = attr->alt_timeout << 3;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
	}

	/* leave rdd as 0 */
	qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
	/* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
	qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
	qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
					     (MTHCA_FLIGHT_LIMIT << 24) |
					     MTHCA_QP_BIT_SWE);
	if (qp->sq_policy == IB_SIGNAL_ALL_WR)
		qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
	if (attr_mask & IB_QP_RETRY_CNT) {
		qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		if (attr->max_rd_atomic) {
			qp_context->params1 |=
				cpu_to_be32(MTHCA_QP_BIT_SRE |
					    MTHCA_QP_BIT_SAE);
			qp_context->params1 |=
				cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
		}
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
	}

	if (attr_mask & IB_QP_SQ_PSN)
		qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
	qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);

	if (mthca_is_memfree(dev)) {
		qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
		qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		if (attr->max_dest_rd_atomic)
			qp_context->params2 |=
				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);

		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
	}

	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
		qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
							MTHCA_QP_OPTPAR_RRE |
							MTHCA_QP_OPTPAR_RAE);
	}

	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);

	if (ibqp->srq)
		qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC);

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
	}
	if (attr_mask & IB_QP_RQ_PSN)
		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

	qp_context->ra_buff_indx =
		cpu_to_be32(dev->qp_table.rdb_base +
			    ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
			     dev->qp_table.rdb_shift));

	qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);

	if (mthca_is_memfree(dev))
		qp_context->rcv_db_index   = cpu_to_be32(qp->rq.db_index);

	if (attr_mask & IB_QP_QKEY) {
		qp_context->qkey = cpu_to_be32(attr->qkey);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
	}

	if (ibqp->srq)
		qp_context->srqn = cpu_to_be32(1 << 24 |
					       to_msrq(ibqp->srq)->srqn);

	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY		&&
	    attr->en_sqd_async_notify)
		sqd_event = 1 << 31;

	err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
			      mailbox, sqd_event, &status);
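	/* The firmware reports a non-zero status separately from the command
	 * error; either one is treated as a failed transition below. */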
	if (err)
		goto out_mailbox;
	if (status) {
		mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
			   cur_state, new_state, status);
		err = -EINVAL;
		goto out_mailbox;
	}

	qp->state = new_state;
	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->atomic_rd_en = attr->qp_access_flags;
	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->resp_depth = attr->max_dest_rd_atomic;
	if (attr_mask & IB_QP_PORT)
		qp->port = attr->port_num;
	if (attr_mask & IB_QP_ALT_PATH)
		qp->alt_port = attr->alt_port_num;

	if (is_sqp(dev, qp))
		store_attrs(to_msqp(qp), attr, attr_mask);

	/*
	 * If we moved QP0 to RTR, bring the IB link up; if we moved
	 * QP0 to RESET or ERROR, bring the link back down.
	 */
	if (is_qp0(dev, qp)) {
		if (cur_state != IB_QPS_RTR &&
		    new_state == IB_QPS_RTR)
			init_port(dev, qp->port);

		if (cur_state != IB_QPS_RESET &&
		    cur_state != IB_QPS_ERR &&
		    (new_state == IB_QPS_RESET ||
		     new_state == IB_QPS_ERR))
			mthca_CLOSE_IB(dev, qp->port, &status);
	}

	/*
	 * If we moved a kernel QP to RESET, clean up all old CQ
	 * entries and reinitialize the QP.
	 */
	if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
		mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
			mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);

		mthca_wq_reset(&qp->sq);
		qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);

		mthca_wq_reset(&qp->rq);
		qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

		if (mthca_is_memfree(dev)) {
			*qp->sq.db = 0;
			*qp->rq.db = 0;
		}
	}

out_mailbox:
	mthca_free_mailbox(dev, mailbox);
out:
	return err;
}

int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
		    struct ib_udata *udata)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	int err = -EINVAL;

	mutex_lock(&qp->mutex);
	if (attr_mask & IB_QP_CUR_STATE) {
		cur_state = attr->cur_qp_state;
	} else {
		spin_lock_irq(&qp->sq.lock);
		spin_lock(&qp->rq.lock);
		cur_state = qp->state;
		spin_unlock(&qp->rq.lock);
		spin_unlock_irq(&qp->sq.lock);
	}

	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
		mthca_dbg(dev, "Bad QP transition (transport %d) "
			  "%d->%d with attr 0x%08x\n",
			  qp->transport, cur_state, new_state,
			  attr_mask);
		goto out;
	}

	if ((attr_mask & IB_QP_PKEY_INDEX) &&
	     attr->pkey_index >= dev->limits.pkey_table_len) {
		mthca_dbg(dev, "P_Key index (%u) too large. "
			  "max is %d\n",
			  attr->pkey_index, dev->limits.pkey_table_len-1);
		goto out;
	}

	if ((attr_mask & IB_QP_PORT) &&
	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
		goto out;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
		goto out;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
		goto out;
	}

	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
		err = 0;
		goto out;
	}

	err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);

out:
	mutex_unlock(&qp->mutex);
	return err;
}

static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
{
	/*
	 * Calculate the maximum size of WQE s/g segments, excluding
	 * the next segment and other non-data segments.
	 */
	int max_data_size = desc_sz - sizeof (struct mthca_next_seg);

	switch (qp->transport) {
	case MLX:
		max_data_size -= 2 * sizeof (struct mthca_data_seg);
		break;

	case UD:
		if (mthca_is_memfree(dev))
			max_data_size -= sizeof (struct mthca_arbel_ud_seg);
		else
			max_data_size -= sizeof (struct mthca_tavor_ud_seg);
		break;

	default:
		max_data_size -= sizeof (struct mthca_raddr_seg);
		break;
	}

	return max_data_size;
}

static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
{
	/* We don't support inline data for kernel QPs (yet). */
	return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
}

static void mthca_adjust_qp_caps(struct mthca_dev *dev,
				 struct mthca_pd *pd,
				 struct mthca_qp *qp)
{
	int max_data_size = mthca_max_data_size(dev, qp,
						min(dev->limits.max_desc_sz,
						    1 << qp->sq.wqe_shift));

	qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);

	qp->sq.max_gs = min_t(int, dev->limits.max_sg,
			      max_data_size / sizeof (struct mthca_data_seg));
	qp->rq.max_gs = min_t(int, dev->limits.max_sg,
			      (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
			       sizeof (struct mthca_next_seg)) /
			      sizeof (struct mthca_data_seg));
}

/*
 * Allocate and register buffer for WQEs.
 * qp->rq.max, sq.max, rq.max_gs and sq.max_gs must all be assigned.
 * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
 * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
 * queue)
 */
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
			       struct mthca_pd *pd,
			       struct mthca_qp *qp)
{
	int size;
	int err = -ENOMEM;

	size = sizeof (struct mthca_next_seg) +
		qp->rq.max_gs * sizeof (struct mthca_data_seg);

	if (size > dev->limits.max_desc_sz)
		return -EINVAL;

	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
	     qp->rq.wqe_shift++)
		; /* nothing */

	size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
	switch (qp->transport) {
	case MLX:
		size += 2 * sizeof (struct mthca_data_seg);
		break;

	case UD:
		size += mthca_is_memfree(dev) ?
			sizeof (struct mthca_arbel_ud_seg) :
			sizeof (struct mthca_tavor_ud_seg);
		break;

	case UC:
		size += sizeof (struct mthca_raddr_seg);
		break;

	case RC:
		size += sizeof (struct mthca_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		size = max_t(int, size,
			     sizeof (struct mthca_atomic_seg) +
			     sizeof (struct mthca_raddr_seg) +
			     sizeof (struct mthca_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	size = max_t(int, size, sizeof (struct mthca_bind_seg));

	size += sizeof (struct mthca_next_seg);

	if (size > dev->limits.max_desc_sz)
		return -EINVAL;

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */

	qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
				    1 << qp->sq.wqe_shift);

	/*
	 * If this is a userspace QP, we don't actually have to
	 * allocate anything.
	 * All we need is to calculate the WQE
	 * sizes and the send_wqe_offset, so we're done now.
	 */
	if (pd->ibpd.uobject)
		return 0;

	size = PAGE_ALIGN(qp->send_wqe_offset +
			  (qp->sq.max << qp->sq.wqe_shift));

	qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
			   GFP_KERNEL);
	if (!qp->wrid)
		goto err_out;

	err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
			      &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
	if (err)
		goto err_out;

	return 0;

err_out:
	kfree(qp->wrid);
	return err;
}

static void mthca_free_wqe_buf(struct mthca_dev *dev,
			       struct mthca_qp *qp)
{
	mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset +
				       (qp->sq.max << qp->sq.wqe_shift)),
		       &qp->queue, qp->is_direct, &qp->mr);
	kfree(qp->wrid);
}

static int mthca_map_memfree(struct mthca_dev *dev,
			     struct mthca_qp *qp)
{
	int ret;

	if (mthca_is_memfree(dev)) {
		ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
		if (ret)
			return ret;

		ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
		if (ret)
			goto err_qpc;

		ret = mthca_table_get(dev, dev->qp_table.rdb_table,
				      qp->qpn << dev->qp_table.rdb_shift);
		if (ret)
			goto err_eqpc;

	}

	return 0;

err_eqpc:
	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);

err_qpc:
	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);

	return ret;
}

static void mthca_unmap_memfree(struct mthca_dev *dev,
				struct mthca_qp *qp)
{
	mthca_table_put(dev, dev->qp_table.rdb_table,
			qp->qpn << dev->qp_table.rdb_shift);
	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
}

static int mthca_alloc_memfree(struct mthca_dev *dev,
			       struct mthca_qp *qp)
{
	if (mthca_is_memfree(dev)) {
		qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
						 qp->qpn, &qp->rq.db);
		if (qp->rq.db_index < 0)
			return -ENOMEM;

		qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
						 qp->qpn, &qp->sq.db);
		if (qp->sq.db_index < 0) {
			mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
			return -ENOMEM;
		}
	}

	return 0;
}

static void mthca_free_memfree(struct mthca_dev *dev,
			       struct mthca_qp *qp)
{
	if (mthca_is_memfree(dev)) {
		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
	}
}

static int mthca_alloc_qp_common(struct mthca_dev *dev,
				 struct mthca_pd *pd,
				 struct mthca_cq *send_cq,
				 struct mthca_cq *recv_cq,
				 enum ib_sig_type send_policy,
				 struct mthca_qp *qp)
{
	int ret;
	int i;
	struct mthca_next_seg *next;

	qp->refcount = 1;
	init_waitqueue_head(&qp->wait);
	mutex_init(&qp->mutex);
	qp->state    	 = IB_QPS_RESET;
	qp->atomic_rd_en = 0;
	qp->resp_depth   = 0;
	qp->sq_policy    = send_policy;
	mthca_wq_reset(&qp->sq);
	mthca_wq_reset(&qp->rq);

	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);

	ret = mthca_map_memfree(dev, qp);
	if (ret)
		return ret;

	ret = mthca_alloc_wqe_buf(dev, pd, qp);
	if (ret) {
		mthca_unmap_memfree(dev, qp);
		return ret;
	}

	mthca_adjust_qp_caps(dev, pd, qp);

	/*
	 * If this is a userspace QP, we're done now.
	 * The doorbells
	 * will be allocated and buffers will be initialized in
	 * userspace.
	 */
	if (pd->ibpd.uobject)
		return 0;

	ret = mthca_alloc_memfree(dev, qp);
	if (ret) {
		mthca_free_wqe_buf(dev, qp);
		mthca_unmap_memfree(dev, qp);
		return ret;
	}

	if (mthca_is_memfree(dev)) {
		struct mthca_data_seg *scatter;
		int size = (sizeof (struct mthca_next_seg) +
			    qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;

		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
						   qp->rq.wqe_shift);
			next->ee_nds = cpu_to_be32(size);

			for (scatter = (void *) (next + 1);
			     (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
			     ++scatter)
				scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
		}

		for (i = 0; i < qp->sq.max; ++i) {
			next = get_send_wqe(qp, i);
			next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
						    qp->sq.wqe_shift) +
						   qp->send_wqe_offset);
		}
	} else {
		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = htonl((((i + 1) % qp->rq.max) <<
					      qp->rq.wqe_shift) | 1);
		}

	}

	qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
	qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

	return 0;
}

static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
			     struct mthca_pd *pd, struct mthca_qp *qp)
{
	int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);

	/* Sanity check QP size before proceeding */
	if (cap->max_send_wr  	 > dev->limits.max_wqes ||
	    cap->max_recv_wr  	 > dev->limits.max_wqes ||
	    cap->max_send_sge 	 > dev->limits.max_sg   ||
	    cap->max_recv_sge 	 > dev->limits.max_sg   ||
	    cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
		return -EINVAL;

	/*
	 * For MLX transport we need 2 extra send gather entries:
	 * one for the header and one for the checksum at the end
	 */
	if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
		return -EINVAL;

	if (mthca_is_memfree(dev)) {
		qp->rq.max = cap->max_recv_wr ?
			roundup_pow_of_two(cap->max_recv_wr) : 0;
		qp->sq.max = cap->max_send_wr ?
			roundup_pow_of_two(cap->max_send_wr) : 0;
	} else {
		qp->rq.max = cap->max_recv_wr;
		qp->sq.max = cap->max_send_wr;
	}

	qp->rq.max_gs = cap->max_recv_sge;
	qp->sq.max_gs = max_t(int, cap->max_send_sge,
			      ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
				    MTHCA_INLINE_CHUNK_SIZE) /
			      sizeof (struct mthca_data_seg));

	return 0;
}

int mthca_alloc_qp(struct mthca_dev *dev,
		   struct mthca_pd *pd,
		   struct mthca_cq *send_cq,
		   struct mthca_cq *recv_cq,
		   enum ib_qp_type type,
		   enum ib_sig_type send_policy,
		   struct ib_qp_cap *cap,
		   struct mthca_qp *qp)
{
	int err;

	switch (type) {
	case IB_QPT_RC: qp->transport = RC; break;
	case IB_QPT_UC: qp->transport = UC; break;
	case IB_QPT_UD: qp->transport = UD; break;
	default: return -EINVAL;
	}

	err = mthca_set_qp_size(dev, cap, pd, qp);
	if (err)
		return err;

	qp->qpn = mthca_alloc(&dev->qp_table.alloc);
	if (qp->qpn == -1)
		return -ENOMEM;

	/* initialize port to zero for error-catching. */
	qp->port = 0;

	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
				    send_policy, qp);
	if (err) {
		mthca_free(&dev->qp_table.alloc, qp->qpn);
		return err;
	}

	spin_lock_irq(&dev->qp_table.lock);
	mthca_array_set(&dev->qp_table.qp,
			qp->qpn & (dev->limits.num_qps - 1), qp);
	spin_unlock_irq(&dev->qp_table.lock);

	return 0;
}

static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
	if (send_cq == recv_cq) {
		spin_lock_irq(&send_cq->lock);
		__acquire(&recv_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		spin_lock_irq(&send_cq->lock);
		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock_irq(&recv_cq->lock);
		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
	}
}

static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
	__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
	if (send_cq == recv_cq) {
		__release(&recv_cq->lock);
		spin_unlock_irq(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		spin_unlock(&recv_cq->lock);
		spin_unlock_irq(&send_cq->lock);
	} else {
		spin_unlock(&send_cq->lock);
		spin_unlock_irq(&recv_cq->lock);
	}
}

int mthca_alloc_sqp(struct mthca_dev *dev,
		    struct mthca_pd *pd,
		    struct mthca_cq *send_cq,
		    struct mthca_cq *recv_cq,
		    enum ib_sig_type send_policy,
		    struct ib_qp_cap *cap,
		    int qpn,
		    int port,
		    struct mthca_sqp *sqp)
{
	u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
	int err;

	sqp->qp.transport = MLX;
	err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
	if (err)
		return err;

	sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
	sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
					     &sqp->header_dma, GFP_KERNEL);
	if (!sqp->header_buf)
		return -ENOMEM;

	spin_lock_irq(&dev->qp_table.lock);
	if (mthca_array_get(&dev->qp_table.qp, mqpn))
		err = -EBUSY;
	else
		mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
	spin_unlock_irq(&dev->qp_table.lock);

	if (err)
		goto err_out;

	sqp->qp.port      = port;
	sqp->qp.qpn       = mqpn;
	sqp->qp.transport = MLX;

	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
				    send_policy, &sqp->qp);
	if (err)
		goto err_out_free;

	atomic_inc(&pd->sqp_count);

	return 0;

err_out_free:
	/*
	 * Lock CQs here, so that CQ polling code can do QP lookup
	 * without taking a lock.
	 */
	mthca_lock_cqs(send_cq, recv_cq);

	spin_lock(&dev->qp_table.lock);
	mthca_array_clear(&dev->qp_table.qp, mqpn);
	spin_unlock(&dev->qp_table.lock);

	mthca_unlock_cqs(send_cq, recv_cq);

err_out:
	dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
			  sqp->header_buf, sqp->header_dma);

	return err;
}

static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
{
	int c;

	spin_lock_irq(&dev->qp_table.lock);
	c = qp->refcount;
	spin_unlock_irq(&dev->qp_table.lock);

	return c;
}

void mthca_free_qp(struct mthca_dev *dev,
		   struct mthca_qp *qp)
{
	u8 status;
	struct mthca_cq *send_cq;
	struct mthca_cq *recv_cq;

	send_cq = to_mcq(qp->ibqp.send_cq);
	recv_cq = to_mcq(qp->ibqp.recv_cq);

	/*
	 * Lock CQs here, so that CQ polling code can do QP lookup
	 * without taking a lock.
	 */
	mthca_lock_cqs(send_cq, recv_cq);
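	/* With both CQ locks held, the polling code cannot look this QP up
	 * while we remove it from the array and drop our reference. */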

	spin_lock(&dev->qp_table.lock);
	mthca_array_clear(&dev->qp_table.qp,
			  qp->qpn & (dev->limits.num_qps - 1));
	--qp->refcount;
	spin_unlock(&dev->qp_table.lock);

	mthca_unlock_cqs(send_cq, recv_cq);

	wait_event(qp->wait, !get_qp_refcount(dev, qp));

	if (qp->state != IB_QPS_RESET)
		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
				NULL, 0, &status);

	/*
	 * If this is a userspace QP, the buffers, MR, CQs and so on
	 * will be cleaned up in userspace, so all we have to do is
	 * unref the mem-free tables and free the QPN in our table.
	 */
	if (!qp->ibqp.uobject) {
		mthca_cq_clean(dev, recv_cq, qp->qpn,
			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
		if (send_cq != recv_cq)
			mthca_cq_clean(dev, send_cq, qp->qpn, NULL);

		mthca_free_memfree(dev, qp);
		mthca_free_wqe_buf(dev, qp);
	}

	mthca_unmap_memfree(dev, qp);

	if (is_sqp(dev, qp)) {
		atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
		dma_free_coherent(&dev->pdev->dev,
				  to_msqp(qp)->header_buf_size,
				  to_msqp(qp)->header_buf,
				  to_msqp(qp)->header_dma);
	} else
		mthca_free(&dev->qp_table.alloc, qp->qpn);
}

/* Create UD header for an MLX send and build a data segment for it */
static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
			    int ind, struct ib_send_wr *wr,
			    struct mthca_mlx_seg *mlx,
			    struct mthca_data_seg *data)
{
	int header_size;
	int err;
	u16 pkey;

	ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
			  mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
			  &sqp->ud_header);

	err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
	if (err)
		return err;
	mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
	mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
				  (sqp->ud_header.lrh.destination_lid ==
				   IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
				  (sqp->ud_header.lrh.service_level << 8));
	mlx->rlid = sqp->ud_header.lrh.destination_lid;
	mlx->vcrc = 0;

	switch (wr->opcode) {
	case IB_WR_SEND:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
		sqp->ud_header.immediate_present = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		sqp->ud_header.immediate_present = 1;
		sqp->ud_header.immediate_data = wr->ex.imm_data;
		break;
	default:
		return -EINVAL;
	}

	sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
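	/* QP0 (SMI) traffic must go out on VL15; all other MLX QPs use VL0 here. */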
	if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
		sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
	if (!sqp->qp.ibqp.qp_num)
		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
				   sqp->pkey_index, &pkey);
	else
		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
				   wr->wr.ud.pkey_index, &pkey);
	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
	sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
					       sqp->qkey : wr->wr.ud.remote_qkey);
	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

	header_size = ib_ud_header_pack(&sqp->ud_header,
					sqp->header_buf +
					ind * MTHCA_UD_HEADER_SIZE);

	data->byte_count = cpu_to_be32(header_size);
	data->lkey       = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
	data->addr       = cpu_to_be64(sqp->header_dma +
				       ind * MTHCA_UD_HEADER_SIZE);

	return 0;
}

static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
				    struct ib_cq *ib_cq)
{
	unsigned cur;
	struct mthca_cq *cq;

	cur = wq->head - wq->tail;
	if (likely(cur + nreq < wq->max))
		return 0;

	cq = to_mcq(ib_cq);
	spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	spin_unlock(&cq->lock);

	return cur + nreq >= wq->max;
}

static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
					  u64 remote_addr, u32 rkey)
{
	rseg->raddr    = cpu_to_be64(remote_addr);
	rseg->rkey     = cpu_to_be32(rkey);
	rseg->reserved = 0;
}

static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
					   struct ib_send_wr *wr)
{
	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
	} else {
		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
		aseg->compare  = 0;
	}

}

static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
			     struct ib_send_wr *wr)
{
	useg->lkey    = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
	useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
	useg->dqpn    = cpu_to_be32(wr->wr.ud.remote_qpn);
	useg->qkey    = cpu_to_be32(wr->wr.ud.remote_qkey);

}

static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
			     struct ib_send_wr *wr)
{
	memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
	useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
	useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
}

int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			  struct ib_send_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	void *wqe;
	void *prev_wqe;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	/*
	 * f0 and size0 are only used if nreq != 0, and they will
	 * always be initialized the first time through the main loop
	 * before nreq is incremented.
	 * So nreq cannot become non-zero
	 * without initializing f0 and size0, and they are in fact
	 * never used uninitialized.
	 */
	int uninitialized_var(size0);
	u32 uninitialized_var(f0);
	int ind;
	u8 op0 = 0;

	spin_lock_irqsave(&qp->sq.lock, flags);

	/* XXX check that state is OK to post send */

	ind = qp->sq.next_ind;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_flags & IB_SEND_SIGNALED) ?
			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_flags & IB_SEND_SOLICITED) ?
			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
			cpu_to_be32(1);
		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (qp->transport) {
		case RC:
			switch (wr->opcode) {
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe += sizeof (struct mthca_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
			case IB_WR_RDMA_READ:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UC:
			switch (wr->opcode) {
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UD:
			set_tavor_ud_seg(wqe, wr);
			wqe  += sizeof (struct mthca_tavor_ud_seg);
			size += sizeof (struct mthca_tavor_ud_seg) / 16;
			break;

		case MLX:
			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
					       wqe - sizeof (struct mthca_next_seg),
					       wqe);
			if (err) {
				*bad_wr = wr;
				goto out;
			}
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
			break;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			mthca_err(dev, "too many gathers\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			mthca_set_data_seg(wqe, wr->sg_list + i);
			wqe  += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		/* Add one more inline data segment for ICRC */
		if (qp->transport == MLX) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32((1 << 31) | 4);
			((u32 *) wqe)[1] = 0;
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

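		/* Send wrids are stored after the qp->rq.max receive slots in qp->wrid. */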
		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
			mthca_err(dev, "opcode invalid\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cpu_to_be32(((ind << qp->sq.wqe_shift) +
				     qp->send_wqe_offset) |
				    mthca_opcode[wr->opcode]);
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
				    ((wr->send_flags & IB_SEND_FENCE) ?
				     MTHCA_NEXT_FENCE : 0));

		if (!nreq) {
			size0 = size;
			op0   = mthca_opcode[wr->opcode];
			f0    = wr->send_flags & IB_SEND_FENCE ?
				MTHCA_SEND_DOORBELL_FENCE : 0;
		}

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		wmb();

		mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
			       qp->send_wqe_offset) | f0 | op0,
			      (qp->qpn << 8) | size0,
			      dev->kar + MTHCA_SEND_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
		/*
		 * Make sure doorbells don't leak out of SQ spinlock
		 * and reach the HCA out of order:
		 */
		mmiowb();
	}

	qp->sq.next_ind = ind;
	qp->sq.head    += nreq;

	spin_unlock_irqrestore(&qp->sq.lock, flags);
	return err;
}

int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	/*
	 * size0 is only used if nreq != 0, and it will always be
	 * initialized the first time through the main loop before
	 * nreq is incremented.  So nreq cannot become non-zero
	 * without initializing size0, and it is in fact never used
	 * uninitialized.
	 */
	int uninitialized_var(size0);
	int ind;
	void *wqe;
	void *prev_wqe;

	spin_lock_irqsave(&qp->rq.lock, flags);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.next_ind;

	for (nreq = 0; wr; wr = wr->next) {
		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);
		prev_wqe = qp->rq.last;
		qp->rq.last = wqe;

		((struct mthca_next_seg *) wqe)->ee_nds =
			cpu_to_be32(MTHCA_NEXT_DBD);
		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			mthca_set_data_seg(wqe, wr->sg_list + i);
			wqe  += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind] = wr->wr_id;

		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cpu_to_be32(MTHCA_NEXT_DBD | size);

		if (!nreq)
			size0 = size;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;

		++nreq;
		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
			nreq = 0;

			wmb();

			mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
				      qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));

			qp->rq.next_ind = ind;
			qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
		}
	}

out:
	if (likely(nreq)) {
		wmb();

		mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
			      qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
	}

	qp->rq.next_ind = ind;
	qp->rq.head    += nreq;

	/*
	 * Make sure doorbells don't leak out of RQ spinlock and reach
	 * the HCA out of order:
	 */
	mmiowb();

	spin_unlock_irqrestore(&qp->rq.lock, flags);
	return err;
}

int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			  struct ib_send_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	u32 dbhi;
	void *wqe;
	void *prev_wqe;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	/*
	 * f0 and size0 are only used if nreq != 0, and they will
	 * always be initialized the first time through the main loop
	 * before nreq is incremented.  So nreq cannot become non-zero
	 * without initializing f0 and size0, and they are in fact
	 * never used uninitialized.
	 */
	int uninitialized_var(size0);
	u32 uninitialized_var(f0);
	int ind;
	u8 op0 = 0;

	spin_lock_irqsave(&qp->sq.lock, flags);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head & (qp->sq.max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
			nreq = 0;

			dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
				((qp->sq.head & 0xffff) << 8) | f0 | op0;

			qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;

			/*
			 * Make sure that descriptors are written before
			 * doorbell record.
			 */
			wmb();
			*qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);

			/*
			 * Make sure doorbell record is written before we
			 * write MMIO send doorbell.
			 */
			wmb();

			mthca_write64(dbhi, (qp->qpn << 8) | size0,
				      dev->kar + MTHCA_SEND_DOORBELL,
				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
		}

		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_flags & IB_SEND_SIGNALED) ?
			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_flags & IB_SEND_SOLICITED) ?
			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
			((wr->send_flags & IB_SEND_IP_CSUM) ?
			 cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
			cpu_to_be32(1);
		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (qp->transport) {
		case RC:
			switch (wr->opcode) {
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe += sizeof (struct mthca_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UC:
			switch (wr->opcode) {
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UD:
			set_arbel_ud_seg(wqe, wr);
			wqe  += sizeof (struct mthca_arbel_ud_seg);
			size += sizeof (struct mthca_arbel_ud_seg) / 16;
			break;

		case MLX:
			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
					       wqe - sizeof (struct mthca_next_seg),
					       wqe);
			if (err) {
				*bad_wr = wr;
				goto out;
			}
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
			break;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			mthca_err(dev, "too many gathers\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			mthca_set_data_seg(wqe, wr->sg_list + i);
			wqe  += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		/* Add one more inline data segment for ICRC */
		if (qp->transport == MLX) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32((1 << 31) | 4);
			((u32 *) wqe)[1] = 0;
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
			mthca_err(dev, "opcode invalid\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cpu_to_be32(((ind << qp->sq.wqe_shift) +
				     qp->send_wqe_offset) |
				    mthca_opcode[wr->opcode]);
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cpu_to_be32(MTHCA_NEXT_DBD | size |
				    ((wr->send_flags & IB_SEND_FENCE) ?
				     MTHCA_NEXT_FENCE : 0));

		if (!nreq) {
			size0 = size;
			op0   = mthca_opcode[wr->opcode];
			f0    = wr->send_flags & IB_SEND_FENCE ?
				MTHCA_SEND_DOORBELL_FENCE : 0;
		}

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;

		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);

		/*
		 * Make sure doorbell record is written before we
		 * write MMIO send doorbell.
		 */
		wmb();

		mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
	}

	/*
	 * Make sure doorbells don't leak out of SQ spinlock and reach
	 * the HCA out of order:
	 */
	mmiowb();

	spin_unlock_irqrestore(&qp->sq.lock, flags);
	return err;
}

int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	unsigned long flags;
	int err = 0;
	int nreq;
	int ind;
	int i;
	void *wqe;

	spin_lock_irqsave(&qp->rq.lock, flags);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n",
					qp->qpn, qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);

		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			mthca_set_data_seg(wqe, wr->sg_list + i);
			wqe += sizeof (struct mthca_data_seg);
		}

		if (i < qp->rq.max_gs)
			mthca_set_data_seg_inval(wqe);

		qp->wrid[ind] = wr->wr_id;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}
out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);
	return err;
}

void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
			int index, int *dbd, __be32 *new_wqe)
{
	struct mthca_next_seg *next;

	/*
	 * For SRQs, all receive WQEs generate a CQE, so we're always
	 * at the end of the doorbell chain.
	 */
	if (qp->ibqp.srq && !is_send) {
		*new_wqe = 0;
		return;
	}

	if (is_send)
		next = get_send_wqe(qp, index);
	else
		next = get_recv_wqe(qp, index);

	*dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
	if (next->ee_nds & cpu_to_be32(0x3f))
		*new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
			(next->ee_nds & cpu_to_be32(0x3f));
	else
		*new_wqe = 0;
}

int mthca_init_qp_table(struct mthca_dev *dev)
{
	int err;
	u8 status;
	int i;

	spin_lock_init(&dev->qp_table.lock);

	/*
	 * We reserve 2 extra QPs per port for the special QPs.  The
	 * special QP for port 1 has to be even, so round up.
	 */
	dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
	err = mthca_alloc_init(&dev->qp_table.alloc,
			       dev->limits.num_qps,
			       (1 << 24) - 1,
			       dev->qp_table.sqp_start +
			       MTHCA_MAX_PORTS * 2);
	if (err)
		return err;

	err = mthca_array_init(&dev->qp_table.qp,
			       dev->limits.num_qps);
	if (err) {
		mthca_alloc_cleanup(&dev->qp_table.alloc);
		return err;
	}

	for (i = 0; i < 2; ++i) {
		err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
					    dev->qp_table.sqp_start + i * 2,
					    &status);
		if (err)
			goto err_out;
		if (status) {
			mthca_warn(dev, "CONF_SPECIAL_QP returned "
				   "status %02x, aborting.\n",
				   status);
			err = -EINVAL;
			goto err_out;
		}
	}
	return 0;

err_out:
	for (i = 0; i < 2; ++i)
		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);

	mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
	mthca_alloc_cleanup(&dev->qp_table.alloc);

	return err;
}

void mthca_cleanup_qp_table(struct mthca_dev *dev)
{
	int i;
	u8 status;

	for (i = 0; i < 2; ++i)
		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);

	mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
	mthca_alloc_cleanup(&dev->qp_table.alloc);
}