Path: blob/master/drivers/infiniband/hw/ehca/ehca_reqs.c
15112 views
/*1* IBM eServer eHCA Infiniband device driver for Linux on POWER2*3* post_send/recv, poll_cq, req_notify4*5* Authors: Hoang-Nam Nguyen <[email protected]>6* Waleri Fomin <[email protected]>7* Joachim Fenkes <[email protected]>8* Reinhard Ernst <[email protected]>9*10* Copyright (c) 2005 IBM Corporation11*12* All rights reserved.13*14* This source code is distributed under a dual license of GPL v2.0 and OpenIB15* BSD.16*17* OpenIB BSD License18*19* Redistribution and use in source and binary forms, with or without20* modification, are permitted provided that the following conditions are met:21*22* Redistributions of source code must retain the above copyright notice, this23* list of conditions and the following disclaimer.24*25* Redistributions in binary form must reproduce the above copyright notice,26* this list of conditions and the following disclaimer in the documentation27* and/or other materials28* provided with the distribution.29*30* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"31* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE32* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE33* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE34* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR35* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF36* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR37* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER38* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)39* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE40* POSSIBILITY OF SUCH DAMAGE.41*/424344#include <asm/system.h>45#include "ehca_classes.h"46#include "ehca_tools.h"47#include "ehca_qes.h"48#include "ehca_iverbs.h"49#include "hcp_if.h"50#include "hipz_fns.h"5152/* in RC traffic, insert an empty RDMA READ every this many packets */53#define ACK_CIRC_THRESHOLD 20000005455static u64 replace_wr_id(u64 wr_id, u16 idx)56{57u64 ret;5859ret = wr_id & ~QMAP_IDX_MASK;60ret |= idx & QMAP_IDX_MASK;6162return ret;63}6465static u16 get_app_wr_id(u64 wr_id)66{67return wr_id & QMAP_IDX_MASK;68}6970static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,71struct ehca_wqe *wqe_p,72struct ib_recv_wr *recv_wr,73u32 rq_map_idx)74{75u8 cnt_ds;76if (unlikely((recv_wr->num_sge < 0) ||77(recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {78ehca_gen_err("Invalid number of WQE SGE. "79"num_sqe=%x max_nr_of_sg=%x",80recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);81return -EINVAL; /* invalid SG list length */82}8384/* clear wqe header until sglist */85memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));8687wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);88wqe_p->nr_of_data_seg = recv_wr->num_sge;8990for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {91wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =92recv_wr->sg_list[cnt_ds].addr;93wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =94recv_wr->sg_list[cnt_ds].lkey;95wqe_p->u.all_rcv.sg_list[cnt_ds].length =96recv_wr->sg_list[cnt_ds].length;97}9899if (ehca_debug_level >= 3) {100ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",101ipz_rqueue);102ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");103}104105return 0;106}107108#if defined(DEBUG_GSI_SEND_WR)109110/* need ib_mad struct */111#include <rdma/ib_mad.h>112113static void trace_send_wr_ud(const struct ib_send_wr *send_wr)114{115int idx;116int j;117while (send_wr) {118struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;119struct ib_sge *sge = send_wr->sg_list;120ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "121"send_flags=%x opcode=%x", idx, send_wr->wr_id,122send_wr->num_sge, send_wr->send_flags,123send_wr->opcode);124if (mad_hdr) {125ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "126"mgmt_class=%x class_version=%x method=%x "127"status=%x class_specific=%x tid=%lx "128"attr_id=%x resv=%x attr_mod=%x",129idx, mad_hdr->base_version,130mad_hdr->mgmt_class,131mad_hdr->class_version, mad_hdr->method,132mad_hdr->status, mad_hdr->class_specific,133mad_hdr->tid, mad_hdr->attr_id,134mad_hdr->resv,135mad_hdr->attr_mod);136}137for (j = 0; j < send_wr->num_sge; j++) {138u8 *data = (u8 *)abs_to_virt(sge->addr);139ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "140"lkey=%x",141idx, j, data, sge->length, sge->lkey);142/* assume length is n*16 */143ehca_dmp(data, sge->length, "send_wr#%x sge#%x",144idx, j);145sge++;146} /* eof for j */147idx++;148send_wr = send_wr->next;149} /* eof while send_wr */150}151152#endif /* DEBUG_GSI_SEND_WR */153154static inline int ehca_write_swqe(struct ehca_qp *qp,155struct ehca_wqe *wqe_p,156const struct ib_send_wr *send_wr,157u32 sq_map_idx,158int hidden)159{160u32 idx;161u64 dma_length;162struct ehca_av *my_av;163u32 remote_qkey = send_wr->wr.ud.remote_qkey;164struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];165166if (unlikely((send_wr->num_sge < 0) ||167(send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {168ehca_gen_err("Invalid number of WQE SGE. "169"num_sqe=%x max_nr_of_sg=%x",170send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);171return -EINVAL; /* invalid SG list length */172}173174/* clear wqe header until sglist */175memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));176177wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);178179qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);180qmap_entry->reported = 0;181qmap_entry->cqe_req = 0;182183switch (send_wr->opcode) {184case IB_WR_SEND:185case IB_WR_SEND_WITH_IMM:186wqe_p->optype = WQE_OPTYPE_SEND;187break;188case IB_WR_RDMA_WRITE:189case IB_WR_RDMA_WRITE_WITH_IMM:190wqe_p->optype = WQE_OPTYPE_RDMAWRITE;191break;192case IB_WR_RDMA_READ:193wqe_p->optype = WQE_OPTYPE_RDMAREAD;194break;195default:196ehca_gen_err("Invalid opcode=%x", send_wr->opcode);197return -EINVAL; /* invalid opcode */198}199200wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;201202wqe_p->wr_flag = 0;203204if ((send_wr->send_flags & IB_SEND_SIGNALED ||205qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)206&& !hidden) {207wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;208qmap_entry->cqe_req = 1;209}210211if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||212send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {213/* this might not work as long as HW does not support it */214wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);215wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;216}217218wqe_p->nr_of_data_seg = send_wr->num_sge;219220switch (qp->qp_type) {221case IB_QPT_SMI:222case IB_QPT_GSI:223/* no break is intential here */224case IB_QPT_UD:225/* IB 1.2 spec C10-15 compliance */226if (send_wr->wr.ud.remote_qkey & 0x80000000)227remote_qkey = qp->qkey;228229wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;230wqe_p->local_ee_context_qkey = remote_qkey;231if (unlikely(!send_wr->wr.ud.ah)) {232ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);233return -EINVAL;234}235if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {236ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);237return -EINVAL;238}239my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);240wqe_p->u.ud_av.ud_av = my_av->av;241242/*243* omitted check of IB_SEND_INLINE244* since HW does not support it245*/246for (idx = 0; idx < send_wr->num_sge; idx++) {247wqe_p->u.ud_av.sg_list[idx].vaddr =248send_wr->sg_list[idx].addr;249wqe_p->u.ud_av.sg_list[idx].lkey =250send_wr->sg_list[idx].lkey;251wqe_p->u.ud_av.sg_list[idx].length =252send_wr->sg_list[idx].length;253} /* eof for idx */254if (qp->qp_type == IB_QPT_SMI ||255qp->qp_type == IB_QPT_GSI)256wqe_p->u.ud_av.ud_av.pmtu = 1;257if (qp->qp_type == IB_QPT_GSI) {258wqe_p->pkeyi = send_wr->wr.ud.pkey_index;259#ifdef DEBUG_GSI_SEND_WR260trace_send_wr_ud(send_wr);261#endif /* DEBUG_GSI_SEND_WR */262}263break;264265case IB_QPT_UC:266if (send_wr->send_flags & IB_SEND_FENCE)267wqe_p->wr_flag |= WQE_WRFLAG_FENCE;268/* no break is intentional here */269case IB_QPT_RC:270/* TODO: atomic not implemented */271wqe_p->u.nud.remote_virtual_address =272send_wr->wr.rdma.remote_addr;273wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;274275/*276* omitted checking of IB_SEND_INLINE277* since HW does not support it278*/279dma_length = 0;280for (idx = 0; idx < send_wr->num_sge; idx++) {281wqe_p->u.nud.sg_list[idx].vaddr =282send_wr->sg_list[idx].addr;283wqe_p->u.nud.sg_list[idx].lkey =284send_wr->sg_list[idx].lkey;285wqe_p->u.nud.sg_list[idx].length =286send_wr->sg_list[idx].length;287dma_length += send_wr->sg_list[idx].length;288} /* eof idx */289wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;290291/* unsolicited ack circumvention */292if (send_wr->opcode == IB_WR_RDMA_READ) {293/* on RDMA read, switch on and reset counters */294qp->message_count = qp->packet_count = 0;295qp->unsol_ack_circ = 1;296} else297/* else estimate #packets */298qp->packet_count += (dma_length >> qp->mtu_shift) + 1;299300break;301302default:303ehca_gen_err("Invalid qptype=%x", qp->qp_type);304return -EINVAL;305}306307if (ehca_debug_level >= 3) {308ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);309ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");310}311return 0;312}313314/* map_ib_wc_status converts raw cqe_status to ib_wc_status */315static inline void map_ib_wc_status(u32 cqe_status,316enum ib_wc_status *wc_status)317{318if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {319switch (cqe_status & 0x3F) {320case 0x01:321case 0x21:322*wc_status = IB_WC_LOC_LEN_ERR;323break;324case 0x02:325case 0x22:326*wc_status = IB_WC_LOC_QP_OP_ERR;327break;328case 0x03:329case 0x23:330*wc_status = IB_WC_LOC_EEC_OP_ERR;331break;332case 0x04:333case 0x24:334*wc_status = IB_WC_LOC_PROT_ERR;335break;336case 0x05:337case 0x25:338*wc_status = IB_WC_WR_FLUSH_ERR;339break;340case 0x06:341*wc_status = IB_WC_MW_BIND_ERR;342break;343case 0x07: /* remote error - look into bits 20:24 */344switch ((cqe_status345& WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {346case 0x0:347/*348* PSN Sequence Error!349* couldn't find a matching status!350*/351*wc_status = IB_WC_GENERAL_ERR;352break;353case 0x1:354*wc_status = IB_WC_REM_INV_REQ_ERR;355break;356case 0x2:357*wc_status = IB_WC_REM_ACCESS_ERR;358break;359case 0x3:360*wc_status = IB_WC_REM_OP_ERR;361break;362case 0x4:363*wc_status = IB_WC_REM_INV_RD_REQ_ERR;364break;365}366break;367case 0x08:368*wc_status = IB_WC_RETRY_EXC_ERR;369break;370case 0x09:371*wc_status = IB_WC_RNR_RETRY_EXC_ERR;372break;373case 0x0A:374case 0x2D:375*wc_status = IB_WC_REM_ABORT_ERR;376break;377case 0x0B:378case 0x2E:379*wc_status = IB_WC_INV_EECN_ERR;380break;381case 0x0C:382case 0x2F:383*wc_status = IB_WC_INV_EEC_STATE_ERR;384break;385case 0x0D:386*wc_status = IB_WC_BAD_RESP_ERR;387break;388case 0x10:389/* WQE purged */390*wc_status = IB_WC_WR_FLUSH_ERR;391break;392default:393*wc_status = IB_WC_FATAL_ERR;394395}396} else397*wc_status = IB_WC_SUCCESS;398}399400static inline int post_one_send(struct ehca_qp *my_qp,401struct ib_send_wr *cur_send_wr,402int hidden)403{404struct ehca_wqe *wqe_p;405int ret;406u32 sq_map_idx;407u64 start_offset = my_qp->ipz_squeue.current_q_offset;408409/* get pointer next to free WQE */410wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);411if (unlikely(!wqe_p)) {412/* too many posted work requests: queue overflow */413ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "414"qp_num=%x", my_qp->ib_qp.qp_num);415return -ENOMEM;416}417418/*419* Get the index of the WQE in the send queue. The same index is used420* for writing into the sq_map.421*/422sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;423424/* write a SEND WQE into the QUEUE */425ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);426/*427* if something failed,428* reset the free entry pointer to the start value429*/430if (unlikely(ret)) {431my_qp->ipz_squeue.current_q_offset = start_offset;432ehca_err(my_qp->ib_qp.device, "Could not write WQE "433"qp_num=%x", my_qp->ib_qp.qp_num);434return -EINVAL;435}436437return 0;438}439440int ehca_post_send(struct ib_qp *qp,441struct ib_send_wr *send_wr,442struct ib_send_wr **bad_send_wr)443{444struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);445int wqe_cnt = 0;446int ret = 0;447unsigned long flags;448449/* Reject WR if QP is in RESET, INIT or RTR state */450if (unlikely(my_qp->state < IB_QPS_RTS)) {451ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",452my_qp->state, qp->qp_num);453ret = -EINVAL;454goto out;455}456457/* LOCK the QUEUE */458spin_lock_irqsave(&my_qp->spinlock_s, flags);459460/* Send an empty extra RDMA read if:461* 1) there has been an RDMA read on this connection before462* 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets463* 3) we can be sure that any previous extra RDMA read has been464* processed so we don't overflow the SQ465*/466if (unlikely(my_qp->unsol_ack_circ &&467my_qp->packet_count > ACK_CIRC_THRESHOLD &&468my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {469/* insert an empty RDMA READ to fix up the remote QP state */470struct ib_send_wr circ_wr;471memset(&circ_wr, 0, sizeof(circ_wr));472circ_wr.opcode = IB_WR_RDMA_READ;473post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */474wqe_cnt++;475ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);476my_qp->message_count = my_qp->packet_count = 0;477}478479/* loop processes list of send reqs */480while (send_wr) {481ret = post_one_send(my_qp, send_wr, 0);482if (unlikely(ret)) {483goto post_send_exit0;484}485wqe_cnt++;486send_wr = send_wr->next;487}488489post_send_exit0:490iosync(); /* serialize GAL register access */491hipz_update_sqa(my_qp, wqe_cnt);492if (unlikely(ret || ehca_debug_level >= 2))493ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",494my_qp, qp->qp_num, wqe_cnt, ret);495my_qp->message_count += wqe_cnt;496spin_unlock_irqrestore(&my_qp->spinlock_s, flags);497498out:499if (ret)500*bad_send_wr = send_wr;501return ret;502}503504static int internal_post_recv(struct ehca_qp *my_qp,505struct ib_device *dev,506struct ib_recv_wr *recv_wr,507struct ib_recv_wr **bad_recv_wr)508{509struct ehca_wqe *wqe_p;510int wqe_cnt = 0;511int ret = 0;512u32 rq_map_idx;513unsigned long flags;514struct ehca_qmap_entry *qmap_entry;515516if (unlikely(!HAS_RQ(my_qp))) {517ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",518my_qp, my_qp->real_qp_num, my_qp->ext_type);519ret = -ENODEV;520goto out;521}522523/* LOCK the QUEUE */524spin_lock_irqsave(&my_qp->spinlock_r, flags);525526/* loop processes list of recv reqs */527while (recv_wr) {528u64 start_offset = my_qp->ipz_rqueue.current_q_offset;529/* get pointer next to free WQE */530wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);531if (unlikely(!wqe_p)) {532/* too many posted work requests: queue overflow */533ret = -ENOMEM;534ehca_err(dev, "Too many posted WQEs "535"qp_num=%x", my_qp->real_qp_num);536goto post_recv_exit0;537}538/*539* Get the index of the WQE in the recv queue. The same index540* is used for writing into the rq_map.541*/542rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;543544/* write a RECV WQE into the QUEUE */545ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,546rq_map_idx);547/*548* if something failed,549* reset the free entry pointer to the start value550*/551if (unlikely(ret)) {552my_qp->ipz_rqueue.current_q_offset = start_offset;553ret = -EINVAL;554ehca_err(dev, "Could not write WQE "555"qp_num=%x", my_qp->real_qp_num);556goto post_recv_exit0;557}558559qmap_entry = &my_qp->rq_map.map[rq_map_idx];560qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);561qmap_entry->reported = 0;562qmap_entry->cqe_req = 1;563564wqe_cnt++;565recv_wr = recv_wr->next;566} /* eof for recv_wr */567568post_recv_exit0:569iosync(); /* serialize GAL register access */570hipz_update_rqa(my_qp, wqe_cnt);571if (unlikely(ret || ehca_debug_level >= 2))572ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",573my_qp, my_qp->real_qp_num, wqe_cnt, ret);574spin_unlock_irqrestore(&my_qp->spinlock_r, flags);575576out:577if (ret)578*bad_recv_wr = recv_wr;579580return ret;581}582583int ehca_post_recv(struct ib_qp *qp,584struct ib_recv_wr *recv_wr,585struct ib_recv_wr **bad_recv_wr)586{587struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);588589/* Reject WR if QP is in RESET state */590if (unlikely(my_qp->state == IB_QPS_RESET)) {591ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",592my_qp->state, qp->qp_num);593*bad_recv_wr = recv_wr;594return -EINVAL;595}596597return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);598}599600int ehca_post_srq_recv(struct ib_srq *srq,601struct ib_recv_wr *recv_wr,602struct ib_recv_wr **bad_recv_wr)603{604return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),605srq->device, recv_wr, bad_recv_wr);606}607608/*609* ib_wc_opcode table converts ehca wc opcode to ib610* Since we use zero to indicate invalid opcode, the actual ib opcode must611* be decremented!!!612*/613static const u8 ib_wc_opcode[255] = {614[0x01] = IB_WC_RECV+1,615[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,616[0x04] = IB_WC_BIND_MW+1,617[0x08] = IB_WC_FETCH_ADD+1,618[0x10] = IB_WC_COMP_SWAP+1,619[0x20] = IB_WC_RDMA_WRITE+1,620[0x40] = IB_WC_RDMA_READ+1,621[0x80] = IB_WC_SEND+1622};623624/* internal function to poll one entry of cq */625static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)626{627int ret = 0, qmap_tail_idx;628struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);629struct ehca_cqe *cqe;630struct ehca_qp *my_qp;631struct ehca_qmap_entry *qmap_entry;632struct ehca_queue_map *qmap;633int cqe_count = 0, is_error;634635repoll:636cqe = (struct ehca_cqe *)637ipz_qeit_get_inc_valid(&my_cq->ipz_queue);638if (!cqe) {639ret = -EAGAIN;640if (ehca_debug_level >= 3)641ehca_dbg(cq->device, "Completion queue is empty "642"my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);643goto poll_cq_one_exit0;644}645646/* prevents loads being reordered across this point */647rmb();648649cqe_count++;650if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {651struct ehca_qp *qp;652int purgeflag;653unsigned long flags;654655qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);656if (!qp) {657ehca_err(cq->device, "cq_num=%x qp_num=%x "658"could not find qp -> ignore cqe",659my_cq->cq_number, cqe->local_qp_number);660ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",661my_cq->cq_number, cqe->local_qp_number);662/* ignore this purged cqe */663goto repoll;664}665spin_lock_irqsave(&qp->spinlock_s, flags);666purgeflag = qp->sqerr_purgeflag;667spin_unlock_irqrestore(&qp->spinlock_s, flags);668669if (purgeflag) {670ehca_dbg(cq->device,671"Got CQE with purged bit qp_num=%x src_qp=%x",672cqe->local_qp_number, cqe->remote_qp_number);673if (ehca_debug_level >= 2)674ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",675cqe->local_qp_number,676cqe->remote_qp_number);677/*678* ignore this to avoid double cqes of bad wqe679* that caused sqe and turn off purge flag680*/681qp->sqerr_purgeflag = 0;682goto repoll;683}684}685686is_error = cqe->status & WC_STATUS_ERROR_BIT;687688/* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */689if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {690ehca_dbg(cq->device,691"Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",692is_error ? "ERROR " : "", my_cq, my_cq->cq_number);693ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",694my_cq, my_cq->cq_number);695ehca_dbg(cq->device,696"ehca_cq=%p cq_num=%x -------------------------",697my_cq, my_cq->cq_number);698}699700read_lock(&ehca_qp_idr_lock);701my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);702read_unlock(&ehca_qp_idr_lock);703if (!my_qp)704goto repoll;705wc->qp = &my_qp->ib_qp;706707qmap_tail_idx = get_app_wr_id(cqe->work_request_id);708if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))709/* We got a send completion. */710qmap = &my_qp->sq_map;711else712/* We got a receive completion. */713qmap = &my_qp->rq_map;714715/* advance the tail pointer */716qmap->tail = qmap_tail_idx;717718if (is_error) {719/*720* set left_to_poll to 0 because in error state, we will not721* get any additional CQEs722*/723my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,724my_qp->sq_map.entries);725my_qp->sq_map.left_to_poll = 0;726ehca_add_to_err_list(my_qp, 1);727728my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,729my_qp->rq_map.entries);730my_qp->rq_map.left_to_poll = 0;731if (HAS_RQ(my_qp))732ehca_add_to_err_list(my_qp, 0);733}734735qmap_entry = &qmap->map[qmap_tail_idx];736if (qmap_entry->reported) {737ehca_warn(cq->device, "Double cqe on qp_num=%#x",738my_qp->real_qp_num);739/* found a double cqe, discard it and read next one */740goto repoll;741}742743wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);744qmap_entry->reported = 1;745746/* if left_to_poll is decremented to 0, add the QP to the error list */747if (qmap->left_to_poll > 0) {748qmap->left_to_poll--;749if ((my_qp->sq_map.left_to_poll == 0) &&750(my_qp->rq_map.left_to_poll == 0)) {751ehca_add_to_err_list(my_qp, 1);752if (HAS_RQ(my_qp))753ehca_add_to_err_list(my_qp, 0);754}755}756757/* eval ib_wc_opcode */758wc->opcode = ib_wc_opcode[cqe->optype]-1;759if (unlikely(wc->opcode == -1)) {760ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "761"ehca_cq=%p cq_num=%x",762cqe->optype, cqe->status, my_cq, my_cq->cq_number);763/* dump cqe for other infos */764ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",765my_cq, my_cq->cq_number);766/* update also queue adder to throw away this entry!!! */767goto repoll;768}769770/* eval ib_wc_status */771if (unlikely(is_error)) {772/* complete with errors */773map_ib_wc_status(cqe->status, &wc->status);774wc->vendor_err = wc->status;775} else776wc->status = IB_WC_SUCCESS;777778wc->byte_len = cqe->nr_bytes_transferred;779wc->pkey_index = cqe->pkey_index;780wc->slid = cqe->rlid;781wc->dlid_path_bits = cqe->dlid;782wc->src_qp = cqe->remote_qp_number;783/*784* HW has "Immed data present" and "GRH present" in bits 6 and 5.785* SW defines those in bits 1 and 0, so we can just shift and mask.786*/787wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;788wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);789wc->sl = cqe->service_level;790791poll_cq_one_exit0:792if (cqe_count > 0)793hipz_update_feca(my_cq, cqe_count);794795return ret;796}797798static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,799struct ib_wc *wc, int num_entries,800struct ipz_queue *ipz_queue, int on_sq)801{802int nr = 0;803struct ehca_wqe *wqe;804u64 offset;805struct ehca_queue_map *qmap;806struct ehca_qmap_entry *qmap_entry;807808if (on_sq)809qmap = &my_qp->sq_map;810else811qmap = &my_qp->rq_map;812813qmap_entry = &qmap->map[qmap->next_wqe_idx];814815while ((nr < num_entries) && (qmap_entry->reported == 0)) {816/* generate flush CQE */817818memset(wc, 0, sizeof(*wc));819820offset = qmap->next_wqe_idx * ipz_queue->qe_size;821wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);822if (!wqe) {823ehca_err(cq->device, "Invalid wqe offset=%#llx on "824"qp_num=%#x", offset, my_qp->real_qp_num);825return nr;826}827828wc->wr_id = replace_wr_id(wqe->work_request_id,829qmap_entry->app_wr_id);830831if (on_sq) {832switch (wqe->optype) {833case WQE_OPTYPE_SEND:834wc->opcode = IB_WC_SEND;835break;836case WQE_OPTYPE_RDMAWRITE:837wc->opcode = IB_WC_RDMA_WRITE;838break;839case WQE_OPTYPE_RDMAREAD:840wc->opcode = IB_WC_RDMA_READ;841break;842default:843ehca_err(cq->device, "Invalid optype=%x",844wqe->optype);845return nr;846}847} else848wc->opcode = IB_WC_RECV;849850if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {851wc->ex.imm_data = wqe->immediate_data;852wc->wc_flags |= IB_WC_WITH_IMM;853}854855wc->status = IB_WC_WR_FLUSH_ERR;856857wc->qp = &my_qp->ib_qp;858859/* mark as reported and advance next_wqe pointer */860qmap_entry->reported = 1;861qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,862qmap->entries);863qmap_entry = &qmap->map[qmap->next_wqe_idx];864865wc++; nr++;866}867868return nr;869870}871872int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)873{874struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);875int nr;876struct ehca_qp *err_qp;877struct ib_wc *current_wc = wc;878int ret = 0;879unsigned long flags;880int entries_left = num_entries;881882if (num_entries < 1) {883ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "884"cq_num=%x", num_entries, my_cq, my_cq->cq_number);885ret = -EINVAL;886goto poll_cq_exit0;887}888889spin_lock_irqsave(&my_cq->spinlock, flags);890891/* generate flush cqes for send queues */892list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {893nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,894&err_qp->ipz_squeue, 1);895entries_left -= nr;896current_wc += nr;897898if (entries_left == 0)899break;900}901902/* generate flush cqes for receive queues */903list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {904nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,905&err_qp->ipz_rqueue, 0);906entries_left -= nr;907current_wc += nr;908909if (entries_left == 0)910break;911}912913for (nr = 0; nr < entries_left; nr++) {914ret = ehca_poll_cq_one(cq, current_wc);915if (ret)916break;917current_wc++;918} /* eof for nr */919entries_left -= nr;920921spin_unlock_irqrestore(&my_cq->spinlock, flags);922if (ret == -EAGAIN || !ret)923ret = num_entries - entries_left;924925poll_cq_exit0:926return ret;927}928929int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)930{931struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);932int ret = 0;933934switch (notify_flags & IB_CQ_SOLICITED_MASK) {935case IB_CQ_SOLICITED:936hipz_set_cqx_n0(my_cq, 1);937break;938case IB_CQ_NEXT_COMP:939hipz_set_cqx_n1(my_cq, 1);940break;941default:942return -EINVAL;943}944945if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {946unsigned long spl_flags;947spin_lock_irqsave(&my_cq->spinlock, spl_flags);948ret = ipz_qeit_is_valid(&my_cq->ipz_queue);949spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);950}951952return ret;953}954955956