Path: sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "sdp.h"

#define sdp_cnt(var) do { (var)++; } while (0)

SDP_MODPARAM_SINT(sdp_keepalive_probes_sent, 0,
	"Total number of keepalive probes sent.");

static int sdp_process_tx_cq(struct sdp_sock *ssk);
static void sdp_poll_tx_timeout(void *data);

int
sdp_xmit_poll(struct sdp_sock *ssk, int force)
{
	int wc_processed = 0;

	SDP_WLOCK_ASSERT(ssk);
	sdp_prf(ssk->socket, NULL, "%s", __func__);

	/* If we don't have a pending timer, set one up to catch our recent
	   post in case the interface becomes idle */
	if (!callout_pending(&ssk->tx_ring.timer))
		callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT,
		    sdp_poll_tx_timeout, ssk);

	/* Poll the CQ every SDP_TX_POLL_MODER packets */
	if (force || (++ssk->tx_ring.poll_cnt & (SDP_TX_POLL_MODER - 1)) == 0)
		wc_processed = sdp_process_tx_cq(ssk);

	return wc_processed;
}

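/*
 * Post a single data packet on the send queue.  Each mbuf in the chain is
 * DMA mapped into a scatter/gather entry, the BSDH (struct sdp_bsdh) header
 * is stamped with the current sequence number, ack and advertised receive
 * credits, and one signalled IB_WR_SEND work request is posted to the QP.
 * If the post fails, the mapping is torn down and the connection is reset
 * with ECONNRESET.
 */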
void
sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h;
	unsigned long mseq;
	struct ib_device *dev;
	const struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge;
	struct ib_send_wr tx_wr = { NULL };
	int i, rc;
	u64 addr;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, mb->len);

	if (!ssk->qp_active) {
		m_freem(mb);
		return;
	}

	mseq = ring_head(ssk->tx_ring);
	h = mtod(mb, struct sdp_bsdh *);
	ssk->tx_packets++;
	ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
			    "before being sent!\n");
			WARN_ON(1);
			m_freem(mb);
			return;
		}
		TX_SRCAVAIL_STATE(mb)->mseq = mseq;
	}
#endif

	if (unlikely(mb->m_flags & M_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	mb->m_flags |= M_RDONLY;	/* Don't allow compression once sent. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(mb->m_pkthdr.len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
	    mid2str(h->mid), rx_ring_posted(ssk), mseq,
	    ntohl(h->mseq_ack));

	SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->mb = mb;
	dev = ssk->ib_device;
	sge = &ibsge[0];
	for (i = 0; mb != NULL; i++, mb = mb->m_next, sge++) {
		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
		    DMA_TO_DEVICE);
		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));
		BUG_ON(i >= SDP_MAX_SEND_SGES);
		tx_req->mapping[i] = addr;
		sge->addr = addr;
		sge->length = mb->m_len;
		sge->lkey = ssk->sdp_dev->pd->local_dma_lkey;
	}
	tx_wr.next = NULL;
	tx_wr.wr_id = mseq | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = i;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = IB_SEND_SIGNALED;
	if (unlikely(tx_req->mb->m_flags & M_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(ssk->socket,
		    "ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

		sdp_notify(ssk, ECONNRESET);
		m_freem(tx_req->mb);
		return;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;
}

static struct mbuf *
sdp_send_completion(struct sdp_sock *ssk, int mseq)
{
	struct ib_device *dev;
	struct sdp_buf *tx_req;
	struct mbuf *mb = NULL;
	struct sdp_tx_ring *tx_ring = &ssk->tx_ring;

	if (unlikely(mseq != ring_tail(*tx_ring))) {
		printk(KERN_WARNING "Bogus send completion id %d tail %d\n",
		    mseq, ring_tail(*tx_ring));
		goto out;
	}

	dev = ssk->ib_device;
	tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
	mb = tx_req->mb;
	sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

#ifdef SDP_ZCOPY
	/* TODO: AIO and real zcopy code; add their context support here */
	if (BZCOPY_STATE(mb))
		BZCOPY_STATE(mb)->busy--;
#endif

	atomic_inc(&tx_ring->tail);

out:
	return mb;
}

static int
sdp_handle_send_comp(struct sdp_sock *ssk, struct ib_wc *wc)
{
	struct mbuf *mb = NULL;
	struct sdp_bsdh *h;

	if (unlikely(wc->status)) {
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			sdp_prf(ssk->socket, mb, "Send completion with error. "
			    "Status %d", wc->status);
			sdp_dbg_data(ssk->socket, "Send completion with error. "
			    "Status %d\n", wc->status);
			sdp_notify(ssk, ECONNRESET);
		}
	}

	mb = sdp_send_completion(ssk, wc->wr_id);
	if (unlikely(!mb))
		return -1;

	h = mtod(mb, struct sdp_bsdh *);
	sdp_prf1(ssk->socket, mb, "tx completion. mseq:%d", ntohl(h->mseq));
	sdp_dbg(ssk->socket, "tx completion. %p %d mseq:%d",
	    mb, mb->m_pkthdr.len, ntohl(h->mseq));
	m_freem(mb);

	return 0;
}

static inline void
sdp_process_tx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{

	if (likely(wc->wr_id & SDP_OP_SEND)) {
		sdp_handle_send_comp(ssk, wc);
		return;
	}

#ifdef SDP_ZCOPY
	if (wc->wr_id & SDP_OP_RDMA) {
		/* TODO: handle failed RDMA read cqe */

		sdp_dbg_data(ssk->socket,
		    "TX comp: RDMA read. status: %d\n", wc->status);
		sdp_prf1(sk, NULL, "TX comp: RDMA read");

		if (!ssk->tx_ring.rdma_inflight) {
			sdp_warn(ssk->socket, "ERROR: unexpected RDMA read\n");
			return;
		}

		if (!ssk->tx_ring.rdma_inflight->busy) {
			sdp_warn(ssk->socket,
			    "ERROR: too many RDMA read completions\n");
			return;
		}

		/* Only last RDMA read WR is signalled. Order is guaranteed -
		 * therefore if Last RDMA read WR is completed - all other
		 * have, too */
		ssk->tx_ring.rdma_inflight->busy = 0;
		sowwakeup(ssk->socket);
		sdp_dbg_data(ssk->socket, "woke up sleepers\n");
		return;
	}
#endif

	/* Keepalive probe sent cleanup */
	sdp_cnt(sdp_keepalive_probes_sent);

	if (likely(!wc->status))
		return;

	sdp_dbg(ssk->socket, " %s consumes KEEPALIVE status %d\n",
	    __func__, wc->status);

	if (wc->status == IB_WC_WR_FLUSH_ERR)
		return;

	sdp_notify(ssk, ECONNRESET);
}

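/*
 * Drain the TX completion queue.  Work completions are polled in batches of
 * SDP_NUM_WC until the CQ is empty; when any completions were reaped, pending
 * data is posted again via sdp_post_sends() and blocked senders are woken up.
 */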
static int
sdp_process_tx_cq(struct sdp_sock *ssk)
{
	struct ib_wc ibwc[SDP_NUM_WC];
	int n, i;
	int wc_processed = 0;

	SDP_WLOCK_ASSERT(ssk);

	if (!ssk->tx_ring.cq) {
		sdp_dbg(ssk->socket, "tx irq on destroyed tx_cq\n");
		return 0;
	}

	do {
		n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
		for (i = 0; i < n; ++i) {
			sdp_process_tx_wc(ssk, ibwc + i);
			wc_processed++;
		}
	} while (n == SDP_NUM_WC);

	if (wc_processed) {
		sdp_post_sends(ssk, M_NOWAIT);
		sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d",
		    (u32) tx_ring_posted(ssk));
		sowwakeup(ssk->socket);
	}

	return wc_processed;
}

static void
sdp_poll_tx(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;
	u32 inflight, wc_processed;

	sdp_prf1(ssk->socket, NULL, "TX timeout: inflight=%d, head=%d tail=%d",
	    (u32) tx_ring_posted(ssk),
	    ring_head(ssk->tx_ring), ring_tail(ssk->tx_ring));

	if (unlikely(ssk->state == TCPS_CLOSED)) {
		sdp_warn(sk, "Socket is closed\n");
		goto out;
	}

	wc_processed = sdp_process_tx_cq(ssk);
	if (!wc_processed)
		SDPSTATS_COUNTER_INC(tx_poll_miss);
	else
		SDPSTATS_COUNTER_INC(tx_poll_hit);

	inflight = (u32) tx_ring_posted(ssk);
	sdp_prf1(ssk->socket, NULL, "finished tx processing. inflight = %d",
	    inflight);

	/* If there are still packets in flight and the timer has not already
	 * been scheduled by the Tx routine then schedule it here to guarantee
	 * completion processing of these packets */
	if (inflight)
		callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT,
		    sdp_poll_tx_timeout, ssk);
out:
#ifdef SDP_ZCOPY
	if (ssk->tx_ring.rdma_inflight && ssk->tx_ring.rdma_inflight->busy) {
		sdp_prf1(sk, NULL, "RDMA is inflight - arming irq");
		sdp_arm_tx_cq(ssk);
	}
#endif
	return;
}

static void
sdp_poll_tx_timeout(void *data)
{
	struct sdp_sock *ssk = (struct sdp_sock *)data;

	if (!callout_active(&ssk->tx_ring.timer))
		return;
	callout_deactivate(&ssk->tx_ring.timer);
	sdp_poll_tx(ssk);
}

static void
sdp_tx_irq(struct ib_cq *cq, void *cq_context)
{
	struct sdp_sock *ssk;

	ssk = cq_context;
	sdp_prf1(ssk->socket, NULL, "tx irq");
	sdp_dbg_data(ssk->socket, "Got tx comp interrupt\n");
	SDPSTATS_COUNTER_INC(tx_int_count);
	SDP_WLOCK(ssk);
	sdp_poll_tx(ssk);
	SDP_WUNLOCK(ssk);
}

static void
sdp_tx_ring_purge(struct sdp_sock *ssk)
{
	while (tx_ring_posted(ssk)) {
		struct mbuf *mb;

		mb = sdp_send_completion(ssk, ring_tail(ssk->tx_ring));
		if (!mb)
			break;
		m_freem(mb);
	}
}

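/*
 * A keepalive probe is a zero-length RDMA_WRITE work request with no
 * scatter/gather entries and wr_id 0, so its completion carries neither the
 * SDP_OP_SEND nor the SDP_OP_RDMA bit and is consumed as a keepalive in
 * sdp_process_tx_wc().
 */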
void
sdp_post_keepalive(struct sdp_sock *ssk)
{
	int rc;
	struct ib_send_wr wr;
	const struct ib_send_wr *bad_wr;

	sdp_dbg(ssk->socket, "%s\n", __func__);

	memset(&wr, 0, sizeof(wr));

	wr.next = NULL;
	wr.wr_id = 0;
	wr.sg_list = NULL;
	wr.num_sge = 0;
	wr.opcode = IB_WR_RDMA_WRITE;

	rc = ib_post_send(ssk->qp, &wr, &bad_wr);
	if (rc) {
		sdp_dbg(ssk->socket,
		    "ib_post_keepalive failed with status %d.\n", rc);
		sdp_notify(ssk, ECONNRESET);
	}

	sdp_cnt(sdp_keepalive_probes_sent);
}

static void
sdp_tx_cq_event_handler(struct ib_event *event, void *data)
{
}

int
sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
	struct ib_cq_init_attr tx_cq_attr = {
		.cqe = SDP_TX_SIZE,
		.comp_vector = 0,
		.flags = 0,
	};
	struct ib_cq *tx_cq;
	int rc = 0;

	sdp_dbg(ssk->socket, "tx ring create\n");
	callout_init_rw(&ssk->tx_ring.timer, &ssk->lock, 0);
	callout_init_rw(&ssk->nagle_timer, &ssk->lock, 0);
	atomic_set(&ssk->tx_ring.head, 1);
	atomic_set(&ssk->tx_ring.tail, 1);

	ssk->tx_ring.buffer = malloc(sizeof(*ssk->tx_ring.buffer) * SDP_TX_SIZE,
	    M_SDP, M_WAITOK);

	tx_cq = ib_create_cq(device, sdp_tx_irq, sdp_tx_cq_event_handler,
	    ssk, &tx_cq_attr);
	if (IS_ERR(tx_cq)) {
		rc = PTR_ERR(tx_cq);
		sdp_warn(ssk->socket, "Unable to allocate TX CQ: %d.\n", rc);
		goto err_cq;
	}
	ssk->tx_ring.cq = tx_cq;
	ssk->tx_ring.poll_cnt = 0;
	sdp_arm_tx_cq(ssk);

	return 0;

err_cq:
	free(ssk->tx_ring.buffer, M_SDP);
	ssk->tx_ring.buffer = NULL;
	return rc;
}

void
sdp_tx_ring_destroy(struct sdp_sock *ssk)
{

	sdp_dbg(ssk->socket, "tx ring destroy\n");
	SDP_WLOCK(ssk);
	callout_stop(&ssk->tx_ring.timer);
	callout_stop(&ssk->nagle_timer);
	SDP_WUNLOCK(ssk);
	callout_drain(&ssk->tx_ring.timer);
	callout_drain(&ssk->nagle_timer);

	if (ssk->tx_ring.buffer) {
		sdp_tx_ring_purge(ssk);
		free(ssk->tx_ring.buffer, M_SDP);
		ssk->tx_ring.buffer = NULL;
	}

	if (ssk->tx_ring.cq) {
		ib_destroy_cq(ssk->tx_ring.cq);
		ssk->tx_ring.cq = NULL;
	}

	WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}