GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/infiniband/hw/ehca/ehca_reqs.c
/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  post_send/recv, poll_cq, req_notify
 *
 *  Authors: Hoang-Nam Nguyen <[email protected]>
 *           Waleri Fomin <[email protected]>
 *           Joachim Fenkes <[email protected]>
 *           Reinhard Ernst <[email protected]>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 *  OpenIB BSD License
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *  Redistributions of source code must retain the above copyright notice, this
 *  list of conditions and the following disclaimer.
 *
 *  Redistributions in binary form must reproduce the above copyright notice,
 *  this list of conditions and the following disclaimer in the documentation
 *  and/or other materials
 *  provided with the distribution.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 *  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/system.h>
#include "ehca_classes.h"
#include "ehca_tools.h"
#include "ehca_qes.h"
#include "ehca_iverbs.h"
#include "hcp_if.h"
#include "hipz_fns.h"

/* in RC traffic, insert an empty RDMA READ every this many packets */
#define ACK_CIRC_THRESHOLD 2000000
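
/*
 * The driver reuses the low-order bits (QMAP_IDX_MASK) of the work request ID
 * written into a hardware WQE to carry its own sq_map/rq_map index.  The
 * application's original low bits are saved in the map entry and restored
 * when the work completion is built, so replace_wr_id()/get_app_wr_id()
 * simply swap those bits in and out.
 */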
static u64 replace_wr_id(u64 wr_id, u16 idx)
{
	u64 ret;

	ret = wr_id & ~QMAP_IDX_MASK;
	ret |= idx & QMAP_IDX_MASK;

	return ret;
}

static u16 get_app_wr_id(u64 wr_id)
{
	return wr_id & QMAP_IDX_MASK;
}
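
/*
 * ehca_write_rwqe() translates one ib_recv_wr into a receive WQE at the
 * current position of the receive queue: it validates the SG list length,
 * tags the WQE with the rq_map index and copies the scatter/gather entries.
 */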
static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
				  struct ehca_wqe *wqe_p,
				  struct ib_recv_wr *recv_wr,
				  u32 rq_map_idx)
{
	u8 cnt_ds;
	if (unlikely((recv_wr->num_sge < 0) ||
		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
		ehca_gen_err("Invalid number of WQE SGE. "
			     "num_sge=%x max_nr_of_sg=%x",
			     recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
		return -EINVAL; /* invalid SG list length */
	}

	/* clear wqe header until sglist */
	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));

	wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
	wqe_p->nr_of_data_seg = recv_wr->num_sge;

	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
			recv_wr->sg_list[cnt_ds].addr;
		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
			recv_wr->sg_list[cnt_ds].lkey;
		wqe_p->u.all_rcv.sg_list[cnt_ds].length =
			recv_wr->sg_list[cnt_ds].length;
	}

	if (ehca_debug_level >= 3) {
		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
			     ipz_rqueue);
		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
	}

	return 0;
}

#if defined(DEBUG_GSI_SEND_WR)

/* need ib_mad struct */
#include <rdma/ib_mad.h>

static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
{
	int idx = 0;	/* index of the WR being traced */
	int j;
	while (send_wr) {
		struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
		struct ib_sge *sge = send_wr->sg_list;
		ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
			     "send_flags=%x opcode=%x", idx, send_wr->wr_id,
			     send_wr->num_sge, send_wr->send_flags,
			     send_wr->opcode);
		if (mad_hdr) {
			ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
				     "mgmt_class=%x class_version=%x method=%x "
				     "status=%x class_specific=%x tid=%lx "
				     "attr_id=%x resv=%x attr_mod=%x",
				     idx, mad_hdr->base_version,
				     mad_hdr->mgmt_class,
				     mad_hdr->class_version, mad_hdr->method,
				     mad_hdr->status, mad_hdr->class_specific,
				     mad_hdr->tid, mad_hdr->attr_id,
				     mad_hdr->resv,
				     mad_hdr->attr_mod);
		}
		for (j = 0; j < send_wr->num_sge; j++) {
			u8 *data = (u8 *)abs_to_virt(sge->addr);
			ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
				     "lkey=%x",
				     idx, j, data, sge->length, sge->lkey);
			/* assume length is n*16 */
			ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
				 idx, j);
			sge++;
		} /* eof for j */
		idx++;
		send_wr = send_wr->next;
	} /* eof while send_wr */
}

#endif /* DEBUG_GSI_SEND_WR */
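
/*
 * ehca_write_swqe() translates one ib_send_wr into a send WQE: it maps the
 * IB opcode to the hardware optype, records the request in the sq_map entry,
 * fills the QP-type-specific part (UD address vector vs. RDMA address/rkey)
 * and copies the gather list.  "hidden" WQEs (used for the unsolicited-ack
 * circumvention in ehca_post_send()) never request a completion.
 */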
static inline int ehca_write_swqe(struct ehca_qp *qp,
				  struct ehca_wqe *wqe_p,
				  const struct ib_send_wr *send_wr,
				  u32 sq_map_idx,
				  int hidden)
{
	u32 idx;
	u64 dma_length;
	struct ehca_av *my_av;
	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
	struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];

	if (unlikely((send_wr->num_sge < 0) ||
		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
		ehca_gen_err("Invalid number of WQE SGE. "
			     "num_sge=%x max_nr_of_sg=%x",
			     send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
		return -EINVAL; /* invalid SG list length */
	}

	/* clear wqe header until sglist */
	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));

	wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);

	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
	qmap_entry->reported = 0;
	qmap_entry->cqe_req = 0;

	switch (send_wr->opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		wqe_p->optype = WQE_OPTYPE_SEND;
		break;
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
		break;
	case IB_WR_RDMA_READ:
		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
		break;
	default:
		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
		return -EINVAL; /* invalid opcode */
	}

	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;

	wqe_p->wr_flag = 0;

	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
	    qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
	    && !hidden) {
		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
		qmap_entry->cqe_req = 1;
	}

	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		/* this might not work as long as HW does not support it */
		wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
	}

	wqe_p->nr_of_data_seg = send_wr->num_sge;

	switch (qp->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		/* no break is intentional here */
	case IB_QPT_UD:
		/* IB 1.2 spec C10-15 compliance */
		if (send_wr->wr.ud.remote_qkey & 0x80000000)
			remote_qkey = qp->qkey;

		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
		wqe_p->local_ee_context_qkey = remote_qkey;
		if (unlikely(!send_wr->wr.ud.ah)) {
			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
			return -EINVAL;
		}
		if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
			ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
			return -EINVAL;
		}
		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
		wqe_p->u.ud_av.ud_av = my_av->av;

		/*
		 * omitted check of IB_SEND_INLINE
		 * since HW does not support it
		 */
		for (idx = 0; idx < send_wr->num_sge; idx++) {
			wqe_p->u.ud_av.sg_list[idx].vaddr =
				send_wr->sg_list[idx].addr;
			wqe_p->u.ud_av.sg_list[idx].lkey =
				send_wr->sg_list[idx].lkey;
			wqe_p->u.ud_av.sg_list[idx].length =
				send_wr->sg_list[idx].length;
		} /* eof for idx */
		if (qp->qp_type == IB_QPT_SMI ||
		    qp->qp_type == IB_QPT_GSI)
			wqe_p->u.ud_av.ud_av.pmtu = 1;
		if (qp->qp_type == IB_QPT_GSI) {
			wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
#ifdef DEBUG_GSI_SEND_WR
			trace_send_wr_ud(send_wr);
#endif /* DEBUG_GSI_SEND_WR */
		}
		break;

	case IB_QPT_UC:
		if (send_wr->send_flags & IB_SEND_FENCE)
			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
		/* no break is intentional here */
	case IB_QPT_RC:
		/* TODO: atomic not implemented */
		wqe_p->u.nud.remote_virtual_address =
			send_wr->wr.rdma.remote_addr;
		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;

		/*
		 * omitted checking of IB_SEND_INLINE
		 * since HW does not support it
		 */
		dma_length = 0;
		for (idx = 0; idx < send_wr->num_sge; idx++) {
			wqe_p->u.nud.sg_list[idx].vaddr =
				send_wr->sg_list[idx].addr;
			wqe_p->u.nud.sg_list[idx].lkey =
				send_wr->sg_list[idx].lkey;
			wqe_p->u.nud.sg_list[idx].length =
				send_wr->sg_list[idx].length;
			dma_length += send_wr->sg_list[idx].length;
		} /* eof idx */
		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;

		/* unsolicited ack circumvention */
		if (send_wr->opcode == IB_WR_RDMA_READ) {
			/* on RDMA read, switch on and reset counters */
			qp->message_count = qp->packet_count = 0;
			qp->unsol_ack_circ = 1;
		} else
			/* else estimate #packets */
			qp->packet_count += (dma_length >> qp->mtu_shift) + 1;

		break;

	default:
		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
		return -EINVAL;
	}

	if (ehca_debug_level >= 3) {
		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
	}
	return 0;
}

/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
static inline void map_ib_wc_status(u32 cqe_status,
				    enum ib_wc_status *wc_status)
{
	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
		switch (cqe_status & 0x3F) {
		case 0x01:
		case 0x21:
			*wc_status = IB_WC_LOC_LEN_ERR;
			break;
		case 0x02:
		case 0x22:
			*wc_status = IB_WC_LOC_QP_OP_ERR;
			break;
		case 0x03:
		case 0x23:
			*wc_status = IB_WC_LOC_EEC_OP_ERR;
			break;
		case 0x04:
		case 0x24:
			*wc_status = IB_WC_LOC_PROT_ERR;
			break;
		case 0x05:
		case 0x25:
			*wc_status = IB_WC_WR_FLUSH_ERR;
			break;
		case 0x06:
			*wc_status = IB_WC_MW_BIND_ERR;
			break;
		case 0x07: /* remote error - look into bits 20:24 */
			switch ((cqe_status
				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
			case 0x0:
				/*
				 * PSN Sequence Error!
				 * couldn't find a matching status!
				 */
				*wc_status = IB_WC_GENERAL_ERR;
				break;
			case 0x1:
				*wc_status = IB_WC_REM_INV_REQ_ERR;
				break;
			case 0x2:
				*wc_status = IB_WC_REM_ACCESS_ERR;
				break;
			case 0x3:
				*wc_status = IB_WC_REM_OP_ERR;
				break;
			case 0x4:
				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
				break;
			}
			break;
		case 0x08:
			*wc_status = IB_WC_RETRY_EXC_ERR;
			break;
		case 0x09:
			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
			break;
		case 0x0A:
		case 0x2D:
			*wc_status = IB_WC_REM_ABORT_ERR;
			break;
		case 0x0B:
		case 0x2E:
			*wc_status = IB_WC_INV_EECN_ERR;
			break;
		case 0x0C:
		case 0x2F:
			*wc_status = IB_WC_INV_EEC_STATE_ERR;
			break;
		case 0x0D:
			*wc_status = IB_WC_BAD_RESP_ERR;
			break;
		case 0x10:
			/* WQE purged */
			*wc_status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			*wc_status = IB_WC_FATAL_ERR;
		}
	} else
		*wc_status = IB_WC_SUCCESS;
}
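
/*
 * post_one_send() reserves the next free entry of the send queue, derives the
 * sq_map index from the queue offset and lets ehca_write_swqe() fill the WQE;
 * on failure the queue pointer is rolled back so nothing is left half-posted.
 */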
static inline int post_one_send(struct ehca_qp *my_qp,
				struct ib_send_wr *cur_send_wr,
				int hidden)
{
	struct ehca_wqe *wqe_p;
	int ret;
	u32 sq_map_idx;
	u64 start_offset = my_qp->ipz_squeue.current_q_offset;

	/* get pointer next to free WQE */
	wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
	if (unlikely(!wqe_p)) {
		/* too many posted work requests: queue overflow */
		ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
			 "qp_num=%x", my_qp->ib_qp.qp_num);
		return -ENOMEM;
	}

	/*
	 * Get the index of the WQE in the send queue. The same index is used
	 * for writing into the sq_map.
	 */
	sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;

	/* write a SEND WQE into the QUEUE */
	ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
	/*
	 * if something failed,
	 * reset the free entry pointer to the start value
	 */
	if (unlikely(ret)) {
		my_qp->ipz_squeue.current_q_offset = start_offset;
		ehca_err(my_qp->ib_qp.device, "Could not write WQE "
			 "qp_num=%x", my_qp->ib_qp.qp_num);
		return -EINVAL;
	}

	return 0;
}
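
/*
 * ehca_post_send() implements the post_send verb: it rejects QPs that are not
 * at least in RTS, may post a hidden, empty RDMA READ as unsolicited-ack
 * circumvention on RC traffic (see ACK_CIRC_THRESHOLD), posts each WR in the
 * list and finally passes the number of new WQEs to hipz_update_sqa().
 */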
int ehca_post_send(struct ib_qp *qp,
		   struct ib_send_wr *send_wr,
		   struct ib_send_wr **bad_send_wr)
{
	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
	int wqe_cnt = 0;
	int ret = 0;
	unsigned long flags;

	/* Reject WR if QP is in RESET, INIT or RTR state */
	if (unlikely(my_qp->state < IB_QPS_RTS)) {
		ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
			 my_qp->state, qp->qp_num);
		ret = -EINVAL;
		goto out;
	}

	/* LOCK the QUEUE */
	spin_lock_irqsave(&my_qp->spinlock_s, flags);

	/* Send an empty extra RDMA read if:
	 *  1) there has been an RDMA read on this connection before
	 *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
	 *  3) we can be sure that any previous extra RDMA read has been
	 *     processed so we don't overflow the SQ
	 */
	if (unlikely(my_qp->unsol_ack_circ &&
		     my_qp->packet_count > ACK_CIRC_THRESHOLD &&
		     my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
		/* insert an empty RDMA READ to fix up the remote QP state */
		struct ib_send_wr circ_wr;
		memset(&circ_wr, 0, sizeof(circ_wr));
		circ_wr.opcode = IB_WR_RDMA_READ;
		post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
		wqe_cnt++;
		ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
		my_qp->message_count = my_qp->packet_count = 0;
	}

	/* loop processes list of send reqs */
	while (send_wr) {
		ret = post_one_send(my_qp, send_wr, 0);
		if (unlikely(ret))
			goto post_send_exit0;
		wqe_cnt++;
		send_wr = send_wr->next;
	}

post_send_exit0:
	iosync(); /* serialize GAL register access */
	hipz_update_sqa(my_qp, wqe_cnt);
	if (unlikely(ret || ehca_debug_level >= 2))
		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
			 my_qp, qp->qp_num, wqe_cnt, ret);
	my_qp->message_count += wqe_cnt;
	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);

out:
	if (ret)
		*bad_send_wr = send_wr;
	return ret;
}
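
/*
 * internal_post_recv() is the common work horse of ehca_post_recv() and
 * ehca_post_srq_recv(): under the receive-queue lock it writes one receive
 * WQE per ib_recv_wr, records each request in the rq_map and then passes the
 * number of new WQEs to the hardware via hipz_update_rqa().
 */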
static int internal_post_recv(struct ehca_qp *my_qp,
			      struct ib_device *dev,
			      struct ib_recv_wr *recv_wr,
			      struct ib_recv_wr **bad_recv_wr)
{
	struct ehca_wqe *wqe_p;
	int wqe_cnt = 0;
	int ret = 0;
	u32 rq_map_idx;
	unsigned long flags;
	struct ehca_qmap_entry *qmap_entry;

	if (unlikely(!HAS_RQ(my_qp))) {
		ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
			 my_qp, my_qp->real_qp_num, my_qp->ext_type);
		ret = -ENODEV;
		goto out;
	}

	/* LOCK the QUEUE */
	spin_lock_irqsave(&my_qp->spinlock_r, flags);

	/* loop processes list of recv reqs */
	while (recv_wr) {
		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
		/* get pointer next to free WQE */
		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
		if (unlikely(!wqe_p)) {
			/* too many posted work requests: queue overflow */
			ret = -ENOMEM;
			ehca_err(dev, "Too many posted WQEs "
				 "qp_num=%x", my_qp->real_qp_num);
			goto post_recv_exit0;
		}
		/*
		 * Get the index of the WQE in the recv queue. The same index
		 * is used for writing into the rq_map.
		 */
		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;

		/* write a RECV WQE into the QUEUE */
		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
				      rq_map_idx);
		/*
		 * if something failed,
		 * reset the free entry pointer to the start value
		 */
		if (unlikely(ret)) {
			my_qp->ipz_rqueue.current_q_offset = start_offset;
			ret = -EINVAL;
			ehca_err(dev, "Could not write WQE "
				 "qp_num=%x", my_qp->real_qp_num);
			goto post_recv_exit0;
		}

		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
		qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
		qmap_entry->reported = 0;
		qmap_entry->cqe_req = 1;

		wqe_cnt++;
		recv_wr = recv_wr->next;
	} /* eof for recv_wr */

post_recv_exit0:
	iosync(); /* serialize GAL register access */
	hipz_update_rqa(my_qp, wqe_cnt);
	if (unlikely(ret || ehca_debug_level >= 2))
		ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
			 my_qp, my_qp->real_qp_num, wqe_cnt, ret);
	spin_unlock_irqrestore(&my_qp->spinlock_r, flags);

out:
	if (ret)
		*bad_recv_wr = recv_wr;

	return ret;
}
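
/*
 * ehca_post_recv()/ehca_post_srq_recv() implement the post_recv verbs for
 * regular QPs and SRQs; both delegate to internal_post_recv(), with the
 * QP-state check applying only to the non-SRQ case.
 */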
int ehca_post_recv(struct ib_qp *qp,
		   struct ib_recv_wr *recv_wr,
		   struct ib_recv_wr **bad_recv_wr)
{
	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);

	/* Reject WR if QP is in RESET state */
	if (unlikely(my_qp->state == IB_QPS_RESET)) {
		ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
			 my_qp->state, qp->qp_num);
		*bad_recv_wr = recv_wr;
		return -EINVAL;
	}

	return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
}

int ehca_post_srq_recv(struct ib_srq *srq,
		       struct ib_recv_wr *recv_wr,
		       struct ib_recv_wr **bad_recv_wr)
{
	return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
				  srq->device, recv_wr, bad_recv_wr);
}

/*
 * ib_wc_opcode table converts ehca wc opcode to ib
 * Since we use zero to indicate invalid opcode, the actual ib opcode must
 * be decremented!!!
 */
static const u8 ib_wc_opcode[255] = {
	[0x01] = IB_WC_RECV+1,
	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
	[0x04] = IB_WC_BIND_MW+1,
	[0x08] = IB_WC_FETCH_ADD+1,
	[0x10] = IB_WC_COMP_SWAP+1,
	[0x20] = IB_WC_RDMA_WRITE+1,
	[0x40] = IB_WC_RDMA_READ+1,
	[0x80] = IB_WC_SEND+1
};

/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
	int ret = 0, qmap_tail_idx;
	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
	struct ehca_cqe *cqe;
	struct ehca_qp *my_qp;
	struct ehca_qmap_entry *qmap_entry;
	struct ehca_queue_map *qmap;
	int cqe_count = 0, is_error;

repoll:
	cqe = (struct ehca_cqe *)
		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
	if (!cqe) {
		ret = -EAGAIN;
		if (ehca_debug_level >= 3)
			ehca_dbg(cq->device, "Completion queue is empty "
				 "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
		goto poll_cq_one_exit0;
	}

	/* prevents loads being reordered across this point */
	rmb();

	cqe_count++;
	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
		struct ehca_qp *qp;
		int purgeflag;
		unsigned long flags;

		qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
		if (!qp) {
			ehca_err(cq->device, "cq_num=%x qp_num=%x "
				 "could not find qp -> ignore cqe",
				 my_cq->cq_number, cqe->local_qp_number);
			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
				 my_cq->cq_number, cqe->local_qp_number);
			/* ignore this purged cqe */
			goto repoll;
		}
		spin_lock_irqsave(&qp->spinlock_s, flags);
		purgeflag = qp->sqerr_purgeflag;
		spin_unlock_irqrestore(&qp->spinlock_s, flags);

		if (purgeflag) {
			ehca_dbg(cq->device,
				 "Got CQE with purged bit qp_num=%x src_qp=%x",
				 cqe->local_qp_number, cqe->remote_qp_number);
			if (ehca_debug_level >= 2)
				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
					 cqe->local_qp_number,
					 cqe->remote_qp_number);
			/*
			 * ignore this to avoid double cqes of bad wqe
			 * that caused sqe and turn off purge flag
			 */
			qp->sqerr_purgeflag = 0;
			goto repoll;
		}
	}

	is_error = cqe->status & WC_STATUS_ERROR_BIT;

	/* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
	if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
		ehca_dbg(cq->device,
			 "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
			 is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
			 my_cq, my_cq->cq_number);
		ehca_dbg(cq->device,
			 "ehca_cq=%p cq_num=%x -------------------------",
			 my_cq, my_cq->cq_number);
	}

	read_lock(&ehca_qp_idr_lock);
	my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
	read_unlock(&ehca_qp_idr_lock);
	if (!my_qp)
		goto repoll;
	wc->qp = &my_qp->ib_qp;

	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
		/* We got a send completion. */
		qmap = &my_qp->sq_map;
	else
		/* We got a receive completion. */
		qmap = &my_qp->rq_map;

	/* advance the tail pointer */
	qmap->tail = qmap_tail_idx;

	if (is_error) {
		/*
		 * set left_to_poll to 0 because in error state, we will not
		 * get any additional CQEs
		 */
		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
							my_qp->sq_map.entries);
		my_qp->sq_map.left_to_poll = 0;
		ehca_add_to_err_list(my_qp, 1);

		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
							my_qp->rq_map.entries);
		my_qp->rq_map.left_to_poll = 0;
		if (HAS_RQ(my_qp))
			ehca_add_to_err_list(my_qp, 0);
	}

	qmap_entry = &qmap->map[qmap_tail_idx];
	if (qmap_entry->reported) {
		ehca_warn(cq->device, "Double cqe on qp_num=%#x",
			  my_qp->real_qp_num);
		/* found a double cqe, discard it and read next one */
		goto repoll;
	}

	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
	qmap_entry->reported = 1;

	/* if left_to_poll is decremented to 0, add the QP to the error list */
	if (qmap->left_to_poll > 0) {
		qmap->left_to_poll--;
		if ((my_qp->sq_map.left_to_poll == 0) &&
		    (my_qp->rq_map.left_to_poll == 0)) {
			ehca_add_to_err_list(my_qp, 1);
			if (HAS_RQ(my_qp))
				ehca_add_to_err_list(my_qp, 0);
		}
	}

	/* eval ib_wc_opcode */
	wc->opcode = ib_wc_opcode[cqe->optype]-1;
	if (unlikely(wc->opcode == -1)) {
		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
			 "ehca_cq=%p cq_num=%x",
			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
		/* dump cqe for other infos */
		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
			 my_cq, my_cq->cq_number);
		/* update also queue adder to throw away this entry!!! */
		goto repoll;
	}

	/* eval ib_wc_status */
	if (unlikely(is_error)) {
		/* complete with errors */
		map_ib_wc_status(cqe->status, &wc->status);
		wc->vendor_err = wc->status;
	} else
		wc->status = IB_WC_SUCCESS;

	wc->byte_len = cqe->nr_bytes_transferred;
	wc->pkey_index = cqe->pkey_index;
	wc->slid = cqe->rlid;
	wc->dlid_path_bits = cqe->dlid;
	wc->src_qp = cqe->remote_qp_number;
	/*
	 * HW has "Immed data present" and "GRH present" in bits 6 and 5.
	 * SW defines those in bits 1 and 0, so we can just shift and mask.
	 */
	wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
	wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
	wc->sl = cqe->service_level;

poll_cq_one_exit0:
	if (cqe_count > 0)
		hipz_update_feca(my_cq, cqe_count);

	return ret;
}
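
/*
 * generate_flush_cqes() synthesizes IB_WC_WR_FLUSH_ERR completions for WQEs
 * of a QP that has been added to this CQ's error list and will therefore not
 * be completed by hardware; it walks the queue map from next_wqe_idx until it
 * runs out of unreported entries or the caller's WC array is full.
 */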
static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
			       struct ib_wc *wc, int num_entries,
			       struct ipz_queue *ipz_queue, int on_sq)
{
	int nr = 0;
	struct ehca_wqe *wqe;
	u64 offset;
	struct ehca_queue_map *qmap;
	struct ehca_qmap_entry *qmap_entry;

	if (on_sq)
		qmap = &my_qp->sq_map;
	else
		qmap = &my_qp->rq_map;

	qmap_entry = &qmap->map[qmap->next_wqe_idx];

	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
		/* generate flush CQE */

		memset(wc, 0, sizeof(*wc));

		offset = qmap->next_wqe_idx * ipz_queue->qe_size;
		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
		if (!wqe) {
			ehca_err(cq->device, "Invalid wqe offset=%#llx on "
				 "qp_num=%#x", offset, my_qp->real_qp_num);
			return nr;
		}

		wc->wr_id = replace_wr_id(wqe->work_request_id,
					  qmap_entry->app_wr_id);

		if (on_sq) {
			switch (wqe->optype) {
			case WQE_OPTYPE_SEND:
				wc->opcode = IB_WC_SEND;
				break;
			case WQE_OPTYPE_RDMAWRITE:
				wc->opcode = IB_WC_RDMA_WRITE;
				break;
			case WQE_OPTYPE_RDMAREAD:
				wc->opcode = IB_WC_RDMA_READ;
				break;
			default:
				ehca_err(cq->device, "Invalid optype=%x",
					 wqe->optype);
				return nr;
			}
		} else
			wc->opcode = IB_WC_RECV;

		if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
			wc->ex.imm_data = wqe->immediate_data;
			wc->wc_flags |= IB_WC_WITH_IMM;
		}

		wc->status = IB_WC_WR_FLUSH_ERR;

		wc->qp = &my_qp->ib_qp;

		/* mark as reported and advance next_wqe pointer */
		qmap_entry->reported = 1;
		qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
						qmap->entries);
		qmap_entry = &qmap->map[qmap->next_wqe_idx];

		wc++; nr++;
	}

	return nr;
}
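
/*
 * ehca_poll_cq() first drains software-generated flush CQEs for all QPs on
 * this CQ's send and receive error lists, then fills the remaining WC slots
 * from the hardware completion queue via ehca_poll_cq_one().
 */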
int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
	int nr;
	struct ehca_qp *err_qp;
	struct ib_wc *current_wc = wc;
	int ret = 0;
	unsigned long flags;
	int entries_left = num_entries;

	if (num_entries < 1) {
		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
		ret = -EINVAL;
		goto poll_cq_exit0;
	}

	spin_lock_irqsave(&my_cq->spinlock, flags);

	/* generate flush cqes for send queues */
	list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
					 &err_qp->ipz_squeue, 1);
		entries_left -= nr;
		current_wc += nr;

		if (entries_left == 0)
			break;
	}

	/* generate flush cqes for receive queues */
	list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
					 &err_qp->ipz_rqueue, 0);
		entries_left -= nr;
		current_wc += nr;

		if (entries_left == 0)
			break;
	}

	for (nr = 0; nr < entries_left; nr++) {
		ret = ehca_poll_cq_one(cq, current_wc);
		if (ret)
			break;
		current_wc++;
	} /* eof for nr */
	entries_left -= nr;

	spin_unlock_irqrestore(&my_cq->spinlock, flags);
	if (ret == -EAGAIN || !ret)
		ret = num_entries - entries_left;

poll_cq_exit0:
	return ret;
}
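
/*
 * ehca_req_notify_cq() arms the completion event either for solicited
 * completions only (CQx_N0) or for the next completion (CQx_N1); with
 * IB_CQ_REPORT_MISSED_EVENTS it additionally reports whether a valid CQE is
 * already waiting in the queue.
 */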
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
{
	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
	int ret = 0;

	switch (notify_flags & IB_CQ_SOLICITED_MASK) {
	case IB_CQ_SOLICITED:
		hipz_set_cqx_n0(my_cq, 1);
		break;
	case IB_CQ_NEXT_COMP:
		hipz_set_cqx_n1(my_cq, 1);
		break;
	default:
		return -EINVAL;
	}

	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		unsigned long spl_flags;
		spin_lock_irqsave(&my_cq->spinlock, spl_flags);
		ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
		spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
	}

	return ret;
}