Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/infiniband/hw/ipath/ipath_rc.c
15112 views
1
/*
2
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4
*
5
* This software is available to you under a choice of one of two
6
* licenses. You may choose to be licensed under the terms of the GNU
7
* General Public License (GPL) Version 2, available from the file
8
* COPYING in the main directory of this source tree, or the
9
* OpenIB.org BSD license below:
10
*
11
* Redistribution and use in source and binary forms, with or
12
* without modification, are permitted provided that the following
13
* conditions are met:
14
*
15
* - Redistributions of source code must retain the above
16
* copyright notice, this list of conditions and the following
17
* disclaimer.
18
*
19
* - Redistributions in binary form must reproduce the above
20
* copyright notice, this list of conditions and the following
21
* disclaimer in the documentation and/or other materials
22
* provided with the distribution.
23
*
24
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
* SOFTWARE.
32
*/
33
34
#include <linux/io.h>
35
36
#include "ipath_verbs.h"
37
#include "ipath_kernel.h"
38
39
/*
 * Shorthand for the RC opcode constants: OP(x) expands to
 * IB_OPCODE_RC_x, which cuts down the long IB macro names.
 */
#define OP(x) IB_OPCODE_RC_##x
41
42
/*
 * restart_sge - point an SGE state at the data for a given PSN of a WQE
 * @ss: the SGE state to (re)initialize
 * @wqe: the send work queue entry supplying the SG list
 * @psn: the packet sequence number to restart at
 * @pmtu: the path MTU in bytes
 *
 * The SGE state is reset to the start of the WQE's SG list and then
 * ipath_skip_sge() is called with the number of bytes that precede @psn
 * (packets since wqe->psn, modulo the PSN width, times @pmtu).
 *
 * Returns wqe->length minus that byte count.
 */
static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
		       u32 psn, u32 pmtu)
{
	u32 pkts_done = (psn - wqe->psn) & IPATH_PSN_MASK;
	u32 bytes_done = pkts_done * pmtu;

	ss->sge = wqe->sg_list[0];
	ss->sg_list = &wqe->sg_list[1];
	ss->num_sge = wqe->wr.num_sge;
	ipath_skip_sge(ss, bytes_done);

	return wqe->length - bytes_done;
}
54
55
/**
56
* ipath_init_restart- initialize the qp->s_sge after a restart
57
* @qp: the QP who's SGE we're restarting
58
* @wqe: the work queue to initialize the QP's SGE from
59
*
60
* The QP s_lock should be held and interrupts disabled.
61
*/
62
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
63
{
64
struct ipath_ibdev *dev;
65
66
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
67
ib_mtu_enum_to_int(qp->path_mtu));
68
dev = to_idev(qp->ibqp.device);
69
spin_lock(&dev->pending_lock);
70
if (list_empty(&qp->timerwait))
71
list_add_tail(&qp->timerwait,
72
&dev->pending[dev->pending_index]);
73
spin_unlock(&dev->pending_lock);
74
}
75
76
/**
77
* ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
78
* @qp: a pointer to the QP
79
* @ohdr: a pointer to the IB header being constructed
80
* @pmtu: the path MTU
81
*
82
* Return 1 if constructed; otherwise, return 0.
83
* Note that we are in the responder's side of the QP context.
84
* Note the QP s_lock must be held.
85
*/
86
static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
87
struct ipath_other_headers *ohdr, u32 pmtu)
88
{
89
struct ipath_ack_entry *e;
90
u32 hwords;
91
u32 len;
92
u32 bth0;
93
u32 bth2;
94
95
/* Don't send an ACK if we aren't supposed to. */
96
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
97
goto bail;
98
99
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
100
hwords = 5;
101
102
switch (qp->s_ack_state) {
103
case OP(RDMA_READ_RESPONSE_LAST):
104
case OP(RDMA_READ_RESPONSE_ONLY):
105
case OP(ATOMIC_ACKNOWLEDGE):
106
/*
107
* We can increment the tail pointer now that the last
108
* response has been sent instead of only being
109
* constructed.
110
*/
111
if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
112
qp->s_tail_ack_queue = 0;
113
/* FALLTHROUGH */
114
case OP(SEND_ONLY):
115
case OP(ACKNOWLEDGE):
116
/* Check for no next entry in the queue. */
117
if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
118
if (qp->s_flags & IPATH_S_ACK_PENDING)
119
goto normal;
120
qp->s_ack_state = OP(ACKNOWLEDGE);
121
goto bail;
122
}
123
124
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
125
if (e->opcode == OP(RDMA_READ_REQUEST)) {
126
/* Copy SGE state in case we need to resend */
127
qp->s_ack_rdma_sge = e->rdma_sge;
128
qp->s_cur_sge = &qp->s_ack_rdma_sge;
129
len = e->rdma_sge.sge.sge_length;
130
if (len > pmtu) {
131
len = pmtu;
132
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
133
} else {
134
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
135
e->sent = 1;
136
}
137
ohdr->u.aeth = ipath_compute_aeth(qp);
138
hwords++;
139
qp->s_ack_rdma_psn = e->psn;
140
bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
141
} else {
142
/* COMPARE_SWAP or FETCH_ADD */
143
qp->s_cur_sge = NULL;
144
len = 0;
145
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
146
ohdr->u.at.aeth = ipath_compute_aeth(qp);
147
ohdr->u.at.atomic_ack_eth[0] =
148
cpu_to_be32(e->atomic_data >> 32);
149
ohdr->u.at.atomic_ack_eth[1] =
150
cpu_to_be32(e->atomic_data);
151
hwords += sizeof(ohdr->u.at) / sizeof(u32);
152
bth2 = e->psn;
153
e->sent = 1;
154
}
155
bth0 = qp->s_ack_state << 24;
156
break;
157
158
case OP(RDMA_READ_RESPONSE_FIRST):
159
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
160
/* FALLTHROUGH */
161
case OP(RDMA_READ_RESPONSE_MIDDLE):
162
len = qp->s_ack_rdma_sge.sge.sge_length;
163
if (len > pmtu)
164
len = pmtu;
165
else {
166
ohdr->u.aeth = ipath_compute_aeth(qp);
167
hwords++;
168
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
169
qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
170
}
171
bth0 = qp->s_ack_state << 24;
172
bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
173
break;
174
175
default:
176
normal:
177
/*
178
* Send a regular ACK.
179
* Set the s_ack_state so we wait until after sending
180
* the ACK before setting s_ack_state to ACKNOWLEDGE
181
* (see above).
182
*/
183
qp->s_ack_state = OP(SEND_ONLY);
184
qp->s_flags &= ~IPATH_S_ACK_PENDING;
185
qp->s_cur_sge = NULL;
186
if (qp->s_nak_state)
187
ohdr->u.aeth =
188
cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
189
(qp->s_nak_state <<
190
IPATH_AETH_CREDIT_SHIFT));
191
else
192
ohdr->u.aeth = ipath_compute_aeth(qp);
193
hwords++;
194
len = 0;
195
bth0 = OP(ACKNOWLEDGE) << 24;
196
bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
197
}
198
qp->s_hdrwords = hwords;
199
qp->s_cur_size = len;
200
ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
201
return 1;
202
203
bail:
204
return 0;
205
}
206
207
/**
208
* ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
209
* @qp: a pointer to the QP
210
*
211
* Return 1 if constructed; otherwise, return 0.
212
*/
213
int ipath_make_rc_req(struct ipath_qp *qp)
214
{
215
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
216
struct ipath_other_headers *ohdr;
217
struct ipath_sge_state *ss;
218
struct ipath_swqe *wqe;
219
u32 hwords;
220
u32 len;
221
u32 bth0;
222
u32 bth2;
223
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
224
char newreq;
225
unsigned long flags;
226
int ret = 0;
227
228
ohdr = &qp->s_hdr.u.oth;
229
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
230
ohdr = &qp->s_hdr.u.l.oth;
231
232
/*
233
* The lock is needed to synchronize between the sending tasklet,
234
* the receive interrupt handler, and timeout resends.
235
*/
236
spin_lock_irqsave(&qp->s_lock, flags);
237
238
/* Sending responses has higher priority over sending requests. */
239
if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
240
(qp->s_flags & IPATH_S_ACK_PENDING) ||
241
qp->s_ack_state != OP(ACKNOWLEDGE)) &&
242
ipath_make_rc_ack(dev, qp, ohdr, pmtu))
243
goto done;
244
245
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
246
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
247
goto bail;
248
/* We are in the error state, flush the work request. */
249
if (qp->s_last == qp->s_head)
250
goto bail;
251
/* If DMAs are in progress, we can't flush immediately. */
252
if (atomic_read(&qp->s_dma_busy)) {
253
qp->s_flags |= IPATH_S_WAIT_DMA;
254
goto bail;
255
}
256
wqe = get_swqe_ptr(qp, qp->s_last);
257
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
258
goto done;
259
}
260
261
/* Leave BUSY set until RNR timeout. */
262
if (qp->s_rnr_timeout) {
263
qp->s_flags |= IPATH_S_WAITING;
264
goto bail;
265
}
266
267
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
268
hwords = 5;
269
bth0 = 1 << 22; /* Set M bit */
270
271
/* Send a request. */
272
wqe = get_swqe_ptr(qp, qp->s_cur);
273
switch (qp->s_state) {
274
default:
275
if (!(ib_ipath_state_ops[qp->state] &
276
IPATH_PROCESS_NEXT_SEND_OK))
277
goto bail;
278
/*
279
* Resend an old request or start a new one.
280
*
281
* We keep track of the current SWQE so that
282
* we don't reset the "furthest progress" state
283
* if we need to back up.
284
*/
285
newreq = 0;
286
if (qp->s_cur == qp->s_tail) {
287
/* Check if send work queue is empty. */
288
if (qp->s_tail == qp->s_head)
289
goto bail;
290
/*
291
* If a fence is requested, wait for previous
292
* RDMA read and atomic operations to finish.
293
*/
294
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
295
qp->s_num_rd_atomic) {
296
qp->s_flags |= IPATH_S_FENCE_PENDING;
297
goto bail;
298
}
299
wqe->psn = qp->s_next_psn;
300
newreq = 1;
301
}
302
/*
303
* Note that we have to be careful not to modify the
304
* original work request since we may need to resend
305
* it.
306
*/
307
len = wqe->length;
308
ss = &qp->s_sge;
309
bth2 = 0;
310
switch (wqe->wr.opcode) {
311
case IB_WR_SEND:
312
case IB_WR_SEND_WITH_IMM:
313
/* If no credit, return. */
314
if (qp->s_lsn != (u32) -1 &&
315
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
316
qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
317
goto bail;
318
}
319
wqe->lpsn = wqe->psn;
320
if (len > pmtu) {
321
wqe->lpsn += (len - 1) / pmtu;
322
qp->s_state = OP(SEND_FIRST);
323
len = pmtu;
324
break;
325
}
326
if (wqe->wr.opcode == IB_WR_SEND)
327
qp->s_state = OP(SEND_ONLY);
328
else {
329
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
330
/* Immediate data comes after the BTH */
331
ohdr->u.imm_data = wqe->wr.ex.imm_data;
332
hwords += 1;
333
}
334
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
335
bth0 |= 1 << 23;
336
bth2 = 1 << 31; /* Request ACK. */
337
if (++qp->s_cur == qp->s_size)
338
qp->s_cur = 0;
339
break;
340
341
case IB_WR_RDMA_WRITE:
342
if (newreq && qp->s_lsn != (u32) -1)
343
qp->s_lsn++;
344
/* FALLTHROUGH */
345
case IB_WR_RDMA_WRITE_WITH_IMM:
346
/* If no credit, return. */
347
if (qp->s_lsn != (u32) -1 &&
348
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
349
qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
350
goto bail;
351
}
352
ohdr->u.rc.reth.vaddr =
353
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
354
ohdr->u.rc.reth.rkey =
355
cpu_to_be32(wqe->wr.wr.rdma.rkey);
356
ohdr->u.rc.reth.length = cpu_to_be32(len);
357
hwords += sizeof(struct ib_reth) / sizeof(u32);
358
wqe->lpsn = wqe->psn;
359
if (len > pmtu) {
360
wqe->lpsn += (len - 1) / pmtu;
361
qp->s_state = OP(RDMA_WRITE_FIRST);
362
len = pmtu;
363
break;
364
}
365
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
366
qp->s_state = OP(RDMA_WRITE_ONLY);
367
else {
368
qp->s_state =
369
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
370
/* Immediate data comes after RETH */
371
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
372
hwords += 1;
373
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
374
bth0 |= 1 << 23;
375
}
376
bth2 = 1 << 31; /* Request ACK. */
377
if (++qp->s_cur == qp->s_size)
378
qp->s_cur = 0;
379
break;
380
381
case IB_WR_RDMA_READ:
382
/*
383
* Don't allow more operations to be started
384
* than the QP limits allow.
385
*/
386
if (newreq) {
387
if (qp->s_num_rd_atomic >=
388
qp->s_max_rd_atomic) {
389
qp->s_flags |= IPATH_S_RDMAR_PENDING;
390
goto bail;
391
}
392
qp->s_num_rd_atomic++;
393
if (qp->s_lsn != (u32) -1)
394
qp->s_lsn++;
395
/*
396
* Adjust s_next_psn to count the
397
* expected number of responses.
398
*/
399
if (len > pmtu)
400
qp->s_next_psn += (len - 1) / pmtu;
401
wqe->lpsn = qp->s_next_psn++;
402
}
403
ohdr->u.rc.reth.vaddr =
404
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
405
ohdr->u.rc.reth.rkey =
406
cpu_to_be32(wqe->wr.wr.rdma.rkey);
407
ohdr->u.rc.reth.length = cpu_to_be32(len);
408
qp->s_state = OP(RDMA_READ_REQUEST);
409
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
410
ss = NULL;
411
len = 0;
412
if (++qp->s_cur == qp->s_size)
413
qp->s_cur = 0;
414
break;
415
416
case IB_WR_ATOMIC_CMP_AND_SWP:
417
case IB_WR_ATOMIC_FETCH_AND_ADD:
418
/*
419
* Don't allow more operations to be started
420
* than the QP limits allow.
421
*/
422
if (newreq) {
423
if (qp->s_num_rd_atomic >=
424
qp->s_max_rd_atomic) {
425
qp->s_flags |= IPATH_S_RDMAR_PENDING;
426
goto bail;
427
}
428
qp->s_num_rd_atomic++;
429
if (qp->s_lsn != (u32) -1)
430
qp->s_lsn++;
431
wqe->lpsn = wqe->psn;
432
}
433
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
434
qp->s_state = OP(COMPARE_SWAP);
435
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
436
wqe->wr.wr.atomic.swap);
437
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
438
wqe->wr.wr.atomic.compare_add);
439
} else {
440
qp->s_state = OP(FETCH_ADD);
441
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
442
wqe->wr.wr.atomic.compare_add);
443
ohdr->u.atomic_eth.compare_data = 0;
444
}
445
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
446
wqe->wr.wr.atomic.remote_addr >> 32);
447
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
448
wqe->wr.wr.atomic.remote_addr);
449
ohdr->u.atomic_eth.rkey = cpu_to_be32(
450
wqe->wr.wr.atomic.rkey);
451
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
452
ss = NULL;
453
len = 0;
454
if (++qp->s_cur == qp->s_size)
455
qp->s_cur = 0;
456
break;
457
458
default:
459
goto bail;
460
}
461
qp->s_sge.sge = wqe->sg_list[0];
462
qp->s_sge.sg_list = wqe->sg_list + 1;
463
qp->s_sge.num_sge = wqe->wr.num_sge;
464
qp->s_len = wqe->length;
465
if (newreq) {
466
qp->s_tail++;
467
if (qp->s_tail >= qp->s_size)
468
qp->s_tail = 0;
469
}
470
bth2 |= qp->s_psn & IPATH_PSN_MASK;
471
if (wqe->wr.opcode == IB_WR_RDMA_READ)
472
qp->s_psn = wqe->lpsn + 1;
473
else {
474
qp->s_psn++;
475
if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
476
qp->s_next_psn = qp->s_psn;
477
}
478
/*
479
* Put the QP on the pending list so lost ACKs will cause
480
* a retry. More than one request can be pending so the
481
* QP may already be on the dev->pending list.
482
*/
483
spin_lock(&dev->pending_lock);
484
if (list_empty(&qp->timerwait))
485
list_add_tail(&qp->timerwait,
486
&dev->pending[dev->pending_index]);
487
spin_unlock(&dev->pending_lock);
488
break;
489
490
case OP(RDMA_READ_RESPONSE_FIRST):
491
/*
492
* This case can only happen if a send is restarted.
493
* See ipath_restart_rc().
494
*/
495
ipath_init_restart(qp, wqe);
496
/* FALLTHROUGH */
497
case OP(SEND_FIRST):
498
qp->s_state = OP(SEND_MIDDLE);
499
/* FALLTHROUGH */
500
case OP(SEND_MIDDLE):
501
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
502
if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
503
qp->s_next_psn = qp->s_psn;
504
ss = &qp->s_sge;
505
len = qp->s_len;
506
if (len > pmtu) {
507
len = pmtu;
508
break;
509
}
510
if (wqe->wr.opcode == IB_WR_SEND)
511
qp->s_state = OP(SEND_LAST);
512
else {
513
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
514
/* Immediate data comes after the BTH */
515
ohdr->u.imm_data = wqe->wr.ex.imm_data;
516
hwords += 1;
517
}
518
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
519
bth0 |= 1 << 23;
520
bth2 |= 1 << 31; /* Request ACK. */
521
qp->s_cur++;
522
if (qp->s_cur >= qp->s_size)
523
qp->s_cur = 0;
524
break;
525
526
case OP(RDMA_READ_RESPONSE_LAST):
527
/*
528
* This case can only happen if a RDMA write is restarted.
529
* See ipath_restart_rc().
530
*/
531
ipath_init_restart(qp, wqe);
532
/* FALLTHROUGH */
533
case OP(RDMA_WRITE_FIRST):
534
qp->s_state = OP(RDMA_WRITE_MIDDLE);
535
/* FALLTHROUGH */
536
case OP(RDMA_WRITE_MIDDLE):
537
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
538
if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
539
qp->s_next_psn = qp->s_psn;
540
ss = &qp->s_sge;
541
len = qp->s_len;
542
if (len > pmtu) {
543
len = pmtu;
544
break;
545
}
546
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
547
qp->s_state = OP(RDMA_WRITE_LAST);
548
else {
549
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
550
/* Immediate data comes after the BTH */
551
ohdr->u.imm_data = wqe->wr.ex.imm_data;
552
hwords += 1;
553
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
554
bth0 |= 1 << 23;
555
}
556
bth2 |= 1 << 31; /* Request ACK. */
557
qp->s_cur++;
558
if (qp->s_cur >= qp->s_size)
559
qp->s_cur = 0;
560
break;
561
562
case OP(RDMA_READ_RESPONSE_MIDDLE):
563
/*
564
* This case can only happen if a RDMA read is restarted.
565
* See ipath_restart_rc().
566
*/
567
ipath_init_restart(qp, wqe);
568
len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
569
ohdr->u.rc.reth.vaddr =
570
cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
571
ohdr->u.rc.reth.rkey =
572
cpu_to_be32(wqe->wr.wr.rdma.rkey);
573
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
574
qp->s_state = OP(RDMA_READ_REQUEST);
575
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
576
bth2 = qp->s_psn & IPATH_PSN_MASK;
577
qp->s_psn = wqe->lpsn + 1;
578
ss = NULL;
579
len = 0;
580
qp->s_cur++;
581
if (qp->s_cur == qp->s_size)
582
qp->s_cur = 0;
583
break;
584
}
585
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
586
bth2 |= 1 << 31; /* Request ACK. */
587
qp->s_len -= len;
588
qp->s_hdrwords = hwords;
589
qp->s_cur_sge = ss;
590
qp->s_cur_size = len;
591
ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
592
done:
593
ret = 1;
594
goto unlock;
595
596
bail:
597
qp->s_flags &= ~IPATH_S_BUSY;
598
unlock:
599
spin_unlock_irqrestore(&qp->s_lock, flags);
600
return ret;
601
}
602
603
/**
604
* send_rc_ack - Construct an ACK packet and send it
605
* @qp: a pointer to the QP
606
*
607
* This is called from ipath_rc_rcv() and only uses the receive
608
* side QP state.
609
* Note that RDMA reads and atomics are handled in the
610
* send side QP state and tasklet.
611
*/
612
static void send_rc_ack(struct ipath_qp *qp)
613
{
614
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
615
struct ipath_devdata *dd;
616
u16 lrh0;
617
u32 bth0;
618
u32 hwords;
619
u32 __iomem *piobuf;
620
struct ipath_ib_header hdr;
621
struct ipath_other_headers *ohdr;
622
unsigned long flags;
623
624
spin_lock_irqsave(&qp->s_lock, flags);
625
626
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
627
if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
628
(qp->s_flags & IPATH_S_ACK_PENDING) ||
629
qp->s_ack_state != OP(ACKNOWLEDGE))
630
goto queue_ack;
631
632
spin_unlock_irqrestore(&qp->s_lock, flags);
633
634
/* Don't try to send ACKs if the link isn't ACTIVE */
635
dd = dev->dd;
636
if (!(dd->ipath_flags & IPATH_LINKACTIVE))
637
goto done;
638
639
piobuf = ipath_getpiobuf(dd, 0, NULL);
640
if (!piobuf) {
641
/*
642
* We are out of PIO buffers at the moment.
643
* Pass responsibility for sending the ACK to the
644
* send tasklet so that when a PIO buffer becomes
645
* available, the ACK is sent ahead of other outgoing
646
* packets.
647
*/
648
spin_lock_irqsave(&qp->s_lock, flags);
649
goto queue_ack;
650
}
651
652
/* Construct the header. */
653
ohdr = &hdr.u.oth;
654
lrh0 = IPATH_LRH_BTH;
655
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
656
hwords = 6;
657
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
658
hwords += ipath_make_grh(dev, &hdr.u.l.grh,
659
&qp->remote_ah_attr.grh,
660
hwords, 0);
661
ohdr = &hdr.u.l.oth;
662
lrh0 = IPATH_LRH_GRH;
663
}
664
/* read pkey_index w/o lock (its atomic) */
665
bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
666
(OP(ACKNOWLEDGE) << 24) | (1 << 22);
667
if (qp->r_nak_state)
668
ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
669
(qp->r_nak_state <<
670
IPATH_AETH_CREDIT_SHIFT));
671
else
672
ohdr->u.aeth = ipath_compute_aeth(qp);
673
lrh0 |= qp->remote_ah_attr.sl << 4;
674
hdr.lrh[0] = cpu_to_be16(lrh0);
675
hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
676
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
677
hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
678
qp->remote_ah_attr.src_path_bits);
679
ohdr->bth[0] = cpu_to_be32(bth0);
680
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
681
ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
682
683
writeq(hwords + 1, piobuf);
684
685
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
686
u32 *hdrp = (u32 *) &hdr;
687
688
ipath_flush_wc();
689
__iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
690
ipath_flush_wc();
691
__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
692
} else
693
__iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
694
695
ipath_flush_wc();
696
697
dev->n_unicast_xmit++;
698
goto done;
699
700
queue_ack:
701
if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
702
dev->n_rc_qacks++;
703
qp->s_flags |= IPATH_S_ACK_PENDING;
704
qp->s_nak_state = qp->r_nak_state;
705
qp->s_ack_psn = qp->r_ack_psn;
706
707
/* Schedule the send tasklet. */
708
ipath_schedule_send(qp);
709
}
710
spin_unlock_irqrestore(&qp->s_lock, flags);
711
done:
712
return;
713
}
714
715
/**
716
* reset_psn - reset the QP state to send starting from PSN
717
* @qp: the QP
718
* @psn: the packet sequence number to restart at
719
*
720
* This is called from ipath_rc_rcv() to process an incoming RC ACK
721
* for the given QP.
722
* Called at interrupt level with the QP s_lock held.
723
*/
724
static void reset_psn(struct ipath_qp *qp, u32 psn)
725
{
726
u32 n = qp->s_last;
727
struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
728
u32 opcode;
729
730
qp->s_cur = n;
731
732
/*
733
* If we are starting the request from the beginning,
734
* let the normal send code handle initialization.
735
*/
736
if (ipath_cmp24(psn, wqe->psn) <= 0) {
737
qp->s_state = OP(SEND_LAST);
738
goto done;
739
}
740
741
/* Find the work request opcode corresponding to the given PSN. */
742
opcode = wqe->wr.opcode;
743
for (;;) {
744
int diff;
745
746
if (++n == qp->s_size)
747
n = 0;
748
if (n == qp->s_tail)
749
break;
750
wqe = get_swqe_ptr(qp, n);
751
diff = ipath_cmp24(psn, wqe->psn);
752
if (diff < 0)
753
break;
754
qp->s_cur = n;
755
/*
756
* If we are starting the request from the beginning,
757
* let the normal send code handle initialization.
758
*/
759
if (diff == 0) {
760
qp->s_state = OP(SEND_LAST);
761
goto done;
762
}
763
opcode = wqe->wr.opcode;
764
}
765
766
/*
767
* Set the state to restart in the middle of a request.
768
* Don't change the s_sge, s_cur_sge, or s_cur_size.
769
* See ipath_make_rc_req().
770
*/
771
switch (opcode) {
772
case IB_WR_SEND:
773
case IB_WR_SEND_WITH_IMM:
774
qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
775
break;
776
777
case IB_WR_RDMA_WRITE:
778
case IB_WR_RDMA_WRITE_WITH_IMM:
779
qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
780
break;
781
782
case IB_WR_RDMA_READ:
783
qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
784
break;
785
786
default:
787
/*
788
* This case shouldn't happen since its only
789
* one PSN per req.
790
*/
791
qp->s_state = OP(SEND_LAST);
792
}
793
done:
794
qp->s_psn = psn;
795
}
796
797
/**
798
* ipath_restart_rc - back up requester to resend the last un-ACKed request
799
* @qp: the QP to restart
800
* @psn: packet sequence number for the request
801
* @wc: the work completion request
802
*
803
* The QP s_lock should be held and interrupts disabled.
804
*/
805
void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
806
{
807
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
808
struct ipath_ibdev *dev;
809
810
if (qp->s_retry == 0) {
811
ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
812
ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
813
goto bail;
814
}
815
qp->s_retry--;
816
817
/*
818
* Remove the QP from the timeout queue.
819
* Note: it may already have been removed by ipath_ib_timer().
820
*/
821
dev = to_idev(qp->ibqp.device);
822
spin_lock(&dev->pending_lock);
823
if (!list_empty(&qp->timerwait))
824
list_del_init(&qp->timerwait);
825
if (!list_empty(&qp->piowait))
826
list_del_init(&qp->piowait);
827
spin_unlock(&dev->pending_lock);
828
829
if (wqe->wr.opcode == IB_WR_RDMA_READ)
830
dev->n_rc_resends++;
831
else
832
dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
833
834
reset_psn(qp, psn);
835
ipath_schedule_send(qp);
836
837
bail:
838
return;
839
}
840
841
/*
 * update_last_psn - record @psn as the QP's s_last_psn.
 */
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
	qp->s_last_psn = psn;
}
845
846
/**
847
* do_rc_ack - process an incoming RC ACK
848
* @qp: the QP the ACK came in on
849
* @psn: the packet sequence number of the ACK
850
* @opcode: the opcode of the request that resulted in the ACK
851
*
852
* This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
853
* for the given QP.
854
* Called at interrupt level with the QP s_lock held and interrupts disabled.
855
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
856
*/
857
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
858
u64 val)
859
{
860
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
861
struct ib_wc wc;
862
enum ib_wc_status status;
863
struct ipath_swqe *wqe;
864
int ret = 0;
865
u32 ack_psn;
866
int diff;
867
868
/*
869
* Remove the QP from the timeout queue (or RNR timeout queue).
870
* If ipath_ib_timer() has already removed it,
871
* it's OK since we hold the QP s_lock and ipath_restart_rc()
872
* just won't find anything to restart if we ACK everything.
873
*/
874
spin_lock(&dev->pending_lock);
875
if (!list_empty(&qp->timerwait))
876
list_del_init(&qp->timerwait);
877
spin_unlock(&dev->pending_lock);
878
879
/*
880
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
881
* requests and implicitly NAK RDMA read and atomic requests issued
882
* before the NAK'ed request. The MSN won't include the NAK'ed
883
* request but will include an ACK'ed request(s).
884
*/
885
ack_psn = psn;
886
if (aeth >> 29)
887
ack_psn--;
888
wqe = get_swqe_ptr(qp, qp->s_last);
889
890
/*
891
* The MSN might be for a later WQE than the PSN indicates so
892
* only complete WQEs that the PSN finishes.
893
*/
894
while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
895
/*
896
* RDMA_READ_RESPONSE_ONLY is a special case since
897
* we want to generate completion events for everything
898
* before the RDMA read, copy the data, then generate
899
* the completion for the read.
900
*/
901
if (wqe->wr.opcode == IB_WR_RDMA_READ &&
902
opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
903
diff == 0) {
904
ret = 1;
905
goto bail;
906
}
907
/*
908
* If this request is a RDMA read or atomic, and the ACK is
909
* for a later operation, this ACK NAKs the RDMA read or
910
* atomic. In other words, only a RDMA_READ_LAST or ONLY
911
* can ACK a RDMA read and likewise for atomic ops. Note
912
* that the NAK case can only happen if relaxed ordering is
913
* used and requests are sent after an RDMA read or atomic
914
* is sent but before the response is received.
915
*/
916
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
917
(opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
918
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
919
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
920
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
921
/*
922
* The last valid PSN seen is the previous
923
* request's.
924
*/
925
update_last_psn(qp, wqe->psn - 1);
926
/* Retry this request. */
927
ipath_restart_rc(qp, wqe->psn);
928
/*
929
* No need to process the ACK/NAK since we are
930
* restarting an earlier request.
931
*/
932
goto bail;
933
}
934
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
935
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
936
*(u64 *) wqe->sg_list[0].vaddr = val;
937
if (qp->s_num_rd_atomic &&
938
(wqe->wr.opcode == IB_WR_RDMA_READ ||
939
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
940
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
941
qp->s_num_rd_atomic--;
942
/* Restart sending task if fence is complete */
943
if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
944
!qp->s_num_rd_atomic) ||
945
qp->s_flags & IPATH_S_RDMAR_PENDING)
946
ipath_schedule_send(qp);
947
}
948
/* Post a send completion queue entry if requested. */
949
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
950
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
951
memset(&wc, 0, sizeof wc);
952
wc.wr_id = wqe->wr.wr_id;
953
wc.status = IB_WC_SUCCESS;
954
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
955
wc.byte_len = wqe->length;
956
wc.qp = &qp->ibqp;
957
wc.src_qp = qp->remote_qpn;
958
wc.slid = qp->remote_ah_attr.dlid;
959
wc.sl = qp->remote_ah_attr.sl;
960
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
961
}
962
qp->s_retry = qp->s_retry_cnt;
963
/*
964
* If we are completing a request which is in the process of
965
* being resent, we can stop resending it since we know the
966
* responder has already seen it.
967
*/
968
if (qp->s_last == qp->s_cur) {
969
if (++qp->s_cur >= qp->s_size)
970
qp->s_cur = 0;
971
qp->s_last = qp->s_cur;
972
if (qp->s_last == qp->s_tail)
973
break;
974
wqe = get_swqe_ptr(qp, qp->s_cur);
975
qp->s_state = OP(SEND_LAST);
976
qp->s_psn = wqe->psn;
977
} else {
978
if (++qp->s_last >= qp->s_size)
979
qp->s_last = 0;
980
if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
981
qp->s_draining = 0;
982
if (qp->s_last == qp->s_tail)
983
break;
984
wqe = get_swqe_ptr(qp, qp->s_last);
985
}
986
}
987
988
switch (aeth >> 29) {
989
case 0: /* ACK */
990
dev->n_rc_acks++;
991
/* If this is a partial ACK, reset the retransmit timer. */
992
if (qp->s_last != qp->s_tail) {
993
spin_lock(&dev->pending_lock);
994
if (list_empty(&qp->timerwait))
995
list_add_tail(&qp->timerwait,
996
&dev->pending[dev->pending_index]);
997
spin_unlock(&dev->pending_lock);
998
/*
999
* If we get a partial ACK for a resent operation,
1000
* we can stop resending the earlier packets and
1001
* continue with the next packet the receiver wants.
1002
*/
1003
if (ipath_cmp24(qp->s_psn, psn) <= 0) {
1004
reset_psn(qp, psn + 1);
1005
ipath_schedule_send(qp);
1006
}
1007
} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
1008
qp->s_state = OP(SEND_LAST);
1009
qp->s_psn = psn + 1;
1010
}
1011
ipath_get_credit(qp, aeth);
1012
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1013
qp->s_retry = qp->s_retry_cnt;
1014
update_last_psn(qp, psn);
1015
ret = 1;
1016
goto bail;
1017
1018
case 1: /* RNR NAK */
1019
dev->n_rnr_naks++;
1020
if (qp->s_last == qp->s_tail)
1021
goto bail;
1022
if (qp->s_rnr_retry == 0) {
1023
status = IB_WC_RNR_RETRY_EXC_ERR;
1024
goto class_b;
1025
}
1026
if (qp->s_rnr_retry_cnt < 7)
1027
qp->s_rnr_retry--;
1028
1029
/* The last valid PSN is the previous PSN. */
1030
update_last_psn(qp, psn - 1);
1031
1032
if (wqe->wr.opcode == IB_WR_RDMA_READ)
1033
dev->n_rc_resends++;
1034
else
1035
dev->n_rc_resends +=
1036
(qp->s_psn - psn) & IPATH_PSN_MASK;
1037
1038
reset_psn(qp, psn);
1039
1040
qp->s_rnr_timeout =
1041
ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
1042
IPATH_AETH_CREDIT_MASK];
1043
ipath_insert_rnr_queue(qp);
1044
ipath_schedule_send(qp);
1045
goto bail;
1046
1047
case 3: /* NAK */
1048
if (qp->s_last == qp->s_tail)
1049
goto bail;
1050
/* The last valid PSN is the previous PSN. */
1051
update_last_psn(qp, psn - 1);
1052
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
1053
IPATH_AETH_CREDIT_MASK) {
1054
case 0: /* PSN sequence error */
1055
dev->n_seq_naks++;
1056
/*
1057
* Back up to the responder's expected PSN.
1058
* Note that we might get a NAK in the middle of an
1059
* RDMA READ response which terminates the RDMA
1060
* READ.
1061
*/
1062
ipath_restart_rc(qp, psn);
1063
break;
1064
1065
case 1: /* Invalid Request */
1066
status = IB_WC_REM_INV_REQ_ERR;
1067
dev->n_other_naks++;
1068
goto class_b;
1069
1070
case 2: /* Remote Access Error */
1071
status = IB_WC_REM_ACCESS_ERR;
1072
dev->n_other_naks++;
1073
goto class_b;
1074
1075
case 3: /* Remote Operation Error */
1076
status = IB_WC_REM_OP_ERR;
1077
dev->n_other_naks++;
1078
class_b:
1079
ipath_send_complete(qp, wqe, status);
1080
ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1081
break;
1082
1083
default:
1084
/* Ignore other reserved NAK error codes */
1085
goto reserved;
1086
}
1087
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1088
goto bail;
1089
1090
default: /* 2: reserved */
1091
reserved:
1092
/* Ignore reserved NAK codes. */
1093
goto bail;
1094
}
1095
1096
bail:
1097
return ret;
1098
}
1099
1100
/**
1101
* ipath_rc_rcv_resp - process an incoming RC response packet
1102
* @dev: the device this packet came in on
1103
* @ohdr: the other headers for this packet
1104
* @data: the packet data
1105
* @tlen: the packet length
1106
* @qp: the QP for this packet
1107
* @opcode: the opcode for this packet
1108
* @psn: the packet sequence number for this packet
1109
* @hdrsize: the header length
1110
* @pmtu: the path MTU
1111
* @header_in_data: true if part of the header data is in the data buffer
1112
*
1113
* This is called from ipath_rc_rcv() to process an incoming RC response
1114
* packet for the given QP.
1115
* Called at interrupt level.
1116
*/
1117
static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1118
struct ipath_other_headers *ohdr,
1119
void *data, u32 tlen,
1120
struct ipath_qp *qp,
1121
u32 opcode,
1122
u32 psn, u32 hdrsize, u32 pmtu,
1123
int header_in_data)
1124
{
1125
struct ipath_swqe *wqe;
1126
enum ib_wc_status status;
1127
unsigned long flags;
1128
int diff;
1129
u32 pad;
1130
u32 aeth;
1131
u64 val;
1132
1133
spin_lock_irqsave(&qp->s_lock, flags);
1134
1135
/* Double check we can process this now that we hold the s_lock. */
1136
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1137
goto ack_done;
1138
1139
/* Ignore invalid responses. */
1140
if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
1141
goto ack_done;
1142
1143
/* Ignore duplicate responses. */
1144
diff = ipath_cmp24(psn, qp->s_last_psn);
1145
if (unlikely(diff <= 0)) {
1146
/* Update credits for "ghost" ACKs */
1147
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1148
if (!header_in_data)
1149
aeth = be32_to_cpu(ohdr->u.aeth);
1150
else {
1151
aeth = be32_to_cpu(((__be32 *) data)[0]);
1152
data += sizeof(__be32);
1153
}
1154
if ((aeth >> 29) == 0)
1155
ipath_get_credit(qp, aeth);
1156
}
1157
goto ack_done;
1158
}
1159
1160
if (unlikely(qp->s_last == qp->s_tail))
1161
goto ack_done;
1162
wqe = get_swqe_ptr(qp, qp->s_last);
1163
status = IB_WC_SUCCESS;
1164
1165
switch (opcode) {
1166
case OP(ACKNOWLEDGE):
1167
case OP(ATOMIC_ACKNOWLEDGE):
1168
case OP(RDMA_READ_RESPONSE_FIRST):
1169
if (!header_in_data)
1170
aeth = be32_to_cpu(ohdr->u.aeth);
1171
else {
1172
aeth = be32_to_cpu(((__be32 *) data)[0]);
1173
data += sizeof(__be32);
1174
}
1175
if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1176
if (!header_in_data) {
1177
__be32 *p = ohdr->u.at.atomic_ack_eth;
1178
1179
val = ((u64) be32_to_cpu(p[0]) << 32) |
1180
be32_to_cpu(p[1]);
1181
} else
1182
val = be64_to_cpu(((__be64 *) data)[0]);
1183
} else
1184
val = 0;
1185
if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
1186
opcode != OP(RDMA_READ_RESPONSE_FIRST))
1187
goto ack_done;
1188
hdrsize += 4;
1189
wqe = get_swqe_ptr(qp, qp->s_last);
1190
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1191
goto ack_op_err;
1192
qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
1193
/*
1194
* If this is a response to a resent RDMA read, we
1195
* have to be careful to copy the data to the right
1196
* location.
1197
*/
1198
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1199
wqe, psn, pmtu);
1200
goto read_middle;
1201
1202
case OP(RDMA_READ_RESPONSE_MIDDLE):
1203
/* no AETH, no ACK */
1204
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1205
dev->n_rdma_seq++;
1206
if (qp->r_flags & IPATH_R_RDMAR_SEQ)
1207
goto ack_done;
1208
qp->r_flags |= IPATH_R_RDMAR_SEQ;
1209
ipath_restart_rc(qp, qp->s_last_psn + 1);
1210
goto ack_done;
1211
}
1212
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1213
goto ack_op_err;
1214
read_middle:
1215
if (unlikely(tlen != (hdrsize + pmtu + 4)))
1216
goto ack_len_err;
1217
if (unlikely(pmtu >= qp->s_rdma_read_len))
1218
goto ack_len_err;
1219
1220
/* We got a response so update the timeout. */
1221
spin_lock(&dev->pending_lock);
1222
if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
1223
list_move_tail(&qp->timerwait,
1224
&dev->pending[dev->pending_index]);
1225
spin_unlock(&dev->pending_lock);
1226
1227
if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1228
qp->s_retry = qp->s_retry_cnt;
1229
1230
/*
1231
* Update the RDMA receive state but do the copy w/o
1232
* holding the locks and blocking interrupts.
1233
*/
1234
qp->s_rdma_read_len -= pmtu;
1235
update_last_psn(qp, psn);
1236
spin_unlock_irqrestore(&qp->s_lock, flags);
1237
ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
1238
goto bail;
1239
1240
case OP(RDMA_READ_RESPONSE_ONLY):
1241
if (!header_in_data)
1242
aeth = be32_to_cpu(ohdr->u.aeth);
1243
else
1244
aeth = be32_to_cpu(((__be32 *) data)[0]);
1245
if (!do_rc_ack(qp, aeth, psn, opcode, 0))
1246
goto ack_done;
1247
/* Get the number of bytes the message was padded by. */
1248
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1249
/*
1250
* Check that the data size is >= 0 && <= pmtu.
1251
* Remember to account for the AETH header (4) and
1252
* ICRC (4).
1253
*/
1254
if (unlikely(tlen < (hdrsize + pad + 8)))
1255
goto ack_len_err;
1256
/*
1257
* If this is a response to a resent RDMA read, we
1258
* have to be careful to copy the data to the right
1259
* location.
1260
*/
1261
wqe = get_swqe_ptr(qp, qp->s_last);
1262
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1263
wqe, psn, pmtu);
1264
goto read_last;
1265
1266
case OP(RDMA_READ_RESPONSE_LAST):
1267
/* ACKs READ req. */
1268
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1269
dev->n_rdma_seq++;
1270
if (qp->r_flags & IPATH_R_RDMAR_SEQ)
1271
goto ack_done;
1272
qp->r_flags |= IPATH_R_RDMAR_SEQ;
1273
ipath_restart_rc(qp, qp->s_last_psn + 1);
1274
goto ack_done;
1275
}
1276
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1277
goto ack_op_err;
1278
/* Get the number of bytes the message was padded by. */
1279
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1280
/*
1281
* Check that the data size is >= 1 && <= pmtu.
1282
* Remember to account for the AETH header (4) and
1283
* ICRC (4).
1284
*/
1285
if (unlikely(tlen <= (hdrsize + pad + 8)))
1286
goto ack_len_err;
1287
read_last:
1288
tlen -= hdrsize + pad + 8;
1289
if (unlikely(tlen != qp->s_rdma_read_len))
1290
goto ack_len_err;
1291
if (!header_in_data)
1292
aeth = be32_to_cpu(ohdr->u.aeth);
1293
else {
1294
aeth = be32_to_cpu(((__be32 *) data)[0]);
1295
data += sizeof(__be32);
1296
}
1297
ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
1298
(void) do_rc_ack(qp, aeth, psn,
1299
OP(RDMA_READ_RESPONSE_LAST), 0);
1300
goto ack_done;
1301
}
1302
1303
ack_op_err:
1304
status = IB_WC_LOC_QP_OP_ERR;
1305
goto ack_err;
1306
1307
ack_len_err:
1308
status = IB_WC_LOC_LEN_ERR;
1309
ack_err:
1310
ipath_send_complete(qp, wqe, status);
1311
ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1312
ack_done:
1313
spin_unlock_irqrestore(&qp->s_lock, flags);
1314
bail:
1315
return;
1316
}
1317
1318
/**
1319
* ipath_rc_rcv_error - process an incoming duplicate or error RC packet
1320
* @dev: the device this packet came in on
1321
* @ohdr: the other headers for this packet
1322
* @data: the packet data
1323
* @qp: the QP for this packet
1324
* @opcode: the opcode for this packet
1325
* @psn: the packet sequence number for this packet
1326
* @diff: the difference between the PSN and the expected PSN
1327
* @header_in_data: true if part of the header data is in the data buffer
1328
*
1329
* This is called from ipath_rc_rcv() to process an unexpected
1330
* incoming RC packet for the given QP.
1331
* Called at interrupt level.
1332
* Return 1 if no more processing is needed; otherwise return 0 to
1333
* schedule a response to be sent.
1334
*/
1335
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1336
struct ipath_other_headers *ohdr,
1337
void *data,
1338
struct ipath_qp *qp,
1339
u32 opcode,
1340
u32 psn,
1341
int diff,
1342
int header_in_data)
1343
{
1344
struct ipath_ack_entry *e;
1345
u8 i, prev;
1346
int old_req;
1347
unsigned long flags;
1348
1349
if (diff > 0) {
1350
/*
1351
* Packet sequence error.
1352
* A NAK will ACK earlier sends and RDMA writes.
1353
* Don't queue the NAK if we already sent one.
1354
*/
1355
if (!qp->r_nak_state) {
1356
qp->r_nak_state = IB_NAK_PSN_ERROR;
1357
/* Use the expected PSN. */
1358
qp->r_ack_psn = qp->r_psn;
1359
goto send_ack;
1360
}
1361
goto done;
1362
}
1363
1364
/*
1365
* Handle a duplicate request. Don't re-execute SEND, RDMA
1366
* write or atomic op. Don't NAK errors, just silently drop
1367
* the duplicate request. Note that r_sge, r_len, and
1368
* r_rcv_len may be in use so don't modify them.
1369
*
1370
* We are supposed to ACK the earliest duplicate PSN but we
1371
* can coalesce an outstanding duplicate ACK. We have to
1372
* send the earliest so that RDMA reads can be restarted at
1373
* the requester's expected PSN.
1374
*
1375
* First, find where this duplicate PSN falls within the
1376
* ACKs previously sent.
1377
*/
1378
psn &= IPATH_PSN_MASK;
1379
e = NULL;
1380
old_req = 1;
1381
1382
spin_lock_irqsave(&qp->s_lock, flags);
1383
/* Double check we can process this now that we hold the s_lock. */
1384
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1385
goto unlock_done;
1386
1387
for (i = qp->r_head_ack_queue; ; i = prev) {
1388
if (i == qp->s_tail_ack_queue)
1389
old_req = 0;
1390
if (i)
1391
prev = i - 1;
1392
else
1393
prev = IPATH_MAX_RDMA_ATOMIC;
1394
if (prev == qp->r_head_ack_queue) {
1395
e = NULL;
1396
break;
1397
}
1398
e = &qp->s_ack_queue[prev];
1399
if (!e->opcode) {
1400
e = NULL;
1401
break;
1402
}
1403
if (ipath_cmp24(psn, e->psn) >= 0) {
1404
if (prev == qp->s_tail_ack_queue)
1405
old_req = 0;
1406
break;
1407
}
1408
}
1409
switch (opcode) {
1410
case OP(RDMA_READ_REQUEST): {
1411
struct ib_reth *reth;
1412
u32 offset;
1413
u32 len;
1414
1415
/*
1416
* If we didn't find the RDMA read request in the ack queue,
1417
* or the send tasklet is already backed up to send an
1418
* earlier entry, we can ignore this request.
1419
*/
1420
if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
1421
goto unlock_done;
1422
/* RETH comes after BTH */
1423
if (!header_in_data)
1424
reth = &ohdr->u.rc.reth;
1425
else {
1426
reth = (struct ib_reth *)data;
1427
data += sizeof(*reth);
1428
}
1429
/*
1430
* Address range must be a subset of the original
1431
* request and start on pmtu boundaries.
1432
* We reuse the old ack_queue slot since the requester
1433
* should not back up and request an earlier PSN for the
1434
* same request.
1435
*/
1436
offset = ((psn - e->psn) & IPATH_PSN_MASK) *
1437
ib_mtu_enum_to_int(qp->path_mtu);
1438
len = be32_to_cpu(reth->length);
1439
if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
1440
goto unlock_done;
1441
if (len != 0) {
1442
u32 rkey = be32_to_cpu(reth->rkey);
1443
u64 vaddr = be64_to_cpu(reth->vaddr);
1444
int ok;
1445
1446
ok = ipath_rkey_ok(qp, &e->rdma_sge,
1447
len, vaddr, rkey,
1448
IB_ACCESS_REMOTE_READ);
1449
if (unlikely(!ok))
1450
goto unlock_done;
1451
} else {
1452
e->rdma_sge.sg_list = NULL;
1453
e->rdma_sge.num_sge = 0;
1454
e->rdma_sge.sge.mr = NULL;
1455
e->rdma_sge.sge.vaddr = NULL;
1456
e->rdma_sge.sge.length = 0;
1457
e->rdma_sge.sge.sge_length = 0;
1458
}
1459
e->psn = psn;
1460
qp->s_ack_state = OP(ACKNOWLEDGE);
1461
qp->s_tail_ack_queue = prev;
1462
break;
1463
}
1464
1465
case OP(COMPARE_SWAP):
1466
case OP(FETCH_ADD): {
1467
/*
1468
* If we didn't find the atomic request in the ack queue
1469
* or the send tasklet is already backed up to send an
1470
* earlier entry, we can ignore this request.
1471
*/
1472
if (!e || e->opcode != (u8) opcode || old_req)
1473
goto unlock_done;
1474
qp->s_ack_state = OP(ACKNOWLEDGE);
1475
qp->s_tail_ack_queue = prev;
1476
break;
1477
}
1478
1479
default:
1480
if (old_req)
1481
goto unlock_done;
1482
/*
1483
* Resend the most recent ACK if this request is
1484
* after all the previous RDMA reads and atomics.
1485
*/
1486
if (i == qp->r_head_ack_queue) {
1487
spin_unlock_irqrestore(&qp->s_lock, flags);
1488
qp->r_nak_state = 0;
1489
qp->r_ack_psn = qp->r_psn - 1;
1490
goto send_ack;
1491
}
1492
/*
1493
* Try to send a simple ACK to work around a Mellanox bug
1494
* which doesn't accept a RDMA read response or atomic
1495
* response as an ACK for earlier SENDs or RDMA writes.
1496
*/
1497
if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
1498
!(qp->s_flags & IPATH_S_ACK_PENDING) &&
1499
qp->s_ack_state == OP(ACKNOWLEDGE)) {
1500
spin_unlock_irqrestore(&qp->s_lock, flags);
1501
qp->r_nak_state = 0;
1502
qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1503
goto send_ack;
1504
}
1505
/*
1506
* Resend the RDMA read or atomic op which
1507
* ACKs this duplicate request.
1508
*/
1509
qp->s_ack_state = OP(ACKNOWLEDGE);
1510
qp->s_tail_ack_queue = i;
1511
break;
1512
}
1513
qp->r_nak_state = 0;
1514
ipath_schedule_send(qp);
1515
1516
unlock_done:
1517
spin_unlock_irqrestore(&qp->s_lock, flags);
1518
done:
1519
return 1;
1520
1521
send_ack:
1522
return 0;
1523
}
1524
1525
void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1526
{
1527
unsigned long flags;
1528
int lastwqe;
1529
1530
spin_lock_irqsave(&qp->s_lock, flags);
1531
lastwqe = ipath_error_qp(qp, err);
1532
spin_unlock_irqrestore(&qp->s_lock, flags);
1533
1534
if (lastwqe) {
1535
struct ib_event ev;
1536
1537
ev.device = qp->ibqp.device;
1538
ev.element.qp = &qp->ibqp;
1539
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1540
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1541
}
1542
}
1543
1544
static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
1545
{
1546
unsigned next;
1547
1548
next = n + 1;
1549
if (next > IPATH_MAX_RDMA_ATOMIC)
1550
next = 0;
1551
if (n == qp->s_tail_ack_queue) {
1552
qp->s_tail_ack_queue = next;
1553
qp->s_ack_state = OP(ACKNOWLEDGE);
1554
}
1555
}
1556
1557
/**
1558
* ipath_rc_rcv - process an incoming RC packet
1559
* @dev: the device this packet came in on
1560
* @hdr: the header of this packet
1561
* @has_grh: true if the header has a GRH
1562
* @data: the packet data
1563
* @tlen: the packet length
1564
* @qp: the QP for this packet
1565
*
1566
* This is called from ipath_qp_rcv() to process an incoming RC packet
1567
* for the given QP.
1568
* Called at interrupt level.
1569
*/
1570
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1571
int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
1572
{
1573
struct ipath_other_headers *ohdr;
1574
u32 opcode;
1575
u32 hdrsize;
1576
u32 psn;
1577
u32 pad;
1578
struct ib_wc wc;
1579
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
1580
int diff;
1581
struct ib_reth *reth;
1582
int header_in_data;
1583
unsigned long flags;
1584
1585
/* Validate the SLID. See Ch. 9.6.1.5 */
1586
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
1587
goto done;
1588
1589
/* Check for GRH */
1590
if (!has_grh) {
1591
ohdr = &hdr->u.oth;
1592
hdrsize = 8 + 12; /* LRH + BTH */
1593
psn = be32_to_cpu(ohdr->bth[2]);
1594
header_in_data = 0;
1595
} else {
1596
ohdr = &hdr->u.l.oth;
1597
hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
1598
/*
1599
* The header with GRH is 60 bytes and the core driver sets
1600
* the eager header buffer size to 56 bytes so the last 4
1601
* bytes of the BTH header (PSN) is in the data buffer.
1602
*/
1603
header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
1604
if (header_in_data) {
1605
psn = be32_to_cpu(((__be32 *) data)[0]);
1606
data += sizeof(__be32);
1607
} else
1608
psn = be32_to_cpu(ohdr->bth[2]);
1609
}
1610
1611
/*
1612
* Process responses (ACKs) before anything else. Note that the
1613
* packet sequence number will be for something in the send work
1614
* queue rather than the expected receive packet sequence number.
1615
* In other words, this QP is the requester.
1616
*/
1617
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
1618
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1619
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1620
ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
1621
hdrsize, pmtu, header_in_data);
1622
goto done;
1623
}
1624
1625
/* Compute 24 bits worth of difference. */
1626
diff = ipath_cmp24(psn, qp->r_psn);
1627
if (unlikely(diff)) {
1628
if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
1629
psn, diff, header_in_data))
1630
goto done;
1631
goto send_ack;
1632
}
1633
1634
/* Check for opcode sequence errors. */
1635
switch (qp->r_state) {
1636
case OP(SEND_FIRST):
1637
case OP(SEND_MIDDLE):
1638
if (opcode == OP(SEND_MIDDLE) ||
1639
opcode == OP(SEND_LAST) ||
1640
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1641
break;
1642
goto nack_inv;
1643
1644
case OP(RDMA_WRITE_FIRST):
1645
case OP(RDMA_WRITE_MIDDLE):
1646
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1647
opcode == OP(RDMA_WRITE_LAST) ||
1648
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1649
break;
1650
goto nack_inv;
1651
1652
default:
1653
if (opcode == OP(SEND_MIDDLE) ||
1654
opcode == OP(SEND_LAST) ||
1655
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1656
opcode == OP(RDMA_WRITE_MIDDLE) ||
1657
opcode == OP(RDMA_WRITE_LAST) ||
1658
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1659
goto nack_inv;
1660
/*
1661
* Note that it is up to the requester to not send a new
1662
* RDMA read or atomic operation before receiving an ACK
1663
* for the previous operation.
1664
*/
1665
break;
1666
}
1667
1668
memset(&wc, 0, sizeof wc);
1669
1670
/* OK, process the packet. */
1671
switch (opcode) {
1672
case OP(SEND_FIRST):
1673
if (!ipath_get_rwqe(qp, 0))
1674
goto rnr_nak;
1675
qp->r_rcv_len = 0;
1676
/* FALLTHROUGH */
1677
case OP(SEND_MIDDLE):
1678
case OP(RDMA_WRITE_MIDDLE):
1679
send_middle:
1680
/* Check for invalid length PMTU or posted rwqe len. */
1681
if (unlikely(tlen != (hdrsize + pmtu + 4)))
1682
goto nack_inv;
1683
qp->r_rcv_len += pmtu;
1684
if (unlikely(qp->r_rcv_len > qp->r_len))
1685
goto nack_inv;
1686
ipath_copy_sge(&qp->r_sge, data, pmtu);
1687
break;
1688
1689
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1690
/* consume RWQE */
1691
if (!ipath_get_rwqe(qp, 1))
1692
goto rnr_nak;
1693
goto send_last_imm;
1694
1695
case OP(SEND_ONLY):
1696
case OP(SEND_ONLY_WITH_IMMEDIATE):
1697
if (!ipath_get_rwqe(qp, 0))
1698
goto rnr_nak;
1699
qp->r_rcv_len = 0;
1700
if (opcode == OP(SEND_ONLY))
1701
goto send_last;
1702
/* FALLTHROUGH */
1703
case OP(SEND_LAST_WITH_IMMEDIATE):
1704
send_last_imm:
1705
if (header_in_data) {
1706
wc.ex.imm_data = *(__be32 *) data;
1707
data += sizeof(__be32);
1708
} else {
1709
/* Immediate data comes after BTH */
1710
wc.ex.imm_data = ohdr->u.imm_data;
1711
}
1712
hdrsize += 4;
1713
wc.wc_flags = IB_WC_WITH_IMM;
1714
/* FALLTHROUGH */
1715
case OP(SEND_LAST):
1716
case OP(RDMA_WRITE_LAST):
1717
send_last:
1718
/* Get the number of bytes the message was padded by. */
1719
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1720
/* Check for invalid length. */
1721
/* XXX LAST len should be >= 1 */
1722
if (unlikely(tlen < (hdrsize + pad + 4)))
1723
goto nack_inv;
1724
/* Don't count the CRC. */
1725
tlen -= (hdrsize + pad + 4);
1726
wc.byte_len = tlen + qp->r_rcv_len;
1727
if (unlikely(wc.byte_len > qp->r_len))
1728
goto nack_inv;
1729
ipath_copy_sge(&qp->r_sge, data, tlen);
1730
qp->r_msn++;
1731
if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
1732
break;
1733
wc.wr_id = qp->r_wr_id;
1734
wc.status = IB_WC_SUCCESS;
1735
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
1736
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
1737
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
1738
else
1739
wc.opcode = IB_WC_RECV;
1740
wc.qp = &qp->ibqp;
1741
wc.src_qp = qp->remote_qpn;
1742
wc.slid = qp->remote_ah_attr.dlid;
1743
wc.sl = qp->remote_ah_attr.sl;
1744
/* Signal completion event if the solicited bit is set. */
1745
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
1746
(ohdr->bth[0] &
1747
cpu_to_be32(1 << 23)) != 0);
1748
break;
1749
1750
case OP(RDMA_WRITE_FIRST):
1751
case OP(RDMA_WRITE_ONLY):
1752
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
1753
if (unlikely(!(qp->qp_access_flags &
1754
IB_ACCESS_REMOTE_WRITE)))
1755
goto nack_inv;
1756
/* consume RWQE */
1757
/* RETH comes after BTH */
1758
if (!header_in_data)
1759
reth = &ohdr->u.rc.reth;
1760
else {
1761
reth = (struct ib_reth *)data;
1762
data += sizeof(*reth);
1763
}
1764
hdrsize += sizeof(*reth);
1765
qp->r_len = be32_to_cpu(reth->length);
1766
qp->r_rcv_len = 0;
1767
if (qp->r_len != 0) {
1768
u32 rkey = be32_to_cpu(reth->rkey);
1769
u64 vaddr = be64_to_cpu(reth->vaddr);
1770
int ok;
1771
1772
/* Check rkey & NAK */
1773
ok = ipath_rkey_ok(qp, &qp->r_sge,
1774
qp->r_len, vaddr, rkey,
1775
IB_ACCESS_REMOTE_WRITE);
1776
if (unlikely(!ok))
1777
goto nack_acc;
1778
} else {
1779
qp->r_sge.sg_list = NULL;
1780
qp->r_sge.sge.mr = NULL;
1781
qp->r_sge.sge.vaddr = NULL;
1782
qp->r_sge.sge.length = 0;
1783
qp->r_sge.sge.sge_length = 0;
1784
}
1785
if (opcode == OP(RDMA_WRITE_FIRST))
1786
goto send_middle;
1787
else if (opcode == OP(RDMA_WRITE_ONLY))
1788
goto send_last;
1789
if (!ipath_get_rwqe(qp, 1))
1790
goto rnr_nak;
1791
goto send_last_imm;
1792
1793
case OP(RDMA_READ_REQUEST): {
1794
struct ipath_ack_entry *e;
1795
u32 len;
1796
u8 next;
1797
1798
if (unlikely(!(qp->qp_access_flags &
1799
IB_ACCESS_REMOTE_READ)))
1800
goto nack_inv;
1801
next = qp->r_head_ack_queue + 1;
1802
if (next > IPATH_MAX_RDMA_ATOMIC)
1803
next = 0;
1804
spin_lock_irqsave(&qp->s_lock, flags);
1805
/* Double check we can process this while holding the s_lock. */
1806
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1807
goto unlock;
1808
if (unlikely(next == qp->s_tail_ack_queue)) {
1809
if (!qp->s_ack_queue[next].sent)
1810
goto nack_inv_unlck;
1811
ipath_update_ack_queue(qp, next);
1812
}
1813
e = &qp->s_ack_queue[qp->r_head_ack_queue];
1814
/* RETH comes after BTH */
1815
if (!header_in_data)
1816
reth = &ohdr->u.rc.reth;
1817
else {
1818
reth = (struct ib_reth *)data;
1819
data += sizeof(*reth);
1820
}
1821
len = be32_to_cpu(reth->length);
1822
if (len) {
1823
u32 rkey = be32_to_cpu(reth->rkey);
1824
u64 vaddr = be64_to_cpu(reth->vaddr);
1825
int ok;
1826
1827
/* Check rkey & NAK */
1828
ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1829
rkey, IB_ACCESS_REMOTE_READ);
1830
if (unlikely(!ok))
1831
goto nack_acc_unlck;
1832
/*
1833
* Update the next expected PSN. We add 1 later
1834
* below, so only add the remainder here.
1835
*/
1836
if (len > pmtu)
1837
qp->r_psn += (len - 1) / pmtu;
1838
} else {
1839
e->rdma_sge.sg_list = NULL;
1840
e->rdma_sge.num_sge = 0;
1841
e->rdma_sge.sge.mr = NULL;
1842
e->rdma_sge.sge.vaddr = NULL;
1843
e->rdma_sge.sge.length = 0;
1844
e->rdma_sge.sge.sge_length = 0;
1845
}
1846
e->opcode = opcode;
1847
e->sent = 0;
1848
e->psn = psn;
1849
/*
1850
* We need to increment the MSN here instead of when we
1851
* finish sending the result since a duplicate request would
1852
* increment it more than once.
1853
*/
1854
qp->r_msn++;
1855
qp->r_psn++;
1856
qp->r_state = opcode;
1857
qp->r_nak_state = 0;
1858
qp->r_head_ack_queue = next;
1859
1860
/* Schedule the send tasklet. */
1861
ipath_schedule_send(qp);
1862
1863
goto unlock;
1864
}
1865
1866
case OP(COMPARE_SWAP):
1867
case OP(FETCH_ADD): {
1868
struct ib_atomic_eth *ateth;
1869
struct ipath_ack_entry *e;
1870
u64 vaddr;
1871
atomic64_t *maddr;
1872
u64 sdata;
1873
u32 rkey;
1874
u8 next;
1875
1876
if (unlikely(!(qp->qp_access_flags &
1877
IB_ACCESS_REMOTE_ATOMIC)))
1878
goto nack_inv;
1879
next = qp->r_head_ack_queue + 1;
1880
if (next > IPATH_MAX_RDMA_ATOMIC)
1881
next = 0;
1882
spin_lock_irqsave(&qp->s_lock, flags);
1883
/* Double check we can process this while holding the s_lock. */
1884
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1885
goto unlock;
1886
if (unlikely(next == qp->s_tail_ack_queue)) {
1887
if (!qp->s_ack_queue[next].sent)
1888
goto nack_inv_unlck;
1889
ipath_update_ack_queue(qp, next);
1890
}
1891
if (!header_in_data)
1892
ateth = &ohdr->u.atomic_eth;
1893
else
1894
ateth = (struct ib_atomic_eth *)data;
1895
vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1896
be32_to_cpu(ateth->vaddr[1]);
1897
if (unlikely(vaddr & (sizeof(u64) - 1)))
1898
goto nack_inv_unlck;
1899
rkey = be32_to_cpu(ateth->rkey);
1900
/* Check rkey & NAK */
1901
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
1902
sizeof(u64), vaddr, rkey,
1903
IB_ACCESS_REMOTE_ATOMIC)))
1904
goto nack_acc_unlck;
1905
/* Perform atomic OP and save result. */
1906
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1907
sdata = be64_to_cpu(ateth->swap_data);
1908
e = &qp->s_ack_queue[qp->r_head_ack_queue];
1909
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
1910
(u64) atomic64_add_return(sdata, maddr) - sdata :
1911
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
1912
be64_to_cpu(ateth->compare_data),
1913
sdata);
1914
e->opcode = opcode;
1915
e->sent = 0;
1916
e->psn = psn & IPATH_PSN_MASK;
1917
qp->r_msn++;
1918
qp->r_psn++;
1919
qp->r_state = opcode;
1920
qp->r_nak_state = 0;
1921
qp->r_head_ack_queue = next;
1922
1923
/* Schedule the send tasklet. */
1924
ipath_schedule_send(qp);
1925
1926
goto unlock;
1927
}
1928
1929
default:
1930
/* NAK unknown opcodes. */
1931
goto nack_inv;
1932
}
1933
qp->r_psn++;
1934
qp->r_state = opcode;
1935
qp->r_ack_psn = psn;
1936
qp->r_nak_state = 0;
1937
/* Send an ACK if requested or required. */
1938
if (psn & (1 << 31))
1939
goto send_ack;
1940
goto done;
1941
1942
rnr_nak:
1943
qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1944
qp->r_ack_psn = qp->r_psn;
1945
goto send_ack;
1946
1947
nack_inv_unlck:
1948
spin_unlock_irqrestore(&qp->s_lock, flags);
1949
nack_inv:
1950
ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
1951
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1952
qp->r_ack_psn = qp->r_psn;
1953
goto send_ack;
1954
1955
nack_acc_unlck:
1956
spin_unlock_irqrestore(&qp->s_lock, flags);
1957
nack_acc:
1958
ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
1959
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1960
qp->r_ack_psn = qp->r_psn;
1961
send_ack:
1962
send_rc_ack(qp);
1963
goto done;
1964
1965
unlock:
1966
spin_unlock_irqrestore(&qp->s_lock, flags);
1967
done:
1968
return;
1969
}
1970
1971