GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/infiniband/hw/ipath/ipath_verbs.c
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/utsname.h>
#include <linux/rculist.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"

static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int ib_ipath_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_ipath_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int ib_ipath_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_ipath_max_qps = 16384;
module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_ipath_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int ib_ipath_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int ib_ipath_max_srqs = 1024;
module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_ipath_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");

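/*
 * Example (illustrative values only, not from the original source): the
 * parameters above are set in the usual module-parameter way, e.g.
 *
 *	modprobe ib_ipath qp_table_size=1021 max_qps=32768
 *
 * Parameters declared S_IRUGO can be read back from
 * /sys/module/ib_ipath/parameters/<name>, and the S_IWUSR ones may
 * also be changed there at runtime by root.
 */
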
/*
 * Note that it is OK to post send work requests in the SQE and ERR
 * states; ipath_do_send() will process them and generate error
 * completions as per IB 1.2 C10-96.
 */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
	    IPATH_PROCESS_NEXT_SEND_OK,
	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};

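/*
 * Sketch (illustrative; it mirrors the checks used later in this file):
 * callers gate work against the current QP state by testing these flags,
 * e.g.
 *
 *	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
 *		return -EINVAL;
 *
 * so a QP in IB_QPS_RTS both accepts and processes sends, while one in
 * IB_QPS_SQE still accepts posts but flushes them with error completions.
 */
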
struct ipath_ucontext {
	struct ib_ucontext ibucontext;
};

static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
						  *ibucontext)
{
	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

/*
 * System image GUID.
 */
static __be64 sys_image_guid;

/**
 * ipath_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 */
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}

/**
 * ipath_skip_sge - skip over SGE memory (nearly a duplicate of
 * ipath_copy_sge(), minus the copy)
 * @ss: the SGE state
 * @length: the number of bytes to skip
 */
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}

/*
 * Count the number of DMA descriptors needed to send length bytes of data.
 * Don't modify the ipath_sge_state to get the count.
 * Return zero if any of the segments is not aligned.
 */
static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sg_list = ss->sg_list;
	struct ipath_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 ndesc = 1;	/* count the header */

	while (length) {
		u32 len = sge.length;

		if (len > length)
			len = length;
		if (len > sge.sge_length)
			len = sge.sge_length;
		BUG_ON(len == 0);
		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
		    (len != length && (len & (sizeof(u32) - 1)))) {
			ndesc = 0;
			break;
		}
		ndesc++;
		sge.vaddr += len;
		sge.length -= len;
		sge.sge_length -= len;
		if (sge.sge_length == 0) {
			if (--num_sge)
				sge = *sg_list++;
		} else if (sge.length == 0 && sge.mr != NULL) {
			if (++sge.n >= IPATH_SEGSZ) {
				if (++sge.m >= sge.mr->mapsz)
					break;
				sge.n = 0;
			}
			sge.vaddr =
				sge.mr->map[sge.m]->segs[sge.n].vaddr;
			sge.length =
				sge.mr->map[sge.m]->segs[sge.n].length;
		}
		length -= len;
	}
	return ndesc;
}

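/*
 * Worked example (illustrative): for a 24-byte send split across two
 * dword-aligned SGEs of 16 and 8 bytes, the loop above counts one
 * descriptor for the header plus one per segment, so ndesc == 3.  If
 * any segment starts on a non-dword boundary, or an interior segment
 * has a length that is not a multiple of 4, it returns 0 and the
 * caller falls back to copying the data into a single bounce buffer.
 */
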
/*
 * Copy from the SGEs to the data buffer.
 */
static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
				u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(data, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}

/**
 * ipath_post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	u32 next;
	int i;
	int j;
	int acc;
	int ret;
	unsigned long flags;
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;

	spin_lock_irqsave(&qp->s_lock, flags);

	if (qp->ibqp.qp_type != IB_QPT_SMI &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		ret = -ENETDOWN;
		goto bail;
	}

	/* Check that state is OK to post send. */
	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
		goto bail_inval;

	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge)
		goto bail_inval;

	/*
	 * Don't allow RDMA reads or atomic operations on UC, and reject
	 * undefined opcodes.
	 * Make sure the buffer is large enough to hold the result for
	 * atomics.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			goto bail_inval;
	} else if (qp->ibqp.qp_type == IB_QPT_UD) {
		/* Check UD opcode */
		if (wr->opcode != IB_WR_SEND &&
		    wr->opcode != IB_WR_SEND_WITH_IMM)
			goto bail_inval;
		/* Check UD destination address PD */
		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
			goto bail_inval;
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
		goto bail_inval;
	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		 (wr->num_sge == 0 ||
		  wr->sg_list[0].length < sizeof(u64) ||
		  wr->sg_list[0].addr & (sizeof(u64) - 1)))
		goto bail_inval;
	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
		goto bail_inval;

	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		ret = -ENOMEM;
		goto bail;
	}

	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->length = 0;
	if (wr->num_sge) {
		acc = wr->opcode >= IB_WR_RDMA_READ ?
			IB_ACCESS_LOCAL_WRITE : 0;
		for (i = 0, j = 0; i < wr->num_sge; i++) {
			u32 length = wr->sg_list[i].length;
			int ok;

			if (length == 0)
				continue;
			ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
					   &wr->sg_list[i], acc);
			if (!ok)
				goto bail_inval;
			wqe->length += length;
			j++;
		}
		wqe->wr.num_sge = j;
	}
	if (qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_RC) {
		if (wqe->length > 0x80000000U)
			goto bail_inval;
	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
		goto bail_inval;
	wqe->ssn = qp->s_ssn++;
	qp->s_head = next;

	ret = 0;
	goto bail;

bail_inval:
	ret = -EINVAL;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

/**
 * ipath_post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			   struct ib_send_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	int err = 0;

	for (; wr; wr = wr->next) {
		err = ipath_post_one_send(qp, wr);
		if (err) {
			*bad_wr = wr;
			goto bail;
		}
	}

	/* Try to do the send work in the caller's context. */
	ipath_do_send((unsigned long) qp);

bail:
	return err;
}

/**
 * ipath_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			      struct ib_recv_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct ipath_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto bail;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
		wqe->wr_id = wr->wr_id;
		wqe->num_sge = wr->num_sge;
		for (i = 0; i < wr->num_sge; i++)
			wqe->sg_list[i] = wr->sg_list[i];
		/* Make sure queue entry is written before the head index. */
		smp_wmb();
		wq->head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}

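/*
 * Note on the ring arithmetic above (illustrative): the receive queue is
 * a circular buffer where head == tail means empty, so one slot is always
 * sacrificed and a queue created with size n holds at most n - 1 work
 * requests.  E.g. with size 4, head 3 and tail 0, the next post computes
 * next = 0 == tail and fails with -ENOMEM.
 */
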
/**
 * ipath_qp_rcv - process an incoming packet on a QP
 * @dev: the device the packet came on
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from ipath_ib_rcv() to process an incoming packet
 * for the given QP.
 * Called at interrupt level.
 */
static void ipath_qp_rcv(struct ipath_ibdev *dev,
			 struct ipath_ib_header *hdr, int has_grh,
			 void *data, u32 tlen, struct ipath_qp *qp)
{
	/* Check for valid receive state. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		return;
	}

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (ib_ipath_disable_sma)
			break;
		/* FALLTHROUGH */
	case IB_QPT_UD:
		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_RC:
		ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_UC:
		ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	default:
		break;
	}
}

/**
 * ipath_ib_rcv - process an incoming packet
 * @dev: the device pointer
 * @rhdr: the header of the packet
 * @data: the packet data
 * @tlen: the packet length
 *
 * This is called from ipath_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
		  u32 tlen)
{
	struct ipath_ib_header *hdr = rhdr;
	struct ipath_other_headers *ohdr;
	struct ipath_qp *qp;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	if (unlikely(dev == NULL))
		goto bail;

	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
		dev->rcv_errors++;
		goto bail;
	}

	/* Check for a valid destination LID (see ch. 7.11.1). */
	lid = be16_to_cpu(hdr->lrh[1]);
	if (lid < IPATH_MULTICAST_LID_BASE) {
		lid &= ~((1 << dev->dd->ipath_lmc) - 1);
		if (unlikely(lid != dev->dd->ipath_lid)) {
			dev->rcv_errors++;
			goto bail;
		}
	}

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == IPATH_LRH_BTH)
		ohdr = &hdr->u.oth;
	else if (lnh == IPATH_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else {
		dev->rcv_errors++;
		goto bail;
	}

	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	dev->opstats[opcode].n_bytes += tlen;
	dev->opstats[opcode].n_packets++;

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
	if (qp_num == IPATH_MULTICAST_QPN) {
		struct ipath_mcast *mcast;
		struct ipath_mcast_qp *p;

		if (lnh != IPATH_LRH_GRH) {
			dev->n_pkt_drops++;
			goto bail;
		}
		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
		if (mcast == NULL) {
			dev->n_pkt_drops++;
			goto bail;
		}
		dev->n_multicast_rcv++;
		list_for_each_entry_rcu(p, &mcast->qp_list, list)
			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
		/*
		 * Notify ipath_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
		if (qp) {
			dev->n_unicast_rcv++;
			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
				     tlen, qp);
			/*
			 * Notify ipath_destroy_qp() if it is waiting
			 * for us to finish.
			 */
			if (atomic_dec_and_test(&qp->refcount))
				wake_up(&qp->wait);
		} else
			dev->n_pkt_drops++;
	}

bail:;
}

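/*
 * Worked example for the LID check above (illustrative): with lmc = 2
 * the low two LID bits are path bits, so a port assigned base LID 0x1004
 * answers to 0x1004-0x1007; a packet addressed to 0x1006 is masked with
 * ~((1 << 2) - 1) back to 0x1004 before being compared with ipath_lid.
 */
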
/**
 * ipath_ib_timer - verbs timer
 * @dev: the device pointer
 *
 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 * QPs which need retransmits and to collect performance numbers.
 */
static void ipath_ib_timer(struct ipath_ibdev *dev)
{
	struct ipath_qp *resend = NULL;
	struct ipath_qp *rnr = NULL;
	struct list_head *last;
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		return;

	spin_lock_irqsave(&dev->pending_lock, flags);
	/* Start filling the next pending queue. */
	if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
		dev->pending_index = 0;
	/* Save any requests still in the new queue, they have timed out. */
	last = &dev->pending[dev->pending_index];
	while (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		list_del_init(&qp->timerwait);
		qp->timer_next = resend;
		resend = qp;
		atomic_inc(&qp->refcount);
	}
	last = &dev->rnrwait;
	if (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		if (--qp->s_rnr_timeout == 0) {
			do {
				list_del_init(&qp->timerwait);
				qp->timer_next = rnr;
				rnr = qp;
				atomic_inc(&qp->refcount);
				if (list_empty(last))
					break;
				qp = list_entry(last->next, struct ipath_qp,
						timerwait);
			} while (qp->s_rnr_timeout == 0);
		}
	}
	/*
	 * We should only be in the started state if pma_sample_start != 0
	 */
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
	    --dev->pma_sample_start == 0) {
		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
					&dev->ipath_rword,
					&dev->ipath_spkts,
					&dev->ipath_rpkts,
					&dev->ipath_xmit_wait);
	}
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
		if (dev->pma_sample_interval == 0) {
			u64 ta, tb, tc, td, te;

			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
			ipath_snapshot_counters(dev->dd, &ta, &tb,
						&tc, &td, &te);

			dev->ipath_sword = ta - dev->ipath_sword;
			dev->ipath_rword = tb - dev->ipath_rword;
			dev->ipath_spkts = tc - dev->ipath_spkts;
			dev->ipath_rpkts = td - dev->ipath_rpkts;
			dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
		} else
			dev->pma_sample_interval--;
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	/* XXX What if timer fires again while this is running? */
	while (resend != NULL) {
		qp = resend;
		resend = qp->timer_next;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_last != qp->s_tail &&
		    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
			dev->n_timeouts++;
			ipath_restart_rc(qp, qp->s_last_psn + 1);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
	while (rnr != NULL) {
		qp = rnr;
		rnr = qp->timer_next;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}

static void update_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr != NULL) {
		if (++sge->n >= IPATH_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}

#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#else
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#endif

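/*
 * Worked example (illustrative, little-endian case): clear_upper_bytes()
 * keeps the low @n bytes of @data and shifts them up by @off bytes so
 * they can be OR'd into a partially filled dword.  With data = 0xDDCCBBAA,
 * n = 2, off = 1:
 *
 *	data <<= (4 - 2) * 8;		-> 0xBBAA0000
 *	data >>= (4 - 2 - 1) * 8;	-> 0x00BBAA00
 *
 * i.e. bytes AA and BB land at byte offsets 1 and 2 of the output word.
 */
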
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
		    u32 length, unsigned flush_wc)
{
	u32 extra = 0;
	u32 data = 0;
	u32 last;

	while (1) {
		u32 len = ss->sge.length;
		u32 off;

		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		BUG_ON(len == 0);
		/* If the source address is not aligned, try to align it. */
		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (off) {
			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
					    ~(sizeof(u32) - 1));
			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
			u32 y;

			y = sizeof(u32) - off;
			if (len > y)
				len = y;
			if (len + extra >= sizeof(u32)) {
				data |= set_upper_bits(v, extra *
						       BITS_PER_BYTE);
				len = sizeof(u32) - extra;
				if (len == length) {
					last = data;
					break;
				}
				__raw_writel(data, piobuf);
				piobuf++;
				extra = 0;
				data = 0;
			} else {
				/* Clear unused upper bytes */
				data |= clear_upper_bytes(v, len, extra);
				if (len == length) {
					last = data;
					break;
				}
				extra += len;
			}
		} else if (extra) {
			/* Source address is aligned. */
			u32 *addr = (u32 *) ss->sge.vaddr;
			int shift = extra * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 l = len;

			while (l >= sizeof(u32)) {
				u32 v = *addr;

				data |= set_upper_bits(v, shift);
				__raw_writel(data, piobuf);
				data = get_upper_bits(v, ushift);
				piobuf++;
				addr++;
				l -= sizeof(u32);
			}
			/*
			 * Handle any remaining bytes (fewer than one dword).
			 */
			if (l) {
				u32 v = *addr;

				if (l + extra >= sizeof(u32)) {
					data |= set_upper_bits(v, shift);
					len -= l + extra - sizeof(u32);
					if (len == length) {
						last = data;
						break;
					}
					__raw_writel(data, piobuf);
					piobuf++;
					extra = 0;
					data = 0;
				} else {
					/* Clear unused upper bytes */
					data |= clear_upper_bytes(v, l,
								  extra);
					if (len == length) {
						last = data;
						break;
					}
					extra += l;
				}
			} else if (len == length) {
				last = data;
				break;
			}
		} else if (len == length) {
			u32 w;

			/*
			 * Need to round up for the last dword in the
			 * packet.
			 */
			w = (len + 3) >> 2;
			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
			piobuf += w - 1;
			last = ((u32 *) ss->sge.vaddr)[w - 1];
			break;
		} else {
			u32 w = len >> 2;

			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
			piobuf += w;

			extra = len & (sizeof(u32) - 1);
			if (extra) {
				u32 v = ((u32 *) ss->sge.vaddr)[w];

				/* Clear unused upper bytes */
				data = clear_upper_bytes(v, extra, 0);
			}
		}
		update_sge(ss, len);
		length -= len;
	}
	/* Update address before sending packet. */
	update_sge(ss, length);
	if (flush_wc) {
		/* must flush early everything before trigger word */
		ipath_flush_wc();
		__raw_writel(last, piobuf);
		/* be sure trigger word is written */
		ipath_flush_wc();
	} else
		__raw_writel(last, piobuf);
}

/*
 * Convert IB rate to delay multiplier.
 */
unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
{
	switch (rate) {
	case IB_RATE_2_5_GBPS: return 8;
	case IB_RATE_5_GBPS: return 4;
	case IB_RATE_10_GBPS: return 2;
	case IB_RATE_20_GBPS: return 1;
	default: return 0;
	}
}

/*
 * Convert delay multiplier to IB rate.
 */
static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
{
	switch (mult) {
	case 8: return IB_RATE_2_5_GBPS;
	case 4: return IB_RATE_5_GBPS;
	case 2: return IB_RATE_10_GBPS;
	case 1: return IB_RATE_20_GBPS;
	default: return IB_RATE_PORT_CURRENT;
	}
}

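/*
 * Round-trip example (illustrative): IB_RATE_2_5_GBPS (1X SDR) maps to a
 * delay multiplier of 8 because it is 8x slower than the 20 Gbps (4X DDR)
 * rate these links can run at, and ipath_mult_to_ib_rate(8) maps back to
 * IB_RATE_2_5_GBPS.  Unknown multipliers fall back to
 * IB_RATE_PORT_CURRENT.
 */
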
static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
{
	struct ipath_verbs_txreq *tx = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	if (!list_empty(&dev->txreq_free)) {
		struct list_head *l = dev->txreq_free.next;

		list_del(l);
		tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);
	return tx;
}

static inline void put_txreq(struct ipath_ibdev *dev,
			     struct ipath_verbs_txreq *tx)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	list_add(&tx->txreq.list, &dev->txreq_free);
	spin_unlock_irqrestore(&dev->pending_lock, flags);
}

static void sdma_complete(void *cookie, int status)
{
	struct ipath_verbs_txreq *tx = cookie;
	struct ipath_qp *qp = tx->qp;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;
	enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
		IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;

	if (atomic_dec_and_test(&qp->s_dma_busy)) {
		spin_lock_irqsave(&qp->s_lock, flags);
		if (tx->wqe)
			ipath_send_complete(qp, tx->wqe, ibs);
		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
		     qp->s_last != qp->s_head) ||
		    (qp->s_flags & IPATH_S_WAIT_DMA))
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		wake_up(&qp->wait_dma);
	} else if (tx->wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		ipath_send_complete(qp, tx->wqe, ibs);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
		kfree(tx->txreq.map_addr);
	put_txreq(dev, tx);

	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}

static void decrement_dma_busy(struct ipath_qp *qp)
{
	unsigned long flags;

	if (atomic_dec_and_test(&qp->s_dma_busy)) {
		spin_lock_irqsave(&qp->s_lock, flags);
		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
		     qp->s_last != qp->s_head) ||
		    (qp->s_flags & IPATH_S_WAIT_DMA))
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		wake_up(&qp->wait_dma);
	}
}

/*
 * Compute the number of clock cycles of delay before sending the next packet.
 * The multipliers reflect the number of clocks for the fastest rate so
 * one tick at 4xDDR is 8 ticks at 1xSDR.
 * If the destination port will take longer to receive a packet than
 * the outgoing link can send it, we need to delay sending the next packet
 * by the difference in time it takes the receiver to receive and the sender
 * to send this packet.
 * Note that this delay is always correct for UC and RC but not always
 * optimal for UD. For UD, the destination HCA can be different for each
 * packet, in which case, we could send packets to a different destination
 * while "waiting" for the delay. The overhead for doing this without
 * HW support is more than just paying the cost of delaying some packets
 * unnecessarily.
 */
static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
{
	return (rcv_mult > snd_mult) ?
		(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
}

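/*
 * Worked example (illustrative): a 512-dword packet sent from a 20 Gbps
 * port (snd_mult 1) to a 2.5 Gbps destination (rcv_mult 8) yields
 * delay = (512 * (8 - 1) + 1) >> 1 = 1792 clocks, roughly the extra time
 * the slower receiver needs beyond what the sender spends putting the
 * packet on the wire.
 */
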
static int ipath_verbs_send_dma(struct ipath_qp *qp,
				struct ipath_ib_header *hdr, u32 hdrwords,
				struct ipath_sge_state *ss, u32 len,
				u32 plen, u32 dwords)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_devdata *dd = dev->dd;
	struct ipath_verbs_txreq *tx;
	u32 *piobuf;
	u32 control;
	u32 ndesc;
	int ret;

	tx = qp->s_tx;
	if (tx) {
		qp->s_tx = NULL;
		/* resend previously constructed packet */
		atomic_inc(&qp->s_dma_busy);
		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
		if (ret) {
			qp->s_tx = tx;
			decrement_dma_busy(qp);
		}
		goto bail;
	}

	tx = get_txreq(dev);
	if (!tx) {
		ret = -EBUSY;
		goto bail;
	}

	/*
	 * Get the saved delay count we computed for the previous packet
	 * and save the delay count for this packet to be used next time
	 * we get here.
	 */
	control = qp->s_pkt_delay;
	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);

	tx->qp = qp;
	atomic_inc(&qp->refcount);
	tx->wqe = qp->s_wqe;
	tx->txreq.callback = sdma_complete;
	tx->txreq.callback_cookie = tx;
	tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
		IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
	if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;

	/* VL15 packets bypass credit check */
	if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
		control |= 1ULL << 31;
		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
	}

	if (len) {
		/*
		 * Don't try to DMA if it takes more descriptors than
		 * the queue holds.
		 */
		ndesc = ipath_count_sge(ss, len);
		if (ndesc >= dd->ipath_sdma_descq_cnt)
			ndesc = 0;
	} else
		ndesc = 1;
	if (ndesc) {
		tx->hdr.pbc[0] = cpu_to_le32(plen);
		tx->hdr.pbc[1] = cpu_to_le32(control);
		memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
		tx->txreq.sg_count = ndesc;
		tx->map_len = (hdrwords + 2) << 2;
		tx->txreq.map_addr = &tx->hdr;
		atomic_inc(&qp->s_dma_busy);
		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
		if (ret) {
			/* save ss and length in dwords */
			tx->ss = ss;
			tx->len = dwords;
			qp->s_tx = tx;
			decrement_dma_busy(qp);
		}
		goto bail;
	}

	/* Allocate a buffer and copy the header and payload to it. */
	tx->map_len = (plen + 1) << 2;
	piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
	if (unlikely(piobuf == NULL)) {
		ret = -EBUSY;
		goto err_tx;
	}
	tx->txreq.map_addr = piobuf;
	tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
	tx->txreq.sg_count = 1;

	*piobuf++ = (__force u32) cpu_to_le32(plen);
	*piobuf++ = (__force u32) cpu_to_le32(control);
	memcpy(piobuf, hdr, hdrwords << 2);
	ipath_copy_from_sge(piobuf + hdrwords, ss, len);

	atomic_inc(&qp->s_dma_busy);
	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
	/*
	 * If we couldn't queue the DMA request, save the info
	 * and try again later rather than destroying the
	 * buffer and undoing the side effects of the copy.
	 */
	if (ret) {
		tx->ss = NULL;
		tx->len = 0;
		qp->s_tx = tx;
		decrement_dma_busy(qp);
	}
	dev->n_unaligned++;
	goto bail;

err_tx:
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
	put_txreq(dev, tx);
bail:
	return ret;
}

static int ipath_verbs_send_pio(struct ipath_qp *qp,
				struct ipath_ib_header *ibhdr, u32 hdrwords,
				struct ipath_sge_state *ss, u32 len,
				u32 plen, u32 dwords)
{
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
	u32 *hdr = (u32 *) ibhdr;
	u32 __iomem *piobuf;
	unsigned flush_wc;
	u32 control;
	int ret;
	unsigned long flags;

	piobuf = ipath_getpiobuf(dd, plen, NULL);
	if (unlikely(piobuf == NULL)) {
		ret = -EBUSY;
		goto bail;
	}

	/*
	 * Get the saved delay count we computed for the previous packet
	 * and save the delay count for this packet to be used next time
	 * we get here.
	 */
	control = qp->s_pkt_delay;
	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);

	/* VL15 packets bypass credit check */
	if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
		control |= 1ULL << 31;

	/*
	 * Write the length to the control qword plus any needed flags.
	 * We have to flush after the PBC for correctness on some cpus
	 * or WC buffer can be written out of order.
	 */
	writeq(((u64) control << 32) | plen, piobuf);
	piobuf += 2;

	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
	if (len == 0) {
		/*
		 * If there is just the header portion, must flush before
		 * writing last word of header for correctness, and after
		 * the last header word (trigger word).
		 */
		if (flush_wc) {
			ipath_flush_wc();
			__iowrite32_copy(piobuf, hdr, hdrwords - 1);
			ipath_flush_wc();
			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
			ipath_flush_wc();
		} else
			__iowrite32_copy(piobuf, hdr, hdrwords);
		goto done;
	}

	if (flush_wc)
		ipath_flush_wc();
	__iowrite32_copy(piobuf, hdr, hdrwords);
	piobuf += hdrwords;

	/* The common case is aligned and contained in one segment. */
	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
		u32 *addr = (u32 *) ss->sge.vaddr;

		/* Update address before sending packet. */
		update_sge(ss, len);
		if (flush_wc) {
			__iowrite32_copy(piobuf, addr, dwords - 1);
			/* must flush early everything before trigger word */
			ipath_flush_wc();
			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
			/* be sure trigger word is written */
			ipath_flush_wc();
		} else
			__iowrite32_copy(piobuf, addr, dwords);
		goto done;
	}
	copy_io(piobuf, ss, len, flush_wc);
done:
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}
	ret = 0;
bail:
	return ret;
}

/**
 * ipath_verbs_send - send a packet
 * @qp: the QP to send on
 * @hdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 */
int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
		     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
{
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
	u32 plen;
	int ret;
	u32 dwords = (len + 3) >> 2;

	/*
	 * Calculate the send buffer trigger address.
	 * The +1 counts for the pbc control dword following the pbc length.
	 */
	plen = hdrwords + dwords + 1;

	/*
	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
	 * can defer SDMA restart until link goes ACTIVE without
	 * worrying about just how we got there.
	 */
	if (qp->ibqp.qp_type == IB_QPT_SMI ||
	    !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
					   plen, dwords);
	else
		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
					   plen, dwords);

	return ret;
}

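/*
 * Example of the plen computation above (illustrative numbers): a send
 * with hdrwords == 10 and 256 bytes of payload gives dwords = 64 and
 * plen = 10 + 64 + 1 = 75, where the +1 accounts for the PBC control
 * dword that precedes the packet in the send buffer.
 */
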
int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
			    u64 *rwords, u64 *spkts, u64 *rpkts,
			    u64 *xmit_wait)
{
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);

	ret = 0;

bail:
	return ret;
}

/**
 * ipath_get_counters - get various chip counters
 * @dd: the infinipath device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int ipath_get_counters(struct ipath_devdata *dd,
		       struct ipath_verbs_counters *cntrs)
{
	struct ipath_cregs const *crp = dd->ipath_cregs;
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	cntrs->symbol_error_counter =
		ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
	cntrs->link_error_recovery_counter =
		ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
	/*
	 * The link downed counter counts when the other side downs the
	 * connection. We add in the number of times we downed the link
	 * due to local link integrity errors to compensate.
	 */
	cntrs->link_downed_counter =
		ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
	cntrs->port_rcv_errors =
		ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
		ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
		ipath_snap_cntr(dd, crp->cr_portovflcnt) +
		ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
		ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
		ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
		ipath_snap_cntr(dd, crp->cr_erricrccnt) +
		ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
		dd->ipath_rxfc_unsupvl_errs;
	if (crp->cr_rxotherlocalphyerrcnt)
		cntrs->port_rcv_errors +=
			ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
	if (crp->cr_rxvlerrcnt)
		cntrs->port_rcv_errors +=
			ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
	cntrs->port_rcv_remphys_errors =
		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
	cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
	cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
	cntrs->local_link_integrity_errors =
		crp->cr_locallinkintegrityerrcnt ?
		ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
		((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
		 dd->ipath_lli_errs : dd->ipath_lli_errors);
	cntrs->excessive_buffer_overrun_errors =
		crp->cr_excessbufferovflcnt ?
		ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
		dd->ipath_overrun_thresh_errs;
	cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
		ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;

	ret = 0;

bail:
	return ret;
}

/**
 * ipath_ib_piobufavail - callback when a PIO buffer is available
 * @dev: the device pointer
 *
 * This is called from ipath_intr() at interrupt level when a PIO buffer is
 * available after ipath_verbs_send() returned an error that no buffers were
 * available. Return 1 if we consumed all the PIO buffers and we still have
 * QPs waiting for buffers (for now, just restart the send tasklet and
 * return zero).
 */
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
	struct list_head *list;
	struct ipath_qp *qplist;
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		goto bail;

	list = &dev->piowait;
	qplist = NULL;

	spin_lock_irqsave(&dev->pending_lock, flags);
	while (!list_empty(list)) {
		qp = list_entry(list->next, struct ipath_qp, piowait);
		list_del_init(&qp->piowait);
		qp->pio_next = qplist;
		qplist = qp;
		atomic_inc(&qp->refcount);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	while (qplist != NULL) {
		qp = qplist;
		qplist = qp->pio_next;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}

bail:
	return 0;
}

static int ipath_query_device(struct ib_device *ibdev,
			      struct ib_device_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
	props->page_size_cap = PAGE_SIZE;
	props->vendor_id =
		IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
	props->vendor_part_id = dev->dd->ipath_deviceid;
	props->hw_ver = dev->dd->ipath_pcirev;

	props->sys_image_guid = dev->sys_image_guid;

	props->max_mr_size = ~0ull;
	props->max_qp = ib_ipath_max_qps;
	props->max_qp_wr = ib_ipath_max_qp_wrs;
	props->max_sge = ib_ipath_max_sges;
	props->max_cq = ib_ipath_max_cqs;
	props->max_ah = ib_ipath_max_ahs;
	props->max_cqe = ib_ipath_max_cqes;
	props->max_mr = dev->lk_table.max;
	props->max_fmr = dev->lk_table.max;
	props->max_map_per_fmr = 32767;
	props->max_pd = ib_ipath_max_pds;
	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
	props->max_qp_init_rd_atom = 255;
	/* props->max_res_rd_atom */
	props->max_srq = ib_ipath_max_srqs;
	props->max_srq_wr = ib_ipath_max_srq_wrs;
	props->max_srq_sge = ib_ipath_max_srq_sges;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_GLOB;
	props->max_pkeys = ipath_get_npkeys(dev->dd);
	props->max_mcast_grp = ib_ipath_max_mcast_grps;
	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}

const u8 ipath_cvt_physportstate[32] = {
	[INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
	[INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};

u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
{
	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
}

static int ipath_query_port(struct ib_device *ibdev,
			    u8 port, struct ib_port_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_devdata *dd = dev->dd;
	enum ib_mtu mtu;
	u16 lid = dd->ipath_lid;
	u64 ibcstat;

	memset(props, 0, sizeof(*props));
	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
	props->lmc = dd->ipath_lmc;
	props->sm_lid = dev->sm_lid;
	props->sm_sl = dev->sm_sl;
	ibcstat = dd->ipath_lastibcstat;
	/* map LinkState to IB portinfo values. */
	props->state = ipath_ib_linkstate(dd, ibcstat) + 1;

	/* See phys_state_show() */
	props->phys_state = /* MEA: assumes shift == 0 */
		ipath_cvt_physportstate[dd->ipath_lastibcstat &
					dd->ibcs_lts_mask];
	props->port_cap_flags = dev->port_cap_flags;
	props->gid_tbl_len = 1;
	props->max_msg_sz = 0x80000000;
	props->pkey_tbl_len = ipath_get_npkeys(dd);
	props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
		dev->z_pkey_violations;
	props->qkey_viol_cntr = dev->qkey_violations;
	props->active_width = dd->ipath_link_width_active;
	/* See rate_show() */
	props->active_speed = dd->ipath_link_speed_active;
	props->max_vl_num = 1;	/* VLCap = VL0 */
	props->init_type_reply = 0;

	props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
	switch (dd->ipath_ibmtu) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:
		mtu = IB_MTU_2048;
	}
	props->active_mtu = mtu;
	props->subnet_timeout = dev->subnet_timeout;

	return 0;
}

static int ipath_modify_device(struct ib_device *device,
			       int device_modify_mask,
			       struct ib_device_modify *device_modify)
{
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
		memcpy(device->node_desc, device_modify->node_desc, 64);

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		to_idev(device)->sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);

	ret = 0;

bail:
	return ret;
}

static int ipath_modify_port(struct ib_device *ibdev,
			     u8 port, int port_modify_mask,
			     struct ib_port_modify *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	dev->port_cap_flags |= props->set_port_cap_mask;
	dev->port_cap_flags &= ~props->clr_port_cap_mask;
	if (port_modify_mask & IB_PORT_SHUTDOWN)
		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
		dev->qkey_violations = 0;
	return 0;
}

static int ipath_query_gid(struct ib_device *ibdev, u8 port,
			   int index, union ib_gid *gid)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= 1) {
		ret = -EINVAL;
		goto bail;
	}
	gid->global.subnet_prefix = dev->gid_prefix;
	gid->global.interface_id = dev->dd->ipath_guid;

	ret = 0;

bail:
	return ret;
}

static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
				    struct ib_ucontext *context,
				    struct ib_udata *udata)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_pd *pd;
	struct ib_pd *ret;

	/*
	 * This is actually totally arbitrary.  Some correctness tests
	 * assume there's a maximum number of PDs that can be allocated.
	 * We don't actually have this limit, but we fail the test if
	 * we allow allocations of more than we report for this value.
	 */

	pd = kmalloc(sizeof *pd, GFP_KERNEL);
	if (!pd) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock(&dev->n_pds_lock);
	if (dev->n_pds_allocated == ib_ipath_max_pds) {
		spin_unlock(&dev->n_pds_lock);
		kfree(pd);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_pds_allocated++;
	spin_unlock(&dev->n_pds_lock);

	/* ib_alloc_pd() will initialize pd->ibpd. */
	pd->user = udata != NULL;

	ret = &pd->ibpd;

bail:
	return ret;
}

static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	struct ipath_pd *pd = to_ipd(ibpd);
	struct ipath_ibdev *dev = to_idev(ibpd->device);

	spin_lock(&dev->n_pds_lock);
	dev->n_pds_allocated--;
	spin_unlock(&dev->n_pds_lock);

	kfree(pd);

	return 0;
}

/**
 * ipath_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
				     struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah;
	struct ib_ah *ret;
	struct ipath_ibdev *dev = to_idev(pd->device);
	unsigned long flags;

	/* A multicast address requires a GRH (see ch. 8.4.1). */
	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
	    !(ah_attr->ah_flags & IB_AH_GRH)) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->dlid == 0) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->port_num < 1 ||
	    ah_attr->port_num > pd->device->phys_port_cnt) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
	if (!ah) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
		kfree(ah);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_ahs_allocated++;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	/* ib_create_ah() will initialize ah->ibah. */
	ah->attr = *ah_attr;
	ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);

	ret = &ah->ibah;

bail:
	return ret;
}

/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	struct ipath_ibdev *dev = to_idev(ibah->device);
	struct ipath_ah *ah = to_iah(ibah);
	unsigned long flags;

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	dev->n_ahs_allocated--;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	kfree(ah);

	return 0;
}

static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah = to_iah(ibah);

	*ah_attr = ah->attr;
	ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);

	return 0;
}

/**
 * ipath_get_npkeys - return the size of the PKEY table for port 0
 * @dd: the infinipath device
 */
unsigned ipath_get_npkeys(struct ipath_devdata *dd)
{
	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
}

/**
 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
 * @dd: the infinipath device
 * @index: the PKEY index
 */
unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
	unsigned ret;

	/* always a kernel port, no locking needed */
	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
		ret = 0;
	else
		ret = dd->ipath_pd[0]->port_pkeys[index];

	return ret;
}

static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			    u16 *pkey)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= ipath_get_npkeys(dev->dd)) {
		ret = -EINVAL;
		goto bail;
	}

	*pkey = ipath_get_pkey(dev->dd, index);
	ret = 0;

bail:
	return ret;
}

/**
 * ipath_alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the InfiniPath driver
 */
static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
						struct ib_udata *udata)
{
	struct ipath_ucontext *context;
	struct ib_ucontext *ret;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	ret = &context->ibucontext;

bail:
	return ret;
}

static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
	kfree(to_iucontext(context));
	return 0;
}

1921
static int ipath_verbs_register_sysfs(struct ib_device *dev);
1922
1923
static void __verbs_timer(unsigned long arg)
1924
{
1925
struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1926
1927
/* Handle verbs layer timeouts. */
1928
ipath_ib_timer(dd->verbs_dev);
1929
1930
mod_timer(&dd->verbs_timer, jiffies + 1);
1931
}
1932
1933
static int enable_timer(struct ipath_devdata *dd)
1934
{
1935
/*
1936
* Early chips had a design flaw where the chip and kernel idea
1937
* of the tail register don't always agree, and therefore we won't
1938
* get an interrupt on the next packet received.
1939
* If the board supports per packet receive interrupts, use it.
1940
* Otherwise, the timer function periodically checks for packets
1941
* to cover this case.
1942
* Either way, the timer is needed for verbs layer related
1943
* processing.
1944
*/
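	/*
	 * Editor's note: in the polling case, __verbs_timer() above
	 * re-arms itself with mod_timer(&dd->verbs_timer, jiffies + 1),
	 * so the receive check runs roughly HZ times per second.
	 */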
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
				 0x2074076542310ULL);
		/* Enable GPIO bit 2 interrupt */
		dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 dd->ipath_gpio_mask);
	}

	init_timer(&dd->verbs_timer);
	dd->verbs_timer.function = __verbs_timer;
	dd->verbs_timer.data = (unsigned long)dd;
	dd->verbs_timer.expires = jiffies + 1;
	add_timer(&dd->verbs_timer);

	return 0;
}

static int disable_timer(struct ipath_devdata *dd)
{
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		/* Disable GPIO bit 2 interrupt */
		dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 dd->ipath_gpio_mask);
		/*
		 * We might want to undo changes to debugportselect,
		 * but how?
		 */
	}

	del_timer_sync(&dd->verbs_timer);

	return 0;
}

/**
 * ipath_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 *
 * Return 0 on success or a negative errno on failure; on success,
 * dd->verbs_dev points to the newly allocated ipath_ibdev.
 */
int ipath_register_ib_device(struct ipath_devdata *dd)
{
	struct ipath_verbs_counters cntrs;
	struct ipath_ibdev *idev;
	struct ib_device *dev;
	struct ipath_verbs_txreq *tx;
	unsigned i;
	int ret;

	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
	if (idev == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	dev = &idev->ibdev;

	if (dd->ipath_sdma_descq_cnt) {
		tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
			     GFP_KERNEL);
		if (tx == NULL) {
			ret = -ENOMEM;
			goto err_tx;
		}
	} else
		tx = NULL;
	idev->txreq_bufs = tx;

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&idev->n_pds_lock);
	spin_lock_init(&idev->n_ahs_lock);
	spin_lock_init(&idev->n_cqs_lock);
	spin_lock_init(&idev->n_qps_lock);
	spin_lock_init(&idev->n_srqs_lock);
	spin_lock_init(&idev->n_mcast_grps_lock);

	spin_lock_init(&idev->qp_table.lock);
	spin_lock_init(&idev->lk_table.lock);
	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);

	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
	if (ret)
		goto err_qp;

	/*
	 * The top ib_ipath_lkey_table_size bits are used to index the
	 * table. The lower 8 bits can be owned by the user (copied from
	 * the LKEY). The remaining bits act as a generation number or tag.
	 */
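	/*
	 * Editor's sketch (hypothetical values, not driver code): with the
	 * default lkey_table_size of 12, lk_table.max below is
	 * 1 << 12 == 4096 slots, and an LKEY such as 0xABCDE407 would
	 * select slot 0xABCDE407 >> (32 - 12) == 0xABC; the low byte
	 * (0x07) is the user-owned portion and the bits in between form
	 * the generation tag.
	 */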
	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
	idev->lk_table.table = kzalloc(idev->lk_table.max *
				       sizeof(*idev->lk_table.table),
				       GFP_KERNEL);
	if (idev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	INIT_LIST_HEAD(&idev->pending_mmaps);
	spin_lock_init(&idev->pending_lock);
	idev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&idev->mmap_offset_lock);
	INIT_LIST_HEAD(&idev->pending[0]);
	INIT_LIST_HEAD(&idev->pending[1]);
	INIT_LIST_HEAD(&idev->pending[2]);
	INIT_LIST_HEAD(&idev->piowait);
	INIT_LIST_HEAD(&idev->rnrwait);
	INIT_LIST_HEAD(&idev->txreq_free);
	idev->pending_index = 0;
	idev->port_cap_flags =
		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	/* Snapshot current HW counters to "clear" them. */
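	/*
	 * Editor's note: readers of these counters are expected to report
	 * (current hardware value - z_* baseline), so the counters appear
	 * to start from zero at registration time.
	 */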
	ipath_get_counters(dd, &cntrs);
	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
	idev->z_link_error_recovery_counter =
		cntrs.link_error_recovery_counter;
	idev->z_link_downed_counter = cntrs.link_downed_counter;
	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
	idev->z_port_rcv_remphys_errors =
		cntrs.port_rcv_remphys_errors;
	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
	idev->z_port_xmit_data = cntrs.port_xmit_data;
	idev->z_port_rcv_data = cntrs.port_rcv_data;
	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
	idev->z_local_link_integrity_errors =
		cntrs.local_link_integrity_errors;
	idev->z_excessive_buffer_overrun_errors =
		cntrs.excessive_buffer_overrun_errors;
	idev->z_vl15_dropped = cntrs.vl15_dropped;

	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
		list_add(&tx->txreq.list, &idev->txreq_free);

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!sys_image_guid)
		sys_image_guid = dd->ipath_guid;
	idev->sys_image_guid = sys_image_guid;
	idev->ib_unit = dd->ipath_unit;
	idev->dd = dd;

	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
	dev->owner = THIS_MODULE;
	dev->node_guid = dd->ipath_guid;
	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
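	/*
	 * Editor's note: each bit set above advertises one userspace verbs
	 * command this driver handles; the uverbs core consults this mask
	 * before dispatching, so a command whose bit is clear is rejected
	 * without ever reaching the driver.
	 */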
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = 1;
	dev->dma_device = &dd->pcidev->dev;
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_phys_mr = ipath_reg_phys_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;
	dev->mmap = ipath_mmap;
	dev->dma_ops = &ipath_dma_mapping_ops;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s", init_utsname()->nodename);

	ret = ib_register_device(dev, NULL);
	if (ret)
		goto err_reg;

	ret = ipath_verbs_register_sysfs(dev);
	if (ret)
		goto err_class;

	enable_timer(dd);

	goto bail;
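
	/* Error paths below unwind in reverse order of the setup above. */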
err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	kfree(idev->txreq_bufs);
err_tx:
	ib_dealloc_device(dev);
	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	idev = NULL;

bail:
	dd->verbs_dev = idev;
	return ret;
}

void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
	struct ib_device *ibdev = &dev->ibdev;
	u32 qps_inuse;

	ib_unregister_device(ibdev);

	disable_timer(dev->dd);

	if (!list_empty(&dev->pending[0]) ||
	    !list_empty(&dev->pending[1]) ||
	    !list_empty(&dev->pending[2]))
		ipath_dev_err(dev->dd, "pending list not empty!\n");
	if (!list_empty(&dev->piowait))
		ipath_dev_err(dev->dd, "piowait list not empty!\n");
	if (!list_empty(&dev->rnrwait))
		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
	if (!ipath_mcast_tree_empty())
		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
	/*
	 * Note that ipath_unregister_ib_device() can be called before all
	 * the QPs are destroyed!
	 */
	qps_inuse = ipath_free_all_qps(&dev->qp_table);
	if (qps_inuse)
		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
			      qps_inuse);
	kfree(dev->qp_table.table);
	kfree(dev->lk_table.table);
	kfree(dev->txreq_bufs);
	ib_dealloc_device(ibdev);
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);

	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int ret;

	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
	if (ret < 0)
		goto bail;
	strcat(buf, "\n");
	ret = strlen(buf);

bail:
	return ret;
}

static ssize_t show_stats(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int i;
	int len;

	len = sprintf(buf,
		      "RC resends %d\n"
		      "RC no QACK %d\n"
		      "RC ACKs %d\n"
		      "RC SEQ NAKs %d\n"
		      "RC RDMA seq %d\n"
		      "RC RNR NAKs %d\n"
		      "RC OTH NAKs %d\n"
		      "RC timeouts %d\n"
		      "RC RDMA dup %d\n"
		      "piobuf wait %d\n"
		      "unaligned %d\n"
		      "PKT drops %d\n"
		      "WQE errs %d\n",
		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
		      dev->n_other_naks, dev->n_timeouts,
		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
		      dev->n_pkt_drops, dev->n_wqe_errs);
	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
		const struct ipath_opcode_stats *si = &dev->opstats[i];

		if (!si->n_packets && !si->n_bytes)
			continue;
		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
			       (unsigned long long) si->n_packets,
			       (unsigned long long) si->n_bytes);
	}
	return len;
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

static struct device_attribute *ipath_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_stats
};

static int ipath_verbs_register_sysfs(struct ib_device *dev)
{
	int i;
	int ret;

	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
		ret = device_create_file(&dev->dev,
					 ipath_class_attributes[i]);
		if (ret)
			goto bail;
	}

	ret = 0;

bail:
	return ret;
}