Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/rdma/krping/krping.c
48255 views
1
/*
2
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3
* Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved.
4
*
5
* This software is available to you under a choice of one of two
6
* licenses. You may choose to be licensed under the terms of the GNU
7
* General Public License (GPL) Version 2, available from the file
8
* COPYING in the main directory of this source tree, or the
9
* OpenIB.org BSD license below:
10
*
11
* Redistribution and use in source and binary forms, with or
12
* without modification, are permitted provided that the following
13
* conditions are met:
14
*
15
* - Redistributions of source code must retain the above
16
* copyright notice, this list of conditions and the following
17
* disclaimer.
18
*
19
* - Redistributions in binary form must reproduce the above
20
* copyright notice, this list of conditions and the following
21
* disclaimer in the documentation and/or other materials
22
* provided with the distribution.
23
*
24
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
* SOFTWARE.
32
*/
33
34
#include <sys/cdefs.h>
35
__FBSDID("$FreeBSD$");
36
37
#include <linux/module.h>
38
#include <linux/moduleparam.h>
39
#include <linux/slab.h>
40
#include <linux/err.h>
41
#include <linux/string.h>
42
#include <linux/list.h>
43
#include <linux/in.h>
44
#include <linux/device.h>
45
#include <linux/pci.h>
46
#include <linux/sched.h>
47
#include <linux/wait.h>
48
49
#include <asm/atomic.h>
50
51
#include <rdma/ib_verbs.h>
52
#include <rdma/rdma_cm.h>
53
54
#include "krping.h"
55
#include "getopt.h"
56
57
#define PFX "krping: "
58
59
extern int krping_debug;
60
/* Emit a LOG_INFO message through log() only when krping_debug is non-zero. */
#define DEBUG_LOG(...) do { if (krping_debug) log(LOG_INFO, __VA_ARGS__); } while (0)
61
#define BIND_INFO 1
62
63
MODULE_AUTHOR("Steve Wise");
64
MODULE_DESCRIPTION("RDMA ping server");
65
MODULE_LICENSE("Dual BSD/GPL");
66
MODULE_VERSION(krping, 1);
67
MODULE_DEPEND(krping, linuxkpi, 1, 1, 1);
68
69
/* Cycle-count sample type used by the latency/bandwidth tests. */
typedef uint64_t cycles_t;

/*
 * Return the current cycle counter; a thin wrapper around
 * get_cyclecount().
 */
static __inline uint64_t
get_cycles(void)
{
	uint64_t now = get_cyclecount();

	return (now);
}
76
77
/* Memory mode identifiers; numbering starts at 1. */
enum mem_type {
	DMA = 1,
	REG,		/* == 2 */
};
81
82
static const struct krping_option krping_opts[] = {
83
{"count", OPT_INT, 'C'},
84
{"size", OPT_INT, 'S'},
85
{"addr", OPT_STRING, 'a'},
86
{"addr6", OPT_STRING, 'A'},
87
{"port", OPT_INT, 'p'},
88
{"verbose", OPT_NOPARAM, 'v'},
89
{"validate", OPT_NOPARAM, 'V'},
90
{"server", OPT_NOPARAM, 's'},
91
{"client", OPT_NOPARAM, 'c'},
92
{"server_inv", OPT_NOPARAM, 'I'},
93
{"wlat", OPT_NOPARAM, 'l'},
94
{"rlat", OPT_NOPARAM, 'L'},
95
{"bw", OPT_NOPARAM, 'B'},
96
{"duplex", OPT_NOPARAM, 'd'},
97
{"tos", OPT_INT, 't'},
98
{"txdepth", OPT_INT, 'T'},
99
{"poll", OPT_NOPARAM, 'P'},
100
{"local_dma_lkey", OPT_NOPARAM, 'Z'},
101
{"read_inv", OPT_NOPARAM, 'R'},
102
{"fr", OPT_NOPARAM, 'f'},
103
{NULL, 0, 0}
104
};
105
106
/*
 * 64-bit host <-> network (big-endian) conversions.  The receive
 * direction uses be64_to_cpu(); the byte swap is the same operation,
 * but the name now matches the direction of the conversion.
 */
#define htonll(x) cpu_to_be64((x))
#define ntohll(x) be64_to_cpu((x))
108
109
static DEFINE_MUTEX(krping_mutex);
110
111
/*
112
* List of running krping threads.
113
*/
114
static LIST_HEAD(krping_cbs);
115
116
/*
117
* Invoke like this, one on each side, using the server's address on
118
* the RDMA device (iw%d):
119
*
120
* /bin/echo server,port=9999,addr=192.168.69.142,validate > /proc/krping
121
* /bin/echo client,port=9999,addr=192.168.69.142,validate > /proc/krping
122
* /bin/echo client,port=9999,addr6=2001:db8:0:f101::1,validate > /proc/krping
123
*
124
* krping "ping/pong" loop:
125
* client sends source rkey/addr/len
126
* server receives source rkey/addr/len
127
* server rdma reads "ping" data from source
128
* server sends "go ahead" on rdma read completion
129
* client sends sink rkey/addr/len
130
* server receives sink rkey/addr/len
131
* server rdma writes "pong" data to sink
132
* server sends "go ahead" on rdma write completion
133
* <repeat loop>
134
*/
135
136
/*
137
* These states are used to signal events between the completion handler
138
* and the main client or server thread.
139
*
140
* Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV,
141
* and RDMA_WRITE_COMPLETE for each ping.
142
*/
143
/*
 * Test state machine values.  The numeric order is significant:
 * waiters in this file compare states with >= (for example
 * "cb->state >= CONNECTED"), and ERROR is the largest value so that it
 * satisfies every such wait.
 */
enum test_state {
	IDLE			= 1,
	CONNECT_REQUEST		= 2,
	ADDR_RESOLVED		= 3,
	ROUTE_RESOLVED		= 4,
	CONNECTED		= 5,
	RDMA_READ_ADV		= 6,
	RDMA_READ_COMPLETE	= 7,
	RDMA_WRITE_ADV		= 8,
	RDMA_WRITE_COMPLETE	= 9,
	ERROR			= 10
};
155
156
/*
 * Buffer advertisement exchanged between the peers in send/recv
 * messages.  krping_format_send() fills the fields with
 * htonll()/htonl() and server_recv() decodes them with
 * ntohll()/ntohl().
 */
struct krping_rdma_info {
	uint64_t buf;		/* address of the advertised buffer */
	uint32_t rkey;		/* rkey for that buffer */
	uint32_t size;		/* length of that buffer */
};
161
162
/*
163
* Default max buffer size for IO...
164
*/
165
/* Parenthesized so the value stays intact inside larger expressions. */
#define RPING_BUFSIZE (128 * 1024)
#define RPING_SQ_DEPTH 64
167
168
/*
169
* Control block struct.
170
*/
171
struct krping_cb {
172
int server; /* 0 iff client */
173
struct ib_cq *cq;
174
struct ib_pd *pd;
175
struct ib_qp *qp;
176
177
struct ib_mr *dma_mr;
178
179
struct ib_fast_reg_page_list *page_list;
180
int page_list_len;
181
struct ib_reg_wr reg_mr_wr;
182
struct ib_send_wr invalidate_wr;
183
struct ib_mr *reg_mr;
184
int server_invalidate;
185
int read_inv;
186
u8 key;
187
188
struct ib_recv_wr rq_wr; /* recv work request record */
189
struct ib_sge recv_sgl; /* recv single SGE */
190
struct krping_rdma_info recv_buf __aligned(16); /* malloc'd buffer */
191
u64 recv_dma_addr;
192
DECLARE_PCI_UNMAP_ADDR(recv_mapping)
193
194
struct ib_send_wr sq_wr; /* send work requrest record */
195
struct ib_sge send_sgl;
196
struct krping_rdma_info send_buf __aligned(16); /* single send buf */
197
u64 send_dma_addr;
198
DECLARE_PCI_UNMAP_ADDR(send_mapping)
199
200
struct ib_rdma_wr rdma_sq_wr; /* rdma work request record */
201
struct ib_sge rdma_sgl; /* rdma single SGE */
202
char *rdma_buf; /* used as rdma sink */
203
u64 rdma_dma_addr;
204
DECLARE_PCI_UNMAP_ADDR(rdma_mapping)
205
struct ib_mr *rdma_mr;
206
207
uint32_t remote_rkey; /* remote guys RKEY */
208
uint64_t remote_addr; /* remote guys TO */
209
uint32_t remote_len; /* remote guys LEN */
210
211
char *start_buf; /* rdma read src */
212
u64 start_dma_addr;
213
DECLARE_PCI_UNMAP_ADDR(start_mapping)
214
struct ib_mr *start_mr;
215
216
enum test_state state; /* used for cond/signalling */
217
wait_queue_head_t sem;
218
struct krping_stats stats;
219
220
uint16_t port; /* dst port in NBO */
221
u8 addr[16] __aligned(8); /* dst addr in NBO */
222
char *addr_str; /* dst addr string */
223
uint8_t addr_type; /* ADDR_FAMILY - IPv4/V6 */
224
int verbose; /* verbose logging */
225
int count; /* ping count */
226
int size; /* ping data size */
227
int validate; /* validate ping data */
228
int wlat; /* run wlat test */
229
int rlat; /* run rlat test */
230
int bw; /* run bw test */
231
int duplex; /* run bw full duplex test */
232
int poll; /* poll or block for rlat test */
233
int txdepth; /* SQ depth */
234
int local_dma_lkey; /* use 0 for lkey */
235
int frtest; /* reg test */
236
int tos; /* type of service */
237
238
/* CM stuff */
239
struct rdma_cm_id *cm_id; /* connection on client side,*/
240
/* listener on server side. */
241
struct rdma_cm_id *child_cm_id; /* connection on server side */
242
struct list_head list;
243
};
244
245
static int krping_cma_event_handler(struct rdma_cm_id *cma_id,
246
struct rdma_cm_event *event)
247
{
248
int ret;
249
struct krping_cb *cb = cma_id->context;
250
251
DEBUG_LOG("cma_event type %d cma_id %p (%s)\n", event->event, cma_id,
252
(cma_id == cb->cm_id) ? "parent" : "child");
253
254
switch (event->event) {
255
case RDMA_CM_EVENT_ADDR_RESOLVED:
256
cb->state = ADDR_RESOLVED;
257
ret = rdma_resolve_route(cma_id, 2000);
258
if (ret) {
259
printk(KERN_ERR PFX "rdma_resolve_route error %d\n",
260
ret);
261
wake_up_interruptible(&cb->sem);
262
}
263
break;
264
265
case RDMA_CM_EVENT_ROUTE_RESOLVED:
266
cb->state = ROUTE_RESOLVED;
267
wake_up_interruptible(&cb->sem);
268
break;
269
270
case RDMA_CM_EVENT_CONNECT_REQUEST:
271
cb->state = CONNECT_REQUEST;
272
cb->child_cm_id = cma_id;
273
DEBUG_LOG("child cma %p\n", cb->child_cm_id);
274
wake_up_interruptible(&cb->sem);
275
break;
276
277
case RDMA_CM_EVENT_ESTABLISHED:
278
DEBUG_LOG("ESTABLISHED\n");
279
if (!cb->server) {
280
cb->state = CONNECTED;
281
}
282
wake_up_interruptible(&cb->sem);
283
break;
284
285
case RDMA_CM_EVENT_ADDR_ERROR:
286
case RDMA_CM_EVENT_ROUTE_ERROR:
287
case RDMA_CM_EVENT_CONNECT_ERROR:
288
case RDMA_CM_EVENT_UNREACHABLE:
289
case RDMA_CM_EVENT_REJECTED:
290
printk(KERN_ERR PFX "cma event %d, error %d\n", event->event,
291
event->status);
292
cb->state = ERROR;
293
wake_up_interruptible(&cb->sem);
294
break;
295
296
case RDMA_CM_EVENT_DISCONNECTED:
297
printk(KERN_ERR PFX "DISCONNECT EVENT...\n");
298
cb->state = ERROR;
299
wake_up_interruptible(&cb->sem);
300
break;
301
302
case RDMA_CM_EVENT_DEVICE_REMOVAL:
303
printk(KERN_ERR PFX "cma detected device removal!!!!\n");
304
cb->state = ERROR;
305
wake_up_interruptible(&cb->sem);
306
break;
307
308
default:
309
printk(KERN_ERR PFX "oof bad type!\n");
310
wake_up_interruptible(&cb->sem);
311
break;
312
}
313
return 0;
314
}
315
316
static int server_recv(struct krping_cb *cb, struct ib_wc *wc)
317
{
318
if (wc->byte_len != sizeof(cb->recv_buf)) {
319
printk(KERN_ERR PFX "Received bogus data, size %d\n",
320
wc->byte_len);
321
return -1;
322
}
323
324
cb->remote_rkey = ntohl(cb->recv_buf.rkey);
325
cb->remote_addr = ntohll(cb->recv_buf.buf);
326
cb->remote_len = ntohl(cb->recv_buf.size);
327
DEBUG_LOG("Received rkey %x addr %llx len %d from peer\n",
328
cb->remote_rkey, (unsigned long long)cb->remote_addr,
329
cb->remote_len);
330
331
if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE)
332
cb->state = RDMA_READ_ADV;
333
else
334
cb->state = RDMA_WRITE_ADV;
335
336
return 0;
337
}
338
339
static int client_recv(struct krping_cb *cb, struct ib_wc *wc)
340
{
341
if (wc->byte_len != sizeof(cb->recv_buf)) {
342
printk(KERN_ERR PFX "Received bogus data, size %d\n",
343
wc->byte_len);
344
return -1;
345
}
346
347
if (cb->state == RDMA_READ_ADV)
348
cb->state = RDMA_WRITE_ADV;
349
else
350
cb->state = RDMA_WRITE_COMPLETE;
351
352
return 0;
353
}
354
355
static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
356
{
357
struct krping_cb *cb = ctx;
358
struct ib_wc wc;
359
const struct ib_recv_wr *bad_wr;
360
int ret;
361
362
BUG_ON(cb->cq != cq);
363
if (cb->frtest) {
364
printk(KERN_ERR PFX "cq completion event in frtest!\n");
365
return;
366
}
367
if (!cb->wlat && !cb->rlat && !cb->bw)
368
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
369
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
370
if (wc.status) {
371
if (wc.status == IB_WC_WR_FLUSH_ERR) {
372
DEBUG_LOG("cq flushed\n");
373
continue;
374
} else {
375
printk(KERN_ERR PFX "cq completion failed with "
376
"wr_id %jx status %d opcode %d vender_err %x\n",
377
(uintmax_t)wc.wr_id, wc.status, wc.opcode, wc.vendor_err);
378
goto error;
379
}
380
}
381
if (cb->state == ERROR) {
382
printk(KERN_ERR PFX "cq completion in ERROR state\n");
383
return;
384
}
385
switch (wc.opcode) {
386
case IB_WC_SEND:
387
DEBUG_LOG("send completion\n");
388
cb->stats.send_bytes += cb->send_sgl.length;
389
cb->stats.send_msgs++;
390
break;
391
392
case IB_WC_RDMA_WRITE:
393
DEBUG_LOG("rdma write completion\n");
394
cb->stats.write_bytes += cb->rdma_sq_wr.wr.sg_list->length;
395
cb->stats.write_msgs++;
396
cb->state = RDMA_WRITE_COMPLETE;
397
wake_up_interruptible(&cb->sem);
398
break;
399
400
case IB_WC_RDMA_READ:
401
DEBUG_LOG("rdma read completion\n");
402
cb->stats.read_bytes += cb->rdma_sq_wr.wr.sg_list->length;
403
cb->stats.read_msgs++;
404
cb->state = RDMA_READ_COMPLETE;
405
wake_up_interruptible(&cb->sem);
406
break;
407
408
case IB_WC_RECV:
409
DEBUG_LOG("recv completion\n");
410
cb->stats.recv_bytes += sizeof(cb->recv_buf);
411
cb->stats.recv_msgs++;
412
if (cb->wlat || cb->rlat || cb->bw)
413
ret = server_recv(cb, &wc);
414
else
415
ret = cb->server ? server_recv(cb, &wc) :
416
client_recv(cb, &wc);
417
if (ret) {
418
printk(KERN_ERR PFX "recv wc error: %d\n", ret);
419
goto error;
420
}
421
422
ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
423
if (ret) {
424
printk(KERN_ERR PFX "post recv error: %d\n",
425
ret);
426
goto error;
427
}
428
wake_up_interruptible(&cb->sem);
429
break;
430
431
default:
432
printk(KERN_ERR PFX
433
"%s:%d Unexpected opcode %d, Shutting down\n",
434
__func__, __LINE__, wc.opcode);
435
goto error;
436
}
437
}
438
if (ret) {
439
printk(KERN_ERR PFX "poll error %d\n", ret);
440
goto error;
441
}
442
return;
443
error:
444
cb->state = ERROR;
445
wake_up_interruptible(&cb->sem);
446
}
447
448
static int krping_accept(struct krping_cb *cb)
449
{
450
struct rdma_conn_param conn_param;
451
int ret;
452
453
DEBUG_LOG("accepting client connection request\n");
454
455
memset(&conn_param, 0, sizeof conn_param);
456
conn_param.responder_resources = 1;
457
conn_param.initiator_depth = 1;
458
459
ret = rdma_accept(cb->child_cm_id, &conn_param);
460
if (ret) {
461
printk(KERN_ERR PFX "rdma_accept error: %d\n", ret);
462
return ret;
463
}
464
465
if (!cb->wlat && !cb->rlat && !cb->bw) {
466
wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
467
if (cb->state == ERROR) {
468
printk(KERN_ERR PFX "wait for CONNECTED state %d\n",
469
cb->state);
470
return -1;
471
}
472
}
473
return 0;
474
}
475
476
static void krping_setup_wr(struct krping_cb *cb)
477
{
478
cb->recv_sgl.addr = cb->recv_dma_addr;
479
cb->recv_sgl.length = sizeof cb->recv_buf;
480
cb->recv_sgl.lkey = cb->pd->local_dma_lkey;
481
cb->rq_wr.sg_list = &cb->recv_sgl;
482
cb->rq_wr.num_sge = 1;
483
484
cb->send_sgl.addr = cb->send_dma_addr;
485
cb->send_sgl.length = sizeof cb->send_buf;
486
cb->send_sgl.lkey = cb->pd->local_dma_lkey;
487
488
cb->sq_wr.opcode = IB_WR_SEND;
489
cb->sq_wr.send_flags = IB_SEND_SIGNALED;
490
cb->sq_wr.sg_list = &cb->send_sgl;
491
cb->sq_wr.num_sge = 1;
492
493
if (cb->server || cb->wlat || cb->rlat || cb->bw) {
494
cb->rdma_sgl.addr = cb->rdma_dma_addr;
495
cb->rdma_sq_wr.wr.send_flags = IB_SEND_SIGNALED;
496
cb->rdma_sq_wr.wr.sg_list = &cb->rdma_sgl;
497
cb->rdma_sq_wr.wr.num_sge = 1;
498
}
499
500
/*
501
* A chain of 2 WRs, INVALDATE_MR + REG_MR.
502
* both unsignaled. The client uses them to reregister
503
* the rdma buffers with a new key each iteration.
504
*/
505
cb->reg_mr_wr.wr.opcode = IB_WR_REG_MR;
506
cb->reg_mr_wr.mr = cb->reg_mr;
507
508
cb->invalidate_wr.next = &cb->reg_mr_wr.wr;
509
cb->invalidate_wr.opcode = IB_WR_LOCAL_INV;
510
}
511
512
static int krping_setup_buffers(struct krping_cb *cb)
513
{
514
int ret;
515
516
DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);
517
518
cb->recv_dma_addr = ib_dma_map_single(cb->pd->device,
519
&cb->recv_buf,
520
sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
521
pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr);
522
cb->send_dma_addr = ib_dma_map_single(cb->pd->device,
523
&cb->send_buf, sizeof(cb->send_buf),
524
DMA_BIDIRECTIONAL);
525
pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr);
526
527
cb->rdma_buf = ib_dma_alloc_coherent(cb->pd->device, cb->size,
528
&cb->rdma_dma_addr,
529
GFP_KERNEL);
530
if (!cb->rdma_buf) {
531
DEBUG_LOG(PFX "rdma_buf allocation failed\n");
532
ret = -ENOMEM;
533
goto bail;
534
}
535
pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr);
536
cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE)
537
>> PAGE_SHIFT;
538
cb->reg_mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG,
539
cb->page_list_len);
540
if (IS_ERR(cb->reg_mr)) {
541
ret = PTR_ERR(cb->reg_mr);
542
DEBUG_LOG(PFX "recv_buf reg_mr failed %d\n", ret);
543
goto bail;
544
}
545
DEBUG_LOG(PFX "reg rkey 0x%x page_list_len %u\n",
546
cb->reg_mr->rkey, cb->page_list_len);
547
548
if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
549
550
cb->start_buf = ib_dma_alloc_coherent(cb->pd->device, cb->size,
551
&cb->start_dma_addr,
552
GFP_KERNEL);
553
if (!cb->start_buf) {
554
DEBUG_LOG(PFX "start_buf malloc failed\n");
555
ret = -ENOMEM;
556
goto bail;
557
}
558
pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr);
559
}
560
561
krping_setup_wr(cb);
562
DEBUG_LOG(PFX "allocated & registered buffers...\n");
563
return 0;
564
bail:
565
if (cb->reg_mr && !IS_ERR(cb->reg_mr))
566
ib_dereg_mr(cb->reg_mr);
567
if (cb->rdma_mr && !IS_ERR(cb->rdma_mr))
568
ib_dereg_mr(cb->rdma_mr);
569
if (cb->dma_mr && !IS_ERR(cb->dma_mr))
570
ib_dereg_mr(cb->dma_mr);
571
if (cb->rdma_buf) {
572
ib_dma_free_coherent(cb->pd->device, cb->size, cb->rdma_buf,
573
cb->rdma_dma_addr);
574
}
575
if (cb->start_buf) {
576
ib_dma_free_coherent(cb->pd->device, cb->size, cb->start_buf,
577
cb->start_dma_addr);
578
}
579
return ret;
580
}
581
582
static void krping_free_buffers(struct krping_cb *cb)
583
{
584
DEBUG_LOG("krping_free_buffers called on cb %p\n", cb);
585
586
if (cb->dma_mr)
587
ib_dereg_mr(cb->dma_mr);
588
if (cb->rdma_mr)
589
ib_dereg_mr(cb->rdma_mr);
590
if (cb->start_mr)
591
ib_dereg_mr(cb->start_mr);
592
if (cb->reg_mr)
593
ib_dereg_mr(cb->reg_mr);
594
595
dma_unmap_single(cb->pd->device->dma_device,
596
pci_unmap_addr(cb, recv_mapping),
597
sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
598
dma_unmap_single(cb->pd->device->dma_device,
599
pci_unmap_addr(cb, send_mapping),
600
sizeof(cb->send_buf), DMA_BIDIRECTIONAL);
601
602
ib_dma_free_coherent(cb->pd->device, cb->size, cb->rdma_buf,
603
cb->rdma_dma_addr);
604
605
if (cb->start_buf) {
606
ib_dma_free_coherent(cb->pd->device, cb->size, cb->start_buf,
607
cb->start_dma_addr);
608
}
609
}
610
611
static int krping_create_qp(struct krping_cb *cb)
612
{
613
struct ib_qp_init_attr init_attr;
614
int ret;
615
616
memset(&init_attr, 0, sizeof(init_attr));
617
init_attr.cap.max_send_wr = cb->txdepth;
618
init_attr.cap.max_recv_wr = 2;
619
620
/* For flush_qp() */
621
init_attr.cap.max_send_wr++;
622
init_attr.cap.max_recv_wr++;
623
624
init_attr.cap.max_recv_sge = 1;
625
init_attr.cap.max_send_sge = 1;
626
init_attr.qp_type = IB_QPT_RC;
627
init_attr.send_cq = cb->cq;
628
init_attr.recv_cq = cb->cq;
629
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
630
631
if (cb->server) {
632
ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr);
633
if (!ret)
634
cb->qp = cb->child_cm_id->qp;
635
} else {
636
ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr);
637
if (!ret)
638
cb->qp = cb->cm_id->qp;
639
}
640
641
return ret;
642
}
643
644
static void krping_free_qp(struct krping_cb *cb)
645
{
646
ib_destroy_qp(cb->qp);
647
ib_destroy_cq(cb->cq);
648
ib_dealloc_pd(cb->pd);
649
}
650
651
static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id)
652
{
653
int ret;
654
struct ib_cq_init_attr attr = {0};
655
656
cb->pd = ib_alloc_pd(cm_id->device, 0);
657
if (IS_ERR(cb->pd)) {
658
printk(KERN_ERR PFX "ib_alloc_pd failed\n");
659
return PTR_ERR(cb->pd);
660
}
661
DEBUG_LOG("created pd %p\n", cb->pd);
662
663
strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name));
664
665
attr.cqe = cb->txdepth * 2;
666
attr.comp_vector = 0;
667
cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL,
668
cb, &attr);
669
if (IS_ERR(cb->cq)) {
670
printk(KERN_ERR PFX "ib_create_cq failed\n");
671
ret = PTR_ERR(cb->cq);
672
goto err1;
673
}
674
DEBUG_LOG("created cq %p\n", cb->cq);
675
676
if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
677
ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
678
if (ret) {
679
printk(KERN_ERR PFX "ib_create_cq failed\n");
680
goto err2;
681
}
682
}
683
684
ret = krping_create_qp(cb);
685
if (ret) {
686
printk(KERN_ERR PFX "krping_create_qp failed: %d\n", ret);
687
goto err2;
688
}
689
DEBUG_LOG("created qp %p\n", cb->qp);
690
return 0;
691
err2:
692
ib_destroy_cq(cb->cq);
693
err1:
694
ib_dealloc_pd(cb->pd);
695
return ret;
696
}
697
698
/*
699
* return the (possibly rebound) rkey for the rdma buffer.
700
* REG mode: invalidate and rebind via reg wr.
701
* other modes: just return the mr rkey.
702
*/
703
static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv)
704
{
705
u32 rkey;
706
const struct ib_send_wr *bad_wr;
707
int ret;
708
struct scatterlist sg = {0};
709
710
cb->invalidate_wr.ex.invalidate_rkey = cb->reg_mr->rkey;
711
712
/*
713
* Update the reg key.
714
*/
715
ib_update_fast_reg_key(cb->reg_mr, ++cb->key);
716
cb->reg_mr_wr.key = cb->reg_mr->rkey;
717
718
/*
719
* Update the reg WR with new buf info.
720
*/
721
if (buf == (u64)cb->start_dma_addr)
722
cb->reg_mr_wr.access = IB_ACCESS_REMOTE_READ;
723
else
724
cb->reg_mr_wr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
725
sg_dma_address(&sg) = buf;
726
sg_dma_len(&sg) = cb->size;
727
728
ret = ib_map_mr_sg(cb->reg_mr, &sg, 1, NULL, PAGE_SIZE);
729
BUG_ON(ret <= 0 || ret > cb->page_list_len);
730
731
DEBUG_LOG(PFX "post_inv = %d, reg_mr new rkey 0x%x pgsz %u len %u"
732
" iova_start %llx\n",
733
post_inv,
734
cb->reg_mr_wr.key,
735
cb->reg_mr->page_size,
736
(unsigned)cb->reg_mr->length,
737
(unsigned long long)cb->reg_mr->iova);
738
739
if (post_inv)
740
ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr);
741
else
742
ret = ib_post_send(cb->qp, &cb->reg_mr_wr.wr, &bad_wr);
743
if (ret) {
744
printk(KERN_ERR PFX "post send error %d\n", ret);
745
cb->state = ERROR;
746
}
747
rkey = cb->reg_mr->rkey;
748
return rkey;
749
}
750
751
/*
 * Fill cb->send_buf with an advertisement for the buffer at DMA
 * address buf.
 *
 * Only the client and the wlat/rlat/bw tests advertise a buffer; a
 * plain server leaves send_buf untouched.  The buffer is registered
 * through krping_rdma_rkey() first, and the fields are written in
 * network byte order.
 */
static void krping_format_send(struct krping_cb *cb, u64 buf)
{
	struct krping_rdma_info *info = &cb->send_buf;
	u32 rkey;

	/* Server side sends have no data. */
	if (cb->server && !cb->wlat && !cb->rlat && !cb->bw)
		return;

	/* Skip the local invalidate when server_invalidate is set. */
	rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);

	info->buf = htonll(buf);
	info->rkey = htonl(rkey);
	info->size = htonl(cb->size);

	DEBUG_LOG("RDMA addr %llx rkey %x len %d\n",
		  (unsigned long long)buf, rkey, cb->size);
}
770
771
static void krping_test_server(struct krping_cb *cb)
772
{
773
const struct ib_send_wr *bad_wr;
774
struct ib_send_wr inv;
775
int ret;
776
777
while (1) {
778
/* Wait for client's Start STAG/TO/Len */
779
wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV);
780
if (cb->state != RDMA_READ_ADV) {
781
printk(KERN_ERR PFX "wait for RDMA_READ_ADV state %d\n",
782
cb->state);
783
break;
784
}
785
786
DEBUG_LOG("server received sink adv\n");
787
788
cb->rdma_sq_wr.rkey = cb->remote_rkey;
789
cb->rdma_sq_wr.remote_addr = cb->remote_addr;
790
cb->rdma_sq_wr.wr.sg_list->length = cb->remote_len;
791
cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, !cb->read_inv);
792
cb->rdma_sq_wr.wr.next = NULL;
793
794
/* Issue RDMA Read. */
795
if (cb->read_inv)
796
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
797
else {
798
799
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ;
800
/*
801
* Immediately follow the read with a
802
* fenced LOCAL_INV.
803
*/
804
cb->rdma_sq_wr.wr.next = &inv;
805
memset(&inv, 0, sizeof inv);
806
inv.opcode = IB_WR_LOCAL_INV;
807
inv.ex.invalidate_rkey = cb->reg_mr->rkey;
808
inv.send_flags = IB_SEND_FENCE;
809
}
810
811
ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr);
812
if (ret) {
813
printk(KERN_ERR PFX "post send error %d\n", ret);
814
break;
815
}
816
cb->rdma_sq_wr.wr.next = NULL;
817
818
DEBUG_LOG("server posted rdma read req \n");
819
820
/* Wait for read completion */
821
wait_event_interruptible(cb->sem,
822
cb->state >= RDMA_READ_COMPLETE);
823
if (cb->state != RDMA_READ_COMPLETE) {
824
printk(KERN_ERR PFX
825
"wait for RDMA_READ_COMPLETE state %d\n",
826
cb->state);
827
break;
828
}
829
DEBUG_LOG("server received read complete\n");
830
831
/* Display data in recv buf */
832
if (cb->verbose)
833
printk(KERN_INFO PFX "server ping data: %s\n",
834
cb->rdma_buf);
835
836
/* Tell client to continue */
837
if (cb->server && cb->server_invalidate) {
838
cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
839
cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
840
DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey);
841
}
842
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
843
if (ret) {
844
printk(KERN_ERR PFX "post send error %d\n", ret);
845
break;
846
}
847
DEBUG_LOG("server posted go ahead\n");
848
849
/* Wait for client's RDMA STAG/TO/Len */
850
wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
851
if (cb->state != RDMA_WRITE_ADV) {
852
printk(KERN_ERR PFX
853
"wait for RDMA_WRITE_ADV state %d\n",
854
cb->state);
855
break;
856
}
857
DEBUG_LOG("server received sink adv\n");
858
859
/* RDMA Write echo data */
860
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
861
cb->rdma_sq_wr.rkey = cb->remote_rkey;
862
cb->rdma_sq_wr.remote_addr = cb->remote_addr;
863
cb->rdma_sq_wr.wr.sg_list->length = strlen(cb->rdma_buf) + 1;
864
if (cb->local_dma_lkey)
865
cb->rdma_sgl.lkey = cb->pd->local_dma_lkey;
866
else
867
cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0);
868
869
DEBUG_LOG("rdma write from lkey %x laddr %llx len %d\n",
870
cb->rdma_sq_wr.wr.sg_list->lkey,
871
(unsigned long long)cb->rdma_sq_wr.wr.sg_list->addr,
872
cb->rdma_sq_wr.wr.sg_list->length);
873
874
ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr);
875
if (ret) {
876
printk(KERN_ERR PFX "post send error %d\n", ret);
877
break;
878
}
879
880
/* Wait for completion */
881
ret = wait_event_interruptible(cb->sem, cb->state >=
882
RDMA_WRITE_COMPLETE);
883
if (cb->state != RDMA_WRITE_COMPLETE) {
884
printk(KERN_ERR PFX
885
"wait for RDMA_WRITE_COMPLETE state %d\n",
886
cb->state);
887
break;
888
}
889
DEBUG_LOG("server rdma write complete \n");
890
891
cb->state = CONNECTED;
892
893
/* Tell client to begin again */
894
if (cb->server && cb->server_invalidate) {
895
cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
896
cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
897
DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey);
898
}
899
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
900
if (ret) {
901
printk(KERN_ERR PFX "post send error %d\n", ret);
902
break;
903
}
904
DEBUG_LOG("server posted go ahead\n");
905
}
906
}
907
908
static void rlat_test(struct krping_cb *cb)
909
{
910
int scnt;
911
int iters = cb->count;
912
struct timeval start_tv, stop_tv;
913
int ret;
914
struct ib_wc wc;
915
const struct ib_send_wr *bad_wr;
916
int ne;
917
918
scnt = 0;
919
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ;
920
cb->rdma_sq_wr.rkey = cb->remote_rkey;
921
cb->rdma_sq_wr.remote_addr = cb->remote_addr;
922
cb->rdma_sq_wr.wr.sg_list->length = cb->size;
923
924
microtime(&start_tv);
925
if (!cb->poll) {
926
cb->state = RDMA_READ_ADV;
927
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
928
}
929
while (scnt < iters) {
930
931
cb->state = RDMA_READ_ADV;
932
ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr);
933
if (ret) {
934
printk(KERN_ERR PFX
935
"Couldn't post send: ret=%d scnt %d\n",
936
ret, scnt);
937
return;
938
}
939
940
do {
941
if (!cb->poll) {
942
wait_event_interruptible(cb->sem,
943
cb->state != RDMA_READ_ADV);
944
if (cb->state == RDMA_READ_COMPLETE) {
945
ne = 1;
946
ib_req_notify_cq(cb->cq,
947
IB_CQ_NEXT_COMP);
948
} else {
949
ne = -1;
950
}
951
} else
952
ne = ib_poll_cq(cb->cq, 1, &wc);
953
if (cb->state == ERROR) {
954
printk(KERN_ERR PFX
955
"state == ERROR...bailing scnt %d\n",
956
scnt);
957
return;
958
}
959
} while (ne == 0);
960
961
if (ne < 0) {
962
printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
963
return;
964
}
965
if (cb->poll && wc.status != IB_WC_SUCCESS) {
966
printk(KERN_ERR PFX "Completion wth error at %s:\n",
967
cb->server ? "server" : "client");
968
printk(KERN_ERR PFX "Failed status %d: wr_id %d\n",
969
wc.status, (int) wc.wr_id);
970
return;
971
}
972
++scnt;
973
}
974
microtime(&stop_tv);
975
976
if (stop_tv.tv_usec < start_tv.tv_usec) {
977
stop_tv.tv_usec += 1000000;
978
stop_tv.tv_sec -= 1;
979
}
980
981
printk(KERN_ERR PFX "delta sec %lu delta usec %lu iter %d size %d\n",
982
(unsigned long)(stop_tv.tv_sec - start_tv.tv_sec),
983
(unsigned long)(stop_tv.tv_usec - start_tv.tv_usec),
984
scnt, cb->size);
985
}
986
987
static void wlat_test(struct krping_cb *cb)
988
{
989
int ccnt, scnt, rcnt;
990
int iters=cb->count;
991
volatile char *poll_buf = (char *) cb->start_buf;
992
char *buf = (char *)cb->rdma_buf;
993
struct timeval start_tv, stop_tv;
994
cycles_t *post_cycles_start = NULL;
995
cycles_t *post_cycles_stop = NULL;
996
cycles_t *poll_cycles_start = NULL;
997
cycles_t *poll_cycles_stop = NULL;
998
cycles_t *last_poll_cycles_start = NULL;
999
cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
1000
int i;
1001
int cycle_iters = 1000;
1002
1003
ccnt = 0;
1004
scnt = 0;
1005
rcnt = 0;
1006
1007
post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1008
if (!post_cycles_start) {
1009
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1010
goto done;
1011
}
1012
post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1013
if (!post_cycles_stop) {
1014
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1015
goto done;
1016
}
1017
poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1018
if (!poll_cycles_start) {
1019
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1020
goto done;
1021
}
1022
poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1023
if (!poll_cycles_stop) {
1024
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1025
goto done;
1026
}
1027
last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t),
1028
GFP_KERNEL);
1029
if (!last_poll_cycles_start) {
1030
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1031
goto done;
1032
}
1033
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
1034
cb->rdma_sq_wr.rkey = cb->remote_rkey;
1035
cb->rdma_sq_wr.remote_addr = cb->remote_addr;
1036
cb->rdma_sq_wr.wr.sg_list->length = cb->size;
1037
1038
if (cycle_iters > iters)
1039
cycle_iters = iters;
1040
microtime(&start_tv);
1041
while (scnt < iters || ccnt < iters || rcnt < iters) {
1042
1043
/* Wait till buffer changes. */
1044
if (rcnt < iters && !(scnt < 1 && !cb->server)) {
1045
++rcnt;
1046
while (*poll_buf != (char)rcnt) {
1047
if (cb->state == ERROR) {
1048
printk(KERN_ERR PFX
1049
"state = ERROR, bailing\n");
1050
goto done;
1051
}
1052
}
1053
}
1054
1055
if (scnt < iters) {
1056
const struct ib_send_wr *bad_wr;
1057
1058
*buf = (char)scnt+1;
1059
if (scnt < cycle_iters)
1060
post_cycles_start[scnt] = get_cycles();
1061
if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
1062
printk(KERN_ERR PFX
1063
"Couldn't post send: scnt=%d\n",
1064
scnt);
1065
goto done;
1066
}
1067
if (scnt < cycle_iters)
1068
post_cycles_stop[scnt] = get_cycles();
1069
scnt++;
1070
}
1071
1072
if (ccnt < iters) {
1073
struct ib_wc wc;
1074
int ne;
1075
1076
if (ccnt < cycle_iters)
1077
poll_cycles_start[ccnt] = get_cycles();
1078
do {
1079
if (ccnt < cycle_iters)
1080
last_poll_cycles_start[ccnt] =
1081
get_cycles();
1082
ne = ib_poll_cq(cb->cq, 1, &wc);
1083
} while (ne == 0);
1084
if (ccnt < cycle_iters)
1085
poll_cycles_stop[ccnt] = get_cycles();
1086
++ccnt;
1087
1088
if (ne < 0) {
1089
printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
1090
goto done;
1091
}
1092
if (wc.status != IB_WC_SUCCESS) {
1093
printk(KERN_ERR PFX
1094
"Completion wth error at %s:\n",
1095
cb->server ? "server" : "client");
1096
printk(KERN_ERR PFX
1097
"Failed status %d: wr_id %d\n",
1098
wc.status, (int) wc.wr_id);
1099
printk(KERN_ERR PFX
1100
"scnt=%d, rcnt=%d, ccnt=%d\n",
1101
scnt, rcnt, ccnt);
1102
goto done;
1103
}
1104
}
1105
}
1106
microtime(&stop_tv);
1107
1108
if (stop_tv.tv_usec < start_tv.tv_usec) {
1109
stop_tv.tv_usec += 1000000;
1110
stop_tv.tv_sec -= 1;
1111
}
1112
1113
for (i=0; i < cycle_iters; i++) {
1114
sum_post += post_cycles_stop[i] - post_cycles_start[i];
1115
sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
1116
sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
1117
}
1118
printk(KERN_ERR PFX
1119
"delta sec %lu delta usec %lu iter %d size %d cycle_iters %d"
1120
" sum_post %llu sum_poll %llu sum_last_poll %llu\n",
1121
(unsigned long)(stop_tv.tv_sec - start_tv.tv_sec),
1122
(unsigned long)(stop_tv.tv_usec - start_tv.tv_usec),
1123
scnt, cb->size, cycle_iters,
1124
(unsigned long long)sum_post, (unsigned long long)sum_poll,
1125
(unsigned long long)sum_last_poll);
1126
done:
1127
kfree(post_cycles_start);
1128
kfree(post_cycles_stop);
1129
kfree(poll_cycles_start);
1130
kfree(poll_cycles_stop);
1131
kfree(last_poll_cycles_start);
1132
}
1133
1134
static void bw_test(struct krping_cb *cb)
1135
{
1136
int ccnt, scnt;
1137
int iters=cb->count;
1138
struct timeval start_tv, stop_tv;
1139
cycles_t *post_cycles_start = NULL;
1140
cycles_t *post_cycles_stop = NULL;
1141
cycles_t *poll_cycles_start = NULL;
1142
cycles_t *poll_cycles_stop = NULL;
1143
cycles_t *last_poll_cycles_start = NULL;
1144
cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
1145
int i;
1146
int cycle_iters = 1000;
1147
1148
ccnt = 0;
1149
scnt = 0;
1150
1151
post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1152
if (!post_cycles_start) {
1153
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1154
goto done;
1155
}
1156
post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1157
if (!post_cycles_stop) {
1158
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1159
goto done;
1160
}
1161
poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1162
if (!poll_cycles_start) {
1163
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1164
goto done;
1165
}
1166
poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
1167
if (!poll_cycles_stop) {
1168
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1169
goto done;
1170
}
1171
last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t),
1172
GFP_KERNEL);
1173
if (!last_poll_cycles_start) {
1174
printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
1175
goto done;
1176
}
1177
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
1178
cb->rdma_sq_wr.rkey = cb->remote_rkey;
1179
cb->rdma_sq_wr.remote_addr = cb->remote_addr;
1180
cb->rdma_sq_wr.wr.sg_list->length = cb->size;
1181
1182
if (cycle_iters > iters)
1183
cycle_iters = iters;
1184
microtime(&start_tv);
1185
while (scnt < iters || ccnt < iters) {
1186
1187
while (scnt < iters && scnt - ccnt < cb->txdepth) {
1188
const struct ib_send_wr *bad_wr;
1189
1190
if (scnt < cycle_iters)
1191
post_cycles_start[scnt] = get_cycles();
1192
if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
1193
printk(KERN_ERR PFX
1194
"Couldn't post send: scnt=%d\n",
1195
scnt);
1196
goto done;
1197
}
1198
if (scnt < cycle_iters)
1199
post_cycles_stop[scnt] = get_cycles();
1200
++scnt;
1201
}
1202
1203
if (ccnt < iters) {
1204
int ne;
1205
struct ib_wc wc;
1206
1207
if (ccnt < cycle_iters)
1208
poll_cycles_start[ccnt] = get_cycles();
1209
do {
1210
if (ccnt < cycle_iters)
1211
last_poll_cycles_start[ccnt] =
1212
get_cycles();
1213
ne = ib_poll_cq(cb->cq, 1, &wc);
1214
} while (ne == 0);
1215
if (ccnt < cycle_iters)
1216
poll_cycles_stop[ccnt] = get_cycles();
1217
ccnt += 1;
1218
1219
if (ne < 0) {
1220
printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
1221
goto done;
1222
}
1223
if (wc.status != IB_WC_SUCCESS) {
1224
printk(KERN_ERR PFX
1225
"Completion wth error at %s:\n",
1226
cb->server ? "server" : "client");
1227
printk(KERN_ERR PFX
1228
"Failed status %d: wr_id %d\n",
1229
wc.status, (int) wc.wr_id);
1230
goto done;
1231
}
1232
}
1233
}
1234
microtime(&stop_tv);
1235
1236
if (stop_tv.tv_usec < start_tv.tv_usec) {
1237
stop_tv.tv_usec += 1000000;
1238
stop_tv.tv_sec -= 1;
1239
}
1240
1241
for (i=0; i < cycle_iters; i++) {
1242
sum_post += post_cycles_stop[i] - post_cycles_start[i];
1243
sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
1244
sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
1245
}
1246
printk(KERN_ERR PFX
1247
"delta sec %lu delta usec %lu iter %d size %d cycle_iters %d"
1248
" sum_post %llu sum_poll %llu sum_last_poll %llu\n",
1249
(unsigned long)(stop_tv.tv_sec - start_tv.tv_sec),
1250
(unsigned long)(stop_tv.tv_usec - start_tv.tv_usec),
1251
scnt, cb->size, cycle_iters,
1252
(unsigned long long)sum_post, (unsigned long long)sum_poll,
1253
(unsigned long long)sum_last_poll);
1254
done:
1255
kfree(post_cycles_start);
1256
kfree(post_cycles_stop);
1257
kfree(poll_cycles_start);
1258
kfree(poll_cycles_stop);
1259
kfree(last_poll_cycles_start);
1260
}
1261
1262
static void krping_rlat_test_server(struct krping_cb *cb)
1263
{
1264
const struct ib_send_wr *bad_wr;
1265
struct ib_wc wc;
1266
int ret;
1267
1268
/* Spin waiting for client's Start STAG/TO/Len */
1269
while (cb->state < RDMA_READ_ADV) {
1270
krping_cq_event_handler(cb->cq, cb);
1271
}
1272
1273
/* Send STAG/TO/Len to client */
1274
krping_format_send(cb, cb->start_dma_addr);
1275
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1276
if (ret) {
1277
printk(KERN_ERR PFX "post send error %d\n", ret);
1278
return;
1279
}
1280
1281
/* Spin waiting for send completion */
1282
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
1283
if (ret < 0) {
1284
printk(KERN_ERR PFX "poll error %d\n", ret);
1285
return;
1286
}
1287
if (wc.status) {
1288
printk(KERN_ERR PFX "send completiong error %d\n", wc.status);
1289
return;
1290
}
1291
1292
wait_event_interruptible(cb->sem, cb->state == ERROR);
1293
}
1294
1295
static void krping_wlat_test_server(struct krping_cb *cb)
1296
{
1297
const struct ib_send_wr *bad_wr;
1298
struct ib_wc wc;
1299
int ret;
1300
1301
/* Spin waiting for client's Start STAG/TO/Len */
1302
while (cb->state < RDMA_READ_ADV) {
1303
krping_cq_event_handler(cb->cq, cb);
1304
}
1305
1306
/* Send STAG/TO/Len to client */
1307
krping_format_send(cb, cb->start_dma_addr);
1308
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1309
if (ret) {
1310
printk(KERN_ERR PFX "post send error %d\n", ret);
1311
return;
1312
}
1313
1314
/* Spin waiting for send completion */
1315
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
1316
if (ret < 0) {
1317
printk(KERN_ERR PFX "poll error %d\n", ret);
1318
return;
1319
}
1320
if (wc.status) {
1321
printk(KERN_ERR PFX "send completiong error %d\n", wc.status);
1322
return;
1323
}
1324
1325
wlat_test(cb);
1326
wait_event_interruptible(cb->sem, cb->state == ERROR);
1327
}
1328
1329
static void krping_bw_test_server(struct krping_cb *cb)
1330
{
1331
const struct ib_send_wr *bad_wr;
1332
struct ib_wc wc;
1333
int ret;
1334
1335
/* Spin waiting for client's Start STAG/TO/Len */
1336
while (cb->state < RDMA_READ_ADV) {
1337
krping_cq_event_handler(cb->cq, cb);
1338
}
1339
1340
/* Send STAG/TO/Len to client */
1341
krping_format_send(cb, cb->start_dma_addr);
1342
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1343
if (ret) {
1344
printk(KERN_ERR PFX "post send error %d\n", ret);
1345
return;
1346
}
1347
1348
/* Spin waiting for send completion */
1349
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
1350
if (ret < 0) {
1351
printk(KERN_ERR PFX "poll error %d\n", ret);
1352
return;
1353
}
1354
if (wc.status) {
1355
printk(KERN_ERR PFX "send completiong error %d\n", wc.status);
1356
return;
1357
}
1358
1359
if (cb->duplex)
1360
bw_test(cb);
1361
wait_event_interruptible(cb->sem, cb->state == ERROR);
1362
}
1363
1364
static int reg_supported(struct ib_device *dev)
1365
{
1366
u64 needed_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
1367
1368
if ((dev->attrs.device_cap_flags & needed_flags) != needed_flags) {
1369
printk(KERN_ERR PFX
1370
"Fastreg not supported - device_cap_flags 0x%llx\n",
1371
(unsigned long long)dev->attrs.device_cap_flags);
1372
return 0;
1373
}
1374
DEBUG_LOG("Fastreg supported - device_cap_flags 0x%llx\n",
1375
(unsigned long long)dev->attrs.device_cap_flags);
1376
return 1;
1377
}
1378
1379
static void fill_sockaddr(struct sockaddr_storage *sin, struct krping_cb *cb)
1380
{
1381
memset(sin, 0, sizeof(*sin));
1382
1383
if (cb->addr_type == AF_INET) {
1384
struct sockaddr_in *sin4 = (struct sockaddr_in *)sin;
1385
sin4->sin_len = sizeof(*sin4);
1386
sin4->sin_family = AF_INET;
1387
memcpy((void *)&sin4->sin_addr.s_addr, cb->addr, 4);
1388
sin4->sin_port = cb->port;
1389
} else if (cb->addr_type == AF_INET6) {
1390
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin;
1391
sin6->sin6_len = sizeof(*sin6);
1392
sin6->sin6_family = AF_INET6;
1393
memcpy((void *)&sin6->sin6_addr, cb->addr, 16);
1394
sin6->sin6_port = cb->port;
1395
}
1396
}
1397
1398
static int krping_bind_server(struct krping_cb *cb)
1399
{
1400
struct sockaddr_storage sin;
1401
int ret;
1402
1403
1404
fill_sockaddr(&sin, cb);
1405
1406
ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *)&sin);
1407
if (ret) {
1408
printk(KERN_ERR PFX "rdma_bind_addr error %d\n", ret);
1409
return ret;
1410
}
1411
DEBUG_LOG("rdma_bind_addr successful\n");
1412
1413
DEBUG_LOG("rdma_listen\n");
1414
ret = rdma_listen(cb->cm_id, 3);
1415
if (ret) {
1416
printk(KERN_ERR PFX "rdma_listen failed: %d\n", ret);
1417
return ret;
1418
}
1419
1420
wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST);
1421
if (cb->state != CONNECT_REQUEST) {
1422
printk(KERN_ERR PFX "wait for CONNECT_REQUEST state %d\n",
1423
cb->state);
1424
return -1;
1425
}
1426
1427
if (!reg_supported(cb->child_cm_id->device))
1428
return -EINVAL;
1429
1430
return 0;
1431
}
1432
1433
static void krping_run_server(struct krping_cb *cb)
1434
{
1435
const struct ib_recv_wr *bad_wr;
1436
int ret;
1437
1438
ret = krping_bind_server(cb);
1439
if (ret)
1440
return;
1441
1442
ret = krping_setup_qp(cb, cb->child_cm_id);
1443
if (ret) {
1444
printk(KERN_ERR PFX "setup_qp failed: %d\n", ret);
1445
goto err0;
1446
}
1447
1448
ret = krping_setup_buffers(cb);
1449
if (ret) {
1450
printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret);
1451
goto err1;
1452
}
1453
1454
ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
1455
if (ret) {
1456
printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
1457
goto err2;
1458
}
1459
1460
ret = krping_accept(cb);
1461
if (ret) {
1462
printk(KERN_ERR PFX "connect error %d\n", ret);
1463
goto err2;
1464
}
1465
1466
if (cb->wlat)
1467
krping_wlat_test_server(cb);
1468
else if (cb->rlat)
1469
krping_rlat_test_server(cb);
1470
else if (cb->bw)
1471
krping_bw_test_server(cb);
1472
else
1473
krping_test_server(cb);
1474
rdma_disconnect(cb->child_cm_id);
1475
err2:
1476
krping_free_buffers(cb);
1477
err1:
1478
krping_free_qp(cb);
1479
err0:
1480
rdma_destroy_id(cb->child_cm_id);
1481
}
1482
1483
static void krping_test_client(struct krping_cb *cb)
1484
{
1485
int ping, start, cc, i, ret;
1486
const struct ib_send_wr *bad_wr;
1487
unsigned char c;
1488
1489
start = 65;
1490
for (ping = 0; !cb->count || ping < cb->count; ping++) {
1491
cb->state = RDMA_READ_ADV;
1492
1493
/* Put some ascii text in the buffer. */
1494
cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
1495
for (i = cc, c = start; i < cb->size; i++) {
1496
cb->start_buf[i] = c;
1497
c++;
1498
if (c > 122)
1499
c = 65;
1500
}
1501
start++;
1502
if (start > 122)
1503
start = 65;
1504
cb->start_buf[cb->size - 1] = 0;
1505
1506
krping_format_send(cb, cb->start_dma_addr);
1507
if (cb->state == ERROR) {
1508
printk(KERN_ERR PFX "krping_format_send failed\n");
1509
break;
1510
}
1511
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1512
if (ret) {
1513
printk(KERN_ERR PFX "post send error %d\n", ret);
1514
break;
1515
}
1516
1517
/* Wait for server to ACK */
1518
wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
1519
if (cb->state != RDMA_WRITE_ADV) {
1520
printk(KERN_ERR PFX
1521
"wait for RDMA_WRITE_ADV state %d\n",
1522
cb->state);
1523
break;
1524
}
1525
1526
krping_format_send(cb, cb->rdma_dma_addr);
1527
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1528
if (ret) {
1529
printk(KERN_ERR PFX "post send error %d\n", ret);
1530
break;
1531
}
1532
1533
/* Wait for the server to say the RDMA Write is complete. */
1534
wait_event_interruptible(cb->sem,
1535
cb->state >= RDMA_WRITE_COMPLETE);
1536
if (cb->state != RDMA_WRITE_COMPLETE) {
1537
printk(KERN_ERR PFX
1538
"wait for RDMA_WRITE_COMPLETE state %d\n",
1539
cb->state);
1540
break;
1541
}
1542
1543
if (cb->validate)
1544
if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
1545
printk(KERN_ERR PFX "data mismatch!\n");
1546
break;
1547
}
1548
1549
if (cb->verbose)
1550
printk(KERN_INFO PFX "ping data: %s\n", cb->rdma_buf);
1551
#ifdef SLOW_KRPING
1552
wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
1553
#endif
1554
}
1555
}
1556
1557
static void krping_rlat_test_client(struct krping_cb *cb)
1558
{
1559
const struct ib_send_wr *bad_wr;
1560
struct ib_wc wc;
1561
int ret;
1562
1563
cb->state = RDMA_READ_ADV;
1564
1565
/* Send STAG/TO/Len to client */
1566
krping_format_send(cb, cb->start_dma_addr);
1567
if (cb->state == ERROR) {
1568
printk(KERN_ERR PFX "krping_format_send failed\n");
1569
return;
1570
}
1571
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1572
if (ret) {
1573
printk(KERN_ERR PFX "post send error %d\n", ret);
1574
return;
1575
}
1576
1577
/* Spin waiting for send completion */
1578
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
1579
if (ret < 0) {
1580
printk(KERN_ERR PFX "poll error %d\n", ret);
1581
return;
1582
}
1583
if (wc.status) {
1584
printk(KERN_ERR PFX "send completion error %d\n", wc.status);
1585
return;
1586
}
1587
1588
/* Spin waiting for server's Start STAG/TO/Len */
1589
while (cb->state < RDMA_WRITE_ADV) {
1590
krping_cq_event_handler(cb->cq, cb);
1591
}
1592
1593
#if 0
1594
{
1595
int i;
1596
struct timeval start, stop;
1597
time_t sec;
1598
suseconds_t usec;
1599
unsigned long long elapsed;
1600
struct ib_wc wc;
1601
const struct ib_send_wr *bad_wr;
1602
int ne;
1603
1604
cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
1605
cb->rdma_sq_wr.rkey = cb->remote_rkey;
1606
cb->rdma_sq_wr.remote_addr = cb->remote_addr;
1607
cb->rdma_sq_wr.wr.sg_list->length = 0;
1608
cb->rdma_sq_wr.wr.num_sge = 0;
1609
1610
microtime(&start);
1611
for (i=0; i < 100000; i++) {
1612
if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
1613
printk(KERN_ERR PFX "Couldn't post send\n");
1614
return;
1615
}
1616
do {
1617
ne = ib_poll_cq(cb->cq, 1, &wc);
1618
} while (ne == 0);
1619
if (ne < 0) {
1620
printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
1621
return;
1622
}
1623
if (wc.status != IB_WC_SUCCESS) {
1624
printk(KERN_ERR PFX "Completion wth error at %s:\n",
1625
cb->server ? "server" : "client");
1626
printk(KERN_ERR PFX "Failed status %d: wr_id %d\n",
1627
wc.status, (int) wc.wr_id);
1628
return;
1629
}
1630
}
1631
microtime(&stop);
1632
1633
if (stop.tv_usec < start.tv_usec) {
1634
stop.tv_usec += 1000000;
1635
stop.tv_sec -= 1;
1636
}
1637
sec = stop.tv_sec - start.tv_sec;
1638
usec = stop.tv_usec - start.tv_usec;
1639
elapsed = sec * 1000000 + usec;
1640
printk(KERN_ERR PFX "0B-write-lat iters 100000 usec %llu\n", elapsed);
1641
}
1642
#endif
1643
1644
rlat_test(cb);
1645
}
1646
1647
static void krping_wlat_test_client(struct krping_cb *cb)
1648
{
1649
const struct ib_send_wr *bad_wr;
1650
struct ib_wc wc;
1651
int ret;
1652
1653
cb->state = RDMA_READ_ADV;
1654
1655
/* Send STAG/TO/Len to client */
1656
krping_format_send(cb, cb->start_dma_addr);
1657
if (cb->state == ERROR) {
1658
printk(KERN_ERR PFX "krping_format_send failed\n");
1659
return;
1660
}
1661
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1662
if (ret) {
1663
printk(KERN_ERR PFX "post send error %d\n", ret);
1664
return;
1665
}
1666
1667
/* Spin waiting for send completion */
1668
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
1669
if (ret < 0) {
1670
printk(KERN_ERR PFX "poll error %d\n", ret);
1671
return;
1672
}
1673
if (wc.status) {
1674
printk(KERN_ERR PFX "send completion error %d\n", wc.status);
1675
return;
1676
}
1677
1678
/* Spin waiting for server's Start STAG/TO/Len */
1679
while (cb->state < RDMA_WRITE_ADV) {
1680
krping_cq_event_handler(cb->cq, cb);
1681
}
1682
1683
wlat_test(cb);
1684
}
1685
1686
static void krping_bw_test_client(struct krping_cb *cb)
1687
{
1688
const struct ib_send_wr *bad_wr;
1689
struct ib_wc wc;
1690
int ret;
1691
1692
cb->state = RDMA_READ_ADV;
1693
1694
/* Send STAG/TO/Len to client */
1695
krping_format_send(cb, cb->start_dma_addr);
1696
if (cb->state == ERROR) {
1697
printk(KERN_ERR PFX "krping_format_send failed\n");
1698
return;
1699
}
1700
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
1701
if (ret) {
1702
printk(KERN_ERR PFX "post send error %d\n", ret);
1703
return;
1704
}
1705
1706
/* Spin waiting for send completion */
1707
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
1708
if (ret < 0) {
1709
printk(KERN_ERR PFX "poll error %d\n", ret);
1710
return;
1711
}
1712
if (wc.status) {
1713
printk(KERN_ERR PFX "send completion error %d\n", wc.status);
1714
return;
1715
}
1716
1717
/* Spin waiting for server's Start STAG/TO/Len */
1718
while (cb->state < RDMA_WRITE_ADV) {
1719
krping_cq_event_handler(cb->cq, cb);
1720
}
1721
1722
bw_test(cb);
1723
}
1724
1725
/*
1726
* Manual qp flush test
1727
*/
1728
static void flush_qp(struct krping_cb *cb)
1729
{
1730
struct ib_send_wr wr = { 0 };
1731
const struct ib_send_wr *bad;
1732
struct ib_recv_wr recv_wr = { 0 };
1733
const struct ib_recv_wr *recv_bad;
1734
struct ib_wc wc;
1735
int ret;
1736
int flushed = 0;
1737
int ccnt = 0;
1738
1739
rdma_disconnect(cb->cm_id);
1740
DEBUG_LOG("disconnected!\n");
1741
1742
wr.opcode = IB_WR_SEND;
1743
wr.wr_id = 0xdeadbeefcafebabe;
1744
ret = ib_post_send(cb->qp, &wr, &bad);
1745
if (ret) {
1746
printk(KERN_ERR PFX "%s post_send failed ret %d\n", __func__, ret);
1747
return;
1748
}
1749
1750
recv_wr.wr_id = 0xcafebabedeadbeef;
1751
ret = ib_post_recv(cb->qp, &recv_wr, &recv_bad);
1752
if (ret) {
1753
printk(KERN_ERR PFX "%s post_recv failed ret %d\n", __func__, ret);
1754
return;
1755
}
1756
1757
/* poll until the flush WRs complete */
1758
do {
1759
ret = ib_poll_cq(cb->cq, 1, &wc);
1760
if (ret < 0) {
1761
printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret);
1762
return;
1763
}
1764
if (ret == 0)
1765
continue;
1766
ccnt++;
1767
if (wc.wr_id == 0xdeadbeefcafebabe ||
1768
wc.wr_id == 0xcafebabedeadbeef)
1769
flushed++;
1770
} while (flushed != 2);
1771
DEBUG_LOG("qp_flushed! ccnt %u\n", ccnt);
1772
}
1773
1774
static void krping_fr_test(struct krping_cb *cb)
1775
{
1776
struct ib_send_wr inv;
1777
const struct ib_send_wr *bad;
1778
struct ib_reg_wr fr;
1779
struct ib_wc wc;
1780
u8 key = 0;
1781
struct ib_mr *mr;
1782
int ret;
1783
int size = cb->size;
1784
int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
1785
unsigned long start;
1786
int count = 0;
1787
int scnt = 0;
1788
struct scatterlist sg = {0};
1789
1790
mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG, plen);
1791
if (IS_ERR(mr)) {
1792
printk(KERN_ERR PFX "ib_alloc_mr failed %ld\n", PTR_ERR(mr));
1793
return;
1794
}
1795
1796
sg_dma_address(&sg) = (dma_addr_t)0xcafebabe0000ULL;
1797
sg_dma_len(&sg) = size;
1798
ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE);
1799
if (ret <= 0) {
1800
printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret);
1801
goto err2;
1802
}
1803
1804
memset(&fr, 0, sizeof fr);
1805
fr.wr.opcode = IB_WR_REG_MR;
1806
fr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
1807
fr.mr = mr;
1808
fr.wr.next = &inv;
1809
1810
memset(&inv, 0, sizeof inv);
1811
inv.opcode = IB_WR_LOCAL_INV;
1812
inv.send_flags = IB_SEND_SIGNALED;
1813
1814
DEBUG_LOG("fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth);
1815
start = time_uptime;
1816
while (!cb->count || count <= cb->count) {
1817
if (SIGPENDING(curthread)) {
1818
printk(KERN_ERR PFX "signal!\n");
1819
break;
1820
}
1821
if ((time_uptime - start) >= 9) {
1822
DEBUG_LOG("fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen);
1823
wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
1824
if (cb->state == ERROR)
1825
break;
1826
start = time_uptime;
1827
}
1828
while (scnt < (cb->txdepth>>1)) {
1829
ib_update_fast_reg_key(mr, ++key);
1830
fr.key = mr->rkey;
1831
inv.ex.invalidate_rkey = mr->rkey;
1832
1833
size = arc4random() % cb->size;
1834
if (size == 0)
1835
size = cb->size;
1836
sg_dma_len(&sg) = size;
1837
ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE);
1838
if (ret <= 0) {
1839
printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret);
1840
goto err2;
1841
}
1842
ret = ib_post_send(cb->qp, &fr.wr, &bad);
1843
if (ret) {
1844
printk(KERN_ERR PFX "ib_post_send failed %d\n", ret);
1845
goto err2;
1846
}
1847
scnt++;
1848
}
1849
1850
ret = ib_poll_cq(cb->cq, 1, &wc);
1851
if (ret < 0) {
1852
printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret);
1853
goto err2;
1854
}
1855
if (ret == 1) {
1856
if (wc.status) {
1857
printk(KERN_ERR PFX "completion error %u\n", wc.status);
1858
goto err2;
1859
}
1860
count++;
1861
scnt--;
1862
}
1863
}
1864
err2:
1865
flush_qp(cb);
1866
DEBUG_LOG("fr_test: done!\n");
1867
ib_dereg_mr(mr);
1868
}
1869
1870
static int krping_connect_client(struct krping_cb *cb)
1871
{
1872
struct rdma_conn_param conn_param;
1873
int ret;
1874
1875
memset(&conn_param, 0, sizeof conn_param);
1876
conn_param.responder_resources = 1;
1877
conn_param.initiator_depth = 1;
1878
conn_param.retry_count = 10;
1879
1880
ret = rdma_connect(cb->cm_id, &conn_param);
1881
if (ret) {
1882
printk(KERN_ERR PFX "rdma_connect error %d\n", ret);
1883
return ret;
1884
}
1885
1886
wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
1887
if (cb->state == ERROR) {
1888
printk(KERN_ERR PFX "wait for CONNECTED state %d\n", cb->state);
1889
return -1;
1890
}
1891
1892
DEBUG_LOG("rdma_connect successful\n");
1893
return 0;
1894
}
1895
1896
static int krping_bind_client(struct krping_cb *cb)
1897
{
1898
struct sockaddr_storage sin;
1899
int ret;
1900
1901
fill_sockaddr(&sin, cb);
1902
1903
ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *)&sin, 2000);
1904
if (ret) {
1905
printk(KERN_ERR PFX "rdma_resolve_addr error %d\n", ret);
1906
return ret;
1907
}
1908
1909
wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED);
1910
if (cb->state != ROUTE_RESOLVED) {
1911
printk(KERN_ERR PFX
1912
"addr/route resolution did not resolve: state %d\n",
1913
cb->state);
1914
return -EINTR;
1915
}
1916
1917
if (!reg_supported(cb->cm_id->device))
1918
return -EINVAL;
1919
1920
DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n");
1921
return 0;
1922
}
1923
1924
static void krping_run_client(struct krping_cb *cb)
1925
{
1926
const struct ib_recv_wr *bad_wr;
1927
int ret;
1928
1929
/* set type of service, if any */
1930
if (cb->tos != 0)
1931
rdma_set_service_type(cb->cm_id, cb->tos);
1932
1933
ret = krping_bind_client(cb);
1934
if (ret)
1935
return;
1936
1937
ret = krping_setup_qp(cb, cb->cm_id);
1938
if (ret) {
1939
printk(KERN_ERR PFX "setup_qp failed: %d\n", ret);
1940
return;
1941
}
1942
1943
ret = krping_setup_buffers(cb);
1944
if (ret) {
1945
printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret);
1946
goto err1;
1947
}
1948
1949
ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
1950
if (ret) {
1951
printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
1952
goto err2;
1953
}
1954
1955
ret = krping_connect_client(cb);
1956
if (ret) {
1957
printk(KERN_ERR PFX "connect error %d\n", ret);
1958
goto err2;
1959
}
1960
1961
if (cb->wlat)
1962
krping_wlat_test_client(cb);
1963
else if (cb->rlat)
1964
krping_rlat_test_client(cb);
1965
else if (cb->bw)
1966
krping_bw_test_client(cb);
1967
else if (cb->frtest)
1968
krping_fr_test(cb);
1969
else
1970
krping_test_client(cb);
1971
rdma_disconnect(cb->cm_id);
1972
err2:
1973
krping_free_buffers(cb);
1974
err1:
1975
krping_free_qp(cb);
1976
}
1977
1978
static uint16_t
1979
krping_get_ipv6_scope_id(char *name)
1980
{
1981
struct ifnet *ifp;
1982
uint16_t retval;
1983
1984
if (name == NULL)
1985
return (0);
1986
CURVNET_SET_QUIET(TD_TO_VNET(curthread));
1987
ifp = ifunit_ref(name);
1988
CURVNET_RESTORE();
1989
if (ifp == NULL)
1990
return (0);
1991
retval = if_getindex(ifp);
1992
if_rele(ifp);
1993
return (retval);
1994
}
1995
1996
int krping_doit(char *cmd)
1997
{
1998
struct krping_cb *cb;
1999
int op;
2000
int ret = 0;
2001
char *optarg;
2002
char *scope;
2003
unsigned long optint;
2004
2005
cb = kzalloc(sizeof(*cb), GFP_KERNEL);
2006
if (!cb)
2007
return -ENOMEM;
2008
2009
mutex_lock(&krping_mutex);
2010
list_add_tail(&cb->list, &krping_cbs);
2011
mutex_unlock(&krping_mutex);
2012
2013
cb->server = -1;
2014
cb->state = IDLE;
2015
cb->size = 64;
2016
cb->txdepth = RPING_SQ_DEPTH;
2017
init_waitqueue_head(&cb->sem);
2018
2019
while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg,
2020
&optint)) != 0) {
2021
switch (op) {
2022
case 'a':
2023
cb->addr_str = optarg;
2024
cb->addr_type = AF_INET;
2025
DEBUG_LOG("ipaddr (%s)\n", optarg);
2026
if (inet_pton(AF_INET, optarg, cb->addr) != 1) {
2027
printk(KERN_ERR PFX "bad addr string %s\n",
2028
optarg);
2029
ret = EINVAL;
2030
}
2031
break;
2032
case 'A':
2033
cb->addr_str = optarg;
2034
cb->addr_type = AF_INET6;
2035
DEBUG_LOG("ipv6addr (%s)\n", optarg);
2036
scope = strstr(optarg, "%");
2037
/* extract scope ID, if any */
2038
if (scope != NULL)
2039
*scope++ = 0;
2040
/* extract IPv6 network address */
2041
if (inet_pton(AF_INET6, optarg, cb->addr) != 1) {
2042
printk(KERN_ERR PFX "bad addr string %s\n",
2043
optarg);
2044
ret = EINVAL;
2045
} else if (IN6_IS_SCOPE_LINKLOCAL((struct in6_addr *)cb->addr) ||
2046
IN6_IS_ADDR_MC_INTFACELOCAL((struct in6_addr *)cb->addr)) {
2047
uint16_t scope_id = krping_get_ipv6_scope_id(scope);
2048
DEBUG_LOG("ipv6 scope ID = %d\n", scope_id);
2049
cb->addr[2] = scope_id >> 8;
2050
cb->addr[3] = scope_id & 0xFF;
2051
}
2052
break;
2053
case 'p':
2054
cb->port = htons(optint);
2055
DEBUG_LOG("port %d\n", (int)optint);
2056
break;
2057
case 'P':
2058
cb->poll = 1;
2059
DEBUG_LOG("server\n");
2060
break;
2061
case 's':
2062
cb->server = 1;
2063
DEBUG_LOG("server\n");
2064
break;
2065
case 'c':
2066
cb->server = 0;
2067
DEBUG_LOG("client\n");
2068
break;
2069
case 'S':
2070
cb->size = optint;
2071
if ((cb->size < 1) ||
2072
(cb->size > RPING_BUFSIZE)) {
2073
printk(KERN_ERR PFX "Invalid size %d "
2074
"(valid range is 1 to %d)\n",
2075
cb->size, RPING_BUFSIZE);
2076
ret = EINVAL;
2077
} else
2078
DEBUG_LOG("size %d\n", (int)optint);
2079
break;
2080
case 'C':
2081
cb->count = optint;
2082
if (cb->count < 0) {
2083
printk(KERN_ERR PFX "Invalid count %d\n",
2084
cb->count);
2085
ret = EINVAL;
2086
} else
2087
DEBUG_LOG("count %d\n", (int) cb->count);
2088
break;
2089
case 'v':
2090
cb->verbose++;
2091
DEBUG_LOG("verbose\n");
2092
break;
2093
case 'V':
2094
cb->validate++;
2095
DEBUG_LOG("validate data\n");
2096
break;
2097
case 'l':
2098
cb->wlat++;
2099
break;
2100
case 'L':
2101
cb->rlat++;
2102
break;
2103
case 'B':
2104
cb->bw++;
2105
break;
2106
case 'd':
2107
cb->duplex++;
2108
break;
2109
case 'I':
2110
cb->server_invalidate = 1;
2111
break;
2112
case 't':
2113
cb->tos = optint;
2114
DEBUG_LOG("type of service, tos=%d\n", (int) cb->tos);
2115
break;
2116
case 'T':
2117
cb->txdepth = optint;
2118
DEBUG_LOG("txdepth %d\n", (int) cb->txdepth);
2119
break;
2120
case 'Z':
2121
cb->local_dma_lkey = 1;
2122
DEBUG_LOG("using local dma lkey\n");
2123
break;
2124
case 'R':
2125
cb->read_inv = 1;
2126
DEBUG_LOG("using read-with-inv\n");
2127
break;
2128
case 'f':
2129
cb->frtest = 1;
2130
DEBUG_LOG("fast-reg test!\n");
2131
break;
2132
default:
2133
printk(KERN_ERR PFX "unknown opt %s\n", optarg);
2134
ret = -EINVAL;
2135
break;
2136
}
2137
}
2138
if (ret)
2139
goto out;
2140
2141
if (cb->server == -1) {
2142
printk(KERN_ERR PFX "must be either client or server\n");
2143
ret = -EINVAL;
2144
goto out;
2145
}
2146
2147
if (cb->server && cb->frtest) {
2148
printk(KERN_ERR PFX "must be client to run frtest\n");
2149
ret = -EINVAL;
2150
goto out;
2151
}
2152
2153
if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) {
2154
printk(KERN_ERR PFX "Pick only one test: fr, bw, rlat, wlat\n");
2155
ret = -EINVAL;
2156
goto out;
2157
}
2158
2159
if (cb->wlat || cb->rlat || cb->bw) {
2160
printk(KERN_ERR PFX "wlat, rlat, and bw tests only support mem_mode MR - which is no longer supported\n");
2161
ret = -EINVAL;
2162
goto out;
2163
}
2164
2165
cb->cm_id = rdma_create_id(TD_TO_VNET(curthread), krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC);
2166
if (IS_ERR(cb->cm_id)) {
2167
ret = PTR_ERR(cb->cm_id);
2168
printk(KERN_ERR PFX "rdma_create_id error %d\n", ret);
2169
goto out;
2170
}
2171
DEBUG_LOG("created cm_id %p\n", cb->cm_id);
2172
2173
if (cb->server)
2174
krping_run_server(cb);
2175
else
2176
krping_run_client(cb);
2177
2178
DEBUG_LOG("destroy cm_id %p\n", cb->cm_id);
2179
rdma_destroy_id(cb->cm_id);
2180
out:
2181
mutex_lock(&krping_mutex);
2182
list_del(&cb->list);
2183
mutex_unlock(&krping_mutex);
2184
kfree(cb);
2185
return ret;
2186
}
2187
2188
void
2189
krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg)
2190
{
2191
struct krping_cb *cb;
2192
2193
mutex_lock(&krping_mutex);
2194
list_for_each_entry(cb, &krping_cbs, list)
2195
(*f)(cb->pd ? &cb->stats : NULL, arg);
2196
mutex_unlock(&krping_mutex);
2197
}
2198
2199
void
2200
krping_cancel_all(void)
2201
{
2202
struct krping_cb *cb;
2203
2204
mutex_lock(&krping_mutex);
2205
list_for_each_entry(cb, &krping_cbs, list) {
2206
cb->state = ERROR;
2207
wake_up_interruptible(&cb->sem);
2208
}
2209
mutex_unlock(&krping_mutex);
2210
}
2211
2212
2213