/*
 * net/rds/rds.h
 *
 * Source: GitHub repository awilliam/linux-vfio, blob/master/net/rds/rds.h
 */
#ifndef _RDS_RDS_H
2
#define _RDS_RDS_H
3
4
#include <net/sock.h>
5
#include <linux/scatterlist.h>
6
#include <linux/highmem.h>
7
#include <rdma/rdma_cm.h>
8
#include <linux/mutex.h>
9
#include <linux/rds.h>
10
11
#include "info.h"
12
13
/*
 * RDS Network protocol version
 *
 * A version is packed into one integer: the major number sits above bit 8
 * and the minor number occupies the low 8 bits (0x0301 is version 3.1).
 */
#define RDS_PROTOCOL_3_0	0x0300
#define RDS_PROTOCOL_3_1	0x0301
#define RDS_PROTOCOL_VERSION	RDS_PROTOCOL_3_1
#define RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
#define RDS_PROTOCOL_MINOR(v)	((v) & 255)
/* Both arguments are parenthesized so expression arguments expand safely. */
#define RDS_PROTOCOL(maj, min)	(((maj) << 8) | (min))

/*
 * XXX randomly chosen, but at least seems to be unused:
 * #	18464-18768 Unassigned
 * We should do better.  We want a reserved port to discourage unpriv'ed
 * userspace from listening.
 */
#define RDS_PORT	18634

/* KERNEL_HAS_ATOMIC64 is defined whenever ATOMIC64_INIT is defined. */
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif

/*
 * rdsdebug() - debug logging helper.
 *
 * With DEBUG defined this expands to pr_debug(), prefixing the message with
 * the name of the calling function.  Otherwise it is an empty inline
 * function that still gets printf-style format checking from the compiler.
 * The format is taken as const char * because it is never written through.
 */
#ifdef DEBUG
#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
#else
/* sigh, pr_debug() causes unused variable warnings */
static inline void __attribute__ ((format (printf, 1, 2)))
rdsdebug(const char *fmt, ...)
{
}
#endif

/*
 * ceil(x, y) - divide x by y, rounding the quotient up.
 *
 * Both operands are converted to unsigned long and evaluated exactly once.
 * The quotient-plus-remainder form cannot wrap, unlike (x + y - 1) / y when
 * x is close to ULONG_MAX.  y must be non-zero.
 *
 * XXX is there one of these somewhere?
 * NOTE(review): DIV_ROUND_UP() looks like the generic kernel equivalent --
 * confirm before switching callers over.
 */
#define ceil(x, y) \
	({ unsigned long __x = (x), __y = (y); __x / __y + !!(__x % __y); })

/* Fragment size: 1 << RDS_FRAG_SHIFT = 4096 (unit presumably bytes). */
#define RDS_FRAG_SHIFT	12
#define RDS_FRAG_SIZE	((unsigned int)(1 << RDS_FRAG_SHIFT))

/*
 * Congestion map sizing: 65536 bits (8192 bytes), rounded up to whole pages.
 * NOTE(review): presumably one bit per 16-bit port number -- confirm.
 */
#define RDS_CONG_MAP_BYTES	(65536 / 8)
#define RDS_CONG_MAP_PAGES	(PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
#define RDS_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)

struct rds_cong_map {
57
struct rb_node m_rb_node;
58
__be32 m_addr;
59
wait_queue_head_t m_waitq;
60
struct list_head m_conn_list;
61
unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
62
};
63
64
65
/*
 * This is how we will track the connection state:
 * A connection is always in one of the following
 * states. Updates to the state are atomic and imply
 * a memory barrier.
 *
 * The value lives in rds_connection.c_state (an atomic_t).
 */
enum {
	RDS_CONN_DOWN = 0,
	RDS_CONN_CONNECTING,
	RDS_CONN_DISCONNECTING,
	RDS_CONN_UP,
	RDS_CONN_ERROR,
};

/*
 * Bits for c_flags.
 * NOTE(review): these look like bit positions rather than masks (the first
 * value is 0), presumably for the set_bit()/test_bit() family -- confirm.
 */
#define RDS_LL_SEND_FULL	0
#define RDS_RECONNECT_PENDING	1
#define RDS_IN_XMIT		2

struct rds_connection {
85
struct hlist_node c_hash_node;
86
__be32 c_laddr;
87
__be32 c_faddr;
88
unsigned int c_loopback:1;
89
struct rds_connection *c_passive;
90
91
struct rds_cong_map *c_lcong;
92
struct rds_cong_map *c_fcong;
93
94
struct rds_message *c_xmit_rm;
95
unsigned long c_xmit_sg;
96
unsigned int c_xmit_hdr_off;
97
unsigned int c_xmit_data_off;
98
unsigned int c_xmit_atomic_sent;
99
unsigned int c_xmit_rdma_sent;
100
unsigned int c_xmit_data_sent;
101
102
spinlock_t c_lock; /* protect msg queues */
103
u64 c_next_tx_seq;
104
struct list_head c_send_queue;
105
struct list_head c_retrans;
106
107
u64 c_next_rx_seq;
108
109
struct rds_transport *c_trans;
110
void *c_transport_data;
111
112
atomic_t c_state;
113
unsigned long c_flags;
114
unsigned long c_reconnect_jiffies;
115
struct delayed_work c_send_w;
116
struct delayed_work c_recv_w;
117
struct delayed_work c_conn_w;
118
struct work_struct c_down_w;
119
struct mutex c_cm_lock; /* protect conn state & cm */
120
wait_queue_head_t c_waitq;
121
122
struct list_head c_map_item;
123
unsigned long c_map_queued;
124
125
unsigned int c_unacked_packets;
126
unsigned int c_unacked_bytes;
127
128
/* Protocol version */
129
unsigned int c_version;
130
};
131
132
/*
 * Message flag masks.
 * NOTE(review): presumably carried in rds_header.h_flags -- confirm.
 */
#define RDS_FLAG_CONG_BITMAP	0x01
#define RDS_FLAG_ACK_REQUIRED	0x02
#define RDS_FLAG_RETRANSMITTED	0x04
/* NOTE(review): 255 is the largest value a u8 such as h_credit can hold. */
#define RDS_MAX_ADV_CREDIT	255

/*
 * Maximum space available for extension headers.
 */
#define RDS_HEADER_EXT_SPACE	16

struct rds_header {
143
__be64 h_sequence;
144
__be64 h_ack;
145
__be32 h_len;
146
__be16 h_sport;
147
__be16 h_dport;
148
u8 h_flags;
149
u8 h_credit;
150
u8 h_padding[4];
151
__sum16 h_csum;
152
153
u8 h_exthdr[RDS_HEADER_EXT_SPACE];
154
};
155
156
/*
 * Reserved - indicates end of extensions
 */
#define RDS_EXTHDR_NONE		0

/*
162
* This extension header is included in the very
163
* first message that is sent on a new connection,
164
* and identifies the protocol level. This will help
165
* rolling updates if a future change requires breaking
166
* the protocol.
167
* NB: This is no longer true for IB, where we do a version
168
* negotiation during the connection setup phase (protocol
169
* version information is included in the RDMA CM private data).
170
*/
171
#define RDS_EXTHDR_VERSION 1
172
struct rds_ext_header_version {
173
__be32 h_version;
174
};
175
176
/*
177
* This extension header is included in the RDS message
178
* chasing an RDMA operation.
179
*/
180
#define RDS_EXTHDR_RDMA 2
181
struct rds_ext_header_rdma {
182
__be32 h_rdma_rkey;
183
};
184
185
/*
186
* This extension header tells the peer about the
187
* destination <R_Key,offset> of the requested RDMA
188
* operation.
189
*/
190
#define RDS_EXTHDR_RDMA_DEST 3
191
struct rds_ext_header_rdma_dest {
192
__be32 h_rdma_rkey;
193
__be32 h_rdma_offset;
194
};
195
196
#define __RDS_EXTHDR_MAX 16 /* for now */
197
198
struct rds_incoming {
199
atomic_t i_refcount;
200
struct list_head i_item;
201
struct rds_connection *i_conn;
202
struct rds_header i_hdr;
203
unsigned long i_rx_jiffies;
204
__be32 i_saddr;
205
206
rds_rdma_cookie_t i_rdma_cookie;
207
};
208
209
struct rds_mr {
210
struct rb_node r_rb_node;
211
atomic_t r_refcount;
212
u32 r_key;
213
214
/* A copy of the creation flags */
215
unsigned int r_use_once:1;
216
unsigned int r_invalidate:1;
217
unsigned int r_write:1;
218
219
/* This is for RDS_MR_DEAD.
220
* It would be nice & consistent to make this part of the above
221
* bit field here, but we need to use test_and_set_bit.
222
*/
223
unsigned long r_state;
224
struct rds_sock *r_sock; /* back pointer to the socket that owns us */
225
struct rds_transport *r_trans;
226
void *r_trans_private;
227
};
228
229
/* Flags for mr->r_state */
230
#define RDS_MR_DEAD 0
231
232
/*
 * Pack an R_Key and an offset into an RDMA cookie: r_key occupies the low
 * 32 bits, offset the bits above them.
 */
static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
{
	return r_key | (((u64) offset) << 32);
}

/* Return the R_Key: the cookie truncated to its low 32 bits by the u32 return. */
static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
{
	return cookie;
}

/* Return the offset: the cookie shifted right by 32 bits. */
static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
{
	return cookie >> 32;
}

/*
 * atomic operation types
 * NOTE(review): presumably the values of rm_atomic_op.op_type -- confirm.
 */
#define RDS_ATOMIC_TYPE_CSWP		0
#define RDS_ATOMIC_TYPE_FADD		1

/*
 * m_sock_item and m_conn_item are on lists that are serialized under
 * conn->c_lock.  m_sock_item has additional meaning in that once it is empty
 * the message will not be put back on the retransmit list after being sent.
 * messages that are canceled while being sent rely on this.
 *
 * m_inc is used by loopback so that it can pass an incoming message straight
 * back up into the rx path.  It embeds a wire header which is also used by
 * the send path, which is kind of awkward.
 *
 * m_sock_item indicates the message's presence on a socket's send or receive
 * queue.  m_rs will point to that socket.
 *
 * m_daddr is used by cancellation to prune messages to a given destination.
 *
 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
 * nesting.  As paths iterate over messages on a sock, or conn, they must
 * also lock the conn, or sock, to remove the message from those lists too.
 * Testing the flag to determine if the message is still on the lists lets
 * us avoid testing the list_head directly.  That means each path can use
 * the message's list_head to keep it on a local list while juggling locks
 * without confusing the other path.
 *
 * m_ack_seq is an optional field set by transports who need a different
 * sequence number range to invalidate.  They can use this in a callback
 * that they pass to rds_send_drop_acked() to see if each message has been
 * acked.  The HAS_ACK_SEQ flag can be used to detect messages which haven't
 * had ack_seq set yet.
 */
#define RDS_MSG_ON_SOCK		1
#define RDS_MSG_ON_CONN		2
#define RDS_MSG_HAS_ACK_SEQ	3
#define RDS_MSG_ACK_REQUIRED	4
#define RDS_MSG_RETRANSMITTED	5
#define RDS_MSG_MAPPED		6
#define RDS_MSG_PAGEVEC		7

struct rds_message {
289
atomic_t m_refcount;
290
struct list_head m_sock_item;
291
struct list_head m_conn_item;
292
struct rds_incoming m_inc;
293
u64 m_ack_seq;
294
__be32 m_daddr;
295
unsigned long m_flags;
296
297
/* Never access m_rs without holding m_rs_lock.
298
* Lock nesting is
299
* rm->m_rs_lock
300
* -> rs->rs_lock
301
*/
302
spinlock_t m_rs_lock;
303
wait_queue_head_t m_flush_wait;
304
305
struct rds_sock *m_rs;
306
307
/* cookie to send to remote, in rds header */
308
rds_rdma_cookie_t m_rdma_cookie;
309
310
unsigned int m_used_sgs;
311
unsigned int m_total_sgs;
312
313
void *m_final_op;
314
315
struct {
316
struct rm_atomic_op {
317
int op_type;
318
union {
319
struct {
320
uint64_t compare;
321
uint64_t swap;
322
uint64_t compare_mask;
323
uint64_t swap_mask;
324
} op_m_cswp;
325
struct {
326
uint64_t add;
327
uint64_t nocarry_mask;
328
} op_m_fadd;
329
};
330
331
u32 op_rkey;
332
u64 op_remote_addr;
333
unsigned int op_notify:1;
334
unsigned int op_recverr:1;
335
unsigned int op_mapped:1;
336
unsigned int op_silent:1;
337
unsigned int op_active:1;
338
struct scatterlist *op_sg;
339
struct rds_notifier *op_notifier;
340
341
struct rds_mr *op_rdma_mr;
342
} atomic;
343
struct rm_rdma_op {
344
u32 op_rkey;
345
u64 op_remote_addr;
346
unsigned int op_write:1;
347
unsigned int op_fence:1;
348
unsigned int op_notify:1;
349
unsigned int op_recverr:1;
350
unsigned int op_mapped:1;
351
unsigned int op_silent:1;
352
unsigned int op_active:1;
353
unsigned int op_bytes;
354
unsigned int op_nents;
355
unsigned int op_count;
356
struct scatterlist *op_sg;
357
struct rds_notifier *op_notifier;
358
359
struct rds_mr *op_rdma_mr;
360
} rdma;
361
struct rm_data_op {
362
unsigned int op_active:1;
363
unsigned int op_nents;
364
unsigned int op_count;
365
struct scatterlist *op_sg;
366
} data;
367
};
368
};
369
370
/*
371
* The RDS notifier is used (optionally) to tell the application about
372
* completed RDMA operations. Rather than keeping the whole rds message
373
* around on the queue, we allocate a small notifier that is put on the
374
* socket's notifier_list. Notifications are delivered to the application
375
* through control messages.
376
*/
377
struct rds_notifier {
378
struct list_head n_list;
379
uint64_t n_user_token;
380
int n_status;
381
};
382
383
/**
384
* struct rds_transport - transport specific behavioural hooks
385
*
386
* @xmit: .xmit is called by rds_send_xmit() to tell the transport to send
387
* part of a message. The caller serializes on the send_sem so this
388
* doesn't need to be reentrant for a given conn. The header must be
389
* sent before the data payload. .xmit must be prepared to send a
390
* message with no data payload. .xmit should return the number of
391
* bytes that were sent down the connection, including header bytes.
392
* Returning 0 tells the caller that it doesn't need to perform any
393
* additional work now. This is usually the case when the transport has
394
* filled the sending queue for its connection and will handle
395
* triggering the rds thread to continue the send when space becomes
396
* available. Returning -EAGAIN tells the caller to retry the send
397
* immediately. Returning -ENOMEM tells the caller to retry the send at
398
* some point in the future.
399
*
400
* @conn_shutdown: conn_shutdown stops traffic on the given connection. Once
401
* it returns the connection can not call rds_recv_incoming().
402
* This will only be called once after conn_connect returns
403
* non-zero success and will The caller serializes this with
404
* the send and connecting paths (xmit_* and conn_*). The
405
* transport is responsible for other serialization, including
406
* rds_recv_incoming(). This is called in process context but
407
* should try hard not to block.
408
*/
409
410
#define RDS_TRANS_IB 0
411
#define RDS_TRANS_IWARP 1
412
#define RDS_TRANS_TCP 2
413
#define RDS_TRANS_COUNT 3
414
415
struct rds_transport {
416
char t_name[TRANSNAMSIZ];
417
struct list_head t_item;
418
struct module *t_owner;
419
unsigned int t_prefer_loopback:1;
420
unsigned int t_type;
421
422
int (*laddr_check)(__be32 addr);
423
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
424
void (*conn_free)(void *data);
425
int (*conn_connect)(struct rds_connection *conn);
426
void (*conn_shutdown)(struct rds_connection *conn);
427
void (*xmit_prepare)(struct rds_connection *conn);
428
void (*xmit_complete)(struct rds_connection *conn);
429
int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
430
unsigned int hdr_off, unsigned int sg, unsigned int off);
431
int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
432
int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
433
int (*recv)(struct rds_connection *conn);
434
int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
435
size_t size);
436
void (*inc_free)(struct rds_incoming *inc);
437
438
int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
439
struct rdma_cm_event *event);
440
int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
441
void (*cm_connect_complete)(struct rds_connection *conn,
442
struct rdma_cm_event *event);
443
444
unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
445
unsigned int avail);
446
void (*exit)(void);
447
void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
448
struct rds_sock *rs, u32 *key_ret);
449
void (*sync_mr)(void *trans_private, int direction);
450
void (*free_mr)(void *trans_private, int invalidate);
451
void (*flush_mrs)(void);
452
};
453
454
struct rds_sock {
455
struct sock rs_sk;
456
457
u64 rs_user_addr;
458
u64 rs_user_bytes;
459
460
/*
461
* bound_addr used for both incoming and outgoing, no INADDR_ANY
462
* support.
463
*/
464
struct hlist_node rs_bound_node;
465
__be32 rs_bound_addr;
466
__be32 rs_conn_addr;
467
__be16 rs_bound_port;
468
__be16 rs_conn_port;
469
struct rds_transport *rs_transport;
470
471
/*
472
* rds_sendmsg caches the conn it used the last time around.
473
* This helps avoid costly lookups.
474
*/
475
struct rds_connection *rs_conn;
476
477
/* flag indicating we were congested or not */
478
int rs_congested;
479
/* seen congestion (ENOBUFS) when sending? */
480
int rs_seen_congestion;
481
482
/* rs_lock protects all these adjacent members before the newline */
483
spinlock_t rs_lock;
484
struct list_head rs_send_queue;
485
u32 rs_snd_bytes;
486
int rs_rcv_bytes;
487
struct list_head rs_notify_queue; /* currently used for failed RDMAs */
488
489
/* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask
490
* to decide whether the application should be woken up.
491
* If not set, we use rs_cong_track to find out whether a cong map
492
* update arrived.
493
*/
494
uint64_t rs_cong_mask;
495
uint64_t rs_cong_notify;
496
struct list_head rs_cong_list;
497
unsigned long rs_cong_track;
498
499
/*
500
* rs_recv_lock protects the receive queue, and is
501
* used to serialize with rds_release.
502
*/
503
rwlock_t rs_recv_lock;
504
struct list_head rs_recv_queue;
505
506
/* just for stats reporting */
507
struct list_head rs_item;
508
509
/* these have their own lock */
510
spinlock_t rs_rdma_lock;
511
struct rb_root rs_rdma_keys;
512
513
/* Socket options - in case there will be more */
514
unsigned char rs_recverr,
515
rs_cong_monitor;
516
};
517
518
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
519
{
520
return container_of(sk, struct rds_sock, rs_sk);
521
}
522
static inline struct sock *rds_rs_to_sk(struct rds_sock *rs)
523
{
524
return &rs->rs_sk;
525
}
526
527
/*
528
* The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
529
* to account for overhead. We don't account for overhead, we just apply
530
* the number of payload bytes to the specified value.
531
*/
532
static inline int rds_sk_sndbuf(struct rds_sock *rs)
533
{
534
return rds_rs_to_sk(rs)->sk_sndbuf / 2;
535
}
536
static inline int rds_sk_rcvbuf(struct rds_sock *rs)
537
{
538
return rds_rs_to_sk(rs)->sk_rcvbuf / 2;
539
}
540
541
/*
 * RDS statistics counters, all 64-bit.  A per-CPU instance named rds_stats
 * is declared in the stats.c section and updated with rds_stats_inc() and
 * rds_stats_add().
 */
struct rds_statistics {
	uint64_t	s_conn_reset;
	uint64_t	s_recv_drop_bad_checksum;
	uint64_t	s_recv_drop_old_seq;
	uint64_t	s_recv_drop_no_sock;
	uint64_t	s_recv_drop_dead_sock;
	uint64_t	s_recv_deliver_raced;
	uint64_t	s_recv_delivered;
	uint64_t	s_recv_queued;
	uint64_t	s_recv_immediate_retry;
	uint64_t	s_recv_delayed_retry;
	uint64_t	s_recv_ack_required;
	uint64_t	s_recv_rdma_bytes;
	uint64_t	s_recv_ping;
	uint64_t	s_send_queue_empty;
	uint64_t	s_send_queue_full;
	uint64_t	s_send_lock_contention;
	uint64_t	s_send_lock_queue_raced;
	uint64_t	s_send_immediate_retry;
	uint64_t	s_send_delayed_retry;
	uint64_t	s_send_drop_acked;
	uint64_t	s_send_ack_required;
	uint64_t	s_send_queued;
	uint64_t	s_send_rdma;
	uint64_t	s_send_rdma_bytes;
	uint64_t	s_send_pong;
	uint64_t	s_page_remainder_hit;
	uint64_t	s_page_remainder_miss;
	uint64_t	s_copy_to_user;
	uint64_t	s_copy_from_user;
	uint64_t	s_cong_update_queued;
	uint64_t	s_cong_update_received;
	uint64_t	s_cong_send_error;
	uint64_t	s_cong_send_blocked;
};

/* af_rds.c */
578
char *rds_str_array(char **array, size_t elements, size_t index);
579
void rds_sock_addref(struct rds_sock *rs);
580
void rds_sock_put(struct rds_sock *rs);
581
void rds_wake_sk_sleep(struct rds_sock *rs);
582
static inline void __rds_wake_sk_sleep(struct sock *sk)
583
{
584
wait_queue_head_t *waitq = sk_sleep(sk);
585
586
if (!sock_flag(sk, SOCK_DEAD) && waitq)
587
wake_up(waitq);
588
}
589
/* Wait queue defined elsewhere in RDS; only declared here. */
extern wait_queue_head_t rds_poll_waitq;


/* bind.c */
593
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
594
void rds_remove_bound(struct rds_sock *rs);
595
struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
596
597
/* cong.c */
598
int rds_cong_get_maps(struct rds_connection *conn);
599
void rds_cong_add_conn(struct rds_connection *conn);
600
void rds_cong_remove_conn(struct rds_connection *conn);
601
void rds_cong_set_bit(struct rds_cong_map *map, __be16 port);
602
void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port);
603
int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs);
604
void rds_cong_queue_updates(struct rds_cong_map *map);
605
void rds_cong_map_updated(struct rds_cong_map *map, uint64_t);
606
int rds_cong_updated_since(unsigned long *recent);
607
void rds_cong_add_socket(struct rds_sock *);
608
void rds_cong_remove_socket(struct rds_sock *);
609
void rds_cong_exit(void);
610
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
611
612
/* conn.c */
613
int rds_conn_init(void);
614
void rds_conn_exit(void);
615
struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
616
struct rds_transport *trans, gfp_t gfp);
617
struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
618
struct rds_transport *trans, gfp_t gfp);
619
void rds_conn_shutdown(struct rds_connection *conn);
620
void rds_conn_destroy(struct rds_connection *conn);
621
void rds_conn_drop(struct rds_connection *conn);
622
void rds_conn_connect_if_down(struct rds_connection *conn);
623
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
624
struct rds_info_iterator *iter,
625
struct rds_info_lengths *lens,
626
int (*visitor)(struct rds_connection *, void *),
627
size_t item_len);
628
void __rds_conn_error(struct rds_connection *conn, const char *, ...)
629
__attribute__ ((format (printf, 2, 3)));
630
#define rds_conn_error(conn, fmt...) \
631
__rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
632
633
static inline int
634
rds_conn_transition(struct rds_connection *conn, int old, int new)
635
{
636
return atomic_cmpxchg(&conn->c_state, old, new) == old;
637
}
638
639
static inline int
640
rds_conn_state(struct rds_connection *conn)
641
{
642
return atomic_read(&conn->c_state);
643
}
644
645
static inline int
646
rds_conn_up(struct rds_connection *conn)
647
{
648
return atomic_read(&conn->c_state) == RDS_CONN_UP;
649
}
650
651
static inline int
652
rds_conn_connecting(struct rds_connection *conn)
653
{
654
return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING;
655
}
656
657
/* message.c */
658
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
659
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
660
int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
661
size_t total_len);
662
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
663
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
664
__be16 dport, u64 seq);
665
int rds_message_add_extension(struct rds_header *hdr,
666
unsigned int type, const void *data, unsigned int len);
667
int rds_message_next_extension(struct rds_header *hdr,
668
unsigned int *pos, void *buf, unsigned int *buflen);
669
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
670
int rds_message_inc_copy_to_user(struct rds_incoming *inc,
671
struct iovec *first_iov, size_t size);
672
void rds_message_inc_free(struct rds_incoming *inc);
673
void rds_message_addref(struct rds_message *rm);
674
void rds_message_put(struct rds_message *rm);
675
void rds_message_wait(struct rds_message *rm);
676
void rds_message_unmapped(struct rds_message *rm);
677
678
static inline void rds_message_make_checksum(struct rds_header *hdr)
679
{
680
hdr->h_csum = 0;
681
hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2);
682
}
683
684
static inline int rds_message_verify_checksum(const struct rds_header *hdr)
685
{
686
return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0;
687
}
688
689
690
/* page.c */
691
int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
692
gfp_t gfp);
693
int rds_page_copy_user(struct page *page, unsigned long offset,
694
void __user *ptr, unsigned long bytes,
695
int to_user);
696
#define rds_page_copy_to_user(page, offset, ptr, bytes) \
697
rds_page_copy_user(page, offset, ptr, bytes, 1)
698
#define rds_page_copy_from_user(page, offset, ptr, bytes) \
699
rds_page_copy_user(page, offset, ptr, bytes, 0)
700
void rds_page_exit(void);
701
702
/* recv.c */
703
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
704
__be32 saddr);
705
void rds_inc_put(struct rds_incoming *inc);
706
void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
707
struct rds_incoming *inc, gfp_t gfp, enum km_type km);
708
int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
709
size_t size, int msg_flags);
710
void rds_clear_recv_queue(struct rds_sock *rs);
711
int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
712
void rds_inc_info_copy(struct rds_incoming *inc,
713
struct rds_info_iterator *iter,
714
__be32 saddr, __be32 daddr, int flip);
715
716
/* send.c */
717
int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
718
size_t payload_len);
719
void rds_send_reset(struct rds_connection *conn);
720
int rds_send_xmit(struct rds_connection *conn);
721
struct sockaddr_in;
722
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
723
typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
724
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
725
is_acked_func is_acked);
726
int rds_send_pong(struct rds_connection *conn, __be16 dport);
727
struct rds_message *rds_send_get_message(struct rds_connection *,
728
struct rm_rdma_op *);
729
730
/* rdma.c */
731
void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
732
int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
733
int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
734
int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
735
void rds_rdma_drop_keys(struct rds_sock *rs);
736
int rds_rdma_extra_size(struct rds_rdma_args *args);
737
int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
738
struct cmsghdr *cmsg);
739
int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
740
struct cmsghdr *cmsg);
741
int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
742
struct cmsghdr *cmsg);
743
int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
744
struct cmsghdr *cmsg);
745
void rds_rdma_free_op(struct rm_rdma_op *ro);
746
void rds_atomic_free_op(struct rm_atomic_op *ao);
747
void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
748
void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
749
int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
750
struct cmsghdr *cmsg);
751
752
extern void __rds_put_mr_final(struct rds_mr *mr);
753
static inline void rds_mr_put(struct rds_mr *mr)
754
{
755
if (atomic_dec_and_test(&mr->r_refcount))
756
__rds_put_mr_final(mr);
757
}
758
759
/* stats.c */
760
DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
761
#define rds_stats_inc_which(which, member) do { \
762
per_cpu(which, get_cpu()).member++; \
763
put_cpu(); \
764
} while (0)
765
#define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member)
766
#define rds_stats_add_which(which, member, count) do { \
767
per_cpu(which, get_cpu()).member += count; \
768
put_cpu(); \
769
} while (0)
770
#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
771
int rds_stats_init(void);
772
void rds_stats_exit(void);
773
void rds_stats_info_copy(struct rds_info_iterator *iter,
774
uint64_t *values, const char *const *names,
775
size_t nr);
776
777
/* sysctl.c */
int rds_sysctl_init(void);
void rds_sysctl_exit(void);
extern unsigned long rds_sysctl_sndbuf_min;
extern unsigned long rds_sysctl_sndbuf_default;
extern unsigned long rds_sysctl_sndbuf_max;
extern unsigned long rds_sysctl_reconnect_min_jiffies;
extern unsigned long rds_sysctl_reconnect_max_jiffies;
extern unsigned int  rds_sysctl_max_unacked_packets;
extern unsigned int  rds_sysctl_max_unacked_bytes;
extern unsigned int  rds_sysctl_ping_enable;
extern unsigned long rds_sysctl_trace_flags;
extern unsigned int  rds_sysctl_trace_level;

/* threads.c */
int rds_threads_init(void);
void rds_threads_exit(void);
extern struct workqueue_struct *rds_wq;
void rds_queue_reconnect(struct rds_connection *conn);
void rds_connect_worker(struct work_struct *work);
void rds_shutdown_worker(struct work_struct *work);
void rds_send_worker(struct work_struct *work);
void rds_recv_worker(struct work_struct *work);
void rds_connect_complete(struct rds_connection *conn);

/* transport.c */
803
int rds_trans_register(struct rds_transport *trans);
804
void rds_trans_unregister(struct rds_transport *trans);
805
struct rds_transport *rds_trans_get_preferred(__be32 addr);
806
void rds_trans_put(struct rds_transport *trans);
807
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
808
unsigned int avail);
809
int rds_trans_init(void);
810
void rds_trans_exit(void);
811
812
#endif
813
814