/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <[email protected]>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
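/*
 * Tuning note (a sketch, assuming the standard sysctl layout registered in
 * net/dccp/sysctl.c): this default is exposed to userspace under
 * net.dccp.default, e.g.
 *
 *	sysctl -w net.dccp.default.tx_qlen=10
 *	echo 10 > /proc/sys/net/dccp/default/tx_qlen
 *
 * A per-socket override is available via the DCCP_SOCKOPT_QPOLICY_TXQLEN
 * option handled in do_dccp_setsockopt() below.
 */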

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
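
/*
 * Example (userspace sketch, not part of this file): waiting on a connected
 * SOCK_DCCP descriptor goes through the ordinary poll(2) path and lands in
 * dccp_poll() above; handle_input()/handle_output() are hypothetical
 * application callbacks.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			handle_input(fd);	a data packet is queued
 *		if (pfd.revents & POLLOUT)
 *			handle_output(fd);	CCID/window space available
 *	}
 */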

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
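
/*
 * Example (userspace sketch): because DCCP preserves datagram boundaries,
 * SIOCINQ reports only the length of the packet at the head of the receive
 * queue, not the total number of queued bytes.
 *
 *	int next_len;
 *
 *	if (ioctl(fd, SIOCINQ, &next_len) == 0)
 *		printf("next packet carries %d bytes\n", next_len);
 */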

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
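
/*
 * Example (userspace sketch): an application that can tolerate payload
 * corruption, e.g. media streaming, might request partial sender-side
 * checksum coverage (values 0..15, 0 = full coverage; 10 is a hypothetical
 * choice):
 *
 *	int cscov = 10;
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		       &cscov, sizeof(cscov)) < 0)
 *		perror("DCCP_SOCKOPT_SEND_CSCOV");
 *
 * The receive-side counterpart is DCCP_SOCKOPT_RECV_CSCOV; both end up in
 * dccp_setsockopt_cscov() above, which negotiates the Minimum Checksum
 * Coverage feature with the peer.
 */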

static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);
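
/*
 * Example (userspace sketch, hypothetical service code and CCID list): a
 * client configures DCCP-level options before connecting, while the socket
 * is still closed, since CCIDs are settled by feature negotiation during
 * connection setup. Which CCIDs are actually available depends on the
 * kernel configuration.
 *
 *	uint32_t service = htonl(42);
 *	uint8_t ccids[] = { 3, 2 };	preference order, CCID-3 first
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID, ccids, sizeof(ccids));
 */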

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subsystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {

		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
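
/*
 * Example (userspace sketch): once DCCP_SOCKOPT_QPOLICY_ID selects a
 * priority-aware queueing policy, the sender can attach a DCCP_SCM_PRIORITY
 * ancillary value, which dccp_msghdr_parse() above copies into
 * skb->priority. The priority value 1 is a hypothetical choice; msg_iov and
 * msg_iovlen, omitted here, carry the payload as usual.
 *
 *	char cbuf[CMSG_SPACE(sizeof(uint32_t))];
 *	struct msghdr msg = { .msg_control = cbuf,
 *			      .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_DCCP;
 *	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
 *	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));
 *	*(uint32_t *)CMSG_DATA(cmsg) = 1;
 */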

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits releasing further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a never-connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
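
/*
 * Example (userspace sketch, hypothetical address and service code): each
 * send produces one DCCP packet, capped at the current MPS, and each recv
 * consumes exactly one packet, mirroring the one-skb-at-a-time loop above;
 * a read shorter than the packet sets MSG_TRUNC.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *	char buf[1400];
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	connect(fd, (struct sockaddr *)&srv, sizeof(srv));
 *	send(fd, "hello", 5, 0);		one packet out
 *	recv(fd, buf, sizeof(buf), 0);		one packet in
 */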

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
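
/*
 * Example (userspace sketch, hypothetical port and service code): the
 * server side follows the familiar bind/listen/accept pattern; listen(2)
 * on a SOCK_DCCP socket lands in inet_dccp_listen() above.
 *
 *	int srv = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *	struct sockaddr_in sin = {
 *		.sin_family	 = AF_INET,
 *		.sin_port	 = htons(5001),
 *		.sin_addr.s_addr = INADDR_ANY,
 *	};
 *
 *	setsockopt(srv, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(srv, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(srv, 5);
 *	int conn = accept(srv, NULL, NULL);
 */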

static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);
	/*
	 * We need to flush the receive buffers. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void __percpu **)dccp_statistics,
			     sizeof(struct dccp_mib),
			     __alignof__(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void __percpu **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
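
/*
 * Example (a sketch, assuming DCCP is built as the "dccp" module): both
 * parameters above can be given at load time, and dccp_debug (with
 * CONFIG_IP_DCCP_DEBUG, permissions 0644) can also be toggled at runtime
 * through sysfs:
 *
 *	modprobe dccp thash_entries=8192 dccp_debug=1
 *	echo 0 > /sys/module/dccp/parameters/dccp_debug
 */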

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <[email protected]>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");