GitHub Repository: torvalds/linux
Path: blob/master/net/ipv4/af_inet.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PF_INET protocol family socket handler.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <[email protected]>
 *		Florian La Roche, <[email protected]>
 *		Alan Cox, <[email protected]>
 *
 * Changes (see also sock.c)
 *
 *		piggy,
 *		Karl Knutson	:	Socket protocol table
 *		A.N.Kuznetsov	:	Socket death error in accept().
 *		John Richardson :	Fix non blocking error in connect()
 *					so sockets that fail to connect
 *					don't return -EINPROGRESS.
 *		Alan Cox	:	Asynchronous I/O support
 *		Alan Cox	:	Keep correct socket pointer on sock
 *					structures
 *					when accept() ed
 *		Alan Cox	:	Semantics of SO_LINGER aren't state
 *					moved to close when you look carefully.
 *					With this fixed and the accept bug fixed
 *					some RPC stuff seems happier.
 *		Niibe Yutaka	:	4.4BSD style write async I/O
 *		Alan Cox,
 *		Tony Gale	:	Fixed reuse semantics.
 *		Alan Cox	:	bind() shouldn't abort existing but dead
 *					sockets. Stops FTP netin:.. I hope.
 *		Alan Cox	:	bind() works correctly for RAW sockets.
 *					Note that FreeBSD at least was broken
 *					in this respect so be careful with
 *					compatibility tests...
 *		Alan Cox	:	routing cache support
 *		Alan Cox	:	memzero the socket structure for
 *					compactness.
 *		Matt Day	:	nonblock connect error handler
 *		Alan Cox	:	Allow large numbers of pending sockets
 *					(eg for big web sites), but only if
 *					specifically application requested.
 *		Alan Cox	:	New buffering throughout IP. Used
 *					dumbly.
 *		Alan Cox	:	New buffering now used smartly.
 *		Alan Cox	:	BSD rather than common sense
 *					interpretation of listen.
 *		Germano Caronni	:	Assorted small races.
 *		Alan Cox	:	sendmsg/recvmsg basic support.
 *		Alan Cox	:	Only sendmsg/recvmsg now supported.
 *		Alan Cox	:	Locked down bind (see security list).
 *		Alan Cox	:	Loosened bind a little.
 *		Mike McLagan	:	ADD/DEL DLCI Ioctls
 *	Willy Konynenberg	:	Transparent proxying support.
 *		David S. Miller	:	New socket lookup architecture.
 *					Some other random speedups.
 *		Cyrus Durgin	:	Cleaned up file for kmod hacks.
 *		Andi Kleen	:	Fix inet_stream_connect TCP race.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/slab.h>

#include <linux/uaccess.h>

#include <linux/inet.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/inet_connection_sock.h>
#include <net/gro.h>
#include <net/gso.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <net/ping.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/ip_tunnels.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/secure_seq.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
#include <net/l3mdev.h>
#include <net/compat.h>
#include <net/rps.h>

#include <trace/events/sock.h>

/* The inetsw table contains everything that inet_create needs to
 * build a new socket.
 */
static struct list_head inetsw[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw_lock);

/* New destruction routine */

void inet_sock_destruct(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_error_queue);

	sk_mem_reclaim_final(sk);

	if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
		pr_err("Attempt to release TCP socket in state %d %p\n",
		       sk->sk_state, sk);
		return;
	}
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive inet socket %p\n", sk);
		return;
	}

	WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON_ONCE(sk->sk_wmem_queued);
	WARN_ON_ONCE(sk->sk_forward_alloc);

	kfree(rcu_dereference_protected(inet->inet_opt, 1));
	dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
	dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
}
EXPORT_SYMBOL(inet_sock_destruct);

/*
 *	The routines beyond this point handle the behaviour of an AF_INET
 *	socket object. Mostly it punts to the subprotocols of IP to do
 *	the work.
 */

/*
 *	Automatically bind an unbound socket.
 */

static int inet_autobind(struct sock *sk)
{
	struct inet_sock *inet;
	/* We may need to bind the socket. */
	lock_sock(sk);
	inet = inet_sk(sk);
	if (!inet->inet_num) {
		if (sk->sk_prot->get_port(sk, 0)) {
			release_sock(sk);
			return -EAGAIN;
		}
		inet->inet_sport = htons(inet->inet_num);
	}
	release_sock(sk);
	return 0;
}

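/* Illustrative sketch (not part of the kernel sources): from user space,
 * inet_autobind() is what assigns an ephemeral port when a datagram
 * socket transmits before ever calling bind(). A minimal demonstration
 * under the usual POSIX socket API:
 *
 *	#include <arpa/inet.h>
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	int main(void)
 *	{
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *		struct sockaddr_in dst = { .sin_family = AF_INET,
 *					   .sin_port = htons(9) };
 *		struct sockaddr_in me;
 *		socklen_t len = sizeof(me);
 *
 *		inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
 *		sendto(fd, "x", 1, 0, (struct sockaddr *)&dst, sizeof(dst));
 *		getsockname(fd, (struct sockaddr *)&me, &len);
 *		printf("autobound port %u\n", ntohs(me.sin_port));
 *		return 0;
 *	}
 */
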
int __inet_listen_sk(struct sock *sk, int backlog)
{
	unsigned char old_state = sk->sk_state;
	int err, tcp_fastopen;

	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		return -EINVAL;

	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != TCP_LISTEN) {
		/* Enable TFO w/o requiring TCP_FASTOPEN socket option.
		 * Note that only TCP sockets (SOCK_STREAM) will reach here.
		 * Also the fastopen backlog may already have been set via the
		 * option because the socket was in TCP_LISTEN state previously
		 * but was shutdown() rather than close().
		 */
		tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
		if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
		    (tcp_fastopen & TFO_SERVER_ENABLE) &&
		    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
			fastopen_queue_tune(sk, backlog);
			tcp_fastopen_init_key_once(sock_net(sk));
		}

		err = inet_csk_listen_start(sk);
		if (err)
			return err;

		tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
	}
	return 0;
}

/*
 *	Move a socket into listening state.
 */
int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	int err = -EINVAL;

	lock_sock(sk);

	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
		goto out;

	err = __inet_listen_sk(sk, backlog);

out:
	release_sock(sk);
	return err;
}
EXPORT_SYMBOL(inet_listen);

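/* Illustrative sketch (not part of the kernel sources): the TFO branch
 * above lets a listener get fastopen queues without the TCP_FASTOPEN
 * socket option when net.ipv4.tcp_fastopen has TFO_SERVER_WO_SOCKOPT1
 * (0x400) set in addition to TFO_SERVER_ENABLE (0x2). The explicit
 * per-socket variant from user space is:
 *
 *	int qlen = 128;
 *
 *	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
 *	listen(fd, 128);
 */
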
/*
 *	Create an inet socket.
 */

static int inet_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;
	struct inet_protosw *answer;
	struct inet_sock *inet;
	struct proto *answer_prot;
	unsigned char answer_flags;
	int try_loading_module = 0;
	int err;

	if (protocol < 0 || protocol >= IPPROTO_MAX)
		return -EINVAL;

	sock->state = SS_UNCONNECTED;

	/* Look for the requested type/protocol pair. */
lookup_protocol:
	err = -ESOCKTNOSUPPORT;
	rcu_read_lock();
	list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

		err = 0;
		/* Check the non-wild match. */
		if (protocol == answer->protocol) {
			if (protocol != IPPROTO_IP)
				break;
		} else {
			/* Check for the two wild cases. */
			if (IPPROTO_IP == protocol) {
				protocol = answer->protocol;
				break;
			}
			if (IPPROTO_IP == answer->protocol)
				break;
		}
		err = -EPROTONOSUPPORT;
	}

	if (unlikely(err)) {
		if (try_loading_module < 2) {
			rcu_read_unlock();
			/*
			 * Be more specific, e.g. net-pf-2-proto-132-type-1
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
			 */
			if (++try_loading_module == 1)
				request_module("net-pf-%d-proto-%d-type-%d",
					       PF_INET, protocol, sock->type);
			/*
			 * Fall back to generic, e.g. net-pf-2-proto-132
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP)
			 */
			else
				request_module("net-pf-%d-proto-%d",
					       PF_INET, protocol);
			goto lookup_protocol;
		} else
			goto out_rcu_unlock;
	}

	err = -EPERM;
	if (sock->type == SOCK_RAW && !kern &&
	    !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out_rcu_unlock;

	sock->ops = answer->ops;
	answer_prot = answer->prot;
	answer_flags = answer->flags;
	rcu_read_unlock();

	WARN_ON(!answer_prot->slab);

	err = -ENOMEM;
	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
	if (!sk)
		goto out;

	err = 0;
	if (INET_PROTOSW_REUSE & answer_flags)
		sk->sk_reuse = SK_CAN_REUSE;

	if (INET_PROTOSW_ICSK & answer_flags)
		inet_init_csk_locks(sk);

	inet = inet_sk(sk);
	inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);

	inet_clear_bit(NODEFRAG, sk);

	if (SOCK_RAW == sock->type) {
		inet->inet_num = protocol;
		if (IPPROTO_RAW == protocol)
			inet_set_bit(HDRINCL, sk);
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
		inet->pmtudisc = IP_PMTUDISC_DONT;
	else
		inet->pmtudisc = IP_PMTUDISC_WANT;

	atomic_set(&inet->inet_id, 0);

	sock_init_data(sock, sk);

	sk->sk_destruct	   = inet_sock_destruct;
	sk->sk_protocol	   = protocol;
	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
	sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);

	inet->uc_ttl	= -1;
	inet_set_bit(MC_LOOP, sk);
	inet->mc_ttl	= 1;
	inet_set_bit(MC_ALL, sk);
	inet->mc_index	= 0;
	inet->mc_list	= NULL;
	inet->rcv_tos	= 0;

	if (inet->inet_num) {
		/* It assumes that any protocol which allows
		 * the user to assign a number at socket
		 * creation time automatically
		 * shares.
		 */
		inet->inet_sport = htons(inet->inet_num);
		/* Add to protocol hash chains. */
		err = sk->sk_prot->hash(sk);
		if (err)
			goto out_sk_release;
	}

	if (sk->sk_prot->init) {
		err = sk->sk_prot->init(sk);
		if (err)
			goto out_sk_release;
	}

	if (!kern) {
		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
		if (err)
			goto out_sk_release;
	}
out:
	return err;
out_rcu_unlock:
	rcu_read_unlock();
	goto out;
out_sk_release:
	sk_common_release(sk);
	sock->sk = NULL;
	goto out;
}


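/* Worked example of the module request naming used above: with no SCTP
 * support loaded,
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
 *
 * first tries "net-pf-2-proto-132-type-1" (PF_INET == 2,
 * IPPROTO_SCTP == 132, SOCK_STREAM == 1) and then falls back to
 * "net-pf-2-proto-132", which a module alias in the SCTP module
 * resolves.
 */
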
/*
 *	The peer socket should always be NULL (or else). When we call this
 *	function we are destroying the object and from then on nobody
 *	should refer to it.
 */
int inet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk) {
		long timeout;

		if (!sk->sk_kern_sock)
			BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk);

		/* Applications forget to leave groups before exiting */
		ip_mc_drop_socket(sk);

		/* If linger is set, we don't return until the close
		 * is complete.  Otherwise we return immediately. The
		 * actual closing is done the same either way.
		 *
		 * If the close is due to the process exiting, we never
		 * linger..
		 */
		timeout = 0;
		if (sock_flag(sk, SOCK_LINGER) &&
		    !(current->flags & PF_EXITING))
			timeout = sk->sk_lingertime;
		sk->sk_prot->close(sk, timeout);
		sock->sk = NULL;
	}
	return 0;
}
EXPORT_SYMBOL(inet_release);

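/* Illustrative sketch (not part of the kernel sources): the timeout
 * picked above comes from SO_LINGER, so close() only blocks when
 * lingering was requested and the process is not exiting:
 *
 *	struct linger lg = { .l_onoff = 1, .l_linger = 5 };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
 *	close(fd);	// may now block for up to 5 seconds
 */
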
int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	u32 flags = BIND_WITH_LOCK;
	int err;

	/* If the socket has its own bind function then use it. (RAW) */
	if (sk->sk_prot->bind) {
		return sk->sk_prot->bind(sk, uaddr, addr_len);
	}
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	/* BPF prog is run before any checks are done so that if the prog
	 * changes context in a wrong way it will be caught.
	 */
	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
						 CGROUP_INET4_BIND, &flags);
	if (err)
		return err;

	return __inet_bind(sk, uaddr, addr_len, flags);
}

int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	return inet_bind_sk(sock->sk, uaddr, addr_len);
}
EXPORT_SYMBOL(inet_bind);

int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
		u32 flags)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	unsigned short snum;
	int chk_addr_ret;
	u32 tb_id = RT_TABLE_LOCAL;
	int err;

	if (addr->sin_family != AF_INET) {
		/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
		 * only if s_addr is INADDR_ANY.
		 */
		err = -EAFNOSUPPORT;
		if (addr->sin_family != AF_UNSPEC ||
		    addr->sin_addr.s_addr != htonl(INADDR_ANY))
			goto out;
	}

	tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
	chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);

	/* Not specified by any standard per se, however it breaks too
	 * many applications when removed. It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 * is temporarily down)
	 */
	err = -EADDRNOTAVAIL;
	if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr,
					 chk_addr_ret))
		goto out;

	snum = ntohs(addr->sin_port);
	err = -EACCES;
	if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
	    snum && inet_port_requires_bind_service(net, snum) &&
	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
		goto out;

	/* We keep a pair of addresses. rcv_saddr is the one
	 * used by hash lookups, and saddr is used for transmit.
	 *
	 * In the BSD API these are the same except where it
	 * would be illegal to use them (multicast/broadcast) in
	 * which case the sending device address is used.
	 */
	if (flags & BIND_WITH_LOCK)
		lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	err = -EINVAL;
	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
		goto out_release_sock;

	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->inet_saddr = 0;  /* Use device */

	/* Make sure we are allowed to bind here. */
	if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
		err = sk->sk_prot->get_port(sk, snum);
		if (err) {
			inet->inet_saddr = inet->inet_rcv_saddr = 0;
			goto out_release_sock;
		}
		if (!(flags & BIND_FROM_BPF)) {
			err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
			if (err) {
				inet->inet_saddr = inet->inet_rcv_saddr = 0;
				if (sk->sk_prot->put_port)
					sk->sk_prot->put_port(sk);
				goto out_release_sock;
			}
		}
	}

	if (inet->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	inet->inet_sport = htons(inet->inet_num);
	inet->inet_daddr = 0;
	inet->inet_dport = 0;
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	if (flags & BIND_WITH_LOCK)
		release_sock(sk);
out:
	return err;
}

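/* Illustrative sketch (not part of the kernel sources): the
 * BIND_ADDRESS_NO_PORT test above is what lets a client pin its source
 * address without consuming an ephemeral port at bind() time; the port
 * is then chosen at connect():
 *
 *	int one = 1;
 *	struct sockaddr_in src = { .sin_family = AF_INET };	// port 0
 *
 *	inet_pton(AF_INET, "192.0.2.1", &src.sin_addr);
 *	setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT,
 *		   &one, sizeof(one));
 *	bind(fd, (struct sockaddr *)&src, sizeof(src));
 */
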
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
		       int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	const struct proto *prot;
	int err;

	if (addr_len < sizeof(uaddr->sa_family))
		return -EINVAL;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	prot = READ_ONCE(sk->sk_prot);

	if (uaddr->sa_family == AF_UNSPEC)
		return prot->disconnect(sk, flags);

	if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
		err = prot->pre_connect(sk, uaddr, addr_len);
		if (err)
			return err;
	}

	if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
		return -EAGAIN;
	return prot->connect(sk, uaddr, addr_len);
}
EXPORT_SYMBOL(inet_dgram_connect);

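/* Illustrative sketch (not part of the kernel sources): passing
 * AF_UNSPEC to connect() on a datagram socket reaches the
 * prot->disconnect() call above and dissolves the association:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));	// back to an unconnected socket
 */
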
static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(sk_sleep(sk), &wait);
	sk->sk_write_pending += writebias;

	/* Basic assumption: if someone sets sk->sk_err, he _must_
	 * change state of the socket from TCP_SYN_*.
	 * Connect() does not allow to get error notifications
	 * without closing the socket.
	 */
	while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		release_sock(sk);
		timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
		lock_sock(sk);
		if (signal_pending(current) || !timeo)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	sk->sk_write_pending -= writebias;
	return timeo;
}

/*
 *	Connect to a remote host. There is regrettably still a little
 *	TCP 'magic' in here.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			  int addr_len, int flags, int is_sendmsg)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	/*
	 * uaddr can be NULL and addr_len can be 0 if:
	 * sk is a TCP fastopen active socket and
	 * TCP_FASTOPEN_CONNECT sockopt is set and
	 * we already have a valid cookie for this socket.
	 * In this case, user can call write() after connect().
	 * write() will invoke tcp_sendmsg_fastopen() which calls
	 * __inet_stream_connect().
	 */
	if (uaddr) {
		if (addr_len < sizeof(uaddr->sa_family))
			return -EINVAL;

		if (uaddr->sa_family == AF_UNSPEC) {
			sk->sk_disconnects++;
			err = sk->sk_prot->disconnect(sk, flags);
			sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
			goto out;
		}
	}

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		if (inet_test_bit(DEFER_CONNECT, sk))
			err = is_sendmsg ? -EINPROGRESS : -EISCONN;
		else
			err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED:
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;

		if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
			err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
			if (err)
				goto out;
		}

		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;

		sock->state = SS_CONNECTING;

		if (!err && inet_test_bit(DEFER_CONNECT, sk))
			goto out;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		err = -EINPROGRESS;
		break;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
				tcp_sk(sk)->fastopen_req &&
				tcp_sk(sk)->fastopen_req->data ? 1 : 0;
		int dis = sk->sk_disconnects;

		/* Error code is set above */
		if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
			goto out;

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;

		if (dis != sk->sk_disconnects) {
			err = -EPIPE;
			goto out;
		}
	}

	/* Connection was closed by RST, timeout, ICMP error
	 * or another process disconnected us.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;

	/* sk->sk_err may be nonzero now, if RECVERR was ordered by the user
	 * and the error was received after the socket entered established
	 * state. Hence, it is handled normally after connect() returns
	 * successfully.
	 */

	sock->state = SS_CONNECTED;
	err = 0;
out:
	return err;

sock_error:
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	sk->sk_disconnects++;
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}
EXPORT_SYMBOL(__inet_stream_connect);

int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	int err;

	lock_sock(sock->sk);
	err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
	release_sock(sock->sk);
	return err;
}
EXPORT_SYMBOL(inet_stream_connect);

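/* Illustrative sketch (not part of the kernel sources): the -EINPROGRESS
 * path above is what a non-blocking client sees; completion is usually
 * observed by polling for writability and then reading SO_ERROR:
 *
 *	fcntl(fd, F_SETFL, O_NONBLOCK);
 *	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0 &&
 *	    errno == EINPROGRESS) {
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *		int err;
 *		socklen_t elen = sizeof(err);
 *
 *		poll(&pfd, 1, 5000);
 *		getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &elen);
 *	}
 */
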
void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk)
{
	sock_rps_record_flow(newsk);
	WARN_ON(!((1 << newsk->sk_state) &
		  (TCPF_ESTABLISHED | TCPF_SYN_RECV |
		   TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
		   TCPF_CLOSING | TCPF_CLOSE_WAIT |
		   TCPF_CLOSE)));

	if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
	sock_graft(newsk, newsock);

	newsock->state = SS_CONNECTED;
}

/*
 *	Accept a pending connection. The TCP layer now gives BSD semantics.
 */

int inet_accept(struct socket *sock, struct socket *newsock,
		struct proto_accept_arg *arg)
{
	struct sock *sk1 = sock->sk, *sk2;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	arg->err = -EINVAL;
	sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg);
	if (!sk2)
		return arg->err;

	lock_sock(sk2);
	__inet_accept(sock, newsock, sk2);
	release_sock(sk2);
	return 0;
}
EXPORT_SYMBOL(inet_accept);

/*
 *	This does both peername and sockname.
 */
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
		 int peer)
{
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
	int sin_addr_len = sizeof(*sin);

	sin->sin_family = AF_INET;
	lock_sock(sk);
	if (peer) {
		if (!inet->inet_dport ||
		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
		     peer == 1)) {
			release_sock(sk);
			return -ENOTCONN;
		}
		sin->sin_port = inet->inet_dport;
		sin->sin_addr.s_addr = inet->inet_daddr;
		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
				       CGROUP_INET4_GETPEERNAME);
	} else {
		__be32 addr = inet->inet_rcv_saddr;
		if (!addr)
			addr = inet->inet_saddr;
		sin->sin_port = inet->inet_sport;
		sin->sin_addr.s_addr = addr;
		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
				       CGROUP_INET4_GETSOCKNAME);
	}
	release_sock(sk);
	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	return sin_addr_len;
}
EXPORT_SYMBOL(inet_getname);

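/* Illustrative sketch (not part of the kernel sources): inet_getname()
 * backs both getsockname() (peer == 0) and getpeername() (peer == 1);
 * the peer variant fails with -ENOTCONN in exactly the states rejected
 * above:
 *
 *	struct sockaddr_in a;
 *	socklen_t alen = sizeof(a);
 *
 *	getsockname(fd, (struct sockaddr *)&a, &alen);	// local addr/port
 *	getpeername(fd, (struct sockaddr *)&a, &alen);	// ENOTCONN if unconnected
 */
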
int inet_send_prepare(struct sock *sk)
{
	sock_rps_record_flow(sk);

	/* We may need to bind the socket. */
	if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
	    inet_autobind(sk))
		return -EAGAIN;

	return 0;
}
EXPORT_SYMBOL_GPL(inet_send_prepare);

int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
	struct sock *sk = sock->sk;

	if (unlikely(inet_send_prepare(sk)))
		return -EAGAIN;

	return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
			       sk, msg, size);
}
EXPORT_SYMBOL(inet_sendmsg);

void inet_splice_eof(struct socket *sock)
{
	const struct proto *prot;
	struct sock *sk = sock->sk;

	if (unlikely(inet_send_prepare(sk)))
		return;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	prot = READ_ONCE(sk->sk_prot);
	if (prot->splice_eof)
		prot->splice_eof(sock);
}
EXPORT_SYMBOL_GPL(inet_splice_eof);

INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
					  size_t, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
		 int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	if (likely(!(flags & MSG_ERRQUEUE)))
		sock_rps_record_flow(sk);

	err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
			      sk, msg, size, flags, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(inet_recvmsg);

int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* This should really check to make sure
	 * the socket is a TCP socket. (WHY AC...)
	 */
	how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
		       1->2 bit 2 snds.
		       2->3 */
	if ((how & ~SHUTDOWN_MASK) || !how)	/* MAXINT->0 */
		return -EINVAL;

	lock_sock(sk);
	if (sock->state == SS_CONNECTING) {
		if ((1 << sk->sk_state) &
		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
			sock->state = SS_DISCONNECTING;
		else
			sock->state = SS_CONNECTED;
	}

	switch (sk->sk_state) {
	case TCP_CLOSE:
		err = -ENOTCONN;
		/* Hack to wake up other listeners, who can poll for
		   EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
		fallthrough;
	default:
		WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how);
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);
		break;

	/* Remaining two branches are temporary solution for missing
	 * close() in multithreaded environment. It is _not_ a good idea,
	 * but we have no choice until close() is repaired at VFS level.
	 */
	case TCP_LISTEN:
		if (!(how & RCV_SHUTDOWN))
			break;
		fallthrough;
	case TCP_SYN_SENT:
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	}

	/* Wake up anyone sleeping in poll. */
	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}
EXPORT_SYMBOL(inet_shutdown);

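/* Worked mapping for the how++ trick above: SHUT_RD (0) becomes 1 ==
 * RCV_SHUTDOWN, SHUT_WR (1) becomes 2 == SEND_SHUTDOWN, and SHUT_RDWR (2)
 * becomes 3 == SHUTDOWN_MASK, so each direction occupies its own bit:
 *
 *	shutdown(fd, SHUT_WR);	// send FIN; reads still possible
 */
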
/*
 *	ioctl() calls you can issue on an INET socket. Most of these are
 *	device configuration and stuff and very rarely used. Some ioctls
 *	pass on to the socket itself.
 *
 *	NOTE: I like the idea of a module for the config stuff. i.e. ifconfig
 *	loads the devconfigure module, does its configuring and unloads it.
 *	There's a good 20K of config code hanging around the kernel.
 */

int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	int err = 0;
	struct net *net = sock_net(sk);
	void __user *p = (void __user *)arg;
	struct ifreq ifr;
	struct rtentry rt;

	switch (cmd) {
	case SIOCADDRT:
	case SIOCDELRT:
		if (copy_from_user(&rt, p, sizeof(struct rtentry)))
			return -EFAULT;
		err = ip_rt_ioctl(net, cmd, &rt);
		break;
	case SIOCRTMSG:
		err = -EINVAL;
		break;
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
		err = arp_ioctl(net, cmd, (void __user *)arg);
		break;
	case SIOCGIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCGIFPFLAGS:
		if (get_user_ifreq(&ifr, NULL, p))
			return -EFAULT;
		err = devinet_ioctl(net, cmd, &ifr);
		if (!err && put_user_ifreq(&ifr, p))
			err = -EFAULT;
		break;

	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
	case SIOCSIFPFLAGS:
	case SIOCSIFFLAGS:
		if (get_user_ifreq(&ifr, NULL, p))
			return -EFAULT;
		err = devinet_ioctl(net, cmd, &ifr);
		break;
	default:
		if (sk->sk_prot->ioctl)
			err = sk_ioctl(sk, cmd, (void __user *)arg);
		else
			err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
EXPORT_SYMBOL(inet_ioctl);

#ifdef CONFIG_COMPAT
static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
		struct compat_rtentry __user *ur)
{
	compat_uptr_t rtdev;
	struct rtentry rt;

	if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
			3 * sizeof(struct sockaddr)) ||
	    get_user(rt.rt_flags, &ur->rt_flags) ||
	    get_user(rt.rt_metric, &ur->rt_metric) ||
	    get_user(rt.rt_mtu, &ur->rt_mtu) ||
	    get_user(rt.rt_window, &ur->rt_window) ||
	    get_user(rt.rt_irtt, &ur->rt_irtt) ||
	    get_user(rtdev, &ur->rt_dev))
		return -EFAULT;

	rt.rt_dev = compat_ptr(rtdev);
	return ip_rt_ioctl(sock_net(sk), cmd, &rt);
}

static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	void __user *argp = compat_ptr(arg);
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCADDRT:
	case SIOCDELRT:
		return inet_compat_routing_ioctl(sk, cmd, argp);
	default:
		if (!sk->sk_prot->compat_ioctl)
			return -ENOIOCTLCMD;
		return sk->sk_prot->compat_ioctl(sk, cmd, arg);
	}
}
#endif /* CONFIG_COMPAT */

const struct proto_ops inet_stream_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_stream_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = inet_accept,
	.getname	   = inet_getname,
	.poll		   = tcp_poll,
	.ioctl		   = inet_ioctl,
	.gettstamp	   = sock_gettstamp,
	.listen		   = inet_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
#ifdef CONFIG_MMU
	.mmap		   = tcp_mmap,
#endif
	.splice_eof	   = inet_splice_eof,
	.splice_read	   = tcp_splice_read,
	.set_peek_off	   = sk_set_peek_off,
	.read_sock	   = tcp_read_sock,
	.read_skb	   = tcp_read_skb,
	.sendmsg_locked    = tcp_sendmsg_locked,
	.peek_len	   = tcp_peek_len,
#ifdef CONFIG_COMPAT
	.compat_ioctl	   = inet_compat_ioctl,
#endif
	.set_rcvlowat	   = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);

const struct proto_ops inet_dgram_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = udp_poll,
	.ioctl		   = inet_ioctl,
	.gettstamp	   = sock_gettstamp,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.read_skb	   = udp_read_skb,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.splice_eof	   = inet_splice_eof,
	.set_peek_off	   = udp_set_peek_off,
#ifdef CONFIG_COMPAT
	.compat_ioctl	   = inet_compat_ioctl,
#endif
};
EXPORT_SYMBOL(inet_dgram_ops);

/*
 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
 * udp_poll
 */
static const struct proto_ops inet_sockraw_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = datagram_poll,
	.ioctl		   = inet_ioctl,
	.gettstamp	   = sock_gettstamp,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.splice_eof	   = inet_splice_eof,
#ifdef CONFIG_COMPAT
	.compat_ioctl	   = inet_compat_ioctl,
#endif
};

static const struct net_proto_family inet_family_ops = {
	.family = PF_INET,
	.create = inet_create,
	.owner	= THIS_MODULE,
};

/* Upon startup we insert all the elements in inetsw_array[] into
 * the linked list inetsw.
 */
static struct inet_protosw inetsw_array[] =
{
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.flags =      INET_PROTOSW_PERMANENT,
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_ICMP,
		.prot =       &ping_prot,
		.ops =        &inet_sockraw_ops,
		.flags =      INET_PROTOSW_REUSE,
	},

	{
		.type =       SOCK_RAW,
		.protocol =   IPPROTO_IP,	/* wild card */
		.prot =       &raw_prot,
		.ops =        &inet_sockraw_ops,
		.flags =      INET_PROTOSW_REUSE,
	}
};

#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)

void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	if (p->type >= SOCK_MAX)
		goto out_illegal;

	/* If we are trying to override a permanent protocol, bail. */
	last_perm = &inetsw[p->type];
	list_for_each(lh, &inetsw[p->type]) {
		answer = list_entry(lh, struct inet_protosw, list);
		/* Check only the non-wild match. */
		if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
			break;
		if (protocol == answer->protocol)
			goto out_permanent;
		last_perm = lh;
	}

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry.  This means that when we remove this entry, the
	 * system automatically returns to the old behavior.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	return;

out_permanent:
	pr_err("Attempt to override permanent protocol %d\n", protocol);
	goto out;

out_illegal:
	pr_err("Ignoring attempt to register invalid socket type %d\n",
	       p->type);
	goto out;
}
EXPORT_SYMBOL(inet_register_protosw);

void inet_unregister_protosw(struct inet_protosw *p)
{
	if (INET_PROTOSW_PERMANENT & p->flags) {
		pr_err("Attempt to unregister permanent protocol %d\n",
		       p->protocol);
	} else {
		spin_lock_bh(&inetsw_lock);
		list_del_rcu(&p->list);
		spin_unlock_bh(&inetsw_lock);

		synchronize_net();
	}
}
EXPORT_SYMBOL(inet_unregister_protosw);

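/* Illustrative sketch (hypothetical module, not part of this file): an
 * extra L4 protocol plugs itself into inetsw with exactly this pair of
 * calls, assuming my_prot/my_proto_ops were already set up and passed
 * through proto_register():
 *
 *	static struct inet_protosw my_protosw = {
 *		.type     = SOCK_STREAM,
 *		.protocol = IPPROTO_SCTP,
 *		.prot     = &my_prot,		// hypothetical
 *		.ops      = &my_proto_ops,	// hypothetical
 *	};
 *
 *	inet_register_protosw(&my_protosw);	// on module init
 *	inet_unregister_protosw(&my_protosw);	// on module exit
 */
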
static int inet_sk_reselect_saddr(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	__be32 old_saddr = inet->inet_saddr;
	__be32 daddr = inet->inet_daddr;
	struct flowi4 *fl4;
	struct rtable *rt;
	__be32 new_saddr;
	struct ip_options_rcu *inet_opt;
	int err;

	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;

	/* Query new route. */
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
			      sk->sk_protocol, inet->inet_sport,
			      inet->inet_dport, sk);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	new_saddr = fl4->saddr;

	if (new_saddr == old_saddr) {
		sk_setup_caps(sk, &rt->dst);
		return 0;
	}

	err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET);
	if (err) {
		ip_rt_put(rt);
		return err;
	}

	sk_setup_caps(sk, &rt->dst);

	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
		pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
			__func__, &old_saddr, &new_saddr);
	}

	/*
	 * XXX The only one ugly spot where we need to
	 * XXX really change the sockets identity after
	 * XXX it has entered the hashes. -DaveM
	 *
	 * Besides that, it does not check for connection
	 * uniqueness. Wait for troubles.
	 */
	return __sk_prot_rehash(sk);
}

int inet_sk_rebuild_header(struct sock *sk)
{
	struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
	struct inet_sock *inet = inet_sk(sk);
	struct flowi4 *fl4;
	int err;

	/* Route is OK, nothing to do. */
	if (rt)
		return 0;

	/* Reroute. */
	fl4 = &inet->cork.fl.u.ip4;
	inet_sk_init_flowi4(inet, fl4);
	rt = ip_route_output_flow(sock_net(sk), fl4, sk);
	if (!IS_ERR(rt)) {
		err = 0;
		sk_setup_caps(sk, &rt->dst);
	} else {
		err = PTR_ERR(rt);

		/* Routing failed... */
		sk->sk_route_caps = 0;

		if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
		    sk->sk_state != TCP_SYN_SENT ||
		    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
		    (err = inet_sk_reselect_saddr(sk)) != 0)
			WRITE_ONCE(sk->sk_err_soft, -err);
	}

	return err;
}
EXPORT_SYMBOL(inet_sk_rebuild_header);

void inet_sk_set_state(struct sock *sk, int state)
{
	trace_inet_sock_set_state(sk, sk->sk_state, state);
	sk->sk_state = state;
}
EXPORT_SYMBOL(inet_sk_set_state);

void inet_sk_state_store(struct sock *sk, int newstate)
{
	trace_inet_sock_set_state(sk, sk->sk_state, newstate);
	smp_store_release(&sk->sk_state, newstate);
}

struct sk_buff *inet_gso_segment(struct sk_buff *skb,
				 netdev_features_t features)
{
	bool udpfrag = false, fixedid = false, gso_partial, encap;
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	const struct net_offload *ops;
	unsigned int offset = 0;
	struct iphdr *iph;
	int proto, tot_len;
	int nhoff;
	int ihl;
	int id;

	skb_reset_network_header(skb);
	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
		goto out;

	iph = ip_hdr(skb);
	ihl = iph->ihl * 4;
	if (ihl < sizeof(*iph))
		goto out;

	id = ntohs(iph->id);
	proto = iph->protocol;

	/* Warning: after this point, iph might no longer be valid */
	if (unlikely(!pskb_may_pull(skb, ihl)))
		goto out;
	__skb_pull(skb, ihl);

	encap = SKB_GSO_CB(skb)->encap_level > 0;
	if (encap)
		features &= skb->dev->hw_enc_features;
	SKB_GSO_CB(skb)->encap_level += ihl;

	skb_reset_transport_header(skb);

	segs = ERR_PTR(-EPROTONOSUPPORT);

	if (!skb->encapsulation || encap) {
		udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
		fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);

		/* fixed ID is invalid if DF bit is not set */
		if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
			goto out;
	}

	ops = rcu_dereference(inet_offloads[proto]);
	if (likely(ops && ops->callbacks.gso_segment)) {
		segs = ops->callbacks.gso_segment(skb, features);
		if (!segs)
			skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
	}

	if (IS_ERR_OR_NULL(segs))
		goto out;

	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);

	skb = segs;
	do {
		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
		if (udpfrag) {
			iph->frag_off = htons(offset >> 3);
			if (skb->next)
				iph->frag_off |= htons(IP_MF);
			offset += skb->len - nhoff - ihl;
			tot_len = skb->len - nhoff;
		} else if (skb_is_gso(skb)) {
			if (!fixedid) {
				iph->id = htons(id);
				id += skb_shinfo(skb)->gso_segs;
			}

			if (gso_partial)
				tot_len = skb_shinfo(skb)->gso_size +
					  SKB_GSO_CB(skb)->data_offset +
					  skb->head - (unsigned char *)iph;
			else
				tot_len = skb->len - nhoff;
		} else {
			if (!fixedid)
				iph->id = htons(id++);
			tot_len = skb->len - nhoff;
		}
		iph->tot_len = htons(tot_len);
		ip_send_check(iph);
		if (encap)
			skb_reset_inner_headers(skb);
		skb->network_header = (u8 *)iph - skb->head;
		skb_reset_mac_len(skb);
	} while ((skb = skb->next));

out:
	return segs;
}

static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
					netdev_features_t features)
{
	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
		return ERR_PTR(-EINVAL);

	return inet_gso_segment(skb, features);
}

struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
	const struct net_offload *ops;
	struct sk_buff *pp = NULL;
	const struct iphdr *iph;
	struct sk_buff *p;
	unsigned int hlen;
	unsigned int off;
	int flush = 1;
	int proto;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph = skb_gro_header(skb, hlen, off);
	if (unlikely(!iph))
		goto out;

	proto = iph->protocol;

	ops = rcu_dereference(inet_offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
		goto out;

	if (*(u8 *)iph != 0x45)
		goto out;

	if (ip_is_fragment(iph))
		goto out;

	if (unlikely(ip_fast_csum((u8 *)iph, 5)))
		goto out;

	NAPI_GRO_CB(skb)->proto = proto;
	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) |
		      (ntohl(*(__be32 *)&iph->id) & ~IP_DF));

	list_for_each_entry(p, head, list) {
		struct iphdr *iph2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		iph2 = (struct iphdr *)(p->data + off);
		/* The above works because, with the exception of the top
		 * (inner most) layer, we only aggregate pkts with the same
		 * hdr length so all the hdrs we'll need to verify will start
		 * at the same offset.
		 */
		if ((iph->protocol ^ iph2->protocol) |
		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	NAPI_GRO_CB(skb)->flush |= flush;
	NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;

	/* Note : No need to call skb_gro_postpull_rcsum() here,
	 * as we already checked that the checksum over the IPv4 header was 0
	 */
	skb_gro_pull(skb, sizeof(*iph));
	skb_set_transport_header(skb, skb_gro_offset(skb));

	pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
				       ops->callbacks.gro_receive, head, skb);

out:
	skb_gro_flush_final(skb, pp, flush);

	return pp;
}

static struct sk_buff *ipip_gro_receive(struct list_head *head,
					struct sk_buff *skb)
{
	if (NAPI_GRO_CB(skb)->encap_mark) {
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	NAPI_GRO_CB(skb)->encap_mark = 1;

	return inet_gro_receive(head, skb);
}

#define SECONDS_PER_DAY	86400

/* inet_current_timestamp - Return IP network timestamp
 *
 * Return milliseconds since midnight in network byte order.
 */
__be32 inet_current_timestamp(void)
{
	u32 secs;
	u32 msecs;
	struct timespec64 ts;

	ktime_get_real_ts64(&ts);

	/* Get secs since midnight. */
	(void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs);
	/* Convert to msecs. */
	msecs = secs * MSEC_PER_SEC;
	/* Convert nsec to msec. */
	msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC;

	/* Convert to network byte order. */
	return htonl(msecs);
}
EXPORT_SYMBOL(inet_current_timestamp);

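/* Worked example for inet_current_timestamp(): at 01:02:03.004 UTC,
 * secs = 1 * 3600 + 2 * 60 + 3 = 3723, so
 * msecs = 3723 * 1000 + 4 = 3723004, and the function returns
 * htonl(3723004) for use in the IP timestamp option (RFC 791) and in
 * ICMP timestamp messages, both of which carry milliseconds since
 * midnight UT.
 */
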
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
	unsigned int family = READ_ONCE(sk->sk_family);

	if (family == AF_INET)
		return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
	return -EINVAL;
}
EXPORT_SYMBOL(inet_recv_error);

int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
	const struct net_offload *ops;
	__be16 totlen = iph->tot_len;
	int proto = iph->protocol;
	int err = -ENOSYS;

	if (skb->encapsulation) {
		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
		skb_set_inner_network_header(skb, nhoff);
	}

	iph_set_totlen(iph, skb->len - nhoff);
	csum_replace2(&iph->check, totlen, iph->tot_len);

	ops = rcu_dereference(inet_offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out;

	/* Only need to add sizeof(*iph) to get to the next hdr below
	 * because any hdr with option will have been flushed in
	 * inet_gro_receive().
	 */
	err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
			      tcp4_gro_complete, udp4_gro_complete,
			      skb, nhoff + sizeof(*iph));

out:
	return err;
}

static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
{
	skb->encapsulation = 1;
	skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
	return inet_gro_complete(skb, nhoff);
}

int inet_ctl_sock_create(struct sock **sk, unsigned short family,
			 unsigned short type, unsigned char protocol,
			 struct net *net)
{
	struct socket *sock;
	int rc = sock_create_kern(net, family, type, protocol, &sock);

	if (rc == 0) {
		*sk = sock->sk;
		(*sk)->sk_allocation = GFP_ATOMIC;
		(*sk)->sk_use_task_frag = false;
		/*
		 * Unhash it so that IP input processing does not even see it,
		 * we do not wish this socket to see incoming packets.
		 */
		(*sk)->sk_prot->unhash(*sk);
	}
	return rc;
}
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);

unsigned long snmp_fold_field(void __percpu *mib, int offt)
{
	unsigned long res = 0;
	int i;

	for_each_possible_cpu(i)
		res += snmp_get_cpu_field(mib, i, offt);
	return res;
}
EXPORT_SYMBOL_GPL(snmp_fold_field);

#if BITS_PER_LONG==32

u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt,
			 size_t syncp_offset)
{
	void *bhptr;
	struct u64_stats_sync *syncp;
	u64 v;
	unsigned int start;

	bhptr = per_cpu_ptr(mib, cpu);
	syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
	do {
		start = u64_stats_fetch_begin(syncp);
		v = *(((u64 *)bhptr) + offt);
	} while (u64_stats_fetch_retry(syncp, start));

	return v;
}
EXPORT_SYMBOL_GPL(snmp_get_cpu_field64);

u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
{
	u64 res = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		res += snmp_get_cpu_field64(mib, cpu, offt, syncp_offset);
	}
	return res;
}
EXPORT_SYMBOL_GPL(snmp_fold_field64);
#endif

#ifdef CONFIG_IP_MULTICAST
static const struct net_protocol igmp_protocol = {
	.handler =	igmp_rcv,
};
#endif

static const struct net_protocol icmp_protocol = {
	.handler =	icmp_rcv,
	.err_handler =	icmp_err,
	.no_policy =	1,
};

static __net_init int ipv4_mib_init_net(struct net *net)
{
	int i;

	net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
	if (!net->mib.tcp_statistics)
		goto err_tcp_mib;
	net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
	if (!net->mib.ip_statistics)
		goto err_ip_mib;

	for_each_possible_cpu(i) {
		struct ipstats_mib *af_inet_stats;
		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
		u64_stats_init(&af_inet_stats->syncp);
	}

	net->mib.net_statistics = alloc_percpu(struct linux_mib);
	if (!net->mib.net_statistics)
		goto err_net_mib;
	net->mib.udp_statistics = alloc_percpu(struct udp_mib);
	if (!net->mib.udp_statistics)
		goto err_udp_mib;
	net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
	if (!net->mib.udplite_statistics)
		goto err_udplite_mib;
	net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
	if (!net->mib.icmp_statistics)
		goto err_icmp_mib;
	net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
					      GFP_KERNEL);
	if (!net->mib.icmpmsg_statistics)
		goto err_icmpmsg_mib;

	tcp_mib_init(net);
	return 0;

err_icmpmsg_mib:
	free_percpu(net->mib.icmp_statistics);
err_icmp_mib:
	free_percpu(net->mib.udplite_statistics);
err_udplite_mib:
	free_percpu(net->mib.udp_statistics);
err_udp_mib:
	free_percpu(net->mib.net_statistics);
err_net_mib:
	free_percpu(net->mib.ip_statistics);
err_ip_mib:
	free_percpu(net->mib.tcp_statistics);
err_tcp_mib:
	return -ENOMEM;
}

static __net_exit void ipv4_mib_exit_net(struct net *net)
{
	kfree(net->mib.icmpmsg_statistics);
	free_percpu(net->mib.icmp_statistics);
	free_percpu(net->mib.udplite_statistics);
	free_percpu(net->mib.udp_statistics);
	free_percpu(net->mib.net_statistics);
	free_percpu(net->mib.ip_statistics);
	free_percpu(net->mib.tcp_statistics);
#ifdef CONFIG_MPTCP
	/* allocated on demand, see mptcp_init_sock() */
	free_percpu(net->mib.mptcp_statistics);
#endif
}

static __net_initdata struct pernet_operations ipv4_mib_ops = {
	.init = ipv4_mib_init_net,
	.exit = ipv4_mib_exit_net,
};

static int __init init_ipv4_mibs(void)
{
	return register_pernet_subsys(&ipv4_mib_ops);
}

static __net_init int inet_init_net(struct net *net)
{
	/*
	 * Set defaults for local port range
	 */
	net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u;

	seqlock_init(&net->ipv4.ping_group_range.lock);
	/*
	 * Sane defaults - nobody may create ping sockets.
	 * Boot scripts should set this to distro-specific group.
	 */
	net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
	net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);

	/* Default values for sysctl-controlled parameters.
	 * We set them here, in case sysctl is not compiled.
	 */
	net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
	net->ipv4.sysctl_ip_fwd_update_priority = 1;
	net->ipv4.sysctl_ip_dynaddr = 0;
	net->ipv4.sysctl_ip_early_demux = 1;
	net->ipv4.sysctl_udp_early_demux = 1;
	net->ipv4.sysctl_tcp_early_demux = 1;
	net->ipv4.sysctl_nexthop_compat_mode = 1;
#ifdef CONFIG_SYSCTL
	net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif

	/* Some igmp sysctl, whose values are always used */
	net->ipv4.sysctl_igmp_max_memberships = 20;
	net->ipv4.sysctl_igmp_max_msf = 10;
	/* IGMP reports for link-local multicast groups are enabled by default */
	net->ipv4.sysctl_igmp_llm_reports = 1;
	net->ipv4.sysctl_igmp_qrv = 2;

	net->ipv4.sysctl_fib_notify_on_flag_change = 0;

	return 0;
}

static __net_initdata struct pernet_operations af_inet_ops = {
	.init = inet_init_net,
};

static int __init init_inet_pernet_ops(void)
{
	return register_pernet_subsys(&af_inet_ops);
}

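/* Worked example of the packed default above: the low 16 bits hold the
 * range floor and the high 16 bits the ceiling, so
 * 60999u << 16 | 32768u encodes the familiar 32768..60999 ephemeral
 * span reported by the net.ipv4.ip_local_port_range sysctl.
 */
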
static int ipv4_proc_init(void);

/*
 *	IP protocol layer initialiser
 */


static const struct net_offload ipip_offload = {
	.callbacks = {
		.gso_segment	= ipip_gso_segment,
		.gro_receive	= ipip_gro_receive,
		.gro_complete	= ipip_gro_complete,
	},
};

static int __init ipip_offload_init(void)
{
	return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
}

static int __init ipv4_offload_init(void)
{
	/*
	 * Add offloads
	 */
	if (udpv4_offload_init() < 0)
		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
	if (tcpv4_offload_init() < 0)
		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
	if (ipip_offload_init() < 0)
		pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);

	net_hotdata.ip_packet_offload = (struct packet_offload) {
		.type = cpu_to_be16(ETH_P_IP),
		.callbacks = {
			.gso_segment = inet_gso_segment,
			.gro_receive = inet_gro_receive,
			.gro_complete = inet_gro_complete,
		},
	};
	dev_add_offload(&net_hotdata.ip_packet_offload);
	return 0;
}

fs_initcall(ipv4_offload_init);

static struct packet_type ip_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.func = ip_rcv,
	.list_func = ip_list_rcv,
};

static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc;

	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));

	raw_hashinfo_init(&raw_v4_hashinfo);

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);

	net_hotdata.udp_protocol = (struct net_protocol) {
		.handler =	udp_rcv,
		.err_handler =	udp_err,
		.no_policy =	1,
	};
	if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);

	net_hotdata.tcp_protocol = (struct net_protocol) {
		.handler	=	tcp_v4_rcv,
		.err_handler	=	tcp_v4_err,
		.no_policy	=	1,
		.icmp_strict_tag_validation = 1,
	};
	if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	/* Initialise per-cpu ipv4 mibs */
	if (init_ipv4_mibs())
		panic("%s: Cannot init ipv4 mibs\n", __func__);

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	raw_init();

	ping_init();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

	if (init_inet_pernet_ops())
		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	ip_tunnel_core_init();

	rc = 0;
out:
	return rc;
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}

fs_initcall(inet_init);

/* ------------------------------------------------------------------------ */

#ifdef CONFIG_PROC_FS
static int __init ipv4_proc_init(void)
{
	int rc = 0;

	if (raw_proc_init())
		goto out_raw;
	if (tcp4_proc_init())
		goto out_tcp;
	if (udp4_proc_init())
		goto out_udp;
	if (ping_proc_init())
		goto out_ping;
	if (ip_misc_proc_init())
		goto out_misc;
out:
	return rc;
out_misc:
	ping_proc_exit();
out_ping:
	udp4_proc_exit();
out_udp:
	tcp4_proc_exit();
out_tcp:
	raw_proc_exit();
out_raw:
	rc = -ENOMEM;
	goto out;
}

#else /* CONFIG_PROC_FS */
static int __init ipv4_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */