Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/ipv4/devinet.c
15109 views
1
/*
2
* NET3 IP device support routines.
3
*
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License
6
* as published by the Free Software Foundation; either version
7
* 2 of the License, or (at your option) any later version.
8
*
9
* Derived from the IP parts of dev.c 1.0.19
10
* Authors: Ross Biro
11
* Fred N. van Kempen, <[email protected]>
12
* Mark Evans, <[email protected]>
13
*
14
* Additional Authors:
15
* Alan Cox, <[email protected]>
16
* Alexey Kuznetsov, <[email protected]>
17
*
18
* Changes:
19
* Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20
* lists.
21
* Cyrus Durgin: updated for kmod
22
* Matthias Andree: in devinet_ioctl, compare label and
23
* address (4.4BSD alias style support),
24
* fall back to comparing just the label
25
* if no match found.
26
*/
27
28
29
#include <asm/uaccess.h>
30
#include <asm/system.h>
31
#include <linux/bitops.h>
32
#include <linux/capability.h>
33
#include <linux/module.h>
34
#include <linux/types.h>
35
#include <linux/kernel.h>
36
#include <linux/string.h>
37
#include <linux/mm.h>
38
#include <linux/socket.h>
39
#include <linux/sockios.h>
40
#include <linux/in.h>
41
#include <linux/errno.h>
42
#include <linux/interrupt.h>
43
#include <linux/if_addr.h>
44
#include <linux/if_ether.h>
45
#include <linux/inet.h>
46
#include <linux/netdevice.h>
47
#include <linux/etherdevice.h>
48
#include <linux/skbuff.h>
49
#include <linux/init.h>
50
#include <linux/notifier.h>
51
#include <linux/inetdevice.h>
52
#include <linux/igmp.h>
53
#include <linux/slab.h>
54
#include <linux/hash.h>
55
#ifdef CONFIG_SYSCTL
56
#include <linux/sysctl.h>
57
#endif
58
#include <linux/kmod.h>
59
60
#include <net/arp.h>
61
#include <net/ip.h>
62
#include <net/route.h>
63
#include <net/ip_fib.h>
64
#include <net/rtnetlink.h>
65
#include <net/net_namespace.h>
66
67
#include "fib_lookup.h"
68
69
static struct ipv4_devconf ipv4_devconf = {
70
.data = {
71
[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72
[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73
[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74
[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75
},
76
};
77
78
static struct ipv4_devconf ipv4_devconf_dflt = {
79
.data = {
80
[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81
[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82
[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83
[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84
[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85
},
86
};
87
88
/* Read attribute @attr from the devconf_dflt block that @net points at.
 * @net is parenthesized so that any pointer-valued expression (not just
 * a plain identifier) can be passed without precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
91
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92
[IFA_LOCAL] = { .type = NLA_U32 },
93
[IFA_ADDRESS] = { .type = NLA_U32 },
94
[IFA_BROADCAST] = { .type = NLA_U32 },
95
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96
};
97
98
/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
99
* value. So if you change this define, make appropriate changes to
100
* inet_addr_hash as well.
101
*/
102
#define IN4_ADDR_HSIZE 256
103
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106
/* Map (@net, @addr) to a bucket index in [0, IN4_ADDR_HSIZE).
 *
 * The address is mixed with hash_ptr(net, 8), then the four bytes of
 * the 32-bit result are XOR-folded together and masked to table size.
 */
static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
{
	u32 mix = hash_ptr(net, 8) ^ (__force u32) addr;

	/* Two folding steps: after these, the low byte is the XOR of
	 * all four bytes of the mixed value.
	 */
	mix ^= mix >> 16;
	mix ^= mix >> 8;

	return mix & (IN4_ADDR_HSIZE - 1);
}
113
114
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115
{
116
unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117
118
spin_lock(&inet_addr_hash_lock);
119
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120
spin_unlock(&inet_addr_hash_lock);
121
}
122
123
static void inet_hash_remove(struct in_ifaddr *ifa)
124
{
125
spin_lock(&inet_addr_hash_lock);
126
hlist_del_init_rcu(&ifa->hash);
127
spin_unlock(&inet_addr_hash_lock);
128
}
129
130
/**
131
* __ip_dev_find - find the first device with a given source address.
132
* @net: the net namespace
133
* @addr: the source address
134
* @devref: if true, take a reference on the found device
135
*
136
* If a caller uses devref=false, it should be protected by RCU, or RTNL
137
*/
138
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139
{
140
unsigned int hash = inet_addr_hash(net, addr);
141
struct net_device *result = NULL;
142
struct in_ifaddr *ifa;
143
struct hlist_node *node;
144
145
rcu_read_lock();
146
hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147
struct net_device *dev = ifa->ifa_dev->dev;
148
149
if (!net_eq(dev_net(dev), net))
150
continue;
151
if (ifa->ifa_local == addr) {
152
result = dev;
153
break;
154
}
155
}
156
if (!result) {
157
struct flowi4 fl4 = { .daddr = addr };
158
struct fib_result res = { 0 };
159
struct fib_table *local;
160
161
/* Fallback to FIB local table so that communication
162
* over loopback subnets work.
163
*/
164
local = fib_get_table(net, RT_TABLE_LOCAL);
165
if (local &&
166
!fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167
res.type == RTN_LOCAL)
168
result = FIB_RES_DEV(res);
169
}
170
if (result && devref)
171
dev_hold(result);
172
rcu_read_unlock();
173
return result;
174
}
175
EXPORT_SYMBOL(__ip_dev_find);
176
177
/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when addresses are inserted or deleted in this file.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);

/* Forward declarations for helpers defined further down. */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 pid);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);

#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the (un)register hooks are empty stubs. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}

static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193
194
/* Locks all the inet devices. */
195
196
static struct in_ifaddr *inet_alloc_ifa(void)
197
{
198
return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199
}
200
201
static void inet_rcu_free_ifa(struct rcu_head *head)
202
{
203
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204
if (ifa->ifa_dev)
205
in_dev_put(ifa->ifa_dev);
206
kfree(ifa);
207
}
208
209
static inline void inet_free_ifa(struct in_ifaddr *ifa)
210
{
211
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212
}
213
214
void in_dev_finish_destroy(struct in_device *idev)
215
{
216
struct net_device *dev = idev->dev;
217
218
WARN_ON(idev->ifa_list);
219
WARN_ON(idev->mc_list);
220
#ifdef NET_REFCNT_DEBUG
221
printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
222
idev, dev ? dev->name : "NIL");
223
#endif
224
dev_put(dev);
225
if (!idev->dead)
226
pr_err("Freeing alive in_device %p\n", idev);
227
else
228
kfree(idev);
229
}
230
EXPORT_SYMBOL(in_dev_finish_destroy);
231
232
static struct in_device *inetdev_init(struct net_device *dev)
233
{
234
struct in_device *in_dev;
235
236
ASSERT_RTNL();
237
238
in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239
if (!in_dev)
240
goto out;
241
memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242
sizeof(in_dev->cnf));
243
in_dev->cnf.sysctl = NULL;
244
in_dev->dev = dev;
245
in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246
if (!in_dev->arp_parms)
247
goto out_kfree;
248
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249
dev_disable_lro(dev);
250
/* Reference in_dev->dev */
251
dev_hold(dev);
252
/* Account for reference dev->ip_ptr (below) */
253
in_dev_hold(in_dev);
254
255
devinet_sysctl_register(in_dev);
256
ip_mc_init_dev(in_dev);
257
if (dev->flags & IFF_UP)
258
ip_mc_up(in_dev);
259
260
/* we can receive as soon as ip_ptr is set -- do this last */
261
rcu_assign_pointer(dev->ip_ptr, in_dev);
262
out:
263
return in_dev;
264
out_kfree:
265
kfree(in_dev);
266
in_dev = NULL;
267
goto out;
268
}
269
270
static void in_dev_rcu_put(struct rcu_head *head)
271
{
272
struct in_device *idev = container_of(head, struct in_device, rcu_head);
273
in_dev_put(idev);
274
}
275
276
static void inetdev_destroy(struct in_device *in_dev)
277
{
278
struct in_ifaddr *ifa;
279
struct net_device *dev;
280
281
ASSERT_RTNL();
282
283
dev = in_dev->dev;
284
285
in_dev->dead = 1;
286
287
ip_mc_destroy_dev(in_dev);
288
289
while ((ifa = in_dev->ifa_list) != NULL) {
290
inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291
inet_free_ifa(ifa);
292
}
293
294
rcu_assign_pointer(dev->ip_ptr, NULL);
295
296
devinet_sysctl_unregister(in_dev);
297
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298
arp_ifdown(dev);
299
300
call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301
}
302
303
/* Return 1 if some primary address of @in_dev matches @a and, when @b
 * is non-zero, also matches @b; return 0 otherwise. The walk is done
 * inside an RCU read-side section.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	int onlink = 0;

	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa) &&
		    (!b || inet_ifa_match(b, ifa))) {
			onlink = 1;
			break;
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();

	return onlink;
}
317
318
/* Remove the address that *@ifap points at from @in_dev.
 *
 * @destroy: when non-zero the removed address is also freed.
 * @nlh, @pid: passed through to rtmsg_ifa() for the notifications.
 *
 * Deleting a primary address also deletes the secondaries of the same
 * subnet, unless secondary promotion is enabled on the device, in
 * which case the first such secondary becomes the new primary.
 * Must run under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *victim = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *prev_prom = NULL;
	struct in_ifaddr *ifa;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */
	if (!(victim->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **link = &victim->ifa_next;

		while ((ifa = *link) != NULL) {
			int secondary = ifa->ifa_flags & IFA_F_SECONDARY;

			if (!secondary &&
			    victim->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Not a secondary of the victim's subnet: skip. */
			if (!secondary ||
			    victim->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(victim->ifa_address, ifa)) {
				link = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (do_promote) {
				promote = ifa;
				break;
			}

			inet_hash_remove(ifa);
			*link = ifa->ifa_next;

			rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
			blocking_notifier_call_chain(&inetaddr_chain,
						     NETDEV_DOWN, ifa);
			inet_free_ifa(ifa);
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (victim->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(victim->ifa_address, ifa))
			fib_del_ifaddr(ifa, victim);
	}

	/* 2. Unlink it */
	*ifap = victim->ifa_next;
	inet_hash_remove(victim);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	 * At first sight, FIB update triggered by notifier
	 * will refer to already deleted ifaddr, that could confuse
	 * netlink listeners. It is not true: look, gated sees
	 * that route deleted and if it still thinks that ifaddr
	 * is valid, it will try to restore deleted routes... Grr.
	 * So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, victim, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, victim);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted entry right behind the last primary. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
					     NETDEV_UP, promote);

		/* Re-add routes of the remaining same-subnet secondaries. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (victim->ifa_mask == ifa->ifa_mask &&
			    inet_ifa_match(victim->ifa_address, ifa))
				fib_add_ifaddr(ifa);
		}
	}

	if (destroy)
		inet_free_ifa(victim);
}
417
418
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419
int destroy)
420
{
421
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
422
}
423
424
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
425
u32 pid)
426
{
427
struct in_device *in_dev = ifa->ifa_dev;
428
struct in_ifaddr *ifa1, **ifap, **last_primary;
429
430
ASSERT_RTNL();
431
432
if (!ifa->ifa_local) {
433
inet_free_ifa(ifa);
434
return 0;
435
}
436
437
ifa->ifa_flags &= ~IFA_F_SECONDARY;
438
last_primary = &in_dev->ifa_list;
439
440
for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
441
ifap = &ifa1->ifa_next) {
442
if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
443
ifa->ifa_scope <= ifa1->ifa_scope)
444
last_primary = &ifa1->ifa_next;
445
if (ifa1->ifa_mask == ifa->ifa_mask &&
446
inet_ifa_match(ifa1->ifa_address, ifa)) {
447
if (ifa1->ifa_local == ifa->ifa_local) {
448
inet_free_ifa(ifa);
449
return -EEXIST;
450
}
451
if (ifa1->ifa_scope != ifa->ifa_scope) {
452
inet_free_ifa(ifa);
453
return -EINVAL;
454
}
455
ifa->ifa_flags |= IFA_F_SECONDARY;
456
}
457
}
458
459
if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
460
net_srandom(ifa->ifa_local);
461
ifap = last_primary;
462
}
463
464
ifa->ifa_next = *ifap;
465
*ifap = ifa;
466
467
inet_hash_insert(dev_net(in_dev->dev), ifa);
468
469
/* Send message first, then call notifier.
470
Notifier will trigger FIB update, so that
471
listeners of netlink will know about new ifaddr */
472
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
473
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
474
475
return 0;
476
}
477
478
static int inet_insert_ifa(struct in_ifaddr *ifa)
479
{
480
return __inet_insert_ifa(ifa, NULL, 0);
481
}
482
483
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
484
{
485
struct in_device *in_dev = __in_dev_get_rtnl(dev);
486
487
ASSERT_RTNL();
488
489
if (!in_dev) {
490
inet_free_ifa(ifa);
491
return -ENOBUFS;
492
}
493
ipv4_devconf_setall(in_dev);
494
if (ifa->ifa_dev != in_dev) {
495
WARN_ON(ifa->ifa_dev);
496
in_dev_hold(in_dev);
497
ifa->ifa_dev = in_dev;
498
}
499
if (ipv4_is_loopback(ifa->ifa_local))
500
ifa->ifa_scope = RT_SCOPE_HOST;
501
return inet_insert_ifa(ifa);
502
}
503
504
/* Caller must hold RCU or RTNL :
505
* We dont take a reference on found in_device
506
*/
507
struct in_device *inetdev_by_index(struct net *net, int ifindex)
508
{
509
struct net_device *dev;
510
struct in_device *in_dev = NULL;
511
512
rcu_read_lock();
513
dev = dev_get_by_index_rcu(net, ifindex);
514
if (dev)
515
in_dev = rcu_dereference_rtnl(dev->ip_ptr);
516
rcu_read_unlock();
517
return in_dev;
518
}
519
EXPORT_SYMBOL(inetdev_by_index);
520
521
/* Called only from RTNL semaphored context. No locks. */
522
523
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
524
__be32 mask)
525
{
526
ASSERT_RTNL();
527
528
for_primary_ifa(in_dev) {
529
if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
530
return ifa;
531
} endfor_ifa(in_dev);
532
return NULL;
533
}
534
535
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536
{
537
struct net *net = sock_net(skb->sk);
538
struct nlattr *tb[IFA_MAX+1];
539
struct in_device *in_dev;
540
struct ifaddrmsg *ifm;
541
struct in_ifaddr *ifa, **ifap;
542
int err = -EINVAL;
543
544
ASSERT_RTNL();
545
546
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
547
if (err < 0)
548
goto errout;
549
550
ifm = nlmsg_data(nlh);
551
in_dev = inetdev_by_index(net, ifm->ifa_index);
552
if (in_dev == NULL) {
553
err = -ENODEV;
554
goto errout;
555
}
556
557
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
558
ifap = &ifa->ifa_next) {
559
if (tb[IFA_LOCAL] &&
560
ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
561
continue;
562
563
if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
564
continue;
565
566
if (tb[IFA_ADDRESS] &&
567
(ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
568
!inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
569
continue;
570
571
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
572
return 0;
573
}
574
575
err = -EADDRNOTAVAIL;
576
errout:
577
return err;
578
}
579
580
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
581
{
582
struct nlattr *tb[IFA_MAX+1];
583
struct in_ifaddr *ifa;
584
struct ifaddrmsg *ifm;
585
struct net_device *dev;
586
struct in_device *in_dev;
587
int err;
588
589
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
590
if (err < 0)
591
goto errout;
592
593
ifm = nlmsg_data(nlh);
594
err = -EINVAL;
595
if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
596
goto errout;
597
598
dev = __dev_get_by_index(net, ifm->ifa_index);
599
err = -ENODEV;
600
if (dev == NULL)
601
goto errout;
602
603
in_dev = __in_dev_get_rtnl(dev);
604
err = -ENOBUFS;
605
if (in_dev == NULL)
606
goto errout;
607
608
ifa = inet_alloc_ifa();
609
if (ifa == NULL)
610
/*
611
* A potential indev allocation can be left alive, it stays
612
* assigned to its device and is destroy with it.
613
*/
614
goto errout;
615
616
ipv4_devconf_setall(in_dev);
617
in_dev_hold(in_dev);
618
619
if (tb[IFA_ADDRESS] == NULL)
620
tb[IFA_ADDRESS] = tb[IFA_LOCAL];
621
622
INIT_HLIST_NODE(&ifa->hash);
623
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
624
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
625
ifa->ifa_flags = ifm->ifa_flags;
626
ifa->ifa_scope = ifm->ifa_scope;
627
ifa->ifa_dev = in_dev;
628
629
ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
630
ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
631
632
if (tb[IFA_BROADCAST])
633
ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
634
635
if (tb[IFA_LABEL])
636
nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
637
else
638
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
639
640
return ifa;
641
642
errout:
643
return ERR_PTR(err);
644
}
645
646
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
647
{
648
struct net *net = sock_net(skb->sk);
649
struct in_ifaddr *ifa;
650
651
ASSERT_RTNL();
652
653
ifa = rtm_to_ifaddr(net, nlh);
654
if (IS_ERR(ifa))
655
return PTR_ERR(ifa);
656
657
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
658
}
659
660
/*
661
* Determine a default network mask, based on the IP address.
662
*/
663
664
static inline int inet_abc_len(__be32 addr)
665
{
666
int rc = -1; /* Something else, probably a multicast. */
667
668
if (ipv4_is_zeronet(addr))
669
rc = 0;
670
else {
671
__u32 haddr = ntohl(addr);
672
673
if (IN_CLASSA(haddr))
674
rc = 8;
675
else if (IN_CLASSB(haddr))
676
rc = 16;
677
else if (IN_CLASSC(haddr))
678
rc = 24;
679
}
680
681
return rc;
682
}
683
684
685
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq
 *
 * Returns 0 on success or a negative errno: -EFAULT when the ifreq
 * cannot be copied, -EACCES without CAP_NET_ADMIN on a set request,
 * -EINVAL for an unknown command or an invalid address/mask, -ENODEV
 * for an unknown interface, -EADDRNOTAVAIL when no matching address
 * exists, -ENOBUFS when a new address cannot be allocated.
 *
 * The name may carry an alias suffix ("eth0:1"): the part before the
 * colon selects the device, the full name selects the address label.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Cut the alias suffix off while the device is looked up. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK:	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for the address comparisons below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* On an alias, clearing IFF_UP deletes the alias
			 * address; other flag changes are ignored.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			/* Check the allocation before touching the new
			 * object: initializing the hash node first would
			 * dereference NULL on allocation failure.
			 */
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; re-inserted below. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* Get requests: drop RTNL, then copy the answer back to the user. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
925
926
/* Report the IPv4 addresses of @dev as an array of struct ifreq.
 *
 * @buf: user buffer to fill; when NULL only the required size is
 *       computed
 * @len: room left in @buf, in bytes
 *
 * Returns the number of bytes written (or required when @buf is NULL),
 * 0 when @dev has no in_device, or -EFAULT when a copy to user space
 * fails. Filling stops quietly once @len cannot hold another entry.
 */
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			done += sizeof(ifr);
			continue;
		}
		if (len < (int) sizeof(ifr))
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		/* ifa_label is an array member, so it is never NULL; the
		 * old "else use dev->name" branch could not be reached.
		 */
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}
964
965
/* Pick a local address to use on @dev towards @dst.
 *
 * Among the primary addresses of @dev with scope <= @scope, one whose
 * subnet contains @dst (or the first one when @dst is zero) is
 * preferred; otherwise the first in-scope primary is used. When @dev
 * yields nothing, every device of the namespace is scanned for a
 * primary, non-link-scope address within @scope. Returns 0 when no
 * address is found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	struct net *net = dev_net(dev);
	struct in_device *in_dev;
	__be32 addr = 0;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope > scope)
				continue;
			if (!dst || inet_ifa_match(dst, ifa)) {
				addr = ifa->ifa_local;
				break;
			}
			/* Remember the first in-scope address as fallback. */
			if (!addr)
				addr = ifa->ifa_local;
		} endfor_ifa(in_dev);

		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1013
1014
/* Single-device worker for inet_confirm_addr().
 *
 * Walks all addresses of @in_dev looking for (a) a candidate local
 * address ("addr": equal to @local, or any when @local is zero, with
 * scope <= @scope) and (b) an address whose subnet contains @local and
 * @dst ("same"; a zero value acts as a wildcard). Returns the candidate
 * only when a subnet match was also found, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
				 __be32 local, int scope)
{
	__be32 addr = 0;
	int same = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (!local || local == ifa->ifa_local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}

		/* Subnet already confirmed: keep looking for addr only. */
		if (same)
			continue;

		same = (!local || inet_ifa_match(local, ifa)) &&
		       (!dst || inet_ifa_match(dst, ifa));
		if (!same || !addr)
			continue;

		if (local || !dst)
			break;
		/* Is the selected addr into dst subnet? */
		if (inet_ifa_match(addr, ifa))
			break;
		/* No, then can we use new local src? */
		if (ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			break;
		}
		/* search for large dst subnet for addr */
		same = 0;
	} endfor_ifa(in_dev);

	if (!same)
		return 0;
	return addr;
}
1050
1051
/*
1052
* Confirm that local IP address exists using wildcards:
1053
* - in_dev: only on this interface, 0=any interface
1054
* - dst: only in the same subnet as dst, 0=any dst
1055
* - local: address, 0=autoselect the local address
1056
* - scope: maximum allowed scope value for the local address
1057
*/
1058
__be32 inet_confirm_addr(struct in_device *in_dev,
1059
__be32 dst, __be32 local, int scope)
1060
{
1061
__be32 addr = 0;
1062
struct net_device *dev;
1063
struct net *net;
1064
1065
if (scope != RT_SCOPE_LINK)
1066
return confirm_addr_indev(in_dev, dst, local, scope);
1067
1068
net = dev_net(in_dev->dev);
1069
rcu_read_lock();
1070
for_each_netdev_rcu(net, dev) {
1071
in_dev = __in_dev_get_rcu(dev);
1072
if (in_dev) {
1073
addr = confirm_addr_indev(in_dev, dst, local, scope);
1074
if (addr)
1075
break;
1076
}
1077
}
1078
rcu_read_unlock();
1079
1080
return addr;
1081
}
1082
1083
/*
1084
* Device notifier
1085
*/
1086
1087
int register_inetaddr_notifier(struct notifier_block *nb)
1088
{
1089
return blocking_notifier_chain_register(&inetaddr_chain, nb);
1090
}
1091
EXPORT_SYMBOL(register_inetaddr_notifier);
1092
1093
int unregister_inetaddr_notifier(struct notifier_block *nb)
1094
{
1095
return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1096
}
1097
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099
/* Rename ifa_labels for a device name change. Make some effort to preserve
1100
* existing alias numbering and to create unique labels if possible.
1101
*/
1102
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103
{
1104
struct in_ifaddr *ifa;
1105
int named = 0;
1106
1107
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108
char old[IFNAMSIZ], *dot;
1109
1110
memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112
if (named++ == 0)
1113
goto skip;
1114
dot = strchr(old, ':');
1115
if (dot == NULL) {
1116
sprintf(old, ":%d", named);
1117
dot = old;
1118
}
1119
if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120
strcat(ifa->ifa_label, dot);
1121
else
1122
strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123
skip:
1124
rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125
}
1126
}
1127
1128
/* True when @mtu is at least 68, the threshold inetdev_event() uses to
 * decide whether IP stays enabled on a device.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1132
1133
static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134
struct in_device *in_dev)
1135
1136
{
1137
struct in_ifaddr *ifa = in_dev->ifa_list;
1138
1139
if (!ifa)
1140
return;
1141
1142
arp_send(ARPOP_REQUEST, ETH_P_ARP,
1143
ifa->ifa_local, dev,
1144
ifa->ifa_local, NULL,
1145
dev->dev_addr, NULL);
1146
}
1147
1148
/* Called only under RTNL semaphore */
1149
1150
static int inetdev_event(struct notifier_block *this, unsigned long event,
1151
void *ptr)
1152
{
1153
struct net_device *dev = ptr;
1154
struct in_device *in_dev = __in_dev_get_rtnl(dev);
1155
1156
ASSERT_RTNL();
1157
1158
if (!in_dev) {
1159
if (event == NETDEV_REGISTER) {
1160
in_dev = inetdev_init(dev);
1161
if (!in_dev)
1162
return notifier_from_errno(-ENOMEM);
1163
if (dev->flags & IFF_LOOPBACK) {
1164
IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1165
IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1166
}
1167
} else if (event == NETDEV_CHANGEMTU) {
1168
/* Re-enabling IP */
1169
if (inetdev_valid_mtu(dev->mtu))
1170
in_dev = inetdev_init(dev);
1171
}
1172
goto out;
1173
}
1174
1175
switch (event) {
1176
case NETDEV_REGISTER:
1177
printk(KERN_DEBUG "inetdev_event: bug\n");
1178
rcu_assign_pointer(dev->ip_ptr, NULL);
1179
break;
1180
case NETDEV_UP:
1181
if (!inetdev_valid_mtu(dev->mtu))
1182
break;
1183
if (dev->flags & IFF_LOOPBACK) {
1184
struct in_ifaddr *ifa = inet_alloc_ifa();
1185
1186
if (ifa) {
1187
INIT_HLIST_NODE(&ifa->hash);
1188
ifa->ifa_local =
1189
ifa->ifa_address = htonl(INADDR_LOOPBACK);
1190
ifa->ifa_prefixlen = 8;
1191
ifa->ifa_mask = inet_make_mask(8);
1192
in_dev_hold(in_dev);
1193
ifa->ifa_dev = in_dev;
1194
ifa->ifa_scope = RT_SCOPE_HOST;
1195
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1196
inet_insert_ifa(ifa);
1197
}
1198
}
1199
ip_mc_up(in_dev);
1200
/* fall through */
1201
case NETDEV_CHANGEADDR:
1202
if (!IN_DEV_ARP_NOTIFY(in_dev))
1203
break;
1204
/* fall through */
1205
case NETDEV_NOTIFY_PEERS:
1206
/* Send gratuitous ARP to notify of link change */
1207
inetdev_send_gratuitous_arp(dev, in_dev);
1208
break;
1209
case NETDEV_DOWN:
1210
ip_mc_down(in_dev);
1211
break;
1212
case NETDEV_PRE_TYPE_CHANGE:
1213
ip_mc_unmap(in_dev);
1214
break;
1215
case NETDEV_POST_TYPE_CHANGE:
1216
ip_mc_remap(in_dev);
1217
break;
1218
case NETDEV_CHANGEMTU:
1219
if (inetdev_valid_mtu(dev->mtu))
1220
break;
1221
/* disable IP when MTU is not enough */
1222
case NETDEV_UNREGISTER:
1223
inetdev_destroy(in_dev);
1224
break;
1225
case NETDEV_CHANGENAME:
1226
/* Do not notify about label change, this event is
1227
* not interesting to applications using netlink.
1228
*/
1229
inetdev_changename(dev, in_dev);
1230
1231
devinet_sysctl_unregister(in_dev);
1232
devinet_sysctl_register(in_dev);
1233
break;
1234
}
1235
out:
1236
return NOTIFY_DONE;
1237
}
1238
1239
static struct notifier_block ip_netdev_notifier = {
1240
.notifier_call = inetdev_event,
1241
};
1242
1243
static inline size_t inet_nlmsg_size(void)
1244
{
1245
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246
+ nla_total_size(4) /* IFA_ADDRESS */
1247
+ nla_total_size(4) /* IFA_LOCAL */
1248
+ nla_total_size(4) /* IFA_BROADCAST */
1249
+ nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250
}
1251
1252
/*
 * Append one address message describing @ifa to @skb.
 *
 * @skb:   buffer the message is added to
 * @ifa:   address to describe
 * @pid:   passed through to nlmsg_put()
 * @seq:   passed through to nlmsg_put()
 * @event: netlink message type
 * @flags: netlink message flags
 *
 * The address, local, broadcast and label attributes are emitted only
 * when the corresponding field is non-zero / non-empty.
 *
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute could not be added; in the attribute case the
 * partially built message is cancelled first.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 pid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct ifaddrmsg *ifm;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family    = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	/* IFA_F_PERMANENT is always reported in addition to the stored flags. */
	ifm->ifa_flags     = ifa->ifa_flags|IFA_F_PERMANENT;
	ifm->ifa_scope     = ifa->ifa_scope;
	ifm->ifa_index     = ifa->ifa_dev->dev->ifindex;

	/* The NLA_PUT_* macros jump to nla_put_failure on failure. */
	if (ifa->ifa_address)
		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

	if (ifa->ifa_local)
		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

	if (ifa->ifa_broadcast)
		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

	if (ifa->ifa_label[0])
		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1287
1288
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1289
{
1290
struct net *net = sock_net(skb->sk);
1291
int h, s_h;
1292
int idx, s_idx;
1293
int ip_idx, s_ip_idx;
1294
struct net_device *dev;
1295
struct in_device *in_dev;
1296
struct in_ifaddr *ifa;
1297
struct hlist_head *head;
1298
struct hlist_node *node;
1299
1300
s_h = cb->args[0];
1301
s_idx = idx = cb->args[1];
1302
s_ip_idx = ip_idx = cb->args[2];
1303
1304
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1305
idx = 0;
1306
head = &net->dev_index_head[h];
1307
rcu_read_lock();
1308
hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1309
if (idx < s_idx)
1310
goto cont;
1311
if (h > s_h || idx > s_idx)
1312
s_ip_idx = 0;
1313
in_dev = __in_dev_get_rcu(dev);
1314
if (!in_dev)
1315
goto cont;
1316
1317
for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1318
ifa = ifa->ifa_next, ip_idx++) {
1319
if (ip_idx < s_ip_idx)
1320
continue;
1321
if (inet_fill_ifaddr(skb, ifa,
1322
NETLINK_CB(cb->skb).pid,
1323
cb->nlh->nlmsg_seq,
1324
RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1325
rcu_read_unlock();
1326
goto done;
1327
}
1328
}
1329
cont:
1330
idx++;
1331
}
1332
rcu_read_unlock();
1333
}
1334
1335
done:
1336
cb->args[0] = h;
1337
cb->args[1] = idx;
1338
cb->args[2] = ip_idx;
1339
1340
return skb->len;
1341
}
1342
1343
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1344
u32 pid)
1345
{
1346
struct sk_buff *skb;
1347
u32 seq = nlh ? nlh->nlmsg_seq : 0;
1348
int err = -ENOBUFS;
1349
struct net *net;
1350
1351
net = dev_net(ifa->ifa_dev->dev);
1352
skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1353
if (skb == NULL)
1354
goto errout;
1355
1356
err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1357
if (err < 0) {
1358
/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1359
WARN_ON(err == -EMSGSIZE);
1360
kfree_skb(skb);
1361
goto errout;
1362
}
1363
rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1364
return;
1365
errout:
1366
if (err < 0)
1367
rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1368
}
1369
1370
static size_t inet_get_link_af_size(const struct net_device *dev)
1371
{
1372
struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373
1374
if (!in_dev)
1375
return 0;
1376
1377
return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1378
}
1379
1380
static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381
{
1382
struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383
struct nlattr *nla;
1384
int i;
1385
1386
if (!in_dev)
1387
return -ENODATA;
1388
1389
nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1390
if (nla == NULL)
1391
return -EMSGSIZE;
1392
1393
for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1394
((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1395
1396
return 0;
1397
}
1398
1399
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1400
[IFLA_INET_CONF] = { .type = NLA_NESTED },
1401
};
1402
1403
static int inet_validate_link_af(const struct net_device *dev,
1404
const struct nlattr *nla)
1405
{
1406
struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407
int err, rem;
1408
1409
if (dev && !__in_dev_get_rtnl(dev))
1410
return -EAFNOSUPPORT;
1411
1412
err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1413
if (err < 0)
1414
return err;
1415
1416
if (tb[IFLA_INET_CONF]) {
1417
nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1418
int cfgid = nla_type(a);
1419
1420
if (nla_len(a) < 4)
1421
return -EINVAL;
1422
1423
if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1424
return -EINVAL;
1425
}
1426
}
1427
1428
return 0;
1429
}
1430
1431
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1432
{
1433
struct in_device *in_dev = __in_dev_get_rtnl(dev);
1434
struct nlattr *a, *tb[IFLA_INET_MAX+1];
1435
int rem;
1436
1437
if (!in_dev)
1438
return -EAFNOSUPPORT;
1439
1440
if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1441
BUG();
1442
1443
if (tb[IFLA_INET_CONF]) {
1444
nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1445
ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1446
}
1447
1448
return 0;
1449
}
1450
1451
#ifdef CONFIG_SYSCTL
1452
1453
static void devinet_copy_dflt_conf(struct net *net, int i)
1454
{
1455
struct net_device *dev;
1456
1457
rcu_read_lock();
1458
for_each_netdev_rcu(net, dev) {
1459
struct in_device *in_dev;
1460
1461
in_dev = __in_dev_get_rcu(dev);
1462
if (in_dev && !test_bit(i, in_dev->cnf.state))
1463
in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1464
}
1465
rcu_read_unlock();
1466
}
1467
1468
/* called with RTNL locked */
1469
static void inet_forward_change(struct net *net)
1470
{
1471
struct net_device *dev;
1472
int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1473
1474
IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1475
IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1476
1477
for_each_netdev(net, dev) {
1478
struct in_device *in_dev;
1479
if (on)
1480
dev_disable_lro(dev);
1481
rcu_read_lock();
1482
in_dev = __in_dev_get_rcu(dev);
1483
if (in_dev)
1484
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1485
rcu_read_unlock();
1486
}
1487
}
1488
1489
static int devinet_conf_proc(ctl_table *ctl, int write,
1490
void __user *buffer,
1491
size_t *lenp, loff_t *ppos)
1492
{
1493
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1494
1495
if (write) {
1496
struct ipv4_devconf *cnf = ctl->extra1;
1497
struct net *net = ctl->extra2;
1498
int i = (int *)ctl->data - cnf->data;
1499
1500
set_bit(i, cnf->state);
1501
1502
if (cnf == net->ipv4.devconf_dflt)
1503
devinet_copy_dflt_conf(net, i);
1504
}
1505
1506
return ret;
1507
}
1508
1509
static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510
void __user *buffer,
1511
size_t *lenp, loff_t *ppos)
1512
{
1513
int *valp = ctl->data;
1514
int val = *valp;
1515
loff_t pos = *ppos;
1516
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517
1518
if (write && *valp != val) {
1519
struct net *net = ctl->extra2;
1520
1521
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522
if (!rtnl_trylock()) {
1523
/* Restore the original values before restarting */
1524
*valp = val;
1525
*ppos = pos;
1526
return restart_syscall();
1527
}
1528
if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529
inet_forward_change(net);
1530
} else if (*valp) {
1531
struct ipv4_devconf *cnf = ctl->extra1;
1532
struct in_device *idev =
1533
container_of(cnf, struct in_device, cnf);
1534
dev_disable_lro(idev->dev);
1535
}
1536
rtnl_unlock();
1537
rt_cache_flush(net, 0);
1538
}
1539
}
1540
1541
return ret;
1542
}
1543
1544
static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545
void __user *buffer,
1546
size_t *lenp, loff_t *ppos)
1547
{
1548
int *valp = ctl->data;
1549
int val = *valp;
1550
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551
struct net *net = ctl->extra2;
1552
1553
if (write && *valp != val)
1554
rt_cache_flush(net, 0);
1555
1556
return ret;
1557
}
1558
1559
/*
 * Initializer for one ctl_table entry of the devconf sysctl template.
 *
 * attr - suffix of the IPV4_DEVCONF_* index; the entry's data pointer
 *        is slot (IPV4_DEVCONF_<attr> - 1) of ipv4_devconf.data[]
 * name - procname of the entry
 * mval - file mode
 * proc - proc_handler
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581
1582
static struct devinet_sysctl_table {
1583
struct ctl_table_header *sysctl_header;
1584
struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1585
char *dev_name;
1586
} devinet_sysctl = {
1587
.devinet_vars = {
1588
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1589
devinet_sysctl_forward),
1590
DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1591
1592
DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1593
DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1594
DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1595
DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1596
DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1597
DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1598
"accept_source_route"),
1599
DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1600
DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1601
DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1602
DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1603
DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1604
DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1605
DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1606
DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1607
DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1608
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1609
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1610
DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1611
DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1612
1613
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1614
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1615
DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1616
"force_igmp_version"),
1617
DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1618
"promote_secondaries"),
1619
},
1620
};
1621
1622
static int __devinet_sysctl_register(struct net *net, char *dev_name,
1623
struct ipv4_devconf *p)
1624
{
1625
int i;
1626
struct devinet_sysctl_table *t;
1627
1628
#define DEVINET_CTL_PATH_DEV 3
1629
1630
struct ctl_path devinet_ctl_path[] = {
1631
{ .procname = "net", },
1632
{ .procname = "ipv4", },
1633
{ .procname = "conf", },
1634
{ /* to be set */ },
1635
{ },
1636
};
1637
1638
t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1639
if (!t)
1640
goto out;
1641
1642
for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1643
t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1644
t->devinet_vars[i].extra1 = p;
1645
t->devinet_vars[i].extra2 = net;
1646
}
1647
1648
/*
1649
* Make a copy of dev_name, because '.procname' is regarded as const
1650
* by sysctl and we wouldn't want anyone to change it under our feet
1651
* (see SIOCSIFNAME).
1652
*/
1653
t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1654
if (!t->dev_name)
1655
goto free;
1656
1657
devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1658
1659
t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1660
t->devinet_vars);
1661
if (!t->sysctl_header)
1662
goto free_procname;
1663
1664
p->sysctl = t;
1665
return 0;
1666
1667
free_procname:
1668
kfree(t->dev_name);
1669
free:
1670
kfree(t);
1671
out:
1672
return -ENOBUFS;
1673
}
1674
1675
static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1676
{
1677
struct devinet_sysctl_table *t = cnf->sysctl;
1678
1679
if (t == NULL)
1680
return;
1681
1682
cnf->sysctl = NULL;
1683
unregister_net_sysctl_table(t->sysctl_header);
1684
kfree(t->dev_name);
1685
kfree(t);
1686
}
1687
1688
static void devinet_sysctl_register(struct in_device *idev)
1689
{
1690
neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1691
__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1692
&idev->cnf);
1693
}
1694
1695
static void devinet_sysctl_unregister(struct in_device *idev)
1696
{
1697
__devinet_sysctl_unregister(&idev->cnf);
1698
neigh_sysctl_unregister(idev->arp_parms);
1699
}
1700
1701
static struct ctl_table ctl_forward_entry[] = {
1702
{
1703
.procname = "ip_forward",
1704
.data = &ipv4_devconf.data[
1705
IPV4_DEVCONF_FORWARDING - 1],
1706
.maxlen = sizeof(int),
1707
.mode = 0644,
1708
.proc_handler = devinet_sysctl_forward,
1709
.extra1 = &ipv4_devconf,
1710
.extra2 = &init_net,
1711
},
1712
{ },
1713
};
1714
1715
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net",  },
	{ .procname = "ipv4", },
	{ },
};
1720
#endif
1721
1722
static __net_init int devinet_init_net(struct net *net)
1723
{
1724
int err;
1725
struct ipv4_devconf *all, *dflt;
1726
#ifdef CONFIG_SYSCTL
1727
struct ctl_table *tbl = ctl_forward_entry;
1728
struct ctl_table_header *forw_hdr;
1729
#endif
1730
1731
err = -ENOMEM;
1732
all = &ipv4_devconf;
1733
dflt = &ipv4_devconf_dflt;
1734
1735
if (!net_eq(net, &init_net)) {
1736
all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1737
if (all == NULL)
1738
goto err_alloc_all;
1739
1740
dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1741
if (dflt == NULL)
1742
goto err_alloc_dflt;
1743
1744
#ifdef CONFIG_SYSCTL
1745
tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1746
if (tbl == NULL)
1747
goto err_alloc_ctl;
1748
1749
tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1750
tbl[0].extra1 = all;
1751
tbl[0].extra2 = net;
1752
#endif
1753
}
1754
1755
#ifdef CONFIG_SYSCTL
1756
err = __devinet_sysctl_register(net, "all", all);
1757
if (err < 0)
1758
goto err_reg_all;
1759
1760
err = __devinet_sysctl_register(net, "default", dflt);
1761
if (err < 0)
1762
goto err_reg_dflt;
1763
1764
err = -ENOMEM;
1765
forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1766
if (forw_hdr == NULL)
1767
goto err_reg_ctl;
1768
net->ipv4.forw_hdr = forw_hdr;
1769
#endif
1770
1771
net->ipv4.devconf_all = all;
1772
net->ipv4.devconf_dflt = dflt;
1773
return 0;
1774
1775
#ifdef CONFIG_SYSCTL
1776
err_reg_ctl:
1777
__devinet_sysctl_unregister(dflt);
1778
err_reg_dflt:
1779
__devinet_sysctl_unregister(all);
1780
err_reg_all:
1781
if (tbl != ctl_forward_entry)
1782
kfree(tbl);
1783
err_alloc_ctl:
1784
#endif
1785
if (dflt != &ipv4_devconf_dflt)
1786
kfree(dflt);
1787
err_alloc_dflt:
1788
if (all != &ipv4_devconf)
1789
kfree(all);
1790
err_alloc_all:
1791
return err;
1792
}
1793
1794
static __net_exit void devinet_exit_net(struct net *net)
1795
{
1796
#ifdef CONFIG_SYSCTL
1797
struct ctl_table *tbl;
1798
1799
tbl = net->ipv4.forw_hdr->ctl_table_arg;
1800
unregister_net_sysctl_table(net->ipv4.forw_hdr);
1801
__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1802
__devinet_sysctl_unregister(net->ipv4.devconf_all);
1803
kfree(tbl);
1804
#endif
1805
kfree(net->ipv4.devconf_dflt);
1806
kfree(net->ipv4.devconf_all);
1807
}
1808
1809
static __net_initdata struct pernet_operations devinet_ops = {
1810
.init = devinet_init_net,
1811
.exit = devinet_exit_net,
1812
};
1813
1814
static struct rtnl_af_ops inet_af_ops = {
1815
.family = AF_INET,
1816
.fill_link_af = inet_fill_link_af,
1817
.get_link_af_size = inet_get_link_af_size,
1818
.validate_link_af = inet_validate_link_af,
1819
.set_link_af = inet_set_link_af,
1820
};
1821
1822
/*
 * Boot-time setup: initialise the inet_addr_lst hash heads, then
 * register the per-namespace operations, the PF_INET gifconf handler,
 * the netdevice notifier, the AF_INET link-attribute operations and
 * the RTM_NEWADDR / RTM_DELADDR / RTM_GETADDR handlers.
 */
void __init devinet_init(void)
{
	int bucket = 0;

	while (bucket < IN4_ADDR_HSIZE) {
		INIT_HLIST_HEAD(&inet_addr_lst[bucket]);
		bucket++;
	}

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_af_register(&inet_af_ops);

	/* NEWADDR/DELADDR have doit handlers, GETADDR only a dump handler. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1840
1841
1842