Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/ipv6/route.c
15109 views
1
/*
2
* Linux INET6 implementation
3
* FIB front-end.
4
*
5
* Authors:
6
* Pedro Roque <[email protected]>
7
*
8
* This program is free software; you can redistribute it and/or
9
* modify it under the terms of the GNU General Public License
10
* as published by the Free Software Foundation; either version
11
* 2 of the License, or (at your option) any later version.
12
*/
13
14
/* Changes:
15
*
16
* YOSHIFUJI Hideaki @USAGI
17
* reworked default router selection.
18
* - respect outgoing interface
19
* - select from (probably) reachable routers (i.e.
20
* routers in REACHABLE, STALE, DELAY or PROBE states).
21
* - always select the same router if it is (probably)
22
* reachable. otherwise, round-robin the list.
23
* Ville Nuorvala
24
* Fixed routing subtrees.
25
*/
26
27
#include <linux/capability.h>
28
#include <linux/errno.h>
29
#include <linux/types.h>
30
#include <linux/times.h>
31
#include <linux/socket.h>
32
#include <linux/sockios.h>
33
#include <linux/net.h>
34
#include <linux/route.h>
35
#include <linux/netdevice.h>
36
#include <linux/in6.h>
37
#include <linux/mroute6.h>
38
#include <linux/init.h>
39
#include <linux/if_arp.h>
40
#include <linux/proc_fs.h>
41
#include <linux/seq_file.h>
42
#include <linux/nsproxy.h>
43
#include <linux/slab.h>
44
#include <net/net_namespace.h>
45
#include <net/snmp.h>
46
#include <net/ipv6.h>
47
#include <net/ip6_fib.h>
48
#include <net/ip6_route.h>
49
#include <net/ndisc.h>
50
#include <net/addrconf.h>
51
#include <net/tcp.h>
52
#include <linux/rtnetlink.h>
53
#include <net/dst.h>
54
#include <net/xfrm.h>
55
#include <net/netevent.h>
56
#include <net/netlink.h>
57
58
#include <asm/uaccess.h>
59
60
#ifdef CONFIG_SYSCTL
61
#include <linux/sysctl.h>
62
#endif
63
64
/* Set RT6_DEBUG to 3 (or more) to enable the tracing macros below. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
75
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78
static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80
static void ip6_dst_destroy(struct dst_entry *);
81
static void ip6_dst_ifdown(struct dst_entry *,
82
struct net_device *dev, int how);
83
static int ip6_dst_gc(struct dst_ops *ops);
84
85
static int ip6_pkt_discard(struct sk_buff *skb);
86
static int ip6_pkt_discard_out(struct sk_buff *skb);
87
static void ip6_link_failure(struct sk_buff *skb);
88
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90
#ifdef CONFIG_IPV6_ROUTE_INFO
91
static struct rt6_info *rt6_add_route_info(struct net *net,
92
const struct in6_addr *prefix, int prefixlen,
93
const struct in6_addr *gwaddr, int ifindex,
94
unsigned pref);
95
static struct rt6_info *rt6_get_route_info(struct net *net,
96
const struct in6_addr *prefix, int prefixlen,
97
const struct in6_addr *gwaddr, int ifindex);
98
#endif
99
100
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101
{
102
struct rt6_info *rt = (struct rt6_info *) dst;
103
struct inet_peer *peer;
104
u32 *p = NULL;
105
106
if (!rt->rt6i_peer)
107
rt6_bind_peer(rt, 1);
108
109
peer = rt->rt6i_peer;
110
if (peer) {
111
u32 *old_p = __DST_METRICS_PTR(old);
112
unsigned long prev, new;
113
114
p = peer->metrics;
115
if (inet_metrics_new(peer))
116
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118
new = (unsigned long) p;
119
prev = cmpxchg(&dst->_metrics, old, new);
120
121
if (prev != old) {
122
p = __DST_METRICS_PTR(prev);
123
if (prev & DST_METRICS_READ_ONLY)
124
p = NULL;
125
}
126
}
127
return p;
128
}
129
130
static struct dst_ops ip6_dst_ops_template = {
131
.family = AF_INET6,
132
.protocol = cpu_to_be16(ETH_P_IPV6),
133
.gc = ip6_dst_gc,
134
.gc_thresh = 1024,
135
.check = ip6_dst_check,
136
.default_advmss = ip6_default_advmss,
137
.default_mtu = ip6_default_mtu,
138
.cow_metrics = ipv6_cow_metrics,
139
.destroy = ip6_dst_destroy,
140
.ifdown = ip6_dst_ifdown,
141
.negative_advice = ip6_negative_advice,
142
.link_failure = ip6_link_failure,
143
.update_pmtu = ip6_rt_update_pmtu,
144
.local_out = __ip6_local_out,
145
};
146
147
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
148
{
149
return 0;
150
}
151
152
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153
{
154
}
155
156
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157
unsigned long old)
158
{
159
return NULL;
160
}
161
162
static struct dst_ops ip6_dst_blackhole_ops = {
163
.family = AF_INET6,
164
.protocol = cpu_to_be16(ETH_P_IPV6),
165
.destroy = ip6_dst_destroy,
166
.check = ip6_dst_check,
167
.default_mtu = ip6_blackhole_default_mtu,
168
.default_advmss = ip6_default_advmss,
169
.update_pmtu = ip6_rt_blackhole_update_pmtu,
170
.cow_metrics = ip6_rt_blackhole_cow_metrics,
171
};
172
173
static const u32 ip6_template_metrics[RTAX_MAX] = {
174
[RTAX_HOPLIMIT - 1] = 255,
175
};
176
177
static struct rt6_info ip6_null_entry_template = {
178
.dst = {
179
.__refcnt = ATOMIC_INIT(1),
180
.__use = 1,
181
.obsolete = -1,
182
.error = -ENETUNREACH,
183
.input = ip6_pkt_discard,
184
.output = ip6_pkt_discard_out,
185
},
186
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
187
.rt6i_protocol = RTPROT_KERNEL,
188
.rt6i_metric = ~(u32) 0,
189
.rt6i_ref = ATOMIC_INIT(1),
190
};
191
192
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template route rejecting traffic with -EACCES (administratively
 * prohibited); used by policy-routing "prohibit" rules. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template route that silently discards traffic ("blackhole" rules). */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
228
229
/* allocate dst with ip6_dst_ops */
230
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
231
struct net_device *dev,
232
int flags)
233
{
234
struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
235
236
memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
237
238
return rt;
239
}
240
241
static void ip6_dst_destroy(struct dst_entry *dst)
242
{
243
struct rt6_info *rt = (struct rt6_info *)dst;
244
struct inet6_dev *idev = rt->rt6i_idev;
245
struct inet_peer *peer = rt->rt6i_peer;
246
247
if (idev != NULL) {
248
rt->rt6i_idev = NULL;
249
in6_dev_put(idev);
250
}
251
if (peer) {
252
rt->rt6i_peer = NULL;
253
inet_putpeer(peer);
254
}
255
}
256
257
static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
258
259
static u32 rt6_peer_genid(void)
260
{
261
return atomic_read(&__rt6_peer_genid);
262
}
263
264
void rt6_bind_peer(struct rt6_info *rt, int create)
265
{
266
struct inet_peer *peer;
267
268
peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
269
if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
270
inet_putpeer(peer);
271
else
272
rt->rt6i_peer_genid = rt6_peer_genid();
273
}
274
275
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
276
int how)
277
{
278
struct rt6_info *rt = (struct rt6_info *)dst;
279
struct inet6_dev *idev = rt->rt6i_idev;
280
struct net_device *loopback_dev =
281
dev_net(dev)->loopback_dev;
282
283
if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
284
struct inet6_dev *loopback_idev =
285
in6_dev_get(loopback_dev);
286
if (loopback_idev != NULL) {
287
rt->rt6i_idev = loopback_idev;
288
in6_dev_put(idev);
289
}
290
}
291
}
292
293
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
294
{
295
return (rt->rt6i_flags & RTF_EXPIRES) &&
296
time_after(jiffies, rt->rt6i_expires);
297
}
298
299
static inline int rt6_need_strict(const struct in6_addr *daddr)
300
{
301
return ipv6_addr_type(daddr) &
302
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
303
}
304
305
/*
306
* Route lookup. Any table->tb6_lock is implied.
307
*/
308
309
static inline struct rt6_info *rt6_device_match(struct net *net,
310
struct rt6_info *rt,
311
const struct in6_addr *saddr,
312
int oif,
313
int flags)
314
{
315
struct rt6_info *local = NULL;
316
struct rt6_info *sprt;
317
318
if (!oif && ipv6_addr_any(saddr))
319
goto out;
320
321
for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
322
struct net_device *dev = sprt->rt6i_dev;
323
324
if (oif) {
325
if (dev->ifindex == oif)
326
return sprt;
327
if (dev->flags & IFF_LOOPBACK) {
328
if (sprt->rt6i_idev == NULL ||
329
sprt->rt6i_idev->dev->ifindex != oif) {
330
if (flags & RT6_LOOKUP_F_IFACE && oif)
331
continue;
332
if (local && (!oif ||
333
local->rt6i_idev->dev->ifindex == oif))
334
continue;
335
}
336
local = sprt;
337
}
338
} else {
339
if (ipv6_chk_addr(net, saddr, dev,
340
flags & RT6_LOOKUP_F_IFACE))
341
return sprt;
342
}
343
}
344
345
if (oif) {
346
if (local)
347
return local;
348
349
if (flags & RT6_LOOKUP_F_IFACE)
350
return net->ipv6.ip6_null_entry;
351
}
352
out:
353
return rt;
354
}
355
356
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Kick off a neighbour-solicitation probe of the route's next hop when
 * its reachability is unknown (Router Reachability Probing). */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Timestamp first so concurrent probers back off. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
390
391
/*
392
* Default Router Selection (RFC 2461 6.3.6)
393
*/
394
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
395
{
396
struct net_device *dev = rt->rt6i_dev;
397
if (!oif || dev->ifindex == oif)
398
return 2;
399
if ((dev->flags & IFF_LOOPBACK) &&
400
rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
401
return 1;
402
return 0;
403
}
404
405
static inline int rt6_check_neigh(struct rt6_info *rt)
406
{
407
struct neighbour *neigh = rt->rt6i_nexthop;
408
int m;
409
if (rt->rt6i_flags & RTF_NONEXTHOP ||
410
!(rt->rt6i_flags & RTF_GATEWAY))
411
m = 1;
412
else if (neigh) {
413
read_lock_bh(&neigh->lock);
414
if (neigh->nud_state & NUD_VALID)
415
m = 2;
416
#ifdef CONFIG_IPV6_ROUTER_PREF
417
else if (neigh->nud_state & NUD_FAILED)
418
m = 0;
419
#endif
420
else
421
m = 1;
422
read_unlock_bh(&neigh->lock);
423
} else
424
m = 0;
425
return m;
426
}
427
428
static int rt6_score_route(struct rt6_info *rt, int oif,
429
int strict)
430
{
431
int m, n;
432
433
m = rt6_check_dev(rt, oif);
434
if (!m && (strict & RT6_LOOKUP_F_IFACE))
435
return -1;
436
#ifdef CONFIG_IPV6_ROUTER_PREF
437
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
438
#endif
439
n = rt6_check_neigh(rt);
440
if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
441
return -1;
442
return m;
443
}
444
445
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
446
int *mpri, struct rt6_info *match)
447
{
448
int m;
449
450
if (rt6_check_expired(rt))
451
goto out;
452
453
m = rt6_score_route(rt, oif, strict);
454
if (m < 0)
455
goto out;
456
457
if (m > *mpri) {
458
if (strict & RT6_LOOKUP_F_REACHABLE)
459
rt6_probe(match);
460
*mpri = m;
461
match = rt;
462
} else if (strict & RT6_LOOKUP_F_REACHABLE) {
463
rt6_probe(rt);
464
}
465
466
out:
467
return match;
468
}
469
470
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
471
struct rt6_info *rr_head,
472
u32 metric, int oif, int strict)
473
{
474
struct rt6_info *rt, *match;
475
int mpri = -1;
476
477
match = NULL;
478
for (rt = rr_head; rt && rt->rt6i_metric == metric;
479
rt = rt->dst.rt6_next)
480
match = find_match(rt, oif, strict, &mpri, match);
481
for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
482
rt = rt->dst.rt6_next)
483
match = find_match(rt, oif, strict, &mpri, match);
484
485
return match;
486
}
487
488
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
489
{
490
struct rt6_info *match, *rt0;
491
struct net *net;
492
493
RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
494
__func__, fn->leaf, oif);
495
496
rt0 = fn->rr_ptr;
497
if (!rt0)
498
fn->rr_ptr = rt0 = fn->leaf;
499
500
match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
501
502
if (!match &&
503
(strict & RT6_LOOKUP_F_REACHABLE)) {
504
struct rt6_info *next = rt0->dst.rt6_next;
505
506
/* no entries matched; do round-robin */
507
if (!next || next->rt6i_metric != rt0->rt6i_metric)
508
next = fn->leaf;
509
510
if (next != rt0)
511
fn->rr_ptr = next;
512
}
513
514
RT6_TRACE("%s() => %p\n",
515
__func__, match);
516
517
net = dev_net(rt0->rt6i_dev);
518
return match ? match : net->ipv6.ip6_null_entry;
519
}
520
521
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process an RFC 4191 Route Information option received in a router
 * advertisement on @dev from @gwaddr: add, refresh or delete the
 * corresponding RTF_ROUTEINFO route.  Returns 0 or -EINVAL. */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length: the option must be big
	 * enough (in 8-octet units) to carry the advertised prefix. */
	if (rinfo->length > 3)
		return -EINVAL;
	else if (rinfo->prefix_len > 128)
		return -EINVAL;
	else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
594
595
/* When a lookup landed on the null entry, walk back up the fib tree
 * (descending into source-address subtrees where present) and retry via
 * the caller's `restart` label; bail to the caller's `out` label at the
 * tree root.  Relies on `rt` and `fn` locals in the invoking function. */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
612
613
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
614
struct fib6_table *table,
615
struct flowi6 *fl6, int flags)
616
{
617
struct fib6_node *fn;
618
struct rt6_info *rt;
619
620
read_lock_bh(&table->tb6_lock);
621
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
622
restart:
623
rt = fn->leaf;
624
rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
625
BACKTRACK(net, &fl6->saddr);
626
out:
627
dst_use(&rt->dst, jiffies);
628
read_unlock_bh(&table->tb6_lock);
629
return rt;
630
631
}
632
633
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
634
const struct in6_addr *saddr, int oif, int strict)
635
{
636
struct flowi6 fl6 = {
637
.flowi6_oif = oif,
638
.daddr = *daddr,
639
};
640
struct dst_entry *dst;
641
int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
642
643
if (saddr) {
644
memcpy(&fl6.saddr, saddr, sizeof(*saddr));
645
flags |= RT6_LOOKUP_F_HAS_SADDR;
646
}
647
648
dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
649
if (dst->error == 0)
650
return (struct rt6_info *) dst;
651
652
dst_release(dst);
653
654
return NULL;
655
}
656
657
EXPORT_SYMBOL(rt6_lookup);
658
659
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason the
   route is freed.  In any case, if the caller does not hold a reference,
   the route may be destroyed.
 */
664
665
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
666
{
667
int err;
668
struct fib6_table *table;
669
670
table = rt->rt6i_table;
671
write_lock_bh(&table->tb6_lock);
672
err = fib6_add(&table->tb6_root, rt, info);
673
write_unlock_bh(&table->tb6_lock);
674
675
return err;
676
}
677
678
int ip6_ins_rt(struct rt6_info *rt)
679
{
680
struct nl_info info = {
681
.nl_net = dev_net(rt->rt6i_dev),
682
};
683
return __ip6_ins_rt(rt, &info);
684
}
685
686
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
687
const struct in6_addr *saddr)
688
{
689
struct rt6_info *rt;
690
691
/*
692
* Clone the route.
693
*/
694
695
rt = ip6_rt_copy(ort);
696
697
if (rt) {
698
struct neighbour *neigh;
699
int attempts = !in_softirq();
700
701
if (!(rt->rt6i_flags&RTF_GATEWAY)) {
702
if (rt->rt6i_dst.plen != 128 &&
703
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
704
rt->rt6i_flags |= RTF_ANYCAST;
705
ipv6_addr_copy(&rt->rt6i_gateway, daddr);
706
}
707
708
ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
709
rt->rt6i_dst.plen = 128;
710
rt->rt6i_flags |= RTF_CACHE;
711
rt->dst.flags |= DST_HOST;
712
713
#ifdef CONFIG_IPV6_SUBTREES
714
if (rt->rt6i_src.plen && saddr) {
715
ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
716
rt->rt6i_src.plen = 128;
717
}
718
#endif
719
720
retry:
721
neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
722
if (IS_ERR(neigh)) {
723
struct net *net = dev_net(rt->rt6i_dev);
724
int saved_rt_min_interval =
725
net->ipv6.sysctl.ip6_rt_gc_min_interval;
726
int saved_rt_elasticity =
727
net->ipv6.sysctl.ip6_rt_gc_elasticity;
728
729
if (attempts-- > 0) {
730
net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
731
net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
732
733
ip6_dst_gc(&net->ipv6.ip6_dst_ops);
734
735
net->ipv6.sysctl.ip6_rt_gc_elasticity =
736
saved_rt_elasticity;
737
net->ipv6.sysctl.ip6_rt_gc_min_interval =
738
saved_rt_min_interval;
739
goto retry;
740
}
741
742
if (net_ratelimit())
743
printk(KERN_WARNING
744
"ipv6: Neighbour table overflow.\n");
745
dst_free(&rt->dst);
746
return NULL;
747
}
748
rt->rt6i_nexthop = neigh;
749
750
}
751
752
return rt;
753
}
754
755
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
756
{
757
struct rt6_info *rt = ip6_rt_copy(ort);
758
if (rt) {
759
ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
760
rt->rt6i_dst.plen = 128;
761
rt->rt6i_flags |= RTF_CACHE;
762
rt->dst.flags |= DST_HOST;
763
rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
764
}
765
return rt;
766
}
767
768
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
769
struct flowi6 *fl6, int flags)
770
{
771
struct fib6_node *fn;
772
struct rt6_info *rt, *nrt;
773
int strict = 0;
774
int attempts = 3;
775
int err;
776
int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
777
778
strict |= flags & RT6_LOOKUP_F_IFACE;
779
780
relookup:
781
read_lock_bh(&table->tb6_lock);
782
783
restart_2:
784
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
785
786
restart:
787
rt = rt6_select(fn, oif, strict | reachable);
788
789
BACKTRACK(net, &fl6->saddr);
790
if (rt == net->ipv6.ip6_null_entry ||
791
rt->rt6i_flags & RTF_CACHE)
792
goto out;
793
794
dst_hold(&rt->dst);
795
read_unlock_bh(&table->tb6_lock);
796
797
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
798
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
799
else if (!(rt->dst.flags & DST_HOST))
800
nrt = rt6_alloc_clone(rt, &fl6->daddr);
801
else
802
goto out2;
803
804
dst_release(&rt->dst);
805
rt = nrt ? : net->ipv6.ip6_null_entry;
806
807
dst_hold(&rt->dst);
808
if (nrt) {
809
err = ip6_ins_rt(nrt);
810
if (!err)
811
goto out2;
812
}
813
814
if (--attempts <= 0)
815
goto out2;
816
817
/*
818
* Race condition! In the gap, when table->tb6_lock was
819
* released someone could insert this route. Relookup.
820
*/
821
dst_release(&rt->dst);
822
goto relookup;
823
824
out:
825
if (reachable) {
826
reachable = 0;
827
goto restart_2;
828
}
829
dst_hold(&rt->dst);
830
read_unlock_bh(&table->tb6_lock);
831
out2:
832
rt->dst.lastuse = jiffies;
833
rt->dst.__use++;
834
835
return rt;
836
}
837
838
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
839
struct flowi6 *fl6, int flags)
840
{
841
return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
842
}
843
844
void ip6_route_input(struct sk_buff *skb)
845
{
846
const struct ipv6hdr *iph = ipv6_hdr(skb);
847
struct net *net = dev_net(skb->dev);
848
int flags = RT6_LOOKUP_F_HAS_SADDR;
849
struct flowi6 fl6 = {
850
.flowi6_iif = skb->dev->ifindex,
851
.daddr = iph->daddr,
852
.saddr = iph->saddr,
853
.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
854
.flowi6_mark = skb->mark,
855
.flowi6_proto = iph->nexthdr,
856
};
857
858
if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
859
flags |= RT6_LOOKUP_F_IFACE;
860
861
skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
862
}
863
864
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
865
struct flowi6 *fl6, int flags)
866
{
867
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
868
}
869
870
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
871
struct flowi6 *fl6)
872
{
873
int flags = 0;
874
875
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
876
flags |= RT6_LOOKUP_F_IFACE;
877
878
if (!ipv6_addr_any(&fl6->saddr))
879
flags |= RT6_LOOKUP_F_HAS_SADDR;
880
else if (sk)
881
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
882
883
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
884
}
885
886
EXPORT_SYMBOL(ip6_route_output);
887
888
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
889
{
890
struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
891
struct dst_entry *new = NULL;
892
893
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
894
if (rt) {
895
memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
896
897
new = &rt->dst;
898
899
new->__use = 1;
900
new->input = dst_discard;
901
new->output = dst_discard;
902
903
dst_copy_metrics(new, &ort->dst);
904
rt->rt6i_idev = ort->rt6i_idev;
905
if (rt->rt6i_idev)
906
in6_dev_hold(rt->rt6i_idev);
907
rt->rt6i_expires = 0;
908
909
ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
910
rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
911
rt->rt6i_metric = 0;
912
913
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
914
#ifdef CONFIG_IPV6_SUBTREES
915
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
916
#endif
917
918
dst_free(new);
919
}
920
921
dst_release(dst_orig);
922
return new ? new : ERR_PTR(-ENOMEM);
923
}
924
925
/*
926
* Destination cache support functions
927
*/
928
929
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
930
{
931
struct rt6_info *rt;
932
933
rt = (struct rt6_info *) dst;
934
935
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
936
if (rt->rt6i_peer_genid != rt6_peer_genid()) {
937
if (!rt->rt6i_peer)
938
rt6_bind_peer(rt, 0);
939
rt->rt6i_peer_genid = rt6_peer_genid();
940
}
941
return dst;
942
}
943
return NULL;
944
}
945
946
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
947
{
948
struct rt6_info *rt = (struct rt6_info *) dst;
949
950
if (rt) {
951
if (rt->rt6i_flags & RTF_CACHE) {
952
if (rt6_check_expired(rt)) {
953
ip6_del_rt(rt);
954
dst = NULL;
955
}
956
} else {
957
dst_release(dst);
958
dst = NULL;
959
}
960
}
961
return dst;
962
}
963
964
static void ip6_link_failure(struct sk_buff *skb)
965
{
966
struct rt6_info *rt;
967
968
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
969
970
rt = (struct rt6_info *) skb_dst(skb);
971
if (rt) {
972
if (rt->rt6i_flags&RTF_CACHE) {
973
dst_set_expires(&rt->dst, 0);
974
rt->rt6i_flags |= RTF_EXPIRES;
975
} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
976
rt->rt6i_node->fn_sernum = -1;
977
}
978
}
979
980
/* dst_ops->update_pmtu: lower the cached MTU of a host route.  Values
 * below IPV6_MIN_MTU are clamped and ALLFRAG is enabled instead. */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);

			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
	}
}
995
996
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
997
{
998
struct net_device *dev = dst->dev;
999
unsigned int mtu = dst_mtu(dst);
1000
struct net *net = dev_net(dev);
1001
1002
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1003
1004
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1005
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1006
1007
/*
1008
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1009
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1010
* IPV6_MAXPLEN is also valid and means: "any MSS,
1011
* rely only on pmtu discovery"
1012
*/
1013
if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1014
mtu = IPV6_MAXPLEN;
1015
return mtu;
1016
}
1017
1018
static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1019
{
1020
unsigned int mtu = IPV6_MIN_MTU;
1021
struct inet6_dev *idev;
1022
1023
rcu_read_lock();
1024
idev = __in6_dev_get(dst->dev);
1025
if (idev)
1026
mtu = idev->cnf.mtu6;
1027
rcu_read_unlock();
1028
1029
return mtu;
1030
}
1031
1032
static struct dst_entry *icmp6_dst_gc_list;
1033
static DEFINE_SPINLOCK(icmp6_dst_lock);
1034
1035
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1036
struct neighbour *neigh,
1037
const struct in6_addr *addr)
1038
{
1039
struct rt6_info *rt;
1040
struct inet6_dev *idev = in6_dev_get(dev);
1041
struct net *net = dev_net(dev);
1042
1043
if (unlikely(idev == NULL))
1044
return NULL;
1045
1046
rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1047
if (unlikely(rt == NULL)) {
1048
in6_dev_put(idev);
1049
goto out;
1050
}
1051
1052
if (neigh)
1053
neigh_hold(neigh);
1054
else {
1055
neigh = ndisc_get_neigh(dev, addr);
1056
if (IS_ERR(neigh))
1057
neigh = NULL;
1058
}
1059
1060
rt->rt6i_idev = idev;
1061
rt->rt6i_nexthop = neigh;
1062
atomic_set(&rt->dst.__refcnt, 1);
1063
dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1064
rt->dst.output = ip6_output;
1065
1066
spin_lock_bh(&icmp6_dst_lock);
1067
rt->dst.next = icmp6_dst_gc_list;
1068
icmp6_dst_gc_list = &rt->dst;
1069
spin_unlock_bh(&icmp6_dst_lock);
1070
1071
fib6_force_start_gc(net);
1072
1073
out:
1074
return &rt->dst;
1075
}
1076
1077
int icmp6_dst_gc(void)
1078
{
1079
struct dst_entry *dst, **pprev;
1080
int more = 0;
1081
1082
spin_lock_bh(&icmp6_dst_lock);
1083
pprev = &icmp6_dst_gc_list;
1084
1085
while ((dst = *pprev) != NULL) {
1086
if (!atomic_read(&dst->__refcnt)) {
1087
*pprev = dst->next;
1088
dst_free(dst);
1089
} else {
1090
pprev = &dst->next;
1091
++more;
1092
}
1093
}
1094
1095
spin_unlock_bh(&icmp6_dst_lock);
1096
1097
return more;
1098
}
1099
1100
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1101
void *arg)
1102
{
1103
struct dst_entry *dst, **pprev;
1104
1105
spin_lock_bh(&icmp6_dst_lock);
1106
pprev = &icmp6_dst_gc_list;
1107
while ((dst = *pprev) != NULL) {
1108
struct rt6_info *rt = (struct rt6_info *) dst;
1109
if (func(rt, arg)) {
1110
*pprev = dst->next;
1111
dst_free(dst);
1112
} else {
1113
pprev = &dst->next;
1114
}
1115
}
1116
spin_unlock_bh(&icmp6_dst_lock);
1117
}
1118
1119
static int ip6_dst_gc(struct dst_ops *ops)
1120
{
1121
unsigned long now = jiffies;
1122
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1123
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1124
int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1125
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1126
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1127
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1128
int entries;
1129
1130
entries = dst_entries_get_fast(ops);
1131
if (time_after(rt_last_gc + rt_min_interval, now) &&
1132
entries <= rt_max_size)
1133
goto out;
1134
1135
net->ipv6.ip6_rt_gc_expire++;
1136
fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1137
net->ipv6.ip6_rt_last_gc = now;
1138
entries = dst_entries_get_slow(ops);
1139
if (entries < ops->gc_thresh)
1140
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1141
out:
1142
net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1143
return entries > rt_max_size;
1144
}
1145
1146
/* Clean host part of a prefix. Not necessary in radix tree,
1147
but results in cleaner routing tables.
1148
1149
Remove it only when all the things will work!
1150
*/
1151
1152
int ip6_dst_hoplimit(struct dst_entry *dst)
1153
{
1154
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1155
if (hoplimit == 0) {
1156
struct net_device *dev = dst->dev;
1157
struct inet6_dev *idev;
1158
1159
rcu_read_lock();
1160
idev = __in6_dev_get(dev);
1161
if (idev)
1162
hoplimit = idev->cnf.hop_limit;
1163
else
1164
hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1165
rcu_read_unlock();
1166
}
1167
return hoplimit;
1168
}
1169
EXPORT_SYMBOL(ip6_dst_hoplimit);
1170
1171
/*
1172
*
1173
*/
1174
1175
/*
 * Add the IPv6 route described by @cfg to the FIB.
 *
 * Resolves the output device/idev from cfg->fc_ifindex (or from a gateway
 * lookup), finds or creates the target routing table, allocates and fills
 * a new rt6_info and inserts it via __ip6_ins_rt().
 *
 * Returns 0 on success or a negative errno.  On any error path all
 * dev/idev references taken here are dropped and the unfinished rt6_info
 * is freed; on success ownership of dev/idev passes to the new route.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* Prefix lengths beyond 128 bits are malformed. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing requires subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(net, cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;
	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
				0;

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
					      && !(cfg->fc_flags&RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The non-link-local gateway must itself be
			 * reachable via an already-installed route. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* Adopt dev/idev from the gateway route. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be configured on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply any netlink-supplied per-route metrics (RTA_METRICS). */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1387
1388
/*
 * Remove @rt from its FIB table under tb6_lock and drop the caller's
 * reference.  The namespace null entry is never deleted.
 * Returns the fib6_del() result or -ENOENT for the null entry.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	/* Consume the reference the caller held on @rt. */
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}
1407
1408
int ip6_del_rt(struct rt6_info *rt)
1409
{
1410
struct nl_info info = {
1411
.nl_net = dev_net(rt->rt6i_dev),
1412
};
1413
return __ip6_del_rt(rt, &info);
1414
}
1415
1416
/*
 * Delete the first route matching @cfg (destination/source prefix, and
 * optionally ifindex, gateway and metric) from the requested table.
 * Returns 0 on success or -ESRCH if no matching route exists.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Optional selectors: skip non-matching entries. */
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the route before dropping the table lock;
			 * __ip6_del_rt() consumes this reference. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1454
1455
/*
1456
* Handle redirects
1457
*/
1458
/*
 * flowi6 extended with the redirecting router's address.  Passed to
 * fib6_rule_lookup() as a plain flowi6 and cast back to ip6rd_flowi
 * inside __ip6_route_redirect(), so fl6 must stay the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1462
1463
static struct rt6_info *__ip6_route_redirect(struct net *net,
1464
struct fib6_table *table,
1465
struct flowi6 *fl6,
1466
int flags)
1467
{
1468
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1469
struct rt6_info *rt;
1470
struct fib6_node *fn;
1471
1472
/*
1473
* Get the "current" route for this destination and
1474
* check if the redirect has come from approriate router.
1475
*
1476
* RFC 2461 specifies that redirects should only be
1477
* accepted if they come from the nexthop to the target.
1478
* Due to the way the routes are chosen, this notion
1479
* is a bit fuzzy and one might need to check all possible
1480
* routes.
1481
*/
1482
1483
read_lock_bh(&table->tb6_lock);
1484
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1485
restart:
1486
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1487
/*
1488
* Current route is on-link; redirect is always invalid.
1489
*
1490
* Seems, previous statement is not true. It could
1491
* be node, which looks for us as on-link (f.e. proxy ndisc)
1492
* But then router serving it might decide, that we should
1493
* know truth 8)8) --ANK (980726).
1494
*/
1495
if (rt6_check_expired(rt))
1496
continue;
1497
if (!(rt->rt6i_flags & RTF_GATEWAY))
1498
continue;
1499
if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1500
continue;
1501
if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1502
continue;
1503
break;
1504
}
1505
1506
if (!rt)
1507
rt = net->ipv6.ip6_null_entry;
1508
BACKTRACK(net, &fl6->saddr);
1509
out:
1510
dst_hold(&rt->dst);
1511
1512
read_unlock_bh(&table->tb6_lock);
1513
1514
return rt;
1515
};
1516
1517
/*
 * Look up the route currently used for @dest/@src via policy routing,
 * constrained to routes whose next hop is @gateway on @dev.  Wraps
 * __ip6_route_redirect() through fib6_rule_lookup() using an
 * ip6rd_flowi to smuggle the gateway address along.
 */
static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
					   const struct in6_addr *src,
					   const struct in6_addr *gateway,
					   struct net_device *dev)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct net *net = dev_net(dev);
	struct ip6rd_flowi rdfl = {
		.fl6 = {
			.flowi6_oif = dev->ifindex,
			.daddr = *dest,
			.saddr = *src,
		},
	};

	ipv6_addr_copy(&rdfl.gateway, gateway);

	/* Link-local/multicast destinations are interface-specific. */
	if (rt6_need_strict(dest))
		flags |= RT6_LOOKUP_F_IFACE;

	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
						   flags, __ip6_route_redirect);
}
1540
1541
/*
 * Process a validated ICMPv6 redirect: @dest is now reachable via
 * @neigh (link-layer address @lladdr) instead of the current next hop.
 * Updates the neighbour cache, clones the existing route into a host
 * cache entry pointing at the new next hop, and removes the old cache
 * entry if there was one.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* The clone is a host route for the redirected destination. */
	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* A previous cached entry is superseded; ip6_del_rt() consumes
	 * our reference on it. */
	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1610
1611
/*
1612
* Handle ICMP "packet too big" messages
1613
* i.e. Path MTU discovery
1614
*/
1615
1616
/*
 * Apply a Packet Too Big report of @pmtu for @daddr (optionally scoped
 * to @ifindex).  Updates the MTU metric on an existing cache entry, or
 * clones/COWs the covering network route into a new expiring host route
 * carrying the reduced MTU.  MTUs below IPV6_MIN_MTU are clamped to
 * 1280 and the ALLFRAG feature is set (RFC 2460).
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	/* Drop expired entries and retry so we operate on a live route. */
	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	/* Only ever shrink the path MTU here. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1700
1701
/*
 * Entry point for ICMPv6 Packet Too Big handling: propagate the reported
 * @pmtu for @daddr both globally (any interface) and for the interface
 * the message arrived on.
 */
void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}
1721
1722
/*
1723
* Misc support functions
1724
*/
1725
1726
/*
 * Allocate a new rt6_info initialised as a copy of @ort: handlers,
 * metrics, idev (with an extra reference), gateway and keys.  The copy
 * never inherits RTF_EXPIRES and starts with metric 0; callers adjust
 * flags/destination as needed before inserting it.
 * Returns NULL on allocation failure.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    ort->dst.dev, 0);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1757
1758
#ifdef CONFIG_IPV6_ROUTE_INFO
1759
/*
 * Find an RA route-information route (RTF_ROUTEINFO|RTF_GATEWAY) for
 * @prefix/@prefixlen learned from @gwaddr on interface @ifindex in the
 * RT6_TABLE_INFO table.  Returns the route with a reference held, or
 * NULL if not present.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1790
1791
/*
 * Install a route learned from an RA Route Information option into
 * RT6_TABLE_INFO and return it (held) via rt6_get_route_info().
 * A zero @prefixlen is treated as a default route.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	/* Errors are not propagated; the follow-up lookup reports
	 * success/failure by returning the route or NULL. */
	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
1819
#endif
1820
1821
/*
 * Find the RA-learned default route via router @addr on @dev in the
 * RT6_TABLE_DFLT table.  Returns the route with a reference held, or
 * NULL if not present.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1842
1843
/*
 * Install an RA-learned default route via @gwaddr on @dev (expiring,
 * preference @pref) into RT6_TABLE_DFLT and return it (held) via
 * rt6_get_dflt_router().
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* Errors are not propagated; the follow-up lookup reports
	 * success/failure by returning the route or NULL. */
	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
1864
1865
/*
 * Delete every RA-learned (RTF_DEFAULT/RTF_ADDRCONF) route from the
 * default-router table.  The lock must be dropped around each
 * ip6_del_rt(), so the scan restarts from the root after every
 * deletion.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold the route across the unlock; ip6_del_rt()
			 * consumes this reference. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1887
1888
/*
 * Convert a legacy ioctl in6_rtmsg into a fib6_config for
 * ip6_route_add()/ip6_route_del().  ioctl routes always go to the main
 * table; all fields not present in in6_rtmsg are zeroed.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
1908
1909
/*
 * Legacy SIOCADDRT/SIOCDELRT ioctl handler.  Requires CAP_NET_ADMIN,
 * copies an in6_rtmsg from userspace and performs the add/delete under
 * the RTNL lock.  Returns 0, a negative errno, or -EINVAL for any
 * other command.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns the number of bytes NOT copied. */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
1945
1946
/*
1947
* Drop the packet on the floor
1948
*/
1949
1950
/*
 * Common helper for reject/blackhole routes: bump the appropriate SNMP
 * counter, send an ICMPv6 Destination Unreachable with @code, and free
 * the packet.  Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		/* An unspecified destination counts as an address error,
		 * not a routing failure. */
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
1972
1973
/* Input handler for reject routes: drop with "no route" semantics. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1977
1978
/* Output handler for reject routes: drop with "no route" semantics. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
1983
1984
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1985
1986
/* Input handler for prohibit routes: drop with "administratively
 * prohibited" semantics. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
1990
1991
/* Output handler for prohibit routes: drop with "administratively
 * prohibited" semantics. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
1996
1997
#endif
1998
1999
/*
2000
* Allocate a dst for local (unicast / anycast) address.
2001
*/
2002
2003
/*
 * Allocate a host route for a local unicast or anycast address @addr
 * on @idev, attached to the loopback device and destined for the local
 * table.  Returns the route with refcount 1, or ERR_PTR(-errno).
 * The caller is expected to insert it into RT6_TABLE_LOCAL.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    net->loopback_dev, 0);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6: Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Reference for rt->rt6i_idev; released by dst destruction. */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->dst);

		return ERR_CAST(neigh);
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2048
2049
int ip6_route_get_saddr(struct net *net,
2050
struct rt6_info *rt,
2051
const struct in6_addr *daddr,
2052
unsigned int prefs,
2053
struct in6_addr *saddr)
2054
{
2055
struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2056
int err = 0;
2057
if (rt->rt6i_prefsrc.plen)
2058
ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2059
else
2060
err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2061
daddr, prefs, saddr);
2062
return err;
2063
}
2064
2065
/* remove deleted ip from prefsrc entries */
2066
/* remove deleted ip from prefsrc entries */
/* Argument bundle for fib6_remove_prefsrc() tree walks. */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};
2071
2072
static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2073
{
2074
struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2075
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2076
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2077
2078
if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2079
rt != net->ipv6.ip6_null_entry &&
2080
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2081
/* remove prefsrc entry */
2082
rt->rt6i_prefsrc.plen = 0;
2083
}
2084
return 0;
2085
}
2086
2087
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2088
{
2089
struct net *net = dev_net(ifp->idev->dev);
2090
struct arg_dev_net_ip adni = {
2091
.dev = ifp->idev->dev,
2092
.net = net,
2093
.addr = &ifp->addr,
2094
};
2095
fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2096
}
2097
2098
/* Argument bundle for fib6_ifdown() tree walks. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2102
2103
static int fib6_ifdown(struct rt6_info *rt, void *arg)
2104
{
2105
const struct arg_dev_net *adn = arg;
2106
const struct net_device *dev = adn->dev;
2107
2108
if ((rt->rt6i_dev == dev || dev == NULL) &&
2109
rt != adn->net->ipv6.ip6_null_entry) {
2110
RT6_TRACE("deleted by ifdown %p\n", rt);
2111
return -1;
2112
}
2113
return 0;
2114
}
2115
2116
void rt6_ifdown(struct net *net, struct net_device *dev)
2117
{
2118
struct arg_dev_net adn = {
2119
.dev = dev,
2120
.net = net,
2121
};
2122
2123
fib6_clean_all(net, fib6_ifdown, 0, &adn);
2124
icmp6_clean_all(fib6_ifdown, &adn);
2125
}
2126
2127
/* Argument bundle for rt6_mtu_change_route() tree walks. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2132
2133
/*
 * fib6_clean_all() callback: propagate a device MTU change to routes on
 * that device, unless the route's MTU metric is administratively
 * locked.  Always returns 0 (never deletes a route).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}
2171
2172
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2173
{
2174
struct rt6_mtu_change_arg arg = {
2175
.dev = dev,
2176
.mtu = mtu,
2177
};
2178
2179
fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2180
}
2181
2182
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE.
 * Attributes not listed (RTA_DST, RTA_SRC, ...) are length-checked
 * explicitly in rtm_to_fib6_config(). */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2189
2190
/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE message into a fib6_config.
 * Validates attributes against rtm_ipv6_policy and checks that RTA_DST
 * and RTA_SRC carry at least the bytes implied by the prefix lengths.
 * Note: cfg->fc_mx points into the message payload, not a copy.
 * Returns 0 on success or a negative errno.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Only rtm_dst_len bits of the address need be present. */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* A full u32 RTA_TABLE overrides the legacy 8-bit rtm_table. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}
2265
2266
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2267
{
2268
struct fib6_config cfg;
2269
int err;
2270
2271
err = rtm_to_fib6_config(skb, nlh, &cfg);
2272
if (err < 0)
2273
return err;
2274
2275
return ip6_route_del(&cfg);
2276
}
2277
2278
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2279
{
2280
struct fib6_config cfg;
2281
int err;
2282
2283
err = rtm_to_fib6_config(skb, nlh, &cfg);
2284
if (err < 0)
2285
return err;
2286
2287
return ip6_route_add(&cfg);
2288
}
2289
2290
static inline size_t rt6_nlmsg_size(void)
2291
{
2292
return NLMSG_ALIGN(sizeof(struct rtmsg))
2293
+ nla_total_size(16) /* RTA_SRC */
2294
+ nla_total_size(16) /* RTA_DST */
2295
+ nla_total_size(16) /* RTA_GATEWAY */
2296
+ nla_total_size(16) /* RTA_PREFSRC */
2297
+ nla_total_size(4) /* RTA_TABLE */
2298
+ nla_total_size(4) /* RTA_IIF */
2299
+ nla_total_size(4) /* RTA_OIF */
2300
+ nla_total_size(4) /* RTA_PRIORITY */
2301
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2302
+ nla_total_size(sizeof(struct rta_cacheinfo));
2303
}
2304
2305
/*
 * Serialise @rt into a netlink RTM_* message appended to @skb.
 * @dst/@src, when set, replace the route's own keys (used for
 * RTM_GETROUTE replies about a specific lookup); @iif marks an input
 * route.  When @prefix is set, non-prefix routes are skipped (returns
 * 1).  Returns the nlmsg_end() result, 0 (mc route deferred), or
 * -EMSGSIZE on overflow (the partial message is cancelled; the NLA_PUT
 * macros jump to nla_put_failure).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags&RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast routes are reported via ip6mr; it may defer
		 * the answer (err == 0 with !nowait). */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Remaining lifetime, clamped so it fits the cacheinfo field. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2428
2429
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2430
{
2431
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2432
int prefix;
2433
2434
if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2435
struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2436
prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2437
} else
2438
prefix = 0;
2439
2440
return rt6_fill_node(arg->net,
2441
arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2442
NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2443
prefix, 0, NLM_F_MULTI);
2444
}
2445
2446
/* RTM_GETROUTE handler: look up the route matching the addresses in the
 * request, build a single RTM_NEWROUTE reply via rt6_fill_node() and
 * unicast it back to the sender.  Returns 0 or a negative errno.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	/* Optional source/destination selectors for the flow lookup;
	 * a short attribute is rejected with -EINVAL. */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		struct net_device *dev;
		/* NOTE(review): this only verifies the requested input
		 * interface exists; dev is not used afterwards and the
		 * lookup below is still an output-route lookup. */
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The dst reference returned by the lookup is handed to the skb
	 * and dropped when the skb is freed. */
	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2519
2520
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE (@event) notification for @rt
 * to the RTNLGRP_IPV6_ROUTE multicast group.  On allocation or fill
 * failure the error is reported to the group via rtnl_set_sk_err().
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	/* Echo the request's sequence number when the change was driven
	 * by a netlink request; kernel-originated changes use 0. */
	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (skb == NULL)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->pid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
2549
2550
static int ip6_route_dev_notify(struct notifier_block *this,
2551
unsigned long event, void *data)
2552
{
2553
struct net_device *dev = (struct net_device *)data;
2554
struct net *net = dev_net(dev);
2555
2556
if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2557
net->ipv6.ip6_null_entry->dst.dev = dev;
2558
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2559
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2560
net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2561
net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2562
net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2563
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2564
#endif
2565
}
2566
2567
return NOTIFY_OK;
2568
}
2569
2570
/*
2571
* /proc
2572
*/
2573
2574
#ifdef CONFIG_PROC_FS
2575
2576
/* Argument bundle for the old buffer-based /proc read interface.
 * NOTE(review): nothing in this part of the file references it — the
 * seq_file readers below take a struct seq_file directly.  Looks like a
 * leftover; confirm it is unused elsewhere before removing. */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2584
2585
/* fib6 walker callback for /proc/net/ipv6_route: print one route as
 * "dst plen src plen nexthop metric refcnt use flags devname".
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct seq_file *m = p_arg;

	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
#else
	/* Without subtrees the source selector column is always empty. */
	seq_puts(m, "00000000000000000000000000000000 00 ");
#endif

	if (rt->rt6i_nexthop) {
		seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
	} else {
		/* No gateway: print an all-zero address placeholder. */
		seq_puts(m, "00000000000000000000000000000000");
	}
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
2608
2609
static int ipv6_route_show(struct seq_file *m, void *v)
2610
{
2611
struct net *net = (struct net *)m->private;
2612
fib6_clean_all(net, rt6_info_route, 0, m);
2613
return 0;
2614
}
2615
2616
/* open() for /proc/net/ipv6_route: per-namespace single-shot seq_file. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2620
2621
/* File operations for /proc/net/ipv6_route. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2628
2629
/* seq_file show for /proc/net/rt6_stats: seven hex words of per-netns
 * fib6 statistics (fib nodes, route nodes, rt allocs, rt entries,
 * cached entries, dst entries in use, discarded routes).
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2643
2644
/* open() for /proc/net/rt6_stats: per-namespace single-shot seq_file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2648
2649
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= rt6_stats_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2656
#endif /* CONFIG_PROC_FS */
2657
2658
#ifdef CONFIG_SYSCTL
2659
2660
static
2661
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2662
void __user *buffer, size_t *lenp, loff_t *ppos)
2663
{
2664
struct net *net;
2665
int delay;
2666
if (!write)
2667
return -EINVAL;
2668
2669
net = (struct net *)ctl->extra1;
2670
delay = net->ipv6.sysctl.flush_delay;
2671
proc_dointvec(ctl, write, buffer, lenp, ppos);
2672
fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2673
return 0;
2674
}
2675
2676
/* Template for the per-namespace net.ipv6.route.* sysctl table.
 * ipv6_route_sysctl_init() duplicates it and patches each entry's .data
 * pointer BY INDEX — keep the entry order here and there in sync.
 * The .data values below point at init_net and are only placeholders. */
ctl_table ipv6_route_table_template[] = {
	{
		/* Write-only trigger; see ipv6_sysctl_rtcache_flush(). */
		.procname	= "flush",
		.data		= &init_net.ipv6.sysctl.flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv6_sysctl_rtcache_flush
	},
	{
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops_template.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Millisecond view of the same gc_min_interval value. */
		.procname	= "gc_min_interval_ms",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ }
};
2749
2750
/* Allocate this namespace's copy of the route sysctl table and retarget
 * each entry's .data at the per-netns field.  The indices below must
 * match the entry order of ipv6_route_table_template[].  Returns NULL on
 * allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* extra1 carries the netns to ipv6_sysctl_rtcache_flush(). */
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms shares storage with gc_min_interval. */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2774
#endif
2775
2776
/* Per-namespace setup for the IPv6 routing subsystem: clone the dst ops
 * and the special route templates (null and, with multiple tables,
 * prohibit/blackhole), seed the sysctl defaults and register the /proc
 * files.  Returns 0 or -ENOMEM; partially-built state is unwound via the
 * goto chain at the bottom.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* Each special entry is its own dst path and uses this netns' ops. */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default tunables; overridable via net.ipv6.route.* sysctls. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwinding, in reverse order of construction. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2851
2852
/* Per-namespace teardown: remove the /proc files, free the special route
 * entries allocated by ip6_route_net_init() and release the dst entry
 * counters.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2865
2866
/* Per-network-namespace lifecycle hooks for the IPv6 routing code. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2870
2871
/* Netdevice event listener; see ip6_route_dev_notify(). */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2875
2876
/* Module init for IPv6 routing: create the dst slab cache, register the
 * pernet ops, wire the special routes of init_net to loopback, then bring
 * up fib6, xfrm6, fib6 rules, the rtnetlink handlers and the netdevice
 * notifier.  Error labels are named after the step that must be undone
 * first and unwind in reverse order.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
		goto fib6_rules_init;

	/* NOTE(review): if this fails, the three rtnl handlers registered
	 * above are not unregistered on the way out — confirm whether that
	 * is acceptable here. */
	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
2947
2948
/* Module teardown: undo everything ip6_route_init() set up, in reverse
 * order of initialization.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
2958
2959