Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/ipv6/anycast.c
15109 views
1
/*
 *	Anycast support for IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	David L Stevens ([email protected])
 *
 *	based heavily on net/ipv6/mcast.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
16
#include <linux/capability.h>
17
#include <linux/module.h>
18
#include <linux/errno.h>
19
#include <linux/types.h>
20
#include <linux/random.h>
21
#include <linux/string.h>
22
#include <linux/socket.h>
23
#include <linux/sockios.h>
24
#include <linux/net.h>
25
#include <linux/in6.h>
26
#include <linux/netdevice.h>
27
#include <linux/if_arp.h>
28
#include <linux/route.h>
29
#include <linux/init.h>
30
#include <linux/proc_fs.h>
31
#include <linux/seq_file.h>
32
#include <linux/slab.h>
33
34
#include <net/net_namespace.h>
35
#include <net/sock.h>
36
#include <net/snmp.h>
37
38
#include <net/ipv6.h>
39
#include <net/protocol.h>
40
#include <net/if_inet6.h>
41
#include <net/ndisc.h>
42
#include <net/addrconf.h>
43
#include <net/ip6_route.h>
44
45
#include <net/checksum.h>
46
47
/* Forward declaration; the definition appears later in this file. */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);

/* Big ac list lock for all the sockets */
static DEFINE_RWLOCK(ipv6_sk_ac_lock);
51
52
53
/*
54
* socket join an anycast group
55
*/
56
57
int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
58
{
59
struct ipv6_pinfo *np = inet6_sk(sk);
60
struct net_device *dev = NULL;
61
struct inet6_dev *idev;
62
struct ipv6_ac_socklist *pac;
63
struct net *net = sock_net(sk);
64
int ishost = !net->ipv6.devconf_all->forwarding;
65
int err = 0;
66
67
if (!capable(CAP_NET_ADMIN))
68
return -EPERM;
69
if (ipv6_addr_is_multicast(addr))
70
return -EINVAL;
71
if (ipv6_chk_addr(net, addr, NULL, 0))
72
return -EINVAL;
73
74
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
75
if (pac == NULL)
76
return -ENOMEM;
77
pac->acl_next = NULL;
78
ipv6_addr_copy(&pac->acl_addr, addr);
79
80
rcu_read_lock();
81
if (ifindex == 0) {
82
struct rt6_info *rt;
83
84
rt = rt6_lookup(net, addr, NULL, 0, 0);
85
if (rt) {
86
dev = rt->rt6i_dev;
87
dst_release(&rt->dst);
88
} else if (ishost) {
89
err = -EADDRNOTAVAIL;
90
goto error;
91
} else {
92
/* router, no matching interface: just pick one */
93
dev = dev_get_by_flags_rcu(net, IFF_UP,
94
IFF_UP | IFF_LOOPBACK);
95
}
96
} else
97
dev = dev_get_by_index_rcu(net, ifindex);
98
99
if (dev == NULL) {
100
err = -ENODEV;
101
goto error;
102
}
103
104
idev = __in6_dev_get(dev);
105
if (!idev) {
106
if (ifindex)
107
err = -ENODEV;
108
else
109
err = -EADDRNOTAVAIL;
110
goto error;
111
}
112
/* reset ishost, now that we have a specific device */
113
ishost = !idev->cnf.forwarding;
114
115
pac->acl_ifindex = dev->ifindex;
116
117
/* XXX
118
* For hosts, allow link-local or matching prefix anycasts.
119
* This obviates the need for propagating anycast routes while
120
* still allowing some non-router anycast participation.
121
*/
122
if (!ipv6_chk_prefix(addr, dev)) {
123
if (ishost)
124
err = -EADDRNOTAVAIL;
125
if (err)
126
goto error;
127
}
128
129
err = ipv6_dev_ac_inc(dev, addr);
130
if (!err) {
131
write_lock_bh(&ipv6_sk_ac_lock);
132
pac->acl_next = np->ipv6_ac_list;
133
np->ipv6_ac_list = pac;
134
write_unlock_bh(&ipv6_sk_ac_lock);
135
pac = NULL;
136
}
137
138
error:
139
rcu_read_unlock();
140
if (pac)
141
sock_kfree_s(sk, pac, sizeof(*pac));
142
return err;
143
}
144
145
/*
146
* socket leave an anycast group
147
*/
148
int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
149
{
150
struct ipv6_pinfo *np = inet6_sk(sk);
151
struct net_device *dev;
152
struct ipv6_ac_socklist *pac, *prev_pac;
153
struct net *net = sock_net(sk);
154
155
write_lock_bh(&ipv6_sk_ac_lock);
156
prev_pac = NULL;
157
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
158
if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
159
ipv6_addr_equal(&pac->acl_addr, addr))
160
break;
161
prev_pac = pac;
162
}
163
if (!pac) {
164
write_unlock_bh(&ipv6_sk_ac_lock);
165
return -ENOENT;
166
}
167
if (prev_pac)
168
prev_pac->acl_next = pac->acl_next;
169
else
170
np->ipv6_ac_list = pac->acl_next;
171
172
write_unlock_bh(&ipv6_sk_ac_lock);
173
174
rcu_read_lock();
175
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
176
if (dev)
177
ipv6_dev_ac_dec(dev, &pac->acl_addr);
178
rcu_read_unlock();
179
180
sock_kfree_s(sk, pac, sizeof(*pac));
181
return 0;
182
}
183
184
void ipv6_sock_ac_close(struct sock *sk)
185
{
186
struct ipv6_pinfo *np = inet6_sk(sk);
187
struct net_device *dev = NULL;
188
struct ipv6_ac_socklist *pac;
189
struct net *net = sock_net(sk);
190
int prev_index;
191
192
write_lock_bh(&ipv6_sk_ac_lock);
193
pac = np->ipv6_ac_list;
194
np->ipv6_ac_list = NULL;
195
write_unlock_bh(&ipv6_sk_ac_lock);
196
197
prev_index = 0;
198
rcu_read_lock();
199
while (pac) {
200
struct ipv6_ac_socklist *next = pac->acl_next;
201
202
if (pac->acl_ifindex != prev_index) {
203
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
204
prev_index = pac->acl_ifindex;
205
}
206
if (dev)
207
ipv6_dev_ac_dec(dev, &pac->acl_addr);
208
sock_kfree_s(sk, pac, sizeof(*pac));
209
pac = next;
210
}
211
rcu_read_unlock();
212
}
213
214
#if 0
/* The function is not used, which is funny. Apparently, author
 * supposed to use it to filter out datagrams inside udp/raw but forgot.
 *
 * It is OK, anycasts are not special comparing to delivery to unicasts.
 */

/*
 * Returns non-zero if @sk has joined @addr; when @ifindex is non-zero only
 * memberships recorded for that interface are considered.
 */
int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
{
	struct ipv6_ac_socklist *pac;
	struct ipv6_pinfo *np = inet6_sk(sk);
	int	found;

	found = 0;
	read_lock(&ipv6_sk_ac_lock);
	for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
		if (ifindex && pac->acl_ifindex != ifindex)
			continue;
		found = ipv6_addr_equal(&pac->acl_addr, addr);
		if (found)
			break;
	}
	read_unlock(&ipv6_sk_ac_lock);

	return found;
}

#endif
242
243
static void aca_put(struct ifacaddr6 *ac)
244
{
245
if (atomic_dec_and_test(&ac->aca_refcnt)) {
246
in6_dev_put(ac->aca_idev);
247
dst_release(&ac->aca_rt->dst);
248
kfree(ac);
249
}
250
}
251
252
/*
253
* device anycast group inc (add if not found)
254
*/
255
int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
256
{
257
struct ifacaddr6 *aca;
258
struct inet6_dev *idev;
259
struct rt6_info *rt;
260
int err;
261
262
idev = in6_dev_get(dev);
263
264
if (idev == NULL)
265
return -EINVAL;
266
267
write_lock_bh(&idev->lock);
268
if (idev->dead) {
269
err = -ENODEV;
270
goto out;
271
}
272
273
for (aca = idev->ac_list; aca; aca = aca->aca_next) {
274
if (ipv6_addr_equal(&aca->aca_addr, addr)) {
275
aca->aca_users++;
276
err = 0;
277
goto out;
278
}
279
}
280
281
/*
282
* not found: create a new one.
283
*/
284
285
aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
286
287
if (aca == NULL) {
288
err = -ENOMEM;
289
goto out;
290
}
291
292
rt = addrconf_dst_alloc(idev, addr, 1);
293
if (IS_ERR(rt)) {
294
kfree(aca);
295
err = PTR_ERR(rt);
296
goto out;
297
}
298
299
ipv6_addr_copy(&aca->aca_addr, addr);
300
aca->aca_idev = idev;
301
aca->aca_rt = rt;
302
aca->aca_users = 1;
303
/* aca_tstamp should be updated upon changes */
304
aca->aca_cstamp = aca->aca_tstamp = jiffies;
305
atomic_set(&aca->aca_refcnt, 2);
306
spin_lock_init(&aca->aca_lock);
307
308
aca->aca_next = idev->ac_list;
309
idev->ac_list = aca;
310
write_unlock_bh(&idev->lock);
311
312
ip6_ins_rt(rt);
313
314
addrconf_join_solict(dev, &aca->aca_addr);
315
316
aca_put(aca);
317
return 0;
318
out:
319
write_unlock_bh(&idev->lock);
320
in6_dev_put(idev);
321
return err;
322
}
323
324
/*
325
* device anycast group decrement
326
*/
327
int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
328
{
329
struct ifacaddr6 *aca, *prev_aca;
330
331
write_lock_bh(&idev->lock);
332
prev_aca = NULL;
333
for (aca = idev->ac_list; aca; aca = aca->aca_next) {
334
if (ipv6_addr_equal(&aca->aca_addr, addr))
335
break;
336
prev_aca = aca;
337
}
338
if (!aca) {
339
write_unlock_bh(&idev->lock);
340
return -ENOENT;
341
}
342
if (--aca->aca_users > 0) {
343
write_unlock_bh(&idev->lock);
344
return 0;
345
}
346
if (prev_aca)
347
prev_aca->aca_next = aca->aca_next;
348
else
349
idev->ac_list = aca->aca_next;
350
write_unlock_bh(&idev->lock);
351
addrconf_leave_solict(idev, &aca->aca_addr);
352
353
dst_hold(&aca->aca_rt->dst);
354
ip6_del_rt(aca->aca_rt);
355
356
aca_put(aca);
357
return 0;
358
}
359
360
/* called with rcu_read_lock() */
361
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
362
{
363
struct inet6_dev *idev = __in6_dev_get(dev);
364
365
if (idev == NULL)
366
return -ENODEV;
367
return __ipv6_dev_ac_dec(idev, addr);
368
}
369
370
/*
371
* check if the interface has this anycast address
372
* called with rcu_read_lock()
373
*/
374
static int ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
375
{
376
struct inet6_dev *idev;
377
struct ifacaddr6 *aca;
378
379
idev = __in6_dev_get(dev);
380
if (idev) {
381
read_lock_bh(&idev->lock);
382
for (aca = idev->ac_list; aca; aca = aca->aca_next)
383
if (ipv6_addr_equal(&aca->aca_addr, addr))
384
break;
385
read_unlock_bh(&idev->lock);
386
return aca != NULL;
387
}
388
return 0;
389
}
390
391
/*
392
* check if given interface (or any, if dev==0) has this anycast address
393
*/
394
int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
395
const struct in6_addr *addr)
396
{
397
int found = 0;
398
399
rcu_read_lock();
400
if (dev)
401
found = ipv6_chk_acast_dev(dev, addr);
402
else
403
for_each_netdev_rcu(net, dev)
404
if (ipv6_chk_acast_dev(dev, addr)) {
405
found = 1;
406
break;
407
}
408
rcu_read_unlock();
409
return found;
410
}
411
412
413
#ifdef CONFIG_PROC_FS
414
struct ac6_iter_state {
415
struct seq_net_private p;
416
struct net_device *dev;
417
struct inet6_dev *idev;
418
};
419
420
#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
421
422
static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
423
{
424
struct ifacaddr6 *im = NULL;
425
struct ac6_iter_state *state = ac6_seq_private(seq);
426
struct net *net = seq_file_net(seq);
427
428
state->idev = NULL;
429
for_each_netdev_rcu(net, state->dev) {
430
struct inet6_dev *idev;
431
idev = __in6_dev_get(state->dev);
432
if (!idev)
433
continue;
434
read_lock_bh(&idev->lock);
435
im = idev->ac_list;
436
if (im) {
437
state->idev = idev;
438
break;
439
}
440
read_unlock_bh(&idev->lock);
441
}
442
return im;
443
}
444
445
static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
446
{
447
struct ac6_iter_state *state = ac6_seq_private(seq);
448
449
im = im->aca_next;
450
while (!im) {
451
if (likely(state->idev != NULL))
452
read_unlock_bh(&state->idev->lock);
453
454
state->dev = next_net_device_rcu(state->dev);
455
if (!state->dev) {
456
state->idev = NULL;
457
break;
458
}
459
state->idev = __in6_dev_get(state->dev);
460
if (!state->idev)
461
continue;
462
read_lock_bh(&state->idev->lock);
463
im = state->idev->ac_list;
464
}
465
return im;
466
}
467
468
/*
 * Return the entry at zero-based position @pos in iteration order, or NULL
 * if there are fewer than @pos + 1 entries.
 */
static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ifacaddr6 *im = ac6_get_first(seq);

	if (im)
		while (pos && (im = ac6_get_next(seq, im)) != NULL)
			--pos;
	return pos ? NULL : im;
}
476
477
/*
 * seq_file ->start: enter the RCU read-side section (left in ac6_seq_stop)
 * and position the iterator at *@pos.
 */
static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ac6_get_idx(seq, *pos);
}
483
484
/* seq_file ->next: step to the entry after @v and bump *@pos. */
static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ifacaddr6 *im = ac6_get_next(seq, v);

	++*pos;
	return im;
}
491
492
static void ac6_seq_stop(struct seq_file *seq, void *v)
493
__releases(RCU)
494
{
495
struct ac6_iter_state *state = ac6_seq_private(seq);
496
497
if (likely(state->idev != NULL)) {
498
read_unlock_bh(&state->idev->lock);
499
state->idev = NULL;
500
}
501
rcu_read_unlock();
502
}
503
504
static int ac6_seq_show(struct seq_file *seq, void *v)
505
{
506
struct ifacaddr6 *im = (struct ifacaddr6 *)v;
507
struct ac6_iter_state *state = ac6_seq_private(seq);
508
509
seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
510
state->dev->ifindex, state->dev->name,
511
&im->aca_addr, im->aca_users);
512
return 0;
513
}
514
515
static const struct seq_operations ac6_seq_ops = {
516
.start = ac6_seq_start,
517
.next = ac6_seq_next,
518
.stop = ac6_seq_stop,
519
.show = ac6_seq_show,
520
};
521
522
static int ac6_seq_open(struct inode *inode, struct file *file)
523
{
524
return seq_open_net(inode, file, &ac6_seq_ops,
525
sizeof(struct ac6_iter_state));
526
}
527
528
static const struct file_operations ac6_seq_fops = {
529
.owner = THIS_MODULE,
530
.open = ac6_seq_open,
531
.read = seq_read,
532
.llseek = seq_lseek,
533
.release = seq_release_net,
534
};
535
536
/*
 * Create the read-only "anycast6" proc entry for @net.
 * Returns 0 on success or -ENOMEM if the entry could not be created.
 */
int __net_init ac6_proc_init(struct net *net)
{
	if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops))
		return -ENOMEM;

	return 0;
}
543
544
/* Remove the "anycast6" proc entry from @net. */
void ac6_proc_exit(struct net *net)
{
	proc_net_remove(net, "anycast6");
}
548
#endif
549
550
551