Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet6/in6_src.c
39475 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. Neither the name of the project nor the names of its contributors
16
* may be used to endorse or promote products derived from this software
17
* without specific prior written permission.
18
*
19
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
* SUCH DAMAGE.
30
*
31
* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $
32
*/
33
34
/*-
35
* Copyright (c) 1982, 1986, 1991, 1993
36
* The Regents of the University of California. All rights reserved.
37
*
38
* Redistribution and use in source and binary forms, with or without
39
* modification, are permitted provided that the following conditions
40
* are met:
41
* 1. Redistributions of source code must retain the above copyright
42
* notice, this list of conditions and the following disclaimer.
43
* 2. Redistributions in binary form must reproduce the above copyright
44
* notice, this list of conditions and the following disclaimer in the
45
* documentation and/or other materials provided with the distribution.
46
* 3. Neither the name of the University nor the names of its contributors
47
* may be used to endorse or promote products derived from this software
48
* without specific prior written permission.
49
*
50
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60
* SUCH DAMAGE.
61
*/
62
63
#include <sys/cdefs.h>
64
#include "opt_inet.h"
65
#include "opt_inet6.h"
66
#include <sys/param.h>
67
#include <sys/systm.h>
68
#include <sys/lock.h>
69
#include <sys/malloc.h>
70
#include <sys/mbuf.h>
71
#include <sys/priv.h>
72
#include <sys/protosw.h>
73
#include <sys/socket.h>
74
#include <sys/socketvar.h>
75
#include <sys/sockio.h>
76
#include <sys/sysctl.h>
77
#include <sys/errno.h>
78
#include <sys/time.h>
79
#include <sys/jail.h>
80
#include <sys/kernel.h>
81
#include <sys/rmlock.h>
82
#include <sys/sx.h>
83
84
#include <net/if.h>
85
#include <net/if_var.h>
86
#include <net/if_dl.h>
87
#include <net/if_private.h>
88
#include <net/route.h>
89
#include <net/route/nhop.h>
90
#include <net/if_llatbl.h>
91
92
#include <netinet/in.h>
93
#include <netinet/in_var.h>
94
#include <netinet/in_systm.h>
95
#include <netinet/ip.h>
96
#include <netinet/in_pcb.h>
97
#include <netinet/ip_var.h>
98
#include <netinet/udp.h>
99
#include <netinet/udp_var.h>
100
101
#include <netinet6/in6_var.h>
102
#include <netinet/ip6.h>
103
#include <netinet6/in6_fib.h>
104
#include <netinet6/in6_pcb.h>
105
#include <netinet6/ip6_var.h>
106
#include <netinet6/scope6_var.h>
107
#include <netinet6/nd6.h>
108
109
static struct mtx addrsel_lock;
110
#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
111
#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock)
112
#define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock)
113
#define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED)
114
115
static struct sx addrsel_sxlock;
116
#define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock")
117
#define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock)
118
#define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock)
119
#define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock)
120
#define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock)
121
122
#define ADDR_LABEL_NOTAPP (-1)
123
VNET_DEFINE_STATIC(struct in6_addrpolicy, defaultaddrpolicy);
124
#define V_defaultaddrpolicy VNET(defaultaddrpolicy)
125
126
VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
127
128
static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
129
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
130
struct nhop_object **, int, u_int, uint32_t);
131
static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
132
struct ip6_moptions *, struct ifnet **,
133
struct ifnet *, u_int);
134
static int in6_selectsrc(uint32_t, struct sockaddr_in6 *,
135
struct ip6_pktopts *, struct ip6_moptions *, struct inpcb *,
136
struct ucred *, struct ifnet **, struct in6_addr *);
137
138
static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
139
140
static void init_policy_queue(void);
141
static int add_addrsel_policyent(struct in6_addrpolicy *);
142
static int delete_addrsel_policyent(struct in6_addrpolicy *);
143
static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
144
void *);
145
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
146
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
147
148
/*
 * Return an IPv6 address, which is the most appropriate for a given
 * destination and user specified options.
 * If necessary, this function looks up the routing table and returns
 * an entry to the caller for later use.
 */
154
/*
 * Control-flow helpers for the candidate loop of in6_selectsrc().
 * Each one jumps to a label inside that loop; plain 'continue'/'break'
 * cannot be used because the statements are wrapped in do { } while (0).
 *
 *   REPLACE(r): count rule (r) and make the current candidate the best one.
 *   NEXT(r):    keep the current best and move to the next candidate.
 *   BREAK(r):   count rule (r) and stop scanning candidates.
 */
#define	REPLACE(r) do {						\
	IP6STAT_INC2(ip6s_sources_rule, (r));			\
	goto replace;						\
} while(0)

#define	NEXT(r) do {						\
	goto next;	/* XXX: we can't use 'continue' here */	\
} while(0)

#define	BREAK(r) do {						\
	IP6STAT_INC2(ip6s_sources_rule, (r));			\
	goto out;	/* XXX: we can't use 'break' here */	\
} while(0)
173
174
static int
175
in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
176
struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct inpcb *inp,
177
struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp)
178
{
179
struct rm_priotracker in6_ifa_tracker;
180
struct in6_addr dst, tmp;
181
struct ifnet *ifp = NULL, *oifp = NULL;
182
struct in6_ifaddr *ia = NULL, *ia_best = NULL;
183
struct in6_pktinfo *pi = NULL;
184
int dst_scope = -1, best_scope = -1, best_matchlen = -1;
185
struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
186
u_int32_t odstzone;
187
int prefer_tempaddr;
188
int error;
189
190
NET_EPOCH_ASSERT();
191
KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__));
192
193
dst = dstsock->sin6_addr; /* make a copy for local operation */
194
if (ifpp) {
195
/*
196
* Save a possibly passed in ifp for in6_selectsrc. Only
197
* neighbor discovery code should use this feature, where
198
* we may know the interface but not the FIB number holding
199
* the connected subnet in case someone deleted it from the
200
* default FIB and we need to check the interface.
201
*/
202
if (*ifpp != NULL)
203
oifp = *ifpp;
204
*ifpp = NULL;
205
}
206
207
/*
208
* If the source address is explicitly specified by the caller,
209
* check if the requested source address is indeed a unicast address
210
* assigned to the node, and can be used as the packet's source
211
* address. If everything is okay, use the address as source.
212
*/
213
if (opts && (pi = opts->ip6po_pktinfo) &&
214
!IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
215
/* get the outgoing interface */
216
if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
217
fibnum))
218
!= 0)
219
return (error);
220
221
/*
222
* determine the appropriate zone id of the source based on
223
* the zone of the destination and the outgoing interface.
224
* If the specified address is ambiguous wrt the scope zone,
225
* the interface must be specified; otherwise, ifa_ifwithaddr()
226
* will fail matching the address.
227
*/
228
tmp = pi->ipi6_addr;
229
if (ifp) {
230
error = in6_setscope(&tmp, ifp, &odstzone);
231
if (error)
232
return (error);
233
}
234
if (cred != NULL && (error = prison_local_ip6(cred,
235
&tmp, (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
236
return (error);
237
238
/*
239
* If IPV6_BINDANY socket option is set, we allow to specify
240
* non local addresses as source address in IPV6_PKTINFO
241
* ancillary data.
242
*/
243
if ((inp->inp_flags & INP_BINDANY) == 0) {
244
ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */, false);
245
if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST |
246
IN6_IFF_NOTREADY)))
247
return (EADDRNOTAVAIL);
248
bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp));
249
} else
250
bcopy(&tmp, srcp, sizeof(*srcp));
251
pi->ipi6_addr = tmp; /* XXX: this overrides pi */
252
if (ifpp)
253
*ifpp = ifp;
254
return (0);
255
}
256
257
/*
258
* Otherwise, if the socket has already bound the source, just use it.
259
*/
260
if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
261
if (cred != NULL &&
262
(error = prison_local_ip6(cred, &inp->in6p_laddr,
263
((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
264
return (error);
265
bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp));
266
return (0);
267
}
268
269
/*
270
* Bypass source address selection and use the primary jail IP
271
* if requested.
272
*/
273
if (cred != NULL && !prison_saddrsel_ip6(cred, srcp))
274
return (0);
275
276
/*
277
* If the address is not specified, choose the best one based on
278
* the outgoing interface and the destination address.
279
*/
280
/* get the outgoing interface */
281
if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
282
(inp != NULL) ? inp->inp_inc.inc_fibnum : fibnum)) != 0)
283
return (error);
284
285
#ifdef DIAGNOSTIC
286
if (ifp == NULL) /* this should not happen */
287
panic("in6_selectsrc: NULL ifp");
288
#endif
289
error = in6_setscope(&dst, ifp, &odstzone);
290
if (error)
291
return (error);
292
293
IN6_IFADDR_RLOCK(&in6_ifa_tracker);
294
CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
295
int new_scope = -1, new_matchlen = -1;
296
struct in6_addrpolicy *new_policy = NULL;
297
u_int32_t srczone, osrczone, dstzone;
298
struct in6_addr src;
299
struct ifnet *ifp1 = ia->ia_ifp;
300
301
/*
302
* We'll never take an address that breaks the scope zone
303
* of the destination. We also skip an address if its zone
304
* does not contain the outgoing interface.
305
* XXX: we should probably use sin6_scope_id here.
306
*/
307
if (in6_setscope(&dst, ifp1, &dstzone) ||
308
odstzone != dstzone) {
309
continue;
310
}
311
src = ia->ia_addr.sin6_addr;
312
if (in6_setscope(&src, ifp, &osrczone) ||
313
in6_setscope(&src, ifp1, &srczone) ||
314
osrczone != srczone) {
315
continue;
316
}
317
318
/* avoid unusable addresses */
319
if ((ia->ia6_flags &
320
(IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
321
continue;
322
}
323
if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
324
continue;
325
326
/* If jailed only take addresses of the jail into account. */
327
if (cred != NULL &&
328
prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0)
329
continue;
330
331
/* Rule 1: Prefer same address */
332
if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
333
ia_best = ia;
334
BREAK(1); /* there should be no better candidate */
335
}
336
337
if (ia_best == NULL)
338
REPLACE(0);
339
340
/* Rule 2: Prefer appropriate scope */
341
if (dst_scope < 0)
342
dst_scope = in6_addrscope(&dst);
343
new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
344
if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
345
if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
346
REPLACE(2);
347
NEXT(2);
348
} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
349
if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
350
NEXT(2);
351
REPLACE(2);
352
}
353
354
/*
355
* Rule 3: Avoid deprecated addresses. Note that the case of
356
* !ip6_use_deprecated is already rejected above.
357
*/
358
if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
359
NEXT(3);
360
if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
361
REPLACE(3);
362
363
/* Rule 4: Prefer home addresses */
364
/*
365
* XXX: This is a TODO. We should probably merge the MIP6
366
* case above.
367
*/
368
369
/* Rule 5: Prefer outgoing interface */
370
if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) {
371
if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
372
NEXT(5);
373
if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
374
REPLACE(5);
375
}
376
377
/*
378
* Rule 6: Prefer matching label
379
* Note that best_policy should be non-NULL here.
380
*/
381
if (dst_policy == NULL)
382
dst_policy = lookup_addrsel_policy(dstsock);
383
if (dst_policy->label != ADDR_LABEL_NOTAPP) {
384
new_policy = lookup_addrsel_policy(&ia->ia_addr);
385
if (dst_policy->label == best_policy->label &&
386
dst_policy->label != new_policy->label)
387
NEXT(6);
388
if (dst_policy->label != best_policy->label &&
389
dst_policy->label == new_policy->label)
390
REPLACE(6);
391
}
392
393
/*
394
* Rule 7: Prefer public addresses.
395
* We allow users to reverse the logic by configuring
396
* a sysctl variable, so that privacy conscious users can
397
* always prefer temporary addresses.
398
*/
399
if (opts == NULL ||
400
opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
401
prefer_tempaddr = V_ip6_prefer_tempaddr;
402
} else if (opts->ip6po_prefer_tempaddr ==
403
IP6PO_TEMPADDR_NOTPREFER) {
404
prefer_tempaddr = 0;
405
} else
406
prefer_tempaddr = 1;
407
if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
408
(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
409
if (prefer_tempaddr)
410
REPLACE(7);
411
else
412
NEXT(7);
413
}
414
if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
415
!(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
416
if (prefer_tempaddr)
417
NEXT(7);
418
else
419
REPLACE(7);
420
}
421
422
/*
423
* Rule 8: prefer addresses on alive interfaces.
424
* This is a KAME specific rule.
425
*/
426
if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
427
!(ia->ia_ifp->if_flags & IFF_UP))
428
NEXT(8);
429
if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
430
(ia->ia_ifp->if_flags & IFF_UP))
431
REPLACE(8);
432
433
/*
434
* Rule 9: prefer address with better virtual status.
435
*/
436
if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
437
REPLACE(9);
438
if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
439
NEXT(9);
440
441
/*
442
* Rule 10: prefer address with `prefer_source' flag.
443
*/
444
if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
445
(ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0)
446
REPLACE(10);
447
if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 &&
448
(ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0)
449
NEXT(10);
450
451
/*
452
* Rule 14: Use longest matching prefix.
453
* Note: in the address selection draft, this rule is
454
* documented as "Rule 8". However, since it is also
455
* documented that this rule can be overridden, we assign
456
* a large number so that it is easy to assign smaller numbers
457
* to more preferred rules.
458
*/
459
new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
460
if (best_matchlen < new_matchlen)
461
REPLACE(14);
462
if (new_matchlen < best_matchlen)
463
NEXT(14);
464
465
/* Rule 15 is reserved. */
466
467
/*
468
* Last resort: just keep the current candidate.
469
* Or, do we need more rules?
470
*/
471
continue;
472
473
replace:
474
ia_best = ia;
475
best_scope = (new_scope >= 0 ? new_scope :
476
in6_addrscope(&ia_best->ia_addr.sin6_addr));
477
best_policy = (new_policy ? new_policy :
478
lookup_addrsel_policy(&ia_best->ia_addr));
479
best_matchlen = (new_matchlen >= 0 ? new_matchlen :
480
in6_matchlen(&ia_best->ia_addr.sin6_addr,
481
&dst));
482
483
next:
484
continue;
485
486
out:
487
break;
488
}
489
490
if ((ia = ia_best) == NULL) {
491
IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
492
IP6STAT_INC(ip6s_sources_none);
493
return (EADDRNOTAVAIL);
494
}
495
496
/*
497
* At this point at least one of the addresses belonged to the jail
498
* but it could still be, that we want to further restrict it, e.g.
499
* theoratically IN6_IS_ADDR_LOOPBACK.
500
* It must not be IN6_IS_ADDR_UNSPECIFIED anymore.
501
* prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should
502
* let all others previously selected pass.
503
* Use tmp to not change ::1 on lo0 to the primary jail address.
504
*/
505
tmp = ia->ia_addr.sin6_addr;
506
if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL &&
507
(inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
508
IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
509
IP6STAT_INC(ip6s_sources_none);
510
return (EADDRNOTAVAIL);
511
}
512
513
if (ifpp)
514
*ifpp = ifp;
515
516
bcopy(&tmp, srcp, sizeof(*srcp));
517
if (ia->ia_ifp == ifp)
518
IP6STAT_INC2(ip6s_sources_sameif, best_scope);
519
else
520
IP6STAT_INC2(ip6s_sources_otherif, best_scope);
521
if (dst_scope == best_scope)
522
IP6STAT_INC2(ip6s_sources_samescope, best_scope);
523
else
524
IP6STAT_INC2(ip6s_sources_otherscope, best_scope);
525
if (IFA6_IS_DEPRECATED(ia))
526
IP6STAT_INC2(ip6s_sources_deprecated, best_scope);
527
IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
528
return (0);
529
}
530
531
/*
532
* Select source address based on @inp, @dstsock and @opts.
533
* Stores selected address to @srcp. If @scope_ambiguous is set,
534
* embed scope from selected outgoing interface. If @hlim pointer
535
* is provided, stores calculated hop limit there.
536
* Returns 0 on success.
537
*/
538
int
539
in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
540
struct inpcb *inp, struct ucred *cred, int scope_ambiguous,
541
struct in6_addr *srcp, int *hlim)
542
{
543
struct ifnet *retifp;
544
uint32_t fibnum;
545
int error;
546
547
INP_LOCK_ASSERT(inp);
548
549
fibnum = inp->inp_inc.inc_fibnum;
550
retifp = NULL;
551
552
error = in6_selectsrc(fibnum, dstsock, opts, inp->in6p_moptions,
553
inp, cred, &retifp, srcp);
554
if (error != 0)
555
return (error);
556
557
if (hlim != NULL)
558
*hlim = in6_selecthlim(inp, retifp);
559
560
if (retifp == NULL || scope_ambiguous == 0)
561
return (0);
562
563
/*
564
* Application should provide a proper zone ID or the use of
565
* default zone IDs should be enabled. Unfortunately, some
566
* applications do not behave as it should, so we need a
567
* workaround. Even if an appropriate ID is not determined
568
* (when it's required), if we can determine the outgoing
569
* interface. determine the zone ID based on the interface.
570
*/
571
error = in6_setscope(&dstsock->sin6_addr, retifp, NULL);
572
573
return (error);
574
}
575
576
/*
577
* Select source address based on @fibnum, @dst and @scopeid.
578
* Stores selected address to @srcp.
579
* Returns 0 on success.
580
*
581
* Used by non-socket based consumers
582
*/
583
int
584
in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
585
uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp,
586
int *hlim)
587
{
588
struct ifnet *retifp;
589
struct sockaddr_in6 dst_sa;
590
int error;
591
592
retifp = ifp;
593
bzero(&dst_sa, sizeof(dst_sa));
594
dst_sa.sin6_family = AF_INET6;
595
dst_sa.sin6_len = sizeof(dst_sa);
596
dst_sa.sin6_addr = *dst;
597
dst_sa.sin6_scope_id = scopeid;
598
sa6_embedscope(&dst_sa, 0);
599
600
error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL,
601
NULL, NULL, &retifp, srcp);
602
if (hlim != NULL)
603
*hlim = in6_selecthlim(NULL, retifp);
604
605
return (error);
606
}
607
608
/*
609
* Select source address based on @fibnum, @dst and @mopts.
610
* Stores selected address to @srcp.
611
* Returns 0 on success.
612
*
613
* Used by non-socket based consumers (ND code mostly)
614
*/
615
int
616
in6_selectsrc_nbr(uint32_t fibnum, const struct in6_addr *dst,
617
struct ip6_moptions *mopts, struct ifnet *ifp, struct in6_addr *srcp)
618
{
619
struct sockaddr_in6 dst_sa;
620
struct ifnet *retifp;
621
int error;
622
623
retifp = ifp;
624
bzero(&dst_sa, sizeof(dst_sa));
625
dst_sa.sin6_family = AF_INET6;
626
dst_sa.sin6_len = sizeof(dst_sa);
627
dst_sa.sin6_addr = *dst;
628
dst_sa.sin6_scope_id = ntohs(in6_getscope(dst));
629
sa6_embedscope(&dst_sa, 0);
630
631
error = in6_selectsrc(fibnum, &dst_sa, NULL, mopts,
632
NULL, NULL, &retifp, srcp);
633
return (error);
634
}
635
636
static struct nhop_object *
637
cache_route(uint32_t fibnum, const struct sockaddr_in6 *dst, struct route_in6 *ro,
638
uint32_t flowid)
639
{
640
/*
641
* Use a cached route if it exists and is valid, else try to allocate
642
* a new one. Note that we should check the address family of the
643
* cached destination, in case of sharing the cache with IPv4.
644
* Assumes that 'struct route_in6' is exclusively locked.
645
*/
646
if (ro->ro_nh != NULL && (
647
!NH_IS_VALID(ro->ro_nh) || ro->ro_dst.sin6_family != AF_INET6 ||
648
!IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &dst->sin6_addr)))
649
RO_NHFREE(ro);
650
651
if (ro->ro_nh == NULL) {
652
ro->ro_dst = *dst;
653
654
const struct in6_addr *paddr;
655
struct in6_addr unscoped_addr;
656
uint32_t scopeid = 0;
657
if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) {
658
in6_splitscope(&dst->sin6_addr, &unscoped_addr, &scopeid);
659
paddr = &unscoped_addr;
660
} else
661
paddr = &dst->sin6_addr;
662
ro->ro_nh = fib6_lookup(fibnum, paddr, scopeid, NHR_REF, flowid);
663
}
664
return (ro->ro_nh);
665
}
666
667
static struct nhop_object *
668
lookup_route(uint32_t fibnum, struct sockaddr_in6 *dst, struct route_in6 *ro,
669
struct ip6_pktopts *opts, uint32_t flowid)
670
{
671
struct nhop_object *nh = NULL;
672
673
/*
674
* If the next hop address for the packet is specified by the caller,
675
* use it as the gateway.
676
*/
677
if (opts && opts->ip6po_nexthop) {
678
struct route_in6 *ron = &opts->ip6po_nextroute;
679
struct sockaddr_in6 *sin6_next = satosin6(opts->ip6po_nexthop);
680
681
nh = cache_route(fibnum, sin6_next, ron, flowid);
682
683
/*
684
* The node identified by that address must be a
685
* neighbor of the sending host.
686
*/
687
if (nh != NULL && (nh->nh_flags & NHF_GATEWAY) != 0)
688
nh = NULL;
689
} else if (ro != NULL) {
690
nh = cache_route(fibnum, dst, ro, flowid);
691
if (nh == NULL)
692
return (NULL);
693
694
/*
695
* Check if the outgoing interface conflicts with
696
* the interface specified by ipi6_ifindex (if specified).
697
*/
698
struct in6_pktinfo *pi;
699
if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
700
if (nh->nh_aifp->if_index != pi->ipi6_ifindex)
701
nh = NULL;
702
}
703
}
704
705
return (nh);
706
}
707
708
/*
709
* Finds outgoing nexthop or the outgoing interface for the
710
* @dstsock.
711
* Return 0 on success and stores the lookup result in @retnh and @retifp
712
*/
713
static int
714
selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
715
struct ip6_moptions *mopts, struct route_in6 *ro,
716
struct ifnet **retifp, struct nhop_object **retnh, int norouteok,
717
u_int fibnum, uint32_t flowid)
718
{
719
int error = 0;
720
struct ifnet *ifp = NULL;
721
struct in6_pktinfo *pi = NULL;
722
struct in6_addr *dst = &dstsock->sin6_addr;
723
724
/* If the caller specify the outgoing interface explicitly, use it. */
725
if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
726
/* XXX boundary check is assumed to be already done. */
727
ifp = ifnet_byindex(pi->ipi6_ifindex);
728
if (ifp != NULL && (norouteok || IN6_IS_ADDR_MULTICAST(dst))) {
729
/*
730
* we do not have to check or get the route for
731
* multicast.
732
*/
733
goto done;
734
} else
735
goto getroute;
736
}
737
/*
738
* If the destination address is a multicast address and the outgoing
739
* interface for the address is specified by the caller, use it.
740
*/
741
if (IN6_IS_ADDR_MULTICAST(dst) &&
742
mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
743
goto done; /* we do not need a route for multicast. */
744
}
745
/*
746
* If destination address is LLA or link- or node-local multicast,
747
* use it's embedded scope zone id to determine outgoing interface.
748
*/
749
if (IN6_IS_ADDR_MC_LINKLOCAL(dst) ||
750
IN6_IS_ADDR_MC_NODELOCAL(dst)) {
751
uint32_t zoneid = ntohs(in6_getscope(dst));
752
if (zoneid > 0) {
753
ifp = in6_getlinkifnet(zoneid);
754
goto done;
755
}
756
}
757
758
getroute:;
759
struct nhop_object *nh = lookup_route(fibnum, dstsock, ro, opts, flowid);
760
if (nh != NULL) {
761
*retifp = nh->nh_aifp;
762
error = 0;
763
} else {
764
*retifp = NULL;
765
IP6STAT_INC(ip6s_noroute);
766
error = EHOSTUNREACH;
767
}
768
*retnh = nh;
769
return (error);
770
771
done:
772
if (ifp == NULL) {
773
/*
774
* This can happen if the caller did not pass a cached route
775
* nor any other hints. We treat this case an error.
776
*/
777
error = EHOSTUNREACH;
778
}
779
if (error == EHOSTUNREACH)
780
IP6STAT_INC(ip6s_noroute);
781
782
*retifp = ifp;
783
*retnh = NULL;
784
785
return (error);
786
}
787
788
static int
789
in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
790
struct ip6_moptions *mopts, struct ifnet **retifp,
791
struct ifnet *oifp, u_int fibnum)
792
{
793
int error;
794
struct route_in6 sro;
795
struct nhop_object *nh = NULL;
796
uint16_t nh_flags;
797
798
KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__));
799
800
bzero(&sro, sizeof(sro));
801
nh_flags = 0;
802
803
error = selectroute(dstsock, opts, mopts, &sro, retifp, &nh, 1, fibnum, 0);
804
805
if (nh != NULL)
806
nh_flags = nh->nh_flags;
807
if (nh != NULL && nh == sro.ro_nh)
808
NH_FREE(nh);
809
810
if (error != 0) {
811
/* Help ND. See oifp comment in in6_selectsrc(). */
812
if (oifp != NULL && fibnum == RT_DEFAULT_FIB) {
813
*retifp = oifp;
814
error = 0;
815
}
816
return (error);
817
}
818
819
/*
820
* do not use a rejected or black hole route.
821
* XXX: this check should be done in the L2 output routine.
822
* However, if we skipped this check here, we'd see the following
823
* scenario:
824
* - install a rejected route for a scoped address prefix
825
* (like fe80::/10)
826
* - send a packet to a destination that matches the scoped prefix,
827
* with ambiguity about the scope zone.
828
* - pick the outgoing interface from the route, and disambiguate the
829
* scope zone with the interface.
830
* - ip6_output() would try to get another route with the "new"
831
* destination, which may be valid.
832
* - we'd see no error on output.
833
* Although this may not be very harmful, it should still be confusing.
834
* We thus reject the case here.
835
*/
836
837
if (nh_flags & (NHF_REJECT | NHF_BLACKHOLE)) {
838
error = (nh_flags & NHF_HOST ? EHOSTUNREACH : ENETUNREACH);
839
return (error);
840
}
841
842
return (0);
843
}
844
845
/* Public wrapper function to selectroute(). */
846
int
847
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
848
struct ip6_moptions *mopts, struct route_in6 *ro,
849
struct ifnet **retifp, struct nhop_object **retnh, u_int fibnum, uint32_t flowid)
850
{
851
MPASS(retifp != NULL);
852
MPASS(retnh != NULL);
853
854
return (selectroute(dstsock, opts, mopts, ro, retifp,
855
retnh, 0, fibnum, flowid));
856
}
857
858
/*
859
* Default hop limit selection. The precedence is as follows:
860
* 1. Hoplimit value specified via ioctl.
861
* 2. (If the outgoing interface is detected) the current
862
* hop limit of the interface specified by router advertisement.
863
* 3. The system default hoplimit.
864
*/
865
int
866
in6_selecthlim(struct inpcb *inp, struct ifnet *ifp)
867
{
868
869
if (inp && inp->in6p_hops >= 0)
870
return (inp->in6p_hops);
871
else if (ifp)
872
return (ND_IFINFO(ifp)->chlim);
873
else if (inp && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
874
struct nhop_object *nh;
875
struct in6_addr dst;
876
uint32_t fibnum, scopeid;
877
int hlim;
878
879
fibnum = inp->inp_inc.inc_fibnum;
880
in6_splitscope(&inp->in6p_faddr, &dst, &scopeid);
881
nh = fib6_lookup(fibnum, &dst, scopeid, 0, 0);
882
if (nh != NULL) {
883
hlim = ND_IFINFO(nh->nh_ifp)->chlim;
884
return (hlim);
885
}
886
}
887
return (V_ip6_defhlim);
888
}
889
890
void
891
addrsel_policy_init(void)
892
{
893
894
init_policy_queue();
895
896
/* initialize the "last resort" policy */
897
bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy));
898
V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
899
900
if (!IS_DEFAULT_VNET(curvnet))
901
return;
902
903
ADDRSEL_LOCK_INIT();
904
ADDRSEL_SXLOCK_INIT();
905
}
906
907
static struct in6_addrpolicy *
908
lookup_addrsel_policy(struct sockaddr_in6 *key)
909
{
910
struct in6_addrpolicy *match = NULL;
911
912
ADDRSEL_LOCK();
913
match = match_addrsel_policy(key);
914
915
if (match == NULL)
916
match = &V_defaultaddrpolicy;
917
else
918
match->use++;
919
ADDRSEL_UNLOCK();
920
921
return (match);
922
}
923
924
/*
925
* Subroutines to manage the address selection policy table via sysctl.
926
*/
927
struct walkarg {
928
struct sysctl_req *w_req;
929
};
930
931
static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
932
SYSCTL_DECL(_net_inet6_ip6);
933
static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
934
CTLFLAG_RD | CTLFLAG_MPSAFE, in6_src_sysctl,
935
"");
936
937
static int
938
in6_src_sysctl(SYSCTL_HANDLER_ARGS)
939
{
940
struct walkarg w;
941
942
if (req->newptr)
943
return EPERM;
944
945
bzero(&w, sizeof(w));
946
w.w_req = req;
947
948
return (walk_addrsel_policy(dump_addrsel_policyent, &w));
949
}
950
951
int
952
in6_src_ioctl(u_long cmd, caddr_t data)
953
{
954
struct in6_addrpolicy ent0;
955
956
if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
957
return (EOPNOTSUPP); /* check for safety */
958
959
ent0 = *(struct in6_addrpolicy *)data;
960
961
if (ent0.label == ADDR_LABEL_NOTAPP)
962
return (EINVAL);
963
/* check if the prefix mask is consecutive. */
964
if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
965
return (EINVAL);
966
/* clear trailing garbages (if any) of the prefix address. */
967
IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr);
968
ent0.use = 0;
969
970
switch (cmd) {
971
case SIOCAADDRCTL_POLICY:
972
return (add_addrsel_policyent(&ent0));
973
case SIOCDADDRCTL_POLICY:
974
return (delete_addrsel_policyent(&ent0));
975
}
976
977
return (0); /* XXX: compromise compilers */
978
}
979
980
/*
981
* The followings are implementation of the policy table using a
982
* simple tail queue.
983
* XXX such details should be hidden.
984
* XXX implementation using binary tree should be more efficient.
985
*/
986
struct addrsel_policyent {
987
TAILQ_ENTRY(addrsel_policyent) ape_entry;
988
struct in6_addrpolicy ape_policy;
989
};
990
991
TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
992
993
VNET_DEFINE_STATIC(struct addrsel_policyhead, addrsel_policytab);
994
#define V_addrsel_policytab VNET(addrsel_policytab)
995
996
static void
997
init_policy_queue(void)
998
{
999
1000
TAILQ_INIT(&V_addrsel_policytab);
1001
}
1002
1003
static int
1004
add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
1005
{
1006
struct addrsel_policyent *new, *pol;
1007
1008
new = malloc(sizeof(*new), M_IFADDR,
1009
M_WAITOK);
1010
ADDRSEL_XLOCK();
1011
ADDRSEL_LOCK();
1012
1013
/* duplication check */
1014
TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1015
if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1016
&pol->ape_policy.addr.sin6_addr) &&
1017
IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1018
&pol->ape_policy.addrmask.sin6_addr)) {
1019
ADDRSEL_UNLOCK();
1020
ADDRSEL_XUNLOCK();
1021
free(new, M_IFADDR);
1022
return (EEXIST); /* or override it? */
1023
}
1024
}
1025
1026
bzero(new, sizeof(*new));
1027
1028
/* XXX: should validate entry */
1029
new->ape_policy = *newpolicy;
1030
1031
TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry);
1032
ADDRSEL_UNLOCK();
1033
ADDRSEL_XUNLOCK();
1034
1035
return (0);
1036
}
1037
1038
static int
1039
delete_addrsel_policyent(struct in6_addrpolicy *key)
1040
{
1041
struct addrsel_policyent *pol;
1042
1043
ADDRSEL_XLOCK();
1044
ADDRSEL_LOCK();
1045
1046
/* search for the entry in the table */
1047
TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1048
if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
1049
&pol->ape_policy.addr.sin6_addr) &&
1050
IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
1051
&pol->ape_policy.addrmask.sin6_addr)) {
1052
break;
1053
}
1054
}
1055
if (pol == NULL) {
1056
ADDRSEL_UNLOCK();
1057
ADDRSEL_XUNLOCK();
1058
return (ESRCH);
1059
}
1060
1061
TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry);
1062
ADDRSEL_UNLOCK();
1063
ADDRSEL_XUNLOCK();
1064
free(pol, M_IFADDR);
1065
1066
return (0);
1067
}
1068
1069
static int
1070
walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
1071
{
1072
struct addrsel_policyent *pol;
1073
int error = 0;
1074
1075
ADDRSEL_SLOCK();
1076
TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1077
if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1078
ADDRSEL_SUNLOCK();
1079
return (error);
1080
}
1081
}
1082
ADDRSEL_SUNLOCK();
1083
return (error);
1084
}
1085
1086
static int
1087
dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
1088
{
1089
int error = 0;
1090
struct walkarg *w = arg;
1091
1092
error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol));
1093
1094
return (error);
1095
}
1096
1097
static struct in6_addrpolicy *
1098
match_addrsel_policy(struct sockaddr_in6 *key)
1099
{
1100
struct addrsel_policyent *pent;
1101
struct in6_addrpolicy *bestpol = NULL, *pol;
1102
int matchlen, bestmatchlen = -1;
1103
u_char *mp, *ep, *k, *p, m;
1104
1105
TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) {
1106
matchlen = 0;
1107
1108
pol = &pent->ape_policy;
1109
mp = (u_char *)&pol->addrmask.sin6_addr;
1110
ep = mp + 16; /* XXX: scope field? */
1111
k = (u_char *)&key->sin6_addr;
1112
p = (u_char *)&pol->addr.sin6_addr;
1113
for (; mp < ep && *mp; mp++, k++, p++) {
1114
m = *mp;
1115
if ((*k & m) != *p)
1116
goto next; /* not match */
1117
if (m == 0xff) /* short cut for a typical case */
1118
matchlen += 8;
1119
else {
1120
while (m >= 0x80) {
1121
matchlen++;
1122
m <<= 1;
1123
}
1124
}
1125
}
1126
1127
/* matched. check if this is better than the current best. */
1128
if (bestpol == NULL ||
1129
matchlen > bestmatchlen) {
1130
bestpol = pol;
1131
bestmatchlen = matchlen;
1132
}
1133
1134
next:
1135
continue;
1136
}
1137
1138
return (bestpol);
1139
}
1140
1141