Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet6/ip6_output.c
103481 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. Neither the name of the project nor the names of its contributors
16
* may be used to endorse or promote products derived from this software
17
* without specific prior written permission.
18
*
19
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
* SUCH DAMAGE.
30
*
31
* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
32
*/
33
34
/*-
35
* Copyright (c) 1982, 1986, 1988, 1990, 1993
36
* The Regents of the University of California. All rights reserved.
37
*
38
* Redistribution and use in source and binary forms, with or without
39
* modification, are permitted provided that the following conditions
40
* are met:
41
* 1. Redistributions of source code must retain the above copyright
42
* notice, this list of conditions and the following disclaimer.
43
* 2. Redistributions in binary form must reproduce the above copyright
44
* notice, this list of conditions and the following disclaimer in the
45
* documentation and/or other materials provided with the distribution.
46
* 3. Neither the name of the University nor the names of its contributors
47
* may be used to endorse or promote products derived from this software
48
* without specific prior written permission.
49
*
50
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60
* SUCH DAMAGE.
61
*/
62
63
#include "opt_inet.h"
64
#include "opt_inet6.h"
65
#include "opt_ipsec.h"
66
#include "opt_kern_tls.h"
67
#include "opt_ratelimit.h"
68
#include "opt_route.h"
69
#include "opt_rss.h"
70
#include "opt_sctp.h"
71
72
#include <sys/param.h>
73
#include <sys/kernel.h>
74
#include <sys/ktls.h>
75
#include <sys/malloc.h>
76
#include <sys/mbuf.h>
77
#include <sys/errno.h>
78
#include <sys/priv.h>
79
#include <sys/proc.h>
80
#include <sys/protosw.h>
81
#include <sys/socket.h>
82
#include <sys/socketvar.h>
83
#include <sys/syslog.h>
84
#include <sys/ucred.h>
85
86
#include <machine/in_cksum.h>
87
88
#include <net/if.h>
89
#include <net/if_var.h>
90
#include <net/if_private.h>
91
#include <net/if_vlan_var.h>
92
#include <net/if_llatbl.h>
93
#include <net/ethernet.h>
94
#include <net/netisr.h>
95
#include <net/route.h>
96
#include <net/route/nhop.h>
97
#include <net/pfil.h>
98
#include <net/rss_config.h>
99
#include <net/vnet.h>
100
101
#include <netinet/in.h>
102
#include <netinet/in_var.h>
103
#include <netinet/ip_var.h>
104
#include <netinet6/in6_fib.h>
105
#include <netinet6/in6_var.h>
106
#include <netinet/ip6.h>
107
#include <netinet/icmp6.h>
108
#include <netinet6/ip6_var.h>
109
#include <netinet/in_pcb.h>
110
#include <netinet/tcp_var.h>
111
#include <netinet6/nd6.h>
112
#include <netinet6/in6_rss.h>
113
#include <netinet6/ip6_mroute.h>
114
115
#include <netipsec/ipsec_support.h>
116
#if defined(SCTP) || defined(SCTP_SUPPORT)
117
#include <netinet/sctp.h>
118
#include <netinet/sctp_crc32.h>
119
#endif
120
121
#include <netinet6/scope6_var.h>
122
123
extern int in6_mcast_loop;
124
125
/*
 * Extension header mbufs collected for one outgoing packet.
 *
 * ip6_output() zeroes the whole structure before use and only fills in
 * the members for which an option is present, so an unused member reads
 * as NULL.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* mbuf holding the IPv6 header */
	struct mbuf *ip6e_hbh;		/* hop-by-hop options header */
	struct mbuf *ip6e_dest1;	/* destination options, 1st part */
	struct mbuf *ip6e_rthdr;	/* routing header */
	struct mbuf *ip6e_dest2;	/* destination options, 2nd part */
};
132
133
static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
134
135
static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
136
struct ucred *, int);
137
static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
138
struct socket *, struct sockopt *);
139
static int ip6_getpcbopt(struct inpcb *, int, struct sockopt *);
140
static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *,
141
struct ucred *, int, int, int);
142
143
static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
144
static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
145
struct ip6_frag **);
146
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
147
static void ip6_getpmtu(struct route_in6 *, int,
148
struct ifnet *, const struct in6_addr *, u_long *, u_int, u_int);
149
static void ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long,
150
u_long *, u_int);
151
static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *);
152
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
153
154
/*
 * Make an extension header from option data.  hp is the source,
 * mp is the destination, and _ol is the optlen.
 *
 * The byte count handed to ip6_copyexthdr() is (ip6e_len + 1) * 8, read
 * from the ip6e_len field of hp.  On success the m_len of the mbuf stored
 * through mp is added to _ol.
 *
 * The macro is not self-contained: it assigns to a variable named "error"
 * and, when that is non-zero, jumps to a label named "freehdrs".  Both
 * must exist in the function that expands it.
 */
#define	MAKE_EXTHDR(hp, mp, _ol)					\
    do {								\
	struct ip6_ext *eh = (struct ip6_ext *)(hp);			\
	error = ip6_copyexthdr((mp), (caddr_t)(hp),			\
	    ((eh)->ip6e_len + 1) << 3);					\
	if (error)							\
		goto freehdrs;						\
	(_ol) += (*(mp))->m_len;					\
    } while (/*CONSTCOND*/ 0)
167
168
/*
 * Form a chain of extension headers.
 * m is the extension header mbuf
 * mp is the previous mbuf in the chain
 * p is the next header
 * i is the type of option.
 *
 * When m is non-NULL: the first byte of m receives the value currently
 * stored at *p, *p is overwritten with i, and p is moved to point at the
 * first byte of m.  m is then linked in directly after mp, and mp is
 * advanced to m.  Nothing happens when m is NULL.
 *
 * Both mp and p are assigned to, so they must be modifiable lvalues.
 * The macro also reads "hdrsplit" and takes the address of "exthdrs"
 * from the expanding function, and panics if hdrsplit is false.
 */
#define	MAKE_CHAIN(m, mp, p, i)\
    do {\
	if (m) {\
		if (!hdrsplit) \
			panic("%s:%d: assumption failed: "\
			    "hdr not split: hdrsplit %d exthdrs %p",\
			    __func__, __LINE__, hdrsplit, &exthdrs);\
		*mtod((m), u_char *) = *(p);\
		*(p) = (i);\
		p = mtod((m), u_char *);\
		(m)->m_next = (mp)->m_next;\
		(mp)->m_next = (m);\
		(mp) = (m);\
	}\
    } while (/*CONSTCOND*/ 0)
190
191
void
192
in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
193
{
194
u_short csum;
195
196
csum = in_cksum_skip(m, offset + plen, offset);
197
if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
198
csum = 0xffff;
199
offset += m->m_pkthdr.csum_data; /* checksum offset */
200
201
if (offset + sizeof(csum) > m->m_len)
202
m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
203
else
204
*(u_short *)mtodo(m, offset) = csum;
205
}
206
207
static void
208
ip6_output_delayed_csum(struct mbuf *m, struct ifnet *ifp, int csum_flags,
209
int plen, int optlen)
210
{
211
212
KASSERT((plen >= optlen), ("%s:%d: plen %d < optlen %d, m %p, ifp %p "
213
"csum_flags %#x",
214
__func__, __LINE__, plen, optlen, m, ifp, csum_flags));
215
216
if (csum_flags & CSUM_DELAY_DATA_IPV6) {
217
in6_delayed_cksum(m, plen - optlen,
218
sizeof(struct ip6_hdr) + optlen);
219
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
220
}
221
#if defined(SCTP) || defined(SCTP_SUPPORT)
222
if (csum_flags & CSUM_SCTP_IPV6) {
223
sctp_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
224
m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
225
}
226
#endif
227
}
228
229
int
230
ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
231
int fraglen , uint32_t id)
232
{
233
struct mbuf *m, **mnext, *m_frgpart;
234
struct ip6_hdr *ip6, *mhip6;
235
struct ip6_frag *ip6f;
236
int off;
237
int error;
238
int tlen = m0->m_pkthdr.len;
239
240
KASSERT((fraglen % 8 == 0), ("Fragment length must be a multiple of 8"));
241
242
m = m0;
243
ip6 = mtod(m, struct ip6_hdr *);
244
mnext = &m->m_nextpkt;
245
246
for (off = hlen; off < tlen; off += fraglen) {
247
m = m_gethdr(M_NOWAIT, MT_DATA);
248
if (!m) {
249
IP6STAT_INC(ip6s_odropped);
250
return (ENOBUFS);
251
}
252
253
/*
254
* Make sure the complete packet header gets copied
255
* from the originating mbuf to the newly created
256
* mbuf. This also ensures that existing firewall
257
* classification(s), VLAN tags and so on get copied
258
* to the resulting fragmented packet(s):
259
*/
260
if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
261
m_free(m);
262
IP6STAT_INC(ip6s_odropped);
263
return (ENOBUFS);
264
}
265
266
*mnext = m;
267
mnext = &m->m_nextpkt;
268
m->m_data += max_linkhdr;
269
mhip6 = mtod(m, struct ip6_hdr *);
270
*mhip6 = *ip6;
271
m->m_len = sizeof(*mhip6);
272
error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
273
if (error) {
274
IP6STAT_INC(ip6s_odropped);
275
return (error);
276
}
277
ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
278
if (off + fraglen >= tlen)
279
fraglen = tlen - off;
280
else
281
ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
282
mhip6->ip6_plen = htons((u_short)(fraglen + hlen +
283
sizeof(*ip6f) - sizeof(struct ip6_hdr)));
284
if ((m_frgpart = m_copym(m0, off, fraglen, M_NOWAIT)) == NULL) {
285
IP6STAT_INC(ip6s_odropped);
286
return (ENOBUFS);
287
}
288
m_cat(m, m_frgpart);
289
m->m_pkthdr.len = fraglen + hlen + sizeof(*ip6f);
290
ip6f->ip6f_reserved = 0;
291
ip6f->ip6f_ident = id;
292
ip6f->ip6f_nxt = nextproto;
293
IP6STAT_INC(ip6s_ofragments);
294
in6_ifstat_inc(ifp, ifs6_out_fragcreat);
295
}
296
297
return (0);
298
}
299
300
static int
301
ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
302
struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro,
303
bool stamp_tag)
304
{
305
#ifdef KERN_TLS
306
struct ktls_session *tls = NULL;
307
#endif
308
struct m_snd_tag *mst;
309
int error;
310
311
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
312
mst = NULL;
313
314
#ifdef KERN_TLS
315
/*
316
* If this is an unencrypted TLS record, save a reference to
317
* the record. This local reference is used to call
318
* ktls_output_eagain after the mbuf has been freed (thus
319
* dropping the mbuf's reference) in if_output.
320
*/
321
if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
322
tls = ktls_hold(m->m_next->m_epg_tls);
323
mst = tls->snd_tag;
324
325
/*
326
* If a TLS session doesn't have a valid tag, it must
327
* have had an earlier ifp mismatch, so drop this
328
* packet.
329
*/
330
if (mst == NULL) {
331
m_freem(m);
332
error = EAGAIN;
333
goto done;
334
}
335
/*
336
* Always stamp tags that include NIC ktls.
337
*/
338
stamp_tag = true;
339
}
340
#endif
341
#ifdef RATELIMIT
342
if (inp != NULL && mst == NULL) {
343
if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
344
(inp->inp_snd_tag != NULL &&
345
inp->inp_snd_tag->ifp != ifp))
346
in_pcboutput_txrtlmt(inp, ifp, m);
347
348
if (inp->inp_snd_tag != NULL)
349
mst = inp->inp_snd_tag;
350
}
351
#endif
352
if (stamp_tag && mst != NULL) {
353
KASSERT(m->m_pkthdr.rcvif == NULL,
354
("trying to add a send tag to a forwarded packet"));
355
if (mst->ifp != ifp) {
356
m_freem(m);
357
error = EAGAIN;
358
goto done;
359
}
360
361
/* stamp send tag on mbuf */
362
m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
363
m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
364
}
365
366
error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro);
367
368
done:
369
/* Check for route change invalidating send tags. */
370
#ifdef KERN_TLS
371
if (tls != NULL) {
372
if (error == EAGAIN)
373
error = ktls_output_eagain(inp, tls);
374
ktls_free(tls);
375
}
376
#endif
377
#ifdef RATELIMIT
378
if (error == EAGAIN)
379
in_pcboutput_eagain(inp);
380
#endif
381
return (error);
382
}
383
384
/*
385
* IP6 output.
386
* The packet in mbuf chain m contains a skeletal IP6 header (with pri, len,
387
* nxt, hlim, src, dst).
388
* This function may modify ver and hlim only.
389
* The mbuf chain containing the packet will be freed.
390
* The mbuf opt, if present, will not be freed.
391
* If route_in6 ro is present and has ro_nh initialized, route lookup would be
392
* skipped and ro->ro_nh would be used. If ro is present but ro->ro_nh is NULL,
393
* then result of route lookup is stored in ro->ro_nh.
394
*
395
* Type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and nd_ifinfo.linkmtu
396
* is uint32_t. So we use u_long to hold largest one, which is rt_mtu.
397
*
398
* ifpp - XXX: just for statistics
399
*/
400
int
401
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
402
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
403
struct ifnet **ifpp, struct inpcb *inp)
404
{
405
struct ip6_hdr *ip6;
406
struct ifnet *ifp, *origifp;
407
struct mbuf *m = m0;
408
struct mbuf *mprev;
409
struct route_in6 *ro_pmtu;
410
struct nhop_object *nh;
411
struct sockaddr_in6 *dst, sin6, src_sa, dst_sa;
412
struct in6_addr odst;
413
u_char *nexthdrp;
414
int tlen, len;
415
int error = 0;
416
int vlan_pcp = -1;
417
struct in6_ifaddr *ia = NULL;
418
u_long mtu;
419
int dontfrag;
420
u_int32_t optlen, plen = 0, unfragpartlen;
421
struct ip6_exthdrs exthdrs;
422
struct in6_addr src0, dst0;
423
u_int32_t zone;
424
bool hdrsplit;
425
int sw_csum, tso;
426
int needfiblookup;
427
uint32_t fibnum;
428
struct m_tag *fwd_tag = NULL;
429
uint32_t id;
430
uint32_t optvalid;
431
432
NET_EPOCH_ASSERT();
433
434
if (inp != NULL) {
435
INP_LOCK_ASSERT(inp);
436
M_SETFIB(m, inp->inp_inc.inc_fibnum);
437
if ((flags & IP_NODEFAULTFLOWID) == 0) {
438
/* Unconditionally set flowid. */
439
m->m_pkthdr.flowid = inp->inp_flowid;
440
M_HASHTYPE_SET(m, inp->inp_flowtype);
441
}
442
if ((inp->inp_flags2 & INP_2PCP_SET) != 0)
443
vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >>
444
INP_2PCP_SHIFT;
445
#ifdef NUMA
446
m->m_pkthdr.numa_domain = inp->inp_numa_domain;
447
#endif
448
}
449
450
/* Source address validation. */
451
ip6 = mtod(m, struct ip6_hdr *);
452
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
453
(flags & IPV6_UNSPECSRC) == 0) {
454
error = EOPNOTSUPP;
455
IP6STAT_INC(ip6s_badscope);
456
goto bad;
457
}
458
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
459
error = EOPNOTSUPP;
460
IP6STAT_INC(ip6s_badscope);
461
goto bad;
462
}
463
464
/*
465
* If we are given packet options to add extension headers prepare them.
466
* Calculate the total length of the extension header chain.
467
* Keep the length of the unfragmentable part for fragmentation.
468
*/
469
bzero(&exthdrs, sizeof(exthdrs));
470
optlen = optvalid = 0;
471
unfragpartlen = sizeof(struct ip6_hdr);
472
if (opt) {
473
optvalid = opt->ip6po_valid;
474
475
/* Hop-by-Hop options header. */
476
if ((optvalid & IP6PO_VALID_HBH) != 0)
477
MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh, optlen);
478
479
/* Destination options header (1st part). */
480
if ((optvalid & IP6PO_VALID_RHINFO) != 0) {
481
#ifndef RTHDR_SUPPORT_IMPLEMENTED
482
/*
483
* If there is a routing header, discard the packet
484
* right away here. RH0/1 are obsolete and we do not
485
* currently support RH2/3/4.
486
* People trying to use RH253/254 may want to disable
487
* this check.
488
* The moment we do support any routing header (again)
489
* this block should check the routing type more
490
* selectively.
491
*/
492
error = EINVAL;
493
goto bad;
494
#endif
495
496
/*
497
* Destination options header (1st part).
498
* This only makes sense with a routing header.
499
* See Section 9.2 of RFC 3542.
500
* Disabling this part just for MIP6 convenience is
501
* a bad idea. We need to think carefully about a
502
* way to make the advanced API coexist with MIP6
503
* options, which might automatically be inserted in
504
* the kernel.
505
*/
506
if ((optvalid & IP6PO_VALID_DEST1) != 0)
507
MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1,
508
optlen);
509
}
510
/* Routing header. */
511
if ((optvalid & IP6PO_VALID_RHINFO) != 0)
512
MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr, optlen);
513
514
unfragpartlen += optlen;
515
516
/*
517
* NOTE: we don't add AH/ESP length here (done in
518
* ip6_ipsec_output()).
519
*/
520
521
/* Destination options header (2nd part). */
522
if ((optvalid & IP6PO_VALID_DEST2) != 0)
523
MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2, optlen);
524
}
525
526
/*
527
* If there is at least one extension header,
528
* separate IP6 header from the payload.
529
*/
530
hdrsplit = false;
531
if (optlen) {
532
if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
533
m = NULL;
534
goto freehdrs;
535
}
536
m = exthdrs.ip6e_ip6;
537
ip6 = mtod(m, struct ip6_hdr *);
538
hdrsplit = true;
539
}
540
541
/* Adjust mbuf packet header length. */
542
m->m_pkthdr.len += optlen;
543
plen = m->m_pkthdr.len - sizeof(*ip6);
544
545
if (plen > IPV6_MAXPACKET) {
546
error = EMSGSIZE;
547
goto freehdrs;
548
} else
549
ip6->ip6_plen = htons(plen);
550
nexthdrp = &ip6->ip6_nxt;
551
552
if (optlen) {
553
/*
554
* Concatenate headers and fill in next header fields.
555
* Here we have, on "m"
556
* IPv6 payload
557
* and we insert headers accordingly.
558
* Finally, we should be getting:
559
* IPv6 hbh dest1 rthdr ah* [esp* dest2 payload].
560
*
561
* During the header composing process "m" points to IPv6
562
* header. "mprev" points to an extension header prior to esp.
563
*/
564
mprev = m;
565
566
/*
567
* We treat dest2 specially. This makes IPsec processing
568
* much easier. The goal here is to make mprev point the
569
* mbuf prior to dest2.
570
*
571
* Result: IPv6 dest2 payload.
572
* m and mprev will point to IPv6 header.
573
*/
574
if (exthdrs.ip6e_dest2) {
575
if (!hdrsplit)
576
panic("%s:%d: assumption failed: "
577
"hdr not split: hdrsplit %d exthdrs %p",
578
__func__, __LINE__, hdrsplit, &exthdrs);
579
exthdrs.ip6e_dest2->m_next = m->m_next;
580
m->m_next = exthdrs.ip6e_dest2;
581
*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
582
ip6->ip6_nxt = IPPROTO_DSTOPTS;
583
}
584
585
/*
586
* Result: IPv6 hbh dest1 rthdr dest2 payload.
587
* m will point to IPv6 header. mprev will point to the
588
* extension header prior to dest2 (rthdr in the above case).
589
*/
590
MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
591
MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
592
IPPROTO_DSTOPTS);
593
MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
594
IPPROTO_ROUTING);
595
}
596
597
IP6STAT_INC(ip6s_localout);
598
599
/* Route packet. */
600
ro_pmtu = ro;
601
if ((optvalid & IP6PO_VALID_RHINFO) != 0)
602
ro = &opt->ip6po_route;
603
if (ro != NULL)
604
dst = (struct sockaddr_in6 *)&ro->ro_dst;
605
else
606
dst = &sin6;
607
fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
608
609
again:
610
/*
611
* If specified, try to fill in the traffic class field.
612
* Do not override if a non-zero value is already set.
613
* We check the diffserv field and the ECN field separately.
614
*/
615
if ((optvalid & IP6PO_VALID_TC) != 0){
616
int mask = 0;
617
618
if (IPV6_DSCP(ip6) == 0)
619
mask |= 0xfc;
620
if (IPV6_ECN(ip6) == 0)
621
mask |= 0x03;
622
if (mask != 0)
623
ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
624
}
625
626
/* Fill in or override the hop limit field, if necessary. */
627
if ((optvalid & IP6PO_VALID_HLIM) != 0)
628
ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
629
else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
630
if (im6o != NULL)
631
ip6->ip6_hlim = im6o->im6o_multicast_hlim;
632
else
633
ip6->ip6_hlim = V_ip6_defmcasthlim;
634
}
635
636
if (ro == NULL || ro->ro_nh == NULL) {
637
bzero(dst, sizeof(*dst));
638
dst->sin6_family = AF_INET6;
639
dst->sin6_len = sizeof(*dst);
640
dst->sin6_addr = ip6->ip6_dst;
641
}
642
/*
643
* Validate route against routing table changes.
644
* Make sure that the address family is set in route.
645
*/
646
nh = NULL;
647
ifp = NULL;
648
mtu = 0;
649
if (ro != NULL) {
650
if (ro->ro_nh != NULL && inp != NULL) {
651
ro->ro_dst.sin6_family = AF_INET6; /* XXX KASSERT? */
652
NH_VALIDATE((struct route *)ro, &inp->inp_rt_cookie,
653
fibnum);
654
}
655
if (ro->ro_nh != NULL && fwd_tag == NULL &&
656
(!NH_IS_VALID(ro->ro_nh) ||
657
ro->ro_dst.sin6_family != AF_INET6 ||
658
!IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)))
659
RO_INVALIDATE_CACHE(ro);
660
661
if (ro->ro_nh != NULL && fwd_tag == NULL &&
662
ro->ro_dst.sin6_family == AF_INET6 &&
663
IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
664
/* Nexthop is valid and contains valid ifp */
665
nh = ro->ro_nh;
666
} else {
667
if (ro->ro_lle)
668
LLE_FREE(ro->ro_lle); /* zeros ro_lle */
669
ro->ro_lle = NULL;
670
if (fwd_tag == NULL) {
671
bzero(&dst_sa, sizeof(dst_sa));
672
dst_sa.sin6_family = AF_INET6;
673
dst_sa.sin6_len = sizeof(dst_sa);
674
dst_sa.sin6_addr = ip6->ip6_dst;
675
}
676
error = in6_selectroute(&dst_sa, opt, im6o, ro, &ifp,
677
&nh, fibnum, m->m_pkthdr.flowid);
678
if (error != 0) {
679
IP6STAT_INC(ip6s_noroute);
680
if (ifp != NULL)
681
in6_ifstat_inc(ifp, ifs6_out_discard);
682
goto bad;
683
}
684
/*
685
* At this point at least @ifp is not NULL
686
* Can be the case when dst is multicast, link-local or
687
* interface is explicitly specified by the caller.
688
*/
689
}
690
if (nh == NULL) {
691
/*
692
* If in6_selectroute() does not return a nexthop
693
* dst may not have been updated.
694
*/
695
*dst = dst_sa; /* XXX */
696
origifp = ifp;
697
mtu = ifp->if_mtu;
698
} else {
699
ifp = nh->nh_ifp;
700
origifp = nh->nh_aifp;
701
ia = (struct in6_ifaddr *)(nh->nh_ifa);
702
counter_u64_add(nh->nh_pksent, 1);
703
}
704
} else {
705
struct nhop_object *nh;
706
struct in6_addr kdst;
707
uint32_t scopeid;
708
709
if (fwd_tag == NULL) {
710
bzero(&dst_sa, sizeof(dst_sa));
711
dst_sa.sin6_family = AF_INET6;
712
dst_sa.sin6_len = sizeof(dst_sa);
713
dst_sa.sin6_addr = ip6->ip6_dst;
714
}
715
716
if (IN6_IS_ADDR_MULTICAST(&dst_sa.sin6_addr) &&
717
im6o != NULL &&
718
(ifp = im6o->im6o_multicast_ifp) != NULL) {
719
/* We do not need a route lookup. */
720
*dst = dst_sa; /* XXX */
721
origifp = ifp;
722
goto nonh6lookup;
723
}
724
725
in6_splitscope(&dst_sa.sin6_addr, &kdst, &scopeid);
726
727
if (IN6_IS_ADDR_MC_LINKLOCAL(&dst_sa.sin6_addr) ||
728
IN6_IS_ADDR_MC_NODELOCAL(&dst_sa.sin6_addr)) {
729
if (scopeid > 0) {
730
ifp = in6_getlinkifnet(scopeid);
731
if (ifp == NULL) {
732
error = EHOSTUNREACH;
733
goto bad;
734
}
735
*dst = dst_sa; /* XXX */
736
origifp = ifp;
737
goto nonh6lookup;
738
}
739
}
740
741
nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE,
742
m->m_pkthdr.flowid);
743
if (nh == NULL) {
744
IP6STAT_INC(ip6s_noroute);
745
/* No ifp in6_ifstat_inc(ifp, ifs6_out_discard); */
746
error = EHOSTUNREACH;
747
goto bad;
748
}
749
750
ifp = nh->nh_ifp;
751
origifp = nh->nh_aifp;
752
ia = ifatoia6(nh->nh_ifa);
753
if (nh->nh_flags & NHF_GATEWAY)
754
dst->sin6_addr = nh->gw6_sa.sin6_addr;
755
else if (fwd_tag != NULL)
756
dst->sin6_addr = dst_sa.sin6_addr;
757
nonh6lookup:
758
;
759
}
760
/*
761
* At this point ifp MUST be pointing to the valid transmit ifp.
762
* origifp MUST be valid and pointing to either the same ifp or,
763
* in case of loopback output, to the interface which ip6_src
764
* belongs to.
765
* Examples:
766
* fe80::1%em0 -> fe80::2%em0 -> ifp=em0, origifp=em0
767
* fe80::1%em0 -> fe80::1%em0 -> ifp=lo0, origifp=em0
768
* ::1 -> ::1 -> ifp=lo0, origifp=lo0
769
*
770
* mtu can be 0 and will be refined later.
771
*/
772
KASSERT((ifp != NULL), ("output interface must not be NULL"));
773
KASSERT((origifp != NULL), ("output address interface must not be NULL"));
774
775
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
776
/*
777
* IPSec checking which handles several cases.
778
* FAST IPSEC: We re-injected the packet.
779
* XXX: need scope argument.
780
*/
781
if (IPSEC_ENABLED(ipv6)) {
782
if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp, mtu == 0 ?
783
ifp->if_mtu : mtu)) != 0) {
784
if (error == EINPROGRESS)
785
error = 0;
786
goto done;
787
}
788
}
789
#endif /* IPSEC */
790
791
if ((flags & IPV6_FORWARDING) == 0) {
792
/* XXX: the FORWARDING flag can be set for mrouting. */
793
in6_ifstat_inc(ifp, ifs6_out_request);
794
}
795
796
/* Setup data structures for scope ID checks. */
797
src0 = ip6->ip6_src;
798
bzero(&src_sa, sizeof(src_sa));
799
src_sa.sin6_family = AF_INET6;
800
src_sa.sin6_len = sizeof(src_sa);
801
src_sa.sin6_addr = ip6->ip6_src;
802
803
dst0 = ip6->ip6_dst;
804
/* Re-initialize to be sure. */
805
bzero(&dst_sa, sizeof(dst_sa));
806
dst_sa.sin6_family = AF_INET6;
807
dst_sa.sin6_len = sizeof(dst_sa);
808
dst_sa.sin6_addr = ip6->ip6_dst;
809
810
/* Check for valid scope ID. */
811
if (in6_setscope(&src0, origifp, &zone) == 0 &&
812
sa6_recoverscope(&src_sa) == 0 && zone == src_sa.sin6_scope_id &&
813
in6_setscope(&dst0, origifp, &zone) == 0 &&
814
sa6_recoverscope(&dst_sa) == 0 && zone == dst_sa.sin6_scope_id) {
815
/*
816
* The outgoing interface is in the zone of the source
817
* and destination addresses.
818
*
819
*/
820
} else if ((origifp->if_flags & IFF_LOOPBACK) == 0 ||
821
sa6_recoverscope(&src_sa) != 0 ||
822
sa6_recoverscope(&dst_sa) != 0 ||
823
dst_sa.sin6_scope_id == 0 ||
824
(src_sa.sin6_scope_id != 0 &&
825
src_sa.sin6_scope_id != dst_sa.sin6_scope_id) ||
826
ifnet_byindex(dst_sa.sin6_scope_id) == NULL) {
827
/*
828
* If the destination network interface is not a
829
* loopback interface, or the destination network
830
* address has no scope ID, or the source address has
831
* a scope ID set which is different from the
832
* destination address one, or there is no network
833
* interface representing this scope ID, the address
834
* pair is considered invalid.
835
*/
836
IP6STAT_INC(ip6s_badscope);
837
in6_ifstat_inc(origifp, ifs6_out_discard);
838
if (error == 0)
839
error = EHOSTUNREACH; /* XXX */
840
goto bad;
841
}
842
/* All scope ID checks are successful. */
843
844
if (nh && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
845
if ((optvalid & IP6PO_VALID_NHINFO) != 0) {
846
/*
847
* The nexthop is explicitly specified by the
848
* application. We assume the next hop is an IPv6
849
* address.
850
*/
851
dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
852
}
853
else if ((nh->nh_flags & NHF_GATEWAY))
854
dst = &nh->gw6_sa;
855
}
856
857
if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
858
m->m_flags &= ~(M_BCAST | M_MCAST); /* Just in case. */
859
} else {
860
m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
861
in6_ifstat_inc(ifp, ifs6_out_mcast);
862
863
/* Confirm that the outgoing interface supports multicast. */
864
if (!(ifp->if_flags & IFF_MULTICAST)) {
865
IP6STAT_INC(ip6s_noroute);
866
in6_ifstat_inc(ifp, ifs6_out_discard);
867
error = ENETUNREACH;
868
goto bad;
869
}
870
if ((im6o == NULL && in6_mcast_loop) ||
871
(im6o && im6o->im6o_multicast_loop)) {
872
/*
873
* Loop back multicast datagram if not expressly
874
* forbidden to do so, even if we have not joined
875
* the address; protocols will filter it later,
876
* thus deferring a hash lookup and lock acquisition
877
* at the expense of an m_copym().
878
*/
879
ip6_mloopback(ifp, m);
880
} else {
881
/*
882
* If we are acting as a multicast router, perform
883
* multicast forwarding as if the packet had just
884
* arrived on the interface to which we are about
885
* to send. The multicast forwarding function
886
* recursively calls this function, using the
887
* IPV6_FORWARDING flag to prevent infinite recursion.
888
*
889
* Multicasts that are looped back by ip6_mloopback(),
890
* above, will be forwarded by the ip6_input() routine,
891
* if necessary.
892
*/
893
if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
894
/*
895
* XXX: ip6_mforward expects that rcvif is NULL
896
* when it is called from the originating path.
897
* However, it may not always be the case.
898
*/
899
m->m_pkthdr.rcvif = NULL;
900
if (ip6_mforward(ip6, ifp, m) != 0) {
901
m_freem(m);
902
goto done;
903
}
904
}
905
}
906
/*
907
* Multicasts with a hoplimit of zero may be looped back,
908
* above, but must not be transmitted on a network.
909
* Also, multicasts addressed to the loopback interface
910
* are not sent -- the above call to ip6_mloopback() will
911
* loop back a copy if this host actually belongs to the
912
* destination group on the loopback interface.
913
*/
914
if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
915
IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
916
m_freem(m);
917
goto done;
918
}
919
}
920
921
/*
922
* Fill the outgoing interface to tell the upper layer
923
* to increment per-interface statistics.
924
*/
925
if (ifpp)
926
*ifpp = ifp;
927
928
/* Determine path MTU. */
929
ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, &mtu, fibnum,
930
*nexthdrp);
931
KASSERT(mtu > 0, ("%s:%d: mtu %ld, ro_pmtu %p ro %p ifp %p fibnum %u",
932
__func__, __LINE__, mtu, ro_pmtu, ro, ifp, fibnum));
933
934
/*
935
* The caller of this function may specify to use the minimum MTU
936
* in some cases.
937
* An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
938
* setting. The logic is a bit complicated; by default, unicast
939
* packets will follow path MTU while multicast packets will be sent at
940
* the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
941
* including unicast ones will be sent at the minimum MTU. Multicast
942
* packets will always be sent at the minimum MTU unless
943
* IP6PO_MINMTU_DISABLE is explicitly specified.
944
* See RFC 3542 for more details.
945
*/
946
if (mtu > IPV6_MMTU) {
947
if ((flags & IPV6_MINMTU))
948
mtu = IPV6_MMTU;
949
else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
950
mtu = IPV6_MMTU;
951
else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
952
(opt == NULL ||
953
opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
954
mtu = IPV6_MMTU;
955
}
956
}
957
958
/*
959
* Clear embedded scope identifiers if necessary.
960
* in6_clearscope() will touch the addresses only when necessary.
961
*/
962
in6_clearscope(&ip6->ip6_src);
963
in6_clearscope(&ip6->ip6_dst);
964
965
/*
966
* If the outgoing packet contains a hop-by-hop options header,
967
* it must be examined and processed even by the source node.
968
* (RFC 2460, section 4.)
969
*/
970
if (exthdrs.ip6e_hbh) {
971
struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
972
u_int32_t dummy; /* XXX unused */
973
974
#ifdef DIAGNOSTIC
975
if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
976
panic("ip6e_hbh is not contiguous");
977
#endif
978
/*
979
* XXX: if we have to send an ICMPv6 error to the sender,
980
* we need the M_LOOP flag since icmp6_error() expects
981
* the IPv6 and the hop-by-hop options header are
982
* contiguous unless the flag is set.
983
*/
984
m->m_flags |= M_LOOP;
985
m->m_pkthdr.rcvif = ifp;
986
if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
987
((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
988
&dummy) < 0) {
989
/* m was already freed at this point. */
990
error = EINVAL;/* better error? */
991
goto done;
992
}
993
m->m_flags &= ~M_LOOP; /* XXX */
994
m->m_pkthdr.rcvif = NULL;
995
}
996
997
/* Jump over all PFIL processing if hooks are not active. */
998
if (!PFIL_HOOKED_OUT(V_inet6_pfil_head))
999
goto passout;
1000
1001
odst = ip6->ip6_dst;
1002
/* Run through list of hooks for output packets. */
1003
switch (pfil_mbuf_out(V_inet6_pfil_head, &m, ifp, inp)) {
1004
case PFIL_PASS:
1005
ip6 = mtod(m, struct ip6_hdr *);
1006
break;
1007
case PFIL_DROPPED:
1008
error = EACCES;
1009
/* FALLTHROUGH */
1010
case PFIL_CONSUMED:
1011
goto done;
1012
}
1013
1014
needfiblookup = 0;
1015
/* See if destination IP address was changed by packet filter. */
1016
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
1017
m->m_flags |= M_SKIP_FIREWALL;
1018
/* If destination is now ourself drop to ip6_input(). */
1019
if (in6_localip(&ip6->ip6_dst)) {
1020
m->m_flags |= M_FASTFWD_OURS;
1021
if (m->m_pkthdr.rcvif == NULL)
1022
m->m_pkthdr.rcvif = V_loif;
1023
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1024
m->m_pkthdr.csum_flags |=
1025
CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
1026
m->m_pkthdr.csum_data = 0xffff;
1027
}
1028
#if defined(SCTP) || defined(SCTP_SUPPORT)
1029
if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
1030
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1031
#endif
1032
error = netisr_queue(NETISR_IPV6, m);
1033
goto done;
1034
} else {
1035
if (ro != NULL)
1036
RO_INVALIDATE_CACHE(ro);
1037
needfiblookup = 1; /* Redo the routing table lookup. */
1038
}
1039
}
1040
/* See if fib was changed by packet filter. */
1041
if (fibnum != M_GETFIB(m)) {
1042
m->m_flags |= M_SKIP_FIREWALL;
1043
fibnum = M_GETFIB(m);
1044
if (ro != NULL)
1045
RO_INVALIDATE_CACHE(ro);
1046
needfiblookup = 1;
1047
}
1048
if (needfiblookup)
1049
goto again;
1050
1051
/* See if local, if yes, send it to netisr. */
1052
if (m->m_flags & M_FASTFWD_OURS) {
1053
if (m->m_pkthdr.rcvif == NULL)
1054
m->m_pkthdr.rcvif = V_loif;
1055
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1056
m->m_pkthdr.csum_flags |=
1057
CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
1058
m->m_pkthdr.csum_data = 0xffff;
1059
}
1060
#if defined(SCTP) || defined(SCTP_SUPPORT)
1061
if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
1062
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1063
#endif
1064
error = netisr_queue(NETISR_IPV6, m);
1065
goto done;
1066
}
1067
/* Or forward to some other address? */
1068
if ((m->m_flags & M_IP6_NEXTHOP) &&
1069
(fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
1070
if (ro != NULL)
1071
dst = (struct sockaddr_in6 *)&ro->ro_dst;
1072
else
1073
dst = &sin6;
1074
bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6));
1075
m->m_flags |= M_SKIP_FIREWALL;
1076
m->m_flags &= ~M_IP6_NEXTHOP;
1077
m_tag_delete(m, fwd_tag);
1078
goto again;
1079
}
1080
1081
passout:
1082
if (vlan_pcp > -1)
1083
EVL_APPLY_PRI(m, vlan_pcp);
1084
1085
/* Ensure the packet data is mapped if the interface requires it. */
1086
if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
1087
struct mbuf *m1;
1088
1089
error = mb_unmapped_to_ext(m, &m1);
1090
if (error != 0) {
1091
if (error == EINVAL) {
1092
if_printf(ifp, "TLS packet\n");
1093
/* XXXKIB */
1094
} else if (error == ENOMEM) {
1095
error = ENOBUFS;
1096
}
1097
IP6STAT_INC(ip6s_odropped);
1098
return (error);
1099
} else {
1100
m = m1;
1101
}
1102
}
1103
1104
/*
1105
* Send the packet to the outgoing interface.
1106
* If necessary, do IPv6 fragmentation before sending.
1107
*
1108
* 1: normal case (dontfrag == 0)
1109
* 1-a: send as is if tlen <= path mtu
1110
* 1-b: fragment if tlen > path mtu
1111
*
1112
* 2: if user asks us not to fragment (dontfrag == 1)
1113
* 2-a: send as is if tlen <= interface mtu
1114
* 2-b: error if tlen > interface mtu
1115
*/
1116
sw_csum = m->m_pkthdr.csum_flags;
1117
if (!hdrsplit) {
1118
tso = ((sw_csum & ifp->if_hwassist &
1119
(CSUM_TSO | CSUM_INNER_TSO)) != 0) ? 1 : 0;
1120
sw_csum &= ~ifp->if_hwassist;
1121
} else
1122
tso = 0;
1123
/*
1124
* If we added extension headers, we will not do TSO and calculate the
1125
* checksums ourselves for now.
1126
* XXX-BZ Need a framework to know when the NIC can handle it, even
1127
* with ext. hdrs.
1128
*/
1129
ip6_output_delayed_csum(m, ifp, sw_csum, plen, optlen);
1130
/* XXX-BZ m->m_pkthdr.csum_flags &= ~ifp->if_hwassist; */
1131
tlen = m->m_pkthdr.len;
1132
1133
if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso)
1134
dontfrag = 1;
1135
else
1136
dontfrag = 0;
1137
if (dontfrag && tlen > in6_ifmtu(ifp) && !tso) { /* Case 2-b. */
1138
/*
1139
* If the DONTFRAG option is specified, we cannot send the
1140
* packet when the data length is larger than the MTU of the
1141
* outgoing interface.
1142
* Notify the error by sending IPV6_PATHMTU ancillary data if
1143
* application wanted to know the MTU value. Also return an
1144
* error code (this is not described in the API spec).
1145
*/
1146
if (inp != NULL)
1147
ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu);
1148
error = EMSGSIZE;
1149
goto bad;
1150
}
1151
1152
/* Transmit packet without fragmentation. */
1153
if (dontfrag || tlen <= mtu) { /* Cases 1-a and 2-a. */
1154
struct in6_ifaddr *ia6;
1155
1156
ip6 = mtod(m, struct ip6_hdr *);
1157
ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1158
if (ia6) {
1159
/* Record statistics for this interface address. */
1160
counter_u64_add(ia6->ia_ifa.ifa_opackets, 1);
1161
counter_u64_add(ia6->ia_ifa.ifa_obytes,
1162
m->m_pkthdr.len);
1163
}
1164
error = ip6_output_send(inp, ifp, origifp, m, dst, ro,
1165
(flags & IP_NO_SND_TAG_RL) ? false : true);
1166
goto done;
1167
}
1168
1169
/* Try to fragment the packet. Case 1-b. */
1170
if (mtu < IPV6_MMTU) {
1171
/* Path MTU cannot be less than IPV6_MMTU. */
1172
error = EMSGSIZE;
1173
in6_ifstat_inc(ifp, ifs6_out_fragfail);
1174
goto bad;
1175
} else if (ip6->ip6_plen == 0) {
1176
/* We do not support jumbo payload. */
1177
error = EMSGSIZE;
1178
in6_ifstat_inc(ifp, ifs6_out_fragfail);
1179
goto bad;
1180
} else {
1181
u_char nextproto;
1182
1183
/*
1184
* Too large for the destination or interface;
1185
* fragment if possible.
1186
* Must be able to put at least 8 bytes per fragment.
1187
*/
1188
if (mtu > IPV6_MAXPACKET)
1189
mtu = IPV6_MAXPACKET;
1190
1191
len = (mtu - unfragpartlen - sizeof(struct ip6_frag)) & ~7;
1192
if (len < 8) {
1193
error = EMSGSIZE;
1194
in6_ifstat_inc(ifp, ifs6_out_fragfail);
1195
goto bad;
1196
}
1197
1198
/*
1199
* If the interface will not calculate checksums on
1200
* fragmented packets, then do it here.
1201
* XXX-BZ handle the hw offloading case. Need flags.
1202
*/
1203
ip6_output_delayed_csum(m, ifp, m->m_pkthdr.csum_flags, plen,
1204
optlen);
1205
1206
/*
1207
* Change the next header field of the last header in the
1208
* unfragmentable part.
1209
*/
1210
if (exthdrs.ip6e_rthdr) {
1211
nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1212
*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1213
} else if (exthdrs.ip6e_dest1) {
1214
nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1215
*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1216
} else if (exthdrs.ip6e_hbh) {
1217
nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1218
*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1219
} else {
1220
ip6 = mtod(m, struct ip6_hdr *);
1221
nextproto = ip6->ip6_nxt;
1222
ip6->ip6_nxt = IPPROTO_FRAGMENT;
1223
}
1224
1225
/*
1226
* Loop through length of segment after first fragment,
1227
* make new header and copy data of each part and link onto
1228
* chain.
1229
*/
1230
m0 = m;
1231
id = htonl(ip6_randomid());
1232
error = ip6_fragment(ifp, m, unfragpartlen, nextproto,len, id);
1233
if (error != 0)
1234
goto sendorfree;
1235
1236
in6_ifstat_inc(ifp, ifs6_out_fragok);
1237
}
1238
1239
/* Remove leading garbage. */
1240
sendorfree:
1241
m = m0->m_nextpkt;
1242
m0->m_nextpkt = 0;
1243
m_freem(m0);
1244
for (; m; m = m0) {
1245
m0 = m->m_nextpkt;
1246
m->m_nextpkt = 0;
1247
if (error == 0) {
1248
/* Record statistics for this interface address. */
1249
if (ia) {
1250
counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
1251
counter_u64_add(ia->ia_ifa.ifa_obytes,
1252
m->m_pkthdr.len);
1253
}
1254
if (vlan_pcp > -1)
1255
EVL_APPLY_PRI(m, vlan_pcp);
1256
error = ip6_output_send(inp, ifp, origifp, m, dst, ro,
1257
true);
1258
} else
1259
m_freem(m);
1260
}
1261
1262
if (error == 0)
1263
IP6STAT_INC(ip6s_fragmented);
1264
1265
done:
1266
return (error);
1267
1268
freehdrs:
1269
m_freem(exthdrs.ip6e_hbh); /* m_freem() checks if mbuf is NULL. */
1270
m_freem(exthdrs.ip6e_dest1);
1271
m_freem(exthdrs.ip6e_rthdr);
1272
m_freem(exthdrs.ip6e_dest2);
1273
/* FALLTHROUGH */
1274
bad:
1275
if (m)
1276
m_freem(m);
1277
goto done;
1278
}
1279
1280
static int
1281
ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1282
{
1283
struct mbuf *m;
1284
1285
if (hlen > MCLBYTES)
1286
return (ENOBUFS); /* XXX */
1287
1288
if (hlen > MLEN)
1289
m = m_getcl(M_NOWAIT, MT_DATA, 0);
1290
else
1291
m = m_get(M_NOWAIT, MT_DATA);
1292
if (m == NULL)
1293
return (ENOBUFS);
1294
m->m_len = hlen;
1295
if (hdr)
1296
bcopy(hdr, mtod(m, caddr_t), hlen);
1297
1298
*mp = m;
1299
return (0);
1300
}
1301
1302
/*
1303
* Insert fragment header and copy unfragmentable header portions.
1304
*/
1305
static int
1306
ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1307
struct ip6_frag **frghdrp)
1308
{
1309
struct mbuf *n, *mlast;
1310
1311
if (hlen > sizeof(struct ip6_hdr)) {
1312
n = m_copym(m0, sizeof(struct ip6_hdr),
1313
hlen - sizeof(struct ip6_hdr), M_NOWAIT);
1314
if (n == NULL)
1315
return (ENOBUFS);
1316
m->m_next = n;
1317
} else
1318
n = m;
1319
1320
/* Search for the last mbuf of unfragmentable part. */
1321
for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1322
;
1323
1324
if (M_WRITABLE(mlast) &&
1325
M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1326
/* use the trailing space of the last mbuf for the fragment hdr */
1327
*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1328
mlast->m_len);
1329
mlast->m_len += sizeof(struct ip6_frag);
1330
m->m_pkthdr.len += sizeof(struct ip6_frag);
1331
} else {
1332
/* allocate a new mbuf for the fragment header */
1333
struct mbuf *mfrg;
1334
1335
mfrg = m_get(M_NOWAIT, MT_DATA);
1336
if (mfrg == NULL)
1337
return (ENOBUFS);
1338
mfrg->m_len = sizeof(struct ip6_frag);
1339
*frghdrp = mtod(mfrg, struct ip6_frag *);
1340
mlast->m_next = mfrg;
1341
}
1342
1343
return (0);
1344
}
1345
1346
/*
1347
* Calculates IPv6 path mtu for destination @dst.
1348
* Resulting MTU is stored in @mtup.
1349
*
1350
* Returns 0 on success.
1351
*/
1352
static int
1353
ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup)
1354
{
1355
struct epoch_tracker et;
1356
struct nhop_object *nh;
1357
struct in6_addr kdst;
1358
uint32_t scopeid;
1359
int error;
1360
1361
in6_splitscope(dst, &kdst, &scopeid);
1362
1363
NET_EPOCH_ENTER(et);
1364
nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0);
1365
if (nh != NULL) {
1366
ip6_calcmtu(nh->nh_ifp, dst, nh->nh_mtu, mtup, 0);
1367
error = 0;
1368
} else
1369
error = EHOSTUNREACH;
1370
NET_EPOCH_EXIT(et);
1371
1372
return (error);
1373
}
1374
1375
/*
1376
* Calculates IPv6 path MTU for @dst based on transmit @ifp,
1377
* and cached data in @ro_pmtu.
1378
* MTU from (successful) route lookup is saved (along with dst)
1379
* inside @ro_pmtu to avoid subsequent route lookups after packet
1380
* filter processing.
1381
*
1382
* Stores mtu into @mtup.
1383
*/
1384
static void
1385
ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
1386
struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup,
1387
u_int fibnum, u_int proto)
1388
{
1389
struct nhop_object *nh;
1390
struct in6_addr kdst;
1391
uint32_t scopeid;
1392
struct sockaddr_in6 *sa6_dst, sin6;
1393
u_long mtu;
1394
1395
NET_EPOCH_ASSERT();
1396
1397
mtu = 0;
1398
if (ro_pmtu == NULL || do_lookup) {
1399
/*
1400
* Here ro_pmtu has final destination address, while
1401
* ro might represent immediate destination.
1402
* Use ro_pmtu destination since mtu might differ.
1403
*/
1404
if (ro_pmtu != NULL) {
1405
sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1406
if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
1407
ro_pmtu->ro_mtu = 0;
1408
} else
1409
sa6_dst = &sin6;
1410
1411
if (ro_pmtu == NULL || ro_pmtu->ro_mtu == 0) {
1412
bzero(sa6_dst, sizeof(*sa6_dst));
1413
sa6_dst->sin6_family = AF_INET6;
1414
sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1415
sa6_dst->sin6_addr = *dst;
1416
1417
in6_splitscope(dst, &kdst, &scopeid);
1418
nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0);
1419
if (nh != NULL) {
1420
mtu = nh->nh_mtu;
1421
if (ro_pmtu != NULL)
1422
ro_pmtu->ro_mtu = mtu;
1423
}
1424
} else
1425
mtu = ro_pmtu->ro_mtu;
1426
}
1427
1428
if (ro_pmtu != NULL && ro_pmtu->ro_nh != NULL)
1429
mtu = ro_pmtu->ro_nh->nh_mtu;
1430
1431
ip6_calcmtu(ifp, dst, mtu, mtup, proto);
1432
}
1433
1434
/*
1435
* Calculate MTU based on transmit @ifp, route mtu @rt_mtu and
1436
* hostcache data for @dst.
1437
* Stores mtu into @mtup.
1438
*/
1439
static void
1440
ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
1441
u_long *mtup, u_int proto)
1442
{
1443
u_long mtu = 0;
1444
1445
if (rt_mtu > 0) {
1446
/* Skip the hostcache if the protocol handles PMTU changes. */
1447
if (proto != IPPROTO_TCP && proto != IPPROTO_SCTP) {
1448
struct in_conninfo inc = {
1449
.inc_flags = INC_ISIPV6,
1450
.inc6_faddr = *dst,
1451
};
1452
1453
mtu = tcp_hc_getmtu(&inc);
1454
}
1455
1456
if (mtu)
1457
mtu = min(mtu, rt_mtu);
1458
else
1459
mtu = rt_mtu;
1460
}
1461
1462
if (mtu == 0)
1463
mtu = in6_ifmtu(ifp);
1464
1465
*mtup = mtu;
1466
}
1467
1468
/*
1469
* IP6 socket option processing.
1470
*/
1471
int
1472
ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1473
{
1474
int optdatalen, uproto;
1475
void *optdata;
1476
struct inpcb *inp = sotoinpcb(so);
1477
int error, optval;
1478
int level, op, optname;
1479
int optlen;
1480
struct thread *td;
1481
#ifdef RSS
1482
uint32_t rss_bucket;
1483
int retval;
1484
#endif
1485
1486
/*
1487
* Don't use more than a quarter of mbuf clusters. N.B.:
1488
* nmbclusters is an int, but nmbclusters * MCLBYTES may overflow
1489
* on LP64 architectures, so cast to u_long to avoid undefined
1490
* behavior. ILP32 architectures cannot have nmbclusters
1491
* large enough to overflow for other reasons.
1492
*/
1493
#define IPV6_PKTOPTIONS_MBUF_LIMIT ((u_long)nmbclusters * MCLBYTES / 4)
1494
1495
level = sopt->sopt_level;
1496
op = sopt->sopt_dir;
1497
optname = sopt->sopt_name;
1498
optlen = sopt->sopt_valsize;
1499
td = sopt->sopt_td;
1500
error = 0;
1501
optval = 0;
1502
uproto = (int)so->so_proto->pr_protocol;
1503
1504
if (level != IPPROTO_IPV6) {
1505
error = EINVAL;
1506
1507
if (sopt->sopt_level == SOL_SOCKET &&
1508
sopt->sopt_dir == SOPT_SET) {
1509
switch (sopt->sopt_name) {
1510
case SO_SETFIB:
1511
error = sooptcopyin(sopt, &optval,
1512
sizeof(optval), sizeof(optval));
1513
if (error != 0)
1514
break;
1515
1516
INP_WLOCK(inp);
1517
if ((inp->inp_flags & INP_BOUNDFIB) != 0 &&
1518
optval != so->so_fibnum) {
1519
INP_WUNLOCK(inp);
1520
error = EISCONN;
1521
break;
1522
}
1523
error = sosetfib(inp->inp_socket, optval);
1524
if (error == 0)
1525
inp->inp_inc.inc_fibnum = optval;
1526
INP_WUNLOCK(inp);
1527
break;
1528
case SO_MAX_PACING_RATE:
1529
#ifdef RATELIMIT
1530
INP_WLOCK(inp);
1531
inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
1532
INP_WUNLOCK(inp);
1533
error = 0;
1534
#else
1535
error = EOPNOTSUPP;
1536
#endif
1537
break;
1538
default:
1539
break;
1540
}
1541
}
1542
} else { /* level == IPPROTO_IPV6 */
1543
switch (op) {
1544
case SOPT_SET:
1545
switch (optname) {
1546
case IPV6_2292PKTOPTIONS:
1547
#ifdef IPV6_PKTOPTIONS
1548
case IPV6_PKTOPTIONS:
1549
#endif
1550
{
1551
struct mbuf *m;
1552
1553
if (optlen > IPV6_PKTOPTIONS_MBUF_LIMIT) {
1554
printf("ip6_ctloutput: mbuf limit hit\n");
1555
error = ENOBUFS;
1556
break;
1557
}
1558
1559
error = soopt_getm(sopt, &m); /* XXX */
1560
if (error != 0)
1561
break;
1562
error = soopt_mcopyin(sopt, m); /* XXX */
1563
if (error != 0)
1564
break;
1565
INP_WLOCK(inp);
1566
error = ip6_pcbopts(&inp->in6p_outputopts, m,
1567
so, sopt);
1568
INP_WUNLOCK(inp);
1569
m_freem(m); /* XXX */
1570
break;
1571
}
1572
1573
/*
1574
* Use of some Hop-by-Hop options or some
1575
* Destination options, might require special
1576
* privilege. That is, normal applications
1577
* (without special privilege) might be forbidden
1578
* from setting certain options in outgoing packets,
1579
* and might never see certain options in received
1580
* packets. [RFC 2292 Section 6]
1581
* KAME specific note:
1582
* KAME prevents non-privileged users from sending or
1583
* receiving ANY hbh/dst options in order to avoid
1584
* overhead of parsing options in the kernel.
1585
*/
1586
case IPV6_RECVHOPOPTS:
1587
case IPV6_RECVDSTOPTS:
1588
case IPV6_RECVRTHDRDSTOPTS:
1589
if (td != NULL) {
1590
error = priv_check(td,
1591
PRIV_NETINET_SETHDROPTS);
1592
if (error)
1593
break;
1594
}
1595
/* FALLTHROUGH */
1596
case IPV6_UNICAST_HOPS:
1597
case IPV6_HOPLIMIT:
1598
1599
case IPV6_RECVPKTINFO:
1600
case IPV6_RECVHOPLIMIT:
1601
case IPV6_RECVRTHDR:
1602
case IPV6_RECVPATHMTU:
1603
case IPV6_RECVTCLASS:
1604
case IPV6_RECVFLOWID:
1605
#ifdef RSS
1606
case IPV6_RECVRSSBUCKETID:
1607
#endif
1608
case IPV6_V6ONLY:
1609
case IPV6_AUTOFLOWLABEL:
1610
case IPV6_ORIGDSTADDR:
1611
case IPV6_BINDANY:
1612
case IPV6_VLAN_PCP:
1613
if (optname == IPV6_BINDANY && td != NULL) {
1614
error = priv_check(td,
1615
PRIV_NETINET_BINDANY);
1616
if (error)
1617
break;
1618
}
1619
1620
if (optlen != sizeof(int)) {
1621
error = EINVAL;
1622
break;
1623
}
1624
error = sooptcopyin(sopt, &optval,
1625
sizeof optval, sizeof optval);
1626
if (error)
1627
break;
1628
switch (optname) {
1629
case IPV6_UNICAST_HOPS:
1630
if (optval < -1 || optval >= 256)
1631
error = EINVAL;
1632
else {
1633
/* -1 = kernel default */
1634
inp->in6p_hops = optval;
1635
if ((inp->inp_vflag &
1636
INP_IPV4) != 0)
1637
inp->inp_ip_ttl = optval;
1638
}
1639
break;
1640
#define OPTSET(bit) \
1641
do { \
1642
INP_WLOCK(inp); \
1643
if (optval) \
1644
inp->inp_flags |= (bit); \
1645
else \
1646
inp->inp_flags &= ~(bit); \
1647
INP_WUNLOCK(inp); \
1648
} while (/*CONSTCOND*/ 0)
1649
#define OPTSET2292(bit) \
1650
do { \
1651
INP_WLOCK(inp); \
1652
inp->inp_flags |= IN6P_RFC2292; \
1653
if (optval) \
1654
inp->inp_flags |= (bit); \
1655
else \
1656
inp->inp_flags &= ~(bit); \
1657
INP_WUNLOCK(inp); \
1658
} while (/*CONSTCOND*/ 0)
1659
#define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0)
1660
1661
#define OPTSET2_N(bit, val) do { \
1662
if (val) \
1663
inp->inp_flags2 |= bit; \
1664
else \
1665
inp->inp_flags2 &= ~bit; \
1666
} while (0)
1667
#define OPTSET2(bit, val) do { \
1668
INP_WLOCK(inp); \
1669
OPTSET2_N(bit, val); \
1670
INP_WUNLOCK(inp); \
1671
} while (0)
1672
#define OPTBIT2(bit) (inp->inp_flags2 & (bit) ? 1 : 0)
1673
#define OPTSET2292_EXCLUSIVE(bit) \
1674
do { \
1675
INP_WLOCK(inp); \
1676
if (OPTBIT(IN6P_RFC2292)) { \
1677
error = EINVAL; \
1678
} else { \
1679
if (optval) \
1680
inp->inp_flags |= (bit); \
1681
else \
1682
inp->inp_flags &= ~(bit); \
1683
} \
1684
INP_WUNLOCK(inp); \
1685
} while (/*CONSTCOND*/ 0)
1686
1687
case IPV6_RECVPKTINFO:
1688
OPTSET2292_EXCLUSIVE(IN6P_PKTINFO);
1689
break;
1690
1691
case IPV6_HOPLIMIT:
1692
{
1693
struct ip6_pktopts **optp;
1694
1695
/* cannot mix with RFC2292 */
1696
if (OPTBIT(IN6P_RFC2292)) {
1697
error = EINVAL;
1698
break;
1699
}
1700
INP_WLOCK(inp);
1701
if (inp->inp_flags & INP_DROPPED) {
1702
INP_WUNLOCK(inp);
1703
return (ECONNRESET);
1704
}
1705
optp = &inp->in6p_outputopts;
1706
error = ip6_pcbopt(IPV6_HOPLIMIT,
1707
(u_char *)&optval, sizeof(optval),
1708
optp, (td != NULL) ? td->td_ucred :
1709
NULL, uproto);
1710
INP_WUNLOCK(inp);
1711
break;
1712
}
1713
1714
case IPV6_RECVHOPLIMIT:
1715
OPTSET2292_EXCLUSIVE(IN6P_HOPLIMIT);
1716
break;
1717
1718
case IPV6_RECVHOPOPTS:
1719
OPTSET2292_EXCLUSIVE(IN6P_HOPOPTS);
1720
break;
1721
1722
case IPV6_RECVDSTOPTS:
1723
OPTSET2292_EXCLUSIVE(IN6P_DSTOPTS);
1724
break;
1725
1726
case IPV6_RECVRTHDRDSTOPTS:
1727
OPTSET2292_EXCLUSIVE(IN6P_RTHDRDSTOPTS);
1728
break;
1729
1730
case IPV6_RECVRTHDR:
1731
OPTSET2292_EXCLUSIVE(IN6P_RTHDR);
1732
break;
1733
1734
case IPV6_RECVPATHMTU:
1735
/*
1736
* We ignore this option for TCP
1737
* sockets.
1738
* (RFC3542 leaves this case
1739
* unspecified.)
1740
*/
1741
if (uproto != IPPROTO_TCP)
1742
OPTSET(IN6P_MTU);
1743
break;
1744
1745
case IPV6_RECVFLOWID:
1746
OPTSET2(INP_RECVFLOWID, optval);
1747
break;
1748
1749
#ifdef RSS
1750
case IPV6_RECVRSSBUCKETID:
1751
OPTSET2(INP_RECVRSSBUCKETID, optval);
1752
break;
1753
#endif
1754
1755
case IPV6_V6ONLY:
1756
INP_WLOCK(inp);
1757
if (inp->inp_lport ||
1758
!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1759
/*
1760
* The socket is already bound.
1761
*/
1762
INP_WUNLOCK(inp);
1763
error = EINVAL;
1764
break;
1765
}
1766
if (optval) {
1767
inp->inp_flags |= IN6P_IPV6_V6ONLY;
1768
inp->inp_vflag &= ~INP_IPV4;
1769
} else {
1770
inp->inp_flags &= ~IN6P_IPV6_V6ONLY;
1771
inp->inp_vflag |= INP_IPV4;
1772
}
1773
INP_WUNLOCK(inp);
1774
break;
1775
case IPV6_RECVTCLASS:
1776
/* cannot mix with RFC2292 XXX */
1777
OPTSET2292_EXCLUSIVE(IN6P_TCLASS);
1778
break;
1779
case IPV6_AUTOFLOWLABEL:
1780
OPTSET(IN6P_AUTOFLOWLABEL);
1781
break;
1782
1783
case IPV6_ORIGDSTADDR:
1784
OPTSET2(INP_ORIGDSTADDR, optval);
1785
break;
1786
case IPV6_BINDANY:
1787
OPTSET(INP_BINDANY);
1788
break;
1789
case IPV6_VLAN_PCP:
1790
if ((optval >= -1) && (optval <=
1791
(INP_2PCP_MASK >> INP_2PCP_SHIFT))) {
1792
if (optval == -1) {
1793
INP_WLOCK(inp);
1794
inp->inp_flags2 &=
1795
~(INP_2PCP_SET |
1796
INP_2PCP_MASK);
1797
INP_WUNLOCK(inp);
1798
} else {
1799
INP_WLOCK(inp);
1800
inp->inp_flags2 |=
1801
INP_2PCP_SET;
1802
inp->inp_flags2 &=
1803
~INP_2PCP_MASK;
1804
inp->inp_flags2 |=
1805
optval <<
1806
INP_2PCP_SHIFT;
1807
INP_WUNLOCK(inp);
1808
}
1809
} else
1810
error = EINVAL;
1811
break;
1812
}
1813
break;
1814
1815
case IPV6_TCLASS:
1816
case IPV6_DONTFRAG:
1817
case IPV6_USE_MIN_MTU:
1818
case IPV6_PREFER_TEMPADDR:
1819
if (optlen != sizeof(optval)) {
1820
error = EINVAL;
1821
break;
1822
}
1823
error = sooptcopyin(sopt, &optval,
1824
sizeof optval, sizeof optval);
1825
if (error)
1826
break;
1827
{
1828
struct ip6_pktopts **optp;
1829
INP_WLOCK(inp);
1830
if (inp->inp_flags & INP_DROPPED) {
1831
INP_WUNLOCK(inp);
1832
return (ECONNRESET);
1833
}
1834
optp = &inp->in6p_outputopts;
1835
error = ip6_pcbopt(optname,
1836
(u_char *)&optval, sizeof(optval),
1837
optp, (td != NULL) ? td->td_ucred :
1838
NULL, uproto);
1839
INP_WUNLOCK(inp);
1840
break;
1841
}
1842
1843
case IPV6_2292PKTINFO:
1844
case IPV6_2292HOPLIMIT:
1845
case IPV6_2292HOPOPTS:
1846
case IPV6_2292DSTOPTS:
1847
case IPV6_2292RTHDR:
1848
/* RFC 2292 */
1849
if (optlen != sizeof(int)) {
1850
error = EINVAL;
1851
break;
1852
}
1853
error = sooptcopyin(sopt, &optval,
1854
sizeof optval, sizeof optval);
1855
if (error)
1856
break;
1857
switch (optname) {
1858
case IPV6_2292PKTINFO:
1859
OPTSET2292(IN6P_PKTINFO);
1860
break;
1861
case IPV6_2292HOPLIMIT:
1862
OPTSET2292(IN6P_HOPLIMIT);
1863
break;
1864
case IPV6_2292HOPOPTS:
1865
/*
1866
* Check super-user privilege.
1867
* See comments for IPV6_RECVHOPOPTS.
1868
*/
1869
if (td != NULL) {
1870
error = priv_check(td,
1871
PRIV_NETINET_SETHDROPTS);
1872
if (error)
1873
return (error);
1874
}
1875
OPTSET2292(IN6P_HOPOPTS);
1876
break;
1877
case IPV6_2292DSTOPTS:
1878
if (td != NULL) {
1879
error = priv_check(td,
1880
PRIV_NETINET_SETHDROPTS);
1881
if (error)
1882
return (error);
1883
}
1884
OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1885
break;
1886
case IPV6_2292RTHDR:
1887
OPTSET2292(IN6P_RTHDR);
1888
break;
1889
}
1890
break;
1891
case IPV6_PKTINFO:
1892
case IPV6_HOPOPTS:
1893
case IPV6_RTHDR:
1894
case IPV6_DSTOPTS:
1895
case IPV6_RTHDRDSTOPTS:
1896
case IPV6_NEXTHOP:
1897
{
1898
/* new advanced API (RFC3542) */
1899
u_char *optbuf;
1900
u_char optbuf_storage[MCLBYTES];
1901
int optlen;
1902
struct ip6_pktopts **optp;
1903
1904
/* cannot mix with RFC2292 */
1905
if (OPTBIT(IN6P_RFC2292)) {
1906
error = EINVAL;
1907
break;
1908
}
1909
1910
/*
1911
* We only ensure valsize is not too large
1912
* here. Further validation will be done
1913
* later.
1914
*/
1915
error = sooptcopyin(sopt, optbuf_storage,
1916
sizeof(optbuf_storage), 0);
1917
if (error)
1918
break;
1919
optlen = sopt->sopt_valsize;
1920
optbuf = optbuf_storage;
1921
INP_WLOCK(inp);
1922
if (inp->inp_flags & INP_DROPPED) {
1923
INP_WUNLOCK(inp);
1924
return (ECONNRESET);
1925
}
1926
optp = &inp->in6p_outputopts;
1927
error = ip6_pcbopt(optname, optbuf, optlen,
1928
optp, (td != NULL) ? td->td_ucred : NULL,
1929
uproto);
1930
INP_WUNLOCK(inp);
1931
break;
1932
}
1933
#undef OPTSET
1934
1935
case IPV6_MULTICAST_IF:
1936
case IPV6_MULTICAST_HOPS:
1937
case IPV6_MULTICAST_LOOP:
1938
case IPV6_JOIN_GROUP:
1939
case IPV6_LEAVE_GROUP:
1940
case IPV6_MSFILTER:
1941
case MCAST_BLOCK_SOURCE:
1942
case MCAST_UNBLOCK_SOURCE:
1943
case MCAST_JOIN_GROUP:
1944
case MCAST_LEAVE_GROUP:
1945
case MCAST_JOIN_SOURCE_GROUP:
1946
case MCAST_LEAVE_SOURCE_GROUP:
1947
error = ip6_setmoptions(inp, sopt);
1948
break;
1949
1950
case IPV6_PORTRANGE:
1951
error = sooptcopyin(sopt, &optval,
1952
sizeof optval, sizeof optval);
1953
if (error)
1954
break;
1955
1956
INP_WLOCK(inp);
1957
switch (optval) {
1958
case IPV6_PORTRANGE_DEFAULT:
1959
inp->inp_flags &= ~(INP_LOWPORT);
1960
inp->inp_flags &= ~(INP_HIGHPORT);
1961
break;
1962
1963
case IPV6_PORTRANGE_HIGH:
1964
inp->inp_flags &= ~(INP_LOWPORT);
1965
inp->inp_flags |= INP_HIGHPORT;
1966
break;
1967
1968
case IPV6_PORTRANGE_LOW:
1969
inp->inp_flags &= ~(INP_HIGHPORT);
1970
inp->inp_flags |= INP_LOWPORT;
1971
break;
1972
1973
default:
1974
error = EINVAL;
1975
break;
1976
}
1977
INP_WUNLOCK(inp);
1978
break;
1979
1980
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
1981
case IPV6_IPSEC_POLICY:
1982
if (IPSEC_ENABLED(ipv6)) {
1983
error = IPSEC_PCBCTL(ipv6, inp, sopt);
1984
break;
1985
}
1986
/* FALLTHROUGH */
1987
#endif /* IPSEC */
1988
1989
default:
1990
error = ENOPROTOOPT;
1991
break;
1992
}
1993
break;
1994
1995
case SOPT_GET:
1996
switch (optname) {
1997
case IPV6_2292PKTOPTIONS:
1998
#ifdef IPV6_PKTOPTIONS
1999
case IPV6_PKTOPTIONS:
2000
#endif
2001
/*
2002
* RFC3542 (effectively) deprecated the
2003
* semantics of the 2292-style pktoptions.
2004
* Since it was not reliable in nature (i.e.,
2005
* applications had to expect the lack of some
2006
* information after all), it would make sense
2007
* to simplify this part by always returning
2008
* empty data.
2009
*/
2010
sopt->sopt_valsize = 0;
2011
break;
2012
2013
case IPV6_RECVHOPOPTS:
2014
case IPV6_RECVDSTOPTS:
2015
case IPV6_RECVRTHDRDSTOPTS:
2016
case IPV6_UNICAST_HOPS:
2017
case IPV6_RECVPKTINFO:
2018
case IPV6_RECVHOPLIMIT:
2019
case IPV6_RECVRTHDR:
2020
case IPV6_RECVPATHMTU:
2021
2022
case IPV6_V6ONLY:
2023
case IPV6_PORTRANGE:
2024
case IPV6_RECVTCLASS:
2025
case IPV6_AUTOFLOWLABEL:
2026
case IPV6_BINDANY:
2027
case IPV6_FLOWID:
2028
case IPV6_FLOWTYPE:
2029
case IPV6_RECVFLOWID:
2030
#ifdef RSS
2031
case IPV6_RSSBUCKETID:
2032
case IPV6_RECVRSSBUCKETID:
2033
#endif
2034
case IPV6_VLAN_PCP:
2035
switch (optname) {
2036
case IPV6_RECVHOPOPTS:
2037
optval = OPTBIT(IN6P_HOPOPTS);
2038
break;
2039
2040
case IPV6_RECVDSTOPTS:
2041
optval = OPTBIT(IN6P_DSTOPTS);
2042
break;
2043
2044
case IPV6_RECVRTHDRDSTOPTS:
2045
optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2046
break;
2047
2048
case IPV6_UNICAST_HOPS:
2049
optval = inp->in6p_hops;
2050
break;
2051
2052
case IPV6_RECVPKTINFO:
2053
optval = OPTBIT(IN6P_PKTINFO);
2054
break;
2055
2056
case IPV6_RECVHOPLIMIT:
2057
optval = OPTBIT(IN6P_HOPLIMIT);
2058
break;
2059
2060
case IPV6_RECVRTHDR:
2061
optval = OPTBIT(IN6P_RTHDR);
2062
break;
2063
2064
case IPV6_RECVPATHMTU:
2065
optval = OPTBIT(IN6P_MTU);
2066
break;
2067
2068
case IPV6_V6ONLY:
2069
optval = OPTBIT(IN6P_IPV6_V6ONLY);
2070
break;
2071
2072
case IPV6_PORTRANGE:
2073
{
2074
int flags;
2075
flags = inp->inp_flags;
2076
if (flags & INP_HIGHPORT)
2077
optval = IPV6_PORTRANGE_HIGH;
2078
else if (flags & INP_LOWPORT)
2079
optval = IPV6_PORTRANGE_LOW;
2080
else
2081
optval = 0;
2082
break;
2083
}
2084
case IPV6_RECVTCLASS:
2085
optval = OPTBIT(IN6P_TCLASS);
2086
break;
2087
2088
case IPV6_AUTOFLOWLABEL:
2089
optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2090
break;
2091
2092
case IPV6_ORIGDSTADDR:
2093
optval = OPTBIT2(INP_ORIGDSTADDR);
2094
break;
2095
2096
case IPV6_BINDANY:
2097
optval = OPTBIT(INP_BINDANY);
2098
break;
2099
2100
case IPV6_FLOWID:
2101
optval = inp->inp_flowid;
2102
break;
2103
2104
case IPV6_FLOWTYPE:
2105
optval = inp->inp_flowtype;
2106
break;
2107
2108
case IPV6_RECVFLOWID:
2109
optval = OPTBIT2(INP_RECVFLOWID);
2110
break;
2111
#ifdef RSS
2112
case IPV6_RSSBUCKETID:
2113
retval =
2114
rss_hash2bucket(inp->inp_flowid,
2115
inp->inp_flowtype,
2116
&rss_bucket);
2117
if (retval == 0)
2118
optval = rss_bucket;
2119
else
2120
error = EINVAL;
2121
break;
2122
2123
case IPV6_RECVRSSBUCKETID:
2124
optval = OPTBIT2(INP_RECVRSSBUCKETID);
2125
break;
2126
#endif
2127
2128
2129
case IPV6_VLAN_PCP:
2130
if (OPTBIT2(INP_2PCP_SET)) {
2131
optval = (inp->inp_flags2 &
2132
INP_2PCP_MASK) >>
2133
INP_2PCP_SHIFT;
2134
} else {
2135
optval = -1;
2136
}
2137
break;
2138
}
2139
2140
if (error)
2141
break;
2142
error = sooptcopyout(sopt, &optval,
2143
sizeof optval);
2144
break;
2145
2146
case IPV6_PATHMTU:
2147
{
2148
u_long pmtu = 0;
2149
struct ip6_mtuinfo mtuinfo;
2150
struct in6_addr addr;
2151
2152
if (!(so->so_state & SS_ISCONNECTED))
2153
return (ENOTCONN);
2154
/*
2155
* XXX: we dot not consider the case of source
2156
* routing, or optional information to specify
2157
* the outgoing interface.
2158
* Copy faddr out of inp to avoid holding lock
2159
* on inp during route lookup.
2160
*/
2161
INP_RLOCK(inp);
2162
bcopy(&inp->in6p_faddr, &addr, sizeof(addr));
2163
INP_RUNLOCK(inp);
2164
error = ip6_getpmtu_ctl(so->so_fibnum,
2165
&addr, &pmtu);
2166
if (error)
2167
break;
2168
if (pmtu > IPV6_MAXPACKET)
2169
pmtu = IPV6_MAXPACKET;
2170
2171
bzero(&mtuinfo, sizeof(mtuinfo));
2172
mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2173
optdata = (void *)&mtuinfo;
2174
optdatalen = sizeof(mtuinfo);
2175
error = sooptcopyout(sopt, optdata,
2176
optdatalen);
2177
break;
2178
}
2179
2180
case IPV6_2292PKTINFO:
2181
case IPV6_2292HOPLIMIT:
2182
case IPV6_2292HOPOPTS:
2183
case IPV6_2292RTHDR:
2184
case IPV6_2292DSTOPTS:
2185
switch (optname) {
2186
case IPV6_2292PKTINFO:
2187
optval = OPTBIT(IN6P_PKTINFO);
2188
break;
2189
case IPV6_2292HOPLIMIT:
2190
optval = OPTBIT(IN6P_HOPLIMIT);
2191
break;
2192
case IPV6_2292HOPOPTS:
2193
optval = OPTBIT(IN6P_HOPOPTS);
2194
break;
2195
case IPV6_2292RTHDR:
2196
optval = OPTBIT(IN6P_RTHDR);
2197
break;
2198
case IPV6_2292DSTOPTS:
2199
optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2200
break;
2201
}
2202
error = sooptcopyout(sopt, &optval,
2203
sizeof optval);
2204
break;
2205
case IPV6_PKTINFO:
2206
case IPV6_HOPOPTS:
2207
case IPV6_RTHDR:
2208
case IPV6_DSTOPTS:
2209
case IPV6_RTHDRDSTOPTS:
2210
case IPV6_NEXTHOP:
2211
case IPV6_TCLASS:
2212
case IPV6_DONTFRAG:
2213
case IPV6_USE_MIN_MTU:
2214
case IPV6_PREFER_TEMPADDR:
2215
error = ip6_getpcbopt(inp, optname, sopt);
2216
break;
2217
2218
case IPV6_MULTICAST_IF:
2219
case IPV6_MULTICAST_HOPS:
2220
case IPV6_MULTICAST_LOOP:
2221
case IPV6_MSFILTER:
2222
error = ip6_getmoptions(inp, sopt);
2223
break;
2224
2225
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
2226
case IPV6_IPSEC_POLICY:
2227
if (IPSEC_ENABLED(ipv6)) {
2228
error = IPSEC_PCBCTL(ipv6, inp, sopt);
2229
break;
2230
}
2231
/* FALLTHROUGH */
2232
#endif /* IPSEC */
2233
default:
2234
error = ENOPROTOOPT;
2235
break;
2236
}
2237
break;
2238
}
2239
}
2240
return (error);
2241
}
2242
2243
int
2244
ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2245
{
2246
int error = 0, optval, optlen;
2247
const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2248
struct inpcb *inp = sotoinpcb(so);
2249
int level, op, optname;
2250
2251
level = sopt->sopt_level;
2252
op = sopt->sopt_dir;
2253
optname = sopt->sopt_name;
2254
optlen = sopt->sopt_valsize;
2255
2256
if (level != IPPROTO_IPV6) {
2257
return (EINVAL);
2258
}
2259
2260
switch (optname) {
2261
case IPV6_CHECKSUM:
2262
/*
2263
* For ICMPv6 sockets, no modification allowed for checksum
2264
* offset, permit "no change" values to help existing apps.
2265
*
2266
* RFC3542 says: "An attempt to set IPV6_CHECKSUM
2267
* for an ICMPv6 socket will fail."
2268
* The current behavior does not meet RFC3542.
2269
*/
2270
switch (op) {
2271
case SOPT_SET:
2272
if (optlen != sizeof(int)) {
2273
error = EINVAL;
2274
break;
2275
}
2276
error = sooptcopyin(sopt, &optval, sizeof(optval),
2277
sizeof(optval));
2278
if (error)
2279
break;
2280
if (optval < -1 || (optval % 2) != 0) {
2281
/*
2282
* The API assumes non-negative even offset
2283
* values or -1 as a special value.
2284
*/
2285
error = EINVAL;
2286
} else if (inp->inp_ip_p == IPPROTO_ICMPV6) {
2287
if (optval != icmp6off)
2288
error = EINVAL;
2289
} else
2290
inp->in6p_cksum = optval;
2291
break;
2292
2293
case SOPT_GET:
2294
if (inp->inp_ip_p == IPPROTO_ICMPV6)
2295
optval = icmp6off;
2296
else
2297
optval = inp->in6p_cksum;
2298
2299
error = sooptcopyout(sopt, &optval, sizeof(optval));
2300
break;
2301
2302
default:
2303
error = EINVAL;
2304
break;
2305
}
2306
break;
2307
2308
default:
2309
error = ENOPROTOOPT;
2310
break;
2311
}
2312
2313
return (error);
2314
}
2315
2316
/*
2317
* Set up IP6 options in pcb for insertion in output packets or
2318
* specifying behavior of outgoing packets.
2319
*/
2320
static int
2321
ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2322
struct socket *so, struct sockopt *sopt)
2323
{
2324
struct ip6_pktopts *opt = *pktopt;
2325
int error = 0;
2326
struct thread *td = sopt->sopt_td;
2327
struct epoch_tracker et;
2328
2329
/* turn off any old options. */
2330
if (opt) {
2331
#ifdef DIAGNOSTIC
2332
if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2333
opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2334
opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2335
printf("ip6_pcbopts: all specified options are cleared.\n");
2336
#endif
2337
ip6_clearpktopts(opt, -1);
2338
} else {
2339
opt = malloc(sizeof(*opt), M_IP6OPT, M_NOWAIT);
2340
if (opt == NULL)
2341
return (ENOMEM);
2342
}
2343
*pktopt = NULL;
2344
2345
if (!m || m->m_len == 0) {
2346
/*
2347
* Only turning off any previous options, regardless of
2348
* whether the opt is just created or given.
2349
*/
2350
free(opt, M_IP6OPT);
2351
return (0);
2352
}
2353
2354
/* set options specified by user. */
2355
NET_EPOCH_ENTER(et);
2356
if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2357
td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2358
ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2359
free(opt, M_IP6OPT);
2360
NET_EPOCH_EXIT(et);
2361
return (error);
2362
}
2363
NET_EPOCH_EXIT(et);
2364
*pktopt = opt;
2365
return (0);
2366
}
2367
2368
/*
2369
* initialize ip6_pktopts. beware that there are non-zero default values in
2370
* the struct.
2371
*/
2372
void
2373
ip6_initpktopts(struct ip6_pktopts *opt)
2374
{
2375
2376
bzero(opt, sizeof(*opt));
2377
opt->ip6po_hlim = -1; /* -1 means default hop limit */
2378
opt->ip6po_tclass = -1; /* -1 means default traffic class */
2379
opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2380
opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2381
}
2382
2383
static int
2384
ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2385
struct ucred *cred, int uproto)
2386
{
2387
struct epoch_tracker et;
2388
struct ip6_pktopts *opt;
2389
int ret;
2390
2391
if (*pktopt == NULL) {
2392
*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2393
M_NOWAIT);
2394
if (*pktopt == NULL)
2395
return (ENOBUFS);
2396
ip6_initpktopts(*pktopt);
2397
}
2398
opt = *pktopt;
2399
2400
NET_EPOCH_ENTER(et);
2401
ret = ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto);
2402
NET_EPOCH_EXIT(et);
2403
2404
return (ret);
2405
}
2406
2407
#define GET_PKTOPT_VAR(field, lenexpr) do { \
2408
if (pktopt && pktopt->field) { \
2409
INP_RUNLOCK(inp); \
2410
optdata = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK); \
2411
malloc_optdata = true; \
2412
INP_RLOCK(inp); \
2413
if (inp->inp_flags & INP_DROPPED) { \
2414
INP_RUNLOCK(inp); \
2415
free(optdata, M_TEMP); \
2416
return (ECONNRESET); \
2417
} \
2418
pktopt = inp->in6p_outputopts; \
2419
if (pktopt && pktopt->field) { \
2420
optdatalen = min(lenexpr, sopt->sopt_valsize); \
2421
bcopy(pktopt->field, optdata, optdatalen); \
2422
} else { \
2423
free(optdata, M_TEMP); \
2424
optdata = NULL; \
2425
malloc_optdata = false; \
2426
} \
2427
} \
2428
} while(0)
2429
2430
#define GET_PKTOPT_EXT_HDR(field) GET_PKTOPT_VAR(field, \
2431
(((struct ip6_ext *)pktopt->field)->ip6e_len + 1) << 3)
2432
2433
#define GET_PKTOPT_SOCKADDR(field) GET_PKTOPT_VAR(field, \
2434
pktopt->field->sa_len)
2435
2436
static int
2437
ip6_getpcbopt(struct inpcb *inp, int optname, struct sockopt *sopt)
2438
{
2439
void *optdata = NULL;
2440
bool malloc_optdata = false;
2441
int optdatalen = 0;
2442
int error = 0;
2443
struct in6_pktinfo null_pktinfo;
2444
int deftclass = 0, on;
2445
int defminmtu = IP6PO_MINMTU_MCASTONLY;
2446
int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2447
struct ip6_pktopts *pktopt;
2448
2449
INP_RLOCK(inp);
2450
pktopt = inp->in6p_outputopts;
2451
2452
switch (optname) {
2453
case IPV6_PKTINFO:
2454
optdata = (void *)&null_pktinfo;
2455
if (pktopt && pktopt->ip6po_pktinfo) {
2456
bcopy(pktopt->ip6po_pktinfo, &null_pktinfo,
2457
sizeof(null_pktinfo));
2458
in6_clearscope(&null_pktinfo.ipi6_addr);
2459
} else {
2460
/* XXX: we don't have to do this every time... */
2461
bzero(&null_pktinfo, sizeof(null_pktinfo));
2462
}
2463
optdatalen = sizeof(struct in6_pktinfo);
2464
break;
2465
case IPV6_TCLASS:
2466
if (pktopt && pktopt->ip6po_tclass >= 0)
2467
deftclass = pktopt->ip6po_tclass;
2468
optdata = (void *)&deftclass;
2469
optdatalen = sizeof(int);
2470
break;
2471
case IPV6_HOPOPTS:
2472
GET_PKTOPT_EXT_HDR(ip6po_hbh);
2473
break;
2474
case IPV6_RTHDR:
2475
GET_PKTOPT_EXT_HDR(ip6po_rthdr);
2476
break;
2477
case IPV6_RTHDRDSTOPTS:
2478
GET_PKTOPT_EXT_HDR(ip6po_dest1);
2479
break;
2480
case IPV6_DSTOPTS:
2481
GET_PKTOPT_EXT_HDR(ip6po_dest2);
2482
break;
2483
case IPV6_NEXTHOP:
2484
GET_PKTOPT_SOCKADDR(ip6po_nexthop);
2485
break;
2486
case IPV6_USE_MIN_MTU:
2487
if (pktopt)
2488
defminmtu = pktopt->ip6po_minmtu;
2489
optdata = (void *)&defminmtu;
2490
optdatalen = sizeof(int);
2491
break;
2492
case IPV6_DONTFRAG:
2493
if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2494
on = 1;
2495
else
2496
on = 0;
2497
optdata = (void *)&on;
2498
optdatalen = sizeof(on);
2499
break;
2500
case IPV6_PREFER_TEMPADDR:
2501
if (pktopt)
2502
defpreftemp = pktopt->ip6po_prefer_tempaddr;
2503
optdata = (void *)&defpreftemp;
2504
optdatalen = sizeof(int);
2505
break;
2506
default: /* should not happen */
2507
#ifdef DIAGNOSTIC
2508
panic("ip6_getpcbopt: unexpected option\n");
2509
#endif
2510
INP_RUNLOCK(inp);
2511
return (ENOPROTOOPT);
2512
}
2513
INP_RUNLOCK(inp);
2514
2515
error = sooptcopyout(sopt, optdata, optdatalen);
2516
if (malloc_optdata)
2517
free(optdata, M_TEMP);
2518
2519
return (error);
2520
}
2521
2522
void
2523
ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2524
{
2525
if (pktopt == NULL)
2526
return;
2527
2528
if (optname == -1 || optname == IPV6_PKTINFO) {
2529
if (pktopt->ip6po_pktinfo)
2530
free(pktopt->ip6po_pktinfo, M_IP6OPT);
2531
pktopt->ip6po_pktinfo = NULL;
2532
}
2533
if (optname == -1 || optname == IPV6_HOPLIMIT) {
2534
pktopt->ip6po_hlim = -1;
2535
pktopt->ip6po_valid &= ~IP6PO_VALID_HLIM;
2536
}
2537
if (optname == -1 || optname == IPV6_TCLASS) {
2538
pktopt->ip6po_tclass = -1;
2539
pktopt->ip6po_valid &= ~IP6PO_VALID_TC;
2540
}
2541
if (optname == -1 || optname == IPV6_NEXTHOP) {
2542
if (pktopt->ip6po_nextroute.ro_nh) {
2543
NH_FREE(pktopt->ip6po_nextroute.ro_nh);
2544
pktopt->ip6po_nextroute.ro_nh = NULL;
2545
}
2546
if (pktopt->ip6po_nexthop)
2547
free(pktopt->ip6po_nexthop, M_IP6OPT);
2548
pktopt->ip6po_nexthop = NULL;
2549
pktopt->ip6po_valid &= ~IP6PO_VALID_NHINFO;
2550
}
2551
if (optname == -1 || optname == IPV6_HOPOPTS) {
2552
if (pktopt->ip6po_hbh)
2553
free(pktopt->ip6po_hbh, M_IP6OPT);
2554
pktopt->ip6po_hbh = NULL;
2555
pktopt->ip6po_valid &= ~IP6PO_VALID_HBH;
2556
}
2557
if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2558
if (pktopt->ip6po_dest1)
2559
free(pktopt->ip6po_dest1, M_IP6OPT);
2560
pktopt->ip6po_dest1 = NULL;
2561
pktopt->ip6po_valid &= ~IP6PO_VALID_DEST1;
2562
}
2563
if (optname == -1 || optname == IPV6_RTHDR) {
2564
if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2565
free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2566
pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2567
if (pktopt->ip6po_route.ro_nh) {
2568
NH_FREE(pktopt->ip6po_route.ro_nh);
2569
pktopt->ip6po_route.ro_nh = NULL;
2570
}
2571
pktopt->ip6po_valid &= ~IP6PO_VALID_RHINFO;
2572
}
2573
if (optname == -1 || optname == IPV6_DSTOPTS) {
2574
if (pktopt->ip6po_dest2)
2575
free(pktopt->ip6po_dest2, M_IP6OPT);
2576
pktopt->ip6po_dest2 = NULL;
2577
pktopt->ip6po_valid &= ~IP6PO_VALID_DEST2;
2578
}
2579
}
2580
2581
#define PKTOPT_EXTHDRCPY(type) \
2582
do {\
2583
if (src->type) {\
2584
int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2585
dst->type = malloc(hlen, M_IP6OPT, canwait);\
2586
if (dst->type == NULL)\
2587
goto bad;\
2588
bcopy(src->type, dst->type, hlen);\
2589
}\
2590
} while (/*CONSTCOND*/ 0)
2591
2592
static int
2593
copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2594
{
2595
if (dst == NULL || src == NULL) {
2596
printf("ip6_clearpktopts: invalid argument\n");
2597
return (EINVAL);
2598
}
2599
2600
dst->ip6po_hlim = src->ip6po_hlim;
2601
dst->ip6po_tclass = src->ip6po_tclass;
2602
dst->ip6po_flags = src->ip6po_flags;
2603
dst->ip6po_minmtu = src->ip6po_minmtu;
2604
dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2605
if (src->ip6po_pktinfo) {
2606
dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2607
M_IP6OPT, canwait);
2608
if (dst->ip6po_pktinfo == NULL)
2609
goto bad;
2610
*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2611
}
2612
if (src->ip6po_nexthop) {
2613
dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2614
M_IP6OPT, canwait);
2615
if (dst->ip6po_nexthop == NULL)
2616
goto bad;
2617
bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2618
src->ip6po_nexthop->sa_len);
2619
}
2620
PKTOPT_EXTHDRCPY(ip6po_hbh);
2621
PKTOPT_EXTHDRCPY(ip6po_dest1);
2622
PKTOPT_EXTHDRCPY(ip6po_dest2);
2623
PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2624
dst->ip6po_valid = src->ip6po_valid;
2625
return (0);
2626
2627
bad:
2628
ip6_clearpktopts(dst, -1);
2629
return (ENOBUFS);
2630
}
2631
#undef PKTOPT_EXTHDRCPY
2632
2633
struct ip6_pktopts *
2634
ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2635
{
2636
int error;
2637
struct ip6_pktopts *dst;
2638
2639
dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2640
if (dst == NULL)
2641
return (NULL);
2642
ip6_initpktopts(dst);
2643
2644
if ((error = copypktopts(dst, src, canwait)) != 0) {
2645
free(dst, M_IP6OPT);
2646
return (NULL);
2647
}
2648
2649
return (dst);
2650
}
2651
2652
void
2653
ip6_freepcbopts(struct ip6_pktopts *pktopt)
2654
{
2655
if (pktopt == NULL)
2656
return;
2657
2658
ip6_clearpktopts(pktopt, -1);
2659
2660
free(pktopt, M_IP6OPT);
2661
}
2662
2663
/*
2664
* Set IPv6 outgoing packet options based on advanced API.
2665
*/
2666
int
2667
ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2668
struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2669
{
2670
struct cmsghdr *cm = NULL;
2671
2672
if (control == NULL || opt == NULL)
2673
return (EINVAL);
2674
2675
/*
2676
* ip6_setpktopt can call ifnet_byindex(), so it's imperative that we
2677
* are in the network epoch here.
2678
*/
2679
NET_EPOCH_ASSERT();
2680
2681
ip6_initpktopts(opt);
2682
if (stickyopt) {
2683
int error;
2684
2685
/*
2686
* If stickyopt is provided, make a local copy of the options
2687
* for this particular packet, then override them by ancillary
2688
* objects.
2689
* XXX: copypktopts() does not copy the cached route to a next
2690
* hop (if any). This is not very good in terms of efficiency,
2691
* but we can allow this since this option should be rarely
2692
* used.
2693
*/
2694
if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2695
return (error);
2696
}
2697
2698
/*
2699
* XXX: Currently, we assume all the optional information is stored
2700
* in a single mbuf.
2701
*/
2702
if (control->m_next)
2703
return (EINVAL);
2704
2705
for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2706
control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2707
int error;
2708
2709
if (control->m_len < CMSG_LEN(0))
2710
return (EINVAL);
2711
2712
cm = mtod(control, struct cmsghdr *);
2713
if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2714
return (EINVAL);
2715
if (cm->cmsg_level != IPPROTO_IPV6)
2716
continue;
2717
2718
error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2719
cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2720
if (error)
2721
return (error);
2722
}
2723
2724
return (0);
2725
}
2726
2727
/*
2728
* Set a particular packet option, as a sticky option or an ancillary data
2729
* item. "len" can be 0 only when it's a sticky option.
2730
* We have 4 cases of combination of "sticky" and "cmsg":
2731
* "sticky=0, cmsg=0": impossible
2732
* "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2733
* "sticky=1, cmsg=0": RFC3542 socket option
2734
* "sticky=1, cmsg=1": RFC2292 socket option
2735
*/
2736
static int
2737
ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2738
struct ucred *cred, int sticky, int cmsg, int uproto)
2739
{
2740
int minmtupolicy, preftemp;
2741
int error;
2742
2743
NET_EPOCH_ASSERT();
2744
2745
if (!sticky && !cmsg) {
2746
#ifdef DIAGNOSTIC
2747
printf("ip6_setpktopt: impossible case\n");
2748
#endif
2749
return (EINVAL);
2750
}
2751
2752
/*
2753
* IPV6_2292xxx is for backward compatibility to RFC2292, and should
2754
* not be specified in the context of RFC3542. Conversely,
2755
* RFC3542 types should not be specified in the context of RFC2292.
2756
*/
2757
if (!cmsg) {
2758
switch (optname) {
2759
case IPV6_2292PKTINFO:
2760
case IPV6_2292HOPLIMIT:
2761
case IPV6_2292NEXTHOP:
2762
case IPV6_2292HOPOPTS:
2763
case IPV6_2292DSTOPTS:
2764
case IPV6_2292RTHDR:
2765
case IPV6_2292PKTOPTIONS:
2766
return (ENOPROTOOPT);
2767
}
2768
}
2769
if (sticky && cmsg) {
2770
switch (optname) {
2771
case IPV6_PKTINFO:
2772
case IPV6_HOPLIMIT:
2773
case IPV6_NEXTHOP:
2774
case IPV6_HOPOPTS:
2775
case IPV6_DSTOPTS:
2776
case IPV6_RTHDRDSTOPTS:
2777
case IPV6_RTHDR:
2778
case IPV6_USE_MIN_MTU:
2779
case IPV6_DONTFRAG:
2780
case IPV6_TCLASS:
2781
case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2782
return (ENOPROTOOPT);
2783
}
2784
}
2785
2786
switch (optname) {
2787
case IPV6_2292PKTINFO:
2788
case IPV6_PKTINFO:
2789
{
2790
struct ifnet *ifp = NULL;
2791
struct in6_pktinfo *pktinfo;
2792
2793
if (len != sizeof(struct in6_pktinfo))
2794
return (EINVAL);
2795
2796
pktinfo = (struct in6_pktinfo *)buf;
2797
2798
/*
2799
* An application can clear any sticky IPV6_PKTINFO option by
2800
* doing a "regular" setsockopt with ipi6_addr being
2801
* in6addr_any and ipi6_ifindex being zero.
2802
* [RFC 3542, Section 6]
2803
*/
2804
if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2805
pktinfo->ipi6_ifindex == 0 &&
2806
IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2807
ip6_clearpktopts(opt, optname);
2808
break;
2809
}
2810
2811
if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2812
sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2813
return (EINVAL);
2814
}
2815
if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr))
2816
return (EINVAL);
2817
/* validate the interface index if specified. */
2818
if (pktinfo->ipi6_ifindex) {
2819
ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2820
if (ifp == NULL)
2821
return (ENXIO);
2822
}
2823
if (ifp != NULL && (ifp->if_inet6 == NULL ||
2824
(ifp->if_inet6->nd_flags & ND6_IFF_IFDISABLED) != 0))
2825
return (ENETDOWN);
2826
2827
if (ifp != NULL &&
2828
!IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2829
struct in6_ifaddr *ia;
2830
2831
in6_setscope(&pktinfo->ipi6_addr, ifp, NULL);
2832
ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr);
2833
if (ia == NULL)
2834
return (EADDRNOTAVAIL);
2835
ifa_free(&ia->ia_ifa);
2836
}
2837
/*
2838
* We store the address anyway, and let in6_selectsrc()
2839
* validate the specified address. This is because ipi6_addr
2840
* may not have enough information about its scope zone, and
2841
* we may need additional information (such as outgoing
2842
* interface or the scope zone of a destination address) to
2843
* disambiguate the scope.
2844
* XXX: the delay of the validation may confuse the
2845
* application when it is used as a sticky option.
2846
*/
2847
if (opt->ip6po_pktinfo == NULL) {
2848
opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2849
M_IP6OPT, M_NOWAIT);
2850
if (opt->ip6po_pktinfo == NULL)
2851
return (ENOBUFS);
2852
}
2853
bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2854
opt->ip6po_valid |= IP6PO_VALID_PKTINFO;
2855
break;
2856
}
2857
2858
case IPV6_2292HOPLIMIT:
2859
case IPV6_HOPLIMIT:
2860
{
2861
int *hlimp;
2862
2863
/*
2864
* RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2865
* to simplify the ordering among hoplimit options.
2866
*/
2867
if (optname == IPV6_HOPLIMIT && sticky)
2868
return (ENOPROTOOPT);
2869
2870
if (len != sizeof(int))
2871
return (EINVAL);
2872
hlimp = (int *)buf;
2873
if (*hlimp < -1 || *hlimp > 255)
2874
return (EINVAL);
2875
2876
opt->ip6po_hlim = *hlimp;
2877
opt->ip6po_valid |= IP6PO_VALID_HLIM;
2878
break;
2879
}
2880
2881
case IPV6_TCLASS:
2882
{
2883
int tclass;
2884
2885
if (len != sizeof(int))
2886
return (EINVAL);
2887
tclass = *(int *)buf;
2888
if (tclass < -1 || tclass > 255)
2889
return (EINVAL);
2890
2891
opt->ip6po_tclass = tclass;
2892
opt->ip6po_valid |= IP6PO_VALID_TC;
2893
break;
2894
}
2895
2896
case IPV6_2292NEXTHOP:
2897
case IPV6_NEXTHOP:
2898
if (cred != NULL) {
2899
error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS);
2900
if (error)
2901
return (error);
2902
}
2903
2904
if (len == 0) { /* just remove the option */
2905
ip6_clearpktopts(opt, IPV6_NEXTHOP);
2906
break;
2907
}
2908
2909
/* check if cmsg_len is large enough for sa_len */
2910
if (len < sizeof(struct sockaddr) || len < *buf)
2911
return (EINVAL);
2912
2913
switch (((struct sockaddr *)buf)->sa_family) {
2914
case AF_INET6:
2915
{
2916
struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2917
int error;
2918
2919
if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2920
return (EINVAL);
2921
2922
if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2923
IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2924
return (EINVAL);
2925
}
2926
if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
2927
!= 0) {
2928
return (error);
2929
}
2930
break;
2931
}
2932
case AF_LINK: /* should eventually be supported */
2933
default:
2934
return (EAFNOSUPPORT);
2935
}
2936
2937
/* turn off the previous option, then set the new option. */
2938
ip6_clearpktopts(opt, IPV6_NEXTHOP);
2939
opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2940
if (opt->ip6po_nexthop == NULL)
2941
return (ENOBUFS);
2942
bcopy(buf, opt->ip6po_nexthop, *buf);
2943
opt->ip6po_valid |= IP6PO_VALID_NHINFO;
2944
break;
2945
2946
case IPV6_2292HOPOPTS:
2947
case IPV6_HOPOPTS:
2948
{
2949
struct ip6_hbh *hbh;
2950
int hbhlen;
2951
2952
/*
2953
* XXX: We don't allow a non-privileged user to set ANY HbH
2954
* options, since per-option restriction has too much
2955
* overhead.
2956
*/
2957
if (cred != NULL) {
2958
error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS);
2959
if (error)
2960
return (error);
2961
}
2962
2963
if (len == 0) {
2964
ip6_clearpktopts(opt, IPV6_HOPOPTS);
2965
break; /* just remove the option */
2966
}
2967
2968
/* message length validation */
2969
if (len < sizeof(struct ip6_hbh))
2970
return (EINVAL);
2971
hbh = (struct ip6_hbh *)buf;
2972
hbhlen = (hbh->ip6h_len + 1) << 3;
2973
if (len != hbhlen)
2974
return (EINVAL);
2975
2976
/* turn off the previous option, then set the new option. */
2977
ip6_clearpktopts(opt, IPV6_HOPOPTS);
2978
opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2979
if (opt->ip6po_hbh == NULL)
2980
return (ENOBUFS);
2981
bcopy(hbh, opt->ip6po_hbh, hbhlen);
2982
opt->ip6po_valid |= IP6PO_VALID_HBH;
2983
2984
break;
2985
}
2986
2987
case IPV6_2292DSTOPTS:
2988
case IPV6_DSTOPTS:
2989
case IPV6_RTHDRDSTOPTS:
2990
{
2991
struct ip6_dest *dest, **newdest = NULL;
2992
int destlen;
2993
2994
if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
2995
error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS);
2996
if (error)
2997
return (error);
2998
}
2999
3000
if (len == 0) {
3001
ip6_clearpktopts(opt, optname);
3002
break; /* just remove the option */
3003
}
3004
3005
/* message length validation */
3006
if (len < sizeof(struct ip6_dest))
3007
return (EINVAL);
3008
dest = (struct ip6_dest *)buf;
3009
destlen = (dest->ip6d_len + 1) << 3;
3010
if (len != destlen)
3011
return (EINVAL);
3012
3013
/*
3014
* Determine the position that the destination options header
3015
* should be inserted; before or after the routing header.
3016
*/
3017
switch (optname) {
3018
case IPV6_2292DSTOPTS:
3019
/*
3020
* The old advacned API is ambiguous on this point.
3021
* Our approach is to determine the position based
3022
* according to the existence of a routing header.
3023
* Note, however, that this depends on the order of the
3024
* extension headers in the ancillary data; the 1st
3025
* part of the destination options header must appear
3026
* before the routing header in the ancillary data,
3027
* too.
3028
* RFC3542 solved the ambiguity by introducing
3029
* separate ancillary data or option types.
3030
*/
3031
if (opt->ip6po_rthdr == NULL)
3032
newdest = &opt->ip6po_dest1;
3033
else
3034
newdest = &opt->ip6po_dest2;
3035
break;
3036
case IPV6_RTHDRDSTOPTS:
3037
newdest = &opt->ip6po_dest1;
3038
break;
3039
case IPV6_DSTOPTS:
3040
newdest = &opt->ip6po_dest2;
3041
break;
3042
}
3043
3044
/* turn off the previous option, then set the new option. */
3045
ip6_clearpktopts(opt, optname);
3046
*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3047
if (*newdest == NULL)
3048
return (ENOBUFS);
3049
bcopy(dest, *newdest, destlen);
3050
if (newdest == &opt->ip6po_dest1)
3051
opt->ip6po_valid |= IP6PO_VALID_DEST1;
3052
else
3053
opt->ip6po_valid |= IP6PO_VALID_DEST2;
3054
3055
break;
3056
}
3057
3058
case IPV6_2292RTHDR:
3059
case IPV6_RTHDR:
3060
{
3061
struct ip6_rthdr *rth;
3062
int rthlen;
3063
3064
if (len == 0) {
3065
ip6_clearpktopts(opt, IPV6_RTHDR);
3066
break; /* just remove the option */
3067
}
3068
3069
/* message length validation */
3070
if (len < sizeof(struct ip6_rthdr))
3071
return (EINVAL);
3072
rth = (struct ip6_rthdr *)buf;
3073
rthlen = (rth->ip6r_len + 1) << 3;
3074
if (len != rthlen)
3075
return (EINVAL);
3076
3077
switch (rth->ip6r_type) {
3078
case IPV6_RTHDR_TYPE_0:
3079
if (rth->ip6r_len == 0) /* must contain one addr */
3080
return (EINVAL);
3081
if (rth->ip6r_len % 2) /* length must be even */
3082
return (EINVAL);
3083
if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3084
return (EINVAL);
3085
break;
3086
default:
3087
return (EINVAL); /* not supported */
3088
}
3089
3090
/* turn off the previous option */
3091
ip6_clearpktopts(opt, IPV6_RTHDR);
3092
opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3093
if (opt->ip6po_rthdr == NULL)
3094
return (ENOBUFS);
3095
bcopy(rth, opt->ip6po_rthdr, rthlen);
3096
opt->ip6po_valid |= IP6PO_VALID_RHINFO;
3097
3098
break;
3099
}
3100
3101
case IPV6_USE_MIN_MTU:
3102
if (len != sizeof(int))
3103
return (EINVAL);
3104
minmtupolicy = *(int *)buf;
3105
if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3106
minmtupolicy != IP6PO_MINMTU_DISABLE &&
3107
minmtupolicy != IP6PO_MINMTU_ALL) {
3108
return (EINVAL);
3109
}
3110
opt->ip6po_minmtu = minmtupolicy;
3111
break;
3112
3113
case IPV6_DONTFRAG:
3114
if (len != sizeof(int))
3115
return (EINVAL);
3116
3117
if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3118
/*
3119
* we ignore this option for TCP sockets.
3120
* (RFC3542 leaves this case unspecified.)
3121
*/
3122
opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3123
} else
3124
opt->ip6po_flags |= IP6PO_DONTFRAG;
3125
break;
3126
3127
case IPV6_PREFER_TEMPADDR:
3128
if (len != sizeof(int))
3129
return (EINVAL);
3130
preftemp = *(int *)buf;
3131
if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3132
preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3133
preftemp != IP6PO_TEMPADDR_PREFER) {
3134
return (EINVAL);
3135
}
3136
opt->ip6po_prefer_tempaddr = preftemp;
3137
break;
3138
3139
default:
3140
return (ENOPROTOOPT);
3141
} /* end of switch */
3142
3143
return (0);
3144
}
3145
3146
/*
3147
* Routine called from ip6_output() to loop back a copy of an IP6 multicast
3148
* packet to the input queue of a specified interface. Note that this
3149
* calls the output routine of the loopback "driver", but with an interface
3150
* pointer that might NOT be &loif -- easier than replicating that code here.
3151
*/
3152
void
3153
ip6_mloopback(struct ifnet *ifp, struct mbuf *m)
3154
{
3155
struct mbuf *copym;
3156
struct ip6_hdr *ip6;
3157
3158
copym = m_copym(m, 0, M_COPYALL, M_NOWAIT);
3159
if (copym == NULL)
3160
return;
3161
3162
/*
3163
* Make sure to deep-copy IPv6 header portion in case the data
3164
* is in an mbuf cluster, so that we can safely override the IPv6
3165
* header portion later.
3166
*/
3167
if (!M_WRITABLE(copym) ||
3168
copym->m_len < sizeof(struct ip6_hdr)) {
3169
copym = m_pullup(copym, sizeof(struct ip6_hdr));
3170
if (copym == NULL)
3171
return;
3172
}
3173
ip6 = mtod(copym, struct ip6_hdr *);
3174
/*
3175
* clear embedded scope identifiers if necessary.
3176
* in6_clearscope will touch the addresses only when necessary.
3177
*/
3178
in6_clearscope(&ip6->ip6_src);
3179
in6_clearscope(&ip6->ip6_dst);
3180
if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
3181
copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 |
3182
CSUM_PSEUDO_HDR;
3183
copym->m_pkthdr.csum_data = 0xffff;
3184
}
3185
if_simloop(ifp, copym, AF_INET6, 0);
3186
}
3187
3188
/*
3189
* Chop IPv6 header off from the payload.
3190
*/
3191
static int
3192
ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3193
{
3194
struct mbuf *mh;
3195
struct ip6_hdr *ip6;
3196
3197
ip6 = mtod(m, struct ip6_hdr *);
3198
if (m->m_len > sizeof(*ip6)) {
3199
mh = m_gethdr(M_NOWAIT, MT_DATA);
3200
if (mh == NULL) {
3201
m_freem(m);
3202
return ENOBUFS;
3203
}
3204
m_move_pkthdr(mh, m);
3205
M_ALIGN(mh, sizeof(*ip6));
3206
m->m_len -= sizeof(*ip6);
3207
m->m_data += sizeof(*ip6);
3208
mh->m_next = m;
3209
m = mh;
3210
m->m_len = sizeof(*ip6);
3211
bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3212
}
3213
exthdrs->ip6e_ip6 = m;
3214
return 0;
3215
}
3216
3217
/*
3218
* Compute IPv6 extension header length.
3219
*/
3220
int
3221
ip6_optlen(struct inpcb *inp)
3222
{
3223
int len;
3224
3225
if (!inp->in6p_outputopts)
3226
return 0;
3227
3228
len = 0;
3229
#define elen(x) \
3230
(((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3231
3232
len += elen(inp->in6p_outputopts->ip6po_hbh);
3233
if (inp->in6p_outputopts->ip6po_rthdr)
3234
/* dest1 is valid with rthdr only */
3235
len += elen(inp->in6p_outputopts->ip6po_dest1);
3236
len += elen(inp->in6p_outputopts->ip6po_rthdr);
3237
len += elen(inp->in6p_outputopts->ip6po_dest2);
3238
return len;
3239
#undef elen
3240
}
3241
3242