GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet6/mld6.c
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 2009 Bruce Simpson.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* 3. The name of the author may not be used to endorse or promote
15
* products derived from this software without specific prior written
16
* permission.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
* SUCH DAMAGE.
29
*
30
* $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $
31
*/
32
33
/*-
34
* Copyright (c) 1988 Stephen Deering.
35
* Copyright (c) 1992, 1993
36
* The Regents of the University of California. All rights reserved.
37
*
38
* This code is derived from software contributed to Berkeley by
39
* Stephen Deering of Stanford University.
40
*
41
* Redistribution and use in source and binary forms, with or without
42
* modification, are permitted provided that the following conditions
43
* are met:
44
* 1. Redistributions of source code must retain the above copyright
45
* notice, this list of conditions and the following disclaimer.
46
* 2. Redistributions in binary form must reproduce the above copyright
47
* notice, this list of conditions and the following disclaimer in the
48
* documentation and/or other materials provided with the distribution.
49
* 3. Neither the name of the University nor the names of its contributors
50
* may be used to endorse or promote products derived from this software
51
* without specific prior written permission.
52
*
53
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63
* SUCH DAMAGE.
64
*/
65
66
#include <sys/cdefs.h>
67
#include "opt_inet.h"
68
#include "opt_inet6.h"
69
70
#include <sys/param.h>
71
#include <sys/systm.h>
72
#include <sys/mbuf.h>
73
#include <sys/socket.h>
74
#include <sys/sysctl.h>
75
#include <sys/kernel.h>
76
#include <sys/callout.h>
77
#include <sys/malloc.h>
78
#include <sys/module.h>
79
#include <sys/ktr.h>
80
81
#include <net/if.h>
82
#include <net/if_var.h>
83
#include <net/if_private.h>
84
#include <net/route.h>
85
#include <net/vnet.h>
86
87
#include <netinet/in.h>
88
#include <netinet/in_var.h>
89
#include <netinet6/in6_var.h>
90
#include <netinet/ip6.h>
91
#include <netinet6/ip6_var.h>
92
#include <netinet6/scope6_var.h>
93
#include <netinet/icmp6.h>
94
#include <netinet6/mld6.h>
95
#include <netinet6/mld6_var.h>
96
97
#include <security/mac/mac_framework.h>
98
99
#ifndef KTR_MLD
100
#define KTR_MLD KTR_INET6
101
#endif
102
103
static void mli_delete_locked(struct ifnet *);
104
static void mld_dispatch_packet(struct mbuf *);
105
static void mld_dispatch_queue(struct mbufq *, int);
106
static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
107
static void mld_fasttimo_vnet(struct in6_multi_head *inmh);
108
static int mld_handle_state_change(struct in6_multi *,
109
struct mld_ifsoftc *);
110
static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
111
const int);
112
#ifdef KTR
113
static char * mld_rec_type_to_str(const int);
114
#endif
115
static void mld_set_version(struct mld_ifsoftc *, const int);
116
static void mld_slowtimo_vnet(void);
117
static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
118
/*const*/ struct mld_hdr *);
119
static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
120
/*const*/ struct mld_hdr *);
121
static void mld_v1_process_group_timer(struct in6_multi_head *,
122
struct in6_multi *);
123
static void mld_v1_process_querier_timers(struct mld_ifsoftc *);
124
static int mld_v1_transmit_report(struct in6_multi *, const int);
125
static void mld_v1_update_group(struct in6_multi *, const int);
126
static void mld_v2_cancel_link_timers(struct mld_ifsoftc *);
127
static void mld_v2_dispatch_general_query(struct mld_ifsoftc *);
128
static struct mbuf *
129
mld_v2_encap_report(struct ifnet *, struct mbuf *);
130
static int mld_v2_enqueue_filter_change(struct mbufq *,
131
struct in6_multi *);
132
static int mld_v2_enqueue_group_record(struct mbufq *,
133
struct in6_multi *, const int, const int, const int,
134
const int);
135
static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
136
struct mbuf *, struct mldv2_query *, const int, const int);
137
static int mld_v2_merge_state_changes(struct in6_multi *,
138
struct mbufq *);
139
static void mld_v2_process_group_timers(struct in6_multi_head *,
140
struct mbufq *, struct mbufq *,
141
struct in6_multi *, const int);
142
static int mld_v2_process_group_query(struct in6_multi *,
143
struct mld_ifsoftc *mli, int, struct mbuf *,
144
struct mldv2_query *, const int);
145
static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
146
static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
147
148
/*
149
* Normative references: RFC 2710, RFC 3590, RFC 3810.
150
*
151
* Locking:
152
* * The MLD subsystem lock ends up being system-wide for the moment,
153
* but could be per-VIMAGE later on.
154
* * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK.
155
* Any may be taken independently; if any are held at the same
156
* time, the above lock order must be followed.
157
* * IN6_MULTI_LOCK covers in_multi.
158
* * MLD_LOCK covers per-link state and any global variables in this file.
159
* * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
160
* per-link state iterators.
161
*
162
* XXX LOR PREVENTION
163
* A special case for IPv6 is the in6_setscope() routine. ip6_output()
164
* will not accept an ifp; it wants an embedded scope ID, unlike
165
* ip_output(), which happily takes the ifp given to it. The embedded
166
* scope ID is only used by MLD to select the outgoing interface.
167
*
168
* During interface attach and detach, MLD will take MLD_LOCK *after*
169
* the IF_AFDATA_LOCK.
170
* As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call
171
* it with MLD_LOCK held without triggering an LOR. A netisr with indirect
172
* dispatch could work around this, but we'd rather not do that, as it
173
* can introduce other races.
174
*
175
* As such, we exploit the fact that the scope ID is just the interface
176
* index, and embed it in the IPv6 destination address accordingly.
177
* This is potentially NOT VALID for MLDv1 reports, as they
178
* are always sent to the multicast group itself; as MLDv2
179
* reports are always sent to ff02::16, this is not an issue
180
* when MLDv2 is in use.
181
*
182
* This does not however eliminate the LOR when ip6_output() itself
183
* calls in6_setscope() internally whilst MLD_LOCK is held. This will
184
* trigger a LOR warning in WITNESS when the ifnet is detached.
185
*
186
* The right answer is probably to make IF_AFDATA_LOCK an rwlock, given
187
* how it's used across the network stack. Here we're simply exploiting
188
* the fact that MLD runs at a similar layer in the stack to scope6.c.
189
*
190
* VIMAGE:
191
* * Each in6_multi corresponds to an ifp, and each ifp corresponds
192
* to a vnet in ifp->if_vnet.
193
*/
194
static struct mtx mld_mtx;
195
static MALLOC_DEFINE(M_MLD, "mld", "mld state");
196
197
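/*
 * Descriptive note (editorial): KAME-style scope embedding. For link- or
 * interface-local addresses, the zone ID (in practice the interface index)
 * is stashed in the second 16-bit word of the address so the scope survives
 * without carrying a separate ifp around.
 */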
#define MLD_EMBEDSCOPE(pin6, zoneid) \
198
if (IN6_IS_SCOPE_LINKLOCAL(pin6) || \
199
IN6_IS_ADDR_MC_INTFACELOCAL(pin6)) \
200
(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF) \
201
202
/*
203
* VIMAGE-wide globals.
204
*/
205
VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
206
VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
207
VNET_DEFINE_STATIC(int, interface_timers_running6);
208
VNET_DEFINE_STATIC(int, state_change_timers_running6);
209
VNET_DEFINE_STATIC(int, current_state_timers_running6);
210
211
#define V_mld_gsrdelay VNET(mld_gsrdelay)
212
#define V_mli_head VNET(mli_head)
213
#define V_interface_timers_running6 VNET(interface_timers_running6)
214
#define V_state_change_timers_running6 VNET(state_change_timers_running6)
215
#define V_current_state_timers_running6 VNET(current_state_timers_running6)
216
217
SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */
218
219
SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
220
"IPv6 Multicast Listener Discovery");
221
222
/*
223
* Virtualized sysctls.
224
*/
225
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
226
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
227
&VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
228
"Rate limit for MLDv2 Group-and-Source queries in seconds");
229
230
/*
231
* Non-virtualized sysctls.
232
*/
233
static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
234
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
235
"Per-interface MLDv2 state");
236
237
VNET_DEFINE_STATIC(bool, mld_v1enable) = true;
238
#define V_mld_v1enable VNET(mld_v1enable)
239
SYSCTL_BOOL(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_VNET | CTLFLAG_RWTUN,
240
&VNET_NAME(mld_v1enable), 0, "Enable fallback to MLDv1");
241
242
VNET_DEFINE_STATIC(bool, mld_v2enable) = true;
243
#define V_mld_v2enable VNET(mld_v2enable)
244
SYSCTL_BOOL(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_VNET | CTLFLAG_RWTUN,
245
&VNET_NAME(mld_v2enable), 0, "Enable MLDv2");
246
247
VNET_DEFINE_STATIC(bool, mld_use_allow) = true;
248
#define V_mld_use_allow VNET(mld_use_allow)
249
SYSCTL_BOOL(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_VNET | CTLFLAG_RWTUN,
250
&VNET_NAME(mld_use_allow), 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
251
252
/*
253
* Packed Router Alert option structure declaration.
254
*/
255
struct mld_raopt {
256
struct ip6_hbh hbh;
257
struct ip6_opt pad;
258
struct ip6_opt_router ra;
259
} __packed;
260
261
/*
262
* Router Alert hop-by-hop option header.
263
*/
264
static struct mld_raopt mld_ra = {
265
.hbh = { 0, 0 },
266
.pad = { .ip6o_type = IP6OPT_PADN, 0 },
267
.ra = {
268
.ip6or_type = IP6OPT_ROUTER_ALERT,
269
.ip6or_len = IP6OPT_RTALERT_LEN - 2,
270
.ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
271
.ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF)
272
}
273
};
274
static struct ip6_pktopts mld_po;
275
276
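/*
 * Descriptive note (editorial): stash the transmit context in the mbuf
 * header -- the owning vnet in PH_loc.ptr and the interface index in the
 * flowid field -- so it can be recovered via mld_restore_context() when the
 * queued packet is finally dispatched.
 */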
static __inline void
277
mld_save_context(struct mbuf *m, struct ifnet *ifp)
278
{
279
280
#ifdef VIMAGE
281
m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
282
#endif /* VIMAGE */
283
m->m_pkthdr.rcvif = ifp;
284
m->m_pkthdr.flowid = ifp->if_index;
285
}
286
287
static __inline void
288
mld_scrub_context(struct mbuf *m)
289
{
290
291
m->m_pkthdr.PH_loc.ptr = NULL;
292
m->m_pkthdr.flowid = 0;
293
}
294
295
/*
296
* Restore context from a queued output chain.
297
* Return saved ifindex.
298
*
299
* VIMAGE: The assertion is there to make sure that we
300
* actually called CURVNET_SET() with what's in the mbuf chain.
301
*/
302
static __inline uint32_t
303
mld_restore_context(struct mbuf *m)
304
{
305
306
#if defined(VIMAGE) && defined(INVARIANTS)
307
KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
308
("%s: called when curvnet was not restored: curvnet %p m ptr %p",
309
__func__, curvnet, m->m_pkthdr.PH_loc.ptr));
310
#endif
311
return (m->m_pkthdr.flowid);
312
}
313
314
/*
315
* Retrieve or set threshold between group-source queries in seconds.
316
*
317
* VIMAGE: Assume curvnet set by caller.
318
* SMPng: NOTE: Serialized by MLD lock.
319
*/
320
static int
321
sysctl_mld_gsr(SYSCTL_HANDLER_ARGS)
322
{
323
int error;
324
int i;
325
326
error = sysctl_wire_old_buffer(req, sizeof(int));
327
if (error)
328
return (error);
329
330
MLD_LOCK();
331
332
i = V_mld_gsrdelay.tv_sec;
333
334
error = sysctl_handle_int(oidp, &i, 0, req);
335
if (error || !req->newptr)
336
goto out_locked;
337
338
if (i < -1 || i >= 60) {
339
error = EINVAL;
340
goto out_locked;
341
}
342
343
CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d",
344
V_mld_gsrdelay.tv_sec, i);
345
V_mld_gsrdelay.tv_sec = i;
346
347
out_locked:
348
MLD_UNLOCK();
349
return (error);
350
}
351
352
/*
353
* Expose struct mld_ifsoftc to userland, keyed by ifindex.
354
* For use by ifmcstat(8).
355
*
356
* VIMAGE: Assume curvnet set by caller. The node handler itself
357
* is not directly virtualized.
358
*/
359
static int
360
sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
361
{
362
struct epoch_tracker et;
363
int *name;
364
int error;
365
u_int namelen;
366
struct ifnet *ifp;
367
struct mld_ifsoftc *mli;
368
369
name = (int *)arg1;
370
namelen = arg2;
371
372
if (req->newptr != NULL)
373
return (EPERM);
374
375
if (namelen != 1)
376
return (EINVAL);
377
378
error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo));
379
if (error)
380
return (error);
381
382
IN6_MULTI_LOCK();
383
IN6_MULTI_LIST_LOCK();
384
MLD_LOCK();
385
NET_EPOCH_ENTER(et);
386
387
error = ENOENT;
388
ifp = ifnet_byindex(name[0]);
389
if (ifp == NULL)
390
goto out_locked;
391
392
LIST_FOREACH(mli, &V_mli_head, mli_link) {
393
if (ifp == mli->mli_ifp) {
394
struct mld_ifinfo info;
395
396
info.mli_version = mli->mli_version;
397
info.mli_v1_timer = mli->mli_v1_timer;
398
info.mli_v2_timer = mli->mli_v2_timer;
399
info.mli_flags = mli->mli_flags;
400
info.mli_rv = mli->mli_rv;
401
info.mli_qi = mli->mli_qi;
402
info.mli_qri = mli->mli_qri;
403
info.mli_uri = mli->mli_uri;
404
error = SYSCTL_OUT(req, &info, sizeof(info));
405
break;
406
}
407
}
408
409
out_locked:
410
NET_EPOCH_EXIT(et);
411
MLD_UNLOCK();
412
IN6_MULTI_LIST_UNLOCK();
413
IN6_MULTI_UNLOCK();
414
return (error);
415
}
416
417
/*
418
* Dispatch an entire queue of pending packet chains.
419
* VIMAGE: Assumes the vnet pointer has been set.
420
*/
421
static void
422
mld_dispatch_queue(struct mbufq *mq, int limit)
423
{
424
struct mbuf *m;
425
426
while ((m = mbufq_dequeue(mq)) != NULL) {
427
CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, m, mq);
428
mld_dispatch_packet(m);
429
if (--limit == 0)
430
break;
431
}
432
}
433
434
/*
435
* Filter outgoing MLD report state by group.
436
*
437
* Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
438
* and node-local addresses. However, kernel and socket consumers
439
* always embed the KAME scope ID in the address provided, so strip it
440
* when performing comparison.
441
* Note: This is not the same as the *multicast* scope.
442
*
443
* Return zero if the given group is one for which MLD reports
444
* should be suppressed, or non-zero if reports should be issued.
445
*/
446
static __inline int
447
mld_is_addr_reported(const struct in6_addr *addr)
448
{
449
450
KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));
451
452
if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
453
return (0);
454
455
if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
456
struct in6_addr tmp = *addr;
457
in6_clearscope(&tmp);
458
if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
459
return (0);
460
}
461
462
return (1);
463
}
464
465
/*
466
* Attach MLD when PF_INET6 is attached to an interface. Assumes that the
467
* current VNET is set by the caller.
468
*/
469
struct mld_ifsoftc *
470
mld_domifattach(struct ifnet *ifp)
471
{
472
struct mld_ifsoftc *mli;
473
474
CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp));
475
476
mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_WAITOK | M_ZERO);
477
mli->mli_ifp = ifp;
478
mli->mli_version = MLD_VERSION_2;
479
mli->mli_flags = 0;
480
mli->mli_rv = MLD_RV_INIT;
481
mli->mli_qi = MLD_QI_INIT;
482
mli->mli_qri = MLD_QRI_INIT;
483
mli->mli_uri = MLD_URI_INIT;
484
mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
485
if ((ifp->if_flags & IFF_MULTICAST) == 0)
486
mli->mli_flags |= MLIF_SILENT;
487
if (V_mld_use_allow)
488
mli->mli_flags |= MLIF_USEALLOW;
489
490
MLD_LOCK();
491
LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);
492
MLD_UNLOCK();
493
494
return (mli);
495
}
496
497
/*
498
* Hook for ifdetach.
499
*
500
* NOTE: Some finalization tasks need to run before the protocol domain
501
* is detached, but also before the link layer does its cleanup.
502
* Run before link-layer cleanup; clean up groups, but do not free MLD state.
503
*
504
* SMPng: Caller must hold IN6_MULTI_LOCK().
505
* Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator.
506
* XXX This routine is also bitten by unlocked ifma_protospec access.
507
*/
508
void
509
mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh)
510
{
511
struct epoch_tracker et;
512
struct mld_ifsoftc *mli;
513
struct ifmultiaddr *ifma;
514
struct in6_multi *inm;
515
516
CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
517
if_name(ifp));
518
519
IN6_MULTI_LIST_LOCK_ASSERT();
520
MLD_LOCK();
521
522
mli = MLD_IFINFO(ifp);
523
IF_ADDR_WLOCK(ifp);
524
/*
525
* Extract list of in6_multi associated with the detaching ifp
526
* which the PF_INET6 layer is about to release.
527
*/
528
NET_EPOCH_ENTER(et);
529
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
530
inm = in6m_ifmultiaddr_get_inm(ifma);
531
if (inm == NULL)
532
continue;
533
in6m_disconnect_locked(inmh, inm);
534
535
if (mli->mli_version == MLD_VERSION_2) {
536
in6m_clear_recorded(inm);
537
538
/*
539
* We need to release the final reference held
540
* for issuing the INCLUDE {}.
541
*/
542
if (inm->in6m_state == MLD_LEAVING_MEMBER) {
543
inm->in6m_state = MLD_NOT_MEMBER;
544
in6m_rele_locked(inmh, inm);
545
}
546
}
547
}
548
NET_EPOCH_EXIT(et);
549
IF_ADDR_WUNLOCK(ifp);
550
MLD_UNLOCK();
551
}
552
553
/*
554
* Hook for domifdetach.
555
* Runs after link-layer cleanup; free MLD state.
556
*
557
* SMPng: Normally called with IF_AFDATA_LOCK held.
558
*/
559
void
560
mld_domifdetach(struct ifnet *ifp)
561
{
562
563
CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
564
__func__, ifp, if_name(ifp));
565
566
MLD_LOCK();
567
mli_delete_locked(ifp);
568
MLD_UNLOCK();
569
}
570
571
static void
572
mli_delete_locked(struct ifnet *ifp)
573
{
574
struct mld_ifsoftc *mli, *tmli;
575
576
CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
577
__func__, ifp, if_name(ifp));
578
579
MLD_LOCK_ASSERT();
580
581
LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) {
582
if (mli->mli_ifp == ifp) {
583
/*
584
* Free deferred General Query responses.
585
*/
586
mbufq_drain(&mli->mli_gq);
587
588
LIST_REMOVE(mli, mli_link);
589
590
free(mli, M_MLD);
591
return;
592
}
593
}
594
}
595
596
/*
597
* Process a received MLDv1 general or address-specific query.
598
* Assumes that the query header has been pulled up to sizeof(mld_hdr).
599
*
600
* NOTE: Can't be fully const correct as we temporarily embed scope ID in
601
* mld_addr. This is OK as we own the mbuf chain.
602
*/
603
static int
604
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
605
/*const*/ struct mld_hdr *mld)
606
{
607
struct ifmultiaddr *ifma;
608
struct mld_ifsoftc *mli;
609
struct in6_multi *inm;
610
int is_general_query;
611
uint16_t timer;
612
#ifdef KTR
613
char ip6tbuf[INET6_ADDRSTRLEN];
614
#endif
615
616
NET_EPOCH_ASSERT();
617
618
is_general_query = 0;
619
620
if (!V_mld_v1enable) {
621
CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
622
ip6_sprintf(ip6tbuf, &mld->mld_addr),
623
ifp, if_name(ifp));
624
return (0);
625
}
626
627
/*
628
* RFC3810 Section 6.2: MLD queries must originate from
629
* a router's link-local address.
630
*/
631
if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
632
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
633
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
634
ifp, if_name(ifp));
635
return (0);
636
}
637
638
/*
639
* Do address field validation upfront before we accept
640
* the query.
641
*/
642
if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
643
/*
644
* MLDv1 General Query.
645
* If this was not sent to the all-nodes group, ignore it.
646
*/
647
struct in6_addr dst;
648
649
dst = ip6->ip6_dst;
650
in6_clearscope(&dst);
651
if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
652
return (EINVAL);
653
is_general_query = 1;
654
} else {
655
/*
656
* Embed scope ID of receiving interface in MLD query for
657
* lookup whilst we don't hold other locks.
658
*/
659
in6_setscope(&mld->mld_addr, ifp, NULL);
660
}
661
662
IN6_MULTI_LIST_LOCK();
663
MLD_LOCK();
664
665
/*
666
* Switch to MLDv1 host compatibility mode.
667
*/
668
mli = MLD_IFINFO(ifp);
669
KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
670
mld_set_version(mli, MLD_VERSION_1);
671
672
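/*
 * Descriptive note (editorial): convert the query's Maximum Response Delay
 * (milliseconds on the wire) into MLD_FASTHZ fast-timeout ticks, and never
 * let the timer round down to zero.
 */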
timer = (ntohs(mld->mld_maxdelay) * MLD_FASTHZ) / MLD_TIMER_SCALE;
673
if (timer == 0)
674
timer = 1;
675
676
if (is_general_query) {
677
/*
678
* For each reporting group joined on this
679
* interface, kick the report timer.
680
*/
681
CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
682
ifp, if_name(ifp));
683
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
684
inm = in6m_ifmultiaddr_get_inm(ifma);
685
if (inm == NULL)
686
continue;
687
mld_v1_update_group(inm, timer);
688
}
689
} else {
690
/*
691
* MLDv1 Group-Specific Query.
692
* If this is a group-specific MLDv1 query, we need only
693
* look up the single group to process it.
694
*/
695
inm = in6m_lookup_locked(ifp, &mld->mld_addr);
696
if (inm != NULL) {
697
CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
698
ip6_sprintf(ip6tbuf, &mld->mld_addr),
699
ifp, if_name(ifp));
700
mld_v1_update_group(inm, timer);
701
}
702
/* XXX Clear embedded scope ID as userland won't expect it. */
703
in6_clearscope(&mld->mld_addr);
704
}
705
706
MLD_UNLOCK();
707
IN6_MULTI_LIST_UNLOCK();
708
709
return (0);
710
}
711
712
/*
713
* Update the report timer on a group in response to an MLDv1 query.
714
*
715
* If we are becoming the reporting member for this group, start the timer.
716
* If we already are the reporting member for this group, and timer is
717
* below the threshold, reset it.
718
*
719
* We may be updating the group for the first time since we switched
720
* to MLDv2. If we are, then we must clear any recorded source lists,
721
* and transition to REPORTING state; the group timer is overloaded
722
* for group and group-source query responses.
723
*
724
* Unlike MLDv2, the delay per group should be jittered
725
* to avoid bursts of MLDv1 reports.
726
*/
727
static void
728
mld_v1_update_group(struct in6_multi *inm, const int timer)
729
{
730
#ifdef KTR
731
char ip6tbuf[INET6_ADDRSTRLEN];
732
#endif
733
734
CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
735
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
736
if_name(inm->in6m_ifp), timer);
737
738
IN6_MULTI_LIST_LOCK_ASSERT();
739
740
switch (inm->in6m_state) {
741
case MLD_NOT_MEMBER:
742
case MLD_SILENT_MEMBER:
743
break;
744
case MLD_REPORTING_MEMBER:
745
if (inm->in6m_timer != 0 &&
746
inm->in6m_timer <= timer) {
747
CTR1(KTR_MLD, "%s: REPORTING and timer running, "
748
"skipping.", __func__);
749
break;
750
}
751
/* FALLTHROUGH */
752
case MLD_SG_QUERY_PENDING_MEMBER:
753
case MLD_G_QUERY_PENDING_MEMBER:
754
case MLD_IDLE_MEMBER:
755
case MLD_LAZY_MEMBER:
756
case MLD_AWAKENING_MEMBER:
757
CTR1(KTR_MLD, "%s: ->REPORTING", __func__);
758
inm->in6m_state = MLD_REPORTING_MEMBER;
759
inm->in6m_timer = MLD_RANDOM_DELAY(timer);
760
V_current_state_timers_running6 = 1;
761
break;
762
case MLD_SLEEPING_MEMBER:
763
CTR1(KTR_MLD, "%s: ->AWAKENING", __func__);
764
inm->in6m_state = MLD_AWAKENING_MEMBER;
765
break;
766
case MLD_LEAVING_MEMBER:
767
break;
768
}
769
}
770
771
/*
772
* Process a received MLDv2 general, group-specific or
773
* group-and-source-specific query.
774
*
775
* Assumes that mld points to a struct mldv2_query which is stored in
776
* contiguous memory.
777
*
778
* Return 0 if successful, otherwise an appropriate error code is returned.
779
*/
780
static int
781
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
782
struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len)
783
{
784
struct mld_ifsoftc *mli;
785
struct in6_multi *inm;
786
uint32_t maxdelay, nsrc, qqi;
787
int is_general_query;
788
uint16_t timer;
789
uint8_t qrv;
790
#ifdef KTR
791
char ip6tbuf[INET6_ADDRSTRLEN];
792
#endif
793
794
NET_EPOCH_ASSERT();
795
796
if (!V_mld_v2enable) {
797
CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
798
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
799
ifp, if_name(ifp));
800
return (0);
801
}
802
803
/*
804
* RFC3810 Section 6.2: MLD queries must originate from
805
* a router's link-local address.
806
*/
807
if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
808
CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
809
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
810
ifp, if_name(ifp));
811
return (0);
812
}
813
814
is_general_query = 0;
815
816
CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));
817
818
maxdelay = ntohs(mld->mld_maxdelay); /* in milliseconds */
819
if (maxdelay >= 32768) {
820
maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
821
(MLD_MRC_EXP(maxdelay) + 3);
822
}
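/*
 * Descriptive note (editorial): codes >= 32768 use the RFC 3810 section
 * 5.1.3 floating point form (3-bit exponent, 12-bit mantissa); the decoded
 * value is the Maximum Response Delay in milliseconds, scaled to
 * fast-timeout ticks below.
 */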
823
timer = (maxdelay * MLD_FASTHZ) / MLD_TIMER_SCALE;
824
if (timer == 0)
825
timer = 1;
826
827
qrv = MLD_QRV(mld->mld_misc);
828
if (qrv < 2) {
829
CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
830
qrv, MLD_RV_INIT);
831
qrv = MLD_RV_INIT;
832
}
833
834
qqi = mld->mld_qqi;
835
if (qqi >= 128) {
836
qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
837
(MLD_QQIC_EXP(mld->mld_qqi) + 3);
838
}
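/*
 * Descriptive note (editorial): likewise, a QQIC of 128 or more is the
 * RFC 3810 section 5.1.9 exponent/mantissa encoding; the result is the
 * Querier's Query Interval in seconds.
 */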
839
840
nsrc = ntohs(mld->mld_numsrc);
841
if (nsrc > MLD_MAX_GS_SOURCES)
842
return (EMSGSIZE);
843
if (icmp6len < sizeof(struct mldv2_query) +
844
(nsrc * sizeof(struct in6_addr)))
845
return (EMSGSIZE);
846
847
/*
848
* Do further input validation upfront to avoid resetting timers
849
* should we need to discard this query.
850
*/
851
if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
852
/*
853
* A general query with a source list has undefined
854
* behaviour; discard it.
855
*/
856
if (nsrc > 0)
857
return (EINVAL);
858
is_general_query = 1;
859
} else {
860
/*
861
* Embed scope ID of receiving interface in MLD query for
862
* lookup whilst we don't hold other locks (due to KAME
863
* locking lameness). We own this mbuf chain just now.
864
*/
865
in6_setscope(&mld->mld_addr, ifp, NULL);
866
}
867
868
IN6_MULTI_LIST_LOCK();
869
MLD_LOCK();
870
871
mli = MLD_IFINFO(ifp);
872
KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
873
874
/*
875
* Discard the v2 query if we're in Compatibility Mode.
876
* The RFC is pretty clear that hosts need to stay in MLDv1 mode
877
* until the Old Version Querier Present timer expires.
878
*/
879
if (mli->mli_version != MLD_VERSION_2)
880
goto out_locked;
881
882
mld_set_version(mli, MLD_VERSION_2);
883
mli->mli_rv = qrv;
884
mli->mli_qi = qqi;
885
mli->mli_qri = maxdelay;
886
887
CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
888
maxdelay);
889
890
if (is_general_query) {
891
/*
892
* MLDv2 General Query.
893
*
894
* Schedule a current-state report on this ifp for
895
* all groups, possibly containing source lists.
896
*
897
* If there is a pending General Query response
898
* scheduled earlier than the selected delay, do
899
* not schedule any other reports.
900
* Otherwise, reset the interface timer.
901
*/
902
CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
903
ifp, if_name(ifp));
904
if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
905
mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
906
V_interface_timers_running6 = 1;
907
}
908
} else {
909
/*
910
* MLDv2 Group-specific or Group-and-source-specific Query.
911
*
912
* Group-source-specific queries are throttled on
913
* a per-group basis to defeat denial-of-service attempts.
914
* Queries for groups we are not a member of on this
915
* link are simply ignored.
916
*/
917
inm = in6m_lookup_locked(ifp, &mld->mld_addr);
918
if (inm == NULL)
919
goto out_locked;
920
if (nsrc > 0) {
921
if (!ratecheck(&inm->in6m_lastgsrtv,
922
&V_mld_gsrdelay)) {
923
CTR1(KTR_MLD, "%s: GS query throttled.",
924
__func__);
925
goto out_locked;
926
}
927
}
928
CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
929
ifp, if_name(ifp));
930
/*
931
* If there is a pending General Query response
932
* scheduled sooner than the selected delay, no
933
* further report need be scheduled.
934
* Otherwise, prepare to respond to the
935
* group-specific or group-and-source query.
936
*/
937
if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
938
mld_v2_process_group_query(inm, mli, timer, m, mld, off);
939
940
/* XXX Clear embedded scope ID as userland won't expect it. */
941
in6_clearscope(&mld->mld_addr);
942
}
943
944
out_locked:
945
MLD_UNLOCK();
946
IN6_MULTI_LIST_UNLOCK();
947
948
return (0);
949
}
950
951
/*
952
* Process a received MLDv2 group-specific or group-and-source-specific
953
* query.
954
* Return <0 if any error occurred. Currently this is ignored.
955
*/
956
static int
957
mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
958
int timer, struct mbuf *m0, struct mldv2_query *mld, const int off)
959
{
960
int retval;
961
uint16_t nsrc;
962
963
IN6_MULTI_LIST_LOCK_ASSERT();
964
MLD_LOCK_ASSERT();
965
966
retval = 0;
967
968
switch (inm->in6m_state) {
969
case MLD_NOT_MEMBER:
970
case MLD_SILENT_MEMBER:
971
case MLD_SLEEPING_MEMBER:
972
case MLD_LAZY_MEMBER:
973
case MLD_AWAKENING_MEMBER:
974
case MLD_IDLE_MEMBER:
975
case MLD_LEAVING_MEMBER:
976
return (retval);
977
break;
978
case MLD_REPORTING_MEMBER:
979
case MLD_G_QUERY_PENDING_MEMBER:
980
case MLD_SG_QUERY_PENDING_MEMBER:
981
break;
982
}
983
984
nsrc = ntohs(mld->mld_numsrc);
985
986
/* Length should be checked by calling function. */
987
KASSERT((m0->m_flags & M_PKTHDR) == 0 ||
988
m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) +
989
nsrc * sizeof(struct in6_addr),
990
("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)",
991
m0->m_pkthdr.len, off + sizeof(struct mldv2_query) +
992
nsrc * sizeof(struct in6_addr), m0));
993
994
/*
995
* Deal with group-specific queries upfront.
996
* If any group query is already pending, purge any recorded
997
* source-list state if it exists, and schedule a query response
998
* for this group-specific query.
999
*/
1000
if (nsrc == 0) {
1001
if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1002
inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1003
in6m_clear_recorded(inm);
1004
timer = min(inm->in6m_timer, timer);
1005
}
1006
inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1007
inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1008
V_current_state_timers_running6 = 1;
1009
return (retval);
1010
}
1011
1012
/*
1013
* Deal with the case where a group-and-source-specific query has
1014
* been received but a group-specific query is already pending.
1015
*/
1016
if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1017
timer = min(inm->in6m_timer, timer);
1018
inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1019
V_current_state_timers_running6 = 1;
1020
return (retval);
1021
}
1022
1023
/*
1024
* Finally, deal with the case where a group-and-source-specific
1025
* query has been received, where a response to a previous g-s-r
1026
* query exists, or none exists.
1027
* In this case, we need to parse the source-list which the Querier
1028
* has provided us with and check if we have any source list filter
1029
* entries at T1 for these sources. If we do not, there is no need
1030
* to schedule a report and the query may be dropped.
1031
* If we do, we must record them and schedule a current-state
1032
* report for those sources.
1033
*/
1034
if (inm->in6m_nsrc > 0) {
1035
struct in6_addr srcaddr;
1036
int i, nrecorded;
1037
int soff;
1038
1039
soff = off + sizeof(struct mldv2_query);
1040
nrecorded = 0;
1041
for (i = 0; i < nsrc; i++) {
1042
m_copydata(m0, soff, sizeof(struct in6_addr),
1043
(caddr_t)&srcaddr);
1044
retval = in6m_record_source(inm, &srcaddr);
1045
if (retval < 0)
1046
break;
1047
nrecorded += retval;
1048
soff += sizeof(struct in6_addr);
1049
}
1050
if (nrecorded > 0) {
1051
CTR1(KTR_MLD,
1052
"%s: schedule response to SG query", __func__);
1053
inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1054
inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1055
V_current_state_timers_running6 = 1;
1056
}
1057
}
1058
1059
return (retval);
1060
}
1061
1062
/*
1063
* Process a received MLDv1 host membership report.
1064
* Assumes mld points to mld_hdr in pulled up mbuf chain.
1065
*
1066
* NOTE: Can't be fully const correct as we temporarily embed scope ID in
1067
* mld_addr. This is OK as we own the mbuf chain.
1068
*/
1069
static int
1070
mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
1071
/*const*/ struct mld_hdr *mld)
1072
{
1073
struct in6_addr src, dst;
1074
struct in6_ifaddr *ia;
1075
struct in6_multi *inm;
1076
#ifdef KTR
1077
char ip6tbuf[INET6_ADDRSTRLEN];
1078
#endif
1079
1080
NET_EPOCH_ASSERT();
1081
1082
if (!V_mld_v1enable) {
1083
CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
1084
ip6_sprintf(ip6tbuf, &mld->mld_addr),
1085
ifp, if_name(ifp));
1086
return (0);
1087
}
1088
1089
if (ifp->if_flags & IFF_LOOPBACK)
1090
return (0);
1091
1092
/*
1093
* MLDv1 reports must originate from a host's link-local address,
1094
* or the unspecified address (when booting).
1095
*/
1096
src = ip6->ip6_src;
1097
in6_clearscope(&src);
1098
if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1099
CTR3(KTR_MLD, "ignore v1 report src %s on ifp %p(%s)",
1100
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
1101
ifp, if_name(ifp));
1102
return (EINVAL);
1103
}
1104
1105
/*
1106
* RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1107
* group, and must be directed to the group itself.
1108
*/
1109
dst = ip6->ip6_dst;
1110
in6_clearscope(&dst);
1111
if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1112
!IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1113
CTR3(KTR_MLD, "ignore v1 report dst %s on ifp %p(%s)",
1114
ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
1115
ifp, if_name(ifp));
1116
return (EINVAL);
1117
}
1118
1119
/*
1120
* Make sure we don't hear our own membership report, as fast
1121
* leave requires knowing that we are the only member of a
1122
* group. Assume we used the link-local address if available,
1123
* otherwise look for ::.
1124
*
1125
* XXX Note that scope ID comparison is needed for the address
1126
* returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1127
* performed for the on-wire address.
1128
*/
1129
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1130
if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) ||
1131
(ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) {
1132
if (ia != NULL)
1133
ifa_free(&ia->ia_ifa);
1134
return (0);
1135
}
1136
if (ia != NULL)
1137
ifa_free(&ia->ia_ifa);
1138
1139
CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
1140
ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp));
1141
1142
/*
1143
* Embed scope ID of receiving interface in MLD query for lookup
1144
* whilst we don't hold other locks (due to KAME locking lameness).
1145
*/
1146
if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1147
in6_setscope(&mld->mld_addr, ifp, NULL);
1148
1149
IN6_MULTI_LIST_LOCK();
1150
MLD_LOCK();
1151
1152
/*
1153
* MLDv1 report suppression.
1154
* If we are a member of this group, and our membership should be
1155
* reported, and our group timer is pending or about to be reset,
1156
* stop our group timer by transitioning to the 'lazy' state.
1157
*/
1158
inm = in6m_lookup_locked(ifp, &mld->mld_addr);
1159
if (inm != NULL) {
1160
struct mld_ifsoftc *mli;
1161
1162
mli = inm->in6m_mli;
1163
KASSERT(mli != NULL,
1164
("%s: no mli for ifp %p", __func__, ifp));
1165
1166
/*
1167
* If we are in MLDv2 host mode, do not allow the
1168
* other host's MLDv1 report to suppress our reports.
1169
*/
1170
if (mli->mli_version == MLD_VERSION_2)
1171
goto out_locked;
1172
1173
inm->in6m_timer = 0;
1174
1175
switch (inm->in6m_state) {
1176
case MLD_NOT_MEMBER:
1177
case MLD_SILENT_MEMBER:
1178
case MLD_SLEEPING_MEMBER:
1179
break;
1180
case MLD_REPORTING_MEMBER:
1181
case MLD_IDLE_MEMBER:
1182
case MLD_AWAKENING_MEMBER:
1183
CTR3(KTR_MLD,
1184
"report suppressed for %s on ifp %p(%s)",
1185
ip6_sprintf(ip6tbuf, &mld->mld_addr),
1186
ifp, if_name(ifp));
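/* FALLTHROUGH */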
1187
case MLD_LAZY_MEMBER:
1188
inm->in6m_state = MLD_LAZY_MEMBER;
1189
break;
1190
case MLD_G_QUERY_PENDING_MEMBER:
1191
case MLD_SG_QUERY_PENDING_MEMBER:
1192
case MLD_LEAVING_MEMBER:
1193
break;
1194
}
1195
}
1196
1197
out_locked:
1198
MLD_UNLOCK();
1199
IN6_MULTI_LIST_UNLOCK();
1200
1201
/* XXX Clear embedded scope ID as userland won't expect it. */
1202
in6_clearscope(&mld->mld_addr);
1203
1204
return (0);
1205
}
1206
1207
/*
1208
* MLD input path.
1209
*
1210
* Assume query messages which fit in a single ICMPv6 message header
1211
* have been pulled up.
1212
* Assume that userland will want to see the message, even if it
1213
* otherwise fails kernel input validation; do not free it.
1214
* Pullup may however free the mbuf chain m if it fails.
1215
*
1216
* Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1217
*/
1218
int
1219
mld_input(struct mbuf **mp, int off, int icmp6len)
1220
{
1221
struct ifnet *ifp;
1222
struct ip6_hdr *ip6;
1223
struct mbuf *m;
1224
struct mld_hdr *mld;
1225
int mldlen;
1226
1227
m = *mp;
1228
CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off);
1229
1230
ifp = m->m_pkthdr.rcvif;
1231
1232
/* Pullup to appropriate size. */
1233
if (m->m_len < off + sizeof(*mld)) {
1234
m = m_pullup(m, off + sizeof(*mld));
1235
if (m == NULL) {
1236
ICMP6STAT_INC(icp6s_badlen);
1237
return (IPPROTO_DONE);
1238
}
1239
}
1240
mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1241
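/*
 * Descriptive note (editorial): an MLDv2 query carries extra fields (S/QRV,
 * QQIC, source count and list) beyond the basic MLD header, so pull up the
 * larger structure when the ICMPv6 length indicates a v2 query.
 */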
if (mld->mld_type == MLD_LISTENER_QUERY &&
1242
icmp6len >= sizeof(struct mldv2_query)) {
1243
mldlen = sizeof(struct mldv2_query);
1244
} else {
1245
mldlen = sizeof(struct mld_hdr);
1246
}
1247
if (m->m_len < off + mldlen) {
1248
m = m_pullup(m, off + mldlen);
1249
if (m == NULL) {
1250
ICMP6STAT_INC(icp6s_badlen);
1251
return (IPPROTO_DONE);
1252
}
1253
}
1254
*mp = m;
1255
ip6 = mtod(m, struct ip6_hdr *);
1256
mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1257
1258
/*
1259
* Userland needs to see all of this traffic for implementing
1260
* the endpoint discovery portion of multicast routing.
1261
*/
1262
switch (mld->mld_type) {
1263
case MLD_LISTENER_QUERY:
1264
icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1265
if (icmp6len == sizeof(struct mld_hdr)) {
1266
if (mld_v1_input_query(ifp, ip6, mld) != 0)
1267
return (0);
1268
} else if (icmp6len >= sizeof(struct mldv2_query)) {
1269
if (mld_v2_input_query(ifp, ip6, m,
1270
(struct mldv2_query *)mld, off, icmp6len) != 0)
1271
return (0);
1272
}
1273
break;
1274
case MLD_LISTENER_REPORT:
1275
icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1276
if (mld_v1_input_report(ifp, ip6, mld) != 0)
1277
return (0);
1278
break;
1279
case MLDV2_LISTENER_REPORT:
1280
icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1281
break;
1282
case MLD_LISTENER_DONE:
1283
icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1284
break;
1285
default:
1286
break;
1287
}
1288
1289
return (0);
1290
}
1291
1292
/*
1293
* Fast timeout handler (global).
1294
* VIMAGE: Timeout handlers are expected to service all vimages.
1295
*/
1296
static struct callout mldfast_callout;
1297
static void
1298
mld_fasttimo(void *arg __unused)
1299
{
1300
struct epoch_tracker et;
1301
struct in6_multi_head inmh;
1302
VNET_ITERATOR_DECL(vnet_iter);
1303
1304
SLIST_INIT(&inmh);
1305
1306
NET_EPOCH_ENTER(et);
1307
VNET_LIST_RLOCK_NOSLEEP();
1308
VNET_FOREACH(vnet_iter) {
1309
CURVNET_SET(vnet_iter);
1310
mld_fasttimo_vnet(&inmh);
1311
CURVNET_RESTORE();
1312
}
1313
VNET_LIST_RUNLOCK_NOSLEEP();
1314
NET_EPOCH_EXIT(et);
1315
in6m_release_list_deferred(&inmh);
1316
1317
callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL);
1318
}
1319
1320
/*
1321
* Fast timeout handler (per-vnet).
1322
*
1323
* VIMAGE: Assume caller has set up our curvnet.
1324
*/
1325
static void
1326
mld_fasttimo_vnet(struct in6_multi_head *inmh)
1327
{
1328
struct mbufq scq; /* State-change packets */
1329
struct mbufq qrq; /* Query response packets */
1330
struct ifnet *ifp;
1331
struct mld_ifsoftc *mli;
1332
struct ifmultiaddr *ifma;
1333
struct in6_multi *inm;
1334
int uri_fasthz;
1335
1336
uri_fasthz = 0;
1337
1338
/*
1339
* Quick check to see if any work needs to be done, in order to
1340
* minimize the overhead of fasttimo processing.
1341
* SMPng: XXX Unlocked reads.
1342
*/
1343
if (!V_current_state_timers_running6 &&
1344
!V_interface_timers_running6 &&
1345
!V_state_change_timers_running6)
1346
return;
1347
1348
IN6_MULTI_LIST_LOCK();
1349
MLD_LOCK();
1350
1351
/*
1352
* MLDv2 General Query response timer processing.
1353
*/
1354
if (V_interface_timers_running6) {
1355
CTR1(KTR_MLD, "%s: interface timers running", __func__);
1356
1357
V_interface_timers_running6 = 0;
1358
LIST_FOREACH(mli, &V_mli_head, mli_link) {
1359
if (mli->mli_v2_timer == 0) {
1360
/* Do nothing. */
1361
} else if (--mli->mli_v2_timer == 0) {
1362
mld_v2_dispatch_general_query(mli);
1363
} else {
1364
V_interface_timers_running6 = 1;
1365
}
1366
}
1367
}
1368
1369
if (!V_current_state_timers_running6 &&
1370
!V_state_change_timers_running6)
1371
goto out_locked;
1372
1373
V_current_state_timers_running6 = 0;
1374
V_state_change_timers_running6 = 0;
1375
1376
CTR1(KTR_MLD, "%s: state change timers running", __func__);
1377
1378
/*
1379
* MLD host report and state-change timer processing.
1380
* Note: Processing a v2 group timer may remove a node.
1381
*/
1382
LIST_FOREACH(mli, &V_mli_head, mli_link) {
1383
ifp = mli->mli_ifp;
1384
1385
if (mli->mli_version == MLD_VERSION_2) {
1386
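/*
 * Descriptive note (editorial): pick a jittered retransmission interval for
 * state-change reports, derived from the link's Unsolicited Report Interval
 * (mli_uri, nominally in seconds) expressed in fast-timeout ticks.
 */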
uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
1387
MLD_FASTHZ);
1388
mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS);
1389
mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
1390
}
1391
1392
IF_ADDR_WLOCK(ifp);
1393
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1394
inm = in6m_ifmultiaddr_get_inm(ifma);
1395
if (inm == NULL)
1396
continue;
1397
switch (mli->mli_version) {
1398
case MLD_VERSION_1:
1399
mld_v1_process_group_timer(inmh, inm);
1400
break;
1401
case MLD_VERSION_2:
1402
mld_v2_process_group_timers(inmh, &qrq,
1403
&scq, inm, uri_fasthz);
1404
break;
1405
}
1406
}
1407
IF_ADDR_WUNLOCK(ifp);
1408
1409
switch (mli->mli_version) {
1410
case MLD_VERSION_1:
1411
/*
1412
* Transmit reports for this lifecycle. This
1413
* is done while not holding IF_ADDR_LOCK
1414
* since this can call
1415
* in6ifa_ifpforlinklocal() which locks
1416
* IF_ADDR_LOCK internally as well as
1417
* ip6_output() to transmit a packet.
1418
*/
1419
while ((inm = SLIST_FIRST(inmh)) != NULL) {
1420
SLIST_REMOVE_HEAD(inmh, in6m_defer);
1421
(void)mld_v1_transmit_report(inm,
1422
MLD_LISTENER_REPORT);
1423
}
1424
break;
1425
case MLD_VERSION_2:
1426
mld_dispatch_queue(&qrq, 0);
1427
mld_dispatch_queue(&scq, 0);
1428
break;
1429
}
1430
}
1431
1432
out_locked:
1433
MLD_UNLOCK();
1434
IN6_MULTI_LIST_UNLOCK();
1435
}
1436
1437
/*
1438
* Update host report group timer.
1439
* Will update the global pending timer flags.
1440
*/
1441
static void
1442
mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
1443
{
1444
int report_timer_expired;
1445
1446
IN6_MULTI_LIST_LOCK_ASSERT();
1447
MLD_LOCK_ASSERT();
1448
1449
if (inm->in6m_timer == 0) {
1450
report_timer_expired = 0;
1451
} else if (--inm->in6m_timer == 0) {
1452
report_timer_expired = 1;
1453
} else {
1454
V_current_state_timers_running6 = 1;
1455
return;
1456
}
1457
1458
switch (inm->in6m_state) {
1459
case MLD_NOT_MEMBER:
1460
case MLD_SILENT_MEMBER:
1461
case MLD_IDLE_MEMBER:
1462
case MLD_LAZY_MEMBER:
1463
case MLD_SLEEPING_MEMBER:
1464
case MLD_AWAKENING_MEMBER:
1465
break;
1466
case MLD_REPORTING_MEMBER:
1467
if (report_timer_expired) {
1468
inm->in6m_state = MLD_IDLE_MEMBER;
1469
SLIST_INSERT_HEAD(inmh, inm, in6m_defer);
1470
}
1471
break;
1472
case MLD_G_QUERY_PENDING_MEMBER:
1473
case MLD_SG_QUERY_PENDING_MEMBER:
1474
case MLD_LEAVING_MEMBER:
1475
break;
1476
}
1477
}
1478
1479
/*
1480
* Update a group's timers for MLDv2.
1481
* Will update the global pending timer flags.
1482
* Note: Unlocked read from mli.
1483
*/
1484
static void
1485
mld_v2_process_group_timers(struct in6_multi_head *inmh,
1486
struct mbufq *qrq, struct mbufq *scq,
1487
struct in6_multi *inm, const int uri_fasthz)
1488
{
1489
int query_response_timer_expired;
1490
int state_change_retransmit_timer_expired;
1491
#ifdef KTR
1492
char ip6tbuf[INET6_ADDRSTRLEN];
1493
#endif
1494
1495
IN6_MULTI_LIST_LOCK_ASSERT();
1496
MLD_LOCK_ASSERT();
1497
1498
query_response_timer_expired = 0;
1499
state_change_retransmit_timer_expired = 0;
1500
1501
/*
1502
* During a transition from compatibility mode back to MLDv2,
1503
* a group record in REPORTING state may still have its group
1504
* timer active. This is a no-op in this function; it is easier
1505
* to deal with it here than to complicate the slow-timeout path.
1506
*/
1507
if (inm->in6m_timer == 0) {
1508
query_response_timer_expired = 0;
1509
} else if (--inm->in6m_timer == 0) {
1510
query_response_timer_expired = 1;
1511
} else {
1512
V_current_state_timers_running6 = 1;
1513
}
1514
1515
if (inm->in6m_sctimer == 0) {
1516
state_change_retransmit_timer_expired = 0;
1517
} else if (--inm->in6m_sctimer == 0) {
1518
state_change_retransmit_timer_expired = 1;
1519
} else {
1520
V_state_change_timers_running6 = 1;
1521
}
1522
1523
/* We are in fasttimo, so be quick about it. */
1524
if (!state_change_retransmit_timer_expired &&
1525
!query_response_timer_expired)
1526
return;
1527
1528
switch (inm->in6m_state) {
1529
case MLD_NOT_MEMBER:
1530
case MLD_SILENT_MEMBER:
1531
case MLD_SLEEPING_MEMBER:
1532
case MLD_LAZY_MEMBER:
1533
case MLD_AWAKENING_MEMBER:
1534
case MLD_IDLE_MEMBER:
1535
break;
1536
case MLD_G_QUERY_PENDING_MEMBER:
1537
case MLD_SG_QUERY_PENDING_MEMBER:
1538
/*
1539
* Respond to a previously pending Group-Specific
1540
* or Group-and-Source-Specific query by enqueueing
1541
* the appropriate Current-State report for
1542
* immediate transmission.
1543
*/
1544
if (query_response_timer_expired) {
1545
int retval __unused;
1546
1547
retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
1548
(inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
1549
0);
1550
CTR2(KTR_MLD, "%s: enqueue record = %d",
1551
__func__, retval);
1552
inm->in6m_state = MLD_REPORTING_MEMBER;
1553
in6m_clear_recorded(inm);
1554
}
1555
/* FALLTHROUGH */
1556
case MLD_REPORTING_MEMBER:
1557
case MLD_LEAVING_MEMBER:
1558
if (state_change_retransmit_timer_expired) {
1559
/*
1560
* State-change retransmission timer fired.
1561
* If there are any further pending retransmissions,
1562
* set the global pending state-change flag, and
1563
* reset the timer.
1564
*/
1565
if (--inm->in6m_scrv > 0) {
1566
inm->in6m_sctimer = uri_fasthz;
1567
V_state_change_timers_running6 = 1;
1568
}
1569
/*
1570
* Retransmit the previously computed state-change
1571
* report. If there are no further pending
1572
* retransmissions, the mbuf queue will be consumed.
1573
* Update T0 state to T1 as we have now sent
1574
* a state-change.
1575
*/
1576
(void)mld_v2_merge_state_changes(inm, scq);
1577
1578
in6m_commit(inm);
1579
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
1580
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
1581
if_name(inm->in6m_ifp));
1582
1583
/*
1584
* If we are leaving the group for good, make sure
1585
* we release MLD's reference to it.
1586
* This release must be deferred using a SLIST,
1587
* as we are called from a loop which traverses
1588
* the in_ifmultiaddr TAILQ.
1589
*/
1590
if (inm->in6m_state == MLD_LEAVING_MEMBER &&
1591
inm->in6m_scrv == 0) {
1592
inm->in6m_state = MLD_NOT_MEMBER;
1593
in6m_disconnect_locked(inmh, inm);
1594
in6m_rele_locked(inmh, inm);
1595
}
1596
}
1597
break;
1598
}
1599
}
1600
1601
/*
1602
* Switch to a different version on the given interface,
1603
* as per Section 9.12.
1604
*/
1605
static void
1606
mld_set_version(struct mld_ifsoftc *mli, const int version)
1607
{
1608
int old_version_timer;
1609
1610
MLD_LOCK_ASSERT();
1611
1612
CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
1613
version, mli->mli_ifp, if_name(mli->mli_ifp));
1614
1615
if (version == MLD_VERSION_1) {
1616
/*
1617
* Compute the "Older Version Querier Present" timer as per
1618
* Section 9.12.
1619
*/
1620
old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1621
old_version_timer *= MLD_SLOWHZ;
1622
mli->mli_v1_timer = old_version_timer;
1623
}
1624
1625
if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1626
mli->mli_version = MLD_VERSION_1;
1627
mld_v2_cancel_link_timers(mli);
1628
}
1629
}
1630
1631
/*
1632
* Cancel pending MLDv2 timers for the given link and all groups
1633
* joined on it; state-change, general-query, and group-query timers.
1634
*/
1635
static void
1636
mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
1637
{
1638
struct epoch_tracker et;
1639
struct in6_multi_head inmh;
1640
struct ifmultiaddr *ifma;
1641
struct ifnet *ifp;
1642
struct in6_multi *inm;
1643
1644
CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
1645
mli->mli_ifp, if_name(mli->mli_ifp));
1646
1647
SLIST_INIT(&inmh);
1648
IN6_MULTI_LIST_LOCK_ASSERT();
1649
MLD_LOCK_ASSERT();
1650
1651
/*
1652
* Fast-track this potentially expensive operation
1653
* by checking all the global 'timer pending' flags.
1654
*/
1655
if (!V_interface_timers_running6 &&
1656
!V_state_change_timers_running6 &&
1657
!V_current_state_timers_running6)
1658
return;
1659
1660
mli->mli_v2_timer = 0;
1661
1662
ifp = mli->mli_ifp;
1663
1664
IF_ADDR_WLOCK(ifp);
1665
NET_EPOCH_ENTER(et);
1666
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1667
inm = in6m_ifmultiaddr_get_inm(ifma);
1668
if (inm == NULL)
1669
continue;
1670
switch (inm->in6m_state) {
1671
case MLD_NOT_MEMBER:
1672
case MLD_SILENT_MEMBER:
1673
case MLD_IDLE_MEMBER:
1674
case MLD_LAZY_MEMBER:
1675
case MLD_SLEEPING_MEMBER:
1676
case MLD_AWAKENING_MEMBER:
1677
break;
1678
case MLD_LEAVING_MEMBER:
1679
/*
1680
* If we are leaving the group and switching
1681
* version, we need to release the final
1682
* reference held for issuing the INCLUDE {}.
1683
*/
1684
if (inm->in6m_refcount == 1)
1685
in6m_disconnect_locked(&inmh, inm);
1686
in6m_rele_locked(&inmh, inm);
1687
/* FALLTHROUGH */
1688
case MLD_G_QUERY_PENDING_MEMBER:
1689
case MLD_SG_QUERY_PENDING_MEMBER:
1690
in6m_clear_recorded(inm);
1691
/* FALLTHROUGH */
1692
case MLD_REPORTING_MEMBER:
1693
inm->in6m_sctimer = 0;
1694
inm->in6m_timer = 0;
1695
inm->in6m_state = MLD_REPORTING_MEMBER;
1696
/*
1697
* Free any pending MLDv2 state-change records.
1698
*/
1699
mbufq_drain(&inm->in6m_scq);
1700
break;
1701
}
1702
}
1703
NET_EPOCH_EXIT(et);
1704
IF_ADDR_WUNLOCK(ifp);
1705
in6m_release_list_deferred(&inmh);
1706
}
1707
1708
/*
1709
* Global slowtimo handler.
1710
* VIMAGE: Timeout handlers are expected to service all vimages.
1711
*/
1712
static struct callout mldslow_callout;
1713
static void
1714
mld_slowtimo(void *arg __unused)
1715
{
1716
VNET_ITERATOR_DECL(vnet_iter);
1717
1718
VNET_LIST_RLOCK_NOSLEEP();
1719
VNET_FOREACH(vnet_iter) {
1720
CURVNET_SET(vnet_iter);
1721
mld_slowtimo_vnet();
1722
CURVNET_RESTORE();
1723
}
1724
VNET_LIST_RUNLOCK_NOSLEEP();
1725
1726
callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL);
1727
}
1728
1729
/*
1730
* Per-vnet slowtimo handler.
1731
*/
1732
static void
1733
mld_slowtimo_vnet(void)
1734
{
1735
struct mld_ifsoftc *mli;
1736
1737
MLD_LOCK();
1738
1739
LIST_FOREACH(mli, &V_mli_head, mli_link) {
1740
mld_v1_process_querier_timers(mli);
1741
}
1742
1743
MLD_UNLOCK();
1744
}
1745
1746
/*
1747
* Update the Older Version Querier Present timers for a link.
1748
* See Section 9.12 of RFC 3810.
1749
*/
1750
static void
1751
mld_v1_process_querier_timers(struct mld_ifsoftc *mli)
1752
{
1753
1754
MLD_LOCK_ASSERT();
1755
1756
if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
1757
/*
1758
* MLDv1 Querier Present timer expired; revert to MLDv2.
1759
*/
1760
CTR5(KTR_MLD,
1761
"%s: transition from v%d -> v%d on %p(%s)",
1762
__func__, mli->mli_version, MLD_VERSION_2,
1763
mli->mli_ifp, if_name(mli->mli_ifp));
1764
mli->mli_version = MLD_VERSION_2;
1765
}
1766
}
1767
1768
/*
1769
* Transmit an MLDv1 report immediately.
1770
*/
1771
static int
1772
mld_v1_transmit_report(struct in6_multi *in6m, const int type)
1773
{
1774
struct ifnet *ifp;
1775
struct in6_ifaddr *ia;
1776
struct ip6_hdr *ip6;
1777
struct mbuf *mh, *md;
1778
struct mld_hdr *mld;
1779
1780
NET_EPOCH_ASSERT();
1781
IN6_MULTI_LIST_LOCK_ASSERT();
1782
MLD_LOCK_ASSERT();
1783
1784
ifp = in6m->in6m_ifp;
1785
/* in process of being freed */
1786
if (ifp == NULL)
1787
return (0);
1788
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1789
/* ia may be NULL if link-local address is tentative. */
1790
1791
mh = m_gethdr(M_NOWAIT, MT_DATA);
1792
if (mh == NULL) {
1793
if (ia != NULL)
1794
ifa_free(&ia->ia_ifa);
1795
return (ENOMEM);
1796
}
1797
md = m_get(M_NOWAIT, MT_DATA);
1798
if (md == NULL) {
1799
m_free(mh);
1800
if (ia != NULL)
1801
ifa_free(&ia->ia_ifa);
1802
return (ENOMEM);
1803
}
1804
mh->m_next = md;
1805
1806
/*
1807
* FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
1808
* that ether_output() does not need to allocate another mbuf
1809
* for the header in the most common case.
1810
*/
1811
M_ALIGN(mh, sizeof(struct ip6_hdr));
1812
mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
1813
mh->m_len = sizeof(struct ip6_hdr);
1814
1815
ip6 = mtod(mh, struct ip6_hdr *);
1816
ip6->ip6_flow = 0;
1817
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1818
ip6->ip6_vfc |= IPV6_VERSION;
1819
ip6->ip6_nxt = IPPROTO_ICMPV6;
1820
ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
1821
ip6->ip6_dst = in6m->in6m_addr;
1822
1823
md->m_len = sizeof(struct mld_hdr);
1824
mld = mtod(md, struct mld_hdr *);
1825
mld->mld_type = type;
1826
mld->mld_code = 0;
1827
mld->mld_cksum = 0;
1828
mld->mld_maxdelay = 0;
1829
mld->mld_reserved = 0;
1830
mld->mld_addr = in6m->in6m_addr;
1831
in6_clearscope(&mld->mld_addr);
1832
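/*
 * Descriptive note (editorial): compute the ICMPv6 checksum; in6_cksum()
 * accounts for the IPv6 pseudo-header derived from the header built above.
 */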
mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
1833
sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
1834
1835
mld_save_context(mh, ifp);
1836
mh->m_flags |= M_MLDV1;
1837
1838
mld_dispatch_packet(mh);
1839
1840
if (ia != NULL)
1841
ifa_free(&ia->ia_ifa);
1842
return (0);
1843
}
1844
1845
/*
1846
* Process a state change from the upper layer for the given IPv6 group.
1847
*
1848
* Each socket holds a reference on the in_multi in its own ip_moptions.
1849
* The socket layer will have made the necessary updates to the group
1850
* state, it is now up to MLD to issue a state change report if there
1851
* has been any change between T0 (when the last state-change was issued)
1852
* and T1 (now).
1853
*
1854
* We use the MLDv2 state machine at group level. The MLD module
1855
* however makes the decision as to which MLD protocol version to speak.
1856
* A state change *from* INCLUDE {} always means an initial join.
1857
* A state change *to* INCLUDE {} always means a final leave.
1858
*
1859
* If delay is non-zero, and the state change is an initial multicast
1860
* join, the state change report will be delayed by 'delay' ticks
1861
* in units of MLD_FASTHZ if MLDv1 is active on the link; otherwise
1862
* the initial MLDv2 state change report will be delayed by whichever
1863
* is sooner, a pending state-change timer or delay itself.
1864
*
1865
* VIMAGE: curvnet should have been set by caller, as this routine
1866
* is called from the socket option handlers.
1867
*/
1868
int
1869
mld_change_state(struct in6_multi *inm, const int delay)
1870
{
1871
struct mld_ifsoftc *mli;
1872
struct ifnet *ifp;
1873
int error;
1874
1875
IN6_MULTI_LIST_LOCK_ASSERT();
1876
1877
error = 0;
1878
1879
/*
1880
* Check if the in6_multi has already been disconnected.
1881
*/
1882
if (inm->in6m_ifp == NULL) {
1883
CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
1884
return (0);
1885
}
1886
1887
/*
1888
* Try to detect if the upper layer just asked us to change state
1889
* for an interface which has now gone away.
1890
*/
1891
KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
1892
ifp = inm->in6m_ifma->ifma_ifp;
1893
if (ifp == NULL)
1894
return (0);
1895
/*
1896
* Sanity check that netinet6's notion of ifp is the
1897
* same as net's.
1898
*/
1899
KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
1900
1901
MLD_LOCK();
1902
mli = MLD_IFINFO(ifp);
1903
KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
1904
1905
/*
1906
* If we detect a state transition to or from MCAST_UNDEFINED
1907
* for this group, then we are starting or finishing an MLD
1908
* life cycle for this group.
1909
*/
1910
if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
1911
CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__,
1912
inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode);
1913
if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
1914
CTR1(KTR_MLD, "%s: initial join", __func__);
1915
error = mld_initial_join(inm, mli, delay);
1916
goto out_locked;
1917
} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
1918
CTR1(KTR_MLD, "%s: final leave", __func__);
1919
mld_final_leave(inm, mli);
1920
goto out_locked;
1921
}
1922
} else {
1923
CTR1(KTR_MLD, "%s: filter set change", __func__);
1924
}
1925
1926
error = mld_handle_state_change(inm, mli);
1927
1928
out_locked:
1929
MLD_UNLOCK();
1930
return (error);
1931
}
1932
1933
/*
* Perform the initial join for an MLD group.
*
* When joining a group:
* If the group should have its MLD traffic suppressed, do nothing.
* MLDv1 starts sending MLDv1 host membership reports.
* MLDv2 will schedule an MLDv2 state-change report containing the
* initial state of the membership.
*
* If the delay argument is non-zero, then we must delay sending the
* initial state change for delay ticks (in units of MLD_FASTHZ).
*/
static int
1946
mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
1947
const int delay)
1948
{
1949
struct epoch_tracker et;
1950
struct ifnet *ifp;
1951
struct mbufq *mq;
1952
int error, retval, syncstates;
1953
int odelay;
1954
#ifdef KTR
1955
char ip6tbuf[INET6_ADDRSTRLEN];
1956
#endif
1957
1958
CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
1959
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
1960
inm->in6m_ifp, if_name(inm->in6m_ifp));
1961
1962
error = 0;
1963
syncstates = 1;
1964
1965
ifp = inm->in6m_ifp;
1966
1967
IN6_MULTI_LIST_LOCK_ASSERT();
1968
MLD_LOCK_ASSERT();
1969
1970
KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__));
1971
1972
/*
1973
* Groups joined on loopback or marked as 'not reported',
1974
* enter the MLD_SILENT_MEMBER state and
1975
* are never reported in any protocol exchanges.
1976
* All other groups enter the appropriate state machine
1977
* for the version in use on this link.
1978
* A link marked as MLIF_SILENT causes MLD to be completely
1979
* disabled for the link.
1980
*/
1981
if ((ifp->if_flags & IFF_LOOPBACK) ||
1982
(mli->mli_flags & MLIF_SILENT) ||
1983
!mld_is_addr_reported(&inm->in6m_addr)) {
1984
CTR1(KTR_MLD,
1985
"%s: not kicking state machine for silent group", __func__);
1986
inm->in6m_state = MLD_SILENT_MEMBER;
1987
inm->in6m_timer = 0;
1988
} else {
1989
/*
1990
* Deal with overlapping in_multi lifecycle.
1991
* If this group was LEAVING, then make sure
1992
* we drop the reference we picked up to keep the
1993
* group around for the final INCLUDE {} enqueue.
1994
*/
1995
if (mli->mli_version == MLD_VERSION_2 &&
1996
inm->in6m_state == MLD_LEAVING_MEMBER) {
1997
inm->in6m_refcount--;
1998
MPASS(inm->in6m_refcount > 0);
1999
}
2000
inm->in6m_state = MLD_REPORTING_MEMBER;
2001
2002
switch (mli->mli_version) {
2003
case MLD_VERSION_1:
2004
/*
2005
* If a delay was provided, only use it if
2006
* it is greater than the delay normally
2007
* used for an MLDv1 state change report,
2008
* and delay sending the initial MLDv1 report
2009
* by not transitioning to the IDLE state.
2010
*/
2011
odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * MLD_FASTHZ);
2012
if (delay) {
2013
inm->in6m_timer = max(delay, odelay);
2014
V_current_state_timers_running6 = 1;
2015
} else {
2016
inm->in6m_state = MLD_IDLE_MEMBER;
2017
NET_EPOCH_ENTER(et);
2018
error = mld_v1_transmit_report(inm,
2019
MLD_LISTENER_REPORT);
2020
NET_EPOCH_EXIT(et);
2021
if (error == 0) {
2022
inm->in6m_timer = odelay;
2023
V_current_state_timers_running6 = 1;
2024
}
2025
}
2026
break;
2027
2028
case MLD_VERSION_2:
2029
/*
2030
* Defer update of T0 to T1, until the first copy
2031
* of the state change has been transmitted.
2032
*/
2033
syncstates = 0;
2034
2035
/*
2036
* Immediately enqueue a State-Change Report for
2037
* this interface, freeing any previous reports.
2038
* Don't kick the timers if there is nothing to do,
2039
* or if an error occurred.
2040
*/
2041
mq = &inm->in6m_scq;
2042
mbufq_drain(mq);
2043
retval = mld_v2_enqueue_group_record(mq, inm, 1,
2044
0, 0, (mli->mli_flags & MLIF_USEALLOW));
2045
CTR2(KTR_MLD, "%s: enqueue record = %d",
2046
__func__, retval);
2047
if (retval <= 0) {
2048
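/* Enqueue failures are returned as negative errno values. */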
error = retval * -1;
2049
break;
2050
}
2051
2052
/*
2053
* Schedule transmission of pending state-change
2054
* report up to RV times for this link. The timer
2055
* will fire at the next mld_fasttimo (~200ms),
2056
* giving us an opportunity to merge the reports.
2057
*
2058
* If a delay was provided to this function, only
2059
* use this delay if sooner than the existing one.
2060
*/
2061
KASSERT(mli->mli_rv > 1,
2062
("%s: invalid robustness %d", __func__,
2063
mli->mli_rv));
2064
inm->in6m_scrv = mli->mli_rv;
2065
if (delay) {
2066
if (inm->in6m_sctimer > 1) {
2067
inm->in6m_sctimer =
2068
min(inm->in6m_sctimer, delay);
2069
} else
2070
inm->in6m_sctimer = delay;
2071
} else
2072
inm->in6m_sctimer = 1;
2073
V_state_change_timers_running6 = 1;
2074
2075
error = 0;
2076
break;
2077
}
2078
}
2079
2080
/*
2081
* Only update the T0 state if state change is atomic,
2082
* i.e. we don't need to wait for a timer to fire before we
2083
* can consider the state change to have been communicated.
2084
*/
2085
if (syncstates) {
2086
in6m_commit(inm);
2087
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
2088
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2089
if_name(inm->in6m_ifp));
2090
}
2091
2092
return (error);
2093
}
2094
2095
/*
2096
* Issue an intermediate state change during the life-cycle.
2097
*/
2098
static int
2099
mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli)
2100
{
2101
struct ifnet *ifp;
2102
int retval;
2103
#ifdef KTR
2104
char ip6tbuf[INET6_ADDRSTRLEN];
2105
#endif
2106
2107
CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
2108
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2109
inm->in6m_ifp, if_name(inm->in6m_ifp));
2110
2111
ifp = inm->in6m_ifp;
2112
2113
IN6_MULTI_LIST_LOCK_ASSERT();
2114
MLD_LOCK_ASSERT();
2115
2116
KASSERT(mli && mli->mli_ifp == ifp,
2117
("%s: inconsistent ifp", __func__));
2118
2119
if ((ifp->if_flags & IFF_LOOPBACK) ||
2120
(mli->mli_flags & MLIF_SILENT) ||
2121
!mld_is_addr_reported(&inm->in6m_addr) ||
2122
(mli->mli_version != MLD_VERSION_2)) {
2123
if (!mld_is_addr_reported(&inm->in6m_addr)) {
2124
CTR1(KTR_MLD,
2125
"%s: not kicking state machine for silent group", __func__);
2126
}
2127
CTR1(KTR_MLD, "%s: nothing to do", __func__);
2128
in6m_commit(inm);
2129
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
2130
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2131
if_name(inm->in6m_ifp));
2132
return (0);
2133
}
2134
2135
mbufq_drain(&inm->in6m_scq);
2136
2137
retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2138
(mli->mli_flags & MLIF_USEALLOW));
2139
CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval);
2140
if (retval <= 0)
2141
return (-retval);
2142
2143
/*
2144
* If record(s) were enqueued, start the state-change
2145
* report timer for this group.
2146
*/
2147
inm->in6m_scrv = mli->mli_rv;
2148
inm->in6m_sctimer = 1;
2149
V_state_change_timers_running6 = 1;
2150
2151
return (0);
2152
}
2153
2154
/*
* Perform the final leave for a multicast address.
*
* When leaving a group:
* MLDv1 sends a DONE message, if and only if we are the reporter.
* MLDv2 enqueues a state-change report containing a transition
* to INCLUDE {} for immediate transmission.
*/
static void
2163
mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli)
2164
{
2165
struct epoch_tracker et;
2166
#ifdef KTR
2167
char ip6tbuf[INET6_ADDRSTRLEN];
2168
#endif
2169
2170
CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)",
2171
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2172
inm->in6m_ifp, if_name(inm->in6m_ifp));
2173
2174
IN6_MULTI_LIST_LOCK_ASSERT();
2175
MLD_LOCK_ASSERT();
2176
2177
switch (inm->in6m_state) {
2178
case MLD_NOT_MEMBER:
2179
case MLD_SILENT_MEMBER:
2180
case MLD_LEAVING_MEMBER:
2181
/* Already leaving or left; do nothing. */
2182
CTR1(KTR_MLD,
2183
"%s: not kicking state machine for silent group", __func__);
2184
break;
2185
case MLD_REPORTING_MEMBER:
2186
case MLD_IDLE_MEMBER:
2187
case MLD_G_QUERY_PENDING_MEMBER:
2188
case MLD_SG_QUERY_PENDING_MEMBER:
2189
if (mli->mli_version == MLD_VERSION_1) {
2190
#ifdef INVARIANTS
2191
if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2192
inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER)
2193
panic("%s: MLDv2 state reached, not MLDv2 mode",
2194
__func__);
2195
#endif
2196
NET_EPOCH_ENTER(et);
2197
mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
2198
NET_EPOCH_EXIT(et);
2199
inm->in6m_state = MLD_NOT_MEMBER;
2200
V_current_state_timers_running6 = 1;
2201
} else if (mli->mli_version == MLD_VERSION_2) {
2202
/*
2203
* Stop group timer and all pending reports.
2204
* Immediately enqueue a state-change report
2205
* TO_IN {} to be sent on the next fast timeout,
2206
* giving us an opportunity to merge reports.
2207
*/
2208
mbufq_drain(&inm->in6m_scq);
2209
inm->in6m_timer = 0;
2210
inm->in6m_scrv = mli->mli_rv;
2211
CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
2212
"pending retransmissions.", __func__,
2213
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2214
if_name(inm->in6m_ifp), inm->in6m_scrv);
2215
if (inm->in6m_scrv == 0) {
2216
inm->in6m_state = MLD_NOT_MEMBER;
2217
inm->in6m_sctimer = 0;
2218
} else {
2219
int retval __diagused;
2220
2221
in6m_acquire_locked(inm);
2222
2223
retval = mld_v2_enqueue_group_record(
2224
&inm->in6m_scq, inm, 1, 0, 0,
2225
(mli->mli_flags & MLIF_USEALLOW));
2226
KASSERT(retval != 0,
2227
("%s: enqueue record = %d", __func__,
2228
retval));
2229
2230
inm->in6m_state = MLD_LEAVING_MEMBER;
2231
inm->in6m_sctimer = 1;
2232
V_state_change_timers_running6 = 1;
2233
}
2234
break;
2235
}
2236
break;
2237
case MLD_LAZY_MEMBER:
2238
case MLD_SLEEPING_MEMBER:
2239
case MLD_AWAKENING_MEMBER:
2240
/* Our reports are suppressed; do nothing. */
2241
break;
2242
}
2243
2244
in6m_commit(inm);
2245
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
2246
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2247
if_name(inm->in6m_ifp));
2248
inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2249
CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
2250
__func__, &inm->in6m_addr, if_name(inm->in6m_ifp));
2251
}
2252
2253
/*
* Enqueue an MLDv2 group record to the given output queue.
*
* If is_state_change is zero, a current-state record is appended.
* If is_state_change is non-zero, a state-change report is appended.
*
* If is_group_query is non-zero, an mbuf packet chain is allocated.
* If is_group_query is zero, and if there is a packet with free space
* at the tail of the queue, it will be appended to, provided there
* is enough free space.
* Otherwise a new mbuf packet chain is allocated.
*
* If is_source_query is non-zero, each source is checked to see if
* it was recorded for a Group-Source query, and will be omitted if
* it is not both in-mode and recorded.
*
* If use_block_allow is non-zero, state change reports for initial join
* and final leave, on an inclusive mode group with a source list, will be
* rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
*
* The function will attempt to allocate leading space in the packet
* for the IPv6+ICMP headers to be prepended without fragmenting the chain.
*
* If successful the size of all data appended to the queue is returned,
* otherwise an error code less than zero is returned, or zero if
* no record(s) were appended.
*/
static int
2281
mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm,
2282
const int is_state_change, const int is_group_query,
2283
const int is_source_query, const int use_block_allow)
2284
{
2285
struct mldv2_record mr;
2286
struct mldv2_record *pmr;
2287
struct ifnet *ifp;
2288
struct ip6_msource *ims, *nims;
2289
struct mbuf *m0, *m, *md;
2290
int is_filter_list_change;
2291
int minrec0len, m0srcs, msrcs, nbytes, off;
2292
int record_has_sources;
2293
int now;
2294
int type;
2295
uint8_t mode;
2296
#ifdef KTR
2297
char ip6tbuf[INET6_ADDRSTRLEN];
2298
#endif
2299
2300
IN6_MULTI_LIST_LOCK_ASSERT();
2301
2302
ifp = inm->in6m_ifp;
2303
is_filter_list_change = 0;
2304
m = NULL;
2305
m0 = NULL;
2306
m0srcs = 0;
2307
msrcs = 0;
2308
nbytes = 0;
2309
nims = NULL;
2310
record_has_sources = 1;
2311
pmr = NULL;
2312
type = MLD_DO_NOTHING;
2313
mode = inm->in6m_st[1].iss_fmode;
2314
2315
/*
2316
* If we did not transition out of ASM mode during t0->t1,
2317
* and there are no source nodes to process, we can skip
2318
* the generation of source records.
2319
*/
2320
if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2321
inm->in6m_nsrc == 0)
2322
record_has_sources = 0;
2323
2324
if (is_state_change) {
2325
/*
2326
* Queue a state change record.
2327
* If the mode did not change, and there are non-ASM
2328
* listeners or source filters present,
2329
* we potentially need to issue two records for the group.
2330
* If there are ASM listeners, and there was no filter
2331
* mode transition of any kind, do nothing.
2332
*
2333
* If we are transitioning to MCAST_UNDEFINED, we need
2334
* not send any sources. A transition to/from this state is
2335
* considered inclusive with some special treatment.
2336
*
2337
* If we are rewriting initial joins/leaves to use
2338
* ALLOW/BLOCK, and the group's membership is inclusive,
2339
* we need to send sources in all cases.
2340
*/
2341
if (mode != inm->in6m_st[0].iss_fmode) {
2342
if (mode == MCAST_EXCLUDE) {
2343
CTR1(KTR_MLD, "%s: change to EXCLUDE",
2344
__func__);
2345
type = MLD_CHANGE_TO_EXCLUDE_MODE;
2346
} else {
2347
CTR1(KTR_MLD, "%s: change to INCLUDE",
2348
__func__);
2349
if (use_block_allow) {
2350
/*
2351
* XXX
2352
* Here we're interested in state
2353
* edges either direction between
2354
* MCAST_UNDEFINED and MCAST_INCLUDE.
2355
* Perhaps we should just check
2356
* the group state, rather than
2357
* the filter mode.
2358
*/
2359
if (mode == MCAST_UNDEFINED) {
2360
type = MLD_BLOCK_OLD_SOURCES;
2361
} else {
2362
type = MLD_ALLOW_NEW_SOURCES;
2363
}
2364
} else {
2365
type = MLD_CHANGE_TO_INCLUDE_MODE;
2366
if (mode == MCAST_UNDEFINED)
2367
record_has_sources = 0;
2368
}
2369
}
2370
} else {
2371
if (record_has_sources) {
2372
is_filter_list_change = 1;
2373
} else {
2374
type = MLD_DO_NOTHING;
2375
}
2376
}
2377
} else {
2378
/*
2379
* Queue a current state record.
2380
*/
2381
if (mode == MCAST_EXCLUDE) {
2382
type = MLD_MODE_IS_EXCLUDE;
2383
} else if (mode == MCAST_INCLUDE) {
2384
type = MLD_MODE_IS_INCLUDE;
2385
KASSERT(inm->in6m_st[1].iss_asm == 0,
2386
("%s: inm %p is INCLUDE but ASM count is %d",
2387
__func__, inm, inm->in6m_st[1].iss_asm));
2388
}
2389
}
2390
2391
/*
2392
* Generate the filter list changes using a separate function.
2393
*/
2394
if (is_filter_list_change)
2395
return (mld_v2_enqueue_filter_change(mq, inm));
2396
2397
if (type == MLD_DO_NOTHING) {
2398
CTR3(KTR_MLD, "%s: nothing to do for %s/%s",
2399
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2400
if_name(inm->in6m_ifp));
2401
return (0);
2402
}
2403
2404
/*
2405
* If any sources are present, we must be able to fit at least
2406
* one in the trailing space of the tail packet's mbuf,
2407
* ideally more.
2408
*/
2409
minrec0len = sizeof(struct mldv2_record);
2410
if (record_has_sources)
2411
minrec0len += sizeof(struct in6_addr);
2412
2413
CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__,
2414
mld_rec_type_to_str(type),
2415
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2416
if_name(inm->in6m_ifp));
2417
2418
/*
2419
* Check if we have a packet in the tail of the queue for this
2420
* group into which the first group record for this group will fit.
2421
* Otherwise allocate a new packet.
2422
* Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2423
* Note: Group records for G/GSR query responses MUST be sent
2424
* in their own packet.
2425
*/
2426
m0 = mbufq_last(mq);
2427
if (!is_group_query &&
2428
m0 != NULL &&
2429
(m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2430
(m0->m_pkthdr.len + minrec0len) <
2431
(ifp->if_mtu - MLD_MTUSPACE)) {
2432
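/*
* Compute how many more sources fit in the tail packet without
* exceeding the link MTU.
*/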
m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2433
sizeof(struct mldv2_record)) /
2434
sizeof(struct in6_addr);
2435
m = m0;
2436
CTR1(KTR_MLD, "%s: use existing packet", __func__);
2437
} else {
2438
if (mbufq_full(mq)) {
2439
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
2440
return (-ENOMEM);
2441
}
2442
m = NULL;
2443
m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2444
sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2445
if (!is_state_change && !is_group_query)
2446
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
2447
if (m == NULL)
2448
m = m_gethdr(M_NOWAIT, MT_DATA);
2449
if (m == NULL)
2450
return (-ENOMEM);
2451
2452
mld_save_context(m, ifp);
2453
2454
CTR1(KTR_MLD, "%s: allocated first packet", __func__);
2455
}
2456
2457
/*
2458
* Append group record.
2459
* If we have sources, we don't know how many yet.
2460
*/
2461
mr.mr_type = type;
2462
mr.mr_datalen = 0;
2463
mr.mr_numsrc = 0;
2464
mr.mr_addr = inm->in6m_addr;
2465
in6_clearscope(&mr.mr_addr);
2466
if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2467
if (m != m0)
2468
m_freem(m);
2469
CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
2470
return (-ENOMEM);
2471
}
2472
nbytes += sizeof(struct mldv2_record);
2473
2474
/*
2475
* Append as many sources as will fit in the first packet.
2476
* If we are appending to a new packet, the chain allocation
2477
* may potentially use clusters; use m_getptr() in this case.
2478
* If we are appending to an existing packet, we need to obtain
2479
* a pointer to the group record after m_append(), in case a new
2480
* mbuf was allocated.
2481
*
2482
* Only append sources which are in-mode at t1. If we are
2483
* transitioning to MCAST_UNDEFINED state on the group, and
2484
* use_block_allow is zero, do not include source entries.
2485
* Otherwise, we need to include this source in the report.
2486
*
2487
* Only report recorded sources in our filter set when responding
2488
* to a group-source query.
2489
*/
2490
if (record_has_sources) {
2491
if (m == m0) {
2492
md = m_last(m);
2493
pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2494
md->m_len - nbytes);
2495
} else {
2496
md = m_getptr(m, 0, &off);
2497
pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2498
off);
2499
}
2500
msrcs = 0;
2501
RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2502
nims) {
2503
CTR2(KTR_MLD, "%s: visit node %s", __func__,
2504
ip6_sprintf(ip6tbuf, &ims->im6s_addr));
2505
now = im6s_get_mode(inm, ims, 1);
2506
CTR2(KTR_MLD, "%s: node is %d", __func__, now);
2507
if ((now != mode) ||
2508
(now == mode &&
2509
(!use_block_allow && mode == MCAST_UNDEFINED))) {
2510
CTR1(KTR_MLD, "%s: skip node", __func__);
2511
continue;
2512
}
2513
if (is_source_query && ims->im6s_stp == 0) {
2514
CTR1(KTR_MLD, "%s: skip unrecorded node",
2515
__func__);
2516
continue;
2517
}
2518
CTR1(KTR_MLD, "%s: append node", __func__);
2519
if (!m_append(m, sizeof(struct in6_addr),
2520
(void *)&ims->im6s_addr)) {
2521
if (m != m0)
2522
m_freem(m);
2523
CTR1(KTR_MLD, "%s: m_append() failed.",
2524
__func__);
2525
return (-ENOMEM);
2526
}
2527
nbytes += sizeof(struct in6_addr);
2528
++msrcs;
2529
if (msrcs == m0srcs)
2530
break;
2531
}
2532
CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__,
2533
msrcs);
2534
pmr->mr_numsrc = htons(msrcs);
2535
nbytes += (msrcs * sizeof(struct in6_addr));
2536
}
2537
2538
if (is_source_query && msrcs == 0) {
2539
CTR1(KTR_MLD, "%s: no recorded sources to report", __func__);
2540
if (m != m0)
2541
m_freem(m);
2542
return (0);
2543
}
2544
2545
/*
2546
* We are good to go with first packet.
2547
*/
2548
if (m != m0) {
2549
CTR1(KTR_MLD, "%s: enqueueing first packet", __func__);
2550
m->m_pkthdr.vt_nrecs = 1;
2551
mbufq_enqueue(mq, m);
2552
} else
2553
m->m_pkthdr.vt_nrecs++;
2554
2555
/*
2556
* No further work needed if no source list in packet(s).
2557
*/
2558
if (!record_has_sources)
2559
return (nbytes);
2560
2561
/*
2562
* Whilst sources remain to be announced, we need to allocate
2563
* a new packet and fill out as many sources as will fit.
2564
* Always try for a cluster first.
2565
*/
2566
while (nims != NULL) {
2567
if (mbufq_full(mq)) {
2568
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
2569
return (-ENOMEM);
2570
}
2571
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
2572
if (m == NULL)
2573
m = m_gethdr(M_NOWAIT, MT_DATA);
2574
if (m == NULL)
2575
return (-ENOMEM);
2576
mld_save_context(m, ifp);
2577
md = m_getptr(m, 0, &off);
2578
pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2579
CTR1(KTR_MLD, "%s: allocated next packet", __func__);
2580
2581
if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2582
if (m != m0)
2583
m_freem(m);
2584
CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
2585
return (-ENOMEM);
2586
}
2587
m->m_pkthdr.vt_nrecs = 1;
2588
nbytes += sizeof(struct mldv2_record);
2589
2590
m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2591
sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2592
2593
msrcs = 0;
2594
RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2595
CTR2(KTR_MLD, "%s: visit node %s",
2596
__func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr));
2597
now = im6s_get_mode(inm, ims, 1);
2598
if ((now != mode) ||
2599
(now == mode &&
2600
(!use_block_allow && mode == MCAST_UNDEFINED))) {
2601
CTR1(KTR_MLD, "%s: skip node", __func__);
2602
continue;
2603
}
2604
if (is_source_query && ims->im6s_stp == 0) {
2605
CTR1(KTR_MLD, "%s: skip unrecorded node",
2606
__func__);
2607
continue;
2608
}
2609
CTR1(KTR_MLD, "%s: append node", __func__);
2610
if (!m_append(m, sizeof(struct in6_addr),
2611
(void *)&ims->im6s_addr)) {
2612
if (m != m0)
2613
m_freem(m);
2614
CTR1(KTR_MLD, "%s: m_append() failed.",
2615
__func__);
2616
return (-ENOMEM);
2617
}
2618
++msrcs;
2619
if (msrcs == m0srcs)
2620
break;
2621
}
2622
pmr->mr_numsrc = htons(msrcs);
2623
nbytes += (msrcs * sizeof(struct in6_addr));
2624
2625
CTR1(KTR_MLD, "%s: enqueueing next packet", __func__);
2626
mbufq_enqueue(mq, m);
2627
}
2628
2629
return (nbytes);
2630
}
2631
2632
/*
* Type used to mark record pass completion.
* We exploit the fact we can cast to this easily from the
* current filter modes on each ip_msource node.
*/
typedef enum {
REC_NONE = 0x00, /* MCAST_UNDEFINED */
REC_ALLOW = 0x01, /* MCAST_INCLUDE */
REC_BLOCK = 0x02, /* MCAST_EXCLUDE */
REC_FULL = REC_ALLOW | REC_BLOCK
} rectype_t;

/*
* Enqueue an MLDv2 filter list change to the given output queue.
*
* Source list filter state is held in an RB-tree. When the filter list
* for a group is changed without changing its mode, we need to compute
* the deltas between T0 and T1 for each source in the filter set,
* and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
*
* As we may potentially queue two record types, and the entire R-B tree
* needs to be walked at once, we break this out into its own function
* so we can generate a tightly packed queue of packets.
*
* XXX This could be written to only use one tree walk, although that makes
* serializing into the mbuf chains a bit harder. For now we do two walks
* which makes things easier on us, and it may or may not be harder on
* the L2 cache.
*
* If successful the size of all data appended to the queue is returned,
* otherwise an error code less than zero is returned, or zero if
* no record(s) were appended.
*/
static int
2666
mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm)
2667
{
2668
static const int MINRECLEN =
2669
sizeof(struct mldv2_record) + sizeof(struct in6_addr);
2670
struct ifnet *ifp;
2671
struct mldv2_record mr;
2672
struct mldv2_record *pmr;
2673
struct ip6_msource *ims, *nims;
2674
struct mbuf *m, *m0, *md;
2675
int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
2676
uint8_t mode, now, then;
2677
rectype_t crt, drt, nrt;
2678
#ifdef KTR
2679
int nallow, nblock;
2680
char ip6tbuf[INET6_ADDRSTRLEN];
2681
#endif
2682
2683
IN6_MULTI_LIST_LOCK_ASSERT();
2684
2685
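/*
* Nothing to do if the group has no source filters, or if it did
* not transition out of ASM mode between t0 and t1.
*/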
if (inm->in6m_nsrc == 0 ||
2686
(inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
2687
return (0);
2688
2689
ifp = inm->in6m_ifp; /* interface */
2690
mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */
2691
crt = REC_NONE; /* current group record type */
2692
drt = REC_NONE; /* mask of completed group record types */
2693
nrt = REC_NONE; /* record type for current node */
2694
m0srcs = 0; /* # source which will fit in current mbuf chain */
2695
npbytes = 0; /* # of bytes appended this packet */
2696
nbytes = 0; /* # of bytes appended to group's state-change queue */
2697
rsrcs = 0; /* # sources encoded in current record */
2698
schanged = 0; /* # nodes encoded in overall filter change */
2699
#ifdef KTR
2700
nallow = 0; /* # of source entries in ALLOW_NEW */
2701
nblock = 0; /* # of source entries in BLOCK_OLD */
2702
#endif
2703
nims = NULL; /* next tree node pointer */
2704
2705
/*
2706
* For each possible filter record mode.
2707
* The first kind of source we encounter tells us which
2708
* is the first kind of record we start appending.
2709
* If a node transitioned to UNDEFINED at t1, its mode is treated
2710
* as the inverse of the group's filter mode.
2711
*/
2712
while (drt != REC_FULL) {
2713
do {
2714
m0 = mbufq_last(mq);
2715
if (m0 != NULL &&
2716
(m0->m_pkthdr.vt_nrecs + 1 <=
2717
MLD_V2_REPORT_MAXRECS) &&
2718
(m0->m_pkthdr.len + MINRECLEN) <
2719
(ifp->if_mtu - MLD_MTUSPACE)) {
2720
m = m0;
2721
m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2722
sizeof(struct mldv2_record)) /
2723
sizeof(struct in6_addr);
2724
CTR1(KTR_MLD,
2725
"%s: use previous packet", __func__);
2726
} else {
2727
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
2728
if (m == NULL)
2729
m = m_gethdr(M_NOWAIT, MT_DATA);
2730
if (m == NULL) {
2731
CTR1(KTR_MLD,
2732
"%s: m_get*() failed", __func__);
2733
return (-ENOMEM);
2734
}
2735
m->m_pkthdr.vt_nrecs = 0;
2736
mld_save_context(m, ifp);
2737
m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2738
sizeof(struct mldv2_record)) /
2739
sizeof(struct in6_addr);
2740
npbytes = 0;
2741
CTR1(KTR_MLD,
2742
"%s: allocated new packet", __func__);
2743
}
2744
/*
2745
* Append the MLD group record header to the
2746
* current packet's data area.
2747
* Recalculate pointer to free space for next
2748
* group record, in case m_append() allocated
2749
* a new mbuf or cluster.
2750
*/
2751
memset(&mr, 0, sizeof(mr));
2752
mr.mr_addr = inm->in6m_addr;
2753
in6_clearscope(&mr.mr_addr);
2754
if (!m_append(m, sizeof(mr), (void *)&mr)) {
2755
if (m != m0)
2756
m_freem(m);
2757
CTR1(KTR_MLD,
2758
"%s: m_append() failed", __func__);
2759
return (-ENOMEM);
2760
}
2761
npbytes += sizeof(struct mldv2_record);
2762
if (m != m0) {
2763
/* new packet; offset in chain */
2764
md = m_getptr(m, npbytes -
2765
sizeof(struct mldv2_record), &off);
2766
pmr = (struct mldv2_record *)(mtod(md,
2767
uint8_t *) + off);
2768
} else {
2769
/* current packet; offset from last append */
2770
md = m_last(m);
2771
pmr = (struct mldv2_record *)(mtod(md,
2772
uint8_t *) + md->m_len -
2773
sizeof(struct mldv2_record));
2774
}
2775
/*
2776
* Begin walking the tree for this record type
2777
* pass, or continue from where we left off
2778
* previously if we had to allocate a new packet.
2779
* Only report deltas in-mode at t1.
2780
* We need not report included sources as allowed
2781
* if we are in inclusive mode on the group,
2782
* however the converse is not true.
2783
*/
2784
rsrcs = 0;
2785
if (nims == NULL) {
2786
nims = RB_MIN(ip6_msource_tree,
2787
&inm->in6m_srcs);
2788
}
2789
RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2790
CTR2(KTR_MLD, "%s: visit node %s", __func__,
2791
ip6_sprintf(ip6tbuf, &ims->im6s_addr));
2792
now = im6s_get_mode(inm, ims, 1);
2793
then = im6s_get_mode(inm, ims, 0);
2794
CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d",
2795
__func__, then, now);
2796
if (now == then) {
2797
CTR1(KTR_MLD,
2798
"%s: skip unchanged", __func__);
2799
continue;
2800
}
2801
if (mode == MCAST_EXCLUDE &&
2802
now == MCAST_INCLUDE) {
2803
CTR1(KTR_MLD,
2804
"%s: skip IN src on EX group",
2805
__func__);
2806
continue;
2807
}
2808
nrt = (rectype_t)now;
2809
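/*
* A source that is undefined at t1 is reported using the record
* type opposite to the group's current filter mode.
*/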
if (nrt == REC_NONE)
2810
nrt = (rectype_t)(~mode & REC_FULL);
2811
if (schanged++ == 0) {
2812
crt = nrt;
2813
} else if (crt != nrt)
2814
continue;
2815
if (!m_append(m, sizeof(struct in6_addr),
2816
(void *)&ims->im6s_addr)) {
2817
if (m != m0)
2818
m_freem(m);
2819
CTR1(KTR_MLD,
2820
"%s: m_append() failed", __func__);
2821
return (-ENOMEM);
2822
}
2823
#ifdef KTR
2824
nallow += !!(crt == REC_ALLOW);
2825
nblock += !!(crt == REC_BLOCK);
2826
#endif
2827
if (++rsrcs == m0srcs)
2828
break;
2829
}
2830
/*
2831
* If we did not append any tree nodes on this
2832
* pass, back out of allocations.
2833
*/
2834
if (rsrcs == 0) {
2835
npbytes -= sizeof(struct mldv2_record);
2836
if (m != m0) {
2837
CTR1(KTR_MLD,
2838
"%s: m_free(m)", __func__);
2839
m_freem(m);
2840
} else {
2841
CTR1(KTR_MLD,
2842
"%s: m_adj(m, -mr)", __func__);
2843
m_adj(m, -((int)sizeof(
2844
struct mldv2_record)));
2845
}
2846
continue;
2847
}
2848
npbytes += (rsrcs * sizeof(struct in6_addr));
2849
if (crt == REC_ALLOW)
2850
pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
2851
else if (crt == REC_BLOCK)
2852
pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
2853
pmr->mr_numsrc = htons(rsrcs);
2854
/*
2855
* Count the new group record, and enqueue this
2856
* packet if it wasn't already queued.
2857
*/
2858
m->m_pkthdr.vt_nrecs++;
2859
if (m != m0)
2860
mbufq_enqueue(mq, m);
2861
nbytes += npbytes;
2862
} while (nims != NULL);
2863
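/*
* Mark this record type pass as complete and switch to the other
* type (ALLOW_NEW <-> BLOCK_OLD) for the next walk of the tree.
*/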
drt |= crt;
2864
crt = (~crt & REC_FULL);
2865
}
2866
2867
CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
2868
nallow, nblock);
2869
2870
return (nbytes);
2871
}
2872
2873
static int
2874
mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
2875
{
2876
struct mbufq *gq;
2877
struct mbuf *m; /* pending state-change */
2878
struct mbuf *m0; /* copy of pending state-change */
2879
struct mbuf *mt; /* last state-change in packet */
2880
int docopy, domerge;
2881
u_int recslen;
2882
2883
docopy = 0;
2884
domerge = 0;
2885
recslen = 0;
2886
2887
IN6_MULTI_LIST_LOCK_ASSERT();
2888
MLD_LOCK_ASSERT();
2889
2890
/*
2891
* If there are further pending retransmissions, make a writable
2892
* copy of each queued state-change message before merging.
2893
*/
2894
if (inm->in6m_scrv > 0)
2895
docopy = 1;
2896
2897
gq = &inm->in6m_scq;
2898
#ifdef KTR
2899
if (mbufq_first(gq) == NULL) {
2900
CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
2901
__func__, inm);
2902
}
2903
#endif
2904
2905
m = mbufq_first(gq);
2906
while (m != NULL) {
2907
/*
* Only merge the report into the current packet if
* there is sufficient space to do so; an MLDv2 report
* packet may only contain 65,535 group records.
* Always use a simple mbuf chain concatenation to do this,
* as large state changes for single groups may have
* allocated clusters.
*/
domerge = 0;
2916
mt = mbufq_last(scq);
2917
if (mt != NULL) {
2918
recslen = m_length(m, NULL);
2919
2920
if ((mt->m_pkthdr.vt_nrecs +
2921
m->m_pkthdr.vt_nrecs <=
2922
MLD_V2_REPORT_MAXRECS) &&
2923
(mt->m_pkthdr.len + recslen <=
2924
(inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
2925
domerge = 1;
2926
}
2927
2928
if (!domerge && mbufq_full(gq)) {
2929
CTR2(KTR_MLD,
2930
"%s: outbound queue full, skipping whole packet %p",
2931
__func__, m);
2932
mt = m->m_nextpkt;
2933
if (!docopy)
2934
m_freem(m);
2935
m = mt;
2936
continue;
2937
}
2938
2939
if (!docopy) {
2940
CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
2941
m0 = mbufq_dequeue(gq);
2942
m = m0->m_nextpkt;
2943
} else {
2944
CTR2(KTR_MLD, "%s: copying %p", __func__, m);
2945
m0 = m_dup(m, M_NOWAIT);
2946
if (m0 == NULL)
2947
return (ENOMEM);
2948
m0->m_nextpkt = NULL;
2949
m = m->m_nextpkt;
2950
}
2951
2952
if (!domerge) {
2953
CTR3(KTR_MLD, "%s: queueing %p to scq %p)",
2954
__func__, m0, scq);
2955
mbufq_enqueue(scq, m0);
2956
} else {
2957
struct mbuf *mtl; /* last mbuf of packet mt */
2958
2959
CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p)",
2960
__func__, m0, mt);
2961
2962
mtl = m_last(mt);
2963
m0->m_flags &= ~M_PKTHDR;
2964
mt->m_pkthdr.len += recslen;
2965
mt->m_pkthdr.vt_nrecs +=
2966
m0->m_pkthdr.vt_nrecs;
2967
2968
mtl->m_next = m0;
2969
}
2970
}
2971
2972
return (0);
2973
}
2974
2975
/*
2976
* Respond to a pending MLDv2 General Query.
2977
*/
2978
static void
2979
mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
2980
{
2981
struct ifmultiaddr *ifma;
2982
struct ifnet *ifp;
2983
struct in6_multi *inm;
2984
int retval __unused;
2985
2986
NET_EPOCH_ASSERT();
2987
IN6_MULTI_LIST_LOCK_ASSERT();
2988
MLD_LOCK_ASSERT();
2989
2990
KASSERT(mli->mli_version == MLD_VERSION_2,
2991
("%s: called when version %d", __func__, mli->mli_version));
2992
2993
/*
* Check that there are some packets queued. If so, send them first.
* For a large number of groups the reply to a general query can take
* many packets; we should finish sending them before starting to
* queue the new reply.
*/
if (!mbufq_empty(&mli->mli_gq))
3000
goto send;
3001
3002
ifp = mli->mli_ifp;
3003
3004
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3005
inm = in6m_ifmultiaddr_get_inm(ifma);
3006
if (inm == NULL)
3007
continue;
3008
KASSERT(ifp == inm->in6m_ifp,
3009
("%s: inconsistent ifp", __func__));
3010
3011
switch (inm->in6m_state) {
3012
case MLD_NOT_MEMBER:
3013
case MLD_SILENT_MEMBER:
3014
break;
3015
case MLD_REPORTING_MEMBER:
3016
case MLD_IDLE_MEMBER:
3017
case MLD_LAZY_MEMBER:
3018
case MLD_SLEEPING_MEMBER:
3019
case MLD_AWAKENING_MEMBER:
3020
inm->in6m_state = MLD_REPORTING_MEMBER;
3021
retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3022
inm, 0, 0, 0, 0);
3023
CTR2(KTR_MLD, "%s: enqueue record = %d",
3024
__func__, retval);
3025
break;
3026
case MLD_G_QUERY_PENDING_MEMBER:
3027
case MLD_SG_QUERY_PENDING_MEMBER:
3028
case MLD_LEAVING_MEMBER:
3029
break;
3030
}
3031
}
3032
3033
send:
3034
mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3035
3036
/*
3037
* Slew transmission of bursts over 500ms intervals.
3038
*/
3039
if (mbufq_first(&mli->mli_gq) != NULL) {
3040
mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3041
MLD_RESPONSE_BURST_INTERVAL);
3042
V_interface_timers_running6 = 1;
3043
}
3044
}
3045
3046
/*
3047
* Transmit the next pending message in the output queue.
3048
*
3049
* VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
3050
* MRT: Nothing needs to be done, as MLD traffic is always local to
3051
* a link and uses a link-scope multicast address.
3052
*/
3053
static void
3054
mld_dispatch_packet(struct mbuf *m)
3055
{
3056
struct ip6_moptions im6o;
3057
struct ifnet *ifp;
3058
struct ifnet *oifp;
3059
struct mbuf *m0;
3060
struct mbuf *md;
3061
struct ip6_hdr *ip6;
3062
struct mld_hdr *mld;
3063
int error;
3064
int off;
3065
int type;
3066
uint32_t ifindex;
3067
3068
CTR2(KTR_MLD, "%s: transmit %p", __func__, m);
3069
NET_EPOCH_ASSERT();
3070
3071
/*
3072
* Set VNET image pointer from enqueued mbuf chain
3073
* before doing anything else. Whilst we use interface
3074
* indexes to guard against interface detach, they are
3075
* unique to each VIMAGE and must be retrieved.
3076
*/
3077
ifindex = mld_restore_context(m);
3078
3079
/*
3080
* Check if the ifnet still exists. This limits the scope of
3081
* any race in the absence of a global ifp lock for low cost
3082
* (an array lookup).
3083
*/
3084
ifp = ifnet_byindex(ifindex);
3085
if (ifp == NULL) {
3086
CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.",
3087
__func__, m, ifindex);
3088
m_freem(m);
3089
IP6STAT_INC(ip6s_noroute);
3090
goto out;
3091
}
3092
3093
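/* MLD messages are always sent with a hop limit of 1. */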
im6o.im6o_multicast_hlim = 1;
3094
im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL);
3095
im6o.im6o_multicast_ifp = ifp;
3096
3097
if (m->m_flags & M_MLDV1) {
3098
m0 = m;
3099
} else {
3100
m0 = mld_v2_encap_report(ifp, m);
3101
if (m0 == NULL) {
3102
CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
3103
IP6STAT_INC(ip6s_odropped);
3104
goto out;
3105
}
3106
}
3107
3108
mld_scrub_context(m0);
3109
m_clrprotoflags(m);
3110
m0->m_pkthdr.rcvif = V_loif;
3111
3112
ip6 = mtod(m0, struct ip6_hdr *);
3113
#if 0
3114
(void)in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */
3115
#else
3116
/*
3117
* XXX XXX Break some KPI rules to prevent an LOR which would
3118
* occur if we called in6_setscope() at transmission.
3119
* See comments at top of file.
3120
*/
3121
MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
3122
#endif
3123
3124
/*
3125
* Retrieve the ICMPv6 type before handoff to ip6_output(),
3126
* so we can bump the stats.
3127
*/
3128
md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3129
mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3130
type = mld->mld_type;
3131
3132
oifp = NULL;
3133
error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o,
3134
&oifp, NULL);
3135
if (error) {
3136
CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error);
3137
goto out;
3138
}
3139
ICMP6STAT_INC2(icp6s_outhist, type);
3140
if (oifp != NULL) {
3141
icmp6_ifstat_inc(oifp, ifs6_out_msg);
3142
switch (type) {
3143
case MLD_LISTENER_REPORT:
3144
case MLDV2_LISTENER_REPORT:
3145
icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3146
break;
3147
case MLD_LISTENER_DONE:
3148
icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3149
break;
3150
}
3151
}
3152
out:
3153
return;
3154
}
3155
3156
/*
3157
* Encapsulate an MLDv2 report.
3158
*
3159
* KAME IPv6 requires that hop-by-hop options be passed separately,
3160
* and that the IPv6 header be prepended in a separate mbuf.
3161
*
3162
* Returns a pointer to the new mbuf chain head, or NULL if the
3163
* allocation failed.
3164
*/
3165
static struct mbuf *
3166
mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3167
{
3168
struct mbuf *mh;
3169
struct mldv2_report *mld;
3170
struct ip6_hdr *ip6;
3171
struct in6_ifaddr *ia;
3172
int mldreclen;
3173
3174
KASSERT(ifp != NULL, ("%s: null ifp", __func__));
3175
KASSERT((m->m_flags & M_PKTHDR),
3176
("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
3177
3178
/*
3179
* RFC3590: OK to send as :: or tentative during DAD.
3180
*/
3181
NET_EPOCH_ASSERT();
3182
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3183
if (ia == NULL)
3184
CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
3185
3186
mh = m_gethdr(M_NOWAIT, MT_DATA);
3187
if (mh == NULL) {
3188
if (ia != NULL)
3189
ifa_free(&ia->ia_ifa);
3190
m_freem(m);
3191
return (NULL);
3192
}
3193
M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3194
3195
mldreclen = m_length(m, NULL);
3196
CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);
3197
3198
mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3199
mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3200
sizeof(struct mldv2_report) + mldreclen;
3201
3202
ip6 = mtod(mh, struct ip6_hdr *);
3203
ip6->ip6_flow = 0;
3204
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3205
ip6->ip6_vfc |= IPV6_VERSION;
3206
ip6->ip6_nxt = IPPROTO_ICMPV6;
3207
ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3208
if (ia != NULL)
3209
ifa_free(&ia->ia_ifa);
3210
ip6->ip6_dst = in6addr_linklocal_allv2routers;
3211
/* scope ID will be set in netisr */
3212
3213
mld = (struct mldv2_report *)(ip6 + 1);
3214
mld->mld_type = MLDV2_LISTENER_REPORT;
3215
mld->mld_code = 0;
3216
mld->mld_cksum = 0;
3217
mld->mld_v2_reserved = 0;
3218
mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3219
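/* The record count now lives in the report header; clear it on the payload chain. */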
m->m_pkthdr.vt_nrecs = 0;
3220
3221
mh->m_next = m;
3222
mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3223
sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3224
return (mh);
3225
}

#ifdef KTR
static char *
mld_rec_type_to_str(const int type)
{

switch (type) {
case MLD_CHANGE_TO_EXCLUDE_MODE:
return "TO_EX";
break;
case MLD_CHANGE_TO_INCLUDE_MODE:
return "TO_IN";
break;
case MLD_MODE_IS_EXCLUDE:
return "MODE_EX";
break;
case MLD_MODE_IS_INCLUDE:
return "MODE_IN";
break;
case MLD_ALLOW_NEW_SOURCES:
return "ALLOW_NEW";
break;
case MLD_BLOCK_OLD_SOURCES:
return "BLOCK_OLD";
break;
default:
break;
}
return "unknown";
}
#endif

static void
mld_init(void *unused __unused)
{

CTR1(KTR_MLD, "%s: initializing", __func__);
MLD_LOCK_INIT();

ip6_initpktopts(&mld_po);
mld_po.ip6po_hlim = 1;
mld_po.ip6po_hbh = &mld_ra.hbh;
mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
mld_po.ip6po_flags = IP6PO_DONTFRAG;

callout_init(&mldslow_callout, 1);
callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL);
callout_init(&mldfast_callout, 1);
callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL);
}
SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL);

static void
mld_uninit(void *unused __unused)
{

CTR1(KTR_MLD, "%s: tearing down", __func__);
callout_drain(&mldslow_callout);
callout_drain(&mldfast_callout);
MLD_LOCK_DESTROY();
}
SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL);

static void
vnet_mld_init(const void *unused __unused)
{

CTR1(KTR_MLD, "%s: initializing", __func__);

LIST_INIT(&V_mli_head);
}
VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init,
NULL);

static void
vnet_mld_uninit(const void *unused __unused)
{

/* This can happen if we shutdown the network stack. */
CTR1(KTR_MLD, "%s: tearing down", __func__);
}
VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
NULL);

static int
mld_modevent(module_t mod, int type, void *unused __unused)
{

switch (type) {
case MOD_LOAD:
case MOD_UNLOAD:
break;
default:
return (EOPNOTSUPP);
}
return (0);
}

static moduledata_t mld_mod = {
"mld",
mld_modevent,
0
};
DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY);