GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/pf/if_pfsync.c
/*-
 * SPDX-License-Identifier: (BSD-2-Clause AND ISC)
 *
 * Copyright (c) 2002 Michael Shalayeff
 * Copyright (c) 2012 Gleb Smirnoff <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2009 David Gwynne <[email protected]>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
 *
 * Revisions picked from OpenBSD after revision 1.110 import:
 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
 * 1.120, 1.175 - use monotonic time_uptime
 * 1.122 - reduce number of updates for non-TCP sessions
 * 1.125, 1.127 - rewrite merge or stale processing
 * 1.128 - cleanups
 * 1.146 - bzero() mbuf before sparsely filling it with data
 * 1.170 - SIOCSIFMTU checks
 * 1.126, 1.142 - deferred packets processing
 * 1.173 - correct expire time processing
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/nv.h>
#include <sys/priv.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/route.h>
#include <net/if_pfsync.h>

#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>

#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>

#include <netpfil/pf/pfsync_nv.h>

struct pfsync_bucket;
struct pfsync_softc;

union inet_template {
	struct ip	ipv4;
	struct ip6_hdr	ipv6;
};

#define PFSYNC_MINPKT ( \
	sizeof(union inet_template) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) )
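
/*
 * PFSYNC_MINPKT above is the smallest packet the driver accounts for:
 * one IP header template (the union reserves room for the larger IPv6
 * header), the pfsync header, and a single subheader, before any state
 * records are appended.
 */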

static int	pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *,
		    struct pf_state_peer_export *);
static int	pfsync_in_clr(struct mbuf *, int, int, int, int);
static int	pfsync_in_ins(struct mbuf *, int, int, int, int);
static int	pfsync_in_iack(struct mbuf *, int, int, int, int);
static int	pfsync_in_upd(struct mbuf *, int, int, int, int);
static int	pfsync_in_upd_c(struct mbuf *, int, int, int, int);
static int	pfsync_in_ureq(struct mbuf *, int, int, int, int);
static int	pfsync_in_del_c(struct mbuf *, int, int, int, int);
static int	pfsync_in_bus(struct mbuf *, int, int, int, int);
static int	pfsync_in_tdb(struct mbuf *, int, int, int, int);
static int	pfsync_in_eof(struct mbuf *, int, int, int, int);
static int	pfsync_in_error(struct mbuf *, int, int, int, int);

static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1301 */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1301 */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_error,		/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof,			/* PFSYNC_ACT_EOF */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1400 */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1400 */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1500 */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1500 */
};
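
/*
 * The table above is indexed by the subheader action code, so its order
 * must match the PFSYNC_ACT_* definitions. Actions this implementation
 * does not accept (DEL, INS_F, DEL_F) route to pfsync_in_error(), which
 * counts them as bad actions and aborts processing of the packet.
 */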

struct pfsync_q {
	void		(*write)(struct pf_kstate *, void *);
	size_t		len;
	u_int8_t	action;
};

/* We have the following sync queues */
enum pfsync_q_id {
	PFSYNC_Q_INS_1301,
	PFSYNC_Q_INS_1400,
	PFSYNC_Q_INS_1500,
	PFSYNC_Q_IACK,
	PFSYNC_Q_UPD_1301,
	PFSYNC_Q_UPD_1400,
	PFSYNC_Q_UPD_1500,
	PFSYNC_Q_UPD_C,
	PFSYNC_Q_DEL_C,
	PFSYNC_Q_COUNT,
};

/* Functions for building messages for given queue */
static void	pfsync_out_state_1301(struct pf_kstate *, void *);
static void	pfsync_out_state_1400(struct pf_kstate *, void *);
static void	pfsync_out_state_1500(struct pf_kstate *, void *);
static void	pfsync_out_iack(struct pf_kstate *, void *);
static void	pfsync_out_upd_c(struct pf_kstate *, void *);
static void	pfsync_out_del_c(struct pf_kstate *, void *);

/* Attach those functions to queue */
static struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 },
	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 },
	{ pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 },
	{ pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 },
	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 },
	{ pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
	{ pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }
};

/* Map queue to pf_kstate->sync_state */
static u_int8_t pfsync_qid_sstate[] = {
	PFSYNC_S_INS,	/* PFSYNC_Q_INS_1301 */
	PFSYNC_S_INS,	/* PFSYNC_Q_INS_1400 */
	PFSYNC_S_INS,	/* PFSYNC_Q_INS_1500 */
	PFSYNC_S_IACK,	/* PFSYNC_Q_IACK */
	PFSYNC_S_UPD,	/* PFSYNC_Q_UPD_1301 */
	PFSYNC_S_UPD,	/* PFSYNC_Q_UPD_1400 */
	PFSYNC_S_UPD,	/* PFSYNC_Q_UPD_1500 */
	PFSYNC_S_UPD_C,	/* PFSYNC_Q_UPD_C */
	PFSYNC_S_DEL_C,	/* PFSYNC_Q_DEL_C */
};

/* Map pf_kstate->sync_state to queue */
static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t);

static void	pfsync_q_ins(struct pf_kstate *, int sync_state, bool);
static void	pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *);

static void	pfsync_update_state(struct pf_kstate *);
static void	pfsync_tx(struct pfsync_softc *, struct mbuf *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};

struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
	struct callout			pd_tmo;

	struct pf_kstate		*pd_st;
	struct mbuf			*pd_m;
};

struct pfsync_bucket
{
	int			b_id;
	struct pfsync_softc	*b_sc;
	struct mtx		b_mtx;
	struct callout		b_tmo;
	int			b_flags;
#define	PFSYNCF_BUCKET_PUSH	0x00000001

	size_t			b_len;
	TAILQ_HEAD(, pf_kstate)			b_qs[PFSYNC_Q_COUNT];
	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;
	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;
	u_int			b_deferred;
	uint8_t			*b_plus;
	size_t			b_pluslen;

	struct ifaltq b_snd;
};
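
/*
 * Outbound work is sharded across pfsync_buckets (by default twice the
 * number of CPUs, see pfsync_clone_create()). Each bucket carries its own
 * mutex, queues, deferral list and callout, so state updates hashed to
 * different buckets do not contend on a single lock.
 */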

struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;
	struct ifnet		*sc_sync_if;
	struct ip_moptions	sc_imo;
	struct ip6_moptions	sc_im6o;
	struct sockaddr_storage	sc_sync_peer;
	uint32_t		sc_flags;
	uint8_t			sc_maxupdates;
	union inet_template	sc_template;
	struct mtx		sc_mtx;
	uint32_t		sc_version;

	/* Queued data */
	struct pfsync_bucket	*sc_buckets;

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};

#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)

#define	PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
#define	PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
#define	PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)

#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)

#define	PFSYNC_DEFER_TIMEOUT	20
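
/*
 * PFSYNC_DEFER_TIMEOUT is expressed in milliseconds; it is the default
 * value behind the net.pfsync.defer_delay sysctl declared below.
 */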

static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
VNET_DEFINE_STATIC(struct pfsync_softc	*, pfsyncif) = NULL;
#define	V_pfsyncif		VNET(pfsyncif)
VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie);
#define	V_pfsync_swi_ie		VNET(pfsync_swi_ie)
VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
#define	V_pfsyncstats		VNET(pfsyncstats)
VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT;
#define	V_pfsync_defer_timeout	VNET(pfsync_defer_timeout)

static void	pfsync_timeout(void *);
static void	pfsync_push(struct pfsync_bucket *);
static void	pfsync_push_all(struct pfsync_softc *);
static void	pfsyncintr(void *);
static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
		    struct in_mfilter *, struct in6_mfilter *);
static void	pfsync_multicast_cleanup(struct pfsync_softc *);
static void	pfsync_pointers_init(void);
static void	pfsync_pointers_uninit(void);
static int	pfsync_init(void);
static void	pfsync_uninit(void);

static unsigned long pfsync_buckets;

SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsyncstats), pfsyncstats,
    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
    &pfsync_buckets, 0, "Number of pfsync hash buckets");
SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)");

static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
static void	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_alloc_scrub_memory(struct pf_state_peer_export *,
		    struct pf_state_peer *);
static int	pfsyncoutput(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *);
static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);

static int	pfsync_defer(struct pf_kstate *, struct mbuf *);
static void	pfsync_undefer(struct pfsync_deferral *, int);
static void	pfsync_undefer_state_locked(struct pf_kstate *, int);
static void	pfsync_undefer_state(struct pf_kstate *, int);
static void	pfsync_defer_tmo(void *);

static void	pfsync_request_update(u_int32_t, u_int64_t);
static bool	pfsync_update_state_req(struct pf_kstate *);

static void	pfsync_drop_all(struct pfsync_softc *);
static void	pfsync_drop(struct pfsync_softc *, int);
static void	pfsync_sendout(int, int);
static void	pfsync_send_plus(void *, size_t);

static void	pfsync_bulk_start(void);
static void	pfsync_bulk_status(u_int8_t);
static void	pfsync_bulk_update(void *);
static void	pfsync_bulk_fail(void *);

static void	pfsync_detach_ifnet(struct ifnet *);

static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *,
    struct pfsync_kstatus *);
static int pfsync_kstatus_to_softc(struct pfsync_kstatus *,
    struct pfsync_softc *);

#ifdef IPSEC
static void	pfsync_update_net_tdb(struct pfsync_tdb *);
#endif
static struct pfsync_bucket	*pfsync_get_bucket(struct pfsync_softc *,
		    struct pf_kstate *);

#define	PFSYNC_MAX_BULKTRIES	12

VNET_DEFINE(struct if_clone *, pfsync_cloner);
#define	V_pfsync_cloner	VNET(pfsync_cloner)

const struct in6_addr in6addr_linklocal_pfsync_group =
	{{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}};
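
/*
 * The bytes above spell ff12::f0, a transient multicast group of
 * link-local scope (flag nibble 1, scope nibble 2); presumably it is the
 * group joined via pfsync_multicast_setup() (defined elsewhere in this
 * file) when IPv6 syncing uses multicast rather than a unicast peer.
 */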
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	struct pfsync_bucket *b;
	int c;
	enum pfsync_q_id q;

	if (unit != 0)
		return (EINVAL);

	if (! pfsync_buckets)
		pfsync_buckets = mp_ncpus * 2;

	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
	sc->sc_flags |= PFSYNCF_OK;
	sc->sc_maxupdates = 128;
	sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
	sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
	    M_PFSYNC, M_ZERO | M_WAITOK);
	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);

		b->b_id = c;
		b->b_sc = sc;
		b->b_len = PFSYNC_MINPKT;

		for (q = 0; q < PFSYNC_Q_COUNT; q++)
			TAILQ_INIT(&b->b_qs[q]);

		TAILQ_INIT(&b->b_upd_req_list);
		TAILQ_INIT(&b->b_deferrals);

		callout_init(&b->b_tmo, 1);

		b->b_snd.ifq_maxlen = ifqmaxlen;
	}

	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
	if_initname(ifp, pfsyncname, unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);

	if_attach(ifp);

	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);

	V_pfsyncif = sc;

	return (0);
}

static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_bucket *b;
	int c, ret;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		/*
		 * At this stage, everything should have already been
		 * cleared by pfsync_uninit(), and we have only to
		 * drain callouts.
		 */
		PFSYNC_BUCKET_LOCK(b);
		while (b->b_deferred > 0) {
			struct pfsync_deferral *pd =
			    TAILQ_FIRST(&b->b_deferrals);

			ret = callout_stop(&pd->pd_tmo);
			if (ret > 0) {
				pfsync_undefer(pd, 1);
			} else {
				PFSYNC_BUCKET_UNLOCK(b);
				callout_drain(&pd->pd_tmo);
				PFSYNC_BUCKET_LOCK(b);
			}
		}
		MPASS(b->b_deferred == 0);
		MPASS(TAILQ_EMPTY(&b->b_deferrals));
		PFSYNC_BUCKET_UNLOCK(b);

		free(b->b_plus, M_PFSYNC);
		b->b_plus = NULL;
		b->b_pluslen = 0;

		callout_drain(&b->b_tmo);
	}

	callout_drain(&sc->sc_bulkfail_tmo);
	callout_drain(&sc->sc_bulk_tmo);

	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
	bpfdetach(ifp);
	if_detach(ifp);

	pfsync_drop_all(sc);

	if_free(ifp);
	pfsync_multicast_cleanup(sc);
	mtx_destroy(&sc->sc_mtx);
	mtx_destroy(&sc->sc_bulk_mtx);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		mtx_destroy(&b->b_mtx);
	}
	free(sc->sc_buckets, M_PFSYNC);
	free(sc, M_PFSYNC);

	V_pfsyncif = NULL;
}

static int
pfsync_alloc_scrub_memory(struct pf_state_peer_export *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}
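
/*
 * pfsync_state_import() below turns a wire-format state record (any of
 * the 13.x, 14.x or 15.x layouts) into a local pf_kstate and inserts it
 * into the state table. Roughly: validate identifiers and interfaces,
 * bind the state to the matching rule when the peer's ruleset checksum
 * matches, recover or copy routing information depending on the message
 * version, allocate state keys and scrub memory, then pf_state_insert().
 */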

static int
pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
{
	struct pfsync_softc *sc = V_pfsyncif;
#ifndef	__NO_STRICT_ALIGNMENT
	struct pfsync_state_key key[2];
#endif
	struct pfsync_state_key *kw, *ks;
	struct pf_kstate	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_krule *r = NULL;
	struct pfi_kkif	*kif, *orig_kif;
	struct pfi_kkif	*rt_kif = NULL;
	struct pf_kpooladdr	*rpool_first;
	int error;
	int n = 0;
	sa_family_t rt_af = 0;
	uint8_t rt = 0;
	sa_family_t wire_af, stack_af;
	u_int8_t wire_proto, stack_proto;

	PF_RULES_RASSERT();

	if (strnlen(sp->pfs_1301.ifname, IFNAMSIZ) == IFNAMSIZ)
		return (EINVAL);

	if (sp->pfs_1301.creatorid == 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: invalid creator id: %08x\n", __func__,
			    ntohl(sp->pfs_1301.creatorid));
		return (EINVAL);
	}

	/*
	 * Check interfaces early on, before allocating memory etc.,
	 * because there is a high chance there will be a lot more such
	 * states.
	 */
	if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: unknown interface: %s\n", __func__,
			    sp->pfs_1301.ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * States created with a floating interface policy can be
	 * synchronized to hosts with different interfaces, because they
	 * are bound to V_pfi_all. But s->orig_kif still points to a real
	 * interface. Don't abort importing the state if orig_kif does not
	 * exist on the importing host but the state is not interface-bound.
	 */
	if (msg_version == PFSYNC_MSG_VERSION_1500) {
		orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname);
		if (orig_kif == NULL) {
			if (kif == V_pfi_all) {
				orig_kif = kif;
			} else {
				if (V_pf_status.debug >= PF_DEBUG_MISC)
					printf("%s: unknown original interface:"
					    " %s\n", __func__,
					    sp->pfs_1500.orig_ifname);
				if (flags & PFSYNC_SI_IOCTL)
					return (EINVAL);
				return (0);	/* skip this state */
			}
		}
	}

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) {
		TAILQ_FOREACH(r, pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr, entries)
			if (ntohl(sp->pfs_1301.rule) == n++)
				break;
	} else
		r = &V_pf_default_rule;

	switch (msg_version) {
	case PFSYNC_MSG_VERSION_1301:
		/*
		 * On FreeBSD <= 13 the routing interface and routing operation
		 * are not sent over pfsync. If the ruleset is identical,
		 * though, we might be able to recover the routing information
		 * from the local ruleset.
		 */
		if (r != &V_pf_default_rule) {
			struct pf_kpool *pool = &r->route;

			/* Backwards compatibility. */
			if (TAILQ_EMPTY(&pool->list))
				pool = &r->rdr;

			/*
			 * The ruleset is identical, try to recover. If the
			 * rule has a redirection pool with a single interface,
			 * there is a chance that this interface is identical
			 * to the one on the pfsync peer. If there's more than
			 * one interface, give up, as we can't be sure that we
			 * will pick the same one as the pfsync peer did.
			 */
			rpool_first = TAILQ_FIRST(&(pool->list));
			if ((rpool_first == NULL) ||
			    (TAILQ_NEXT(rpool_first, entries) != NULL)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    "%s: can't recover routing information "
				    "because of empty or bad redirection pool",
				    __func__);
				return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0);
			}
			rt = r->rt;
			rt_kif = rpool_first->kif;
			/*
			 * Guess the AF of the route address. FreeBSD 13 does
			 * not support af-to nor prefer-ipv6-nexthop, so it
			 * should be safe.
			 */
			rt_af = r->af;
		} else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) {
			/*
			 * Ruleset different, routing *supposedly* requested,
			 * give up on recovering.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    "%s: can't recover routing information "
			    "because of different ruleset", __func__);
			return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0);
		}
		wire_af = stack_af = sp->pfs_1301.af;
		wire_proto = stack_proto = sp->pfs_1301.proto;
		break;
	case PFSYNC_MSG_VERSION_1400:
		/*
		 * On FreeBSD 14 we're not taking any chances.
		 * We use the information synced to us.
		 */
		if (sp->pfs_1400.rt) {
			rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname);
			if (rt_kif == NULL) {
				DPFPRINTF(PF_DEBUG_MISC,
				    "%s: unknown route interface: %s",
				    __func__, sp->pfs_1400.rt_ifname);
				return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0);
			}
			rt = sp->pfs_1400.rt;
			/*
			 * Guess the AF of the route address. FreeBSD 14 does
			 * not support af-to nor prefer-ipv6-nexthop, so it
			 * should be safe.
			 */
			rt_af = sp->pfs_1400.af;
		}
		wire_af = stack_af = sp->pfs_1400.af;
		wire_proto = stack_proto = sp->pfs_1400.proto;
		break;
	case PFSYNC_MSG_VERSION_1500:
		/*
		 * On FreeBSD 15 and above we're not taking any chances.
		 * We use the information synced to us.
		 */
		if (sp->pfs_1500.rt) {
			rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname);
			if (rt_kif == NULL) {
				DPFPRINTF(PF_DEBUG_MISC,
				    "%s: unknown route interface: %s",
				    __func__, sp->pfs_1500.rt_ifname);
				return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0);
			}
			rt = sp->pfs_1500.rt;
			rt_af = sp->pfs_1500.rt_af;
		}
		wire_af = sp->pfs_1500.wire_af;
		stack_af = sp->pfs_1500.stack_af;
		wire_proto = sp->pfs_1500.wire_proto;
		stack_proto = sp->pfs_1500.stack_proto;
		break;
	}

	if ((r->max_states &&
	    counter_u64_fetch(r->states_cur) >= r->max_states))
		goto cleanup;

	/*
	 * XXXGL: consider M_WAITOK in ioctl path after.
	 */
	st = pf_alloc_state(M_NOWAIT);
	if (__predict_false(st == NULL))
		goto cleanup;

	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
		goto cleanup;

#ifndef	__NO_STRICT_ALIGNMENT
	bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2);
	kw = &key[PF_SK_WIRE];
	ks = &key[PF_SK_STACK];
#else
	kw = &sp->pfs_1301.key[PF_SK_WIRE];
	ks = &sp->pfs_1301.key[PF_SK_STACK];
#endif

	if (wire_af != stack_af ||
	    PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) ||
	    PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) ||
	    kw->port[0] != ks->port[0] ||
	    kw->port[1] != ks->port[1]) {
		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
		if (sks == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst))
		goto cleanup;

	/* Copy to state key(s). */
	skw->addr[0] = kw->addr[0];
	skw->addr[1] = kw->addr[1];
	skw->port[0] = kw->port[0];
	skw->port[1] = kw->port[1];
	skw->proto = wire_proto;
	skw->af = wire_af;
	if (sks != skw) {
		sks->addr[0] = ks->addr[0];
		sks->addr[1] = ks->addr[1];
		sks->port[0] = ks->port[0];
		sks->port[1] = ks->port[1];
		sks->proto = stack_proto;
		sks->af = stack_af;
	}

	/* copy to state */
	st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000;
	st->act.rt = rt;
	st->act.rt_kif = rt_kif;
	st->act.rt_af = rt_af;

	switch (msg_version) {
	case PFSYNC_MSG_VERSION_1301:
		st->state_flags = sp->pfs_1301.state_flags;
		st->direction = sp->pfs_1301.direction;
		st->act.log = sp->pfs_1301.log;
		st->timeout = sp->pfs_1301.timeout;
		if (rt)
			bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr,
			    sizeof(st->act.rt_addr));
		/*
		 * In FreeBSD 13 pfsync lacks many attributes. Copy them
		 * from the rule if possible. If the rule can't be matched,
		 * clear any set options, as we can't recover their
		 * parameters.
		 */
		if (r == &V_pf_default_rule) {
			st->state_flags &= ~PFSTATE_SETMASK;
		} else {
			/*
			 * Similar to pf_rule_to_actions(). This code
			 * won't set the actions properly if they come
			 * from multiple "match" rules, as only the rule
			 * creating the state is sent over pfsync.
			 */
			st->act.qid = r->qid;
			st->act.pqid = r->pqid;
			st->act.rtableid = r->rtableid;
			if (r->scrub_flags & PFSTATE_SETTOS)
				st->act.set_tos = r->set_tos;
			st->act.min_ttl = r->min_ttl;
			st->act.max_mss = r->max_mss;
			st->state_flags |= (r->scrub_flags &
			    (PFSTATE_NODF|PFSTATE_RANDOMID|
			    PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|
			    PFSTATE_SETPRIO));
			if (r->dnpipe || r->dnrpipe) {
				if (r->free_flags & PFRULE_DN_IS_PIPE)
					st->state_flags |= PFSTATE_DN_IS_PIPE;
				else
					st->state_flags &= ~PFSTATE_DN_IS_PIPE;
			}
			st->act.dnpipe = r->dnpipe;
			st->act.dnrpipe = r->dnrpipe;
		}
		break;
	case PFSYNC_MSG_VERSION_1400:
		st->state_flags = ntohs(sp->pfs_1400.state_flags);
		st->direction = sp->pfs_1400.direction;
		st->act.log = sp->pfs_1400.log;
		st->timeout = sp->pfs_1400.timeout;
		st->act.qid = ntohs(sp->pfs_1400.qid);
		st->act.pqid = ntohs(sp->pfs_1400.pqid);
		st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe);
		st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe);
		st->act.rtableid = ntohl(sp->pfs_1400.rtableid);
		st->act.min_ttl = sp->pfs_1400.min_ttl;
		st->act.set_tos = sp->pfs_1400.set_tos;
		st->act.max_mss = ntohs(sp->pfs_1400.max_mss);
		st->act.set_prio[0] = sp->pfs_1400.set_prio[0];
		st->act.set_prio[1] = sp->pfs_1400.set_prio[1];
		if (rt)
			bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr,
			    sizeof(st->act.rt_addr));
		break;
	case PFSYNC_MSG_VERSION_1500:
		st->state_flags = ntohs(sp->pfs_1500.state_flags);
		st->direction = sp->pfs_1500.direction;
		st->act.log = sp->pfs_1500.log;
		st->timeout = sp->pfs_1500.timeout;
		st->act.qid = ntohs(sp->pfs_1500.qid);
		st->act.pqid = ntohs(sp->pfs_1500.pqid);
		st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe);
		st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe);
		st->act.rtableid = ntohl(sp->pfs_1500.rtableid);
		st->act.min_ttl = sp->pfs_1500.min_ttl;
		st->act.set_tos = sp->pfs_1500.set_tos;
		st->act.max_mss = ntohs(sp->pfs_1500.max_mss);
		st->act.set_prio[0] = sp->pfs_1500.set_prio[0];
		st->act.set_prio[1] = sp->pfs_1500.set_prio[1];
		if (rt)
			bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr,
			    sizeof(st->act.rt_addr));
		if (sp->pfs_1500.tagname[0] != 0)
			st->tag = pf_tagname2tag(sp->pfs_1500.tagname);
		break;
	default:
		panic("%s: Unsupported pfsync_msg_version %d",
		    __func__, msg_version);
	}

	st->expire = pf_get_uptime();
	if (sp->pfs_1301.expire) {
		uint32_t timeout;
		timeout = r->timeout[st->timeout];
		if (!timeout)
			timeout = V_pf_default_rule.timeout[st->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000;
	}

	if (! (st->act.rtableid == -1 ||
	    (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs)))
		goto cleanup;

	st->id = sp->pfs_1301.id;
	st->creatorid = sp->pfs_1301.creatorid;
	pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
	pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);

	st->rule = r;
	st->nat_rule = NULL;
	st->anchor = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	if (!(flags & PFSYNC_SI_IOCTL))
		st->state_flags |= PFSTATE_NOSYNC;

	if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0)
		goto cleanup_state;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	counter_u64_add(r->states_cur, 1);
	counter_u64_add(r->states_tot, 1);

	if (!(flags & PFSYNC_SI_IOCTL)) {
		st->state_flags &= ~PFSTATE_NOSYNC;
		if (st->state_flags & PFSTATE_ACK) {
			struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
			PFSYNC_BUCKET_LOCK(b);
			pfsync_q_ins(st, PFSYNC_S_IACK, true);
			PFSYNC_BUCKET_UNLOCK(b);

			pfsync_push_all(sc);
		}
	}
	st->state_flags &= ~PFSTATE_ACK;
	PF_STATE_UNLOCK(st);

	return (0);

cleanup:
	error = ENOMEM;

	if (skw == sks)
		sks = NULL;
	uma_zfree(V_pf_state_key_z, skw);
	uma_zfree(V_pf_state_key_z, sks);

cleanup_state:	/* pf_state_insert() frees the state keys. */
	if (st) {
		st->timeout = PFTM_UNLINKED; /* appease an assert */
		pf_free_state(st);
	}
	return (error);
}

#ifdef INET
static int
pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len, flags = 0;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in the interface name tree, requires holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif
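
/*
 * pfsync6_input() below mirrors pfsync_input() step for step; only the
 * header parsing differs (fixed-size IPv6 header, hop limit instead of
 * TTL).
 */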

#ifdef INET6
static int
pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len, flags = 0;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IPv6 hop limit is 255. */
	if (ip6->ip6_hlim != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}


	offset = sizeof(*ip6);
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip6 = mtod(m, struct ip6_hdr *);
	}
	ph = (struct pfsync_header *)((char *)ip6 + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in the interface name tree, requires holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif

static int
pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;
	u_int32_t creatorid;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] != '\0' &&
		    pfi_kkif_find(clr[i].ifname) == NULL)
			continue;

		for (int i = 0; i <= V_pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];
			struct pf_kstate *s;
relock:
			PF_HASHROW_LOCK(ih);
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->creatorid == creatorid) {
					s->state_flags |= PFSTATE_NOSYNC;
					pf_remove_state(s);
					goto relock;
				}
			}
			PF_HASHROW_UNLOCK(ih);
		}
	}

	return (len);
}

static int
pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct mbuf *mp;
	union pfsync_state_union *sa, *sp;
	int i, offp, total_len, msg_version, msg_len;
	u_int8_t timeout, direction;
	sa_family_t af;

	switch (action) {
	case PFSYNC_ACT_INS_1301:
		msg_len = sizeof(struct pfsync_state_1301);
		msg_version = PFSYNC_MSG_VERSION_1301;
		break;
	case PFSYNC_ACT_INS_1400:
		msg_len = sizeof(struct pfsync_state_1400);
		msg_version = PFSYNC_MSG_VERSION_1400;
		break;
	case PFSYNC_ACT_INS_1500:
		msg_len = sizeof(struct pfsync_state_1500);
		msg_version = PFSYNC_MSG_VERSION_1500;
		break;
	default:
		V_pfsyncstats.pfsyncs_badver++;
		return (-1);
	}

	total_len = msg_len * count;

	mp = m_pulldown(m, offset, total_len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (union pfsync_state_union *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = (union pfsync_state_union *)((char *)sa + msg_len * i);

		switch (msg_version) {
		case PFSYNC_MSG_VERSION_1301:
		case PFSYNC_MSG_VERSION_1400:
			af = sp->pfs_1301.af;
			timeout = sp->pfs_1301.timeout;
			direction = sp->pfs_1301.direction;
			break;
		case PFSYNC_MSG_VERSION_1500:
			af = sp->pfs_1500.wire_af;
			timeout = sp->pfs_1500.timeout;
			direction = sp->pfs_1500.direction;
			break;
		}

		/* Check for invalid values. */
		if (timeout >= PFTM_MAX ||
		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST ||
		    direction > PF_OUT ||
		    (af != AF_INET && af != AF_INET6)) {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("%s: invalid value\n", __func__);
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, flags, msg_version) != 0)
			V_pfsyncstats.pfsyncs_badact++;
	}

	return (total_len);
}
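
/*
 * Note that the insert and update handlers accept all three wire layouts
 * regardless of the locally configured sc_version; dispatch is purely by
 * action code, which should allow mixed FreeBSD 13/14/15 clusters to
 * keep exchanging states.
 */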

static int
pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_kstate *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		st = pf_find_state_byid(ia->id, ia->creatorid);
		if (st == NULL)
			continue;

		if (st->state_flags & PFSTATE_ACK) {
			pfsync_undefer_state(st, 0);
		}
		PF_STATE_UNLOCK(st);
	}
	/*
	 * XXX this is not yet implemented, but we know the size of the
	 * message so we can skip it.
	 */

	return (count * sizeof(struct pfsync_ins_ack));
}

static int
pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src,
    struct pf_state_peer_export *dst)
{
	int sync = 0;

	PF_STATE_LOCK_ASSERT(st);

	/*
	 * The state should never go backwards except
	 * for syn-proxy states. Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}
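
/*
 * The return value of pfsync_upd_tcp() counts peers (src, dst) for which
 * applying the update would move the state or sequence window backwards.
 * Callers below treat sync == 0 as a clean apply, sync == 1 as partially
 * stale and sync == 2 as entirely stale; any nonzero value marks the
 * update stale and queues our own, newer copy of the state for output.
 */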

static int
pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_softc *sc = V_pfsyncif;
	union pfsync_state_union *sa, *sp;
	struct pf_kstate *st;
	struct mbuf *mp;
	int sync, offp, i, total_len, msg_len, msg_version;
	u_int8_t timeout;

	switch (action) {
	case PFSYNC_ACT_UPD_1301:
		msg_len = sizeof(struct pfsync_state_1301);
		msg_version = PFSYNC_MSG_VERSION_1301;
		break;
	case PFSYNC_ACT_UPD_1400:
		msg_len = sizeof(struct pfsync_state_1400);
		msg_version = PFSYNC_MSG_VERSION_1400;
		break;
	case PFSYNC_ACT_UPD_1500:
		msg_len = sizeof(struct pfsync_state_1500);
		msg_version = PFSYNC_MSG_VERSION_1500;
		break;
	default:
		V_pfsyncstats.pfsyncs_badact++;
		return (-1);
	}

	total_len = msg_len * count;

	mp = m_pulldown(m, offset, total_len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (union pfsync_state_union *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = (union pfsync_state_union *)((char *)sa + msg_len * i);

		switch (msg_version) {
		case PFSYNC_MSG_VERSION_1301:
		case PFSYNC_MSG_VERSION_1400:
			timeout = sp->pfs_1301.timeout;
			break;
		case PFSYNC_MSG_VERSION_1500:
			timeout = sp->pfs_1500.timeout;
			break;
		}

		/* check for invalid values */
		if (timeout >= PFTM_MAX ||
		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags, msg_version))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			pfsync_undefer_state(st, 1);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->pfs_1301.src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
			if (st->dst.state > sp->pfs_1301.dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst);
			pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
			st->expire = pf_get_uptime();
			st->timeout = timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			pfsync_push_all(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (total_len);
}

static int
pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_upd_c *ua, *up;
	struct pf_kstate *st;
	int len = count * sizeof(*up);
	int sync;
	struct mbuf *mp;
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(up->id, up->creatorid);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
			pfsync_request_update(up->creatorid, up->id);
			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			pfsync_undefer_state(st, 1);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);
			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = pf_get_uptime();
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			pfsync_push_all(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

static int
pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_kstate *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		if (ur->id == 0 && ur->creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(ur->id, ur->creatorid);
			if (st == NULL) {
				V_pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (st->state_flags & PFSTATE_NOSYNC) {
				PF_STATE_UNLOCK(st);
				continue;
			}

			pfsync_update_state_req(st);
			PF_STATE_UNLOCK(st);
		}
	}

	return (len);
}

static int
pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_kstate *st;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		st->state_flags |= PFSTATE_NOSYNC;
		pf_remove_state(st);
	}

	return (len);
}

static int
pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	PFSYNC_BLOCK(sc);

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0) {
		PFSYNC_BUNLOCK(sc);
		return (len);
	}

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		PFSYNC_BUNLOCK(sc);
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
		    V_pf_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
		    sizeof(union pfsync_state_union)),
		    pfsync_bulk_fail, sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			callout_stop(&sc->sc_bulkfail_tmo);
			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
				    "pfsync bulk done");
			sc->sc_flags |= PFSYNCF_OK;
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}
	PFSYNC_BUNLOCK(sc);

	return (len);
}
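
/*
 * The bulk-fail timer armed above waits 4 seconds plus one tick per
 * expected packet (the state limit divided by how many
 * union pfsync_state_union records fit in one MTU-sized pfsync packet),
 * i.e. long enough for the peer to stream its entire state table.
 */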

static int
pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
#endif

	return (len);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
static void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	V_pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

static int
pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action)
{
	/* check if we are at the right place in the packet */
	if (offset != m->m_pkthdr.len)
		V_pfsyncstats.pfsyncs_badlen++;

	/* we're done. free and let the caller return */
	m_freem(m);
	return (-1);
}

static int
pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action)
{
	V_pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}

static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    struct route *rt)
{
	m_freem(m);
	return (0);
}
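
/*
 * pfsync(4) never transmits packets handed to it through the generic
 * if_output path; pfsyncoutput() above simply frees them. Real output
 * happens via pfsync_sendout()/pfsync_tx() on the configured sync
 * interface.
 */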

/* ARGSUSED */
static int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct pfsyncreq pfsyncr;
	size_t nvbuflen;
	int error;
	int c;

	switch (cmd) {
	case SIOCSIFFLAGS:
		PFSYNC_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			ifp->if_drv_flags |= IFF_DRV_RUNNING;
			PFSYNC_UNLOCK(sc);
			pfsync_pointers_init();
		} else {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			PFSYNC_UNLOCK(sc);
			pfsync_pointers_uninit();
		}
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu) {
			for (c = 0; c < pfsync_buckets; c++) {
				PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
				if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
					pfsync_sendout(1, c);
				PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
			}
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		PFSYNC_LOCK(sc);
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_flags;
		PFSYNC_UNLOCK(sc);
		return (copyout(&pfsyncr, ifr_data_get_ptr(ifr),
		    sizeof(pfsyncr)));

	case SIOCGETPFSYNCNV:
	    {
		nvlist_t *nvl_syncpeer;
		nvlist_t *nvl = nvlist_create(0);

		if (nvl == NULL)
			return (ENOMEM);

		if (sc->sc_sync_if)
			nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname);
		nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates);
		nvlist_add_number(nvl, "flags", sc->sc_flags);
		nvlist_add_number(nvl, "version", sc->sc_version);
		if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL)
			nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer);

		void *packed = NULL;
		packed = nvlist_pack(nvl, &nvbuflen);
		if (packed == NULL) {
			free(packed, M_NVLIST);
			nvlist_destroy(nvl);
			return (ENOMEM);
		}

		if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
			ifr->ifr_cap_nv.length = nvbuflen;
			ifr->ifr_cap_nv.buffer = NULL;
			free(packed, M_NVLIST);
			nvlist_destroy(nvl);
			return (EFBIG);
		}

		ifr->ifr_cap_nv.length = nvbuflen;
		error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen);

		nvlist_destroy(nvl);
		nvlist_destroy(nvl_syncpeer);
		free(packed, M_NVLIST);
		break;
	    }

	case SIOCSETPFSYNC:
	    {
		struct pfsync_kstatus status;

		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
			return (error);
		if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr,
		    sizeof(pfsyncr))))
			return (error);

		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
		pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status);

		error = pfsync_kstatus_to_softc(&status, sc);
		return (error);
	    }
	case SIOCSETPFSYNCNV:
	    {
		struct pfsync_kstatus status;
		void *data;
		nvlist_t *nvl;

		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
			return (error);
		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
			return (EINVAL);

		data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK);

		if ((error = copyin(ifr->ifr_cap_nv.buffer, data,
		    ifr->ifr_cap_nv.length)) != 0) {
			free(data, M_PF);
			return (error);
		}

		if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) {
			free(data, M_PF);
			return (EINVAL);
		}

		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
		pfsync_nvstatus_to_kstatus(nvl, &status);

		nvlist_destroy(nvl);
		free(data, M_PF);

		error = pfsync_kstatus_to_softc(&status, sc);
		return (error);
	    }
	default:
		return (ENOTTY);
	}

	return (0);
}
1899
1900
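/*
 * Per-queue serializers: each writes one queued state into the wire
 * format of its message version (these are the "write" methods
 * referenced from pfsync_qs[]).
 */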
static void
pfsync_out_state_1301(struct pf_kstate *st, void *buf)
{
	struct pfsync_state_1301 *sp;

	sp = buf;
	pfsync_state_export_1301(sp, st);
}

static void
pfsync_out_state_1400(struct pf_kstate *st, void *buf)
{
	struct pfsync_state_1400 *sp;

	sp = buf;
	pfsync_state_export_1400(sp, st);
}

static void
pfsync_out_state_1500(struct pf_kstate *st, void *buf)
{
	struct pfsync_state_1500 *sp;

	sp = buf;
	pfsync_state_export_1500(sp, st);
}

static void
pfsync_out_iack(struct pf_kstate *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

static void
pfsync_out_upd_c(struct pf_kstate *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

static void
pfsync_out_del_c(struct pf_kstate *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;
	st->state_flags |= PFSTATE_NOSYNC;
}

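/*
 * Throw away everything queued for transmission: queued states are
 * released, pending update requests and any "plus" region are freed,
 * and the pending packet length is reset to the bare header.
 */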
static void
pfsync_drop_all(struct pfsync_softc *sc)
{
	struct pfsync_bucket *b;
	int c;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		pfsync_drop(sc, c);
		PFSYNC_BUCKET_UNLOCK(b);
	}
}

static void
pfsync_drop(struct pfsync_softc *sc, int c)
{
	struct pf_kstate *st, *next;
	struct pfsync_upd_req_item *ur;
	struct pfsync_bucket *b;
	enum pfsync_q_id q;

	b = &sc->sc_buckets[c];
	PFSYNC_BUCKET_LOCK_ASSERT(b);

	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
		if (TAILQ_EMPTY(&b->b_qs[q]))
			continue;

		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
			KASSERT(st->sync_state == pfsync_qid_sstate[q],
			    ("%s: st->sync_state %d == q %d",
			    __func__, st->sync_state, q));
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
		}
		TAILQ_INIT(&b->b_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
		free(ur, M_PFSYNC);
	}

	b->b_len = PFSYNC_MINPKT;
	free(b->b_plus, M_PFSYNC);
	b->b_plus = NULL;
	b->b_pluslen = 0;
}

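/*
 * Assemble one pfsync datagram from everything queued on bucket 'c':
 * the IPv4/IPv6 header template, the pfsync header, one subheader per
 * non-empty queue followed by its serialized states, any pending update
 * requests, an optional "plus" region, and a trailing EOF subheader.
 * The mbuf is handed to the bpf tap and enqueued on b_snd; 'schedswi'
 * schedules the software interrupt that actually transmits it.
 */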
static void
pfsync_sendout(int schedswi, int c)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct ifnet *ifp = sc->sc_ifp;
	struct mbuf *m;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_kstate *st, *st_next;
	struct pfsync_upd_req_item *ur;
	struct pfsync_bucket *b = &sc->sc_buckets[c];
	size_t len;
	int aflen, offset, count = 0;
	enum pfsync_q_id q;

	KASSERT(sc != NULL, ("%s: null sc", __func__));
	KASSERT(b->b_len > PFSYNC_MINPKT,
	    ("%s: sc_len %zu", __func__, b->b_len));
	PFSYNC_BUCKET_LOCK_ASSERT(b);

	if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) {
		pfsync_drop(sc, c);
		return;
	}

	m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
		V_pfsyncstats.pfsyncs_onomem++;
		return;
	}
	m->m_data += max_linkhdr;
	bzero(m->m_data, b->b_len);

	len = b->b_len;

	/* build the ip header */
	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET:
	    {
		struct ip *ip;

		ip = mtod(m, struct ip *);
		bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip));
		aflen = offset = sizeof(*ip);

		len -= sizeof(union inet_template) - sizeof(struct ip);
		ip->ip_len = htons(len);
		ip_fillid(ip, V_ip_random_id);
		break;
	    }
#endif
#ifdef INET6
	case AF_INET6:
	    {
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6));
		aflen = offset = sizeof(*ip6);

		len -= sizeof(union inet_template) - sizeof(struct ip6_hdr);
		ip6->ip6_plen = htons(len);
		break;
	    }
#endif
	default:
		m_freem(m);
		pfsync_drop(sc, c);
		return;
	}
	m->m_len = m->m_pkthdr.len = len;

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(len - aflen);
	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	/* walk the queues */
	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
		if (TAILQ_EMPTY(&b->b_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
			KASSERT(st->sync_state == pfsync_qid_sstate[q],
			    ("%s: st->sync_state == q", __func__));
			/*
			 * XXXGL: some of the write methods do unlocked
			 * reads of state data :(
			 */
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
			count++;
		}
		TAILQ_INIT(&b->b_qs[q]);

		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
	}

	if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);
			free(ur, M_PFSYNC);
			count++;
		}

		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
	}

	/* has someone built a custom region for us to add? */
	if (b->b_plus != NULL) {
		bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
		offset += b->b_pluslen;

		free(b->b_plus, M_PFSYNC);
		b->b_plus = NULL;
		b->b_pluslen = 0;
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;

	/* we're done, let's put it on the wire */
	if (bpf_peers_present(ifp->if_bpf)) {
		m->m_data += aflen;
		m->m_len = m->m_pkthdr.len = len - aflen;
		bpf_mtap(ifp->if_bpf, m);
		m->m_data -= aflen;
		m->m_len = m->m_pkthdr.len = len;
	}

	if (sc->sc_sync_if == NULL) {
		b->b_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	b->b_len = PFSYNC_MINPKT;

	if (!_IF_QFULL(&b->b_snd))
		_IF_ENQUEUE(&b->b_snd, m);
	else {
		m_freem(m);
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
	}
	if (schedswi)
		swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_insert_state(struct pf_kstate *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	if (st->state_flags & PFSTATE_NOSYNC)
		return;

	if ((st->rule->rule_flag & PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		st->state_flags |= PFSTATE_NOSYNC;
		return;
	}

	KASSERT(st->sync_state == PFSYNC_S_NONE,
	    ("%s: st->sync_state %u", __func__, st->sync_state));

	PFSYNC_BUCKET_LOCK(b);
	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	pfsync_q_ins(st, PFSYNC_S_INS, true);
	PFSYNC_BUCKET_UNLOCK(b);

	st->sync_updates = 0;
}

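/*
 * Defer transmission of a state's initial packet until the peer has
 * acknowledged the corresponding insert (or the deferral times out),
 * giving the peer a chance to install the state before it sees the
 * connection's traffic.  Returns 1 if the mbuf was taken over by the
 * deferral machinery, 0 if the caller should transmit it itself.
 */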
static int
pfsync_defer(struct pf_kstate *st, struct mbuf *m)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;
	struct pfsync_bucket *b;

	if (m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc == NULL)
		return (0);

	b = pfsync_get_bucket(sc, st);

	PFSYNC_LOCK(sc);

	if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    !(sc->sc_flags & PFSYNCF_DEFER)) {
		PFSYNC_UNLOCK(sc);
		return (0);
	}

	PFSYNC_BUCKET_LOCK(b);
	PFSYNC_UNLOCK(sc);

	if (b->b_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);

	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
	if (pd == NULL) {
		PFSYNC_BUCKET_UNLOCK(b);
		return (0);
	}
	b->b_deferred++;

	m->m_flags |= M_SKIP_FIREWALL;
	st->state_flags |= PFSTATE_ACK;

	pd->pd_sc = sc;
	pd->pd_st = st;
	pf_ref_state(st);
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
	callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
	callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000,
	    pfsync_defer_tmo, pd);

	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);

	return (1);
}

static void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_kstate *st = pd->pd_st;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	free(pd, M_PFSYNC);
	pf_release_state(st);

	if (drop)
		m_freem(m);
	else {
		_IF_ENQUEUE(&b->b_snd, m);
		pfsync_push(b);
	}
}

static void
pfsync_defer_tmo(void *arg)
{
	struct epoch_tracker et;
	struct pfsync_deferral *pd = arg;
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_kstate *st = pd->pd_st;
	struct pfsync_bucket *b;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	PFSYNC_BUCKET_UNLOCK(b);
	free(pd, M_PFSYNC);

	if (sc->sc_sync_if == NULL) {
		pf_release_state(st);
		m_freem(m);
		CURVNET_RESTORE();
		return;
	}

	NET_EPOCH_ENTER(et);

	pfsync_tx(sc, m);

	pf_release_state(st);

	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}

static void
pfsync_undefer_state_locked(struct pf_kstate *st, int drop)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (callout_stop(&pd->pd_tmo) > 0)
				pfsync_undefer(pd, drop);

			return;
		}
	}

	panic("%s: unable to find deferred state", __func__);
}

static void
pfsync_undefer_state(struct pf_kstate *st, int drop)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK(b);
	pfsync_undefer_state_locked(st, drop);
	PFSYNC_BUCKET_UNLOCK(b);
}

static struct pfsync_bucket*
pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st)
{
	int c = PF_IDHASH(st) % pfsync_buckets;
	return &sc->sc_buckets[c];
}

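/*
 * Queue a compressed update for a state that has changed.  Updates for
 * TCP states are coalesced until sc_maxupdates is reached; states
 * already queued for insert or update are left where they are.
 */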
static void
pfsync_update_state(struct pf_kstate *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool sync = false, ref = true;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state_locked(st, 0);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = true;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
		st->sync_updates = 0;
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		pfsync_push(b);

	PFSYNC_BUCKET_UNLOCK(b);
}

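/*
 * Queue a request asking the peer for a full update of the state
 * identified by (creatorid, id).  Update requests always travel via
 * bucket 0.
 */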
static void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	/*
	 * This code does a bit to prevent multiple update requests for the
	 * same state from being generated.  It searches the current
	 * subheader queue, but it doesn't look into the queue of already
	 * packed datagrams.
	 */
	TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
		if (item->ur_msg.id == id &&
		    item->ur_msg.creatorid == creatorid)
			return;

	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
	if (item == NULL)
		return; /* XXX stats */

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&b->b_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(0, 0);

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
	b->b_len += nlen;

	pfsync_push(b);
}

static bool
pfsync_update_state_req(struct pf_kstate *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool ref = true, full = false;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return (full);
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
		pfsync_push(b);
		break;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL_C:
		/* we're already handling it */
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union))
		full = true;

	PFSYNC_BUCKET_UNLOCK(b);

	return (full);
}

static void
pfsync_delete_state(struct pf_kstate *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
	bool ref = true;

	PFSYNC_BUCKET_LOCK(b);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state_locked(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st, true, b);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL_C, ref);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

static enum pfsync_q_id
pfsync_sstate_to_qid(u_int8_t sync_state)
{
	struct pfsync_softc *sc = V_pfsyncif;

	switch (sync_state) {
	case PFSYNC_S_INS:
		switch (sc->sc_version) {
		case PFSYNC_MSG_VERSION_1301:
			return PFSYNC_Q_INS_1301;
		case PFSYNC_MSG_VERSION_1400:
			return PFSYNC_Q_INS_1400;
		case PFSYNC_MSG_VERSION_1500:
			return PFSYNC_Q_INS_1500;
		}
		break;
	case PFSYNC_S_IACK:
		return PFSYNC_Q_IACK;
	case PFSYNC_S_UPD:
		switch (sc->sc_version) {
		case PFSYNC_MSG_VERSION_1301:
			return PFSYNC_Q_UPD_1301;
		case PFSYNC_MSG_VERSION_1400:
			return PFSYNC_Q_UPD_1400;
		case PFSYNC_MSG_VERSION_1500:
			return PFSYNC_Q_UPD_1500;
		}
		break;
	case PFSYNC_S_UPD_C:
		return PFSYNC_Q_UPD_C;
	case PFSYNC_S_DEL_C:
		return PFSYNC_Q_DEL_C;
	default:
		panic("%s: Unsupported st->sync_state 0x%02x",
		    __func__, sync_state);
	}

	panic("%s: Unsupported pfsync_msg_version %d",
	    __func__, sc->sc_version);
}

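/*
 * Append a state to the queue corresponding to 'sync_state', growing the
 * pending packet length by the entry (plus a subheader if the queue was
 * empty).  If the entry would push the packet past the MTU, the current
 * packet is flushed first.
 */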
static void
pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref)
{
	enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state);
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
	    ("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    b->b_len));

	if (TAILQ_EMPTY(&b->b_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1, b->b_id);

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	b->b_len += nlen;
	st->sync_state = pfsync_qid_sstate[q];
	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
	if (ref)
		pf_ref_state(st);
}

static void
pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b)
{
	enum pfsync_q_id q;

	PFSYNC_BUCKET_LOCK_ASSERT(b);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
	    ("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	q = pfsync_sstate_to_qid(st->sync_state);
	b->b_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	if (unref)
		pf_release_state(st);

	if (TAILQ_EMPTY(&b->b_qs[q]))
		b->b_len -= sizeof(struct pfsync_subheader);
}

static void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}

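/*
 * Callout worker answering a bulk update request: walk the state table,
 * queueing a full update for every eligible state.  When the pending
 * packet fills up, record the current position and reschedule; once the
 * walk completes, announce PFSYNC_BUS_END.
 */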
static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_kstate *s;
	int i;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with the last state from the previous invocation.
	 * It may have gone away; in that case start from its
	 * hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}

static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_BUCKET_LOCK(b);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(b);
	} else {
		/* Pretend like the transfer was ok. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];
	uint8_t *newplus;

	PFSYNC_BUCKET_LOCK(b);

	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1, b->b_id);

	newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT);
	if (newplus == NULL)
		goto out;

	if (b->b_plus != NULL) {
		memcpy(newplus, b->b_plus, b->b_pluslen);
		free(b->b_plus, M_PFSYNC);
	} else {
		MPASS(b->b_pluslen == 0);
	}
	memcpy(newplus + b->b_pluslen, plus, pluslen);

	b->b_plus = newplus;
	b->b_pluslen += pluslen;
	b->b_len += pluslen;

	pfsync_sendout(1, b->b_id);

out:
	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_timeout(void *arg)
{
	struct pfsync_bucket *b = arg;

	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
	PFSYNC_BUCKET_LOCK(b);
	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);
	CURVNET_RESTORE();
}

static void
pfsync_push(struct pfsync_bucket *b)
{

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	b->b_flags |= PFSYNCF_BUCKET_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_push_all(struct pfsync_softc *sc)
{
	int c;
	struct pfsync_bucket *b;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		pfsync_push(b);
		PFSYNC_BUCKET_UNLOCK(b);
	}
}

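/*
 * Hand one mbuf from a pfsync send queue to the IPv4/IPv6 output path.
 */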
static void
pfsync_tx(struct pfsync_softc *sc, struct mbuf *m)
{
	struct ip *ip;
	int af, error = 0;

	ip = mtod(m, struct ip *);
	MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4));

	af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6;

	/*
	 * We distinguish between a deferral packet and our
	 * own pfsync packet based on M_SKIP_FIREWALL
	 * flag. This is XXX.
	 */
	switch (af) {
#ifdef INET
	case AF_INET:
		if (m->m_flags & M_SKIP_FIREWALL) {
			error = ip_output(m, NULL, NULL, 0,
			    NULL, NULL);
		} else {
			error = ip_output(m, NULL, NULL,
			    IP_RAWOUTPUT, &sc->sc_imo, NULL);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		if (m->m_flags & M_SKIP_FIREWALL) {
			error = ip6_output(m, NULL, NULL, 0,
			    NULL, NULL, NULL);
		} else {
			error = ip6_output(m, NULL, NULL, 0,
			    &sc->sc_im6o, NULL, NULL);
		}
		break;
#endif
	}

	if (error == 0)
		V_pfsyncstats.pfsyncs_opackets++;
	else
		V_pfsyncstats.pfsyncs_oerrors++;

}

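/*
 * Software interrupt handler: flush any bucket flagged for a push and
 * transmit everything queued on the per-bucket send queues.
 */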
static void
pfsyncintr(void *arg)
{
	struct epoch_tracker et;
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b;
	struct mbuf *m, *n;
	int c;

	NET_EPOCH_ENTER(et);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
			pfsync_sendout(0, b->b_id);
			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
		}
		_IF_DEQUEUE_ALL(&b->b_snd, m);
		PFSYNC_BUCKET_UNLOCK(b);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m->m_nextpkt = NULL;

			pfsync_tx(sc, m);
		}
	}
	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}

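/*
 * Join the configured multicast group on the sync interface and set up
 * the IPv4/IPv6 multicast options used for our transmissions.
 */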
static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
    struct in_mfilter* imf, struct in6_mfilter* im6f)
{
#ifdef INET
	struct ip_moptions *imo = &sc->sc_imo;
#endif
#ifdef INET6
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct sockaddr_in6 *syncpeer_sa6 = NULL;
#endif

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET:
	    {
		int error;

		ip_mfilter_init(&imo->imo_head);
		imo->imo_multicast_vif = -1;
		if ((error = in_joingroup(ifp,
		    &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL,
		    &imf->imf_inm)) != 0)
			return (error);

		ip_mfilter_insert(&imo->imo_head, imf);
		imo->imo_multicast_ifp = ifp;
		imo->imo_multicast_ttl = PFSYNC_DFLTTL;
		imo->imo_multicast_loop = 0;
		break;
	    }
#endif
#ifdef INET6
	case AF_INET6:
	    {
		int error;

		syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer;
		if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL)))
			return (error);

		ip6_mfilter_init(&im6o->im6o_head);
		if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL,
		    &(im6f->im6f_in6m), 0)) != 0)
			return (error);

		ip6_mfilter_insert(&im6o->im6o_head, im6f);
		im6o->im6o_multicast_ifp = ifp;
		im6o->im6o_multicast_hlim = PFSYNC_DFLTTL;
		im6o->im6o_multicast_loop = 0;
		break;
	    }
#endif
	}

	return (0);
}

static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
#ifdef INET
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_mfilter *imf;

	while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
		ip_mfilter_remove(&imo->imo_head, imf);
		in_leavegroup(imf->imf_inm, NULL);
		ip_mfilter_free(imf);
	}
	imo->imo_multicast_ifp = NULL;
#endif

#ifdef INET6
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct in6_mfilter *im6f;

	while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
		ip6_mfilter_remove(&im6o->im6o_head, im6f);
		in6_leavegroup(im6f->im6f_in6m, NULL);
		ip6_mfilter_free(im6f);
	}
	im6o->im6o_multicast_ifp = NULL;
#endif
}

void
pfsync_detach_ifnet(struct ifnet *ifp)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (sc == NULL)
		return;

	PFSYNC_LOCK(sc);

	if (sc->sc_sync_if == ifp) {
		/* We don't need multicast cleanup here, because the
		 * interface is going away. We do need to ensure we don't
		 * try to do cleanup later.
		 */
		ip_mfilter_init(&sc->sc_imo.imo_head);
		sc->sc_imo.imo_multicast_ifp = NULL;
		sc->sc_im6o.im6o_multicast_ifp = NULL;
		sc->sc_sync_if = NULL;
	}

	PFSYNC_UNLOCK(sc);
}

static int
pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status)
{
	struct sockaddr_storage sa;
	status->maxupdates = pfsyncr->pfsyncr_maxupdates;
	status->flags = pfsyncr->pfsyncr_defer;

	strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ);

	memset(&sa, 0, sizeof(sa));
	if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) {
		struct sockaddr_in *in = (struct sockaddr_in *)&sa;
		in->sin_family = AF_INET;
		in->sin_len = sizeof(*in);
		in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr;
	}
	status->syncpeer = sa;

	return 0;
}

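/*
 * Apply a validated configuration to the softc: resolve the sync
 * interface, default the peer address to the pfsync group if necessary,
 * flush buckets that would no longer fit the new path MTU, rejoin
 * multicast state, rebuild the IP header template, and finally request
 * a bulk update from the peer.
 */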
static int
pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc)
{
	struct ifnet *sifp;
	struct in_mfilter *imf = NULL;
	struct in6_mfilter *im6f = NULL;
	int error;
	int c;

	if ((status->maxupdates < 0) || (status->maxupdates > 255))
		return (EINVAL);

	if (status->syncdev[0] == '\0')
		sifp = NULL;
	else if ((sifp = ifunit_ref(status->syncdev)) == NULL)
		return (EINVAL);

	switch (status->syncpeer.ss_family) {
#ifdef INET
	case AF_UNSPEC:
	case AF_INET: {
		struct sockaddr_in *status_sin;
		status_sin = (struct sockaddr_in *)&(status->syncpeer);
		if (sifp != NULL) {
			if (status_sin->sin_addr.s_addr == 0 ||
			    status_sin->sin_addr.s_addr ==
			    htonl(INADDR_PFSYNC_GROUP)) {
				status_sin->sin_family = AF_INET;
				status_sin->sin_len = sizeof(*status_sin);
				status_sin->sin_addr.s_addr =
				    htonl(INADDR_PFSYNC_GROUP);
			}

			if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) {
				imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
			}
		}
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *status_sin6;
		status_sin6 = (struct sockaddr_in6*)&(status->syncpeer);
		if (sifp != NULL) {
			if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) ||
			    IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr,
			    &in6addr_linklocal_pfsync_group)) {
				status_sin6->sin6_family = AF_INET6;
				status_sin6->sin6_len = sizeof(*status_sin6);
				status_sin6->sin6_addr =
				    in6addr_linklocal_pfsync_group;
			}

			if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) {
				im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0);
			}
		}
		break;
	}
#endif
	}

	PFSYNC_LOCK(sc);

	switch (status->version) {
	case PFSYNC_MSG_VERSION_UNSPECIFIED:
		sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
		break;
	case PFSYNC_MSG_VERSION_1301:
	case PFSYNC_MSG_VERSION_1400:
	case PFSYNC_MSG_VERSION_1500:
		sc->sc_version = status->version;
		break;
	default:
		PFSYNC_UNLOCK(sc);
		return (EINVAL);
	}

	switch (status->syncpeer.ss_family) {
	case AF_INET: {
		struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer);
		struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer;
		sc_sin->sin_family = AF_INET;
		sc_sin->sin_len = sizeof(*sc_sin);
		if (status_sin->sin_addr.s_addr == 0) {
			sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
		} else {
			sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr;
		}
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer);
		struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer;
		sc_sin->sin6_family = AF_INET6;
		sc_sin->sin6_len = sizeof(*sc_sin);
		if (IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) {
			sc_sin->sin6_addr = in6addr_linklocal_pfsync_group;
		} else {
			sc_sin->sin6_addr = status_sin->sin6_addr;
		}
		break;
	}
	}

	sc->sc_maxupdates = status->maxupdates;
	if (status->flags & PFSYNCF_DEFER) {
		sc->sc_flags |= PFSYNCF_DEFER;
		V_pfsync_defer_ptr = pfsync_defer;
	} else {
		sc->sc_flags &= ~PFSYNCF_DEFER;
		V_pfsync_defer_ptr = NULL;
	}

	if (sifp == NULL) {
		if (sc->sc_sync_if)
			if_rele(sc->sc_sync_if);
		sc->sc_sync_if = NULL;
		pfsync_multicast_cleanup(sc);
		PFSYNC_UNLOCK(sc);
		return (0);
	}

	for (c = 0; c < pfsync_buckets; c++) {
		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
		if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
			pfsync_sendout(1, c);
		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
	}

	pfsync_multicast_cleanup(sc);

	if (((sc->sc_sync_peer.ss_family == AF_INET) &&
	    IN_MULTICAST(ntohl(((struct sockaddr_in *)
	    &sc->sc_sync_peer)->sin_addr.s_addr))) ||
	    ((sc->sc_sync_peer.ss_family == AF_INET6) &&
	    IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*)
	    &sc->sc_sync_peer)->sin6_addr))) {
		error = pfsync_multicast_setup(sc, sifp, imf, im6f);
		if (error) {
			if_rele(sifp);
			PFSYNC_UNLOCK(sc);
#ifdef INET
			if (imf != NULL)
				ip_mfilter_free(imf);
#endif
#ifdef INET6
			if (im6f != NULL)
				ip6_mfilter_free(im6f);
#endif
			return (error);
		}
	}
	if (sc->sc_sync_if)
		if_rele(sc->sc_sync_if);
	sc->sc_sync_if = sifp;

	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET: {
		struct ip *ip;
		ip = &sc->sc_template.ipv4;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later. */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		ip6 = &sc->sc_template.ipv6;
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_hlim = PFSYNC_DFLTTL;
		ip6->ip6_nxt = IPPROTO_PFSYNC;
		ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr;

		struct epoch_tracker et;
		NET_EPOCH_ENTER(et);
		in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0,
		    sc->sc_sync_if, &ip6->ip6_src, NULL);
		NET_EPOCH_EXIT(et);
		break;
	}
#endif
	}

	/* Request a full state table update. */
	if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(V_pfsync_carp_adj,
		    "pfsync bulk start");
	sc->sc_flags &= ~PFSYNCF_OK;
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: requesting bulk update\n");
	PFSYNC_UNLOCK(sc);
	PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
	pfsync_request_update(0, 0);
	PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
	PFSYNC_BLOCK(sc);
	sc->sc_ureq_sent = time_uptime;
	callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc);
	PFSYNC_BUNLOCK(sc);
	return (0);
}

static void
pfsync_pointers_init(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = pfsync_state_import;
	V_pfsync_insert_state_ptr = pfsync_insert_state;
	V_pfsync_update_state_ptr = pfsync_update_state;
	V_pfsync_delete_state_ptr = pfsync_delete_state;
	V_pfsync_clear_states_ptr = pfsync_clear_states;
	V_pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

static void
pfsync_pointers_uninit(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = NULL;
	V_pfsync_insert_state_ptr = NULL;
	V_pfsync_update_state_ptr = NULL;
	V_pfsync_delete_state_ptr = NULL;
	V_pfsync_clear_states_ptr = NULL;
	V_pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

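/*
 * Per-VNET attach/detach: create the interface cloner and the transmit
 * software interrupt, and wire pfsync into pf through the function
 * pointers above.
 */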
static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}

	pfsync_pointers_init();
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{
	int ret __diagused;

	pfsync_pointers_uninit();

	if_clone_detach(V_pfsync_cloner);
	ret = swi_remove(V_pfsync_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pfsync_swi_ie);
	MPASS(ret == 0);
}

VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init(void)
{
	int error;

	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;

#ifdef INET
	error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL);
	if (error)
		return (error);
#endif
#ifdef INET6
	error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL);
	if (error) {
		ipproto_unregister(IPPROTO_PFSYNC);
		return (error);
	}
#endif

	return (0);
}

static void
pfsync_uninit(void)
{
	pfsync_detach_ifnet_ptr = NULL;

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
#endif
#ifdef INET6
	ip6proto_unregister(IPPROTO_PFSYNC);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);