GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/pf/pf.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2008 Henning Brauer
 * Copyright (c) 2012 Gleb Smirnoff <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
 */

#include <sys/cdefs.h>
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/gsb_crc32.h>
#include <sys/hash.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/random.h>
#include <sys/refcount.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/ucred.h>

#include <crypto/sha2/sha512.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/vnet.h>

#include <net/pfil.h>
#include <net/pfvar.h>
#include <net/if_pflog.h>
#include <net/if_pfsync.h>

#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

/* dummynet */
#include <netinet/ip_dummynet.h>
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_dn_private.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <netinet/sctp_header.h>
#include <netinet/sctp_crc32.h>

#include <netipsec/ah.h>

#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>

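/*
 * Static DTrace probe points (SDT).  These compile to near no-ops unless a
 * DTrace consumer attaches, and provide tracing hooks into rule evaluation,
 * state lookup, route-to forwarding and SCTP multihome handling.
 */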
SDT_PROVIDER_DEFINE(pf);
SDT_PROBE_DEFINE2(pf, , test, reason_set, "int", "int");
SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *",
    "struct pf_kstate *");
SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *",
    "struct pf_state_key_cmp *", "int", "struct pf_pdesc *",
    "struct pf_kstate *");
SDT_PROBE_DEFINE2(pf, ip, , bound_iface, "struct pf_kstate *",
    "struct pfi_kkif *");
SDT_PROBE_DEFINE4(pf, ip, route_to, entry, "struct mbuf *",
    "struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
SDT_PROBE_DEFINE1(pf, ip, route_to, drop, "int");
SDT_PROBE_DEFINE2(pf, ip, route_to, output, "struct ifnet *", "int");
SDT_PROBE_DEFINE4(pf, ip6, route_to, entry, "struct mbuf *",
    "struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
SDT_PROBE_DEFINE1(pf, ip6, route_to, drop, "int");
SDT_PROBE_DEFINE2(pf, ip6, route_to, output, "struct ifnet *", "int");
SDT_PROBE_DEFINE4(pf, sctp, multihome, test, "struct pfi_kkif *",
    "struct pf_krule *", "struct mbuf *", "int");
SDT_PROBE_DEFINE2(pf, sctp, multihome, add, "uint32_t",
    "struct pf_sctp_source *");
SDT_PROBE_DEFINE3(pf, sctp, multihome, remove, "uint32_t",
    "struct pf_kstate *", "struct pf_sctp_source *");
SDT_PROBE_DEFINE4(pf, sctp, multihome_scan, entry, "int",
    "int", "struct pf_pdesc *", "int");
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, param, "uint16_t", "uint16_t");
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv4, "struct in_addr *",
    "int");
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv6, "struct in_addr6 *",
    "int");

SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *",
    "struct mbuf *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, test, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch,
    "int", "struct pf_keth_rule *", "char *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match,
    "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t");
SDT_PROBE_DEFINE2(pf, , log, log, "int", "const char *");

/*
 * Global variables
 */

/* state tables */
VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]);
VNET_DEFINE(struct pf_kpalist, pf_pabuf[3]);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive);
VNET_DEFINE(struct pf_kstatus, pf_status);

VNET_DEFINE(u_int32_t, ticket_altqs_active);
VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
VNET_DEFINE(int, altqs_inactive_open);
VNET_DEFINE(u_int32_t, ticket_pabuf);

static const int PF_HDR_LIMIT = 20; /* arbitrary limit */

VNET_DEFINE(SHA512_CTX, pf_tcp_secret_ctx);
#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx)
VNET_DEFINE(u_char, pf_tcp_secret[16]);
#define V_pf_tcp_secret VNET(pf_tcp_secret)
VNET_DEFINE(int, pf_tcp_secret_init);
#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init)
VNET_DEFINE(int, pf_tcp_iss_off);
#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off)
VNET_DECLARE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)

VNET_DEFINE_STATIC(uint32_t, pf_purge_idx);
#define V_pf_purge_idx VNET(pf_purge_idx)

#ifdef PF_WANT_32_TO_64_COUNTER
VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter);
#define V_pf_counter_periodic_iter VNET(pf_counter_periodic_iter)

VNET_DEFINE(struct allrulelist_head, pf_allrulelist);
VNET_DEFINE(size_t, pf_allrulecount);
VNET_DEFINE(struct pf_krule *, pf_rulemarker);
#endif

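/*
 * SCTP endpoint tracking for multihomed associations: a red-black tree
 * keyed on the association's verification tag, where each endpoint keeps
 * a list of known source addresses.  The tree itself is per-vnet; the
 * mutex protecting it is global.
 */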
#define PF_SCTP_MAX_ENDPOINTS 8

struct pf_sctp_endpoint;
RB_HEAD(pf_sctp_endpoints, pf_sctp_endpoint);
struct pf_sctp_source {
    sa_family_t af;
    struct pf_addr addr;
    TAILQ_ENTRY(pf_sctp_source) entry;
};
TAILQ_HEAD(pf_sctp_sources, pf_sctp_source);
struct pf_sctp_endpoint
{
    uint32_t v_tag;
    struct pf_sctp_sources sources;
    RB_ENTRY(pf_sctp_endpoint) entry;
};
static int
pf_sctp_endpoint_compare(struct pf_sctp_endpoint *a, struct pf_sctp_endpoint *b)
{
    return (a->v_tag - b->v_tag);
}
RB_PROTOTYPE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
RB_GENERATE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
VNET_DEFINE_STATIC(struct pf_sctp_endpoints, pf_sctp_endpoints);
#define V_pf_sctp_endpoints VNET(pf_sctp_endpoints)
static struct mtx_padalign pf_sctp_endpoints_mtx;
MTX_SYSINIT(pf_sctp_endpoints_mtx, &pf_sctp_endpoints_mtx, "SCTP endpoints", MTX_DEF);
#define PF_SCTP_ENDPOINTS_LOCK()   mtx_lock(&pf_sctp_endpoints_mtx)
#define PF_SCTP_ENDPOINTS_UNLOCK() mtx_unlock(&pf_sctp_endpoints_mtx)

/*
 * Queue for pf_intr() sends.
 */
static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
struct pf_send_entry {
    STAILQ_ENTRY(pf_send_entry) pfse_next;
    struct mbuf    *pfse_m;
    enum {
        PFSE_IP,
        PFSE_IP6,
        PFSE_ICMP,
        PFSE_ICMP6,
    } pfse_type;
    struct {
        int type;
        int code;
        int mtu;
    } icmpopts;
};

STAILQ_HEAD(pf_send_head, pf_send_entry);
VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
#define V_pf_sendqueue VNET(pf_sendqueue)

static struct mtx_padalign pf_sendqueue_mtx;
MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
#define PF_SENDQ_LOCK()   mtx_lock(&pf_sendqueue_mtx)
#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)

/*
 * Queue for pf_overload_task() tasks.
 */
struct pf_overload_entry {
    SLIST_ENTRY(pf_overload_entry) next;
    struct pf_addr  addr;
    sa_family_t     af;
    uint8_t         dir;
    struct pf_krule *rule;
};

SLIST_HEAD(pf_overload_head, pf_overload_entry);
VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue VNET(pf_overloadqueue)
VNET_DEFINE_STATIC(struct task, pf_overloadtask);
#define V_pf_overloadtask VNET(pf_overloadtask)

static struct mtx_padalign pf_overloadqueue_mtx;
MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
    "pf overload/flush queue", MTX_DEF);
#define PF_OVERLOADQ_LOCK()   mtx_lock(&pf_overloadqueue_mtx)
#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)

VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules);
struct mtx_padalign pf_unlnkdrules_mtx;
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
    MTX_DEF);

struct sx pf_config_lock;
SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config");

struct mtx_padalign pf_table_stats_lock;
MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats",
    MTX_DEF);

VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z);
#define V_pf_sources_z VNET(pf_sources_z)
uma_zone_t pf_mtag_z;
VNET_DEFINE(uma_zone_t, pf_state_z);
VNET_DEFINE(uma_zone_t, pf_state_key_z);
VNET_DEFINE(uma_zone_t, pf_udp_mapping_z);

VNET_DEFINE(struct unrhdr64, pf_stateid);

static void    pf_src_tree_remove_state(struct pf_kstate *);
static int     pf_check_threshold(struct pf_kthreshold *);

static void    pf_change_ap(struct pf_pdesc *, struct pf_addr *, u_int16_t *,
    struct pf_addr *, u_int16_t);
static int     pf_modulate_sack(struct pf_pdesc *,
    struct tcphdr *, struct pf_state_peer *);
int            pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
    u_int16_t *, u_int16_t *);
static void    pf_change_icmp(struct pf_addr *, u_int16_t *,
    struct pf_addr *, struct pf_addr *, u_int16_t,
    u_int16_t *, u_int16_t *, u_int16_t *,
    u_int16_t *, u_int8_t, sa_family_t);
int            pf_change_icmp_af(struct mbuf *, int,
    struct pf_pdesc *, struct pf_pdesc *,
    struct pf_addr *, struct pf_addr *, sa_family_t,
    sa_family_t);
int            pf_translate_icmp_af(int, void *);
static void    pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
    int, sa_family_t, struct pf_krule *, int);
static void    pf_detach_state(struct pf_kstate *);
static int     pf_state_key_attach(struct pf_state_key *,
    struct pf_state_key *, struct pf_kstate *);
static void    pf_state_key_detach(struct pf_kstate *, int);
static int     pf_state_key_ctor(void *, int, void *, int);
static u_int32_t pf_tcp_iss(struct pf_pdesc *);
static __inline void pf_dummynet_flag_remove(struct mbuf *m,
    struct pf_mtag *pf_mtag);
static int     pf_dummynet(struct pf_pdesc *, struct pf_kstate *,
    struct pf_krule *, struct mbuf **);
static int     pf_dummynet_route(struct pf_pdesc *,
    struct pf_kstate *, struct pf_krule *,
    struct ifnet *, const struct sockaddr *, struct mbuf **);
static int     pf_test_eth_rule(int, struct pfi_kkif *,
    struct mbuf **);
static int     pf_test_rule(struct pf_krule **, struct pf_kstate **,
    struct pf_pdesc *, struct pf_krule **,
    struct pf_kruleset **, u_short *, struct inpcb *,
    struct pf_krule_slist *);
static int     pf_create_state(struct pf_krule *,
    struct pf_test_ctx *,
    struct pf_kstate **, u_int16_t, u_int16_t,
    struct pf_krule_slist *match_rules);
static int     pf_state_key_addr_setup(struct pf_pdesc *,
    struct pf_state_key_cmp *, int);
static int     pf_tcp_track_full(struct pf_kstate *,
    struct pf_pdesc *, u_short *, int *,
    struct pf_state_peer *, struct pf_state_peer *,
    u_int8_t, u_int8_t);
static int     pf_tcp_track_sloppy(struct pf_kstate *,
    struct pf_pdesc *, u_short *,
    struct pf_state_peer *, struct pf_state_peer *,
    u_int8_t, u_int8_t);
static int     pf_test_state(struct pf_kstate **, struct pf_pdesc *,
    u_short *);
int            pf_icmp_state_lookup(struct pf_state_key_cmp *,
    struct pf_pdesc *, struct pf_kstate **,
    u_int16_t, u_int16_t, int, int *, int, int);
static int     pf_test_state_icmp(struct pf_kstate **,
    struct pf_pdesc *, u_short *);
static int     pf_sctp_track(struct pf_kstate *, struct pf_pdesc *,
    u_short *);
static void    pf_sctp_multihome_detach_addr(const struct pf_kstate *);
static void    pf_sctp_multihome_delayed(struct pf_pdesc *,
    struct pfi_kkif *, struct pf_kstate *, int);
static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
    int, u_int16_t);
static int     pf_check_proto_cksum(struct mbuf *, int, int,
    u_int8_t, sa_family_t);
static int     pf_walk_option(struct pf_pdesc *, struct ip *,
    int, int, u_short *);
static int     pf_walk_header(struct pf_pdesc *, struct ip *, u_short *);
#ifdef INET6
static int     pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
    int, int, u_short *);
static int     pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
    u_short *);
#endif
static void    pf_print_state_parts(struct pf_kstate *,
    struct pf_state_key *, struct pf_state_key *);
static int     pf_patch_8(struct pf_pdesc *, u_int8_t *, u_int8_t,
    bool);
static int     pf_find_state(struct pf_pdesc *,
    const struct pf_state_key_cmp *, struct pf_kstate **);
static bool    pf_src_connlimit(struct pf_kstate *);
static int     pf_match_rcvif(struct mbuf *, struct pf_krule *);
static void    pf_counters_inc(int, struct pf_pdesc *,
    struct pf_kstate *, struct pf_krule *,
    struct pf_krule *, struct pf_krule_slist *);
static void    pf_log_matches(struct pf_pdesc *, struct pf_krule *,
    struct pf_krule *, struct pf_kruleset *,
    struct pf_krule_slist *);
static void    pf_overload_task(void *v, int pending);
static u_short pf_insert_src_node(struct pf_ksrc_node *[PF_SN_MAX],
    struct pf_srchash *[PF_SN_MAX], struct pf_krule *,
    struct pf_addr *, sa_family_t, struct pf_addr *,
    struct pfi_kkif *, sa_family_t, pf_sn_types_t);
static u_int   pf_purge_expired_states(u_int, int);
static void    pf_purge_unlinked_rules(void);
static int     pf_mtag_uminit(void *, int, int);
static void    pf_mtag_free(struct m_tag *);
static void    pf_packet_rework_nat(struct pf_pdesc *, int,
    struct pf_state_key *);
#ifdef INET
static int     pf_route(struct pf_krule *,
    struct ifnet *, struct pf_kstate *,
    struct pf_pdesc *, struct inpcb *);
#endif /* INET */
#ifdef INET6
static void    pf_change_a6(struct pf_addr *, u_int16_t *,
    struct pf_addr *, u_int8_t);
static int     pf_route6(struct pf_krule *,
    struct ifnet *, struct pf_kstate *,
    struct pf_pdesc *, struct inpcb *);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t);

int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);

extern int pf_end_threads;
extern struct proc *pf_purge_proc;

VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);

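/*
 * Undo a NAT rewrite already applied to a packet, e.g. before handing it
 * back to dummynet.  Outbound packets are restored from the stack-side
 * key, inbound ones from the wire-side key.
 */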
#define PACKET_UNDO_NAT(_pd, _off, _s) \
    do { \
        struct pf_state_key *nk; \
        if ((pd->dir) == PF_OUT) \
            nk = (_s)->key[PF_SK_STACK]; \
        else \
            nk = (_s)->key[PF_SK_WIRE]; \
        pf_packet_rework_nat(_pd, _off, nk); \
    } while (0)

#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \
    (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED)

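/*
 * Pick the interface a freshly created state is bound to.  Returning
 * V_pfi_all makes the state floating (matched on any interface); anything
 * else binds it to a specific kif.
 */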
static struct pfi_kkif *
BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd)
{
    struct pfi_kkif *k = pd->kif;

    SDT_PROBE2(pf, ip, , bound_iface, st, k);

    /* Floating unless otherwise specified. */
    if (!(st->rule->rule_flag & PFRULE_IFBOUND))
        return (V_pfi_all);

    /*
     * Initially set to all, because we don't know which interface we'll
     * be sending this out of when we create the state.
     */
    if (st->rule->rt == PF_REPLYTO || (pd->af != pd->naf && st->direction == PF_IN))
        return (V_pfi_all);

    /*
     * If this state is created based on another state (e.g. SCTP
     * multihome) always set it floating initially. We can't know for sure
     * what interface the actual traffic for this state will come in on.
     */
    if (pd->related_rule)
        return (V_pfi_all);

    /* Don't overrule the interface for states created on incoming packets. */
    if (st->direction == PF_IN)
        return (k);

    /* No route-to, so don't overrule. */
    if (st->act.rt != PF_ROUTETO)
        return (k);

    /* Bind to the route-to interface. */
    return (st->act.rt_kif);
}

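/*
 * Per-rule state accounting.  states_cur tracks currently allocated
 * states and is decremented again in STATE_DEC_COUNTERS; states_tot is a
 * lifetime total and is therefore only ever incremented.
 */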
#define STATE_INC_COUNTERS(s) \
    do { \
        struct pf_krule_item *mrm; \
        counter_u64_add(s->rule->states_cur, 1); \
        counter_u64_add(s->rule->states_tot, 1); \
        if (s->anchor != NULL) { \
            counter_u64_add(s->anchor->states_cur, 1); \
            counter_u64_add(s->anchor->states_tot, 1); \
        } \
        if (s->nat_rule != NULL && s->nat_rule != s->rule) { \
            counter_u64_add(s->nat_rule->states_cur, 1); \
            counter_u64_add(s->nat_rule->states_tot, 1); \
        } \
        SLIST_FOREACH(mrm, &s->match_rules, entry) { \
            if (s->nat_rule != mrm->r) { \
                counter_u64_add(mrm->r->states_cur, 1); \
                counter_u64_add(mrm->r->states_tot, 1); \
            } \
        } \
    } while (0)

#define STATE_DEC_COUNTERS(s) \
    do { \
        struct pf_krule_item *mrm; \
        counter_u64_add(s->rule->states_cur, -1); \
        if (s->anchor != NULL) \
            counter_u64_add(s->anchor->states_cur, -1); \
        if (s->nat_rule != NULL && s->nat_rule != s->rule) \
            counter_u64_add(s->nat_rule->states_cur, -1); \
        SLIST_FOREACH(mrm, &s->match_rules, entry) \
            if (s->nat_rule != mrm->r) { \
                counter_u64_add(mrm->r->states_cur, -1); \
            } \
    } while (0)

MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping);

SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "pf(4)");

VNET_DEFINE(u_long, pf_hashmask);
VNET_DEFINE(u_long, pf_srchashmask);
VNET_DEFINE(u_long, pf_udpendpointhashmask);
VNET_DEFINE_STATIC(u_long, pf_hashsize);
#define V_pf_hashsize VNET(pf_hashsize)
VNET_DEFINE_STATIC(u_long, pf_srchashsize);
#define V_pf_srchashsize VNET(pf_srchashsize)
VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize);
#define V_pf_udpendpointhashsize VNET(pf_udpendpointhashsize)
u_long pf_ioctl_maxcount = 65535;

SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
    &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
    &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
    &VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
    &pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");

VNET_DEFINE(void *, pf_swi_cookie);
VNET_DEFINE(struct intr_event *, pf_swi_ie);

VNET_DEFINE(uint32_t, pf_hashseed);
#define V_pf_hashseed VNET(pf_hashseed)

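/*
 * Recompute the SCTP CRC32c checksum: the checksum field must be zeroed
 * before sctp_calculate_cksum() runs, then the result is copied back.
 */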
static void
pf_sctp_checksum(struct mbuf *m, int off)
{
    uint32_t sum = 0;

    /* Zero out the checksum, to enable recalculation. */
    m_copyback(m, off + offsetof(struct sctphdr, checksum),
        sizeof(sum), (caddr_t)&sum);

    sum = sctp_calculate_cksum(m, off);

    m_copyback(m, off + offsetof(struct sctphdr, checksum),
        sizeof(sum), (caddr_t)&sum);
}

int
pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{

    switch (af) {
#ifdef INET
    case AF_INET:
        if (a->addr32[0] > b->addr32[0])
            return (1);
        if (a->addr32[0] < b->addr32[0])
            return (-1);
        break;
#endif /* INET */
#ifdef INET6
    case AF_INET6:
        if (a->addr32[3] > b->addr32[3])
            return (1);
        if (a->addr32[3] < b->addr32[3])
            return (-1);
        if (a->addr32[2] > b->addr32[2])
            return (1);
        if (a->addr32[2] < b->addr32[2])
            return (-1);
        if (a->addr32[1] > b->addr32[1])
            return (1);
        if (a->addr32[1] < b->addr32[1])
            return (-1);
        if (a->addr32[0] > b->addr32[0])
            return (1);
        if (a->addr32[0] < b->addr32[0])
            return (-1);
        break;
#endif /* INET6 */
    default:
        unhandled_af(af);
    }
    return (0);
}

static bool
pf_is_loopback(sa_family_t af, struct pf_addr *addr)
{
    switch (af) {
#ifdef INET
    case AF_INET:
        return IN_LOOPBACK(ntohl(addr->v4.s_addr));
#endif /* INET */
    case AF_INET6:
        return IN6_IS_ADDR_LOOPBACK(&addr->v6);
    default:
        unhandled_af(af);
    }
}

static void
pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
{

    switch (pd->virtual_proto) {
    case IPPROTO_TCP: {
        struct tcphdr *th = &pd->hdr.tcp;

        if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
            pf_change_ap(pd, pd->src, &th->th_sport,
                &nk->addr[pd->sidx], nk->port[pd->sidx]);
        if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
            pf_change_ap(pd, pd->dst, &th->th_dport,
                &nk->addr[pd->didx], nk->port[pd->didx]);
        m_copyback(pd->m, off, sizeof(*th), (caddr_t)th);
        break;
    }
    case IPPROTO_UDP: {
        struct udphdr *uh = &pd->hdr.udp;

        if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
            pf_change_ap(pd, pd->src, &uh->uh_sport,
                &nk->addr[pd->sidx], nk->port[pd->sidx]);
        if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
            pf_change_ap(pd, pd->dst, &uh->uh_dport,
                &nk->addr[pd->didx], nk->port[pd->didx]);
        m_copyback(pd->m, off, sizeof(*uh), (caddr_t)uh);
        break;
    }
    case IPPROTO_SCTP: {
        struct sctphdr *sh = &pd->hdr.sctp;

        if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
            pf_change_ap(pd, pd->src, &sh->src_port,
                &nk->addr[pd->sidx], nk->port[pd->sidx]);
        }
        if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
            pf_change_ap(pd, pd->dst, &sh->dest_port,
                &nk->addr[pd->didx], nk->port[pd->didx]);
        }

        break;
    }
    case IPPROTO_ICMP: {
        struct icmp *ih = &pd->hdr.icmp;

        if (nk->port[pd->sidx] != ih->icmp_id) {
            pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
                ih->icmp_cksum, ih->icmp_id,
                nk->port[pd->sidx], 0);
            ih->icmp_id = nk->port[pd->sidx];
            pd->sport = &ih->icmp_id;

            m_copyback(pd->m, off, ICMP_MINLEN, (caddr_t)ih);
        }
        /* FALLTHROUGH */
    }
    default:
        if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
            switch (pd->af) {
            case AF_INET:
                pf_change_a(&pd->src->v4.s_addr,
                    pd->ip_sum, nk->addr[pd->sidx].v4.s_addr,
                    0);
                break;
            case AF_INET6:
                pf_addrcpy(pd->src, &nk->addr[pd->sidx],
                    pd->af);
                break;
            default:
                unhandled_af(pd->af);
            }
        }
        if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
            switch (pd->af) {
            case AF_INET:
                pf_change_a(&pd->dst->v4.s_addr,
                    pd->ip_sum, nk->addr[pd->didx].v4.s_addr,
                    0);
                break;
            case AF_INET6:
                pf_addrcpy(pd->dst, &nk->addr[pd->didx],
                    pd->af);
                break;
            default:
                unhandled_af(pd->af);
            }
        }
        break;
    }
}

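/*
 * Hash table row selection.  These hash with murmur3, seeded by the
 * randomly initialized V_pf_hashseed, and mask with the power-of-two
 * table mask.  Only the _cmp prefix of the key structures is hashed, so
 * fields past it never influence the row.
 */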
static __inline uint32_t
pf_hashkey(const struct pf_state_key *sk)
{
    uint32_t h;

    h = murmur3_32_hash32((const uint32_t *)sk,
        sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
        V_pf_hashseed);

    return (h & V_pf_hashmask);
}

__inline uint32_t
pf_hashsrc(struct pf_addr *addr, sa_family_t af)
{
    uint32_t h;

    switch (af) {
    case AF_INET:
        h = murmur3_32_hash32((uint32_t *)&addr->v4,
            sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
        break;
    case AF_INET6:
        h = murmur3_32_hash32((uint32_t *)&addr->v6,
            sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
        break;
    default:
        unhandled_af(af);
    }

    return (h & V_pf_srchashmask);
}

static inline uint32_t
pf_hashudpendpoint(struct pf_udp_endpoint *endpoint)
{
    uint32_t h;

    h = murmur3_32_hash32((uint32_t *)endpoint,
        sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t),
        V_pf_hashseed);
    return (h & V_pf_udpendpointhashmask);
}

#ifdef ALTQ
static int
pf_state_hash(struct pf_kstate *s)
{
    u_int32_t hv = (intptr_t)s / sizeof(*s);

    hv ^= crc32(&s->src, sizeof(s->src));
    hv ^= crc32(&s->dst, sizeof(s->dst));
    if (hv == 0)
        hv = 1;
    return (hv);
}
#endif /* ALTQ */

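/*
 * Single entry point for protocol state transitions, so the halfopen
 * counter stays consistent: it is decremented exactly when a locally
 * created TCP state leaves the half-open range (becomes established or
 * closed).
 */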
static __inline void
pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate)
{
    if (which == PF_PEER_DST || which == PF_PEER_BOTH)
        s->dst.state = newstate;
    if (which == PF_PEER_DST)
        return;
    if (s->src.state == newstate)
        return;
    if (s->creatorid == V_pf_status.hostid &&
        s->key[PF_SK_STACK] != NULL &&
        s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
        !(TCPS_HAVEESTABLISHED(s->src.state) ||
        s->src.state == TCPS_CLOSED) &&
        (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
        atomic_add_32(&V_pf_status.states_halfopen, -1);

    s->src.state = newstate;
}

bool
pf_init_threshold(struct pf_kthreshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
    threshold->limit = limit;
    threshold->seconds = seconds;
    threshold->cr = counter_rate_alloc(M_NOWAIT, seconds);

    return (threshold->cr != NULL);
}

static int
pf_check_threshold(struct pf_kthreshold *threshold)
{
    return (counter_ratecheck(threshold->cr, threshold->limit) < 0);
}

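/*
 * Enforce max-src-conn / max-src-conn-rate for the state's source node.
 * On violation the state is scheduled for immediate purge and, if the
 * rule has an overload table, the offending address is queued for
 * insertion by pf_overload_task().
 */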
static bool
pf_src_connlimit(struct pf_kstate *state)
{
    struct pf_overload_entry *pfoe;
    struct pf_ksrc_node *src_node = state->sns[PF_SN_LIMIT];
    bool limited = false;

    PF_STATE_LOCK_ASSERT(state);
    PF_SRC_NODE_LOCK(src_node);

    src_node->conn++;
    state->src.tcp_est = 1;

    if (state->rule->max_src_conn &&
        state->rule->max_src_conn < src_node->conn) {
        counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
        limited = true;
    }

    if (state->rule->max_src_conn_rate.limit &&
        pf_check_threshold(&src_node->conn_rate)) {
        counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
        limited = true;
    }

    if (!limited)
        goto done;

    /* Kill this state. */
    state->timeout = PFTM_PURGE;
    pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED);

    if (state->rule->overload_tbl == NULL)
        goto done;

    /* Schedule overloading and flushing task. */
    pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
    if (pfoe == NULL)
        goto done;  /* too bad :( */

    bcopy(&src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
    pfoe->af = state->key[PF_SK_WIRE]->af;
    pfoe->rule = state->rule;
    pfoe->dir = state->direction;
    PF_OVERLOADQ_LOCK();
    SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
    PF_OVERLOADQ_UNLOCK();
    taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);

done:
    PF_SRC_NODE_UNLOCK(src_node);
    return (limited);
}

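/*
 * Deferred half of the overload handling: drains the overload queue,
 * inserts the queued addresses into their rules' overload tables and,
 * for rules with "flush" set, kills the matching states.
 */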
static void
pf_overload_task(void *v, int pending)
{
    struct pf_overload_head queue;
    struct pfr_addr p;
    struct pf_overload_entry *pfoe, *pfoe1;
    uint32_t killed = 0;

    CURVNET_SET((struct vnet *)v);

    PF_OVERLOADQ_LOCK();
    queue = V_pf_overloadqueue;
    SLIST_INIT(&V_pf_overloadqueue);
    PF_OVERLOADQ_UNLOCK();

    bzero(&p, sizeof(p));
    SLIST_FOREACH(pfoe, &queue, next) {
        counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
        if (V_pf_status.debug >= PF_DEBUG_MISC) {
            printf("%s: blocking address ", __func__);
            pf_print_host(&pfoe->addr, 0, pfoe->af);
            printf("\n");
        }

        p.pfra_af = pfoe->af;
        switch (pfoe->af) {
#ifdef INET
        case AF_INET:
            p.pfra_net = 32;
            p.pfra_ip4addr = pfoe->addr.v4;
            break;
#endif /* INET */
#ifdef INET6
        case AF_INET6:
            p.pfra_net = 128;
            p.pfra_ip6addr = pfoe->addr.v6;
            break;
#endif /* INET6 */
        default:
            unhandled_af(pfoe->af);
        }

        PF_RULES_WLOCK();
        pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
        PF_RULES_WUNLOCK();
    }

    /*
     * Remove those entries that don't need flushing.
     */
    SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
        if (pfoe->rule->flush == 0) {
            SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
            free(pfoe, M_PFTEMP);
        } else
            counter_u64_add(
                V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);

    /* If nothing to flush, return. */
    if (SLIST_EMPTY(&queue)) {
        CURVNET_RESTORE();
        return;
    }

    for (int i = 0; i <= V_pf_hashmask; i++) {
        struct pf_idhash *ih = &V_pf_idhash[i];
        struct pf_state_key *sk;
        struct pf_kstate *s;

        PF_HASHROW_LOCK(ih);
        LIST_FOREACH(s, &ih->states, entry) {
            sk = s->key[PF_SK_WIRE];
            SLIST_FOREACH(pfoe, &queue, next)
                if (sk->af == pfoe->af &&
                    ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
                    pfoe->rule == s->rule) &&
                    ((pfoe->dir == PF_OUT &&
                    PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
                    (pfoe->dir == PF_IN &&
                    PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
                    s->timeout = PFTM_PURGE;
                    pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
                    killed++;
                }
        }
        PF_HASHROW_UNLOCK(ih);
    }
    SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
        free(pfoe, M_PFTEMP);
    if (V_pf_status.debug >= PF_DEBUG_MISC)
        printf("%s: %u states killed", __func__, killed);

    CURVNET_RESTORE();
}

/*
 * On node found, always returns locked. On not found, locking is
 * configurable via returnlocked.
 */
struct pf_ksrc_node *
pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af,
    struct pf_srchash **sh, pf_sn_types_t sn_type, bool returnlocked)
{
    struct pf_ksrc_node *n;

    counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);

    *sh = &V_pf_srchash[pf_hashsrc(src, af)];
    PF_HASHROW_LOCK(*sh);
    LIST_FOREACH(n, &(*sh)->nodes, entry)
        if (n->rule == rule && n->af == af && n->type == sn_type &&
            ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
            (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
            break;

    if (n == NULL && !returnlocked)
        PF_HASHROW_UNLOCK(*sh);

    return (n);
}

bool
pf_src_node_exists(struct pf_ksrc_node **sn, struct pf_srchash *sh)
{
    struct pf_ksrc_node *cur;

    if ((*sn) == NULL)
        return (false);

    KASSERT(sh != NULL, ("%s: sh is NULL", __func__));

    counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
    PF_HASHROW_LOCK(sh);
    LIST_FOREACH(cur, &(sh->nodes), entry) {
        if (cur == (*sn) &&
            cur->expire != 1) /* Ignore nodes being killed */
            return (true);
    }
    PF_HASHROW_UNLOCK(sh);
    (*sn) = NULL;
    return (false);
}

static void
pf_free_src_node(struct pf_ksrc_node *sn)
{

    for (int i = 0; i < 2; i++) {
        counter_u64_free(sn->bytes[i]);
        counter_u64_free(sn->packets[i]);
    }
    counter_rate_free(sn->conn_rate.cr);
    uma_zfree(V_pf_sources_z, sn);
}

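/*
 * Find or create the source node for a new state, enforcing
 * max-src-nodes and max-src-states along the way.  Returns 0 with *sn
 * set (and its states counter bumped) on success, or a PFRES_* reason
 * code with *sn cleared on failure.
 */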
static u_short
pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX],
    struct pf_srchash *snhs[PF_SN_MAX], struct pf_krule *rule,
    struct pf_addr *src, sa_family_t af, struct pf_addr *raddr,
    struct pfi_kkif *rkif, sa_family_t raf, pf_sn_types_t sn_type)
{
    u_short reason = 0;
    struct pf_krule *r_track = rule;
    struct pf_ksrc_node **sn = &(sns[sn_type]);
    struct pf_srchash **sh = &(snhs[sn_type]);

    KASSERT(sn_type != PF_SN_LIMIT || (raddr == NULL && rkif == NULL),
        ("%s: raddr and rkif must be NULL for PF_SN_LIMIT", __func__));

    KASSERT(sn_type != PF_SN_LIMIT || (rule->rule_flag & PFRULE_SRCTRACK),
        ("%s: PF_SN_LIMIT only valid for rules with PFRULE_SRCTRACK", __func__));

    /*
     * XXX: There could be a KASSERT for
     * sn_type == PF_SN_LIMIT || (pool->opts & PF_POOL_STICKYADDR)
     * but we'd need to pass pool *only* for this KASSERT.
     */

    if ((rule->rule_flag & PFRULE_SRCTRACK) &&
        !(rule->rule_flag & PFRULE_RULESRCTRACK))
        r_track = &V_pf_default_rule;

    /*
     * Request the sh to always be locked, as we might insert a new sn.
     */
    if (*sn == NULL)
        *sn = pf_find_src_node(src, r_track, af, sh, sn_type, true);

    if (*sn == NULL) {
        PF_HASHROW_ASSERT(*sh);

        if (sn_type == PF_SN_LIMIT && rule->max_src_nodes &&
            counter_u64_fetch(r_track->src_nodes[sn_type]) >= rule->max_src_nodes) {
            counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1);
            reason = PFRES_SRCLIMIT;
            goto done;
        }

        (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
        if ((*sn) == NULL) {
            reason = PFRES_MEMORY;
            goto done;
        }

        for (int i = 0; i < 2; i++) {
            (*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT);
            (*sn)->packets[i] = counter_u64_alloc(M_NOWAIT);

            if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) {
                pf_free_src_node(*sn);
                reason = PFRES_MEMORY;
                goto done;
            }
        }

        if (sn_type == PF_SN_LIMIT)
            if (!pf_init_threshold(&(*sn)->conn_rate,
                rule->max_src_conn_rate.limit,
                rule->max_src_conn_rate.seconds)) {
                pf_free_src_node(*sn);
                reason = PFRES_MEMORY;
                goto done;
            }

        MPASS((*sn)->lock == NULL);
        (*sn)->lock = &(*sh)->lock;

        (*sn)->af = af;
        (*sn)->rule = r_track;
        pf_addrcpy(&(*sn)->addr, src, af);
        if (raddr != NULL)
            pf_addrcpy(&(*sn)->raddr, raddr, raf);
        (*sn)->rkif = rkif;
        (*sn)->raf = raf;
        LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry);
        (*sn)->creation = time_uptime;
        (*sn)->ruletype = rule->action;
        (*sn)->type = sn_type;
        counter_u64_add(r_track->src_nodes[sn_type], 1);
        counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
    } else {
        if (sn_type == PF_SN_LIMIT && rule->max_src_states &&
            (*sn)->states >= rule->max_src_states) {
            counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
                1);
            reason = PFRES_SRCLIMIT;
            goto done;
        }
    }
done:
    if (reason == 0)
        (*sn)->states++;
    else
        (*sn) = NULL;

    PF_HASHROW_UNLOCK(*sh);
    return (reason);
}

void
pf_unlink_src_node(struct pf_ksrc_node *src)
{
    PF_SRC_NODE_LOCK_ASSERT(src);

    LIST_REMOVE(src, entry);
    if (src->rule)
        counter_u64_add(src->rule->src_nodes[src->type], -1);
}

u_int
pf_free_src_nodes(struct pf_ksrc_node_list *head)
{
    struct pf_ksrc_node *sn, *tmp;
    u_int count = 0;

    LIST_FOREACH_SAFE(sn, head, entry, tmp) {
        pf_free_src_node(sn);
        count++;
    }

    counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);

    return (count);
}

void
pf_mtag_initialize(void)
{

    pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
        sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
        UMA_ALIGN_PTR, 0);
}

/* Per-vnet data storage structures initialization. */
void
pf_initialize(void)
{
    struct pf_keyhash *kh;
    struct pf_idhash *ih;
    struct pf_srchash *sh;
    struct pf_udpendpointhash *uh;
    u_int i;

    if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
        V_pf_hashsize = PF_HASHSIZ;
    if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
        V_pf_srchashsize = PF_SRCHASHSIZ;
    if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize))
        V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;

    V_pf_hashseed = arc4random();

    /* States and state keys storage. */
    V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate),
        NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
    V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
    uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
    uma_zone_set_warning(V_pf_state_z, "PF states limit reached");

    V_pf_state_key_z = uma_zcreate("pf state keys",
        sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
        UMA_ALIGN_PTR, 0);

    V_pf_keyhash = mallocarray(V_pf_hashsize, sizeof(struct pf_keyhash),
        M_PFHASH, M_NOWAIT | M_ZERO);
    V_pf_idhash = mallocarray(V_pf_hashsize, sizeof(struct pf_idhash),
        M_PFHASH, M_NOWAIT | M_ZERO);
    if (V_pf_keyhash == NULL || V_pf_idhash == NULL) {
        printf("pf: Unable to allocate memory for "
            "state_hashsize %lu.\n", V_pf_hashsize);

        free(V_pf_keyhash, M_PFHASH);
        free(V_pf_idhash, M_PFHASH);

        V_pf_hashsize = PF_HASHSIZ;
        V_pf_keyhash = mallocarray(V_pf_hashsize,
            sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO);
        V_pf_idhash = mallocarray(V_pf_hashsize,
            sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO);
    }

    V_pf_hashmask = V_pf_hashsize - 1;
    for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
        i++, kh++, ih++) {
        mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
        mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
    }

    /* Source nodes. */
    V_pf_sources_z = uma_zcreate("pf source nodes",
        sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
        0);
    V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
    uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
    uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");

    V_pf_srchash = mallocarray(V_pf_srchashsize,
        sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO);
    if (V_pf_srchash == NULL) {
        printf("pf: Unable to allocate memory for "
            "source_hashsize %lu.\n", V_pf_srchashsize);

        V_pf_srchashsize = PF_SRCHASHSIZ;
        V_pf_srchash = mallocarray(V_pf_srchashsize,
            sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO);
    }

    V_pf_srchashmask = V_pf_srchashsize - 1;
    for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
        mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);

    /* UDP endpoint mappings. */
    V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings",
        sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL,
        UMA_ALIGN_PTR, 0);
    V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
        sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO);
    if (V_pf_udpendpointhash == NULL) {
        printf("pf: Unable to allocate memory for "
            "udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize);

        V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
        V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
            sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO);
    }

    V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1;
    for (i = 0, uh = V_pf_udpendpointhash;
        i <= V_pf_udpendpointhashmask;
        i++, uh++) {
        mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
            MTX_DEF | MTX_DUPOK);
    }

    /* Anchors */
    V_pf_anchor_z = uma_zcreate("pf anchors",
        sizeof(struct pf_kanchor), NULL, NULL, NULL, NULL,
        UMA_ALIGN_PTR, 0);
    V_pf_limits[PF_LIMIT_ANCHORS].zone = V_pf_anchor_z;
    uma_zone_set_max(V_pf_anchor_z, PF_ANCHOR_HIWAT);
    uma_zone_set_warning(V_pf_anchor_z, "PF anchor limit reached");

    V_pf_eth_anchor_z = uma_zcreate("pf Ethernet anchors",
        sizeof(struct pf_keth_anchor), NULL, NULL, NULL, NULL,
        UMA_ALIGN_PTR, 0);
    V_pf_limits[PF_LIMIT_ETH_ANCHORS].zone = V_pf_eth_anchor_z;
    uma_zone_set_max(V_pf_eth_anchor_z, PF_ANCHOR_HIWAT);
    uma_zone_set_warning(V_pf_eth_anchor_z, "PF Ethernet anchor limit reached");

    /* ALTQ */
    TAILQ_INIT(&V_pf_altqs[0]);
    TAILQ_INIT(&V_pf_altqs[1]);
    TAILQ_INIT(&V_pf_altqs[2]);
    TAILQ_INIT(&V_pf_altqs[3]);
    TAILQ_INIT(&V_pf_pabuf[0]);
    TAILQ_INIT(&V_pf_pabuf[1]);
    TAILQ_INIT(&V_pf_pabuf[2]);
    V_pf_altqs_active = &V_pf_altqs[0];
    V_pf_altq_ifs_active = &V_pf_altqs[1];
    V_pf_altqs_inactive = &V_pf_altqs[2];
    V_pf_altq_ifs_inactive = &V_pf_altqs[3];

    /* Send & overload+flush queues. */
    STAILQ_INIT(&V_pf_sendqueue);
    SLIST_INIT(&V_pf_overloadqueue);
    TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);

    /* Unlinked rules that may still be referenced. */
    TAILQ_INIT(&V_pf_unlinked_rules);
}

void
pf_mtag_cleanup(void)
{

    uma_zdestroy(pf_mtag_z);
}

void
pf_cleanup(void)
{
    struct pf_keyhash *kh;
    struct pf_idhash *ih;
    struct pf_srchash *sh;
    struct pf_udpendpointhash *uh;
    struct pf_send_entry *pfse, *next;
    u_int i;

    for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash;
        i <= V_pf_hashmask;
        i++, kh++, ih++) {
        KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
            __func__));
        KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
            __func__));
        mtx_destroy(&kh->lock);
        mtx_destroy(&ih->lock);
    }
    free(V_pf_keyhash, M_PFHASH);
    free(V_pf_idhash, M_PFHASH);

    for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
        KASSERT(LIST_EMPTY(&sh->nodes),
            ("%s: source node hash not empty", __func__));
        mtx_destroy(&sh->lock);
    }
    free(V_pf_srchash, M_PFHASH);

    for (i = 0, uh = V_pf_udpendpointhash;
        i <= V_pf_udpendpointhashmask;
        i++, uh++) {
        KASSERT(LIST_EMPTY(&uh->endpoints),
            ("%s: udp endpoint hash not empty", __func__));
        mtx_destroy(&uh->lock);
    }
    free(V_pf_udpendpointhash, M_PFHASH);

    STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
        m_freem(pfse->pfse_m);
        free(pfse, M_PFTEMP);
    }
    MPASS(RB_EMPTY(&V_pf_sctp_endpoints));

    uma_zdestroy(V_pf_sources_z);
    uma_zdestroy(V_pf_state_z);
    uma_zdestroy(V_pf_state_key_z);
    uma_zdestroy(V_pf_udp_mapping_z);
    uma_zdestroy(V_pf_anchor_z);
    uma_zdestroy(V_pf_eth_anchor_z);
}

static int
pf_mtag_uminit(void *mem, int size, int how)
{
    struct m_tag *t;

    t = (struct m_tag *)mem;
    t->m_tag_cookie = MTAG_ABI_COMPAT;
    t->m_tag_id = PACKET_TAG_PF;
    t->m_tag_len = sizeof(struct pf_mtag);
    t->m_tag_free = pf_mtag_free;

    return (0);
}

static void
pf_mtag_free(struct m_tag *t)
{

    uma_zfree(pf_mtag_z, t);
}

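/*
 * The pf mbuf tag is allocated as one object: the struct m_tag header
 * immediately followed by struct pf_mtag, which is why "mtag + 1" below
 * yields the payload.
 */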
struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
    struct m_tag *mtag;

    if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
        return ((struct pf_mtag *)(mtag + 1));

    mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
    if (mtag == NULL)
        return (NULL);
    bzero(mtag + 1, sizeof(struct pf_mtag));
    m_tag_prepend(m, mtag);

    return ((struct pf_mtag *)(mtag + 1));
}

static int
pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
    struct pf_kstate *s)
{
    struct pf_keyhash *khs, *khw, *kh;
    struct pf_state_key *sk, *cur;
    struct pf_kstate *si, *olds = NULL;
    int idx;

    NET_EPOCH_ASSERT();
    KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
    KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
    KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));

    /*
     * We need to lock hash slots of both keys. To avoid deadlock
     * we always lock the slot with lower address first. Unlock order
     * isn't important.
     *
     * We also need to lock ID hash slot before dropping key
     * locks. On success we return with ID hash slot locked.
     */

    if (skw == sks) {
        khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
        PF_HASHROW_LOCK(khs);
    } else {
        khs = &V_pf_keyhash[pf_hashkey(sks)];
        khw = &V_pf_keyhash[pf_hashkey(skw)];
        if (khs == khw) {
            PF_HASHROW_LOCK(khs);
        } else if (khs < khw) {
            PF_HASHROW_LOCK(khs);
            PF_HASHROW_LOCK(khw);
        } else {
            PF_HASHROW_LOCK(khw);
            PF_HASHROW_LOCK(khs);
        }
    }

#define KEYS_UNLOCK() do { \
    if (khs != khw) { \
        PF_HASHROW_UNLOCK(khs); \
        PF_HASHROW_UNLOCK(khw); \
    } else \
        PF_HASHROW_UNLOCK(khs); \
} while (0)

    /*
     * First run: start with wire key.
     */
    sk = skw;
    kh = khw;
    idx = PF_SK_WIRE;

    MPASS(s->lock == NULL);
    s->lock = &V_pf_idhash[PF_IDHASH(s)].lock;

keyattach:
    LIST_FOREACH(cur, &kh->keys, entry)
        if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
            break;

    if (cur != NULL) {
        /* Key exists. Check for same kif, if none, add to key. */
        TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
            struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];

            PF_HASHROW_LOCK(ih);
            if (si->kif == s->kif &&
                ((si->key[PF_SK_WIRE]->af == sk->af &&
                si->direction == s->direction) ||
                (si->key[PF_SK_WIRE]->af !=
                si->key[PF_SK_STACK]->af &&
                sk->af == si->key[PF_SK_STACK]->af &&
                si->direction != s->direction))) {
                bool reuse = false;

                if (sk->proto == IPPROTO_TCP &&
                    si->src.state >= TCPS_FIN_WAIT_2 &&
                    si->dst.state >= TCPS_FIN_WAIT_2)
                    reuse = true;

                if (V_pf_status.debug >= PF_DEBUG_MISC) {
                    printf("pf: %s key attach "
                        "%s on %s: ",
                        (idx == PF_SK_WIRE) ?
                        "wire" : "stack",
                        reuse ? "reuse" : "failed",
                        s->kif->pfik_name);
                    pf_print_state_parts(s,
                        (idx == PF_SK_WIRE) ?
                        sk : NULL,
                        (idx == PF_SK_STACK) ?
                        sk : NULL);
                    printf(", existing: ");
                    pf_print_state_parts(si,
                        (idx == PF_SK_WIRE) ?
                        sk : NULL,
                        (idx == PF_SK_STACK) ?
                        sk : NULL);
                    printf("\n");
                }

                if (reuse) {
                    /*
                     * New state matches an old >FIN_WAIT_2
                     * state. We can't drop key hash locks,
                     * thus we can't unlink it properly.
                     *
                     * As a workaround we drop it into
                     * TCPS_CLOSED state, schedule purge
                     * ASAP and push it into the very end
                     * of the slot TAILQ, so that it won't
                     * conflict with our new state.
                     */
                    pf_set_protostate(si, PF_PEER_BOTH,
                        TCPS_CLOSED);
                    si->timeout = PFTM_PURGE;
                    olds = si;
                } else {
                    s->timeout = PFTM_UNLINKED;
                    if (idx == PF_SK_STACK)
                        /*
                         * Remove the wire key from
                         * the hash. Other threads
                         * can't be referencing it
                         * because we still hold the
                         * hash lock.
                         */
                        pf_state_key_detach(s,
                            PF_SK_WIRE);
                    PF_HASHROW_UNLOCK(ih);
                    KEYS_UNLOCK();
                    if (idx == PF_SK_WIRE)
                        /*
                         * We've not inserted either key.
                         * Free both.
                         */
                        uma_zfree(V_pf_state_key_z, skw);
                    if (skw != sks)
                        uma_zfree(
                            V_pf_state_key_z,
                            sks);
                    return (EEXIST); /* collision! */
                }
            }
            PF_HASHROW_UNLOCK(ih);
        }
        uma_zfree(V_pf_state_key_z, sk);
        s->key[idx] = cur;
    } else {
        LIST_INSERT_HEAD(&kh->keys, sk, entry);
        s->key[idx] = sk;
    }

stateattach:
    /* List is sorted, if-bound states before floating. */
    if (s->kif == V_pfi_all)
        TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
    else
        TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);

    if (olds) {
        TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
        TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
            key_list[idx]);
        olds = NULL;
    }

    /*
     * Attach done. Now decide whether a second key needs to be
     * attached.
     */
    if (sks == skw) {
        s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
        idx = PF_SK_STACK;
        sks = NULL;
        goto stateattach;
    } else if (sks != NULL) {
        /*
         * Continue attaching with stack key.
         */
        sk = sks;
        kh = khs;
        idx = PF_SK_STACK;
        sks = NULL;
        goto keyattach;
    }

    PF_STATE_LOCK(s);
    KEYS_UNLOCK();

    KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
        ("%s failure", __func__));

    return (0);
#undef KEYS_UNLOCK
}

static void
pf_detach_state(struct pf_kstate *s)
{
    struct pf_state_key *sks = s->key[PF_SK_STACK];
    struct pf_keyhash *kh;

    NET_EPOCH_ASSERT();
    MPASS(s->timeout >= PFTM_MAX);

    pf_sctp_multihome_detach_addr(s);

    if ((s->state_flags & PFSTATE_PFLOW) && V_pflow_export_state_ptr)
        V_pflow_export_state_ptr(s);

    if (sks != NULL) {
        kh = &V_pf_keyhash[pf_hashkey(sks)];
        PF_HASHROW_LOCK(kh);
        if (s->key[PF_SK_STACK] != NULL)
            pf_state_key_detach(s, PF_SK_STACK);
        /*
         * If both point to same key, then we are done.
         */
        if (sks == s->key[PF_SK_WIRE]) {
            pf_state_key_detach(s, PF_SK_WIRE);
            PF_HASHROW_UNLOCK(kh);
            return;
        }
        PF_HASHROW_UNLOCK(kh);
    }

    if (s->key[PF_SK_WIRE] != NULL) {
        kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
        PF_HASHROW_LOCK(kh);
        if (s->key[PF_SK_WIRE] != NULL)
            pf_state_key_detach(s, PF_SK_WIRE);
        PF_HASHROW_UNLOCK(kh);
    }
}

static void
pf_state_key_detach(struct pf_kstate *s, int idx)
{
    struct pf_state_key *sk = s->key[idx];
#ifdef INVARIANTS
    struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];

    PF_HASHROW_ASSERT(kh);
#endif /* INVARIANTS */
    TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
    s->key[idx] = NULL;

    if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
        LIST_REMOVE(sk, entry);
        uma_zfree(V_pf_state_key_z, sk);
    }
}

static int
pf_state_key_ctor(void *mem, int size, void *arg, int flags)
{
    struct pf_state_key *sk = mem;

    bzero(sk, sizeof(struct pf_state_key_cmp));
    TAILQ_INIT(&sk->states[PF_SK_WIRE]);
    TAILQ_INIT(&sk->states[PF_SK_STACK]);

    return (0);
}

static int
pf_state_key_addr_setup(struct pf_pdesc *pd,
    struct pf_state_key_cmp *key, int multi)
{
    struct pf_addr *saddr = pd->src;
    struct pf_addr *daddr = pd->dst;
#ifdef INET6
    struct nd_neighbor_solicit nd;
    struct pf_addr *target;

    if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6)
        goto copy;

    switch (pd->hdr.icmp6.icmp6_type) {
    case ND_NEIGHBOR_SOLICIT:
        if (multi)
            return (-1);
        if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
            pd->af))
            return (-1);
        target = (struct pf_addr *)&nd.nd_ns_target;
        daddr = target;
        break;
    case ND_NEIGHBOR_ADVERT:
        if (multi)
            return (-1);
        if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
            pd->af))
            return (-1);
        target = (struct pf_addr *)&nd.nd_ns_target;
        saddr = target;
        if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
            key->addr[pd->didx].addr32[0] = 0;
            key->addr[pd->didx].addr32[1] = 0;
            key->addr[pd->didx].addr32[2] = 0;
            key->addr[pd->didx].addr32[3] = 0;
            daddr = NULL; /* overwritten */
        }
        break;
    default:
        if (multi) {
            key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL;
            key->addr[pd->sidx].addr32[1] = 0;
            key->addr[pd->sidx].addr32[2] = 0;
            key->addr[pd->sidx].addr32[3] = IPV6_ADDR_INT32_ONE;
            saddr = NULL; /* overwritten */
        }
    }
copy:
#endif /* INET6 */
    if (saddr)
        pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af);
    if (daddr)
        pf_addrcpy(&key->addr[pd->didx], daddr, pd->af);

    return (0);
}

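/*
 * Build the wire ("sk") and stack/NAT ("nk") keys for a new state.  For
 * af-translation (af != naf) the nk key gets the post-translation
 * family and addresses, with source/destination swapped for inbound
 * packets, and ICMP <-> ICMPv6 protocol numbers exchanged.
 */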
int
1726
pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport,
1727
struct pf_state_key **sk, struct pf_state_key **nk)
1728
{
1729
*sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
1730
if (*sk == NULL)
1731
return (ENOMEM);
1732
1733
if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)*sk,
1734
0)) {
1735
uma_zfree(V_pf_state_key_z, *sk);
1736
*sk = NULL;
1737
return (ENOMEM);
1738
}
1739
1740
(*sk)->port[pd->sidx] = sport;
1741
(*sk)->port[pd->didx] = dport;
1742
(*sk)->proto = pd->proto;
1743
(*sk)->af = pd->af;
1744
1745
*nk = pf_state_key_clone(*sk);
1746
if (*nk == NULL) {
1747
uma_zfree(V_pf_state_key_z, *sk);
1748
*sk = NULL;
1749
return (ENOMEM);
1750
}
1751
1752
if (pd->af != pd->naf) {
1753
(*sk)->port[pd->sidx] = pd->osport;
1754
(*sk)->port[pd->didx] = pd->odport;
1755
1756
(*nk)->af = pd->naf;
1757
1758
/*
1759
* We're overwriting an address here, so potentially there's bits of an IPv6
1760
* address left in here. Clear that out first.
1761
*/
1762
bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0]));
1763
bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1]));
1764
if (pd->dir == PF_IN) {
1765
pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr,
1766
pd->naf);
1767
pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr,
1768
pd->naf);
1769
(*nk)->port[pd->didx] = pd->nsport;
1770
(*nk)->port[pd->sidx] = pd->ndport;
1771
} else {
1772
pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr,
1773
pd->naf);
1774
pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr,
1775
pd->naf);
1776
(*nk)->port[pd->sidx] = pd->nsport;
1777
(*nk)->port[pd->didx] = pd->ndport;
1778
}
1779
1780
switch (pd->proto) {
1781
case IPPROTO_ICMP:
1782
(*nk)->proto = IPPROTO_ICMPV6;
1783
break;
1784
case IPPROTO_ICMPV6:
1785
(*nk)->proto = IPPROTO_ICMP;
1786
break;
1787
default:
1788
(*nk)->proto = pd->proto;
1789
}
1790
}
1791
1792
return (0);
1793
}
1794
1795
struct pf_state_key *
1796
pf_state_key_clone(const struct pf_state_key *orig)
1797
{
1798
struct pf_state_key *sk;
1799
1800
sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
1801
if (sk == NULL)
1802
return (NULL);
1803
1804
bcopy(orig, sk, sizeof(struct pf_state_key_cmp));
1805
1806
return (sk);
1807
}
1808
1809
int
pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif,
    struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s)
{
	struct pf_idhash *ih;
	struct pf_kstate *cur;
	int error;

	NET_EPOCH_ASSERT();

	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
	    ("%s: sks not pristine", __func__));
	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
	    ("%s: skw not pristine", __func__));
	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));

	s->kif = kif;
	s->orig_kif = orig_kif;

	if (s->id == 0 && s->creatorid == 0) {
		s->id = alloc_unr64(&V_pf_stateid);
		s->id = htobe64(s->id);
		s->creatorid = V_pf_status.hostid;
	}

	/* Returns with ID locked on success. */
	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
		return (error);
	skw = sks = NULL;

	ih = &V_pf_idhash[PF_IDHASH(s)];
	PF_HASHROW_ASSERT(ih);
	LIST_FOREACH(cur, &ih->states, entry)
		if (cur->id == s->id && cur->creatorid == s->creatorid)
			break;

	if (cur != NULL) {
		s->timeout = PFTM_UNLINKED;
		PF_HASHROW_UNLOCK(ih);
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state ID collision: "
			    "id: %016llx creatorid: %08x\n",
			    (unsigned long long)be64toh(s->id),
			    ntohl(s->creatorid));
		}
		pf_detach_state(s);
		return (EEXIST);
	}
	LIST_INSERT_HEAD(&ih->states, s, entry);
	/* One for keys, one for ID hash. */
	refcount_init(&s->refs, 2);

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
	if (V_pfsync_insert_state_ptr != NULL)
		V_pfsync_insert_state_ptr(s);

	/* Returns locked. */
	return (0);
}

/*
 * Find state by ID: returns with locked row on success.
 */
struct pf_kstate *
pf_find_state_byid(uint64_t id, uint32_t creatorid)
{
	struct pf_idhash *ih;
	struct pf_kstate *s;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	ih = &V_pf_idhash[PF_IDHASHID(id)];

	PF_HASHROW_LOCK(ih);
	LIST_FOREACH(s, &ih->states, entry)
		if (s->id == id && s->creatorid == creatorid)
			break;

	if (s == NULL)
		PF_HASHROW_UNLOCK(ih);

	return (s);
}

/*
 * Find state by key.
 * Returns with ID hash slot locked on success.
 */
static int
pf_find_state(struct pf_pdesc *pd, const struct pf_state_key_cmp *key,
    struct pf_kstate **state)
{
	struct pf_keyhash *kh;
	struct pf_state_key *sk;
	struct pf_kstate *s;
	int idx;

	*state = NULL;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];

	PF_HASHROW_LOCK(kh);
	LIST_FOREACH(sk, &kh->keys, entry)
		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
			break;
	if (sk == NULL) {
		PF_HASHROW_UNLOCK(kh);
		return (PF_DROP);
	}

	idx = (pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);

	/* List is sorted, if-bound states before floating ones. */
	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
		if (s->kif == V_pfi_all || s->kif == pd->kif ||
		    s->orig_kif == pd->kif) {
			PF_STATE_LOCK(s);
			PF_HASHROW_UNLOCK(kh);
			if (__predict_false(s->timeout >= PFTM_MAX)) {
				/*
				 * State is either being processed by
				 * pf_remove_state() in another thread, or
				 * is scheduled for immediate expiry.
				 */
				PF_STATE_UNLOCK(s);
				SDT_PROBE5(pf, ip, state, lookup, pd->kif,
				    key, (pd->dir), pd, *state);
				return (PF_DROP);
			}
			goto out;
		}

	/* Look through the other list, in case of AF-TO */
	idx = idx == PF_SK_WIRE ? PF_SK_STACK : PF_SK_WIRE;
	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
		if (s->key[PF_SK_WIRE]->af == s->key[PF_SK_STACK]->af)
			continue;
		if (s->kif == V_pfi_all || s->kif == pd->kif ||
		    s->orig_kif == pd->kif) {
			PF_STATE_LOCK(s);
			PF_HASHROW_UNLOCK(kh);
			if (__predict_false(s->timeout >= PFTM_MAX)) {
				/*
				 * State is either being processed by
				 * pf_remove_state() in another thread, or
				 * is scheduled for immediate expiry.
				 */
				PF_STATE_UNLOCK(s);
				SDT_PROBE5(pf, ip, state, lookup, pd->kif,
				    key, (pd->dir), pd, NULL);
				return (PF_DROP);
			}
			goto out;
		}
	}

	PF_HASHROW_UNLOCK(kh);

out:
	SDT_PROBE5(pf, ip, state, lookup, pd->kif, key, (pd->dir), pd, *state);

	if (s == NULL || s->timeout == PFTM_PURGE) {
		if (s)
			PF_STATE_UNLOCK(s);
		return (PF_DROP);
	}

	if ((s)->rule->pktrate.limit && pd->dir == (s)->direction) {
		if (pf_check_threshold(&(s)->rule->pktrate)) {
			PF_STATE_UNLOCK(s);
			return (PF_DROP);
		}
	}
	if (PACKET_LOOPED(pd)) {
		PF_STATE_UNLOCK(s);
		return (PF_PASS);
	}

	*state = s;

	return (PF_MATCH);
}

/*
 * Returns with ID hash slot locked on success.
 */
struct pf_kstate *
pf_find_state_all(const struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_keyhash *kh;
	struct pf_state_key *sk;
	struct pf_kstate *s, *ret = NULL;
	int idx, inout = 0;

	if (more != NULL)
		*more = 0;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];

	PF_HASHROW_LOCK(kh);
	LIST_FOREACH(sk, &kh->keys, entry)
		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
			break;
	if (sk == NULL) {
		PF_HASHROW_UNLOCK(kh);
		return (NULL);
	}
	switch (dir) {
	case PF_IN:
		idx = PF_SK_WIRE;
		break;
	case PF_OUT:
		idx = PF_SK_STACK;
		break;
	case PF_INOUT:
		idx = PF_SK_WIRE;
		inout = 1;
		break;
	default:
		panic("%s: dir %u", __func__, dir);
	}
second_run:
	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
		if (more == NULL) {
			PF_STATE_LOCK(s);
			PF_HASHROW_UNLOCK(kh);
			return (s);
		}

		if (ret)
			(*more)++;
		else {
			ret = s;
			PF_STATE_LOCK(s);
		}
	}
	if (inout == 1) {
		inout = 0;
		idx = PF_SK_STACK;
		goto second_run;
	}
	PF_HASHROW_UNLOCK(kh);

	return (ret);
}

/*
 * FIXME
 * This routine is inefficient -- it locks the state only to unlock it
 * again immediately before returning.
 * It is racy -- once the state is unlocked nothing stops other threads
 * from removing it.
 */
bool
pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_kstate *s;

	s = pf_find_state_all(key, dir, NULL);
	if (s != NULL) {
		PF_STATE_UNLOCK(s);
		return (true);
	}
	return (false);
}

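/*
 * Convert a state peer between its kernel and export representations.
 * The export form (as used e.g. by pfsync and state export) keeps
 * multi-byte fields in network byte order.
 */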
void
pf_state_peer_hton(const struct pf_state_peer *s, struct pf_state_peer_export *d)
{
	d->seqlo = htonl(s->seqlo);
	d->seqhi = htonl(s->seqhi);
	d->seqdiff = htonl(s->seqdiff);
	d->max_win = htons(s->max_win);
	d->mss = htons(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub) {
		d->scrub.pfss_flags = htons(
		    s->scrub->pfss_flags & PFSS_TIMESTAMP);
		d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
		d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
		d->scrub.scrub_flag = PF_SCRUB_FLAG_VALID;
	}
}

void
pf_state_peer_ntoh(const struct pf_state_peer_export *s, struct pf_state_peer *d)
{
	d->seqlo = ntohl(s->seqlo);
	d->seqhi = ntohl(s->seqhi);
	d->seqdiff = ntohl(s->seqdiff);
	d->max_win = ntohs(s->max_win);
	d->mss = ntohs(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub.scrub_flag == PF_SCRUB_FLAG_VALID &&
	    d->scrub != NULL) {
		d->scrub->pfss_flags = ntohs(s->scrub.pfss_flags) &
		    PFSS_TIMESTAMP;
		d->scrub->pfss_ttl = s->scrub.pfss_ttl;
		d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
	}
}

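/*
 * A UDP endpoint mapping records the (source, NAT) address/port pair
 * chosen for a translated UDP flow, so that later flows from the same
 * internal endpoint can reuse the same external port.  Both endpoints
 * point back at the mapping, so either one can be looked up through the
 * endpoint hash.
 */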
struct pf_udp_mapping *
pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
    struct pf_addr *nat_addr, uint16_t nat_port)
{
	struct pf_udp_mapping *mapping;

	mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
	if (mapping == NULL)
		return (NULL);
	pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af);
	mapping->endpoints[0].port = src_port;
	mapping->endpoints[0].af = af;
	mapping->endpoints[0].mapping = mapping;
	pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af);
	mapping->endpoints[1].port = nat_port;
	mapping->endpoints[1].af = af;
	mapping->endpoints[1].mapping = mapping;
	refcount_init(&mapping->refs, 1);
	return (mapping);
}

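/*
 * Insert both endpoints of a mapping into the endpoint hash.  The two
 * endpoints may land in different hash rows; locking the rows in ascending
 * order keeps concurrent inserts from deadlocking against each other.
 */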
int
pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
{
	struct pf_udpendpointhash *h0, *h1;
	struct pf_udp_endpoint *endpoint;
	int ret = EEXIST;

	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
	h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
	if (h0 == h1) {
		PF_HASHROW_LOCK(h0);
	} else if (h0 < h1) {
		PF_HASHROW_LOCK(h0);
		PF_HASHROW_LOCK(h1);
	} else {
		PF_HASHROW_LOCK(h1);
		PF_HASHROW_LOCK(h0);
	}

	LIST_FOREACH(endpoint, &h0->endpoints, entry) {
		if (bcmp(endpoint, &mapping->endpoints[0],
		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
			break;
	}
	if (endpoint != NULL)
		goto cleanup;
	LIST_FOREACH(endpoint, &h1->endpoints, entry) {
		if (bcmp(endpoint, &mapping->endpoints[1],
		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
			break;
	}
	if (endpoint != NULL)
		goto cleanup;
	LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
	LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
	ret = 0;

cleanup:
	if (h0 != h1) {
		PF_HASHROW_UNLOCK(h0);
		PF_HASHROW_UNLOCK(h1);
	} else {
		PF_HASHROW_UNLOCK(h0);
	}
	return (ret);
}

void
pf_udp_mapping_release(struct pf_udp_mapping *mapping)
{
	/* refcount is synchronized on the source endpoint's row lock */
	struct pf_udpendpointhash *h0, *h1;

	if (mapping == NULL)
		return;

	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
	PF_HASHROW_LOCK(h0);
	if (refcount_release(&mapping->refs)) {
		LIST_REMOVE(&mapping->endpoints[0], entry);
		PF_HASHROW_UNLOCK(h0);
		h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
		PF_HASHROW_LOCK(h1);
		LIST_REMOVE(&mapping->endpoints[1], entry);
		PF_HASHROW_UNLOCK(h1);

		uma_zfree(V_pf_udp_mapping_z, mapping);
	} else {
		PF_HASHROW_UNLOCK(h0);
	}
}

struct pf_udp_mapping *
pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
{
	struct pf_udpendpointhash *uh;
	struct pf_udp_endpoint *endpoint;

	uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint *)key)];

	PF_HASHROW_LOCK(uh);
	LIST_FOREACH(endpoint, &uh->endpoints, entry) {
		if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
		    bcmp(endpoint, &endpoint->mapping->endpoints[0],
		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
			break;
	}
	if (endpoint == NULL) {
		PF_HASHROW_UNLOCK(uh);
		return (NULL);
	}
	refcount_acquire(&endpoint->mapping->refs);
	PF_HASHROW_UNLOCK(uh);
	return (endpoint->mapping);
}

/* END state table stuff */

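/*
 * Packets generated during filtering (RSTs, ICMP errors, ...) are not
 * transmitted from the packet-processing context; they are queued here and
 * handed to a software interrupt, which drains the queue in pf_intr().
 */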
static void
pf_send(struct pf_send_entry *pfse)
{

	PF_SENDQ_LOCK();
	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
	PF_SENDQ_UNLOCK();
	swi_sched(V_pf_swi_cookie, 0);
}

static bool
pf_isforlocal(struct mbuf *m, int af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		struct ip *ip = mtod(m, struct ip *);

		return (in_localip(ip->ip_dst));
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		struct in6_ifaddr *ia;

		ip6 = mtod(m, struct ip6_hdr *);
		ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
		if (ia == NULL)
			return (false);
		return (! (ia->ia6_flags & IN6_IFF_NOTREADY));
	}
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	return (false);
}

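/*
 * Classify an ICMP/ICMPv6 type for state tracking.  Query/reply pairs are
 * collapsed onto a single "virtual" type and id, which pf uses in place of
 * ports when building the state key, so a reply matches the state its
 * query created.  Returns 1 for error types that refer to another
 * connection's state rather than their own.
 */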
int
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type,
    int *icmp_dir, u_int16_t *virtual_id, u_int16_t *virtual_type)
{
	/*
	 * ICMP types marked with PF_OUT are typically responses to
	 * PF_IN, and will match states in the opposite direction.
	 * PF_IN ICMP types need to match a state with that type.
	 */
	*icmp_dir = PF_OUT;

	/* Queries (and responses) */
	switch (pd->af) {
#ifdef INET
	case AF_INET:
		switch (type) {
		case ICMP_ECHO:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ECHOREPLY:
			*virtual_type = ICMP_ECHO;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_TSTAMP:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_TSTAMPREPLY:
			*virtual_type = ICMP_TSTAMP;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IREQREPLY:
			*virtual_type = ICMP_IREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_MASKREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MASKREPLY:
			*virtual_type = ICMP_MASKREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IPV6_WHEREAREYOU:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IPV6_IAMHERE:
			*virtual_type = ICMP_IPV6_WHEREAREYOU;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_MOBILE_REGREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MOBILE_REGREPLY:
			*virtual_type = ICMP_MOBILE_REGREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_ROUTERSOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ROUTERADVERT:
			*virtual_type = ICMP_ROUTERSOLICIT;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		/* These ICMP types map to other connections */
		case ICMP_UNREACH:
		case ICMP_SOURCEQUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIMXCEED:
		case ICMP_PARAMPROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			*virtual_type = htons(*virtual_type);
			return (1);  /* These types match to another state */

		/*
		 * All remaining ICMP types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		switch (type) {
		case ICMP6_ECHO_REQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_ECHO_REPLY:
			*virtual_type = ICMP6_ECHO_REQUEST;
			*virtual_id = pd->hdr.icmp6.icmp6_id;
			break;

		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT: {
			/*
			 * A Listener Report can be sent by clients
			 * without an associated Listener Query.
			 * In addition to that, when a Report is sent as a
			 * reply to a Query its source and destination
			 * address are different.
			 */
			*icmp_dir = PF_IN;
			*virtual_type = MLD_LISTENER_QUERY;
			*virtual_id = 0;
			break;
		}
		case MLD_MTRACE:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case MLD_MTRACE_RESP:
			*virtual_type = MLD_MTRACE;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ND_NEIGHBOR_SOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ND_NEIGHBOR_ADVERT: {
			*virtual_type = ND_NEIGHBOR_SOLICIT;
			*virtual_id = 0;
			break;
		}

		/*
		 * These ICMP types map to other connections.
		 * ND_REDIRECT can't be in this list because the triggering
		 * packet header is optional.
		 */
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
		case ICMP6_PARAM_PROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			*virtual_type = htons(*virtual_type);
			return (1);  /* These types match to another state */
		/*
		 * All remaining ICMP6 types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}
	*virtual_type = htons(*virtual_type);
	return (0);  /* These types match to their own state */
}

void
pf_intr(void *v)
{
	struct epoch_tracker et;
	struct pf_send_head queue;
	struct pf_send_entry *pfse, *next;

	CURVNET_SET((struct vnet *)v);

	PF_SENDQ_LOCK();
	queue = V_pf_sendqueue;
	STAILQ_INIT(&V_pf_sendqueue);
	PF_SENDQ_UNLOCK();

	NET_EPOCH_ENTER(et);

	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
		switch (pfse->pfse_type) {
#ifdef INET
		case PFSE_IP: {
			if (pf_isforlocal(pfse->pfse_m, AF_INET)) {
				KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
				    ("%s: rcvif != loif", __func__));

				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
				pfse->pfse_m->m_pkthdr.csum_flags |=
				    CSUM_IP_VALID | CSUM_IP_CHECKED |
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
				pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
				ip_input(pfse->pfse_m);
			} else {
				ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
				    NULL);
			}
			break;
		}
		case PFSE_ICMP:
			icmp_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
			break;
#endif /* INET */
#ifdef INET6
		case PFSE_IP6:
			if (pf_isforlocal(pfse->pfse_m, AF_INET6)) {
				KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
				    ("%s: rcvif != loif", __func__));

				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL |
				    M_LOOP;
				pfse->pfse_m->m_pkthdr.csum_flags |=
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
				pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
				ip6_input(pfse->pfse_m);
			} else {
				ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
				    NULL, NULL);
			}
			break;
		case PFSE_ICMP6:
			icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, pfse->icmpopts.mtu);
			break;
#endif /* INET6 */
		default:
			panic("%s: unknown type", __func__);
		}
		free(pfse, M_PFTEMP);
	}
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
}

#define	pf_purge_thread_period	(hz / 10)

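/*
 * With PF_WANT_32_TO_64_COUNTER the per-CPU 32-bit counters must be
 * flushed into their 64-bit accumulators before they can wrap.  Rather
 * than walking every kif/rule on each run, a marker entry is kept in the
 * global list and advanced by roughly a tenth of the list per period, so
 * the flushing cost is spread over successive runs of the purge thread.
 */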
#ifdef PF_WANT_32_TO_64_COUNTER
static void
pf_status_counter_u64_periodic(void)
{

	PF_RULES_RASSERT();

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) {
		return;
	}

	for (int i = 0; i < FCNT_MAX; i++) {
		pf_counter_u64_periodic(&V_pf_status.fcounters[i]);
	}
}

static void
pf_kif_counter_u64_periodic(void)
{
	struct pfi_kkif *kif;
	size_t r, run;

	PF_RULES_RASSERT();

	if (__predict_false(V_pf_allkifcount == 0)) {
		return;
	}

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
		return;
	}

	run = V_pf_allkifcount / 10;
	if (run < 5)
		run = 5;

	for (r = 0; r < run; r++) {
		kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist);
		if (kif == NULL) {
			LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
			LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
			break;
		}

		LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
		LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist);

		for (int i = 0; i < 2; i++) {
			for (int j = 0; j < 2; j++) {
				for (int k = 0; k < 2; k++) {
					pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]);
					pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]);
				}
			}
		}
	}
}

static void
pf_rule_counter_u64_periodic(void)
{
	struct pf_krule *rule;
	size_t r, run;

	PF_RULES_RASSERT();

	if (__predict_false(V_pf_allrulecount == 0)) {
		return;
	}

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
		return;
	}

	run = V_pf_allrulecount / 10;
	if (run < 5)
		run = 5;

	for (r = 0; r < run; r++) {
		rule = LIST_NEXT(V_pf_rulemarker, allrulelist);
		if (rule == NULL) {
			LIST_REMOVE(V_pf_rulemarker, allrulelist);
			LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
			break;
		}

		LIST_REMOVE(V_pf_rulemarker, allrulelist);
		LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist);

		pf_counter_u64_periodic(&rule->evaluations);
		for (int i = 0; i < 2; i++) {
			pf_counter_u64_periodic(&rule->packets[i]);
			pf_counter_u64_periodic(&rule->bytes[i]);
		}
	}
}

static void
pf_counter_u64_periodic_main(void)
{
	PF_RULES_RLOCK_TRACKER;

	V_pf_counter_periodic_iter++;

	PF_RULES_RLOCK();
	pf_counter_u64_critical_enter();
	pf_status_counter_u64_periodic();
	pf_kif_counter_u64_periodic();
	pf_rule_counter_u64_periodic();
	pf_counter_u64_critical_exit();
	PF_RULES_RUNLOCK();
}
#else
#define	pf_counter_u64_periodic_main()	do { } while (0)
#endif

void
pf_purge_thread(void *unused __unused)
{
	struct epoch_tracker et;

	VNET_ITERATOR_DECL(vnet_iter);

	sx_xlock(&pf_end_lock);
	while (pf_end_threads == 0) {
		sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period);

		VNET_LIST_RLOCK();
		NET_EPOCH_ENTER(et);
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);

			/* Wait until V_pf_default_rule is initialized. */
			if (V_pf_vnet_active == 0) {
				CURVNET_RESTORE();
				continue;
			}

			pf_counter_u64_periodic_main();

			/*
			 * Process 1/interval fraction of the state
			 * table every run.
			 */
			V_pf_purge_idx =
			    pf_purge_expired_states(V_pf_purge_idx, V_pf_hashmask /
			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));

			/*
			 * Purge other expired types every
			 * PFTM_INTERVAL seconds.
			 */
			if (V_pf_purge_idx == 0) {
				/*
				 * Order is important:
				 * - states and src nodes reference rules
				 * - states and rules reference kifs
				 */
				pf_purge_expired_fragments();
				pf_purge_expired_src_nodes();
				pf_purge_unlinked_rules();
				pfi_kkif_purge();
			}
			CURVNET_RESTORE();
		}
		NET_EPOCH_EXIT(et);
		VNET_LIST_RUNLOCK();
	}

	pf_end_threads++;
	sx_xunlock(&pf_end_lock);
	kproc_exit(0);
}

void
pf_unload_vnet_purge(void)
{

	/*
	 * To clean up all kifs and rules we need
	 * two runs: the first one clears reference flags,
	 * then pf_purge_expired_states() doesn't
	 * raise them, and then the second run frees.
	 */
	pf_purge_unlinked_rules();
	pfi_kkif_purge();

	/*
	 * Now purge everything.
	 */
	pf_purge_expired_states(0, V_pf_hashmask);
	pf_purge_fragments(UINT_MAX);
	pf_purge_expired_src_nodes();

	/*
	 * Now all kifs & rules should be unreferenced,
	 * and thus should be successfully freed.
	 */
	pf_purge_unlinked_rules();
	pfi_kkif_purge();
}

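/*
 * Compute the absolute expiry time of a state.  With adaptive timeouts
 * configured (adaptive.start/adaptive.end), the configured timeout is
 * scaled down linearly as the state count grows:
 *
 *	timeout' = timeout * (end - states) / (end - start)
 *
 * e.g. with start=6000, end=12000 and 9000 states, every timeout runs at
 * half its configured value; at or beyond "end", states expire immediately.
 */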
u_int32_t
pf_state_expires(const struct pf_kstate *state)
{
	u_int32_t timeout;
	u_int32_t start;
	u_int32_t end;
	u_int32_t states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_uptime);
	KASSERT(state->timeout != PFTM_UNLINKED,
	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
	KASSERT((state->timeout < PFTM_MAX),
	    ("pf_state_expires: timeout > PFTM_MAX"));
	timeout = state->rule->timeout[state->timeout];
	if (!timeout)
		timeout = V_pf_default_rule.timeout[state->timeout];
	start = state->rule->timeout[PFTM_ADAPTIVE_START];
	if (start && state->rule != &V_pf_default_rule) {
		end = state->rule->timeout[PFTM_ADAPTIVE_END];
		states = counter_u64_fetch(state->rule->states_cur);
	} else {
		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = V_pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end) {
			timeout = (u_int64_t)timeout * (end - states) /
			    (end - start);
			return ((state->expire / 1000) + timeout);
		} else
			return (time_uptime);
	}
	return ((state->expire / 1000) + timeout);
}

void
pf_purge_expired_src_nodes(void)
{
	struct pf_ksrc_node_list freelist;
	struct pf_srchash *sh;
	struct pf_ksrc_node *cur, *next;
	int i;

	LIST_INIT(&freelist);
	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
		PF_HASHROW_LOCK(sh);
		LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
			if (cur->states == 0 && cur->expire <= time_uptime) {
				pf_unlink_src_node(cur);
				LIST_INSERT_HEAD(&freelist, cur, entry);
			} else if (cur->rule != NULL)
				cur->rule->rule_ref |= PFRULE_REFS;
		PF_HASHROW_UNLOCK(sh);
	}

	pf_free_src_nodes(&freelist);

	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
}

static void
pf_src_tree_remove_state(struct pf_kstate *s)
{
	uint32_t timeout;

	timeout = s->rule->timeout[PFTM_SRC_NODE] ?
	    s->rule->timeout[PFTM_SRC_NODE] :
	    V_pf_default_rule.timeout[PFTM_SRC_NODE];

	for (pf_sn_types_t sn_type = 0; sn_type < PF_SN_MAX; sn_type++) {
		if (s->sns[sn_type] == NULL)
			continue;
		PF_SRC_NODE_LOCK(s->sns[sn_type]);
		if (sn_type == PF_SN_LIMIT && s->src.tcp_est)
			--(s->sns[sn_type]->conn);
		if (--(s->sns[sn_type]->states) == 0)
			s->sns[sn_type]->expire = time_uptime + timeout;
		PF_SRC_NODE_UNLOCK(s->sns[sn_type]);
		s->sns[sn_type] = NULL;
	}
}

/*
 * Unlink and potentially free a state.  The function may be
 * called with the ID hash row locked, but always returns
 * unlocked, since it needs to go through key hash locking.
 */
int
pf_remove_state(struct pf_kstate *s)
{
	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];

	NET_EPOCH_ASSERT();
	PF_HASHROW_ASSERT(ih);

	if (s->timeout == PFTM_UNLINKED) {
		/*
		 * State is being processed
		 * by pf_remove_state() in
		 * another thread.
		 */
		PF_HASHROW_UNLOCK(ih);
		return (0);	/* XXXGL: undefined actually */
	}

	if (s->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(s->rule, s->key[PF_SK_WIRE]->af,
		    &s->key[PF_SK_WIRE]->addr[1],
		    &s->key[PF_SK_WIRE]->addr[0],
		    s->key[PF_SK_WIRE]->port[1],
		    s->key[PF_SK_WIRE]->port[0],
		    s->src.seqhi, s->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0,
		    s->act.rtableid, NULL);
	}

	LIST_REMOVE(s, entry);
	pf_src_tree_remove_state(s);

	if (V_pfsync_delete_state_ptr != NULL)
		V_pfsync_delete_state_ptr(s);

	STATE_DEC_COUNTERS(s);

	s->timeout = PFTM_UNLINKED;

	/* Ensure we remove it from the list of halfopen states, if needed. */
	if (s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);

	PF_HASHROW_UNLOCK(ih);

	pf_detach_state(s);

	pf_udp_mapping_release(s->udp_mapping);

	/* pf_state_insert() initialises refs to 2 */
	return (pf_release_staten(s, 2));
}

struct pf_kstate *
pf_alloc_state(int flags)
{

	return (uma_zalloc(V_pf_state_z, flags | M_ZERO));
}

static __inline void
pf_free_match_rules(struct pf_krule_slist *match_rules)
{
	struct pf_krule_item *ri;

	while ((ri = SLIST_FIRST(match_rules))) {
		SLIST_REMOVE_HEAD(match_rules, entry);
		free(ri, M_PF_RULE_ITEM);
	}
}

void
pf_free_state(struct pf_kstate *cur)
{
	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
	    cur->timeout));

	pf_free_match_rules(&(cur->match_rules));
	pf_normalize_tcp_cleanup(cur);
	uma_zfree(V_pf_state_z, cur);
	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}

/*
 * Called only from pf_purge_thread(), thus serialized.
 */
static u_int
pf_purge_expired_states(u_int i, int maxcheck)
{
	struct pf_idhash *ih;
	struct pf_kstate *s;
	struct pf_krule_item *mrm;
	size_t count __unused;

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	/*
	 * Go through hash and unlink states that expire now.
	 */
	while (maxcheck > 0) {
		count = 0;
		ih = &V_pf_idhash[i];

		/* only take the lock if we expect to do work */
		if (!LIST_EMPTY(&ih->states)) {
relock:
			PF_HASHROW_LOCK(ih);
			LIST_FOREACH(s, &ih->states, entry) {
				if (pf_state_expires(s) <= time_uptime) {
					V_pf_status.states -=
					    pf_remove_state(s);
					goto relock;
				}
				s->rule->rule_ref |= PFRULE_REFS;
				if (s->nat_rule != NULL)
					s->nat_rule->rule_ref |= PFRULE_REFS;
				if (s->anchor != NULL)
					s->anchor->rule_ref |= PFRULE_REFS;
				s->kif->pfik_flags |= PFI_IFLAG_REFS;
				SLIST_FOREACH(mrm, &s->match_rules, entry)
					mrm->r->rule_ref |= PFRULE_REFS;
				if (s->act.rt_kif)
					s->act.rt_kif->pfik_flags |= PFI_IFLAG_REFS;
				count++;
			}
			PF_HASHROW_UNLOCK(ih);
		}

		SDT_PROBE2(pf, purge, state, rowcount, i, count);

		/* Return when we hit end of hash. */
		if (++i > V_pf_hashmask) {
			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
			return (0);
		}

		maxcheck--;
	}

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	return (i);
}

static void
pf_purge_unlinked_rules(void)
{
	struct pf_krulequeue tmpq;
	struct pf_krule *r, *r1;

	/*
	 * If we have an overloading task pending, then we'd
	 * better skip purging this time.  There is a tiny
	 * probability that the overloading task references
	 * an already unlinked rule.
	 */
	PF_OVERLOADQ_LOCK();
	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
		PF_OVERLOADQ_UNLOCK();
		return;
	}
	PF_OVERLOADQ_UNLOCK();

	/*
	 * Do naive mark-and-sweep garbage collecting of old rules.
	 * The reference flag is raised by pf_purge_expired_states()
	 * and pf_purge_expired_src_nodes().
	 *
	 * To avoid a LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
	 * use a temporary queue.
	 */
	TAILQ_INIT(&tmpq);
	PF_UNLNKDRULES_LOCK();
	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
		if (!(r->rule_ref & PFRULE_REFS)) {
			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
			TAILQ_INSERT_TAIL(&tmpq, r, entries);
		} else
			r->rule_ref &= ~PFRULE_REFS;
	}
	PF_UNLNKDRULES_UNLOCK();

	if (!TAILQ_EMPTY(&tmpq)) {
		PF_CONFIG_LOCK();
		PF_RULES_WLOCK();
		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
			TAILQ_REMOVE(&tmpq, r, entries);
			pf_free_rule(r);
		}
		PF_RULES_WUNLOCK();
		PF_CONFIG_UNLOCK();
	}
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					printf(":");
				if (i == maxend)
					printf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7)
					printf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_print_state(struct pf_kstate *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

static void
pf_print_state_parts(struct pf_kstate *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		printf("IPv4");
		break;
	case IPPROTO_IPV6:
		printf("IPv6");
		break;
	case IPPROTO_TCP:
		printf("TCP");
		break;
	case IPPROTO_UDP:
		printf("UDP");
		break;
	case IPPROTO_ICMP:
		printf("ICMP");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPv6");
		break;
	default:
		printf("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		printf(" in");
		break;
	case PF_OUT:
		printf(" out");
		break;
	}
	if (skw) {
		printf(" wire: ");
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		printf(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		printf(" stack: ");
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			printf(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			printf("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			printf(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				printf(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			printf("]");
			printf(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				printf(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			printf("]");
		}
		printf(" %u:%u", s->src.state, s->dst.state);
		if (s->rule)
			printf(" @%d", s->rule->nr);
	}
}

void
pf_print_flags(uint16_t f)
{
	if (f)
		printf(" ");
	if (f & TH_FIN)
		printf("F");
	if (f & TH_SYN)
		printf("S");
	if (f & TH_RST)
		printf("R");
	if (f & TH_PUSH)
		printf("P");
	if (f & TH_ACK)
		printf("A");
	if (f & TH_URG)
		printf("U");
	if (f & TH_ECE)
		printf("E");
	if (f & TH_CWR)
		printf("W");
	if (f & TH_AE)
		printf("e");
}

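/*
 * Skip steps let the rule evaluator jump over runs of rules that cannot
 * match.  For each criterion (interface, direction, af, proto, ...),
 * every rule points at the next rule with a different value for that
 * criterion; when a packet fails the check, evaluation resumes at that
 * rule instead of at the immediate successor.
 */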
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i] = cur;			\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_krulequeue *rules)
{
	struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	default:
		printf("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/**
 * Checksum updates are a little complicated because the checksum in the
 * TCP/UDP header isn't always a full checksum.  In some cases (i.e. output)
 * it's a pseudo-header checksum, which is a partial checksum over src/dst
 * IP addresses, protocol number and length.
 *
 * That means we have the following cases:
 *  * Input or forwarding: we don't have TSO, the checksum fields are full
 *    checksums, we need to update the checksum whenever we change anything.
 *  * Output (i.e. the checksum is a pseudo-header checksum):
 *    x The field being updated is src/dst address or affects the length of
 *      the packet.  We need to update the pseudo-header checksum (note that
 *      this checksum is not ones' complement).
 *    x Some other field is being modified (e.g. src/dst port numbers):  We
 *      don't have to update anything.
 **/
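/*
 * Incremental update of the Internet checksum, in the spirit of
 * RFC 1141/1624: fold the old value out of and the new value into the
 * stored checksum instead of recomputing it over the whole packet.
 * Since the checksum field holds the ones' complement of the sum,
 * ~(sum + new - old) == cksum + old - new with end-around carry folding,
 * which is what the arithmetic below computes.  (A worked example, values
 * illustrative only: rewriting a port word 0x0050 (80) to 0x1f90 (8080)
 * replaces cksum with cksum + 0x0050 - 0x1f90, folded back into 16 bits.)
 * The udp flag preserves UDP's "no checksum" encoding: a checksum of
 * 0x0000 stays untouched, and a result of 0x0000 becomes 0xffff.
 */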
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t x;

	x = cksum + old - new;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && cksum == 0x0000)
		return (cksum);
	if (udp && x == 0x0000)
		x = 0xffff;

	return ((u_int16_t)x);
}

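/*
 * Patch one byte of a header field and fix the transport checksum in the
 * same step.  The "hi" flag says whether the byte is the high or the low
 * octet of its 16-bit checksum word, so the 8-bit change can be expressed
 * as a 16-bit old/new pair for pf_cksum_fixup().  The 16- and 32-bit
 * variants below simply patch byte by byte with alternating "hi".
 */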
static int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int rewrite = 0;

	if (*f != v) {
		uint16_t old = htons(hi ? (*f << 8) : *f);
		uint16_t new = htons(hi ? ( v << 8) : v);

		*f = v;

		if (! (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
		    CSUM_DELAY_DATA_IPV6)))
			*pd->pcksum = pf_cksum_fixup(*pd->pcksum, old, new,
			    pd->proto == IPPROTO_UDP);

		rewrite = 1;
	}

	return (rewrite);
}

int
pf_patch_16(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
{
	int rewrite = 0;
	u_int8_t *fb = (u_int8_t *)f;
	u_int8_t *vb = (u_int8_t *)&v;

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, !hi);

	return (rewrite);
}

int
pf_patch_32(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
{
	int rewrite = 0;
	u_int8_t *fb = (u_int8_t *)f;
	u_int8_t *vb = (u_int8_t *)&v;

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, !hi);

	return (rewrite);
}

u_int16_t
pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
    u_int16_t new, u_int8_t udp)
{
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
		return (cksum);

	return (pf_cksum_fixup(cksum, old, new, udp));
}

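/*
 * Rewrite an address/port pair and repair every affected checksum: the
 * IPv4 header checksum, plus the transport checksum unless the hardware
 * will recompute it (CSUM_DELAY_DATA*).  When the packet is also being
 * translated to another address family (pd->af != pd->naf), the fixup
 * folds the old family's address words out and the new family's words in.
 */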
static void
pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
    struct pf_addr *an, u_int16_t pn)
{
	struct pf_addr ao;
	u_int16_t po;
	uint8_t u = pd->virtual_proto == IPPROTO_UDP;

	MPASS(pd->pcksum != NULL);
	if (pd->af == AF_INET) {
		MPASS(pd->ip_sum);
	}

	pf_addrcpy(&ao, a, pd->af);
	if (pd->af == pd->naf)
		pf_addrcpy(a, an, pd->af);

	if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
		*pd->pcksum = ~*pd->pcksum;

	if (p == NULL)	/* no port -> done. no cksum to worry about. */
		return;
	po = *p;
	*p = pn;

	switch (pd->af) {
#ifdef INET
	case AF_INET:
		switch (pd->naf) {
		case AF_INET:
			*pd->ip_sum = pf_cksum_fixup(pf_cksum_fixup(*pd->ip_sum,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			*p = pn;

			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u);

			*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
			break;
#ifdef INET6
		case AF_INET6:
			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    0, an->addr16[2], u),
			    0, an->addr16[3], u),
			    0, an->addr16[4], u),
			    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
#endif /* INET6 */
		default:
			unhandled_af(pd->naf);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		switch (pd->naf) {
#ifdef INET
		case AF_INET:
			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], 0, u),
			    ao.addr16[3], 0, u),
			    ao.addr16[4], 0, u),
			    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		case AF_INET6:
			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], an->addr16[2], u),
			    ao.addr16[3], an->addr16[3], u),
			    ao.addr16[4], an->addr16[4], u),
			    ao.addr16[5], an->addr16[5], u),
			    ao.addr16[6], an->addr16[6], u),
			    ao.addr16[7], an->addr16[7], u);

			*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
			break;
		default:
			unhandled_af(pd->naf);
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
	    CSUM_DELAY_DATA_IPV6)) {
		*pd->pcksum = ~*pd->pcksum;
		if (! *pd->pcksum)
			*pd->pcksum = 0xffff;
	}
}

/* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t ao;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));
	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
	    ao % 65536, an % 65536, u);
}

void
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
{
	u_int32_t ao;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	*c = pf_proto_cksum_fixup(m,
	    pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
	    ao % 65536, an % 65536, udp);
}

#ifdef INET6
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr ao;

	pf_addrcpy(&ao, a, AF_INET6);
	pf_addrcpy(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
#endif /* INET6 */

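/*
 * Rewrite the addresses/port quoted inside an ICMP error message, keeping
 * the inner protocol checksum, the inner IP header checksum, the ICMP
 * checksum and the outer IP header checksum all consistent.  Note that the
 * inner checksums are themselves part of the data the ICMP checksum
 * covers, hence the chained fixups.
 */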
static void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr oia, ooa;

	pf_addrcpy(&oia, ia, af);
	if (oa)
		pf_addrcpy(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t oip = *ip;
		u_int32_t opc;

		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	pf_addrcpy(ia, na, af);
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
	if (oa) {
		pf_addrcpy(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}

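/*
 * Rewrite the IP header in place for an af-to translation: strip the old
 * network header, prepend one for the new family, and move the hardware
 * checksum-offload flags over to their counterparts for that family.
 * ICMP and ICMPv6 use different pseudo-header rules, so their checksums
 * are recomputed from scratch rather than fixed up incrementally.
 */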
int
pf_translate_af(struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct mbuf *mp;
	struct ip *ip4;
	struct ip6_hdr *ip6;
	struct icmp6_hdr *icmp;
	struct m_tag *mtag;
	struct pf_fragment_tag *ftag;
	int hlen;

	hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* trim the old header */
	m_adj(pd->m, pd->off);

	/* prepend a new one */
	M_PREPEND(pd->m, hlen, M_NOWAIT);
	if (pd->m == NULL)
		return (-1);

	switch (pd->naf) {
	case AF_INET:
		ip4 = mtod(pd->m, struct ip *);
		bzero(ip4, hlen);
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = hlen >> 2;
		ip4->ip_tos = pd->tos;
		ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
		ip_fillid(ip4, V_ip_random_id);
		ip4->ip_ttl = pd->ttl;
		ip4->ip_p = pd->proto;
		ip4->ip_src = pd->nsaddr.v4;
		ip4->ip_dst = pd->ndaddr.v4;
		pd->src = (struct pf_addr *)&ip4->ip_src;
		pd->dst = (struct pf_addr *)&ip4->ip_dst;
		pd->off = sizeof(struct ip);
		if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
			pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6;
			pd->m->m_pkthdr.csum_flags |= CSUM_TCP;
		}
		if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
			pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6;
			pd->m->m_pkthdr.csum_flags |= CSUM_UDP;
		}
		if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
			pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
			pd->m->m_pkthdr.csum_flags |= CSUM_SCTP;
		}
		break;
	case AF_INET6:
		ip6 = mtod(pd->m, struct ip6_hdr *);
		bzero(ip6, hlen);
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
		ip6->ip6_plen = htons(pd->tot_len - pd->off);
		ip6->ip6_nxt = pd->proto;
		if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd->ttl;
		ip6->ip6_src = pd->nsaddr.v6;
		ip6->ip6_dst = pd->ndaddr.v6;
		pd->src = (struct pf_addr *)&ip6->ip6_src;
		pd->dst = (struct pf_addr *)&ip6->ip6_dst;
		pd->off = sizeof(struct ip6_hdr);
		if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) {
			pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP;
			pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		}
		if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) {
			pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP;
			pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6;
		}
		if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) {
			pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
			pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6;
		}

		/*
		 * If we're dealing with a reassembled packet we need to adjust
		 * the header length from the IPv4 header size to IPv6 header
		 * size.
		 */
		mtag = m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL);
		if (mtag) {
			ftag = (struct pf_fragment_tag *)(mtag + 1);
			ftag->ft_hdrlen = sizeof(*ip6);
			ftag->ft_maxlen -= sizeof(struct ip6_hdr) -
			    sizeof(struct ip) + sizeof(struct ip6_frag);
		}
		break;
	default:
		return (-1);
	}

	/* recalculate icmp/icmp6 checksums */
	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
		int off;
		if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
		    NULL) {
			pd->m = NULL;
			return (-1);
		}
		icmp = (struct icmp6_hdr *)(mp->m_data + off);
		icmp->icmp6_cksum = 0;
		icmp->icmp6_cksum = pd->naf == AF_INET ?
		    in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
		    in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
		    ntohs(ip6->ip6_plen));
	}
#endif /* INET && INET6 */

	return (0);
}

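/*
 * Translate the packet quoted inside an ICMP error between address
 * families: split the mbuf chain at the inner header, swap the inner
 * IPv4/IPv6 header, and splice the chain back together, adjusting offsets
 * and the total length accordingly.
 */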
int
pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
    sa_family_t af, sa_family_t naf)
{
#if defined(INET) && defined(INET6)
	struct mbuf *n = NULL;
	struct ip *ip4;
	struct ip6_hdr *ip6;
	int hlen, olen, mlen;

	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6))
		return (-1);

	/* split the mbuf chain on the inner ip/ip6 header boundary */
	if ((n = m_split(m, off, M_NOWAIT)) == NULL)
		return (-1);

	/* old header */
	olen = pd2->off - off;
	/* new header */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* trim old header */
	m_adj(n, olen);

	/* prepend a new one */
	M_PREPEND(n, hlen, M_NOWAIT);
	if (n == NULL)
		return (-1);

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		ip4 = mtod(n, struct ip *);
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip_fillid(ip4, V_ip_random_id);
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		if (pd2->proto == IPPROTO_ICMPV6)
			ip4->ip_p = IPPROTO_ICMP;
		else
			ip4->ip_p = pd2->proto;
		ip4->ip_src = src->v4;
		ip4->ip_dst = dst->v4;
		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = mtod(n, struct ip6_hdr *);
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		if (pd2->proto == IPPROTO_ICMP)
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		else
			ip6->ip6_nxt = pd2->proto;
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd2->ttl;
		ip6->ip6_src = src->v6;
		ip6->ip6_dst = dst->v6;
		break;
	default:
		unhandled_af(naf);
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	/* merge modified inner packet with the original header */
	mlen = n->m_pkthdr.len;
	m_cat(m, n);
	m->m_pkthdr.len += mlen;
#endif /* INET && INET6 */

	return (0);
}

#define	PTR_IP(field)	(offsetof(struct ip, field))
#define	PTR_IP6(field)	(offsetof(struct ip6_hdr, field))

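/*
 * Map an ICMP type/code pair onto its ICMPv6 counterpart (or vice versa),
 * along the lines of the IP/ICMP translation algorithm (cf. RFC 7915),
 * and fix up the checksum for each field that changes.  The MTU in
 * "packet too big" messages is adjusted by 20 bytes, the difference
 * between the IPv4 and IPv6 header sizes, and parameter-problem pointers
 * are remapped to the offset of the corresponding field in the other
 * header.  Returns -1 for messages that have no sensible translation.
 */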
int
3813
pf_translate_icmp_af(int af, void *arg)
3814
{
3815
#if defined(INET) && defined(INET6)
3816
struct icmp *icmp4;
3817
struct icmp6_hdr *icmp6;
3818
u_int32_t mtu;
3819
int32_t ptr = -1;
3820
u_int8_t type;
3821
u_int8_t code;
3822
3823
switch (af) {
3824
case AF_INET:
3825
icmp6 = arg;
3826
type = icmp6->icmp6_type;
3827
code = icmp6->icmp6_code;
3828
mtu = ntohl(icmp6->icmp6_mtu);
3829
3830
switch (type) {
3831
case ICMP6_ECHO_REQUEST:
3832
type = ICMP_ECHO;
3833
break;
3834
case ICMP6_ECHO_REPLY:
3835
type = ICMP_ECHOREPLY;
3836
break;
3837
case ICMP6_DST_UNREACH:
3838
type = ICMP_UNREACH;
3839
switch (code) {
3840
case ICMP6_DST_UNREACH_NOROUTE:
3841
case ICMP6_DST_UNREACH_BEYONDSCOPE:
3842
case ICMP6_DST_UNREACH_ADDR:
3843
code = ICMP_UNREACH_HOST;
3844
break;
3845
case ICMP6_DST_UNREACH_ADMIN:
3846
code = ICMP_UNREACH_HOST_PROHIB;
3847
break;
3848
case ICMP6_DST_UNREACH_NOPORT:
3849
code = ICMP_UNREACH_PORT;
3850
break;
3851
default:
3852
return (-1);
3853
}
3854
break;
3855
case ICMP6_PACKET_TOO_BIG:
3856
type = ICMP_UNREACH;
3857
code = ICMP_UNREACH_NEEDFRAG;
3858
mtu -= 20;
3859
break;
3860
case ICMP6_TIME_EXCEEDED:
3861
type = ICMP_TIMXCEED;
3862
break;
3863
case ICMP6_PARAM_PROB:
3864
switch (code) {
3865
case ICMP6_PARAMPROB_HEADER:
3866
type = ICMP_PARAMPROB;
3867
code = ICMP_PARAMPROB_ERRATPTR;
3868
ptr = ntohl(icmp6->icmp6_pptr);
3869
3870
if (ptr == PTR_IP6(ip6_vfc))
3871
; /* preserve */
3872
else if (ptr == PTR_IP6(ip6_vfc) + 1)
3873
ptr = PTR_IP(ip_tos);
3874
else if (ptr == PTR_IP6(ip6_plen) ||
3875
ptr == PTR_IP6(ip6_plen) + 1)
3876
ptr = PTR_IP(ip_len);
3877
else if (ptr == PTR_IP6(ip6_nxt))
3878
ptr = PTR_IP(ip_p);
3879
else if (ptr == PTR_IP6(ip6_hlim))
3880
ptr = PTR_IP(ip_ttl);
3881
else if (ptr >= PTR_IP6(ip6_src) &&
3882
ptr < PTR_IP6(ip6_dst))
3883
ptr = PTR_IP(ip_src);
3884
else if (ptr >= PTR_IP6(ip6_dst) &&
3885
ptr < sizeof(struct ip6_hdr))
3886
ptr = PTR_IP(ip_dst);
3887
else {
3888
return (-1);
3889
}
3890
break;
3891
case ICMP6_PARAMPROB_NEXTHEADER:
3892
type = ICMP_UNREACH;
3893
code = ICMP_UNREACH_PROTOCOL;
3894
break;
3895
default:
3896
return (-1);
3897
}
3898
break;
3899
default:
3900
return (-1);
3901
}
3902
if (icmp6->icmp6_type != type) {
3903
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3904
icmp6->icmp6_type, type, 0);
3905
icmp6->icmp6_type = type;
3906
}
3907
if (icmp6->icmp6_code != code) {
3908
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3909
icmp6->icmp6_code, code, 0);
3910
icmp6->icmp6_code = code;
3911
}
3912
if (icmp6->icmp6_mtu != htonl(mtu)) {
3913
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3914
htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0);
3915
			/* aligns well with an icmpv4 nextmtu */
			icmp6->icmp6_mtu = htonl(mtu);
		}
		if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) {
			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
			    htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0);
			/* icmpv4 pptr is a single byte, the most significant one */
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;
	case AF_INET6:
		icmp4 = arg;
		type = icmp4->icmp_type;
		code = icmp4->icmp_code;
		mtu = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				mtu += 20;
				break;
			default:
				return (-1);
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return (-1);
			}

			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos))
				; /* preserve */
			else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1)
				ptr = PTR_IP6(ip6_plen);
			else if (ptr == PTR_IP(ip_ttl))
				ptr = PTR_IP6(ip6_hlim);
			else if (ptr == PTR_IP(ip_p))
				ptr = PTR_IP6(ip6_nxt);
			else if (ptr >= PTR_IP(ip_src) && ptr < PTR_IP(ip_dst))
				ptr = PTR_IP6(ip6_src);
			else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < sizeof(struct ip))
				ptr = PTR_IP6(ip6_dst);
			else {
				return (-1);
			}
			break;
		default:
			return (-1);
		}
		if (icmp4->icmp_type != type) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    icmp4->icmp_type, type, 0);
			icmp4->icmp_type = type;
		}
		if (icmp4->icmp_code != code) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    icmp4->icmp_code, code, 0);
			icmp4->icmp_code = code;
		}
		if (icmp4->icmp_nextmtu != htons(mtu)) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    icmp4->icmp_nextmtu, htons(mtu), 0);
			icmp4->icmp_nextmtu = htons(mtu);
		}
		if (ptr >= 0 && icmp4->icmp_void != ptr) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    htons(icmp4->icmp_pptr), htons(ptr), 0);
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	default:
		unhandled_af(af);
	}
#endif /* INET && INET6 */

	return (0);
}

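/*
 * Worked example for the translation above (illustrative values): an
 * ICMP6_PACKET_TOO_BIG reporting an MTU of 1280 leaves as an ICMPv4
 * ICMP_UNREACH/ICMP_UNREACH_NEEDFRAG with nextmtu 1260; the 20 bytes
 * subtracted account for the IPv6 header being 40 bytes where the IPv4
 * header is 20.  Similarly, an ICMP6_PARAMPROB_HEADER pointer naming
 * ip6_nxt (offset 6) is rewritten to name ip_p (offset 9), which is
 * exactly the mapping the PTR_IP()/PTR_IP6() comparisons encode.
 */
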
/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 */
static int
pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th,
    struct pf_state_peer *dst)
{
	struct sackblk	 sack;
	int		 copyback = 0, i;
	int		 olen, optsoff;
	uint8_t		 opts[MAX_TCPOPTLEN], *opt, *eoh;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	optsoff = pd->off + sizeof(struct tcphdr);
#define	TCPOLEN_MINSACK	(TCPOLEN_SACK + 2)
	if (olen < TCPOLEN_MINSACK ||
	    !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
		return (0);

	eoh = opts + olen;
	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL)
	{
		size_t safelen = MIN(opt[1], (eoh - opt));
		for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) {
			size_t startoff = (opt + i) - opts;
			memcpy(&sack, &opt[i], sizeof(sack));
			pf_patch_32(pd, &sack.start,
			    htonl(ntohl(sack.start) - dst->seqdiff),
			    PF_ALGNMNT(startoff));
			pf_patch_32(pd, &sack.end,
			    htonl(ntohl(sack.end) - dst->seqdiff),
			    PF_ALGNMNT(startoff + sizeof(sack.start)));
			memcpy(&opt[i], &sack, sizeof(sack));
		}
		copyback = 1;
		opt += opt[1];
	}

	if (copyback)
		m_copyback(pd->m, optsoff, olen, (caddr_t)opts);

	return (copyback);
}

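/*
 * Example (hypothetical numbers): if this state's sequence numbers were
 * modulated with seqdiff 1000, a peer SACK block covering 5000-6000
 * must be rewritten to 4000-5000 above, or the endpoint would see
 * acknowledgments for sequence space it never sent.
 */
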
struct mbuf *
pf_build_tcp(const struct pf_krule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
    int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, u_int sack,
    int rtableid, u_short *reason)
{
	struct mbuf	*m;
	int		 len, tlen;
#ifdef INET
	struct ip	*h = NULL;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6 = NULL;
#endif /* INET6 */
	struct tcphdr	*th;
	char		*opt;
	struct pf_mtag	*pf_mtag;

	len = 0;
	th = NULL;

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;
	if (sack)
		tlen += 2;

	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		REASON_SET(reason, PFRES_MEMORY);
		return (NULL);
	}

#ifdef MAC
	mac_netinet_firewall_send(m);
#endif
	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
		REASON_SET(reason, PFRES_MEMORY);
		m_freem(m);
		return (NULL);
	}
	m->m_flags |= mbuf_flags;
	pf_mtag->tag = mtag_tag;
	pf_mtag->flags = mtag_flags;

	if (rtableid >= 0)
		M_SETFIB(m, rtableid);

#ifdef ALTQ
	if (r != NULL && r->qid) {
		pf_mtag->qid = r->qid;

		/* add hints for ecn */
		pf_mtag->hdr = mtod(m, struct ip *);
	}
#endif /* ALTQ */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	/* The rest of the stack assumes a rcvif, so provide one.
	 * This is a locally generated packet, so .. close enough. */
	m->m_pkthdr.rcvif = V_loif;
	bzero(m->m_data, len);
	switch (af) {
#ifdef INET
	case AF_INET:
		m->m_pkthdr.csum_flags |= CSUM_TCP;
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);

		h = mtod(m, struct ip *);

		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_v = 4;
		h->ip_hl = sizeof(*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
		h->ip_len = htons(len);
		h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
		h->ip_ttl = ttl ? ttl : V_ip_defttl;
		h->ip_sum = 0;
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		th->th_sum = in_pseudo(h->ip_src.s_addr, h->ip_dst.s_addr,
		    htons(len - sizeof(struct ip) + IPPROTO_TCP));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);

		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = V_ip6_defhlim;
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		th->th_sum = in6_cksum_pseudo(h6, len - sizeof(struct ip6_hdr),
		    IPPROTO_TCP, 0);
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	tcp_set_flags(th, tcp_flags);
	th->th_win = htons(win);

	opt = (char *)(th + 1);
	if (mss) {
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		mss = htons(mss);
		memcpy((opt + 2), &mss, 2);
		opt += 4;
	}
	if (sack) {
		opt[0] = TCPOPT_SACK_PERMITTED;
		opt[1] = 2;
		opt += 2;
	}

	return (m);
}

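/*
 * The option area assembled above follows the usual (kind, length,
 * value) TCP layout: TCPOPT_MAXSEG is 4 bytes (kind, len 4, 16-bit MSS
 * value) and TCPOPT_SACK_PERMITTED is 2 bytes (kind, len 2), which is
 * why tlen grows by 4 and 2 respectively and th_off is tlen >> 2.
 */
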
static void
pf_send_sctp_abort(sa_family_t af, struct pf_pdesc *pd,
    uint8_t ttl, int rtableid)
{
	struct mbuf		*m;
#ifdef INET
	struct ip		*h = NULL;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr		*h6 = NULL;
#endif /* INET6 */
	struct sctphdr		*hdr;
	struct sctp_chunkhdr	*chunk;
	struct pf_send_entry	*pfse;
	int			 off = 0;

	MPASS(af == pd->af);

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return;

	m->m_data += max_linkhdr;
	m->m_flags |= M_SKIP_FIREWALL;
	/* The rest of the stack assumes a rcvif, so provide one.
	 * This is a locally generated packet, so .. close enough. */
	m->m_pkthdr.rcvif = V_loif;

	/* IPv4|6 header */
	switch (af) {
#ifdef INET
	case AF_INET:
		bzero(m->m_data, sizeof(struct ip) + sizeof(*hdr) + sizeof(*chunk));

		h = mtod(m, struct ip *);

		/* IP header fields */

		h->ip_p = IPPROTO_SCTP;
		h->ip_len = htons(sizeof(*h) + sizeof(*hdr) + sizeof(*chunk));
		h->ip_ttl = ttl ? ttl : V_ip_defttl;
		h->ip_src = pd->dst->v4;
		h->ip_dst = pd->src->v4;

		off += sizeof(struct ip);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		bzero(m->m_data, sizeof(struct ip6_hdr) + sizeof(*hdr) + sizeof(*chunk));

		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields */
		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_nxt = IPPROTO_SCTP;
		h6->ip6_plen = htons(sizeof(*h6) + sizeof(*hdr) + sizeof(*chunk));
		h6->ip6_hlim = ttl ? ttl : V_ip6_defhlim;
		memcpy(&h6->ip6_src, &pd->dst->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &pd->src->v6, sizeof(struct in6_addr));

		off += sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	/* SCTP header */
	hdr = mtodo(m, off);

	hdr->src_port = pd->hdr.sctp.dest_port;
	hdr->dest_port = pd->hdr.sctp.src_port;
	hdr->v_tag = pd->sctp_initiate_tag;
	hdr->checksum = 0;

	/* Abort chunk. */
	off += sizeof(struct sctphdr);
	chunk = mtodo(m, off);

	chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
	chunk->chunk_length = htons(sizeof(*chunk));

	/* SCTP checksum */
	off += sizeof(*chunk);
	m->m_pkthdr.len = m->m_len = off;

	pf_sctp_checksum(m, off - sizeof(*hdr) - sizeof(*chunk));

	if (rtableid >= 0)
		M_SETFIB(m, rtableid);

	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
	if (pfse == NULL) {
		m_freem(m);
		return;
	}

	switch (af) {
#ifdef INET
	case AF_INET:
		pfse->pfse_type = PFSE_IP;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		pfse->pfse_type = PFSE_IP6;
		break;
#endif /* INET6 */
	}

	pfse->pfse_m = m;
	pf_send(pfse);
}

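/*
 * Note on the layout built above: the ABORT is the minimal legal SCTP
 * packet, an IP header followed by the 12-byte SCTP common header and a
 * 4-byte chunk header, and it reuses pd->sctp_initiate_tag as v_tag so
 * the peer accepts the ABORT for the association being torn down.
 */
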
void
pf_send_tcp(const struct pf_krule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
    int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid,
    u_short *reason)
{
	struct pf_send_entry	*pfse;
	struct mbuf		*m;

	m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags,
	    win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid, reason);
	if (m == NULL)
		return;

	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
	if (pfse == NULL) {
		m_freem(m);
		REASON_SET(reason, PFRES_MEMORY);
		return;
	}

	switch (af) {
#ifdef INET
	case AF_INET:
		pfse->pfse_type = PFSE_IP;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		pfse->pfse_type = PFSE_IP6;
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	pfse->pfse_m = m;
	pf_send(pfse);
}

static void
pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum)
{
	/* undo NAT changes, if they have taken place */
	if (nr != NULL) {
		pf_addrcpy(pd->src, &pd->osrc, pd->af);
		pf_addrcpy(pd->dst, &pd->odst, pd->af);
		if (pd->sport)
			*pd->sport = pd->osport;
		if (pd->dport)
			*pd->dport = pd->odport;
		if (pd->ip_sum)
			*pd->ip_sum = bip_sum;
		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
	}
}

static void
pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd,
    struct tcphdr *th, u_int16_t bproto_sum, u_int16_t bip_sum,
    u_short *reason, int rtableid)
{
	pf_undo_nat(nr, pd, bip_sum);

	if (pd->proto == IPPROTO_TCP &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURN)) &&
	    !(tcp_get_flags(th) & TH_RST)) {
		u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;

		if (pf_check_proto_cksum(pd->m, pd->off, pd->tot_len - pd->off,
		    IPPROTO_TCP, pd->af))
			REASON_SET(reason, PFRES_PROTCKSUM);
		else {
			if (tcp_get_flags(th) & TH_SYN)
				ack++;
			if (tcp_get_flags(th) & TH_FIN)
				ack++;
			pf_send_tcp(r, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
			    r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid,
			    reason);
		}
	} else if (pd->proto == IPPROTO_SCTP &&
	    (r->rule_flag & PFRULE_RETURN)) {
		pf_send_sctp_abort(pd->af, pd, r->return_ttl, rtableid);
	} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
	    r->return_icmp)
		pf_send_icmp(pd->m, r->return_icmp >> 8,
		    r->return_icmp & 255, 0, pd->af, r, rtableid);
	else if (pd->proto != IPPROTO_ICMPV6 && pd->af == AF_INET6 &&
	    r->return_icmp6)
		pf_send_icmp(pd->m, r->return_icmp6 >> 8,
		    r->return_icmp6 & 255, 0, pd->af, r, rtableid);
}

static int
pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m)
{
	struct m_tag	*mtag;
	u_int8_t	 mpcp;

	mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
	if (mtag == NULL)
		return (0);

	if (prio == PF_PRIO_ZERO)
		prio = 0;

	mpcp = *(uint8_t *)(mtag + 1);

	return (mpcp == prio);
}

static int
pf_icmp_to_bandlim(uint8_t type)
{
	switch (type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		return (BANDLIM_ICMP_ECHO);
	case ICMP_TSTAMP:
	case ICMP_TSTAMPREPLY:
		return (BANDLIM_ICMP_TSTAMP);
	case ICMP_UNREACH:
	default:
		return (BANDLIM_ICMP_UNREACH);
	}
}

static void
pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s,
    struct pf_state_peer *src, struct pf_state_peer *dst,
    u_short *reason)
{
	/*
	 * We send a challenge ACK as a response to a SYN packet that
	 * matches an existing state (modulo the TCP window check), so
	 * the packet must be sent on behalf of the destination.
	 *
	 * We expect the sender to either remain silent or send an RST
	 * packet, so that both the firewall and the remote peer can
	 * purge the dead state from memory.
	 */
	pf_send_tcp(s->rule, pd->af, pd->dst, pd->src,
	    pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
	    src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0,
	    s->rule->rtableid, reason);
}

static void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int mtu,
    sa_family_t af, struct pf_krule *r, int rtableid)
{
	struct pf_send_entry	*pfse;
	struct mbuf		*m0;
	struct pf_mtag		*pf_mtag;

	/* ICMP packet rate limitation. */
	switch (af) {
#ifdef INET6
	case AF_INET6:
		if (icmp6_ratelimit(NULL, type, code))
			return;
		break;
#endif /* INET6 */
#ifdef INET
	case AF_INET:
		if (badport_bandlim(pf_icmp_to_bandlim(type)) != 0)
			return;
		break;
#endif /* INET */
	}

	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
	if (pfse == NULL)
		return;

	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
		free(pfse, M_PFTEMP);
		return;
	}

	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
		free(pfse, M_PFTEMP);
		return;
	}
	/* XXX: revisit */
	m0->m_flags |= M_SKIP_FIREWALL;

	if (rtableid >= 0)
		M_SETFIB(m0, rtableid);

#ifdef ALTQ
	if (r->qid) {
		pf_mtag->qid = r->qid;
		/* add hints for ecn */
		pf_mtag->hdr = mtod(m0, struct ip *);
	}
#endif /* ALTQ */

	switch (af) {
#ifdef INET
	case AF_INET:
		pfse->pfse_type = PFSE_ICMP;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		pfse->pfse_type = PFSE_ICMP6;
		break;
#endif /* INET6 */
	}
	pfse->pfse_m = m0;
	pfse->icmpopts.type = type;
	pfse->icmpopts.code = code;
	pfse->icmpopts.mtu = mtu;
	pf_send(pfse);
}

/*
 * Return ((n = 0) == (a = b [with mask m]))
 * Note: n != 0 => returns (a != b [with mask m])
 */
int
pf_match_addr(u_int8_t n, const struct pf_addr *a, const struct pf_addr *m,
    const struct pf_addr *b, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if (IN_ARE_MASKED_ADDR_EQUAL(a->v4, b->v4, m->v4))
			return (n == 0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_MASKED_ADDR_EQUAL(&a->v6, &b->v6, &m->v6))
			return (n == 0);
		break;
#endif /* INET6 */
	}

	return (n != 0);
}

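/*
 * Usage example: pf_match_addr(0, a, m, b, af) returns 1 when b lies
 * inside a/m, e.g. a rule's "from 10.0.0.0/24" matching 10.0.0.7,
 * while pf_match_addr(1, ...) implements the negated form
 * "from ! 10.0.0.0/24".
 */
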
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 */
int
pf_match_addr_range(const struct pf_addr *b, const struct pf_addr *e,
    const struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}

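/*
 * The IPv6 branch above compares the four 32-bit words most significant
 * first, so e.g. 2001:db8::9 tests inside the range
 * 2001:db8::1 - 2001:db8::ff just as an unsigned 128-bit comparison
 * would decide.
 */
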
static int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
	switch (op) {
	case PF_OP_IRG:
		return ((p > a1) && (p < a2));
	case PF_OP_XRG:
		return ((p < a1) || (p > a2));
	case PF_OP_RRG:
		return ((p >= a1) && (p <= a2));
	case PF_OP_EQ:
		return (p == a1);
	case PF_OP_NE:
		return (p != a1);
	case PF_OP_LT:
		return (p < a1);
	case PF_OP_LE:
		return (p <= a1);
	case PF_OP_GT:
		return (p > a1);
	case PF_OP_GE:
		return (p >= a1);
	}
	return (0); /* never reached */
}

int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
}

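/*
 * These operators mirror pf.conf syntax: PF_OP_RRG is the inclusive
 * "port 1000:2000", PF_OP_IRG the exclusive "port 1000 >< 2000" and
 * PF_OP_XRG the inverted "port 1000 <> 2000"; pf_match_port() just
 * byte-swaps before comparing, since rule ports are kept in network
 * order.
 */
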
static int
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
{
	if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, u));
}

static int
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
{
	if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, g));
}

int
pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag)
{
	if (*tag == -1)
		*tag = mtag;

	return ((!r->match_tag_not && r->match_tag == *tag) ||
	    (r->match_tag_not && r->match_tag != *tag));
}

static int
pf_match_rcvif(struct mbuf *m, struct pf_krule *r)
{
	struct ifnet	*ifp = m->m_pkthdr.rcvif;
	struct pfi_kkif	*kif;

	if (ifp == NULL)
		return (0);

	kif = (struct pfi_kkif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    "%s: kif == NULL, @%d via %s", __func__, r->nr,
		    r->rcv_ifname);
		return (0);
	}

	return (pfi_kkif_match(r->rcv_kif, kif));
}

int
pf_tag_packet(struct pf_pdesc *pd, int tag)
{

	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));

	if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL))
		return (ENOMEM);

	pd->pf_mtag->tag = tag;

	return (0);
}

/*
 * XXX: We rely on malloc(9) returning pointer aligned addresses.
 */
#define	PF_ANCHORSTACK_MATCH	0x00000001
#define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)

#define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
#define	PF_ANCHOR_RULE(f)	(struct pf_krule *)			\
				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
#define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *)			\
				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
				} while (0)

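/*
 * Sketch of the pointer tagging done by the macros above (relying on
 * the alignment guarantee noted in the comment):
 *
 *	f->r = rule;			matched bit clear
 *	PF_ANCHOR_SET_MATCH(f);		f->r == (rule | 0x1)
 *	rule == PF_ANCHOR_RULE(f);	masking recovers the pointer
 */
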
enum pf_test_status
pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r,
    struct pf_krule_slist *match_rules)
{
	enum pf_test_status	rv;

	PF_RULES_RASSERT();

	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
		printf("%s: anchor stack overflow on %s\n",
		    __func__, r->anchor->name);
		return (PF_TEST_FAIL);
	}

	ctx->depth++;

	if (r->anchor_wildcard) {
		struct pf_kanchor *child;
		rv = PF_TEST_OK;
		RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
			rv = pf_match_rule(ctx, &child->ruleset, match_rules);
			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
				/*
				 * we either hit a rule with quick action
				 * (more likely), or hit some runtime
				 * error (e.g. pool_get() failure).
				 */
				break;
			}
		}
	} else {
		rv = pf_match_rule(ctx, &r->anchor->ruleset, match_rules);
		/*
		 * Unless errors occurred, stop iff any rule matched
		 * within quick anchors.
		 */
		if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
		    *ctx->am == r)
			rv = PF_TEST_QUICK;
	}

	ctx->depth--;

	return (rv);
}

struct pf_keth_anchor_stackframe {
	struct pf_keth_ruleset	*rs;
	struct pf_keth_rule	*r;	/* XXX: + match bit */
	struct pf_keth_anchor	*child;
};

#define	PF_ETH_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
#define	PF_ETH_ANCHOR_RULE(f)	(struct pf_keth_rule *)			\
				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
#define	PF_ETH_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *)		\
				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
				} while (0)

void
pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
    struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
    struct pf_keth_rule **a, int *match)
{
	struct pf_keth_anchor_stackframe	*f;

	NET_EPOCH_ASSERT();

	if (match)
		*match = 0;
	if (*depth >= PF_ANCHOR_STACK_MAX) {
		printf("%s: anchor stack overflow on %s\n",
		    __func__, (*r)->anchor->name);
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL)
		*a = *r;
	f = stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		struct pf_keth_anchor_node *parent = &(*r)->anchor->children;

		if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->active.rules);
}

int
pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
    struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
    struct pf_keth_rule **a, int *match)
{
	struct pf_keth_anchor_stackframe	*f;
	struct pf_keth_rule	*fr;
	int	 quick = 0;

	NET_EPOCH_ASSERT();

	do {
		if (*depth <= 0)
			break;
		f = stack + *depth - 1;
		fr = PF_ETH_ANCHOR_RULE(f);
		if (f->child != NULL) {
			/*
			 * This block traverses through
			 * a wildcard anchor.
			 */
			if (match != NULL && *match) {
				/*
				 * If any of "*" matched, then
				 * "foo/ *" matched, mark frame
				 * appropriately.
				 */
				PF_ETH_ANCHOR_SET_MATCH(f);
				*match = 0;
			}
			f->child = RB_NEXT(pf_keth_anchor_node,
			    &fr->anchor->children, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->active.rules);
				if (*r == NULL)
					continue;
				else
					break;
			}
		}
		(*depth)--;
		if (*depth == 0 && a != NULL)
			*a = NULL;
		*rs = f->rs;
		if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match))
			quick = fr->quick;
		*r = TAILQ_NEXT(fr, entries);
	} while (*r == NULL);

	return (quick);
}

void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
		break;
#endif /* INET6 */
	}
}

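/*
 * Worked example (hypothetical addresses): with raddr 10.0.0.0 and
 * rmask 255.255.255.0, a source address of 192.168.1.77 is mapped to
 * naddr 10.0.0.77: the pool address supplies the masked network bits
 * and the packet address the remaining host bits.
 */
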
void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (addr->addr32[3] == 0xffffffff) {
			addr->addr32[3] = 0;
			if (addr->addr32[2] == 0xffffffff) {
				addr->addr32[2] = 0;
				if (addr->addr32[1] == 0xffffffff) {
					addr->addr32[1] = 0;
					addr->addr32[0] =
					    htonl(ntohl(addr->addr32[0]) + 1);
				} else
					addr->addr32[1] =
					    htonl(ntohl(addr->addr32[1]) + 1);
			} else
				addr->addr32[2] =
				    htonl(ntohl(addr->addr32[2]) + 1);
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
#endif /* INET6 */
	}
}

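/*
 * Example: incrementing 2001:db8::ffff:ffff carries out of the low
 * word and yields 2001:db8::1:0:0, i.e. the address behaves as one
 * big-endian 128-bit counter handled 32 bits at a time.
 */
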
void
pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
{
	/*
	 * Modern rules use the same flags in rules as they do in states.
	 */
	a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
	    PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));

	/*
	 * Old-style scrub rules have different flags which need to be translated.
	 */
	if (r->rule_flag & PFRULE_RANDOMID)
		a->flags |= PFSTATE_RANDOMID;
	if (r->scrub_flags & PFSTATE_SETTOS || r->rule_flag & PFRULE_SET_TOS) {
		a->flags |= PFSTATE_SETTOS;
		a->set_tos = r->set_tos;
	}

	if (r->qid)
		a->qid = r->qid;
	if (r->pqid)
		a->pqid = r->pqid;
	if (r->rtableid >= 0)
		a->rtableid = r->rtableid;
	a->log |= r->log;
	if (r->min_ttl)
		a->min_ttl = r->min_ttl;
	if (r->max_mss)
		a->max_mss = r->max_mss;
	if (r->dnpipe)
		a->dnpipe = r->dnpipe;
	if (r->dnrpipe)
		a->dnrpipe = r->dnrpipe;
	if (r->dnpipe || r->dnrpipe) {
		if (r->free_flags & PFRULE_DN_IS_PIPE)
			a->flags |= PFSTATE_DN_IS_PIPE;
		else
			a->flags &= ~PFSTATE_DN_IS_PIPE;
	}
	if (r->scrub_flags & PFSTATE_SETPRIO) {
		a->set_prio[0] = r->set_prio[0];
		a->set_prio[1] = r->set_prio[1];
	}
	if (r->allow_opts)
		a->allow_opts = r->allow_opts;
	if (r->max_pkt_size)
		a->max_pkt_size = r->max_pkt_size;
}

int
pf_socket_lookup(struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbinfo	*pi;
	struct inpcb		*inp;

	pd->lookup.uid = -1;
	pd->lookup.gid = -1;

	switch (pd->proto) {
	case IPPROTO_TCP:
		sport = pd->hdr.tcp.th_sport;
		dport = pd->hdr.tcp.th_dport;
		pi = &V_tcbinfo;
		break;
	case IPPROTO_UDP:
		sport = pd->hdr.udp.uh_sport;
		dport = pd->hdr.udp.uh_dport;
		pi = &V_udbinfo;
		break;
	default:
		return (-1);
	}
	if (pd->dir == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#ifdef INET
	case AF_INET:
		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
		    dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
		if (inp == NULL) {
			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
			    daddr->v4, dport, INPLOOKUP_WILDCARD |
			    INPLOOKUP_RLOCKPCB, NULL, pd->m);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
		    dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
		if (inp == NULL) {
			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
			    INPLOOKUP_RLOCKPCB, NULL, pd->m);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}
	INP_RLOCK_ASSERT(inp);
	pd->lookup.uid = inp->inp_cred->cr_uid;
	pd->lookup.gid = inp->inp_cred->cr_gid;
	INP_RUNLOCK(inp);

	return (1);
}

/* post: r  => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity"
 *	/\ (eoh - r) >= min_typelen >= 2 "safety" )
 *
 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
 */
uint8_t*
pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
    u_int8_t min_typelen)
{
	uint8_t	*eoh = opts + hlen;

	if (min_typelen < 2)
		return (NULL);

	while ((eoh - opt) >= min_typelen) {
		switch (*opt) {
		case TCPOPT_EOL:
			/* FALLTHROUGH - Workaround the failure of some
			   systems to NOP-pad their bzero'd option buffers,
			   producing spurious EOLs */
		case TCPOPT_NOP:
			opt++;
			continue;
		default:
			if (opt[0] == type &&
			    opt[1] >= min_typelen)
				return (opt);
		}

		opt += MAX(opt[1], 2); /* evade infinite loops */
	}

	return (NULL);
}

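/*
 * Example walk: in the option bytes { NOP, NOP, MAXSEG, 4, 0x05, 0xb4,
 * WINDOW, 3, 6 } a search for TCPOPT_WINDOW steps over the two NOPs one
 * byte at a time, skips the MSS option by its length byte (4) and
 * returns a pointer to the WINDOW kind octet; 0x05b4 is an MSS of 1460.
 */
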
u_int8_t
pf_get_wscale(struct pf_pdesc *pd)
{
	int	 olen;
	uint8_t	 opts[MAX_TCPOPTLEN], *opt;
	uint8_t	 wscale = 0;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
		wscale = opt[2];
		wscale = MIN(wscale, TCP_MAX_WINSHIFT);
		wscale |= PF_WSCALE_FLAG;

		opt += opt[1];
	}

	return (wscale);
}

u_int16_t
pf_get_mss(struct pf_pdesc *pd)
{
	int		 olen;
	uint8_t		 opts[MAX_TCPOPTLEN], *opt;
	u_int16_t	 mss = V_tcp_mssdflt;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
		memcpy(&mss, (opt + 2), 2);
		mss = ntohs(mss);
		opt += opt[1];
	}

	return (mss);
}

static u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
{
	struct nhop_object *nh;
#ifdef INET6
	struct in6_addr	 dst6;
	uint32_t	 scopeid;
#endif /* INET6 */
	int		 hlen = 0;
	uint16_t	 mss = 0;

	NET_EPOCH_ASSERT();

	switch (af) {
#ifdef INET
	case AF_INET:
		hlen = sizeof(struct ip);
		nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0);
		if (nh != NULL)
			mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		in6_splitscope(&addr->v6, &dst6, &scopeid);
		nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0);
		if (nh != NULL)
			mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
		break;
#endif /* INET6 */
	}

	mss = max(V_tcp_mssdflt, mss);
	mss = min(mss, offer);
	mss = max(mss, 64);		/* sanity - at least max opt space */
	return (mss);
}

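/*
 * Example: over an IPv4 path with nh_mtu 1500 this computes
 * 1500 - 20 - 20 = 1460, which is then raised to at least
 * V_tcp_mssdflt, capped by the peer's offer and finally kept at 64 or
 * more so at least maximal option space remains.
 */
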
static u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
	SHA512_CTX ctx;
	union {
		uint8_t bytes[SHA512_DIGEST_LENGTH];
		uint32_t words[1];
	} digest;

	if (V_pf_tcp_secret_init == 0) {
		arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
		SHA512_Init(&V_pf_tcp_secret_ctx);
		SHA512_Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
		    sizeof(V_pf_tcp_secret));
		V_pf_tcp_secret_init = 1;
	}

	ctx = V_pf_tcp_secret_ctx;

	SHA512_Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
	SHA512_Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
	switch (pd->af) {
	case AF_INET6:
		SHA512_Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
		SHA512_Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
		break;
	case AF_INET:
		SHA512_Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
		SHA512_Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
		break;
	}
	SHA512_Final(digest.bytes, &ctx);
	V_pf_tcp_iss_off += 4096;
#define	ISN_RANDOM_INCREMENT (4096 - 1)
	return (digest.words[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
	    V_pf_tcp_iss_off);
#undef	ISN_RANDOM_INCREMENT
}

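/*
 * For a fixed connection 4-tuple the SHA-512 term above is constant, so
 * successive ISNs for that tuple grow by 4096 plus a random 0..4095
 * increment: monotonic enough to avoid overlapping old sequence space,
 * yet unpredictable to an off-path attacker, in the spirit of the
 * RFC 6528 scheme.
 */
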
static bool
pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r)
{
	bool match = true;

	/* Always matches if not set */
	if (! r->isset)
		return (!r->neg);

	for (int i = 0; i < ETHER_ADDR_LEN; i++) {
		if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) {
			match = false;
			break;
		}
	}

	return (match ^ r->neg);
}

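/*
 * Example: with mask ff:ff:ff:00:00:00 the comparison above matches any
 * address sharing the rule's OUI, and r->neg then flips the result for
 * negated "! <addr>" rules.
 */
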
static int
pf_match_eth_tag(struct mbuf *m, struct pf_keth_rule *r, int *tag, int mtag)
{
	if (*tag == -1)
		*tag = mtag;

	return ((!r->match_tag_not && r->match_tag == *tag) ||
	    (r->match_tag_not && r->match_tag != *tag));
}

static void
pf_bridge_to(struct ifnet *ifp, struct mbuf *m)
{
	/* If we don't have the interface drop the packet. */
	if (ifp == NULL) {
		m_freem(m);
		return;
	}

	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_XETHER:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
	case IFT_IEEE8023ADLAG:
		break;
	default:
		m_freem(m);
		return;
	}

	ifp->if_transmit(ifp, m);
}

static int
pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
{
#ifdef INET
	struct ip		 ip;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr		 ip6;
#endif /* INET6 */
	struct mbuf		*m = *m0;
	struct ether_header	*e;
	struct pf_keth_rule	*r, *rm, *a = NULL;
	struct pf_keth_ruleset	*ruleset = NULL;
	struct pf_mtag		*mtag;
	struct pf_keth_ruleq	*rules;
	struct pf_addr		*src = NULL, *dst = NULL;
	struct pfi_kkif		*bridge_to;
	sa_family_t		 af = 0;
	uint16_t		 proto;
	int			 asd = 0, match = 0;
	int			 tag = -1;
	uint8_t			 action;
	struct pf_keth_anchor_stackframe	anchor_stack[PF_ANCHOR_STACK_MAX];

	MPASS(kif->pfik_ifp->if_vnet == curvnet);
	NET_EPOCH_ASSERT();

	PF_RULES_RLOCK_TRACKER;

	SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m);

	mtag = pf_find_mtag(m);
	if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
		/* Dummynet re-injects packets after they've
		 * completed their delay. We've already
		 * processed them, so pass unconditionally. */

		/* But only once. We may see the packet multiple times (e.g.
		 * PFIL_IN/PFIL_OUT). */
		pf_dummynet_flag_remove(m, mtag);

		return (PF_PASS);
	}

	if (__predict_false(m->m_len < sizeof(struct ether_header)) &&
	    (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    "%s: m_len < sizeof(struct ether_header)"
		    ", pullup failed", __func__);
		return (PF_DROP);
	}
	e = mtod(m, struct ether_header *);
	proto = ntohs(e->ether_type);

	switch (proto) {
#ifdef INET
	case ETHERTYPE_IP: {
		if (m_length(m, NULL) < (sizeof(struct ether_header) +
		    sizeof(ip)))
			return (PF_DROP);

		af = AF_INET;
		m_copydata(m, sizeof(struct ether_header), sizeof(ip),
		    (caddr_t)&ip);
		src = (struct pf_addr *)&ip.ip_src;
		dst = (struct pf_addr *)&ip.ip_dst;
		break;
	}
#endif /* INET */
#ifdef INET6
	case ETHERTYPE_IPV6: {
		if (m_length(m, NULL) < (sizeof(struct ether_header) +
		    sizeof(ip6)))
			return (PF_DROP);

		af = AF_INET6;
		m_copydata(m, sizeof(struct ether_header), sizeof(ip6),
		    (caddr_t)&ip6);
		src = (struct pf_addr *)&ip6.ip6_src;
		dst = (struct pf_addr *)&ip6.ip6_dst;
		break;
	}
#endif /* INET6 */
	}

	PF_RULES_RLOCK();

	ruleset = V_pf_keth;
	rules = atomic_load_ptr(&ruleset->active.rules);
	for (r = TAILQ_FIRST(rules), rm = NULL; r != NULL;) {
		counter_u64_add(r->evaluations, 1);
		SDT_PROBE2(pf, eth, test_rule, test, r->nr, r);

		if (pfi_kkif_match(r->kif, kif) == r->ifnot) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "kif");
			r = r->skip[PFE_SKIP_IFP].ptr;
		}
		else if (r->direction && r->direction != dir) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "dir");
			r = r->skip[PFE_SKIP_DIR].ptr;
		}
		else if (r->proto && r->proto != proto) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "proto");
			r = r->skip[PFE_SKIP_PROTO].ptr;
		}
		else if (! pf_match_eth_addr(e->ether_shost, &r->src)) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "src");
			r = r->skip[PFE_SKIP_SRC_ADDR].ptr;
		}
		else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "dst");
			r = r->skip[PFE_SKIP_DST_ADDR].ptr;
		}
		else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af,
		    r->ipsrc.neg, kif, M_GETFIB(m))) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "ip_src");
			r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr;
		}
		else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af,
		    r->ipdst.neg, kif, M_GETFIB(m))) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "ip_dst");
			r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr;
		}
		else if (r->match_tag && !pf_match_eth_tag(m, r, &tag,
		    mtag ? mtag->tag : 0)) {
			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
			    "match_tag");
			r = TAILQ_NEXT(r, entries);
		}
		else {
			if (r->tag)
				tag = r->tag;
			if (r->anchor == NULL) {
				/* Rule matches */
				rm = r;

				SDT_PROBE2(pf, eth, test_rule, match, r->nr, r);

				if (r->quick)
					break;

				r = TAILQ_NEXT(r, entries);
			} else {
				pf_step_into_keth_anchor(anchor_stack, &asd,
				    &ruleset, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd,
		    &ruleset, &r, &a, &match))
			break;
	}

	r = rm;

	SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r);

	/* Default to pass. */
	if (r == NULL) {
		PF_RULES_RUNLOCK();
		return (PF_PASS);
	}

	/* Execute action. */
	counter_u64_add(r->packets[dir == PF_OUT], 1);
	counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
	pf_update_timestamp(r);

	/* Shortcut. Don't tag if we're just going to drop anyway. */
	if (r->action == PF_DROP) {
		PF_RULES_RUNLOCK();
		return (PF_DROP);
	}

	if (tag > 0) {
		if (mtag == NULL)
			mtag = pf_get_mtag(m);
		if (mtag == NULL) {
			PF_RULES_RUNLOCK();
			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
			return (PF_DROP);
		}
		mtag->tag = tag;
	}

	if (r->qid != 0) {
		if (mtag == NULL)
			mtag = pf_get_mtag(m);
		if (mtag == NULL) {
			PF_RULES_RUNLOCK();
			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
			return (PF_DROP);
		}
		mtag->qid = r->qid;
	}

	action = r->action;
	bridge_to = r->bridge_to;

	/* Dummynet */
	if (r->dnpipe) {
		struct ip_fw_args dnflow;

		/* Drop packet if dummynet is not loaded. */
		if (ip_dn_io_ptr == NULL) {
			PF_RULES_RUNLOCK();
			m_freem(m);
			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
			return (PF_DROP);
		}
		if (mtag == NULL)
			mtag = pf_get_mtag(m);
		if (mtag == NULL) {
			PF_RULES_RUNLOCK();
			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
			return (PF_DROP);
		}

		bzero(&dnflow, sizeof(dnflow));

		/* We don't have port numbers here, so we set 0. That means
		 * that we'll be somewhat limited in distinguishing flows (i.e.
		 * only based on IP addresses, not based on port numbers), but
		 * it's better than nothing. */
		dnflow.f_id.dst_port = 0;
		dnflow.f_id.src_port = 0;
		dnflow.f_id.proto = 0;

		dnflow.rule.info = r->dnpipe;
		dnflow.rule.info |= IPFW_IS_DUMMYNET;
		if (r->dnflags & PFRULE_DN_IS_PIPE)
			dnflow.rule.info |= IPFW_IS_PIPE;

		dnflow.f_id.extra = dnflow.rule.info;

		dnflow.flags = dir == PF_IN ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
		dnflow.flags |= IPFW_ARGS_ETHER;
		dnflow.ifp = kif->pfik_ifp;

		switch (af) {
		case AF_INET:
			dnflow.f_id.addr_type = 4;
			dnflow.f_id.src_ip = src->v4.s_addr;
			dnflow.f_id.dst_ip = dst->v4.s_addr;
			break;
		case AF_INET6:
			dnflow.flags |= IPFW_ARGS_IP6;
			dnflow.f_id.addr_type = 6;
			dnflow.f_id.src_ip6 = src->v6;
			dnflow.f_id.dst_ip6 = dst->v6;
			break;
		}

		PF_RULES_RUNLOCK();

		mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
		ip_dn_io_ptr(m0, &dnflow);
		if (*m0 != NULL)
			pf_dummynet_flag_remove(m, mtag);
	} else {
		PF_RULES_RUNLOCK();
	}

	if (action == PF_PASS && bridge_to) {
		pf_bridge_to(bridge_to->pfik_ifp, *m0);
		*m0 = NULL; /* We've eaten the packet. */
	}

	return (action);
}

#define	PF_TEST_ATTRIB(t, a)		\
	if (t) {			\
		r = a;			\
		continue;		\
	} else do {			\
	} while (0)

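/*
 * PF_TEST_ATTRIB(t, a) is a guarded "skip to the next candidate": if
 * the attribute test t shows a mismatch, r is reloaded from a (either a
 * precomputed skip-step target or TAILQ_NEXT(r, entries)) and the
 * surrounding while loop continues with that rule; the empty do/else
 * merely makes the macro demand a trailing semicolon.
 */
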
static __inline u_short
pf_rule_apply_nat(struct pf_test_ctx *ctx, struct pf_krule *r)
{
	struct pf_pdesc	*pd = ctx->pd;
	u_short		 transerror;
	u_int8_t	 nat_action;

	if (r->rule_flag & PFRULE_AFTO) {
		/* Don't translate if there was an old style NAT rule */
		if (ctx->nr != NULL)
			return (PFRES_TRANSLATE);

		/* pass af-to rules, unsupported on match rules */
		KASSERT(r->action != PF_MATCH, ("%s: af-to on match rule", __func__));
		/* XXX I can imagine scenarios where we have both NAT and RDR source tracking */
		ctx->nat_pool = &(r->nat);
		ctx->nr = r;
		pd->naf = r->naf;
		if (pf_get_transaddr_af(ctx->nr, pd) == -1) {
			return (PFRES_TRANSLATE);
		}
		return (PFRES_MATCH);
	} else if (r->rdr.cur || r->nat.cur) {
		/* Don't translate if there was an old style NAT rule */
		if (ctx->nr != NULL)
			return (PFRES_TRANSLATE);

		/* match/pass nat-to/rdr-to rules */
		ctx->nr = r;
		if (r->nat.cur) {
			nat_action = PF_NAT;
			ctx->nat_pool = &(r->nat);
		} else {
			nat_action = PF_RDR;
			ctx->nat_pool = &(r->rdr);
		}

		transerror = pf_get_transaddr(ctx, ctx->nr,
		    nat_action, ctx->nat_pool);
		if (transerror == PFRES_MATCH) {
			ctx->rewrite += pf_translate_compat(ctx);
			return (PFRES_MATCH);
		}
		return (transerror);
	}

	return (PFRES_MAX);
}

enum pf_test_status
pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset,
    struct pf_krule_slist *match_rules)
{
	struct pf_krule_item	*ri, *rt;
	struct pf_krule		*r;
	struct pf_krule		*save_a;
	struct pf_kruleset	*save_aruleset;
	struct pf_pdesc		*pd = ctx->pd;
	u_short			 transerror;

	r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		if (ctx->pd->related_rule) {
			*ctx->rm = ctx->pd->related_rule;
			break;
		}
		PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
		    TAILQ_NEXT(r, entries));
		/* Don't count expired rule evaluations. */
		pf_counter_u64_add(&r->evaluations, 1);
		PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
		    r->skip[PF_SKIP_IFP]);
		PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
		    r->skip[PF_SKIP_DIR]);
		PF_TEST_ATTRIB(r->af && r->af != pd->af,
		    r->skip[PF_SKIP_AF]);
		PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
		    r->skip[PF_SKIP_PROTO]);
		PF_TEST_ATTRIB(PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, pd->naf,
		    r->src.neg, pd->kif, M_GETFIB(pd->m)),
		    r->skip[PF_SKIP_SRC_ADDR]);
		PF_TEST_ATTRIB(PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
		    r->dst.neg, NULL, M_GETFIB(pd->m)),
		    r->skip[PF_SKIP_DST_ADDR]);
		switch (pd->virtual_proto) {
		case PF_VPROTO_FRAGMENT:
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
			    TAILQ_NEXT(r, entries));
			PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
			    TAILQ_NEXT(r, entries));
			/* icmp only. type/code always 0 in other cases */
			PF_TEST_ATTRIB((r->type || r->code),
			    TAILQ_NEXT(r, entries));
			/* tcp/udp only. {uid|gid}.op always 0 in other cases */
			PF_TEST_ATTRIB((r->gid.op || r->uid.op),
			    TAILQ_NEXT(r, entries));
			break;

		case IPPROTO_TCP:
			PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th))
			    != r->flags,
			    TAILQ_NEXT(r, entries));
			/* FALLTHROUGH */
		case IPPROTO_SCTP:
		case IPPROTO_UDP:
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB(r->src.port_op && !pf_match_port(r->src.port_op,
			    r->src.port[0], r->src.port[1], pd->nsport),
			    r->skip[PF_SKIP_SRC_PORT]);
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB(r->dst.port_op && !pf_match_port(r->dst.port_op,
			    r->dst.port[0], r->dst.port[1], pd->ndport),
			    r->skip[PF_SKIP_DST_PORT]);
			/* tcp/udp only. uid.op always 0 in other cases */
			PF_TEST_ATTRIB(r->uid.op && (pd->lookup.done || (pd->lookup.done =
			    pf_socket_lookup(pd), 1)) &&
			    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
			    pd->lookup.uid),
			    TAILQ_NEXT(r, entries));
			/* tcp/udp only. gid.op always 0 in other cases */
			PF_TEST_ATTRIB(r->gid.op && (pd->lookup.done || (pd->lookup.done =
			    pf_socket_lookup(pd), 1)) &&
			    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
			    pd->lookup.gid),
			    TAILQ_NEXT(r, entries));
			break;

		case IPPROTO_ICMP:
		case IPPROTO_ICMPV6:
			/* icmp only. type always 0 in other cases */
			PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1,
			    TAILQ_NEXT(r, entries));
			/* icmp only. type always 0 in other cases */
			PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1,
			    TAILQ_NEXT(r, entries));
			break;

		default:
			break;
		}
		PF_TEST_ATTRIB(r->tos && !(r->tos == pd->tos),
		    TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB(r->prio &&
		    !pf_match_ieee8021q_pcp(r->prio, pd->m),
		    TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB(r->prob &&
		    r->prob <= arc4random(),
		    TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r,
		    &ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0),
		    TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
		    r->rcvifnot),
		    TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
		    pd->virtual_proto != PF_VPROTO_FRAGMENT),
		    TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY &&
		    (pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match(
		    pf_osfp_fingerprint(pd, ctx->th),
		    r->os_fingerprint)),
		    TAILQ_NEXT(r, entries));
		/* must be last! */
		if (r->pktrate.limit) {
			PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
			    TAILQ_NEXT(r, entries));
		}
		/* FALLTHROUGH */
		if (r->tag)
			ctx->tag = r->tag;
		if (r->anchor == NULL) {

			if (r->rule_flag & PFRULE_ONCE) {
				uint32_t rule_flag;

				rule_flag = r->rule_flag;
				if ((rule_flag & PFRULE_EXPIRED) == 0 &&
				    atomic_cmpset_int(&r->rule_flag, rule_flag,
				    rule_flag | PFRULE_EXPIRED)) {
					r->exptime = time_uptime;
				} else {
					r = TAILQ_NEXT(r, entries);
					continue;
				}
			}

			if (r->action == PF_MATCH) {
				/*
				 * Apply translations before increasing counters,
				 * in case it fails.
				 */
				transerror = pf_rule_apply_nat(ctx, r);
				switch (transerror) {
				case PFRES_MATCH:
					/* Translation action found in rule and applied successfully */
				case PFRES_MAX:
					/* No translation action found in rule */
					break;
				default:
					/* Translation action found in rule but failed to apply */
					REASON_SET(&ctx->reason, transerror);
					return (PF_TEST_FAIL);
				}
				ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO);
				if (ri == NULL) {
					REASON_SET(&ctx->reason, PFRES_MEMORY);
					return (PF_TEST_FAIL);
				}
				ri->r = r;

				if (SLIST_EMPTY(match_rules)) {
					SLIST_INSERT_HEAD(match_rules, ri, entry);
				} else {
					SLIST_INSERT_AFTER(rt, ri, entry);
				}
				rt = ri;

				pf_rule_to_actions(r, &pd->act);
				if (r->log)
					PFLOG_PACKET(r->action, PFRES_MATCH, r,
					    ctx->a, ruleset, pd, 1, NULL);
			} else {
				/*
				 * found matching r
				 */
				*ctx->rm = r;
				/*
				 * anchor, with ruleset, where r belongs to
				 */
				*ctx->am = ctx->a;
				/*
				 * ruleset where r belongs to
				 */
				*ctx->rsm = ruleset;
				/*
				 * ruleset, where anchor belongs to.
				 */
				ctx->arsm = ctx->aruleset;
			}
			if (pd->act.log & PF_LOG_MATCHES)
				pf_log_matches(pd, r, ctx->a, ruleset, match_rules);
			if (r->quick) {
				ctx->test_status = PF_TEST_QUICK;
				break;
			}
		} else {
			save_a = ctx->a;
			save_aruleset = ctx->aruleset;

			ctx->a = r;			/* remember anchor */
			ctx->aruleset = ruleset;	/* and its ruleset */
			if (ctx->a->quick)
				ctx->test_status = PF_TEST_QUICK;
			/*
			 * Note: we don't need to restore if we are not going
			 * to continue with ruleset evaluation.
			 */
			if (pf_step_into_anchor(ctx, r, match_rules) != PF_TEST_OK) {
				break;
			}
			ctx->a = save_a;
			ctx->aruleset = save_aruleset;
		}
		r = TAILQ_NEXT(r, entries);
	}


	return (ctx->test_status);
}

static int
pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
    struct pf_pdesc *pd, struct pf_krule **am,
    struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp,
    struct pf_krule_slist *match_rules)
{
	struct pf_krule *r = NULL;
	struct pf_kruleset *ruleset = NULL;
	struct pf_test_ctx ctx;
	u_short transerror;
	int action = PF_PASS;
	u_int16_t bproto_sum = 0, bip_sum = 0;
	enum pf_test_status rv;

	PF_RULES_RASSERT();

	bzero(&ctx, sizeof(ctx));
	ctx.tag = -1;
	ctx.pd = pd;
	ctx.rm = rm;
	ctx.am = am;
	ctx.rsm = rsm;
	ctx.th = &pd->hdr.tcp;
	ctx.reason = *reason;

	pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
	pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);

	if (inp != NULL) {
		INP_LOCK_ASSERT(inp);
		pd->lookup.uid = inp->inp_cred->cr_uid;
		pd->lookup.gid = inp->inp_cred->cr_gid;
		pd->lookup.done = 1;
	}

	if (pd->ip_sum)
		bip_sum = *pd->ip_sum;

	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		bproto_sum = ctx.th->th_sum;
		pd->nsport = ctx.th->th_sport;
		pd->ndport = ctx.th->th_dport;
		break;
	case IPPROTO_UDP:
		bproto_sum = pd->hdr.udp.uh_sum;
		pd->nsport = pd->hdr.udp.uh_sport;
		pd->ndport = pd->hdr.udp.uh_dport;
		break;
	case IPPROTO_SCTP:
		pd->nsport = pd->hdr.sctp.src_port;
		pd->ndport = pd->hdr.sctp.dest_port;
		break;
#ifdef INET
	case IPPROTO_ICMP:
		MPASS(pd->af == AF_INET);
		ctx.icmptype = pd->hdr.icmp.icmp_type;
		ctx.icmpcode = pd->hdr.icmp.icmp_code;
		ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
		    &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
		if (ctx.icmp_dir == PF_IN) {
			pd->nsport = ctx.virtual_id;
			pd->ndport = ctx.virtual_type;
		} else {
			pd->nsport = ctx.virtual_type;
			pd->ndport = ctx.virtual_id;
		}
		break;
#endif /* INET */
#ifdef INET6
	case IPPROTO_ICMPV6:
		MPASS(pd->af == AF_INET6);
		ctx.icmptype = pd->hdr.icmp6.icmp6_type;
		ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
		ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
		    &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
		if (ctx.icmp_dir == PF_IN) {
			pd->nsport = ctx.virtual_id;
			pd->ndport = ctx.virtual_type;
		} else {
			pd->nsport = ctx.virtual_type;
			pd->ndport = ctx.virtual_id;
		}

		break;
#endif /* INET6 */
	default:
		pd->nsport = pd->ndport = 0;
		break;
	}
	pd->osport = pd->nsport;
	pd->odport = pd->ndport;

	/* check packet for BINAT/NAT/RDR */
	transerror = pf_get_translation(&ctx);
	switch (transerror) {
	default:
		/* A translation error occurred. */
		REASON_SET(&ctx.reason, transerror);
		goto cleanup;
	case PFRES_MAX:
		/* No match. */
		break;
	case PFRES_MATCH:
		KASSERT(ctx.sk != NULL, ("%s: null sk", __func__));
		KASSERT(ctx.nk != NULL, ("%s: null nk", __func__));
		if (ctx.nr->log) {
			PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a,
			    ruleset, pd, 1, NULL);
		}

		ctx.rewrite += pf_translate_compat(&ctx);
		ctx.nat_pool = &(ctx.nr->rdr);
	}

	if (ctx.nr && ctx.nr->natpass) {
		r = ctx.nr;
		ruleset = *ctx.rsm;
	} else {
		ruleset = &pf_main_ruleset;
		rv = pf_match_rule(&ctx, ruleset, match_rules);
		if (rv == PF_TEST_FAIL) {
			/*
			 * Reason has been set in pf_match_rule() already.
			 */
			goto cleanup;
		}

		r = *ctx.rm;			/* matching rule */
		ctx.a = *ctx.am;		/* rule that defines an anchor containing 'r' */
		ruleset = *ctx.rsm;		/* ruleset of the anchor defined by the rule 'a' */
		ctx.aruleset = ctx.arsm;	/* ruleset of the 'a' rule itself */

		/* apply actions for last matching pass/block rule */
		pf_rule_to_actions(r, &pd->act);
		transerror = pf_rule_apply_nat(&ctx, r);
		switch (transerror) {
		case PFRES_MATCH:
			/* Translation action found in rule and applied successfully */
		case PFRES_MAX:
			/* No translation action found in rule */
			break;
		default:
			/* Translation action found in rule but failed to apply */
			REASON_SET(&ctx.reason, transerror);
			goto cleanup;
		}
	}

	REASON_SET(&ctx.reason, PFRES_MATCH);

	if (r->log) {
		if (ctx.rewrite)
			m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
		PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL);
	}
	if (pd->act.log & PF_LOG_MATCHES)
		pf_log_matches(pd, r, ctx.a, ruleset, match_rules);
	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
	    (r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		pf_return(r, ctx.nr, pd, ctx.th, bproto_sum,
		    bip_sum, &ctx.reason, r->rtableid);
	}

	if (r->action == PF_DROP)
		goto cleanup;

	if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) {
		REASON_SET(&ctx.reason, PFRES_MEMORY);
		goto cleanup;
	}
	if (pd->act.rtableid >= 0)
		M_SETFIB(pd->m, pd->act.rtableid);

	if (r->rt) {
		/*
		 * Set act.rt here instead of in pf_rule_to_actions() because
		 * it is applied only from the last pass rule. For rules
		 * with the prefer-ipv6-nexthop option act.rt_af is a hint
		 * about AF of the forwarded packet and might be changed.
		 */
		pd->act.rt = r->rt;
		if (r->rt == PF_REPLYTO)
			pd->act.rt_af = pd->af;
		else
			pd->act.rt_af = pd->naf;
		if ((transerror = pf_map_addr_sn(pd->af, r, pd->src,
		    &pd->act.rt_addr, &pd->act.rt_af, &pd->act.rt_kif, NULL,
		    &(r->route), PF_SN_ROUTE)) != PFRES_MATCH) {
			REASON_SET(&ctx.reason, transerror);
			goto cleanup;
		}
	}

	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
	    (!ctx.state_icmp && (r->keep_state || ctx.nr != NULL ||
	    (pd->flags & PFDESC_TCP_NORM)))) {
		bool nat64;

		action = pf_create_state(r, &ctx, sm, bproto_sum, bip_sum,
		    match_rules);
		ctx.sk = ctx.nk = NULL;
		if (action != PF_PASS) {
			pf_udp_mapping_release(ctx.udp_mapping);
			if (r->log || (ctx.nr != NULL && ctx.nr->log) ||
			    ctx.reason == PFRES_MEMORY)
				pd->act.log |= PF_LOG_FORCE;
			if (action == PF_DROP &&
			    (r->rule_flag & PFRULE_RETURN))
				pf_return(r, ctx.nr, pd, ctx.th,
				    bproto_sum, bip_sum, &ctx.reason,
				    pd->act.rtableid);
			*reason = ctx.reason;
			return (action);
		}

		nat64 = pd->af != pd->naf;
		if (nat64) {
			int ret;

			if (ctx.sk == NULL)
				ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
			if (ctx.nk == NULL)
				ctx.nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];

			if (pd->dir == PF_IN) {
				ret = pf_translate(pd, &ctx.sk->addr[pd->didx],
				    ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx],
				    ctx.sk->port[pd->sidx], ctx.virtual_type,
				    ctx.icmp_dir);
			} else {
				ret = pf_translate(pd, &ctx.sk->addr[pd->sidx],
				    ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx],
				    ctx.sk->port[pd->didx], ctx.virtual_type,
				    ctx.icmp_dir);
			}

			if (ret < 0)
				goto cleanup;

			ctx.rewrite += ret;

			if (ctx.rewrite && ctx.sk->af != ctx.nk->af)
				action = PF_AFRT;
		}
	} else {
		uma_zfree(V_pf_state_key_z, ctx.sk);
		uma_zfree(V_pf_state_key_z, ctx.nk);
		ctx.sk = ctx.nk = NULL;
		pf_udp_mapping_release(ctx.udp_mapping);
	}

	/* copy back packet headers if we performed NAT operations */
	if (ctx.rewrite)
		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);

	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
	    pd->dir == PF_OUT &&
	    V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) {
		/*
		 * We want the state created, but we don't
		 * want to send this in case a partner
		 * firewall has to know about it to allow
		 * replies through it.
		 */
		*reason = ctx.reason;
		return (PF_DEFER);
	}

	*reason = ctx.reason;
	return (action);

cleanup:
	uma_zfree(V_pf_state_key_z, ctx.sk);
	uma_zfree(V_pf_state_key_z, ctx.nk);
	pf_udp_mapping_release(ctx.udp_mapping);
	*reason = ctx.reason;

	return (PF_DROP);
}

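/*
 * Allocate and insert a new state entry for rule r: enforce the rule's
 * max-states and source-node limits, set per-protocol initial peer states
 * and timeouts, set up the wire/stack state keys (unless
 * pf_get_translation() already did) and handle the first packet of a
 * synproxy handshake. Returns PF_PASS, PF_SYNPROXY_DROP or PF_DROP.
 */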
static int
pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx,
    struct pf_kstate **sm, u_int16_t bproto_sum, u_int16_t bip_sum,
    struct pf_krule_slist *match_rules)
{
	struct pf_pdesc *pd = ctx->pd;
	struct pf_kstate *s = NULL;
	struct pf_ksrc_node *sns[PF_SN_MAX] = { NULL };
	/*
	 * XXXKS: The hash for PF_SN_LIMIT and PF_SN_ROUTE should be the same
	 *        but for PF_SN_NAT it is different. Don't try optimizing it,
	 *        just store all 3 hashes.
	 */
	struct pf_srchash *snhs[PF_SN_MAX] = { NULL };
	struct tcphdr *th = &pd->hdr.tcp;
	u_int16_t mss = V_tcp_mssdflt;
	u_short sn_reason;

	/* check maximums */
	if (r->max_states &&
	    (counter_u64_fetch(r->states_cur) >= r->max_states)) {
		counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
		REASON_SET(&ctx->reason, PFRES_MAXSTATES);
		goto csfailed;
	}
	/* src node for limits */
	if ((r->rule_flag & PFRULE_SRCTRACK) &&
	    (sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, pd->af,
	    NULL, NULL, pd->af, PF_SN_LIMIT)) != 0) {
		REASON_SET(&ctx->reason, sn_reason);
		goto csfailed;
	}
	/* src node for route-to rule */
	if (r->rt) {
		if ((r->route.opts & PF_POOL_STICKYADDR) &&
		    (sn_reason = pf_insert_src_node(sns, snhs, r, pd->src,
		    pd->af, &pd->act.rt_addr, pd->act.rt_kif, pd->act.rt_af,
		    PF_SN_ROUTE)) != 0) {
			REASON_SET(&ctx->reason, sn_reason);
			goto csfailed;
		}
	}
	/* src node for translation rule */
	if (ctx->nr != NULL) {
		KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__));
		/*
		 * The NAT addresses are chosen during ruleset parsing.
		 * The new afto code stores post-nat addresses in nsaddr.
		 * The old nat code (also used for new nat-to rules) creates
		 * state keys and stores addresses in them.
		 */
		if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) &&
		    (sn_reason = pf_insert_src_node(sns, snhs, ctx->nr,
		    ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af,
		    ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL,
		    pd->naf, PF_SN_NAT)) != 0) {
			REASON_SET(&ctx->reason, sn_reason);
			goto csfailed;
		}
	}
	s = pf_alloc_state(M_NOWAIT);
	if (s == NULL) {
		REASON_SET(&ctx->reason, PFRES_MEMORY);
		goto csfailed;
	}
	s->rule = r;
	s->nat_rule = ctx->nr;
	s->anchor = ctx->a;
	s->match_rules = *match_rules;
	memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions));

	if (pd->act.allow_opts)
		s->state_flags |= PFSTATE_ALLOWOPTS;
	if (r->rule_flag & PFRULE_STATESLOPPY)
		s->state_flags |= PFSTATE_SLOPPY;
	if (pd->flags & PFDESC_TCP_NORM) /* Set by old-style scrub rules */
		s->state_flags |= PFSTATE_SCRUB_TCP;
	if ((r->rule_flag & PFRULE_PFLOW) ||
	    (ctx->nr != NULL && ctx->nr->rule_flag & PFRULE_PFLOW))
		s->state_flags |= PFSTATE_PFLOW;

	s->act.log = pd->act.log & PF_LOG_ALL;
	s->sync_state = PFSYNC_S_NONE;
	s->state_flags |= pd->act.flags; /* Only needed for pfsync and state export */

	if (ctx->nr != NULL)
		s->act.log |= ctx->nr->log & PF_LOG_ALL;
	switch (pd->proto) {
	case IPPROTO_TCP:
		s->src.seqlo = ntohl(th->th_seq);
		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
		if ((tcp_get_flags(th) & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_MODULATE) {
			/* Generate sequence number modulator */
			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
			    0)
				s->src.seqdiff = 1;
			pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum,
			    htonl(s->src.seqlo + s->src.seqdiff), 0);
			ctx->rewrite = 1;
		} else
			s->src.seqdiff = 0;
		if (tcp_get_flags(th) & TH_SYN) {
			s->src.seqhi++;
			s->src.wscale = pf_get_wscale(pd);
		}
		s->src.max_win = MAX(ntohs(th->th_win), 1);
		if (s->src.wscale & PF_WSCALE_MASK) {
			/* Remove scale factor from initial window */
			int win = s->src.max_win;
			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
			s->src.max_win = (win - 1) >>
			    (s->src.wscale & PF_WSCALE_MASK);
		}
		if (tcp_get_flags(th) & TH_FIN)
			s->src.seqhi++;
		s->dst.seqhi = 1;
		s->dst.max_win = 1;
		pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
		pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
		s->timeout = PFTM_TCP_FIRST_PACKET;
		atomic_add_32(&V_pf_status.states_halfopen, 1);
		break;
	case IPPROTO_UDP:
		pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
		pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
		s->timeout = PFTM_UDP_FIRST_PACKET;
		break;
	case IPPROTO_SCTP:
		pf_set_protostate(s, PF_PEER_SRC, SCTP_COOKIE_WAIT);
		pf_set_protostate(s, PF_PEER_DST, SCTP_CLOSED);
		s->timeout = PFTM_SCTP_FIRST_PACKET;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif /* INET6 */
		s->timeout = PFTM_ICMP_FIRST_PACKET;
		break;
	default:
		pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
		pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
		s->timeout = PFTM_OTHER_FIRST_PACKET;
	}

	s->creation = s->expire = pf_get_uptime();

	if (pd->proto == IPPROTO_TCP) {
		if (s->state_flags & PFSTATE_SCRUB_TCP &&
		    pf_normalize_tcp_init(pd, th, &s->src)) {
			REASON_SET(&ctx->reason, PFRES_MEMORY);
			goto csfailed;
		}
		if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
		    pf_normalize_tcp_stateful(pd, &ctx->reason, th, s,
		    &s->src, &s->dst, &ctx->rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(PF_DEBUG_URGENT,
			    "%s: tcp normalize failed on first "
			    "pkt", __func__);
			goto csfailed;
		}
	} else if (pd->proto == IPPROTO_SCTP) {
		if (pf_normalize_sctp_init(pd, &s->src, &s->dst))
			goto csfailed;
		if (! (pd->sctp_flags & (PFDESC_SCTP_INIT | PFDESC_SCTP_ADD_IP)))
			goto csfailed;
	}
	s->direction = pd->dir;

	/*
	 * sk/nk could already have been set up by pf_get_translation().
	 */
	if (ctx->sk == NULL && ctx->nk == NULL) {
		MPASS(pd->sport == NULL || (pd->osport == *pd->sport));
		MPASS(pd->dport == NULL || (pd->odport == *pd->dport));
		if (pf_state_key_setup(pd, pd->nsport, pd->ndport,
		    &ctx->sk, &ctx->nk)) {
			goto csfailed;
		}
	} else
		KASSERT((ctx->sk != NULL && ctx->nk != NULL), ("%s: nr %p sk %p, nk %p",
		    __func__, ctx->nr, ctx->sk, ctx->nk));

	/* Swap sk/nk for PF_OUT. */
	if (pf_state_insert(BOUND_IFACE(s, pd), pd->kif,
	    (pd->dir == PF_IN) ? ctx->sk : ctx->nk,
	    (pd->dir == PF_IN) ? ctx->nk : ctx->sk, s)) {
		REASON_SET(&ctx->reason, PFRES_STATEINS);
		goto drop;
	} else
		*sm = s;
	ctx->sk = ctx->nk = NULL;

	STATE_INC_COUNTERS(s);

	/*
	 * Lock order is important: first state, then source node.
	 */
	for (pf_sn_types_t sn_type = 0; sn_type < PF_SN_MAX; sn_type++) {
		if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) {
			s->sns[sn_type] = sns[sn_type];
			PF_HASHROW_UNLOCK(snhs[sn_type]);
		}
	}

	if (ctx->tag > 0)
		s->tag = ctx->tag;
	if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
		pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
		pf_undo_nat(ctx->nr, pd, bip_sum);
		s->src.seqhi = arc4random();
		/* Find mss option */
		int rtid = M_GETFIB(pd->m);
		mss = pf_get_mss(pd);
		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
		s->src.mss = mss;
		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0,
		    pd->act.rtableid, &ctx->reason);
		REASON_SET(&ctx->reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	s->udp_mapping = ctx->udp_mapping;

	return (PF_PASS);

csfailed:
	uma_zfree(V_pf_state_key_z, ctx->sk);
	uma_zfree(V_pf_state_key_z, ctx->nk);

	for (pf_sn_types_t sn_type = 0; sn_type < PF_SN_MAX; sn_type++) {
		if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) {
			if (--sns[sn_type]->states == 0 &&
			    sns[sn_type]->expire == 0) {
				pf_unlink_src_node(sns[sn_type]);
				pf_free_src_node(sns[sn_type]);
				counter_u64_add(
				    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
			}
			PF_HASHROW_UNLOCK(snhs[sn_type]);
		}
	}

drop:
	if (s != NULL) {
		pf_src_tree_remove_state(s);
		s->timeout = PFTM_UNLINKED;
		pf_free_state(s);
	}

	return (PF_DROP);
}

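/*
 * Returns the number of rewrites performed (0 or 1), or -1 if the IPv4
 * ICMP header could not be translated to ICMPv6.
 */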
int
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
    int icmp_dir)
{
	/*
	 * pf_translate() implements OpenBSD's "new" NAT approach.
	 * We don't follow it, because it involves a breaking syntax change
	 * (removing nat/rdr rules and moving that into regular pf rules.)
	 * It also moves NAT processing to be done after normal rules
	 * evaluation, whereas in FreeBSD that's done before rules processing.
	 *
	 * We adopt the function only for nat64, and keep other NAT processing
	 * before rules processing.
	 */
	int rewrite = 0;
	int afto = pd->af != pd->naf;

	MPASS(afto);

	switch (pd->proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_SCTP:
		if (afto || *pd->sport != sport) {
			pf_change_ap(pd, pd->src, pd->sport,
			    saddr, sport);
			rewrite = 1;
		}
		if (afto || *pd->dport != dport) {
			pf_change_ap(pd, pd->dst, pd->dport,
			    daddr, dport);
			rewrite = 1;
		}
		break;

#ifdef INET
	case IPPROTO_ICMP:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET)
			return (0);

		if (afto) {
			if (pf_translate_icmp_af(AF_INET6, &pd->hdr.icmp))
				return (-1);
			pd->proto = IPPROTO_ICMPV6;
			rewrite = 1;
		}
		if (virtual_type == htons(ICMP_ECHO)) {
			u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;

			if (icmpid != pd->hdr.icmp.icmp_id) {
				pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
				    pd->hdr.icmp.icmp_cksum,
				    pd->hdr.icmp.icmp_id, icmpid, 0);
				pd->hdr.icmp.icmp_id = icmpid;
				/* XXX TODO copyback. */
				rewrite = 1;
			}
		}
		break;
#endif /* INET */

#ifdef INET6
	case IPPROTO_ICMPV6:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET6)
			return (0);

		if (afto) {
			/* ip_sum will be recalculated in pf_translate_af */
			if (pf_translate_icmp_af(AF_INET, &pd->hdr.icmp6))
				return (0);
			pd->proto = IPPROTO_ICMP;
			rewrite = 1;
		}
		break;
#endif /* INET6 */

	default:
		break;
	}

	return (rewrite);
}

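/*
 * Apply the translation chosen by pf_get_translation() in the traditional
 * (pre-pf_translate()) style: rewrite addresses, ports and checksums in
 * place from the nk state key and keep pd->nsaddr/ndaddr and
 * pd->nsport/ndport in sync. Returns non-zero if the caller still has to
 * copy the rewritten header back into the mbuf.
 */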
int
pf_translate_compat(struct pf_test_ctx *ctx)
{
	struct pf_pdesc *pd = ctx->pd;
	struct pf_state_key *nk = ctx->nk;
	struct tcphdr *th = &pd->hdr.tcp;
	int rewrite = 0;

	KASSERT(ctx->sk != NULL, ("%s: null sk", __func__));
	KASSERT(ctx->nk != NULL, ("%s: null nk", __func__));

	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
		    nk->port[pd->sidx] != pd->nsport) {
			pf_change_ap(pd, pd->src, &th->th_sport,
			    &nk->addr[pd->sidx], nk->port[pd->sidx]);
			pd->sport = &th->th_sport;
			pd->nsport = th->th_sport;
			pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
		}

		if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
		    nk->port[pd->didx] != pd->ndport) {
			pf_change_ap(pd, pd->dst, &th->th_dport,
			    &nk->addr[pd->didx], nk->port[pd->didx]);
			pd->dport = &th->th_dport;
			pd->ndport = th->th_dport;
			pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
		}
		rewrite++;
		break;
	case IPPROTO_UDP:
		if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
		    nk->port[pd->sidx] != pd->nsport) {
			pf_change_ap(pd, pd->src,
			    &pd->hdr.udp.uh_sport,
			    &nk->addr[pd->sidx],
			    nk->port[pd->sidx]);
			pd->sport = &pd->hdr.udp.uh_sport;
			pd->nsport = pd->hdr.udp.uh_sport;
			pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
		}

		if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
		    nk->port[pd->didx] != pd->ndport) {
			pf_change_ap(pd, pd->dst,
			    &pd->hdr.udp.uh_dport,
			    &nk->addr[pd->didx],
			    nk->port[pd->didx]);
			pd->dport = &pd->hdr.udp.uh_dport;
			pd->ndport = pd->hdr.udp.uh_dport;
			pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
		}
		rewrite++;
		break;
	case IPPROTO_SCTP: {
		if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
		    nk->port[pd->sidx] != pd->nsport) {
			pf_change_ap(pd, pd->src,
			    &pd->hdr.sctp.src_port,
			    &nk->addr[pd->sidx],
			    nk->port[pd->sidx]);
			pd->sport = &pd->hdr.sctp.src_port;
			pd->nsport = pd->hdr.sctp.src_port;
			pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
		}
		if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
		    nk->port[pd->didx] != pd->ndport) {
			pf_change_ap(pd, pd->dst,
			    &pd->hdr.sctp.dest_port,
			    &nk->addr[pd->didx],
			    nk->port[pd->didx]);
			pd->dport = &pd->hdr.sctp.dest_port;
			pd->ndport = pd->hdr.sctp.dest_port;
			pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
		}
		break;
	}
#ifdef INET
	case IPPROTO_ICMP:
		if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) {
			pf_change_a(&pd->src->v4.s_addr, pd->ip_sum,
			    nk->addr[pd->sidx].v4.s_addr, 0);
			pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
		}

		if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) {
			pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum,
			    nk->addr[pd->didx].v4.s_addr, 0);
			pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
		}

		if (ctx->virtual_type == htons(ICMP_ECHO) &&
		    nk->port[pd->sidx] != pd->hdr.icmp.icmp_id) {
			pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
			    pd->hdr.icmp.icmp_cksum, pd->nsport,
			    nk->port[pd->sidx], 0);
			pd->hdr.icmp.icmp_id = nk->port[pd->sidx];
			pd->sport = &pd->hdr.icmp.icmp_id;
		}
		m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
		break;
#endif /* INET */
#ifdef INET6
	case IPPROTO_ICMPV6:
		if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) {
			pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum,
			    &nk->addr[pd->sidx], 0);
			pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
		}

		if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) {
			pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum,
			    &nk->addr[pd->didx], 0);
			pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
		}
		rewrite++;
		break;
#endif /* INET6 */
	default:
		switch (pd->af) {
#ifdef INET
		case AF_INET:
			if (PF_ANEQ(&pd->nsaddr,
			    &nk->addr[pd->sidx], AF_INET)) {
				pf_change_a(&pd->src->v4.s_addr,
				    pd->ip_sum,
				    nk->addr[pd->sidx].v4.s_addr, 0);
				pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
			}

			if (PF_ANEQ(&pd->ndaddr,
			    &nk->addr[pd->didx], AF_INET)) {
				pf_change_a(&pd->dst->v4.s_addr,
				    pd->ip_sum,
				    nk->addr[pd->didx].v4.s_addr, 0);
				pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
			}
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (PF_ANEQ(&pd->nsaddr,
			    &nk->addr[pd->sidx], AF_INET6)) {
				pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx],
				    pd->af);
				pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af);
			}

			if (PF_ANEQ(&pd->ndaddr,
			    &nk->addr[pd->didx], AF_INET6)) {
				pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx],
				    pd->af);
				pf_addrcpy(pd->dst, &nk->addr[pd->didx],
				    pd->af);
			}
			break;
#endif /* INET6 */
		}
		break;
	}
	return (rewrite);
}

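/*
 * Full TCP state tracking: window-scale aware sequence/ACK window
 * validation based on Guido van Rooij's algorithm (see the reference in
 * the body), with sequence number (de)modulation and stateful
 * normalization. Advances the peer FSM states and the state timeout.
 * Returns PF_PASS or PF_DROP; sets *copyback when the TCP header was
 * rewritten and must be copied back to the mbuf.
 */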
static int
pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd,
    u_short *reason, int *copyback, struct pf_state_peer *src,
    struct pf_state_peer *dst, u_int8_t psrc, u_int8_t pdst)
{
	struct tcphdr *th = &pd->hdr.tcp;
	u_int16_t win = ntohs(th->th_win);
	u_int32_t ack, end, data_end, seq, orig_seq;
	u_int8_t sws, dws;
	int ackskew;

	if (src->wscale && dst->wscale && !(tcp_get_flags(th) & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else
		sws = dws = 0;

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((state->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(pd, th, src)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = arc4random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (tcp_get_flags(th) & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(pd);
				if (src->wscale & PF_WSCALE_FLAG) {
					/* Remove scale factor from initial
					 * window */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win = MIN(TCP_MAXWIN,
					    (u_int32_t)dst->max_win <<
					    (dst->wscale & PF_WSCALE_MASK));
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		data_end = end;
		if (tcp_get_flags(th) & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			pf_set_protostate(state, psrc, TCPS_SYN_SENT);

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (tcp_get_flags(th) & TH_SYN)
			end++;
		data_end = end;
		if (tcp_get_flags(th) & TH_FIN)
			end++;
	}

	if ((tcp_get_flags(th) & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (tcp_get_flags(th) & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		data_end = end = seq;
	}

	ackskew = dst->seqlo - ack;

	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(pd, th, dst))
			*copyback = 1;
	}

#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, data_end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    /* Require an exact/+1 sequence match on resets when possible */
	    (SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) &&
	    SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) {
		/* Allow resets to match sequence window if ack is perfect match */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pd, reason, th,
			    state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/* update states */
		if (tcp_get_flags(th) & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				pf_set_protostate(state, psrc, TCPS_SYN_SENT);
		if (tcp_get_flags(th) & TH_FIN)
			if (src->state < TCPS_CLOSING)
				pf_set_protostate(state, psrc, TCPS_CLOSING);
		if (tcp_get_flags(th) & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				pf_set_protostate(state, pdst,
				    TCPS_ESTABLISHED);
				if (src->state == TCPS_ESTABLISHED &&
				    state->sns[PF_SN_LIMIT] != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				pf_set_protostate(state, pdst,
				    TCPS_FIN_WAIT_2);
		}
		if (tcp_get_flags(th) & TH_RST)
			pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);

		/* update expire time */
		state->expire = pf_get_uptime();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			state->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			state->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			state->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			state->timeout = PFTM_TCP_CLOSING;
		else
			state->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */

	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 ||
	    src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(state);
			pf_print_flags(tcp_get_flags(th));
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (unsigned long long)state->packets[0],
			    (unsigned long long)state->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == state->direction ? "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pd, reason, th,
			    state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (tcp_get_flags(th) & TH_FIN)
			if (src->state < TCPS_CLOSING)
				pf_set_protostate(state, psrc, TCPS_CLOSING);
		if (tcp_get_flags(th) & TH_RST)
			pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);

		/* Fall through to PASS packet */

	} else {
		if (state->dst.state == TCPS_SYN_SENT &&
		    state->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(tcp_get_flags(th) & TH_RST))
				pf_send_tcp(state->rule, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    state->rule->return_ttl, M_SKIP_FIREWALL,
				    0, 0, state->act.rtableid, reason);
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(state);
			pf_print_flags(tcp_get_flags(th));
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned long long)state->packets[0],
			    (unsigned long long)state->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == state->direction ? "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

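/*
 * Simplified TCP tracking for 'sloppy' states: advance the peer FSM
 * states and the state timeout from the observed flags only, with no
 * sequence number validation. Also covers the cases where only one half
 * of the connection is visible. Returns PF_PASS or PF_DROP.
 */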
static int
pf_tcp_track_sloppy(struct pf_kstate *state, struct pf_pdesc *pd,
    u_short *reason, struct pf_state_peer *src, struct pf_state_peer *dst,
    u_int8_t psrc, u_int8_t pdst)
{
	struct tcphdr *th = &pd->hdr.tcp;

	if (tcp_get_flags(th) & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			pf_set_protostate(state, psrc, TCPS_SYN_SENT);
	if (tcp_get_flags(th) & TH_FIN)
		if (src->state < TCPS_CLOSING)
			pf_set_protostate(state, psrc, TCPS_CLOSING);
	if (tcp_get_flags(th) & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			pf_set_protostate(state, pdst, TCPS_ESTABLISHED);
			if (src->state == TCPS_ESTABLISHED &&
			    state->sns[PF_SN_LIMIT] != NULL &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			pf_set_protostate(state, pdst, TCPS_FIN_WAIT_2);
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is an ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			pf_set_protostate(state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			dst->state = src->state = TCPS_ESTABLISHED;
			if (state->sns[PF_SN_LIMIT] != NULL &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
			 */
			pf_set_protostate(state, pdst, TCPS_CLOSING);
		}
	}
	if (tcp_get_flags(th) & TH_RST)
		pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);

	/* update expire time */
	state->expire = pf_get_uptime();
	if (src->state >= TCPS_FIN_WAIT_2 &&
	    dst->state >= TCPS_FIN_WAIT_2)
		state->timeout = PFTM_TCP_CLOSED;
	else if (src->state >= TCPS_CLOSING &&
	    dst->state >= TCPS_CLOSING)
		state->timeout = PFTM_TCP_FIN_WAIT;
	else if (src->state < TCPS_ESTABLISHED ||
	    dst->state < TCPS_ESTABLISHED)
		state->timeout = PFTM_TCP_OPENING;
	else if (src->state >= TCPS_CLOSING ||
	    dst->state >= TCPS_CLOSING)
		state->timeout = PFTM_TCP_CLOSING;
	else
		state->timeout = PFTM_TCP_ESTABLISHED;

	return (PF_PASS);
}

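/*
 * Drive the synproxy handshake: in PF_TCPS_PROXY_SRC complete the
 * three-way handshake with the client on the firewall's behalf; in
 * PF_TCPS_PROXY_DST open the connection to the server and splice the two
 * halves together by seeding the sequence number modulators. Returns
 * PF_PASS once the state is established, otherwise PF_SYNPROXY_DROP or
 * PF_DROP.
 */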
static int
pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason)
{
	struct pf_state_key *sk = state->key[pd->didx];
	struct tcphdr *th = &pd->hdr.tcp;

	if (state->src.state == PF_TCPS_PROXY_SRC) {
		if (pd->dir != state->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
		if (tcp_get_flags(th) & TH_SYN) {
			if (ntohl(th->th_seq) != state->src.seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			pf_send_tcp(state->rule, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    state->src.seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN|TH_ACK, 0, state->src.mss, 0,
			    M_SKIP_FIREWALL, 0, 0, state->act.rtableid,
			    reason);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
		    (ntohl(th->th_ack) != state->src.seqhi + 1) ||
		    (ntohl(th->th_seq) != state->src.seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else if (state->sns[PF_SN_LIMIT] != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		} else
			pf_set_protostate(state, PF_PEER_SRC,
			    PF_TCPS_PROXY_DST);
	}
	if (state->src.state == PF_TCPS_PROXY_DST) {
		if (pd->dir == state->direction) {
			if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != state->src.seqhi + 1) ||
			    (ntohl(th->th_seq) != state->src.seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			state->src.max_win = MAX(ntohs(th->th_win), 1);
			if (state->dst.seqhi == 1)
				state->dst.seqhi = arc4random();
			pf_send_tcp(state->rule, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    state->dst.seqhi, 0, TH_SYN, 0,
			    state->src.mss, 0,
			    state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0,
			    state->tag, 0, state->act.rtableid,
			    reason);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) !=
		    (TH_SYN|TH_ACK)) ||
		    (ntohl(th->th_ack) != state->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			state->dst.max_win = MAX(ntohs(th->th_win), 1);
			state->dst.seqlo = ntohl(th->th_seq);
			pf_send_tcp(state->rule, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, state->src.max_win, 0, 0, 0,
			    state->tag, 0, state->act.rtableid,
			    reason);
			pf_send_tcp(state->rule, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    state->src.seqhi + 1, state->src.seqlo + 1,
			    TH_ACK, state->dst.max_win, 0, 0,
			    M_SKIP_FIREWALL, 0, 0, state->act.rtableid,
			    reason);
			state->src.seqdiff = state->dst.seqhi -
			    state->src.seqlo;
			state->dst.seqdiff = state->src.seqhi -
			    state->dst.seqlo;
			state->src.seqhi = state->src.seqlo +
			    state->dst.max_win;
			state->dst.seqhi = state->dst.seqlo +
			    state->src.max_win;
			state->src.wscale = state->dst.wscale = 0;
			pf_set_protostate(state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}

	return (PF_PASS);
}

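/*
 * Match the packet against an existing state entry and run the
 * per-protocol tracking (TCP synproxy/full/sloppy, UDP, SCTP, other).
 * When the wire and stack state keys differ (NAT or af-to state) the
 * headers are rewritten towards the other side. Returns a PF_* verdict;
 * PF_AFRT signals an af-translated packet that needs to be reinjected.
 */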
static int
pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
{
	struct pf_state_key_cmp key;
	int copyback = 0;
	struct pf_state_peer *src, *dst;
	uint8_t psrc, pdst;
	int action;

	bzero(&key, sizeof(key));
	key.af = pd->af;
	key.proto = pd->virtual_proto;
	pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
	pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
	key.port[pd->sidx] = pd->osport;
	key.port[pd->didx] = pd->odport;

	action = pf_find_state(pd, &key, state);
	if (action != PF_MATCH)
		return (action);

	action = PF_PASS;
	if (pd->dir == (*state)->direction) {
		if (PF_REVERSED_KEY(*state, pd->af)) {
			src = &(*state)->dst;
			dst = &(*state)->src;
			psrc = PF_PEER_DST;
			pdst = PF_PEER_SRC;
		} else {
			src = &(*state)->src;
			dst = &(*state)->dst;
			psrc = PF_PEER_SRC;
			pdst = PF_PEER_DST;
		}
	} else {
		if (PF_REVERSED_KEY(*state, pd->af)) {
			src = &(*state)->src;
			dst = &(*state)->dst;
			psrc = PF_PEER_SRC;
			pdst = PF_PEER_DST;
		} else {
			src = &(*state)->dst;
			dst = &(*state)->src;
			psrc = PF_PEER_DST;
			pdst = PF_PEER_SRC;
		}
	}

	switch (pd->virtual_proto) {
	case IPPROTO_TCP: {
		struct tcphdr *th = &pd->hdr.tcp;

		if ((action = pf_synproxy(pd, *state, reason)) != PF_PASS)
			return (action);
		if (((tcp_get_flags(th) & (TH_SYN | TH_ACK)) == TH_SYN) ||
		    ((th->th_flags & (TH_SYN | TH_ACK | TH_RST)) == TH_ACK &&
		    pf_syncookie_check(pd) && pd->dir == PF_IN)) {
			if ((*state)->src.state >= TCPS_FIN_WAIT_2 &&
			    (*state)->dst.state >= TCPS_FIN_WAIT_2) {
				if (V_pf_status.debug >= PF_DEBUG_MISC) {
					printf("pf: state reuse ");
					pf_print_state(*state);
					pf_print_flags(tcp_get_flags(th));
					printf("\n");
				}
				/* XXX make sure it's the same direction ?? */
				pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
				pf_remove_state(*state);
				*state = NULL;
				return (PF_DROP);
			} else if ((*state)->src.state >= TCPS_ESTABLISHED &&
			    (*state)->dst.state >= TCPS_ESTABLISHED) {
				/*
				 * SYN matches existing state???
				 * Typically happens when sender boots up after
				 * sudden panic. Certain protocols (NFSv3) always
				 * use the same port numbers. Challenge
				 * ACK enables all parties (firewall and peers)
				 * to get in sync again.
				 */
				pf_send_challenge_ack(pd, *state, src, dst, reason);
				return (PF_DROP);
			}
		}
		if ((*state)->state_flags & PFSTATE_SLOPPY) {
			if (pf_tcp_track_sloppy(*state, pd, reason, src, dst,
			    psrc, pdst) == PF_DROP)
				return (PF_DROP);
		} else {
			int ret;

			ret = pf_tcp_track_full(*state, pd, reason,
			    &copyback, src, dst, psrc, pdst);
			if (ret == PF_DROP)
				return (PF_DROP);
		}
		break;
	}
	case IPPROTO_UDP:
		/* update states */
		if (src->state < PFUDPS_SINGLE)
			pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
		if (dst->state == PFUDPS_SINGLE)
			pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);

		/* update expire time */
		(*state)->expire = pf_get_uptime();
		if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
			(*state)->timeout = PFTM_UDP_MULTIPLE;
		else
			(*state)->timeout = PFTM_UDP_SINGLE;
		break;
	case IPPROTO_SCTP:
		if ((src->state >= SCTP_SHUTDOWN_SENT || src->state == SCTP_CLOSED) &&
		    (dst->state >= SCTP_SHUTDOWN_SENT || dst->state == SCTP_CLOSED) &&
		    pd->sctp_flags & PFDESC_SCTP_INIT) {
			pf_set_protostate(*state, PF_PEER_BOTH, SCTP_CLOSED);
			pf_remove_state(*state);
			*state = NULL;
			return (PF_DROP);
		}

		if (pf_sctp_track(*state, pd, reason) != PF_PASS)
			return (PF_DROP);

		/* Track state. */
		if (pd->sctp_flags & PFDESC_SCTP_INIT) {
			if (src->state < SCTP_COOKIE_WAIT) {
				pf_set_protostate(*state, psrc, SCTP_COOKIE_WAIT);
				(*state)->timeout = PFTM_SCTP_OPENING;
			}
		}
		if (pd->sctp_flags & PFDESC_SCTP_INIT_ACK) {
			MPASS(dst->scrub != NULL);
			if (dst->scrub->pfss_v_tag == 0)
				dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;
		}

		/*
		 * Bind to the correct interface if we're if-bound. For multihomed
		 * extra associations we don't know which interface that will be until
		 * here, so we've inserted the state on V_pfi_all. Fix that now.
		 */
		if ((*state)->kif == V_pfi_all &&
		    (*state)->rule->rule_flag & PFRULE_IFBOUND)
			(*state)->kif = pd->kif;

		if (pd->sctp_flags & (PFDESC_SCTP_COOKIE | PFDESC_SCTP_HEARTBEAT_ACK)) {
			if (src->state < SCTP_ESTABLISHED) {
				pf_set_protostate(*state, psrc, SCTP_ESTABLISHED);
				(*state)->timeout = PFTM_SCTP_ESTABLISHED;
			}
		}
		if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN |
		    PFDESC_SCTP_SHUTDOWN_COMPLETE)) {
			if (src->state < SCTP_SHUTDOWN_PENDING) {
				pf_set_protostate(*state, psrc, SCTP_SHUTDOWN_PENDING);
				(*state)->timeout = PFTM_SCTP_CLOSING;
			}
		}
		if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN_COMPLETE | PFDESC_SCTP_ABORT)) {
			pf_set_protostate(*state, psrc, SCTP_CLOSED);
			(*state)->timeout = PFTM_SCTP_CLOSED;
		}

		(*state)->expire = pf_get_uptime();
		break;
	default:
		/* update states */
		if (src->state < PFOTHERS_SINGLE)
			pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
		if (dst->state == PFOTHERS_SINGLE)
			pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);

		/* update expire time */
		(*state)->expire = pf_get_uptime();
		if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
			(*state)->timeout = PFTM_OTHER_MULTIPLE;
		else
			(*state)->timeout = PFTM_OTHER_SINGLE;
		break;
	}

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key *nk;
		int afto, sidx, didx;

		if (PF_REVERSED_KEY(*state, pd->af))
			nk = (*state)->key[pd->sidx];
		else
			nk = (*state)->key[pd->didx];

		afto = pd->af != nk->af;

		if (afto && (*state)->direction == PF_IN) {
			sidx = pd->didx;
			didx = pd->sidx;
		} else {
			sidx = pd->sidx;
			didx = pd->didx;
		}

		if (afto) {
			pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
			pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
			pd->naf = nk->af;
			action = PF_AFRT;
		}

		if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) ||
		    nk->port[sidx] != pd->osport)
			pf_change_ap(pd, pd->src, pd->sport,
			    &nk->addr[sidx], nk->port[sidx]);

		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
		    nk->port[didx] != pd->odport)
			pf_change_ap(pd, pd->dst, pd->dport,
			    &nk->addr[didx], nk->port[didx]);

		copyback = 1;
	}

	if (copyback && pd->hdrlen > 0)
		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);

	return (action);
}

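/*
 * Validate the SCTP verification tag of the packet against the value
 * recorded for the sending peer, learning it from the first packet seen.
 * Returns PF_DROP on a tag mismatch, PF_PASS otherwise.
 */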
static int
pf_sctp_track(struct pf_kstate *state, struct pf_pdesc *pd,
    u_short *reason)
{
	struct pf_state_peer *src;

	if (pd->dir == state->direction) {
		if (PF_REVERSED_KEY(state, pd->af))
			src = &state->dst;
		else
			src = &state->src;
	} else {
		if (PF_REVERSED_KEY(state, pd->af))
			src = &state->src;
		else
			src = &state->dst;
	}

	if (src->scrub != NULL) {
		if (src->scrub->pfss_v_tag == 0)
			src->scrub->pfss_v_tag = pd->hdr.sctp.v_tag;
		else if (src->scrub->pfss_v_tag != pd->hdr.sctp.v_tag)
			return (PF_DROP);
	}

	return (PF_PASS);
}

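/*
 * On state teardown, remove the state's wire addresses from the SCTP
 * endpoint database (both directions) and free endpoints that have no
 * sources left.
 */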
static void
pf_sctp_multihome_detach_addr(const struct pf_kstate *s)
{
	struct pf_sctp_endpoint key;
	struct pf_sctp_endpoint *ep;
	struct pf_state_key *sks = s->key[PF_SK_STACK];
	struct pf_sctp_source *i, *tmp;

	if (sks == NULL || sks->proto != IPPROTO_SCTP || s->dst.scrub == NULL)
		return;

	PF_SCTP_ENDPOINTS_LOCK();

	key.v_tag = s->dst.scrub->pfss_v_tag;
	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
	if (ep != NULL) {
		TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
			if (pf_addr_cmp(&i->addr,
			    &s->key[PF_SK_WIRE]->addr[s->direction == PF_OUT],
			    s->key[PF_SK_WIRE]->af) == 0) {
				SDT_PROBE3(pf, sctp, multihome, remove,
				    key.v_tag, s, i);
				TAILQ_REMOVE(&ep->sources, i, entry);
				free(i, M_PFTEMP);
				break;
			}
		}

		if (TAILQ_EMPTY(&ep->sources)) {
			RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
			free(ep, M_PFTEMP);
		}
	}

	/* Other direction. */
	key.v_tag = s->src.scrub->pfss_v_tag;
	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
	if (ep != NULL) {
		TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
			if (pf_addr_cmp(&i->addr,
			    &s->key[PF_SK_WIRE]->addr[s->direction == PF_IN],
			    s->key[PF_SK_WIRE]->af) == 0) {
				SDT_PROBE3(pf, sctp, multihome, remove,
				    key.v_tag, s, i);
				TAILQ_REMOVE(&ep->sources, i, entry);
				free(i, M_PFTEMP);
				break;
			}
		}

		if (TAILQ_EMPTY(&ep->sources)) {
			RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
			free(ep, M_PFTEMP);
		}
	}

	PF_SCTP_ENDPOINTS_UNLOCK();
}

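/*
 * Record an additional source address for the SCTP endpoint identified
 * by v_tag, creating the endpoint on first use. Duplicates are ignored
 * and the number of addresses per endpoint is capped at
 * PF_SCTP_MAX_ENDPOINTS.
 */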
static void
pf_sctp_multihome_add_addr(struct pf_pdesc *pd, struct pf_addr *a, uint32_t v_tag)
{
	struct pf_sctp_endpoint key = {
		.v_tag = v_tag,
	};
	struct pf_sctp_source *i;
	struct pf_sctp_endpoint *ep;
	int count;

	PF_SCTP_ENDPOINTS_LOCK();

	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
	if (ep == NULL) {
		ep = malloc(sizeof(struct pf_sctp_endpoint),
		    M_PFTEMP, M_NOWAIT);
		if (ep == NULL) {
			PF_SCTP_ENDPOINTS_UNLOCK();
			return;
		}

		ep->v_tag = v_tag;
		TAILQ_INIT(&ep->sources);
		RB_INSERT(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
	}

	/* Avoid inserting duplicates. */
	count = 0;
	TAILQ_FOREACH(i, &ep->sources, entry) {
		count++;
		if (pf_addr_cmp(&i->addr, a, pd->af) == 0) {
			PF_SCTP_ENDPOINTS_UNLOCK();
			return;
		}
	}

	/* Limit the number of addresses per endpoint. */
	if (count >= PF_SCTP_MAX_ENDPOINTS) {
		PF_SCTP_ENDPOINTS_UNLOCK();
		return;
	}

	i = malloc(sizeof(*i), M_PFTEMP, M_NOWAIT);
	if (i == NULL) {
		PF_SCTP_ENDPOINTS_UNLOCK();
		return;
	}

	i->af = pd->af;
	memcpy(&i->addr, a, sizeof(*a));
	TAILQ_INSERT_TAIL(&ep->sources, i, entry);
	SDT_PROBE2(pf, sctp, multihome, add, v_tag, i);

	PF_SCTP_ENDPOINTS_UNLOCK();
}

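/*
 * Process the multihome jobs queued by pf_multihome_scan(): for
 * SCTP_ADD_IP_ADDRESS run the new address through the ruleset and, if
 * allowed, create the extra association state and fan the job out to all
 * known peer addresses; for SCTP_DEL_IP_ADDRESS move the matching state
 * to shutdown. Runs without the state lock held, hence the deferred
 * handling.
 */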
static void
pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif,
    struct pf_kstate *s, int action)
{
	struct pf_krule_slist match_rules;
	struct pf_sctp_multihome_job *j, *tmp;
	struct pf_sctp_source *i;
	int ret;
	struct pf_kstate *sm = NULL;
	struct pf_krule *ra = NULL;
	struct pf_krule *r = &V_pf_default_rule;
	struct pf_kruleset *rs = NULL;
	u_short reason;
	bool do_extra = true;

	PF_RULES_RLOCK_TRACKER;

again:
	TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) {
		if (s == NULL || action != PF_PASS)
			goto free;

		/* Confirm we don't recurse here. */
		MPASS(! (pd->sctp_flags & PFDESC_SCTP_ADD_IP));

		switch (j->op) {
		case SCTP_ADD_IP_ADDRESS: {
			uint32_t v_tag = pd->sctp_initiate_tag;

			if (v_tag == 0) {
				if (s->direction == pd->dir)
					v_tag = s->src.scrub->pfss_v_tag;
				else
					v_tag = s->dst.scrub->pfss_v_tag;
			}

			/*
			 * Avoid duplicating states. We'll already have
			 * created a state based on the source address of
			 * the packet, but SCTP endpoints may also list this
			 * address again in the INIT(_ACK) parameters.
			 */
			if (pf_addr_cmp(&j->src, pd->src, pd->af) == 0) {
				break;
			}

			j->pd.sctp_flags |= PFDESC_SCTP_ADD_IP;
			PF_RULES_RLOCK();
			sm = NULL;
			if (s->rule->rule_flag & PFRULE_ALLOW_RELATED) {
				j->pd.related_rule = s->rule;
			}
			SLIST_INIT(&match_rules);
			ret = pf_test_rule(&r, &sm,
			    &j->pd, &ra, &rs, &reason, NULL, &match_rules);
			/*
			 * Nothing to do about match rules, the processed
			 * packet has already increased the counters.
			 */
			pf_free_match_rules(&match_rules);
			PF_RULES_RUNLOCK();
			SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->pd.m, ret);
			if (ret != PF_DROP && sm != NULL) {
				/* Inherit v_tag values. */
				if (sm->direction == s->direction) {
					sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
					sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
				} else {
					sm->src.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
					sm->dst.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
				}
				PF_STATE_UNLOCK(sm);
			} else {
				/* This can happen when we try duplicate inserts. */
				break;
			}

			/* Only add the address if we've actually allowed the state. */
			pf_sctp_multihome_add_addr(pd, &j->src, v_tag);

			if (! do_extra) {
				break;
			}
			/*
			 * We need to do this for each of our source addresses.
			 * Find those based on the verification tag.
			 */
			struct pf_sctp_endpoint key = {
				.v_tag = pd->hdr.sctp.v_tag,
			};
			struct pf_sctp_endpoint *ep;

			PF_SCTP_ENDPOINTS_LOCK();
			ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
			if (ep == NULL) {
				PF_SCTP_ENDPOINTS_UNLOCK();
				break;
			}
			MPASS(ep != NULL);

			TAILQ_FOREACH(i, &ep->sources, entry) {
				struct pf_sctp_multihome_job *nj;

				/* SCTP can intermingle IPv4 and IPv6. */
				if (i->af != pd->af)
					continue;

				nj = malloc(sizeof(*nj), M_PFTEMP, M_NOWAIT | M_ZERO);
				if (! nj) {
					continue;
				}
				memcpy(&nj->pd, &j->pd, sizeof(j->pd));
				memcpy(&nj->src, &j->src, sizeof(nj->src));
				nj->pd.src = &nj->src;
				// New destination address!
				memcpy(&nj->dst, &i->addr, sizeof(nj->dst));
				nj->pd.dst = &nj->dst;
				nj->pd.m = j->pd.m;
				nj->op = j->op;

				MPASS(nj->pd.pcksum);
				TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next);
			}
			PF_SCTP_ENDPOINTS_UNLOCK();

			break;
		}
		case SCTP_DEL_IP_ADDRESS: {
			struct pf_state_key_cmp key;
			uint8_t psrc;
			int action;

			bzero(&key, sizeof(key));
			key.af = j->pd.af;
			key.proto = IPPROTO_SCTP;
			if (j->pd.dir == PF_IN) {	/* wire side, straight */
				pf_addrcpy(&key.addr[0], j->pd.src, key.af);
				pf_addrcpy(&key.addr[1], j->pd.dst, key.af);
				key.port[0] = j->pd.hdr.sctp.src_port;
				key.port[1] = j->pd.hdr.sctp.dest_port;
			} else {			/* stack side, reverse */
				pf_addrcpy(&key.addr[1], j->pd.src, key.af);
				pf_addrcpy(&key.addr[0], j->pd.dst, key.af);
				key.port[1] = j->pd.hdr.sctp.src_port;
				key.port[0] = j->pd.hdr.sctp.dest_port;
			}

			action = pf_find_state(&j->pd, &key, &sm);
			if (action == PF_MATCH) {
				PF_STATE_LOCK_ASSERT(sm);
				if (j->pd.dir == sm->direction) {
					psrc = PF_PEER_SRC;
				} else {
					psrc = PF_PEER_DST;
				}
				pf_set_protostate(sm, psrc, SCTP_SHUTDOWN_PENDING);
				sm->timeout = PFTM_SCTP_CLOSING;
				PF_STATE_UNLOCK(sm);
			}
			break;
		}
		default:
			panic("Unknown op %#x", j->op);
		}

free:
		TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next);
		free(j, M_PFTEMP);
	}

	/* We may have inserted extra work while processing the list. */
	if (! TAILQ_EMPTY(&pd->sctp_multihome_jobs)) {
		do_extra = false;
		goto again;
	}
}

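/*
 * Walk the parameters of an SCTP INIT(_ACK) or ASCONF chunk and queue a
 * multihome job for every address listed, recursing into nested ADD/DEL-IP
 * parameters. The jobs are executed later by pf_sctp_multihome_delayed()
 * because the state lock is held here. Returns PF_PASS, or PF_DROP on
 * malformed parameters or allocation failure.
 */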
static int
7700
pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
7701
{
7702
int off = 0;
7703
struct pf_sctp_multihome_job *job;
7704
7705
SDT_PROBE4(pf, sctp, multihome_scan, entry, start, len, pd, op);
7706
7707
while (off < len) {
7708
struct sctp_paramhdr h;
7709
7710
if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL,
7711
pd->af))
7712
return (PF_DROP);
7713
7714
/* Parameters are at least 4 bytes. */
7715
if (ntohs(h.param_length) < 4)
7716
return (PF_DROP);
7717
7718
SDT_PROBE2(pf, sctp, multihome_scan, param, ntohs(h.param_type),
7719
ntohs(h.param_length));
7720
7721
switch (ntohs(h.param_type)) {
7722
case SCTP_IPV4_ADDRESS: {
7723
struct in_addr t;
7724
7725
if (ntohs(h.param_length) !=
7726
(sizeof(struct sctp_paramhdr) + sizeof(t)))
7727
return (PF_DROP);
7728
7729
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
7730
NULL, pd->af))
7731
return (PF_DROP);
7732
7733
if (in_nullhost(t))
7734
t.s_addr = pd->src->v4.s_addr;
7735
7736
/*
7737
* We hold the state lock (idhash) here, which means
7738
* that we can't acquire the keyhash, or we'll get a
7739
* LOR (and potentially double-lock things too). We also
7740
* can't release the state lock here, so instead we'll
7741
* enqueue this for async handling.
7742
* There's a relatively small race here, in that a
7743
* packet using the new addresses could arrive already,
7744
* but that's just though luck for it.
7745
*/
7746
job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
7747
if (! job)
7748
return (PF_DROP);
7749
7750
SDT_PROBE2(pf, sctp, multihome_scan, ipv4, &t, op);
7751
7752
memcpy(&job->pd, pd, sizeof(*pd));
7753
7754
// New source address!
7755
memcpy(&job->src, &t, sizeof(t));
7756
job->pd.src = &job->src;
7757
memcpy(&job->dst, pd->dst, sizeof(job->dst));
7758
job->pd.dst = &job->dst;
7759
job->pd.m = pd->m;
7760
job->op = op;
7761
7762
MPASS(job->pd.pcksum);
7763
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
7764
break;
7765
}
7766
#ifdef INET6
7767
case SCTP_IPV6_ADDRESS: {
7768
struct in6_addr t;
7769
7770
if (ntohs(h.param_length) !=
7771
(sizeof(struct sctp_paramhdr) + sizeof(t)))
7772
return (PF_DROP);
7773
7774
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
7775
NULL, pd->af))
7776
return (PF_DROP);
7777
if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
7778
break;
7779
if (memcmp(&t, &in6addr_any, sizeof(t)) == 0)
7780
memcpy(&t, &pd->src->v6, sizeof(t));
7781
7782
job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
7783
if (! job)
7784
return (PF_DROP);
7785
7786
SDT_PROBE2(pf, sctp, multihome_scan, ipv6, &t, op);
7787
7788
memcpy(&job->pd, pd, sizeof(*pd));
7789
memcpy(&job->src, &t, sizeof(t));
7790
job->pd.src = &job->src;
7791
memcpy(&job->dst, pd->dst, sizeof(job->dst));
7792
job->pd.dst = &job->dst;
7793
job->pd.m = pd->m;
7794
job->op = op;
7795
7796
MPASS(job->pd.pcksum);
7797
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
7798
break;
7799
}
7800
#endif /* INET6 */
7801
case SCTP_ADD_IP_ADDRESS: {
7802
int ret;
7803
struct sctp_asconf_paramhdr ah;
7804
7805
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
7806
NULL, pd->af))
7807
return (PF_DROP);
7808
7809
ret = pf_multihome_scan(start + off + sizeof(ah),
7810
ntohs(ah.ph.param_length) - sizeof(ah), pd,
7811
SCTP_ADD_IP_ADDRESS);
7812
if (ret != PF_PASS)
7813
return (ret);
7814
break;
7815
}
7816
case SCTP_DEL_IP_ADDRESS: {
7817
int ret;
7818
struct sctp_asconf_paramhdr ah;
7819
7820
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
7821
NULL, pd->af))
7822
return (PF_DROP);
7823
ret = pf_multihome_scan(start + off + sizeof(ah),
7824
ntohs(ah.ph.param_length) - sizeof(ah), pd,
7825
SCTP_DEL_IP_ADDRESS);
7826
if (ret != PF_PASS)
7827
return (ret);
7828
break;
7829
}
7830
default:
7831
break;
7832
}
7833
7834
off += roundup(ntohs(h.param_length), 4);
7835
}
7836
7837
return (PF_PASS);
7838
}
7839
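/*
 * Scan the parameter list of an SCTP INIT chunk for additional
 * addresses a multihomed peer advertises for this association.
 */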
7840
int
7841
pf_multihome_scan_init(int start, int len, struct pf_pdesc *pd)
7842
{
7843
start += sizeof(struct sctp_init_chunk);
7844
len -= sizeof(struct sctp_init_chunk);
7845
7846
return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
7847
}
7848
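/*
 * Scan the parameter list of an SCTP ASCONF chunk, which may add
 * or delete addresses on an established association.
 */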
7849
int
7850
pf_multihome_scan_asconf(int start, int len, struct pf_pdesc *pd)
7851
{
7852
start += sizeof(struct sctp_asconf_chunk);
7853
len -= sizeof(struct sctp_asconf_chunk);
7854
7855
return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
7856
}
7857
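/*
 * Look up the state entry for an ICMP query/reply. Returns -1 with
 * *state locked when a usable state was found; any other return value
 * is the action to take, with *state left NULL.
 */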
7858
int
7859
pf_icmp_state_lookup(struct pf_state_key_cmp *key, struct pf_pdesc *pd,
7860
struct pf_kstate **state, u_int16_t icmpid, u_int16_t type, int icmp_dir,
7861
int *iidx, int multi, int inner)
7862
{
7863
int action, direction = pd->dir;
7864
7865
key->af = pd->af;
7866
key->proto = pd->proto;
7867
if (icmp_dir == PF_IN) {
7868
*iidx = pd->sidx;
7869
key->port[pd->sidx] = icmpid;
7870
key->port[pd->didx] = type;
7871
} else {
7872
*iidx = pd->didx;
7873
key->port[pd->sidx] = type;
7874
key->port[pd->didx] = icmpid;
7875
}
7876
if (pf_state_key_addr_setup(pd, key, multi))
7877
return (PF_DROP);
7878
7879
action = pf_find_state(pd, key, state);
7880
if (action != PF_MATCH)
7881
return (action);
7882
7883
if ((*state)->state_flags & PFSTATE_SLOPPY)
7884
return (-1);
7885
7886
/* Is this ICMP message flowing in right direction? */
7887
if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af)
7888
direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ?
7889
PF_IN : PF_OUT;
7890
else
7891
direction = (*state)->direction;
7892
if ((*state)->rule->type &&
7893
(((!inner && direction == pd->dir) ||
7894
(inner && direction != pd->dir)) ?
7895
PF_IN : PF_OUT) != icmp_dir) {
7896
if (V_pf_status.debug >= PF_DEBUG_MISC) {
7897
printf("pf: icmp type %d in wrong direction (%d): ",
7898
ntohs(type), icmp_dir);
7899
pf_print_state(*state);
7900
printf("\n");
7901
}
7902
PF_STATE_UNLOCK(*state);
7903
*state = NULL;
7904
return (PF_DROP);
7905
}
7906
return (-1);
7907
}
7908
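/*
 * Match an ICMP/ICMPv6 packet against the state table. Queries and
 * replies have states of their own; error messages are matched via
 * the header of the packet quoted inside them.
 */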
7909
static int
7910
pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
7911
u_short *reason)
7912
{
7913
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
7914
u_int16_t *icmpsum, virtual_id, virtual_type;
7915
u_int8_t icmptype, icmpcode;
7916
int icmp_dir, iidx, ret;
7917
struct pf_state_key_cmp key;
7918
#ifdef INET
7919
u_int16_t icmpid;
7920
#endif /* INET */
7921
7922
MPASS(*state == NULL);
7923
7924
bzero(&key, sizeof(key));
7925
switch (pd->proto) {
7926
#ifdef INET
7927
case IPPROTO_ICMP:
7928
icmptype = pd->hdr.icmp.icmp_type;
7929
icmpcode = pd->hdr.icmp.icmp_code;
7930
icmpid = pd->hdr.icmp.icmp_id;
7931
icmpsum = &pd->hdr.icmp.icmp_cksum;
7932
break;
7933
#endif /* INET */
7934
#ifdef INET6
7935
case IPPROTO_ICMPV6:
7936
icmptype = pd->hdr.icmp6.icmp6_type;
7937
icmpcode = pd->hdr.icmp6.icmp6_code;
7938
#ifdef INET
7939
icmpid = pd->hdr.icmp6.icmp6_id;
7940
#endif /* INET */
7941
icmpsum = &pd->hdr.icmp6.icmp6_cksum;
7942
break;
7943
#endif /* INET6 */
7944
default:
7945
panic("unhandled proto %d", pd->proto);
7946
}
7947
7948
if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
7949
&virtual_type) == 0) {
7950
/*
7951
* ICMP query/reply message not related to a TCP/UDP/SCTP
7952
* packet. Search for an ICMP state.
7953
*/
7954
ret = pf_icmp_state_lookup(&key, pd, state, virtual_id,
7955
virtual_type, icmp_dir, &iidx, 0, 0);
7956
/* IPv6? try matching a multicast address */
7957
if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) {
7958
MPASS(*state == NULL);
7959
ret = pf_icmp_state_lookup(&key, pd, state,
7960
virtual_id, virtual_type,
7961
icmp_dir, &iidx, 1, 0);
7962
}
7963
if (ret >= 0) {
7964
MPASS(*state == NULL);
7965
return (ret);
7966
}
7967
7968
(*state)->expire = pf_get_uptime();
7969
(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
7970
7971
/* translate source/destination address, if necessary */
7972
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
7973
struct pf_state_key *nk;
7974
int afto, sidx, didx;
7975
7976
if (PF_REVERSED_KEY(*state, pd->af))
7977
nk = (*state)->key[pd->sidx];
7978
else
7979
nk = (*state)->key[pd->didx];
7980
7981
afto = pd->af != nk->af;
7982
7983
if (afto && (*state)->direction == PF_IN) {
7984
sidx = pd->didx;
7985
didx = pd->sidx;
7986
iidx = !iidx;
7987
} else {
7988
sidx = pd->sidx;
7989
didx = pd->didx;
7990
}
7991
7992
switch (pd->af) {
7993
#ifdef INET
7994
case AF_INET:
7995
#ifdef INET6
7996
if (afto) {
7997
if (pf_translate_icmp_af(AF_INET6,
7998
&pd->hdr.icmp))
7999
return (PF_DROP);
8000
pd->proto = IPPROTO_ICMPV6;
8001
}
8002
#endif /* INET6 */
8003
if (!afto &&
8004
PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET))
8005
pf_change_a(&saddr->v4.s_addr,
8006
pd->ip_sum,
8007
nk->addr[sidx].v4.s_addr,
8008
0);
8009
8010
if (!afto && PF_ANEQ(pd->dst,
8011
&nk->addr[didx], AF_INET))
8012
pf_change_a(&daddr->v4.s_addr,
8013
pd->ip_sum,
8014
nk->addr[didx].v4.s_addr, 0);
8015
8016
if (nk->port[iidx] !=
8017
pd->hdr.icmp.icmp_id) {
8018
pd->hdr.icmp.icmp_cksum =
8019
pf_cksum_fixup(
8020
pd->hdr.icmp.icmp_cksum, icmpid,
8021
nk->port[iidx], 0);
8022
pd->hdr.icmp.icmp_id =
8023
nk->port[iidx];
8024
}
8025
8026
m_copyback(pd->m, pd->off, ICMP_MINLEN,
8027
(caddr_t )&pd->hdr.icmp);
8028
break;
8029
#endif /* INET */
8030
#ifdef INET6
8031
case AF_INET6:
8032
#ifdef INET
8033
if (afto) {
8034
if (pf_translate_icmp_af(AF_INET,
8035
&pd->hdr.icmp6))
8036
return (PF_DROP);
8037
pd->proto = IPPROTO_ICMP;
8038
}
8039
#endif /* INET */
8040
if (!afto &&
8041
PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET6))
8042
pf_change_a6(saddr,
8043
&pd->hdr.icmp6.icmp6_cksum,
8044
&nk->addr[sidx], 0);
8045
8046
if (!afto && PF_ANEQ(pd->dst,
8047
&nk->addr[didx], AF_INET6))
8048
pf_change_a6(daddr,
8049
&pd->hdr.icmp6.icmp6_cksum,
8050
&nk->addr[didx], 0);
8051
8052
if (nk->port[iidx] != pd->hdr.icmp6.icmp6_id)
8053
pd->hdr.icmp6.icmp6_id =
8054
nk->port[iidx];
8055
8056
m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
8057
(caddr_t )&pd->hdr.icmp6);
8058
break;
8059
#endif /* INET6 */
8060
}
8061
if (afto) {
8062
pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
8063
nk->af);
8064
pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
8065
nk->af);
8066
pd->naf = nk->af;
8067
return (PF_AFRT);
8068
}
8069
}
8070
return (PF_PASS);
8071
8072
} else {
8073
/*
8074
* ICMP error message in response to a TCP/UDP packet.
8075
* Extract the inner TCP/UDP header and search for that state.
8076
*/
8077
8078
struct pf_pdesc pd2;
8079
bzero(&pd2, sizeof pd2);
8080
#ifdef INET
8081
struct ip h2;
8082
#endif /* INET */
8083
#ifdef INET6
8084
struct ip6_hdr h2_6;
8085
#endif /* INET6 */
8086
int ipoff2 = 0;
8087
8088
pd2.af = pd->af;
8089
pd2.dir = pd->dir;
8090
/* Payload packet is from the opposite direction. */
8091
pd2.sidx = (pd->dir == PF_IN) ? 1 : 0;
8092
pd2.didx = (pd->dir == PF_IN) ? 0 : 1;
8093
pd2.m = pd->m;
8094
pd2.pf_mtag = pd->pf_mtag;
8095
pd2.kif = pd->kif;
8096
switch (pd->af) {
8097
#ifdef INET
8098
case AF_INET:
8099
/* offset of h2 in mbuf chain */
8100
ipoff2 = pd->off + ICMP_MINLEN;
8101
8102
if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2),
8103
reason, pd2.af)) {
8104
DPFPRINTF(PF_DEBUG_MISC,
8105
"pf: ICMP error message too short "
8106
"(ip)");
8107
return (PF_DROP);
8108
}
8109
/*
8110
* ICMP error messages don't refer to non-first
8111
* fragments
8112
*/
8113
if (h2.ip_off & htons(IP_OFFMASK)) {
8114
REASON_SET(reason, PFRES_FRAG);
8115
return (PF_DROP);
8116
}
8117
8118
/* offset of protocol header that follows h2 */
8119
pd2.off = ipoff2;
8120
if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
8121
return (PF_DROP);
8122
8123
pd2.tot_len = ntohs(h2.ip_len);
8124
pd2.ttl = h2.ip_ttl;
8125
pd2.src = (struct pf_addr *)&h2.ip_src;
8126
pd2.dst = (struct pf_addr *)&h2.ip_dst;
8127
pd2.ip_sum = &h2.ip_sum;
8128
break;
8129
#endif /* INET */
8130
#ifdef INET6
8131
case AF_INET6:
8132
ipoff2 = pd->off + sizeof(struct icmp6_hdr);
8133
8134
if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6),
8135
reason, pd2.af)) {
8136
DPFPRINTF(PF_DEBUG_MISC,
8137
"pf: ICMP error message too short "
8138
"(ip6)");
8139
return (PF_DROP);
8140
}
8141
pd2.off = ipoff2;
8142
if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
8143
return (PF_DROP);
8144
8145
pd2.tot_len = ntohs(h2_6.ip6_plen) +
8146
sizeof(struct ip6_hdr);
8147
pd2.ttl = h2_6.ip6_hlim;
8148
pd2.src = (struct pf_addr *)&h2_6.ip6_src;
8149
pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
8150
pd2.ip_sum = NULL;
8151
break;
8152
#endif /* INET6 */
8153
default:
8154
unhandled_af(pd->af);
8155
}
8156
8157
if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
8158
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8159
printf("pf: BAD ICMP %d:%d outer dst: ",
8160
icmptype, icmpcode);
8161
pf_print_host(pd->src, 0, pd->af);
8162
printf(" -> ");
8163
pf_print_host(pd->dst, 0, pd->af);
8164
printf(" inner src: ");
8165
pf_print_host(pd2.src, 0, pd2.af);
8166
printf(" -> ");
8167
pf_print_host(pd2.dst, 0, pd2.af);
8168
printf("\n");
8169
}
8170
REASON_SET(reason, PFRES_BADSTATE);
8171
return (PF_DROP);
8172
}
8173
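/*
 * The quoted inner header tells us which state the error refers
 * to; rewrite the quoted packet (and the outer header) to match
 * any translation recorded in that state.
 */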
8174
switch (pd2.proto) {
8175
case IPPROTO_TCP: {
8176
struct tcphdr *th = &pd2.hdr.tcp;
8177
u_int32_t seq;
8178
struct pf_state_peer *src, *dst;
8179
u_int8_t dws;
8180
int copyback = 0;
8181
int action;
8182
8183
/*
8184
* Only the first 8 bytes of the TCP header can be
8185
* expected. Don't access any TCP header fields after
8186
* th_seq; an ackskew test is not possible.
8187
*/
8188
if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason,
8189
pd2.af)) {
8190
DPFPRINTF(PF_DEBUG_MISC,
8191
"pf: ICMP error message too short "
8192
"(tcp)");
8193
return (PF_DROP);
8194
}
8195
pd2.pcksum = &pd2.hdr.tcp.th_sum;
8196
8197
key.af = pd2.af;
8198
key.proto = IPPROTO_TCP;
8199
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
8200
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
8201
key.port[pd2.sidx] = th->th_sport;
8202
key.port[pd2.didx] = th->th_dport;
8203
8204
action = pf_find_state(&pd2, &key, state);
8205
if (action != PF_MATCH)
8206
return (action);
8207
8208
if (pd->dir == (*state)->direction) {
8209
if (PF_REVERSED_KEY(*state, pd->af)) {
8210
src = &(*state)->src;
8211
dst = &(*state)->dst;
8212
} else {
8213
src = &(*state)->dst;
8214
dst = &(*state)->src;
8215
}
8216
} else {
8217
if (PF_REVERSED_KEY(*state, pd->af)) {
8218
src = &(*state)->dst;
8219
dst = &(*state)->src;
8220
} else {
8221
src = &(*state)->src;
8222
dst = &(*state)->dst;
8223
}
8224
}
8225
8226
if (src->wscale && dst->wscale)
8227
dws = dst->wscale & PF_WSCALE_MASK;
8228
else
8229
dws = 0;
8230
8231
/* Demodulate sequence number */
8232
seq = ntohl(th->th_seq) - src->seqdiff;
8233
if (src->seqdiff) {
8234
pf_change_a(&th->th_seq, icmpsum,
8235
htonl(seq), 0);
8236
copyback = 1;
8237
}
8238
8239
if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
8240
(!SEQ_GEQ(src->seqhi, seq) ||
8241
!SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
8242
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8243
printf("pf: BAD ICMP %d:%d ",
8244
icmptype, icmpcode);
8245
pf_print_host(pd->src, 0, pd->af);
8246
printf(" -> ");
8247
pf_print_host(pd->dst, 0, pd->af);
8248
printf(" state: ");
8249
pf_print_state(*state);
8250
printf(" seq=%u\n", seq);
8251
}
8252
REASON_SET(reason, PFRES_BADSTATE);
8253
return (PF_DROP);
8254
} else {
8255
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8256
printf("pf: OK ICMP %d:%d ",
8257
icmptype, icmpcode);
8258
pf_print_host(pd->src, 0, pd->af);
8259
printf(" -> ");
8260
pf_print_host(pd->dst, 0, pd->af);
8261
printf(" state: ");
8262
pf_print_state(*state);
8263
printf(" seq=%u\n", seq);
8264
}
8265
}
8266
8267
/* translate source/destination address, if necessary */
8268
if ((*state)->key[PF_SK_WIRE] !=
8269
(*state)->key[PF_SK_STACK]) {
8270
8271
struct pf_state_key *nk;
8272
8273
if (PF_REVERSED_KEY(*state, pd->af))
8274
nk = (*state)->key[pd->sidx];
8275
else
8276
nk = (*state)->key[pd->didx];
8277
8278
#if defined(INET) && defined(INET6)
8279
int afto, sidx, didx;
8280
8281
afto = pd->af != nk->af;
8282
8283
if (afto && (*state)->direction == PF_IN) {
8284
sidx = pd2.didx;
8285
didx = pd2.sidx;
8286
} else {
8287
sidx = pd2.sidx;
8288
didx = pd2.didx;
8289
}
8290
8291
if (afto) {
8292
if (pf_translate_icmp_af(nk->af,
8293
&pd->hdr.icmp))
8294
return (PF_DROP);
8295
m_copyback(pd->m, pd->off,
8296
sizeof(struct icmp6_hdr),
8297
(c_caddr_t)&pd->hdr.icmp6);
8298
if (pf_change_icmp_af(pd->m, ipoff2, pd,
8299
&pd2, &nk->addr[sidx],
8300
&nk->addr[didx], pd->af,
8301
nk->af))
8302
return (PF_DROP);
8303
pf_addrcpy(&pd->nsaddr,
8304
&nk->addr[pd2.sidx], nk->af);
8305
pf_addrcpy(&pd->ndaddr,
8306
&nk->addr[pd2.didx], nk->af);
8307
if (nk->af == AF_INET) {
8308
pd->proto = IPPROTO_ICMP;
8309
} else {
8310
pd->proto = IPPROTO_ICMPV6;
8311
/*
8312
* IPv4 becomes IPv6 so we must
8313
* copy IPv4 src addr to least
8314
* 32bits in IPv6 address to
8315
* keep traceroute/icmp
8316
* working.
8317
*/
8318
pd->nsaddr.addr32[3] =
8319
pd->src->addr32[0];
8320
}
8321
pd->naf = pd2.naf = nk->af;
8322
pf_change_ap(&pd2, pd2.src, &th->th_sport,
8323
&nk->addr[pd2.sidx], nk->port[sidx]);
8324
pf_change_ap(&pd2, pd2.dst, &th->th_dport,
8325
&nk->addr[pd2.didx], nk->port[didx]);
8326
m_copyback(pd2.m, pd2.off, 8, (c_caddr_t)th);
8327
return (PF_AFRT);
8328
}
8329
#endif /* INET && INET6 */
8330
8331
if (PF_ANEQ(pd2.src,
8332
&nk->addr[pd2.sidx], pd2.af) ||
8333
nk->port[pd2.sidx] != th->th_sport)
8334
pf_change_icmp(pd2.src, &th->th_sport,
8335
daddr, &nk->addr[pd2.sidx],
8336
nk->port[pd2.sidx], NULL,
8337
pd2.ip_sum, icmpsum,
8338
pd->ip_sum, 0, pd2.af);
8339
8340
if (PF_ANEQ(pd2.dst,
8341
&nk->addr[pd2.didx], pd2.af) ||
8342
nk->port[pd2.didx] != th->th_dport)
8343
pf_change_icmp(pd2.dst, &th->th_dport,
8344
saddr, &nk->addr[pd2.didx],
8345
nk->port[pd2.didx], NULL,
8346
pd2.ip_sum, icmpsum,
8347
pd->ip_sum, 0, pd2.af);
8348
copyback = 1;
8349
}
8350
8351
if (copyback) {
8352
switch (pd2.af) {
8353
#ifdef INET
8354
case AF_INET:
8355
m_copyback(pd->m, pd->off, ICMP_MINLEN,
8356
(caddr_t )&pd->hdr.icmp);
8357
m_copyback(pd->m, ipoff2, sizeof(h2),
8358
(caddr_t )&h2);
8359
break;
8360
#endif /* INET */
8361
#ifdef INET6
8362
case AF_INET6:
8363
m_copyback(pd->m, pd->off,
8364
sizeof(struct icmp6_hdr),
8365
(caddr_t )&pd->hdr.icmp6);
8366
m_copyback(pd->m, ipoff2, sizeof(h2_6),
8367
(caddr_t )&h2_6);
8368
break;
8369
#endif /* INET6 */
8370
default:
8371
unhandled_af(pd->af);
8372
}
8373
m_copyback(pd->m, pd2.off, 8, (caddr_t)th);
8374
}
8375
8376
return (PF_PASS);
8377
break;
8378
}
8379
case IPPROTO_UDP: {
8380
struct udphdr *uh = &pd2.hdr.udp;
8381
int action;
8382
8383
if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh),
8384
reason, pd2.af)) {
8385
DPFPRINTF(PF_DEBUG_MISC,
8386
"pf: ICMP error message too short "
8387
"(udp)");
8388
return (PF_DROP);
8389
}
8390
pd2.pcksum = &pd2.hdr.udp.uh_sum;
8391
8392
key.af = pd2.af;
8393
key.proto = IPPROTO_UDP;
8394
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
8395
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
8396
key.port[pd2.sidx] = uh->uh_sport;
8397
key.port[pd2.didx] = uh->uh_dport;
8398
8399
action = pf_find_state(&pd2, &key, state);
8400
if (action != PF_MATCH)
8401
return (action);
8402
8403
/* translate source/destination address, if necessary */
8404
if ((*state)->key[PF_SK_WIRE] !=
8405
(*state)->key[PF_SK_STACK]) {
8406
struct pf_state_key *nk;
8407
8408
if (PF_REVERSED_KEY(*state, pd->af))
8409
nk = (*state)->key[pd->sidx];
8410
else
8411
nk = (*state)->key[pd->didx];
8412
8413
#if defined(INET) && defined(INET6)
8414
int afto, sidx, didx;
8415
8416
afto = pd->af != nk->af;
8417
8418
if (afto && (*state)->direction == PF_IN) {
8419
sidx = pd2.didx;
8420
didx = pd2.sidx;
8421
} else {
8422
sidx = pd2.sidx;
8423
didx = pd2.didx;
8424
}
8425
8426
if (afto) {
8427
if (pf_translate_icmp_af(nk->af,
8428
&pd->hdr.icmp))
8429
return (PF_DROP);
8430
m_copyback(pd->m, pd->off,
8431
sizeof(struct icmp6_hdr),
8432
(c_caddr_t)&pd->hdr.icmp6);
8433
if (pf_change_icmp_af(pd->m, ipoff2, pd,
8434
&pd2, &nk->addr[sidx],
8435
&nk->addr[didx], pd->af,
8436
nk->af))
8437
return (PF_DROP);
8438
pf_addrcpy(&pd->nsaddr,
8439
&nk->addr[pd2.sidx], nk->af);
8440
pf_addrcpy(&pd->ndaddr,
8441
&nk->addr[pd2.didx], nk->af);
8442
if (nk->af == AF_INET) {
8443
pd->proto = IPPROTO_ICMP;
8444
} else {
8445
pd->proto = IPPROTO_ICMPV6;
8446
/*
8447
* IPv4 becomes IPv6 so we must
8448
* copy IPv4 src addr to least
8449
* 32bits in IPv6 address to
8450
* keep traceroute/icmp
8451
* working.
8452
*/
8453
pd->nsaddr.addr32[3] =
8454
pd->src->addr32[0];
8455
}
8456
pd->naf = pd2.naf = nk->af;
8457
pf_change_ap(&pd2, pd2.src, &uh->uh_sport,
8458
&nk->addr[pd2.sidx], nk->port[sidx]);
8459
pf_change_ap(&pd2, pd2.dst, &uh->uh_dport,
8460
&nk->addr[pd2.didx], nk->port[didx]);
8461
m_copyback(pd2.m, pd2.off, sizeof(*uh),
8462
(c_caddr_t)uh);
8463
return (PF_AFRT);
8464
}
8465
#endif /* INET && INET6 */
8466
8467
if (PF_ANEQ(pd2.src,
8468
&nk->addr[pd2.sidx], pd2.af) ||
8469
nk->port[pd2.sidx] != uh->uh_sport)
8470
pf_change_icmp(pd2.src, &uh->uh_sport,
8471
daddr, &nk->addr[pd2.sidx],
8472
nk->port[pd2.sidx], &uh->uh_sum,
8473
pd2.ip_sum, icmpsum,
8474
pd->ip_sum, 1, pd2.af);
8475
8476
if (PF_ANEQ(pd2.dst,
8477
&nk->addr[pd2.didx], pd2.af) ||
8478
nk->port[pd2.didx] != uh->uh_dport)
8479
pf_change_icmp(pd2.dst, &uh->uh_dport,
8480
saddr, &nk->addr[pd2.didx],
8481
nk->port[pd2.didx], &uh->uh_sum,
8482
pd2.ip_sum, icmpsum,
8483
pd->ip_sum, 1, pd2.af);
8484
8485
switch (pd2.af) {
8486
#ifdef INET
8487
case AF_INET:
8488
m_copyback(pd->m, pd->off, ICMP_MINLEN,
8489
(caddr_t )&pd->hdr.icmp);
8490
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
8491
break;
8492
#endif /* INET */
8493
#ifdef INET6
8494
case AF_INET6:
8495
m_copyback(pd->m, pd->off,
8496
sizeof(struct icmp6_hdr),
8497
(caddr_t )&pd->hdr.icmp6);
8498
m_copyback(pd->m, ipoff2, sizeof(h2_6),
8499
(caddr_t )&h2_6);
8500
break;
8501
#endif /* INET6 */
8502
}
8503
m_copyback(pd->m, pd2.off, sizeof(*uh), (caddr_t)uh);
8504
}
8505
return (PF_PASS);
8506
break;
8507
}
8508
#ifdef INET
8509
case IPPROTO_SCTP: {
8510
struct sctphdr *sh = &pd2.hdr.sctp;
8511
struct pf_state_peer *src;
8512
int copyback = 0;
8513
int action;
8514
8515
if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason,
8516
pd2.af)) {
8517
DPFPRINTF(PF_DEBUG_MISC,
8518
"pf: ICMP error message too short "
8519
"(sctp)");
8520
return (PF_DROP);
8521
}
8522
pd2.pcksum = &pd2.sctp_dummy_sum;
8523
8524
key.af = pd2.af;
8525
key.proto = IPPROTO_SCTP;
8526
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
8527
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
8528
key.port[pd2.sidx] = sh->src_port;
8529
key.port[pd2.didx] = sh->dest_port;
8530
8531
action = pf_find_state(&pd2, &key, state);
8532
if (action != PF_MATCH)
8533
return (action);
8534
8535
if (pd->dir == (*state)->direction) {
8536
if (PF_REVERSED_KEY(*state, pd->af))
8537
src = &(*state)->src;
8538
else
8539
src = &(*state)->dst;
8540
} else {
8541
if (PF_REVERSED_KEY(*state, pd->af))
8542
src = &(*state)->dst;
8543
else
8544
src = &(*state)->src;
8545
}
8546
8547
if (src->scrub->pfss_v_tag != sh->v_tag) {
8548
DPFPRINTF(PF_DEBUG_MISC,
8549
"pf: ICMP error message has incorrect "
8550
"SCTP v_tag");
8551
return (PF_DROP);
8552
}
8553
8554
/* translate source/destination address, if necessary */
8555
if ((*state)->key[PF_SK_WIRE] !=
8556
(*state)->key[PF_SK_STACK]) {
8557
8558
struct pf_state_key *nk;
8559
8560
if (PF_REVERSED_KEY(*state, pd->af))
8561
nk = (*state)->key[pd->sidx];
8562
else
8563
nk = (*state)->key[pd->didx];
8564
8565
#if defined(INET) && defined(INET6)
8566
int afto, sidx, didx;
8567
8568
afto = pd->af != nk->af;
8569
8570
if (afto && (*state)->direction == PF_IN) {
8571
sidx = pd2.didx;
8572
didx = pd2.sidx;
8573
} else {
8574
sidx = pd2.sidx;
8575
didx = pd2.didx;
8576
}
8577
8578
if (afto) {
8579
if (pf_translate_icmp_af(nk->af,
8580
&pd->hdr.icmp))
8581
return (PF_DROP);
8582
m_copyback(pd->m, pd->off,
8583
sizeof(struct icmp6_hdr),
8584
(c_caddr_t)&pd->hdr.icmp6);
8585
if (pf_change_icmp_af(pd->m, ipoff2, pd,
8586
&pd2, &nk->addr[sidx],
8587
&nk->addr[didx], pd->af,
8588
nk->af))
8589
return (PF_DROP);
8590
sh->src_port = nk->port[sidx];
8591
sh->dest_port = nk->port[didx];
8592
m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh);
8593
pf_addrcpy(&pd->nsaddr,
8594
&nk->addr[pd2.sidx], nk->af);
8595
pf_addrcpy(&pd->ndaddr,
8596
&nk->addr[pd2.didx], nk->af);
8597
if (nk->af == AF_INET) {
8598
pd->proto = IPPROTO_ICMP;
8599
} else {
8600
pd->proto = IPPROTO_ICMPV6;
8601
/*
8602
* IPv4 becomes IPv6 so we must
8603
* copy IPv4 src addr to least
8604
* 32bits in IPv6 address to
8605
* keep traceroute/icmp
8606
* working.
8607
*/
8608
pd->nsaddr.addr32[3] =
8609
pd->src->addr32[0];
8610
}
8611
pd->naf = nk->af;
8612
return (PF_AFRT);
8613
}
8614
#endif /* INET && INET6 */
8615
8616
if (PF_ANEQ(pd2.src,
8617
&nk->addr[pd2.sidx], pd2.af) ||
8618
nk->port[pd2.sidx] != sh->src_port)
8619
pf_change_icmp(pd2.src, &sh->src_port,
8620
daddr, &nk->addr[pd2.sidx],
8621
nk->port[pd2.sidx], NULL,
8622
pd2.ip_sum, icmpsum,
8623
pd->ip_sum, 0, pd2.af);
8624
8625
if (PF_ANEQ(pd2.dst,
8626
&nk->addr[pd2.didx], pd2.af) ||
8627
nk->port[pd2.didx] != sh->dest_port)
8628
pf_change_icmp(pd2.dst, &sh->dest_port,
8629
saddr, &nk->addr[pd2.didx],
8630
nk->port[pd2.didx], NULL,
8631
pd2.ip_sum, icmpsum,
8632
pd->ip_sum, 0, pd2.af);
8633
copyback = 1;
8634
}
8635
8636
if (copyback) {
8637
switch (pd2.af) {
8638
#ifdef INET
8639
case AF_INET:
8640
m_copyback(pd->m, pd->off, ICMP_MINLEN,
8641
(caddr_t )&pd->hdr.icmp);
8642
m_copyback(pd->m, ipoff2, sizeof(h2),
8643
(caddr_t )&h2);
8644
break;
8645
#endif /* INET */
8646
#ifdef INET6
8647
case AF_INET6:
8648
m_copyback(pd->m, pd->off,
8649
sizeof(struct icmp6_hdr),
8650
(caddr_t )&pd->hdr.icmp6);
8651
m_copyback(pd->m, ipoff2, sizeof(h2_6),
8652
(caddr_t )&h2_6);
8653
break;
8654
#endif /* INET6 */
8655
}
8656
m_copyback(pd->m, pd2.off, sizeof(*sh), (caddr_t)sh);
8657
}
8658
8659
return (PF_PASS);
8660
break;
8661
}
8662
case IPPROTO_ICMP: {
8663
struct icmp *iih = &pd2.hdr.icmp;
8664
8665
if (pd2.af != AF_INET) {
8666
REASON_SET(reason, PFRES_NORM);
8667
return (PF_DROP);
8668
}
8669
8670
if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN,
8671
reason, pd2.af)) {
8672
DPFPRINTF(PF_DEBUG_MISC,
8673
"pf: ICMP error message too short i"
8674
"(icmp)");
8675
return (PF_DROP);
8676
}
8677
pd2.pcksum = &pd2.hdr.icmp.icmp_cksum;
8678
8679
icmpid = iih->icmp_id;
8680
pf_icmp_mapping(&pd2, iih->icmp_type,
8681
&icmp_dir, &virtual_id, &virtual_type);
8682
8683
ret = pf_icmp_state_lookup(&key, &pd2, state,
8684
virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
8685
if (ret >= 0) {
8686
MPASS(*state == NULL);
8687
return (ret);
8688
}
8689
8690
/* translate source/destination address, if necessary */
8691
if ((*state)->key[PF_SK_WIRE] !=
8692
(*state)->key[PF_SK_STACK]) {
8693
struct pf_state_key *nk;
8694
8695
if (PF_REVERSED_KEY(*state, pd->af))
8696
nk = (*state)->key[pd->sidx];
8697
else
8698
nk = (*state)->key[pd->didx];
8699
8700
#if defined(INET) && defined(INET6)
8701
int afto, sidx, didx;
8702
8703
afto = pd->af != nk->af;
8704
8705
if (afto && (*state)->direction == PF_IN) {
8706
sidx = pd2.didx;
8707
didx = pd2.sidx;
8708
iidx = !iidx;
8709
} else {
8710
sidx = pd2.sidx;
8711
didx = pd2.didx;
8712
}
8713
8714
if (afto) {
8715
if (nk->af != AF_INET6)
8716
return (PF_DROP);
8717
if (pf_translate_icmp_af(nk->af,
8718
&pd->hdr.icmp))
8719
return (PF_DROP);
8720
m_copyback(pd->m, pd->off,
8721
sizeof(struct icmp6_hdr),
8722
(c_caddr_t)&pd->hdr.icmp6);
8723
if (pf_change_icmp_af(pd->m, ipoff2, pd,
8724
&pd2, &nk->addr[sidx],
8725
&nk->addr[didx], pd->af,
8726
nk->af))
8727
return (PF_DROP);
8728
pd->proto = IPPROTO_ICMPV6;
8729
if (pf_translate_icmp_af(nk->af, iih))
8730
return (PF_DROP);
8731
if (virtual_type == htons(ICMP_ECHO) &&
8732
nk->port[iidx] != iih->icmp_id)
8733
iih->icmp_id = nk->port[iidx];
8734
m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
8735
(c_caddr_t)iih);
8736
pf_addrcpy(&pd->nsaddr,
8737
&nk->addr[pd2.sidx], nk->af);
8738
pf_addrcpy(&pd->ndaddr,
8739
&nk->addr[pd2.didx], nk->af);
8740
/*
8741
* IPv4 becomes IPv6 so we must copy
8742
* IPv4 src addr to least 32bits in
8743
* IPv6 address to keep traceroute
8744
* working.
8745
*/
8746
pd->nsaddr.addr32[3] =
8747
pd->src->addr32[0];
8748
pd->naf = nk->af;
8749
return (PF_AFRT);
8750
}
8751
#endif /* INET && INET6 */
8752
8753
if (PF_ANEQ(pd2.src,
8754
&nk->addr[pd2.sidx], pd2.af) ||
8755
(virtual_type == htons(ICMP_ECHO) &&
8756
nk->port[iidx] != iih->icmp_id))
8757
pf_change_icmp(pd2.src,
8758
(virtual_type == htons(ICMP_ECHO)) ?
8759
&iih->icmp_id : NULL,
8760
daddr, &nk->addr[pd2.sidx],
8761
(virtual_type == htons(ICMP_ECHO)) ?
8762
nk->port[iidx] : 0, NULL,
8763
pd2.ip_sum, icmpsum,
8764
pd->ip_sum, 0, AF_INET);
8765
8766
if (PF_ANEQ(pd2.dst,
8767
&nk->addr[pd2.didx], pd2.af))
8768
pf_change_icmp(pd2.dst, NULL, NULL,
8769
&nk->addr[pd2.didx], 0, NULL,
8770
pd2.ip_sum, icmpsum, pd->ip_sum, 0,
8771
AF_INET);
8772
8773
m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
8774
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
8775
m_copyback(pd->m, pd2.off, ICMP_MINLEN, (caddr_t)iih);
8776
}
8777
return (PF_PASS);
8778
break;
8779
}
8780
#endif /* INET */
8781
#ifdef INET6
8782
case IPPROTO_ICMPV6: {
8783
struct icmp6_hdr *iih = &pd2.hdr.icmp6;
8784
8785
if (pd2.af != AF_INET6) {
8786
REASON_SET(reason, PFRES_NORM);
8787
return (PF_DROP);
8788
}
8789
8790
if (!pf_pull_hdr(pd->m, pd2.off, iih,
8791
sizeof(struct icmp6_hdr), reason, pd2.af)) {
8792
DPFPRINTF(PF_DEBUG_MISC,
8793
"pf: ICMP error message too short "
8794
"(icmp6)");
8795
return (PF_DROP);
8796
}
8797
pd2.pcksum = &pd2.hdr.icmp6.icmp6_cksum;
8798
8799
pf_icmp_mapping(&pd2, iih->icmp6_type,
8800
&icmp_dir, &virtual_id, &virtual_type);
8801
8802
ret = pf_icmp_state_lookup(&key, &pd2, state,
8803
virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
8804
/* IPv6? try matching a multicast address */
8805
if (ret == PF_DROP && pd2.af == AF_INET6 &&
8806
icmp_dir == PF_OUT) {
8807
MPASS(*state == NULL);
8808
ret = pf_icmp_state_lookup(&key, &pd2,
8809
state, virtual_id, virtual_type,
8810
icmp_dir, &iidx, 1, 1);
8811
}
8812
if (ret >= 0) {
8813
MPASS(*state == NULL);
8814
return (ret);
8815
}
8816
8817
/* translate source/destination address, if necessary */
8818
if ((*state)->key[PF_SK_WIRE] !=
8819
(*state)->key[PF_SK_STACK]) {
8820
struct pf_state_key *nk;
8821
8822
if (PF_REVERSED_KEY(*state, pd->af))
8823
nk = (*state)->key[pd->sidx];
8824
else
8825
nk = (*state)->key[pd->didx];
8826
8827
#if defined(INET) && defined(INET6)
8828
int afto, sidx, didx;
8829
8830
afto = pd->af != nk->af;
8831
8832
if (afto && (*state)->direction == PF_IN) {
8833
sidx = pd2.didx;
8834
didx = pd2.sidx;
8835
iidx = !iidx;
8836
} else {
8837
sidx = pd2.sidx;
8838
didx = pd2.didx;
8839
}
8840
8841
if (afto) {
8842
if (nk->af != AF_INET)
8843
return (PF_DROP);
8844
if (pf_translate_icmp_af(nk->af,
8845
&pd->hdr.icmp))
8846
return (PF_DROP);
8847
m_copyback(pd->m, pd->off,
8848
sizeof(struct icmp6_hdr),
8849
(c_caddr_t)&pd->hdr.icmp6);
8850
if (pf_change_icmp_af(pd->m, ipoff2, pd,
8851
&pd2, &nk->addr[sidx],
8852
&nk->addr[didx], pd->af,
8853
nk->af))
8854
return (PF_DROP);
8855
pd->proto = IPPROTO_ICMP;
8856
if (pf_translate_icmp_af(nk->af, iih))
8857
return (PF_DROP);
8858
if (virtual_type ==
8859
htons(ICMP6_ECHO_REQUEST) &&
8860
nk->port[iidx] != iih->icmp6_id)
8861
iih->icmp6_id = nk->port[iidx];
8862
m_copyback(pd2.m, pd2.off,
8863
sizeof(struct icmp6_hdr), (c_caddr_t)iih);
8864
pf_addrcpy(&pd->nsaddr,
8865
&nk->addr[pd2.sidx], nk->af);
8866
pf_addrcpy(&pd->ndaddr,
8867
&nk->addr[pd2.didx], nk->af);
8868
pd->naf = nk->af;
8869
return (PF_AFRT);
8870
}
8871
#endif /* INET && INET6 */
8872
8873
if (PF_ANEQ(pd2.src,
8874
&nk->addr[pd2.sidx], pd2.af) ||
8875
((virtual_type == htons(ICMP6_ECHO_REQUEST)) &&
8876
nk->port[pd2.sidx] != iih->icmp6_id))
8877
pf_change_icmp(pd2.src,
8878
(virtual_type == htons(ICMP6_ECHO_REQUEST))
8879
? &iih->icmp6_id : NULL,
8880
daddr, &nk->addr[pd2.sidx],
8881
(virtual_type == htons(ICMP6_ECHO_REQUEST))
8882
? nk->port[iidx] : 0, NULL,
8883
pd2.ip_sum, icmpsum,
8884
pd->ip_sum, 0, AF_INET6);
8885
8886
if (PF_ANEQ(pd2.dst,
8887
&nk->addr[pd2.didx], pd2.af))
8888
pf_change_icmp(pd2.dst, NULL, NULL,
8889
&nk->addr[pd2.didx], 0, NULL,
8890
pd2.ip_sum, icmpsum,
8891
pd->ip_sum, 0, AF_INET6);
8892
8893
m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
8894
(caddr_t)&pd->hdr.icmp6);
8895
m_copyback(pd->m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
8896
m_copyback(pd->m, pd2.off, sizeof(struct icmp6_hdr),
8897
(caddr_t)iih);
8898
}
8899
return (PF_PASS);
8900
break;
8901
}
8902
#endif /* INET6 */
8903
default: {
8904
int action;
8905
8906
/*
8907
* Placeholder value, so future calls to pf_change_ap()
8908
* don't try to update a NULL checksum pointer.
8909
*/
8910
pd->pcksum = &pd->sctp_dummy_sum;
8911
key.af = pd2.af;
8912
key.proto = pd2.proto;
8913
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
8914
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
8915
key.port[0] = key.port[1] = 0;
8916
8917
action = pf_find_state(&pd2, &key, state);
8918
if (action != PF_MATCH)
8919
return (action);
8920
8921
/* translate source/destination address, if necessary */
8922
if ((*state)->key[PF_SK_WIRE] !=
8923
(*state)->key[PF_SK_STACK]) {
8924
struct pf_state_key *nk =
8925
(*state)->key[pd->didx];
8926
8927
if (PF_ANEQ(pd2.src,
8928
&nk->addr[pd2.sidx], pd2.af))
8929
pf_change_icmp(pd2.src, NULL, daddr,
8930
&nk->addr[pd2.sidx], 0, NULL,
8931
pd2.ip_sum, icmpsum,
8932
pd->ip_sum, 0, pd2.af);
8933
8934
if (PF_ANEQ(pd2.dst,
8935
&nk->addr[pd2.didx], pd2.af))
8936
pf_change_icmp(pd2.dst, NULL, saddr,
8937
&nk->addr[pd2.didx], 0, NULL,
8938
pd2.ip_sum, icmpsum,
8939
pd->ip_sum, 0, pd2.af);
8940
8941
switch (pd2.af) {
8942
#ifdef INET
8943
case AF_INET:
8944
m_copyback(pd->m, pd->off, ICMP_MINLEN,
8945
(caddr_t)&pd->hdr.icmp);
8946
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
8947
break;
8948
#endif /* INET */
8949
#ifdef INET6
8950
case AF_INET6:
8951
m_copyback(pd->m, pd->off,
8952
sizeof(struct icmp6_hdr),
8953
(caddr_t )&pd->hdr.icmp6);
8954
m_copyback(pd->m, ipoff2, sizeof(h2_6),
8955
(caddr_t )&h2_6);
8956
break;
8957
#endif /* INET6 */
8958
}
8959
}
8960
return (PF_PASS);
8961
break;
8962
}
8963
}
8964
}
8965
}
8966
8967
/*
8968
* off is measured from the start of the mbuf chain;
8969
* len bytes starting there are copied into p.
8970
*/
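/*
 * A minimal usage sketch (assuming a pf_pdesc "pd" and a u_short
 * reason pointer in scope, as in the callers above):
 *
 *	struct udphdr uh;
 *
 *	if (!pf_pull_hdr(pd->m, pd->off, &uh, sizeof(uh), reason, pd->af))
 *		return (PF_DROP);
 */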
8971
void *
8972
pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
8973
u_short *reasonp, sa_family_t af)
8974
{
8975
int iplen = 0;
8976
switch (af) {
8977
#ifdef INET
8978
case AF_INET: {
8979
const struct ip *h = mtod(m, struct ip *);
8980
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
8981
8982
if (fragoff) {
8983
REASON_SET(reasonp, PFRES_FRAG);
8984
return (NULL);
8985
}
8986
iplen = ntohs(h->ip_len);
8987
break;
8988
}
8989
#endif /* INET */
8990
#ifdef INET6
8991
case AF_INET6: {
8992
const struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
8993
8994
iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
8995
break;
8996
}
8997
#endif /* INET6 */
8998
}
8999
if (m->m_pkthdr.len < off + len || iplen < off + len) {
9000
REASON_SET(reasonp, PFRES_SHORT);
9001
return (NULL);
9002
}
9003
m_copydata(m, off, len, p);
9004
return (p);
9005
}
9006
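/*
 * Check that the address is reachable in the given routing table,
 * optionally restricted to the given interface. Scope-embedded IPv6
 * addresses, pfi_all and enc(4) interfaces always pass.
 */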
9007
int
9008
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif,
9009
int rtableid)
9010
{
9011
struct ifnet *ifp;
9012
9013
/*
9014
* Skip check for addresses with embedded interface scope,
9015
* as they would always match anyway.
9016
*/
9017
if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
9018
return (1);
9019
9020
if (af != AF_INET && af != AF_INET6)
9021
return (0);
9022
9023
if (kif == V_pfi_all)
9024
return (1);
9025
9026
/* Skip checks for ipsec interfaces */
9027
if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
9028
return (1);
9029
9030
ifp = (kif != NULL) ? kif->pfik_ifp : NULL;
9031
9032
switch (af) {
9033
#ifdef INET6
9034
case AF_INET6:
9035
return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE,
9036
ifp));
9037
#endif /* INET6 */
9038
#ifdef INET
9039
case AF_INET:
9040
return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE,
9041
ifp));
9042
#endif /* INET */
9043
}
9044
9045
return (0);
9046
}
9047
9048
#ifdef INET
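/*
 * IPv4 output path for route-to, reply-to and dup-to. Decrements the
 * TTL in the forward direction, re-runs the filter on the outbound
 * interface unless the test may be skipped, and fragments the packet
 * (or answers with an ICMP need-frag error when DF is set) if it
 * exceeds the interface MTU.
 */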
9049
static int
9050
pf_route(struct pf_krule *r, struct ifnet *oifp,
9051
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
9052
{
9053
struct mbuf *m0, *m1, *md;
9054
struct route_in6 ro;
9055
union sockaddr_union rt_gw;
9056
const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst;
9057
union sockaddr_union *dst;
9058
struct ip *ip;
9059
struct ifnet *ifp = NULL;
9060
int error = 0;
9061
uint16_t ip_len, ip_off;
9062
uint16_t tmp;
9063
int r_dir;
9064
bool skip_test = false;
9065
int action = PF_PASS;
9066
9067
KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
9068
9069
SDT_PROBE4(pf, ip, route_to, entry, pd->m, pd, s, oifp);
9070
9071
if (s) {
9072
r_dir = s->direction;
9073
} else {
9074
r_dir = r->direction;
9075
}
9076
9077
KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
9078
r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
9079
__func__));
9080
9081
if ((pd->pf_mtag == NULL &&
9082
((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
9083
pd->pf_mtag->routed++ > 3) {
9084
m0 = pd->m;
9085
pd->m = NULL;
9086
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9087
action = PF_DROP;
9088
goto bad_locked;
9089
}
9090
9091
if (pd->act.rt_kif != NULL)
9092
ifp = pd->act.rt_kif->pfik_ifp;
9093
9094
if (pd->act.rt == PF_DUPTO) {
9095
if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
9096
if (s != NULL) {
9097
PF_STATE_UNLOCK(s);
9098
}
9099
if (ifp == oifp) {
9100
/* Already-duplicated packet leaving via its dup-to interface; pass it on. */
9101
return (action);
9102
} else {
9103
m0 = pd->m;
9104
pd->m = NULL;
9105
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9106
action = PF_DROP;
9107
goto bad;
9108
}
9109
} else {
9110
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
9111
if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
9112
if (s)
9113
PF_STATE_UNLOCK(s);
9114
return (action);
9115
}
9116
}
9117
} else {
9118
if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
9119
if (pd->af == pd->naf) {
9120
pf_dummynet(pd, s, r, &pd->m);
9121
if (s)
9122
PF_STATE_UNLOCK(s);
9123
return (action);
9124
} else {
9125
if (r_dir == PF_IN) {
9126
skip_test = true;
9127
}
9128
}
9129
}
9130
9131
/*
9132
* If we're actually doing route-to and af-to and are in the
9133
* reply direction.
9134
*/
9135
if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
9136
pd->af != pd->naf) {
9137
if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET) {
9138
/* Un-set ifp so we do a plain route lookup. */
9139
ifp = NULL;
9140
}
9141
if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET6) {
9142
/* Un-set ifp so we do a plain route lookup. */
9143
ifp = NULL;
9144
}
9145
}
9146
m0 = pd->m;
9147
}
9148
9149
ip = mtod(m0, struct ip *);
9150
9151
bzero(&ro, sizeof(ro));
9152
dst = (union sockaddr_union *)&ro.ro_dst;
9153
dst->sin.sin_family = AF_INET;
9154
dst->sin.sin_len = sizeof(struct sockaddr_in);
9155
dst->sin.sin_addr = ip->ip_dst;
9156
if (ifp) { /* Only needed in forward direction and route-to */
9157
bzero(&rt_gw, sizeof(rt_gw));
9158
ro.ro_flags |= RT_HAS_GW;
9159
gw = &rt_gw;
9160
switch (pd->act.rt_af) {
9161
#ifdef INET
9162
case AF_INET:
9163
rt_gw.sin.sin_family = AF_INET;
9164
rt_gw.sin.sin_len = sizeof(struct sockaddr_in);
9165
rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
9166
break;
9167
#endif /* INET */
9168
#ifdef INET6
9169
case AF_INET6:
9170
rt_gw.sin6.sin6_family = AF_INET6;
9171
rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6);
9172
pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr,
9173
&pd->act.rt_addr, AF_INET6);
9174
break;
9175
#endif /* INET6 */
9176
default:
9177
/* Normal af-to without route-to */
9178
break;
9179
}
9180
}
9181
9182
if (pd->dir == PF_IN) {
9183
if (ip->ip_ttl <= IPTTLDEC) {
9184
if (r->rt != PF_DUPTO)
9185
pf_send_icmp(m0, ICMP_TIMXCEED,
9186
ICMP_TIMXCEED_INTRANS, 0, pd->af, r,
9187
pd->act.rtableid);
9188
action = PF_DROP;
9189
goto bad_locked;
9190
}
9191
ip->ip_ttl -= IPTTLDEC;
9192
}
9193
9194
if (s != NULL) {
9195
if (ifp == NULL && (pd->af != pd->naf)) {
9196
/* We're in the AFTO case. Do a route lookup. */
9197
const struct nhop_object *nh;
9198
nh = fib4_lookup(M_GETFIB(m0), ip->ip_dst, 0, NHR_NONE, 0);
9199
if (nh) {
9200
ifp = nh->nh_ifp;
9201
9202
/* Use the gateway if needed. */
9203
if (nh->nh_flags & NHF_GATEWAY) {
9204
gw = (const union sockaddr_union *)&nh->gw_sa;
9205
ro.ro_flags |= RT_HAS_GW;
9206
} else {
9207
dst->sin.sin_addr = ip->ip_dst;
9208
}
9209
}
9210
}
9211
PF_STATE_UNLOCK(s);
9212
}
9213
9214
/* It must have been set either from rt_af or by fib4_lookup(). */
9215
KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__));
9216
9217
if (ifp == NULL) {
9218
m0 = pd->m;
9219
pd->m = NULL;
9220
action = PF_DROP;
9221
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9222
goto bad;
9223
}
9224
9225
/*
9226
* Bind to the correct interface if we're if-bound. We don't know which
9227
* interface that will be until here, so we've inserted the state
9228
* on V_pf_all. Fix that now.
9229
*/
9230
if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
9231
/* Verify that we're here because of BOUND_IFACE */
9232
MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN));
9233
s->kif = ifp->if_pf_kif;
9234
if (pd->act.rt == PF_REPLYTO) {
9235
s->orig_kif = oifp->if_pf_kif;
9236
}
9237
}
9238
9239
if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN))
9240
skip_test = true;
9241
9242
if (pd->dir == PF_IN) {
9243
if (skip_test) {
9244
struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif;
9245
MPASS(s != NULL);
9246
pf_counter_u64_critical_enter();
9247
pf_counter_u64_add_protected(
9248
&out_kif->pfik_bytes[pd->naf == AF_INET6][1]
9249
[action != PF_PASS && action != PF_AFRT], pd->tot_len);
9250
pf_counter_u64_add_protected(
9251
&out_kif->pfik_packets[pd->naf == AF_INET6][1]
9252
[action != PF_PASS && action != PF_AFRT], 1);
9253
pf_counter_u64_critical_exit();
9254
} else {
9255
if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp,
9256
&pd->act) != PF_PASS) {
9257
action = PF_DROP;
9258
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9259
goto bad;
9260
} else if (m0 == NULL) {
9261
action = PF_DROP;
9262
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9263
goto done;
9264
}
9265
if (m0->m_len < sizeof(struct ip)) {
9266
DPFPRINTF(PF_DEBUG_URGENT,
9267
"%s: m0->m_len < sizeof(struct ip)", __func__);
9268
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9269
action = PF_DROP;
9270
goto bad;
9271
}
9272
ip = mtod(m0, struct ip *);
9273
}
9274
}
9275
9276
if (ifp->if_flags & IFF_LOOPBACK)
9277
m0->m_flags |= M_SKIP_FIREWALL;
9278
9279
ip_len = ntohs(ip->ip_len);
9280
ip_off = ntohs(ip->ip_off);
9281
9282
/* Copied from FreeBSD 10.0-CURRENT ip_output. */
9283
m0->m_pkthdr.csum_flags |= CSUM_IP;
9284
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
9285
in_delayed_cksum(m0);
9286
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9287
}
9288
if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
9289
pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2));
9290
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
9291
}
9292
9293
if (pd->dir == PF_IN) {
9294
/*
9295
* Make sure dummynet gets the correct direction, in case it needs to
9296
* re-inject later.
9297
*/
9298
pd->dir = PF_OUT;
9299
9300
/*
9301
* The following processing is actually the rest of the inbound processing, even
9302
* though we've marked it as outbound (so we don't look through dummynet) and it
9303
* happens after the outbound processing (pf_test(PF_OUT) above).
9304
* Swap the dummynet pipe numbers, because it's going to come to the wrong
9305
* conclusion about what direction it's processing, and we can't fix it or it
9306
* will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
9307
* decision will pick the right pipe, and everything will mostly work as expected.
9308
*/
9309
tmp = pd->act.dnrpipe;
9310
pd->act.dnrpipe = pd->act.dnpipe;
9311
pd->act.dnpipe = tmp;
9312
}
9313
9314
/*
9315
* If small enough for interface, or the interface will take
9316
* care of the fragmentation for us, we can just send directly.
9317
*/
9318
if (ip_len <= ifp->if_mtu ||
9319
(m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
9320
ip->ip_sum = 0;
9321
if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
9322
ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
9323
m0->m_pkthdr.csum_flags &= ~CSUM_IP;
9324
}
9325
m_clrprotoflags(m0); /* Avoid confusing lower layers. */
9326
9327
md = m0;
9328
error = pf_dummynet_route(pd, s, r, ifp,
9329
(const struct sockaddr *)gw, &md);
9330
if (md != NULL) {
9331
error = (*ifp->if_output)(ifp, md,
9332
(const struct sockaddr *)gw, (struct route *)&ro);
9333
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
9334
}
9335
goto done;
9336
}
9337
9338
/* Balk when the DF bit is set or the interface doesn't support TSO. */
9339
if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
9340
error = EMSGSIZE;
9341
KMOD_IPSTAT_INC(ips_cantfrag);
9342
if (pd->act.rt != PF_DUPTO) {
9343
if (s && s->nat_rule != NULL) {
9344
MPASS(m0 == pd->m);
9345
PACKET_UNDO_NAT(pd,
9346
(ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
9347
s);
9348
}
9349
9350
pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
9351
ifp->if_mtu, pd->af, r, pd->act.rtableid);
9352
}
9353
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9354
action = PF_DROP;
9355
goto bad;
9356
}
9357
9358
error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
9359
if (error) {
9360
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9361
action = PF_DROP;
9362
goto bad;
9363
}
9364
9365
for (; m0; m0 = m1) {
9366
m1 = m0->m_nextpkt;
9367
m0->m_nextpkt = NULL;
9368
if (error == 0) {
9369
m_clrprotoflags(m0);
9370
md = m0;
9371
pd->pf_mtag = pf_find_mtag(md);
9372
error = pf_dummynet_route(pd, s, r, ifp,
9373
(const struct sockaddr *)gw, &md);
9374
if (md != NULL) {
9375
error = (*ifp->if_output)(ifp, md,
9376
(const struct sockaddr *)gw,
9377
(struct route *)&ro);
9378
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
9379
}
9380
} else
9381
m_freem(m0);
9382
}
9383
9384
if (error == 0)
9385
KMOD_IPSTAT_INC(ips_fragmented);
9386
9387
done:
9388
if (pd->act.rt != PF_DUPTO)
9389
pd->m = NULL;
9390
else
9391
action = PF_PASS;
9392
return (action);
9393
9394
bad_locked:
9395
if (s)
9396
PF_STATE_UNLOCK(s);
9397
bad:
9398
m_freem(m0);
9399
goto done;
9400
}
9401
#endif /* INET */
9402
9403
#ifdef INET6
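/*
 * IPv6 counterpart of pf_route(). Instead of fragmenting, packets
 * that pf reassembled earlier are refragmented here, and oversized
 * packets trigger an ICMPv6 packet-too-big error.
 */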
9404
static int
9405
pf_route6(struct pf_krule *r, struct ifnet *oifp,
9406
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
9407
{
9408
struct mbuf *m0, *md;
9409
struct m_tag *mtag;
9410
struct sockaddr_in6 dst;
9411
struct ip6_hdr *ip6;
9412
struct ifnet *ifp = NULL;
9413
int r_dir;
9414
bool skip_test = false;
9415
int action = PF_PASS;
9416
9417
KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
9418
9419
SDT_PROBE4(pf, ip6, route_to, entry, pd->m, pd, s, oifp);
9420
9421
if (s) {
9422
r_dir = s->direction;
9423
} else {
9424
r_dir = r->direction;
9425
}
9426
9427
KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
9428
r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
9429
__func__));
9430
9431
if ((pd->pf_mtag == NULL &&
9432
((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
9433
pd->pf_mtag->routed++ > 3) {
9434
m0 = pd->m;
9435
pd->m = NULL;
9436
action = PF_DROP;
9437
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9438
goto bad_locked;
9439
}
9440
9441
if (pd->act.rt_kif != NULL)
9442
ifp = pd->act.rt_kif->pfik_ifp;
9443
9444
if (pd->act.rt == PF_DUPTO) {
9445
if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
9446
if (s != NULL) {
9447
PF_STATE_UNLOCK(s);
9448
}
9449
if (ifp == oifp) {
9450
/* Already-duplicated packet leaving via its dup-to interface; pass it on. */
9451
return (action);
9452
} else {
9453
m0 = pd->m;
9454
pd->m = NULL;
9455
action = PF_DROP;
9456
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9457
goto bad;
9458
}
9459
} else {
9460
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
9461
if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
9462
if (s)
9463
PF_STATE_UNLOCK(s);
9464
return (action);
9465
}
9466
}
9467
} else {
9468
if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
9469
if (pd->af == pd->naf) {
9470
pf_dummynet(pd, s, r, &pd->m);
9471
if (s)
9472
PF_STATE_UNLOCK(s);
9473
return (action);
9474
} else {
9475
if (r_dir == PF_IN) {
9476
skip_test = true;
9477
}
9478
}
9479
}
9480
9481
/*
9482
* If we're actually doing route-to and af-to and are in the
9483
* reply direction.
9484
*/
9485
if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
9486
pd->af != pd->naf) {
9487
if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET6) {
9488
/* Un-set ifp so we do a plain route lookup. */
9489
ifp = NULL;
9490
}
9491
if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET) {
9492
/* Un-set ifp so we do a plain route lookup. */
9493
ifp = NULL;
9494
}
9495
}
9496
m0 = pd->m;
9497
}
9498
9499
ip6 = mtod(m0, struct ip6_hdr *);
9500
9501
bzero(&dst, sizeof(dst));
9502
dst.sin6_family = AF_INET6;
9503
dst.sin6_len = sizeof(dst);
9504
pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr,
9505
AF_INET6);
9506
9507
if (pd->dir == PF_IN) {
9508
if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
9509
if (r->rt != PF_DUPTO)
9510
pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
9511
ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r,
9512
pd->act.rtableid);
9513
action = PF_DROP;
9514
goto bad_locked;
9515
}
9516
ip6->ip6_hlim -= IPV6_HLIMDEC;
9517
}
9518
9519
if (s != NULL) {
9520
if (ifp == NULL && (pd->af != pd->naf)) {
9521
const struct nhop_object *nh;
9522
nh = fib6_lookup(M_GETFIB(m0), &ip6->ip6_dst, 0, NHR_NONE, 0);
9523
if (nh) {
9524
ifp = nh->nh_ifp;
9525
9526
/* Use the gateway if needed. */
9527
if (nh->nh_flags & NHF_GATEWAY)
9528
bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr,
9529
sizeof(dst.sin6_addr));
9530
else
9531
dst.sin6_addr = ip6->ip6_dst;
9532
}
9533
}
9534
PF_STATE_UNLOCK(s);
9535
}
9536
9537
if (pd->af != pd->naf) {
9538
struct udphdr *uh = &pd->hdr.udp;
9539
9540
if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) {
9541
uh->uh_sum = in6_cksum_pseudo(ip6,
9542
ntohs(uh->uh_ulen), IPPROTO_UDP, 0);
9543
m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any);
9544
}
9545
}
9546
9547
if (ifp == NULL) {
9548
m0 = pd->m;
9549
pd->m = NULL;
9550
action = PF_DROP;
9551
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9552
goto bad;
9553
}
9554
9555
/*
9556
* Bind to the correct interface if we're if-bound. We don't know which
9557
* interface that will be until here, so we've inserted the state
9558
* on V_pf_all. Fix that now.
9559
*/
9560
if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
9561
/* Verify that we're here because of BOUND_IFACE */
9562
MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN));
9563
s->kif = ifp->if_pf_kif;
9564
if (pd->act.rt == PF_REPLYTO) {
9565
s->orig_kif = oifp->if_pf_kif;
9566
}
9567
}
9568
9569
if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN))
9570
skip_test = true;
9571
9572
if (pd->dir == PF_IN) {
9573
if (skip_test) {
9574
struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif;
9575
MPASS(s != NULL);
9576
pf_counter_u64_critical_enter();
9577
pf_counter_u64_add_protected(
9578
&out_kif->pfik_bytes[pd->naf == AF_INET6][1]
9579
[action != PF_PASS && action != PF_AFRT], pd->tot_len);
9580
pf_counter_u64_add_protected(
9581
&out_kif->pfik_packets[pd->naf == AF_INET6][1]
9582
[action != PF_PASS && action != PF_AFRT], 1);
9583
pf_counter_u64_critical_exit();
9584
} else {
9585
if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT,
9586
ifp, &m0, inp, &pd->act) != PF_PASS) {
9587
action = PF_DROP;
9588
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9589
goto bad;
9590
} else if (m0 == NULL) {
9591
action = PF_DROP;
9592
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9593
goto done;
9594
}
9595
if (m0->m_len < sizeof(struct ip6_hdr)) {
9596
DPFPRINTF(PF_DEBUG_URGENT,
9597
"%s: m0->m_len < sizeof(struct ip6_hdr)",
9598
__func__);
9599
action = PF_DROP;
9600
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9601
goto bad;
9602
}
9603
ip6 = mtod(m0, struct ip6_hdr *);
9604
}
9605
}
9606
9607
if (ifp->if_flags & IFF_LOOPBACK)
9608
m0->m_flags |= M_SKIP_FIREWALL;
9609
9610
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
9611
~ifp->if_hwassist) {
9612
uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
9613
in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
9614
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
9615
}
9616
9617
if (pd->dir == PF_IN) {
9618
uint16_t tmp;
9619
/*
9620
* Make sure dummynet gets the correct direction, in case it needs to
9621
* re-inject later.
9622
*/
9623
pd->dir = PF_OUT;
9624
9625
/*
9626
* The following processing is actually the rest of the inbound processing, even
9627
* though we've marked it as outbound (so we don't look through dummynet) and it
9628
* happens after the outbound processing (pf_test(PF_OUT) above).
9629
* Swap the dummynet pipe numbers, because it's going to come to the wrong
9630
* conclusion about what direction it's processing, and we can't fix it or it
9631
* will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
9632
* decision will pick the right pipe, and everything will mostly work as expected.
9633
*/
9634
tmp = pd->act.dnrpipe;
9635
pd->act.dnrpipe = pd->act.dnpipe;
9636
pd->act.dnpipe = tmp;
9637
}
9638
9639
/*
9640
* If the packet is too large for the outgoing interface,
9641
* send back an icmp6 error.
9642
*/
9643
if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
9644
dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
9645
mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL);
9646
if (mtag != NULL) {
9647
int ret __sdt_used;
9648
ret = pf_refragment6(ifp, &m0, mtag, ifp, true);
9649
SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
9650
goto done;
9651
}
9652
9653
if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
9654
md = m0;
9655
pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
9656
if (md != NULL) {
9657
int ret __sdt_used;
9658
ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL);
9659
SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
9660
}
9661
}
9662
else {
9663
in6_ifstat_inc(ifp, ifs6_in_toobig);
9664
if (pd->act.rt != PF_DUPTO) {
9665
if (s && s->nat_rule != NULL) {
9666
MPASS(m0 == pd->m);
9667
PACKET_UNDO_NAT(pd,
9668
((caddr_t)ip6 - m0->m_data) +
9669
sizeof(struct ip6_hdr), s);
9670
}
9671
9672
if (r->rt != PF_DUPTO)
9673
pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
9674
ifp->if_mtu, pd->af, r, pd->act.rtableid);
9675
}
9676
action = PF_DROP;
9677
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9678
goto bad;
9679
}
9680
9681
done:
9682
if (pd->act.rt != PF_DUPTO)
9683
pd->m = NULL;
9684
else
9685
action = PF_PASS;
9686
return (action);
9687
9688
bad_locked:
9689
if (s)
9690
PF_STATE_UNLOCK(s);
9691
bad:
9692
m_freem(m0);
9693
goto done;
9694
}
9695
#endif /* INET6 */
9696
9697
/*
9698
* FreeBSD supports cksum offloads for the following drivers:
9699
* em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4)
9700
*
9701
* CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
9702
* the network driver computed the cksum including the pseudo header;
9703
* csum_data must be verified
9704
* CSUM_DATA_VALID :
9705
* the network driver computed the cksum, but an additional pseudo-header
9706
* cksum computation with the partial csum_data is needed (i.e. lack of
9707
* H/W support for the pseudo header, for instance sk(4) and possibly gem(4))
9708
*
9709
* After validating the packet's cksum, set both the CSUM_DATA_VALID and
9710
* CSUM_PSEUDO_HDR flags in order to avoid recomputing the cksum in the
9711
* upper TCP/UDP layer.
9712
* Also, set csum_data to 0xffff to force cksum validation.
9713
*/
9714
static int
9715
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
9716
{
9717
u_int16_t sum = 0;
9718
int hw_assist = 0;
9719
struct ip *ip;
9720
9721
if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
9722
return (1);
9723
if (m->m_pkthdr.len < off + len)
9724
return (1);
9725
9726
switch (p) {
9727
case IPPROTO_TCP:
9728
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
9729
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
9730
sum = m->m_pkthdr.csum_data;
9731
} else {
9732
ip = mtod(m, struct ip *);
9733
sum = in_pseudo(ip->ip_src.s_addr,
9734
ip->ip_dst.s_addr, htonl((u_short)len +
9735
m->m_pkthdr.csum_data + IPPROTO_TCP));
9736
}
9737
sum ^= 0xffff;
9738
++hw_assist;
9739
}
9740
break;
9741
case IPPROTO_UDP:
9742
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
9743
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
9744
sum = m->m_pkthdr.csum_data;
9745
} else {
9746
ip = mtod(m, struct ip *);
9747
sum = in_pseudo(ip->ip_src.s_addr,
9748
ip->ip_dst.s_addr, htonl((u_short)len +
9749
m->m_pkthdr.csum_data + IPPROTO_UDP));
9750
}
9751
sum ^= 0xffff;
9752
++hw_assist;
9753
}
9754
break;
9755
case IPPROTO_ICMP:
9756
#ifdef INET6
9757
case IPPROTO_ICMPV6:
9758
#endif /* INET6 */
9759
break;
9760
default:
9761
return (1);
9762
}
9763
9764
if (!hw_assist) {
9765
switch (af) {
9766
case AF_INET:
9767
if (m->m_len < sizeof(struct ip))
9768
return (1);
9769
sum = in4_cksum(m, (p == IPPROTO_ICMP ? 0 : p), off, len);
9770
break;
9771
#ifdef INET6
9772
case AF_INET6:
9773
if (m->m_len < sizeof(struct ip6_hdr))
9774
return (1);
9775
sum = in6_cksum(m, p, off, len);
9776
break;
9777
#endif /* INET6 */
9778
}
9779
}
9780
if (sum) {
9781
switch (p) {
9782
case IPPROTO_TCP:
9783
{
9784
KMOD_TCPSTAT_INC(tcps_rcvbadsum);
9785
break;
9786
}
9787
case IPPROTO_UDP:
9788
{
9789
KMOD_UDPSTAT_INC(udps_badsum);
9790
break;
9791
}
9792
#ifdef INET
9793
case IPPROTO_ICMP:
9794
{
9795
KMOD_ICMPSTAT_INC(icps_checksum);
9796
break;
9797
}
9798
#endif
9799
#ifdef INET6
9800
case IPPROTO_ICMPV6:
9801
{
9802
KMOD_ICMP6STAT_INC(icp6s_checksum);
9803
break;
9804
}
9805
#endif /* INET6 */
9806
}
9807
return (1);
9808
} else {
9809
if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
9810
m->m_pkthdr.csum_flags |=
9811
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
9812
m->m_pkthdr.csum_data = 0xffff;
9813
}
9814
}
9815
return (0);
9816
}
9817
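/*
 * Convert the packet description into an ip_fw_args flow for
 * dummynet. Returns false when the packet should bypass dummynet:
 * no pipe is configured for its direction, or it has already been
 * through dummynet.
 */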
9818
static bool
9819
pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r,
9820
const struct pf_kstate *s, struct ip_fw_args *dnflow)
9821
{
9822
int dndir = r->direction;
9823
sa_family_t af = pd->naf;
9824
9825
if (s && dndir == PF_INOUT) {
9826
dndir = s->direction;
9827
} else if (dndir == PF_INOUT) {
9828
/* Assume primary direction. Happens when we've set dnpipe in
9829
* the ethernet level code. */
9830
dndir = pd->dir;
9831
}
9832
9833
if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED)
9834
return (false);
9835
9836
memset(dnflow, 0, sizeof(*dnflow));
9837
9838
if (pd->dport != NULL)
9839
dnflow->f_id.dst_port = ntohs(*pd->dport);
9840
if (pd->sport != NULL)
9841
dnflow->f_id.src_port = ntohs(*pd->sport);
9842
9843
if (pd->dir == PF_IN)
9844
dnflow->flags |= IPFW_ARGS_IN;
9845
else
9846
dnflow->flags |= IPFW_ARGS_OUT;
9847
9848
if (pd->dir != dndir && pd->act.dnrpipe) {
9849
dnflow->rule.info = pd->act.dnrpipe;
9850
}
9851
else if (pd->dir == dndir && pd->act.dnpipe) {
9852
dnflow->rule.info = pd->act.dnpipe;
9853
}
9854
else {
9855
return (false);
9856
}
9857
9858
dnflow->rule.info |= IPFW_IS_DUMMYNET;
9859
if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFSTATE_DN_IS_PIPE)
9860
dnflow->rule.info |= IPFW_IS_PIPE;
9861
9862
dnflow->f_id.proto = pd->proto;
9863
dnflow->f_id.extra = dnflow->rule.info;
9864
if (s)
9865
af = s->key[PF_SK_STACK]->af;
9866
9867
switch (af) {
9868
case AF_INET:
9869
dnflow->f_id.addr_type = 4;
9870
if (s) {
9871
dnflow->f_id.src_ip = htonl(
9872
s->key[PF_SK_STACK]->addr[pd->sidx].v4.s_addr);
9873
dnflow->f_id.dst_ip = htonl(
9874
s->key[PF_SK_STACK]->addr[pd->didx].v4.s_addr);
9875
} else {
9876
dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr);
9877
dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr);
9878
}
9879
break;
9880
case AF_INET6:
9881
dnflow->f_id.addr_type = 6;
9882
9883
if (s) {
9884
dnflow->f_id.src_ip6 =
9885
s->key[PF_SK_STACK]->addr[pd->sidx].v6;
9886
dnflow->f_id.dst_ip6 =
9887
s->key[PF_SK_STACK]->addr[pd->didx].v6;
9888
} else {
9889
dnflow->f_id.src_ip6 = pd->src->v6;
9890
dnflow->f_id.dst_ip6 = pd->dst->v6;
9891
}
9892
break;
9893
}
9894
9895
/*
9896
* Separate this out, because while we pass the pre-NAT addresses to
9897
* dummynet we want the post-nat address family in case of nat64.
9898
* Dummynet may call ip_output/ip6_output itself, and we need it to
9899
* call the correct one.
9900
*/
9901
if (pd->naf == AF_INET6)
9902
dnflow->flags |= IPFW_ARGS_IP6;
9903
9904
return (true);
9905
}
9906
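/*
 * Illustrative sketch, not part of pf.c: the pipe selection performed by
 * pf_pdesc_to_dnflow() above, reduced to a pure function. A packet
 * travelling in the rule's effective direction uses the forward pipe
 * (dnpipe), the return path uses the reverse pipe (dnrpipe), and zero
 * means "no dummynet processing". The name is hypothetical.
 */
static uint32_t __unused
example_select_dnpipe(int pkt_dir, int rule_dir, uint32_t dnpipe,
    uint32_t dnrpipe)
{
	return (pkt_dir == rule_dir ? dnpipe : dnrpipe);
}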
9907
int
9908
pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
9909
struct inpcb *inp)
9910
{
9911
struct pfi_kkif *kif;
9912
struct mbuf *m = *m0;
9913
9914
M_ASSERTPKTHDR(m);
9915
MPASS(ifp->if_vnet == curvnet);
9916
NET_EPOCH_ASSERT();
9917
9918
if (!V_pf_status.running)
9919
return (PF_PASS);
9920
9921
kif = (struct pfi_kkif *)ifp->if_pf_kif;
9922
9923
if (kif == NULL) {
9924
DPFPRINTF(PF_DEBUG_URGENT,
9925
"%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
9926
return (PF_DROP);
9927
}
9928
if (kif->pfik_flags & PFI_IFLAG_SKIP)
9929
return (PF_PASS);
9930
9931
if (m->m_flags & M_SKIP_FIREWALL)
9932
return (PF_PASS);
9933
9934
if (__predict_false(! M_WRITABLE(*m0))) {
9935
m = *m0 = m_unshare(*m0, M_NOWAIT);
9936
if (*m0 == NULL)
9937
return (PF_DROP);
9938
}
9939
9940
/* Stateless! */
9941
return (pf_test_eth_rule(dir, kif, m0));
9942
}
9943
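/*
 * Illustrative sketch, not part of pf.c: the copy-before-write pattern
 * used by pf_test_eth() above and by pf_test() below. A firewall that may
 * rewrite the packet must not modify an mbuf chain shared with other
 * consumers, so it replaces a shared chain with a private copy first.
 * The function name is hypothetical.
 */
static int __unused
example_make_writable(struct mbuf **mp)
{
	if (__predict_false(!M_WRITABLE(*mp))) {
		*mp = m_unshare(*mp, M_NOWAIT);	/* may fail under memory pressure */
		if (*mp == NULL)
			return (ENOBUFS);	/* caller must drop the packet */
	}
	return (0);
}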
9944
static __inline void
9945
pf_dummynet_flag_remove(struct mbuf *m, struct pf_mtag *pf_mtag)
9946
{
9947
struct m_tag *mtag;
9948
9949
pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET;
9950
9951
/* dummynet adds this tag, but pf does not need it,
9952
* and keeping it creates unexpected behavior,
9953
* e.g. in case of divert(4) usage right after dummynet. */
9954
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
9955
if (mtag != NULL)
9956
m_tag_delete(m, mtag);
9957
}
9958
9959
static int
9960
pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s,
9961
struct pf_krule *r, struct mbuf **m0)
9962
{
9963
return (pf_dummynet_route(pd, s, r, NULL, NULL, m0));
9964
}
9965
9966
static int
9967
pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s,
9968
struct pf_krule *r, struct ifnet *ifp, const struct sockaddr *sa,
9969
struct mbuf **m0)
9970
{
9971
struct ip_fw_args dnflow;
9972
9973
NET_EPOCH_ASSERT();
9974
9975
if (pd->act.dnpipe == 0 && pd->act.dnrpipe == 0)
9976
return (0);
9977
9978
if (ip_dn_io_ptr == NULL) {
9979
m_freem(*m0);
9980
*m0 = NULL;
9981
return (ENOMEM);
9982
}
9983
9984
if (pd->pf_mtag == NULL &&
9985
((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) {
9986
m_freem(*m0);
9987
*m0 = NULL;
9988
return (ENOMEM);
9989
}
9990
9991
if (ifp != NULL) {
9992
pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
9993
9994
pd->pf_mtag->if_index = ifp->if_index;
9995
pd->pf_mtag->if_idxgen = ifp->if_idxgen;
9996
9997
MPASS(sa != NULL);
9998
9999
switch (sa->sa_family) {
10000
case AF_INET:
10001
memcpy(&pd->pf_mtag->dst, sa,
10002
sizeof(struct sockaddr_in));
10003
break;
10004
case AF_INET6:
10005
memcpy(&pd->pf_mtag->dst, sa,
10006
sizeof(struct sockaddr_in6));
10007
break;
10008
}
10009
}
10010
10011
if (s != NULL && s->nat_rule != NULL &&
10012
s->nat_rule->action == PF_RDR &&
10013
(
10014
#ifdef INET
10015
(pd->af == AF_INET && IN_LOOPBACK(ntohl(pd->dst->v4.s_addr))) ||
10016
#endif /* INET */
10017
(pd->af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd->dst->v6)))) {
10018
/*
10019
* If we're redirecting to loopback mark this packet
10020
* as being local. Otherwise it might get dropped
10021
* if dummynet re-injects.
10022
*/
10023
(*m0)->m_pkthdr.rcvif = V_loif;
10024
}
10025
10026
if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) {
10027
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
10028
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED;
10029
ip_dn_io_ptr(m0, &dnflow);
10030
if (*m0 != NULL) {
10031
pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
10032
pf_dummynet_flag_remove(*m0, pd->pf_mtag);
10033
}
10034
}
10035
10036
return (0);
10037
}
10038
10039
static int
10040
pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end,
10041
u_short *reason)
10042
{
10043
uint8_t type, length, opts[15 * 4 - sizeof(struct ip)];
10044
10045
/* IP header in payload of ICMP packet may be too short */
10046
if (pd->m->m_pkthdr.len < end) {
10047
DPFPRINTF(PF_DEBUG_MISC, "IP option too short");
10048
REASON_SET(reason, PFRES_SHORT);
10049
return (PF_DROP);
10050
}
10051
10052
MPASS(end - off <= sizeof(opts));
10053
m_copydata(pd->m, off, end - off, opts);
10054
end -= off;
10055
off = 0;
10056
10057
while (off < end) {
10058
type = opts[off];
10059
if (type == IPOPT_EOL)
10060
break;
10061
if (type == IPOPT_NOP) {
10062
off++;
10063
continue;
10064
}
10065
if (off + 2 > end) {
10066
DPFPRINTF(PF_DEBUG_MISC, "IP length opt");
10067
REASON_SET(reason, PFRES_IPOPTIONS);
10068
return (PF_DROP);
10069
}
10070
length = opts[off + 1];
10071
if (length < 2) {
10072
DPFPRINTF(PF_DEBUG_MISC, "IP short opt");
10073
REASON_SET(reason, PFRES_IPOPTIONS);
10074
return (PF_DROP);
10075
}
10076
if (off + length > end) {
10077
DPFPRINTF(PF_DEBUG_MISC, "IP long opt");
10078
REASON_SET(reason, PFRES_IPOPTIONS);
10079
return (PF_DROP);
10080
}
10081
switch (type) {
10082
case IPOPT_RA:
10083
pd->badopts |= PF_OPT_ROUTER_ALERT;
10084
break;
10085
default:
10086
pd->badopts |= PF_OPT_OTHER;
10087
break;
10088
}
10089
off += length;
10090
}
10091
10092
return (PF_PASS);
10093
}
10094
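/*
 * Illustrative sketch, not part of pf.c: the defensive IPv4 option walk
 * from pf_walk_option() above, over a plain buffer. IPOPT_EOL terminates
 * the list, IPOPT_NOP occupies a single byte, and every other option is a
 * TLV whose length octet must be at least 2 (it covers the type and
 * length bytes themselves) and must not run past the buffer. Returns 0 if
 * the structure is well formed. The name is hypothetical.
 */
static int __unused
example_walk_ipv4_options(const uint8_t *opts, size_t len)
{
	size_t off = 0;

	while (off < len) {
		uint8_t type = opts[off];

		if (type == IPOPT_EOL)
			break;
		if (type == IPOPT_NOP) {
			off++;
			continue;
		}
		if (off + 2 > len)
			return (-1);	/* no room for the length octet */
		uint8_t olen = opts[off + 1];

		if (olen < 2 || off + olen > len)
			return (-1);	/* length must cover its own TLV */
		off += olen;
	}
	return (0);
}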
10095
static int
10096
pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
10097
{
10098
struct ah ext;
10099
u_int32_t hlen, end;
10100
int hdr_cnt;
10101
10102
hlen = h->ip_hl << 2;
10103
if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
10104
REASON_SET(reason, PFRES_SHORT);
10105
return (PF_DROP);
10106
}
10107
if (hlen != sizeof(struct ip)) {
10108
if (pf_walk_option(pd, h, pd->off + sizeof(struct ip),
10109
pd->off + hlen, reason) != PF_PASS)
10110
return (PF_DROP);
10111
/* header options which contain only padding are fishy */
10112
if (pd->badopts == 0)
10113
pd->badopts |= PF_OPT_OTHER;
10114
}
10115
end = pd->off + ntohs(h->ip_len);
10116
pd->off += hlen;
10117
pd->proto = h->ip_p;
10118
/* IGMP packets have router alert options, allow them */
10119
if (pd->proto == IPPROTO_IGMP) {
10120
/*
10121
* According to RFC 1112 ttl must be set to 1 in all IGMP
10122
* packets sent to 224.0.0.1
10123
*/
10124
if ((h->ip_ttl != 1) &&
10125
(h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) {
10126
DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP");
10127
REASON_SET(reason, PFRES_IPOPTIONS);
10128
return (PF_DROP);
10129
}
10130
pd->badopts &= ~PF_OPT_ROUTER_ALERT;
10131
}
10132
/* stop walking over non-initial fragments */
10133
if ((h->ip_off & htons(IP_OFFMASK)) != 0)
10134
return (PF_PASS);
10135
for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
10136
switch (pd->proto) {
10137
case IPPROTO_AH:
10138
/* fragments may be short */
10139
if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
10140
end < pd->off + sizeof(ext))
10141
return (PF_PASS);
10142
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
10143
reason, AF_INET)) {
10144
DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr");
10145
return (PF_DROP);
10146
}
10147
pd->off += (ext.ah_len + 2) * 4;
10148
pd->proto = ext.ah_nxt;
10149
break;
10150
default:
10151
return (PF_PASS);
10152
}
10153
}
10154
DPFPRINTF(PF_DEBUG_MISC, "IPv4 nested authentication header limit");
10155
REASON_SET(reason, PFRES_IPOPTIONS);
10156
return (PF_DROP);
10157
}
10158
10159
#ifdef INET6
10160
static int
10161
pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
10162
u_short *reason)
10163
{
10164
struct ip6_opt opt;
10165
struct ip6_opt_jumbo jumbo;
10166
10167
while (off < end) {
10168
if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
10169
sizeof(opt.ip6o_type), reason, AF_INET6)) {
10170
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type");
10171
return (PF_DROP);
10172
}
10173
if (opt.ip6o_type == IP6OPT_PAD1) {
10174
off++;
10175
continue;
10176
}
10177
if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
10178
reason, AF_INET6)) {
10179
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt");
10180
return (PF_DROP);
10181
}
10182
if (off + sizeof(opt) + opt.ip6o_len > end) {
10183
DPFPRINTF(PF_DEBUG_MISC, "IPv6 long opt");
10184
REASON_SET(reason, PFRES_IPOPTIONS);
10185
return (PF_DROP);
10186
}
10187
switch (opt.ip6o_type) {
10188
case IP6OPT_PADN:
10189
break;
10190
case IP6OPT_JUMBO:
10191
pd->badopts |= PF_OPT_JUMBO;
10192
if (pd->jumbolen != 0) {
10193
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple jumbo");
10194
REASON_SET(reason, PFRES_IPOPTIONS);
10195
return (PF_DROP);
10196
}
10197
if (ntohs(h->ip6_plen) != 0) {
10198
DPFPRINTF(PF_DEBUG_MISC, "IPv6 bad jumbo plen");
10199
REASON_SET(reason, PFRES_IPOPTIONS);
10200
return (PF_DROP);
10201
}
10202
if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
10203
reason, AF_INET6)) {
10204
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo");
10205
return (PF_DROP);
10206
}
10207
memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
10208
sizeof(pd->jumbolen));
10209
pd->jumbolen = ntohl(pd->jumbolen);
10210
if (pd->jumbolen < IPV6_MAXPACKET) {
10211
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbolen");
10212
REASON_SET(reason, PFRES_IPOPTIONS);
10213
return (PF_DROP);
10214
}
10215
break;
10216
case IP6OPT_ROUTER_ALERT:
10217
pd->badopts |= PF_OPT_ROUTER_ALERT;
10218
break;
10219
default:
10220
pd->badopts |= PF_OPT_OTHER;
10221
break;
10222
}
10223
off += sizeof(opt) + opt.ip6o_len;
10224
}
10225
10226
return (PF_PASS);
10227
}
10228
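/*
 * Illustrative sketch, not part of pf.c: the RFC 2675 jumbo payload
 * constraints enforced by pf_walk_option6() above, over plain host-order
 * values. A packet may carry at most one jumbo option, its ip6_plen must
 * be zero, and the jumbo length must exceed what a regular payload length
 * field could express. Returns 0 when acceptable. The name is
 * hypothetical.
 */
static int __unused
example_jumbo_sane(uint16_t ip6_plen, uint32_t jumbolen, int seen_before)
{
	if (seen_before)
		return (-1);	/* at most one jumbo option per packet */
	if (ip6_plen != 0)
		return (-1);	/* jumbograms require ip6_plen == 0 */
	if (jumbolen < IPV6_MAXPACKET)
		return (-1);	/* a regular payload length would do */
	return (0);
}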
10229
int
10230
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
10231
{
10232
struct ip6_frag frag;
10233
struct ip6_ext ext;
10234
struct icmp6_hdr icmp6;
10235
struct ip6_rthdr rthdr;
10236
uint32_t end;
10237
int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
10238
10239
pd->off += sizeof(struct ip6_hdr);
10240
end = pd->off + ntohs(h->ip6_plen);
10241
pd->fragoff = pd->extoff = pd->jumbolen = 0;
10242
pd->proto = h->ip6_nxt;
10243
for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
10244
switch (pd->proto) {
10245
case IPPROTO_ROUTING:
10246
case IPPROTO_DSTOPTS:
10247
pd->badopts |= PF_OPT_OTHER;
10248
break;
10249
case IPPROTO_HOPOPTS:
10250
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
10251
reason, AF_INET6)) {
10252
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
10253
return (PF_DROP);
10254
}
10255
if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
10256
pd->off + (ext.ip6e_len + 1) * 8,
10257
reason) != PF_PASS)
10258
return (PF_DROP);
10259
/* option header which contains only padding is fishy */
10260
if (pd->badopts == 0)
10261
pd->badopts |= PF_OPT_OTHER;
10262
break;
10263
}
10264
switch (pd->proto) {
10265
case IPPROTO_FRAGMENT:
10266
if (fraghdr_cnt++) {
10267
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple fragment");
10268
REASON_SET(reason, PFRES_FRAG);
10269
return (PF_DROP);
10270
}
10271
/* jumbo payload packets cannot be fragmented */
10272
if (pd->jumbolen != 0) {
10273
DPFPRINTF(PF_DEBUG_MISC, "IPv6 fragmented jumbo");
10274
REASON_SET(reason, PFRES_FRAG);
10275
return (PF_DROP);
10276
}
10277
if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
10278
reason, AF_INET6)) {
10279
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment");
10280
return (PF_DROP);
10281
}
10282
/* stop walking over non-initial fragments */
10283
if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
10284
pd->fragoff = pd->off;
10285
return (PF_PASS);
10286
}
10287
/* RFC6946: reassemble only non-atomic fragments */
10288
if (frag.ip6f_offlg & IP6F_MORE_FRAG)
10289
pd->fragoff = pd->off;
10290
pd->off += sizeof(frag);
10291
pd->proto = frag.ip6f_nxt;
10292
break;
10293
case IPPROTO_ROUTING:
10294
if (rthdr_cnt++) {
10295
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple rthdr");
10296
REASON_SET(reason, PFRES_IPOPTIONS);
10297
return (PF_DROP);
10298
}
10299
/* fragments may be short */
10300
if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
10301
pd->off = pd->fragoff;
10302
pd->proto = IPPROTO_FRAGMENT;
10303
return (PF_PASS);
10304
}
10305
if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
10306
reason, AF_INET6)) {
10307
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr");
10308
return (PF_DROP);
10309
}
10310
if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
10311
DPFPRINTF(PF_DEBUG_MISC, "IPv6 rthdr0");
10312
REASON_SET(reason, PFRES_IPOPTIONS);
10313
return (PF_DROP);
10314
}
10315
/* FALLTHROUGH */
10316
case IPPROTO_HOPOPTS:
10317
/* RFC2460 4.1: Hop-by-Hop only after IPv6 header */
10318
if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
10319
DPFPRINTF(PF_DEBUG_MISC, "IPv6 hopopts not first");
10320
REASON_SET(reason, PFRES_IPOPTIONS);
10321
return (PF_DROP);
10322
}
10323
/* FALLTHROUGH */
10324
case IPPROTO_AH:
10325
case IPPROTO_DSTOPTS:
10326
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
10327
reason, AF_INET6)) {
10328
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
10329
return (PF_DROP);
10330
}
10331
/* fragments may be short */
10332
if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
10333
pd->off = pd->fragoff;
10334
pd->proto = IPPROTO_FRAGMENT;
10335
return (PF_PASS);
10336
}
10337
/* reassembly needs the ext header before the frag */
10338
if (pd->fragoff == 0)
10339
pd->extoff = pd->off;
10340
if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
10341
ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
10342
DPFPRINTF(PF_DEBUG_MISC, "IPv6 missing jumbo");
10343
REASON_SET(reason, PFRES_IPOPTIONS);
10344
return (PF_DROP);
10345
}
10346
if (pd->proto == IPPROTO_AH)
10347
pd->off += (ext.ip6e_len + 2) * 4;
10348
else
10349
pd->off += (ext.ip6e_len + 1) * 8;
10350
pd->proto = ext.ip6e_nxt;
10351
break;
10352
case IPPROTO_ICMPV6:
10353
/* fragments may be short, ignore inner header then */
10354
if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
10355
pd->off = pd->fragoff;
10356
pd->proto = IPPROTO_FRAGMENT;
10357
return (PF_PASS);
10358
}
10359
if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
10360
reason, AF_INET6)) {
10361
DPFPRINTF(PF_DEBUG_MISC,
10362
"IPv6 short icmp6hdr");
10363
return (PF_DROP);
10364
}
10365
/* ICMP multicast packets have router alert options */
10366
switch (icmp6.icmp6_type) {
10367
case MLD_LISTENER_QUERY:
10368
case MLD_LISTENER_REPORT:
10369
case MLD_LISTENER_DONE:
10370
case MLDV2_LISTENER_REPORT:
10371
/*
10372
* According to RFC 2710 all MLD messages are
10373
* sent with hop-limit (ttl) set to 1, and a link-
10374
* local source address. If either one is
10375
* missing then the MLD message is invalid and
10376
* should be discarded.
10377
*/
10378
if ((h->ip6_hlim != 1) ||
10379
!IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
10380
DPFPRINTF(PF_DEBUG_MISC, "Invalid MLD");
10381
REASON_SET(reason, PFRES_IPOPTIONS);
10382
return (PF_DROP);
10383
}
10384
pd->badopts &= ~PF_OPT_ROUTER_ALERT;
10385
break;
10386
}
10387
return (PF_PASS);
10388
case IPPROTO_TCP:
10389
case IPPROTO_UDP:
10390
case IPPROTO_SCTP:
10391
/* fragments may be short, ignore inner header then */
10392
if (pd->fragoff != 0 && end < pd->off +
10393
(pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
10394
pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
10395
pd->proto == IPPROTO_SCTP ? sizeof(struct sctphdr) :
10396
sizeof(struct icmp6_hdr))) {
10397
pd->off = pd->fragoff;
10398
pd->proto = IPPROTO_FRAGMENT;
10399
}
10400
/* FALLTHROUGH */
10401
default:
10402
return (PF_PASS);
10403
}
10404
}
10405
DPFPRINTF(PF_DEBUG_MISC, "IPv6 nested extension header limit");
10406
REASON_SET(reason, PFRES_IPOPTIONS);
10407
return (PF_DROP);
10408
}
10409
#endif /* INET6 */
10410
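/*
 * Illustrative sketch, not part of pf.c: the three length formulas the
 * header walks above depend on. Each header type encodes its size in
 * different units and with a different bias: the IPv4 ip_hl field counts
 * 32-bit words, AH counts 32-bit words minus two, and the remaining IPv6
 * extension headers count 8-byte units minus one. Names are hypothetical.
 */
static u_int __unused
example_ipv4_hdr_bytes(uint8_t ip_hl)
{
	return (ip_hl << 2);		/* 5..15 words -> 20..60 bytes */
}

static u_int __unused
example_ah_bytes(uint8_t ah_len)
{
	return ((ah_len + 2) * 4);	/* as in pf_walk_header() above */
}

static u_int __unused
example_ip6_ext_bytes(uint8_t ip6e_len)
{
	return ((ip6e_len + 1) * 8);	/* as in pf_walk_header6() above */
}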
10411
static void
10412
pf_init_pdesc(struct pf_pdesc *pd, struct mbuf *m)
10413
{
10414
memset(pd, 0, sizeof(*pd));
10415
pd->pf_mtag = pf_find_mtag(m);
10416
pd->m = m;
10417
}
10418
10419
static int
10420
pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
10421
u_short *action, u_short *reason, struct pfi_kkif *kif,
10422
struct pf_rule_actions *default_actions)
10423
{
10424
pd->dir = dir;
10425
pd->kif = kif;
10426
pd->m = *m0;
10427
pd->sidx = (dir == PF_IN) ? 0 : 1;
10428
pd->didx = (dir == PF_IN) ? 1 : 0;
10429
pd->af = pd->naf = af;
10430
10431
PF_RULES_ASSERT();
10432
10433
TAILQ_INIT(&pd->sctp_multihome_jobs);
10434
if (default_actions != NULL)
10435
memcpy(&pd->act, default_actions, sizeof(pd->act));
10436
10437
if (pd->pf_mtag && pd->pf_mtag->dnpipe) {
10438
pd->act.dnpipe = pd->pf_mtag->dnpipe;
10439
pd->act.flags = pd->pf_mtag->dnflags;
10440
}
10441
10442
switch (af) {
10443
#ifdef INET
10444
case AF_INET: {
10445
struct ip *h;
10446
10447
if (__predict_false((*m0)->m_len < sizeof(struct ip)) &&
10448
(pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) {
10449
DPFPRINTF(PF_DEBUG_URGENT,
10450
"%s: m_len < sizeof(struct ip), pullup failed",
10451
__func__);
10452
*action = PF_DROP;
10453
REASON_SET(reason, PFRES_SHORT);
10454
return (PF_DROP);
10455
}
10456
10457
h = mtod(pd->m, struct ip *);
10458
if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
10459
*action = PF_DROP;
10460
REASON_SET(reason, PFRES_SHORT);
10461
return (PF_DROP);
10462
}
10463
10464
if (pf_normalize_ip(reason, pd) != PF_PASS) {
10465
/* We do IP header normalization and packet reassembly here */
10466
*m0 = pd->m;
10467
*action = PF_DROP;
10468
return (PF_DROP);
10469
}
10470
*m0 = pd->m;
10471
h = mtod(pd->m, struct ip *);
10472
10473
if (pf_walk_header(pd, h, reason) != PF_PASS) {
10474
*action = PF_DROP;
10475
return (PF_DROP);
10476
}
10477
10478
pd->src = (struct pf_addr *)&h->ip_src;
10479
pd->dst = (struct pf_addr *)&h->ip_dst;
10480
pf_addrcpy(&pd->osrc, pd->src, af);
10481
pf_addrcpy(&pd->odst, pd->dst, af);
10482
pd->ip_sum = &h->ip_sum;
10483
pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
10484
pd->ttl = h->ip_ttl;
10485
pd->tot_len = ntohs(h->ip_len);
10486
pd->act.rtableid = -1;
10487
pd->df = h->ip_off & htons(IP_DF);
10488
pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
10489
PF_VPROTO_FRAGMENT : pd->proto;
10490
10491
break;
10492
}
10493
#endif /* INET */
10494
#ifdef INET6
10495
case AF_INET6: {
10496
struct ip6_hdr *h;
10497
10498
if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) &&
10499
(pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) {
10500
DPFPRINTF(PF_DEBUG_URGENT,
10501
"%s: m_len < sizeof(struct ip6_hdr)"
10502
", pullup failed", __func__);
10503
*action = PF_DROP;
10504
REASON_SET(reason, PFRES_SHORT);
10505
return (PF_DROP);
10506
}
10507
10508
h = mtod(pd->m, struct ip6_hdr *);
10509
if (pd->m->m_pkthdr.len <
10510
sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
10511
*action = PF_DROP;
10512
REASON_SET(reason, PFRES_SHORT);
10513
return (PF_DROP);
10514
}
10515
10516
/*
10517
* We do not support jumbograms. If we keep going, a zero ip6_plen
10518
* will do something bad, so drop the packet for now.
10519
*/
10520
if (htons(h->ip6_plen) == 0) {
10521
*action = PF_DROP;
10522
return (PF_DROP);
10523
}
10524
10525
if (pf_walk_header6(pd, h, reason) != PF_PASS) {
10526
*action = PF_DROP;
10527
return (PF_DROP);
10528
}
10529
10530
h = mtod(pd->m, struct ip6_hdr *);
10531
pd->src = (struct pf_addr *)&h->ip6_src;
10532
pd->dst = (struct pf_addr *)&h->ip6_dst;
10533
pf_addrcpy(&pd->osrc, pd->src, af);
10534
pf_addrcpy(&pd->odst, pd->dst, af);
10535
pd->ip_sum = NULL;
10536
pd->tos = IPV6_DSCP(h);
10537
pd->ttl = h->ip6_hlim;
10538
pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
10539
pd->act.rtableid = -1;
10540
10541
pd->virtual_proto = (pd->fragoff != 0) ?
10542
PF_VPROTO_FRAGMENT : pd->proto;
10543
10544
/* We do IP header normalization and packet reassembly here */
10545
if (pf_normalize_ip6(pd->fragoff, reason, pd) !=
10546
PF_PASS) {
10547
*m0 = pd->m;
10548
*action = PF_DROP;
10549
return (PF_DROP);
10550
}
10551
*m0 = pd->m;
10552
if (pd->m == NULL) {
10553
/* packet sits in reassembly queue, no error */
10554
*action = PF_PASS;
10555
return (PF_DROP);
10556
}
10557
10558
/* Update pointers into the packet. */
10559
h = mtod(pd->m, struct ip6_hdr *);
10560
pd->src = (struct pf_addr *)&h->ip6_src;
10561
pd->dst = (struct pf_addr *)&h->ip6_dst;
10562
10563
pd->off = 0;
10564
10565
if (pf_walk_header6(pd, h, reason) != PF_PASS) {
10566
*action = PF_DROP;
10567
return (PF_DROP);
10568
}
10569
10570
if (m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL) != NULL) {
10571
/*
10572
* Reassembly may have changed the next protocol from
10573
* fragment to something else, so update.
10574
*/
10575
pd->virtual_proto = pd->proto;
10576
MPASS(pd->fragoff == 0);
10577
}
10578
10579
if (pd->fragoff != 0)
10580
pd->virtual_proto = PF_VPROTO_FRAGMENT;
10581
10582
break;
10583
}
10584
#endif /* INET6 */
10585
default:
10586
panic("pf_setup_pdesc called with illegal af %u", af);
10587
}
10588
10589
switch (pd->virtual_proto) {
10590
case IPPROTO_TCP: {
10591
struct tcphdr *th = &pd->hdr.tcp;
10592
10593
if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
10594
reason, af)) {
10595
*action = PF_DROP;
10596
REASON_SET(reason, PFRES_SHORT);
10597
return (PF_DROP);
10598
}
10599
pd->hdrlen = sizeof(*th);
10600
pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
10601
pd->sport = &th->th_sport;
10602
pd->dport = &th->th_dport;
10603
pd->pcksum = &th->th_sum;
10604
break;
10605
}
10606
case IPPROTO_UDP: {
10607
struct udphdr *uh = &pd->hdr.udp;
10608
10609
if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
10610
reason, af)) {
10611
*action = PF_DROP;
10612
REASON_SET(reason, PFRES_SHORT);
10613
return (PF_DROP);
10614
}
10615
pd->hdrlen = sizeof(*uh);
10616
if (uh->uh_dport == 0 ||
10617
ntohs(uh->uh_ulen) > pd->m->m_pkthdr.len - pd->off ||
10618
ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
10619
*action = PF_DROP;
10620
REASON_SET(reason, PFRES_SHORT);
10621
return (PF_DROP);
10622
}
10623
pd->sport = &uh->uh_sport;
10624
pd->dport = &uh->uh_dport;
10625
pd->pcksum = &uh->uh_sum;
10626
break;
10627
}
10628
case IPPROTO_SCTP: {
10629
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp),
10630
reason, af)) {
10631
*action = PF_DROP;
10632
REASON_SET(reason, PFRES_SHORT);
10633
return (PF_DROP);
10634
}
10635
pd->hdrlen = sizeof(pd->hdr.sctp);
10636
pd->p_len = pd->tot_len - pd->off;
10637
10638
pd->sport = &pd->hdr.sctp.src_port;
10639
pd->dport = &pd->hdr.sctp.dest_port;
10640
if (pd->hdr.sctp.src_port == 0 || pd->hdr.sctp.dest_port == 0) {
10641
*action = PF_DROP;
10642
REASON_SET(reason, PFRES_SHORT);
10643
return (PF_DROP);
10644
}
10645
10646
/*
10647
* Placeholder. The SCTP checksum is 32 bits, but
10648
* pf_test_state() expects to update a 16-bit checksum.
10649
* Provide a dummy value which we'll subsequently ignore.
10650
* Do this before pf_scan_sctp() so any jobs we enqueue
10651
* have a pcksum set.
10652
*/
10653
pd->pcksum = &pd->sctp_dummy_sum;
10654
10655
if (pf_scan_sctp(pd) != PF_PASS) {
10656
*action = PF_DROP;
10657
REASON_SET(reason, PFRES_SHORT);
10658
return (PF_DROP);
10659
}
10660
break;
10661
}
10662
case IPPROTO_ICMP: {
10663
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
10664
reason, af)) {
10665
*action = PF_DROP;
10666
REASON_SET(reason, PFRES_SHORT);
10667
return (PF_DROP);
10668
}
10669
pd->pcksum = &pd->hdr.icmp.icmp_cksum;
10670
pd->hdrlen = ICMP_MINLEN;
10671
break;
10672
}
10673
#ifdef INET6
10674
case IPPROTO_ICMPV6: {
10675
size_t icmp_hlen = sizeof(struct icmp6_hdr);
10676
10677
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
10678
reason, af)) {
10679
*action = PF_DROP;
10680
REASON_SET(reason, PFRES_SHORT);
10681
return (PF_DROP);
10682
}
10683
/* ICMP headers we look further into to match state */
10684
switch (pd->hdr.icmp6.icmp6_type) {
10685
case MLD_LISTENER_QUERY:
10686
case MLD_LISTENER_REPORT:
10687
icmp_hlen = sizeof(struct mld_hdr);
10688
break;
10689
case ND_NEIGHBOR_SOLICIT:
10690
case ND_NEIGHBOR_ADVERT:
10691
icmp_hlen = sizeof(struct nd_neighbor_solicit);
10692
/* FALLTHROUGH */
10693
case ND_ROUTER_SOLICIT:
10694
case ND_ROUTER_ADVERT:
10695
case ND_REDIRECT:
10696
if (pd->ttl != 255) {
10697
REASON_SET(reason, PFRES_NORM);
10698
return (PF_DROP);
10699
}
10700
break;
10701
}
10702
if (icmp_hlen > sizeof(struct icmp6_hdr) &&
10703
!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
10704
reason, af)) {
10705
*action = PF_DROP;
10706
REASON_SET(reason, PFRES_SHORT);
10707
return (PF_DROP);
10708
}
10709
pd->hdrlen = icmp_hlen;
10710
pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
10711
break;
10712
}
10713
#endif /* INET6 */
10714
default:
10715
/*
10716
* Placeholder value, so future calls to pf_change_ap() don't
10717
* try to update a NULL checksum pointer.
10718
*/
10719
pd->pcksum = &pd->sctp_dummy_sum;
10720
break;
10721
}
10722
10723
if (pd->sport)
10724
pd->osport = pd->nsport = *pd->sport;
10725
if (pd->dport)
10726
pd->odport = pd->ndport = *pd->dport;
10727
10728
MPASS(pd->pcksum != NULL);
10729
10730
return (PF_PASS);
10731
}
10732
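/*
 * Illustrative sketch, not part of pf.c: the UDP sanity checks applied by
 * pf_setup_pdesc() above, over plain host-order values. The destination
 * port must be non-zero, and the length claimed in uh_ulen must cover at
 * least the 8-byte UDP header while still fitting into the bytes that
 * remain in the packet after the IP headers. Returns 0 when acceptable.
 * The name is hypothetical.
 */
static int __unused
example_udp_sane(uint16_t dport, uint16_t ulen, u_int remaining)
{
	if (dport == 0)
		return (-1);		/* port 0 is never valid */
	if (ulen < sizeof(struct udphdr))
		return (-1);		/* must cover the UDP header itself */
	if (ulen > remaining)
		return (-1);		/* claims more than the packet holds */
	return (0);
}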
10733
static __inline void
10734
pf_rule_counters_inc(struct pf_pdesc *pd, struct pf_krule *r, int dir_out,
10735
int op_pass, sa_family_t af, struct pf_addr *src_host,
10736
struct pf_addr *dst_host)
10737
{
10738
pf_counter_u64_add_protected(&(r->packets[dir_out]), 1);
10739
pf_counter_u64_add_protected(&(r->bytes[dir_out]), pd->tot_len);
10740
pf_update_timestamp(r);
10741
10742
if (r->src.addr.type == PF_ADDR_TABLE)
10743
pfr_update_stats(r->src.addr.p.tbl, src_host, af,
10744
pd->tot_len, dir_out, op_pass, r->src.neg);
10745
if (r->dst.addr.type == PF_ADDR_TABLE)
10746
pfr_update_stats(r->dst.addr.p.tbl, dst_host, af,
10747
pd->tot_len, dir_out, op_pass, r->dst.neg);
10748
}
10749
10750
static void
10751
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_kstate *s,
10752
struct pf_krule *r, struct pf_krule *a, struct pf_krule_slist *match_rules)
10753
{
10754
struct pf_krule_slist *mr = match_rules;
10755
struct pf_krule_item *ri;
10756
struct pf_krule *nr = NULL;
10757
struct pf_addr *src_host = pd->src;
10758
struct pf_addr *dst_host = pd->dst;
10759
struct pf_state_key *key;
10760
int dir_out = (pd->dir == PF_OUT);
10761
int op_r_pass = (r->action == PF_PASS);
10762
int op_pass = (action == PF_PASS || action == PF_AFRT);
10763
int s_dir_in, s_dir_out, s_dir_rev;
10764
sa_family_t af = pd->af;
10765
10766
pf_counter_u64_critical_enter();
10767
10768
/*
10769
* Set AF for interface counters; it will later be overwritten for
10770
* rule and state counters with value from proper state key.
10771
*/
10772
if (action == PF_AFRT) {
10773
MPASS(s != NULL);
10774
if (s->direction == PF_OUT && dir_out)
10775
af = pd->naf;
10776
}
10777
10778
pf_counter_u64_add_protected(
10779
&pd->kif->pfik_bytes[af == AF_INET6][dir_out][!op_pass],
10780
pd->tot_len);
10781
pf_counter_u64_add_protected(
10782
&pd->kif->pfik_packets[af == AF_INET6][dir_out][!op_pass],
10783
1);
10784
10785
/* If the rule has failed to apply, don't increase its counters */
10786
if (!(op_pass || r->action == PF_DROP)) {
10787
pf_counter_u64_critical_exit();
10788
return;
10789
}
10790
10791
if (s != NULL) {
10792
PF_STATE_LOCK_ASSERT(s);
10793
mr = &(s->match_rules);
10794
10795
/*
10796
* For af-to on the inbound direction we can determine
10797
* the direction of passing packet only by checking direction
10798
* of AF translation. The af-to in "in" direction covers both
10799
* the inbound and the outbound side of state tracking,
10800
* so pd->dir is always PF_IN. We set dir_out and s_dir_rev
10801
* in a way to count packets as if the state was outbound,
10802
* because pfctl -ss shows the state with "->", as if it was
10803
* outbound.
10804
*/
10805
if (action == PF_AFRT && s->direction == PF_IN) {
10806
dir_out = (pd->naf == s->rule->naf);
10807
s_dir_in = 1;
10808
s_dir_out = 0;
10809
s_dir_rev = (pd->naf == s->rule->af);
10810
} else {
10811
dir_out = (pd->dir == PF_OUT);
10812
s_dir_in = (s->direction == PF_IN);
10813
s_dir_out = (s->direction == PF_OUT);
10814
s_dir_rev = (pd->dir != s->direction);
10815
}
10816
10817
/* pd->tot_len is problematic with af-to rules. Sure, we can
10818
* agree that it's the post-af-to packet length that was
10819
* forwarded through a state, but what about tables which match
10820
* on pre-af-to addresses? We don't have access to the original
10821
* packet length anymore.
10822
*/
10823
s->packets[s_dir_rev]++;
10824
s->bytes[s_dir_rev] += pd->tot_len;
10825
10826
/*
10827
* Source nodes are accessed unlocked here. But since we are
10828
* operating with stateful tracking and the state is locked,
10829
* those SNs cannot have been freed.
10830
*/
10831
for (pf_sn_types_t sn_type = 0; sn_type < PF_SN_MAX; sn_type++) {
10832
if (s->sns[sn_type] != NULL) {
10833
counter_u64_add(
10834
s->sns[sn_type]->packets[dir_out],
10835
1);
10836
counter_u64_add(
10837
s->sns[sn_type]->bytes[dir_out],
10838
pd->tot_len);
10839
}
10840
}
10841
10842
/* Start with pre-NAT addresses */
10843
key = s->key[(s->direction == PF_OUT)];
10844
src_host = &(key->addr[s_dir_out]);
10845
dst_host = &(key->addr[s_dir_in]);
10846
af = key->af;
10847
if (s->nat_rule) {
10848
/* Old-style NAT rules */
10849
if (s->nat_rule->action == PF_NAT ||
10850
s->nat_rule->action == PF_RDR ||
10851
s->nat_rule->action == PF_BINAT) {
10852
nr = s->nat_rule;
10853
pf_rule_counters_inc(pd, s->nat_rule, dir_out,
10854
op_r_pass, af, src_host, dst_host);
10855
/* Use post-NAT addresses from now on */
10856
key = s->key[s_dir_in];
10857
src_host = &(key->addr[s_dir_out]);
10858
dst_host = &(key->addr[s_dir_in]);
10859
af = key->af;
10860
}
10861
}
10862
}
10863
10864
SLIST_FOREACH(ri, mr, entry) {
10865
pf_rule_counters_inc(pd, ri->r, dir_out, op_r_pass, af,
10866
src_host, dst_host);
10867
if (s && s->nat_rule == ri->r) {
10868
/* Use post-NAT addresses after a match NAT rule */
10869
key = s->key[s_dir_in];
10870
src_host = &(key->addr[s_dir_out]);
10871
dst_host = &(key->addr[s_dir_in]);
10872
af = key->af;
10873
}
10874
}
10875
10876
if (s == NULL) {
10877
pf_free_match_rules(mr);
10878
}
10879
10880
if (a != NULL) {
10881
pf_rule_counters_inc(pd, a, dir_out, op_r_pass, af,
10882
src_host, dst_host);
10883
}
10884
10885
if (r != nr) {
10886
pf_rule_counters_inc(pd, r, dir_out, op_r_pass, af,
10887
src_host, dst_host);
10888
}
10889
10890
pf_counter_u64_critical_exit();
10891
}
10892
10893
static void
10894
pf_log_matches(struct pf_pdesc *pd, struct pf_krule *rm,
10895
struct pf_krule *am, struct pf_kruleset *ruleset,
10896
struct pf_krule_slist *match_rules)
10897
{
10898
struct pf_krule_item *ri;
10899
10900
/* if this is the log(matches) rule, the packet has been logged already */
10901
if (rm->log & PF_LOG_MATCHES)
10902
return;
10903
10904
SLIST_FOREACH(ri, match_rules, entry)
10905
if (ri->r->log & PF_LOG_MATCHES)
10906
PFLOG_PACKET(rm->action, PFRES_MATCH, rm, am,
10907
ruleset, pd, 1, ri->r);
10908
}
10909
10910
#if defined(INET) || defined(INET6)
10911
int
10912
pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
10913
struct inpcb *inp, struct pf_rule_actions *default_actions)
10914
{
10915
struct pfi_kkif *kif;
10916
u_short action, reason = 0;
10917
struct m_tag *mtag;
10918
struct pf_krule *a = NULL, *r = &V_pf_default_rule;
10919
struct pf_kstate *s = NULL;
10920
struct pf_kruleset *ruleset = NULL;
10921
struct pf_krule_item *ri;
10922
struct pf_krule_slist match_rules;
10923
struct pf_pdesc pd;
10924
int use_2nd_queue = 0;
10925
uint16_t tag;
10926
10927
PF_RULES_RLOCK_TRACKER;
10928
KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir));
10929
M_ASSERTPKTHDR(*m0);
10930
NET_EPOCH_ASSERT();
10931
10932
if (!V_pf_status.running)
10933
return (PF_PASS);
10934
10935
kif = (struct pfi_kkif *)ifp->if_pf_kif;
10936
10937
if (__predict_false(kif == NULL)) {
10938
DPFPRINTF(PF_DEBUG_URGENT,
10939
"%s: kif == NULL, if_xname %s",
10940
__func__, ifp->if_xname);
10941
return (PF_DROP);
10942
}
10943
if (kif->pfik_flags & PFI_IFLAG_SKIP) {
10944
return (PF_PASS);
10945
}
10946
10947
if ((*m0)->m_flags & M_SKIP_FIREWALL) {
10948
return (PF_PASS);
10949
}
10950
10951
if (__predict_false(! M_WRITABLE(*m0))) {
10952
*m0 = m_unshare(*m0, M_NOWAIT);
10953
if (*m0 == NULL) {
10954
return (PF_DROP);
10955
}
10956
}
10957
10958
pf_init_pdesc(&pd, *m0);
10959
SLIST_INIT(&match_rules);
10960
10961
if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) {
10962
pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
10963
10964
ifp = ifnet_byindexgen(pd.pf_mtag->if_index,
10965
pd.pf_mtag->if_idxgen);
10966
if (ifp == NULL || ifp->if_flags & IFF_DYING) {
10967
m_freem(*m0);
10968
*m0 = NULL;
10969
return (PF_PASS);
10970
}
10971
(ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL);
10972
*m0 = NULL;
10973
return (PF_PASS);
10974
}
10975
10976
if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
10977
pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
10978
/* Dummynet re-injects packets after they've
10979
* completed their delay. We've already
10980
* processed them, so pass unconditionally. */
10981
10982
/* But only once. We may see the packet multiple times (e.g.
10983
* PFIL_IN/PFIL_OUT). */
10984
pf_dummynet_flag_remove(pd.m, pd.pf_mtag);
10985
10986
return (PF_PASS);
10987
}
10988
10989
PF_RULES_RLOCK();
10990
10991
if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason,
10992
kif, default_actions) != PF_PASS) {
10993
if (action != PF_PASS)
10994
pd.act.log |= PF_LOG_FORCE;
10995
goto done;
10996
}
10997
10998
#ifdef INET
10999
if (af == AF_INET && dir == PF_OUT && pflags & PFIL_FWD &&
11000
pd.df && (*m0)->m_pkthdr.len > ifp->if_mtu) {
11001
PF_RULES_RUNLOCK();
11002
icmp_error(*m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
11003
0, ifp->if_mtu);
11004
*m0 = NULL;
11005
return (PF_DROP);
11006
}
11007
#endif /* INET */
11008
#ifdef INET6
11009
/*
11010
* If we end up changing IP addresses (e.g. binat) the stack may get
11011
* confused and fail to send the icmp6 packet too big error. Just send
11012
* it here, before we do any NAT.
11013
*/
11014
if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD &&
11015
IN6_LINKMTU(ifp) < pf_max_frag_size(*m0)) {
11016
PF_RULES_RUNLOCK();
11017
icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, IN6_LINKMTU(ifp));
11018
*m0 = NULL;
11019
return (PF_DROP);
11020
}
11021
#endif /* INET6 */
11022
11023
if (__predict_false(ip_divert_ptr != NULL) &&
11024
((mtag = m_tag_locate(pd.m, MTAG_PF_DIVERT, 0, NULL)) != NULL)) {
11025
struct pf_divert_mtag *dt = (struct pf_divert_mtag *)(mtag+1);
11026
if ((dt->idir == PF_DIVERT_MTAG_DIR_IN && dir == PF_IN) ||
11027
(dt->idir == PF_DIVERT_MTAG_DIR_OUT && dir == PF_OUT)) {
11028
if (pd.pf_mtag == NULL &&
11029
((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
11030
action = PF_DROP;
11031
goto done;
11032
}
11033
pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED;
11034
}
11035
if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) {
11036
pd.m->m_flags |= M_FASTFWD_OURS;
11037
pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
11038
}
11039
m_tag_delete(pd.m, mtag);
11040
11041
mtag = m_tag_locate(pd.m, MTAG_IPFW_RULE, 0, NULL);
11042
if (mtag != NULL)
11043
m_tag_delete(pd.m, mtag);
11044
}
11045
11046
switch (pd.virtual_proto) {
11047
case PF_VPROTO_FRAGMENT:
11048
/*
11049
* handle fragments that aren't reassembled by
11050
* normalization
11051
*/
11052
if (kif == NULL || r == NULL) /* pflog */
11053
action = PF_DROP;
11054
else
11055
action = pf_test_rule(&r, &s, &pd, &a,
11056
&ruleset, &reason, inp, &match_rules);
11057
if (action != PF_PASS)
11058
REASON_SET(&reason, PFRES_FRAG);
11059
break;
11060
11061
case IPPROTO_TCP: {
11062
/* Respond to SYN with a syncookie. */
11063
if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN &&
11064
pd.dir == PF_IN && pf_synflood_check(&pd)) {
11065
pf_syncookie_send(&pd, &reason);
11066
action = PF_DROP;
11067
break;
11068
}
11069
11070
if ((tcp_get_flags(&pd.hdr.tcp) & TH_ACK) && pd.p_len == 0)
11071
use_2nd_queue = 1;
11072
action = pf_normalize_tcp(&pd);
11073
if (action == PF_DROP)
11074
break;
11075
action = pf_test_state(&s, &pd, &reason);
11076
if (action == PF_PASS || action == PF_AFRT) {
11077
if (V_pfsync_update_state_ptr != NULL)
11078
V_pfsync_update_state_ptr(s);
11079
r = s->rule;
11080
a = s->anchor;
11081
} else if (s == NULL) {
11082
/* Validate remote SYN|ACK, re-create original SYN if
11083
* valid. */
11084
if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) ==
11085
TH_ACK && pf_syncookie_validate(&pd) &&
11086
pd.dir == PF_IN) {
11087
struct mbuf *msyn;
11088
11089
msyn = pf_syncookie_recreate_syn(&pd, &reason);
11090
if (msyn == NULL) {
11091
action = PF_DROP;
11092
break;
11093
}
11094
11095
action = pf_test(af, dir, pflags, ifp, &msyn, inp,
11096
&pd.act);
11097
m_freem(msyn);
11098
if (action != PF_PASS)
11099
break;
11100
11101
action = pf_test_state(&s, &pd, &reason);
11102
if (action != PF_PASS || s == NULL) {
11103
action = PF_DROP;
11104
break;
11105
}
11106
11107
s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1;
11108
s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1;
11109
pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST);
11110
action = pf_synproxy(&pd, s, &reason);
11111
break;
11112
} else {
11113
action = pf_test_rule(&r, &s, &pd,
11114
&a, &ruleset, &reason, inp, &match_rules);
11115
}
11116
}
11117
break;
11118
}
11119
11120
case IPPROTO_SCTP:
11121
action = pf_normalize_sctp(&pd);
11122
if (action == PF_DROP)
11123
break;
11124
/* fallthrough */
11125
case IPPROTO_UDP:
11126
default:
11127
action = pf_test_state(&s, &pd, &reason);
11128
if (action == PF_PASS || action == PF_AFRT) {
11129
if (V_pfsync_update_state_ptr != NULL)
11130
V_pfsync_update_state_ptr(s);
11131
r = s->rule;
11132
a = s->anchor;
11133
} else if (s == NULL) {
11134
action = pf_test_rule(&r, &s,
11135
&pd, &a, &ruleset, &reason, inp, &match_rules);
11136
}
11137
break;
11138
11139
case IPPROTO_ICMP:
11140
case IPPROTO_ICMPV6: {
11141
if (pd.virtual_proto == IPPROTO_ICMP && af != AF_INET) {
11142
action = PF_DROP;
11143
REASON_SET(&reason, PFRES_NORM);
11144
DPFPRINTF(PF_DEBUG_MISC,
11145
"dropping IPv6 packet with ICMPv4 payload");
11146
break;
11147
}
11148
if (pd.virtual_proto == IPPROTO_ICMPV6 && af != AF_INET6) {
11149
action = PF_DROP;
11150
REASON_SET(&reason, PFRES_NORM);
11151
DPFPRINTF(PF_DEBUG_MISC,
11152
"pf: dropping IPv4 packet with ICMPv6 payload");
11153
break;
11154
}
11155
action = pf_test_state_icmp(&s, &pd, &reason);
11156
if (action == PF_PASS || action == PF_AFRT) {
11157
if (V_pfsync_update_state_ptr != NULL)
11158
V_pfsync_update_state_ptr(s);
11159
r = s->rule;
11160
a = s->anchor;
11161
} else if (s == NULL)
11162
action = pf_test_rule(&r, &s, &pd,
11163
&a, &ruleset, &reason, inp, &match_rules);
11164
break;
11165
}
11166
11167
}
11168
11169
done:
11170
PF_RULES_RUNLOCK();
11171
11172
/* if packet sits in reassembly queue, return without error */
11173
if (pd.m == NULL) {
11174
pf_free_match_rules(&match_rules);
11175
goto eat_pkt;
11176
}
11177
11178
if (s)
11179
memcpy(&pd.act, &s->act, sizeof(s->act));
11180
11181
if (action == PF_PASS && pd.badopts != 0 && !pd.act.allow_opts) {
11182
action = PF_DROP;
11183
REASON_SET(&reason, PFRES_IPOPTIONS);
11184
pd.act.log = PF_LOG_FORCE;
11185
DPFPRINTF(PF_DEBUG_MISC,
11186
"pf: dropping packet with dangerous headers");
11187
}
11188
11189
if (pd.act.max_pkt_size &&
11190
pd.tot_len > pd.act.max_pkt_size) {
11191
action = PF_DROP;
11192
REASON_SET(&reason, PFRES_NORM);
11193
pd.act.log = PF_LOG_FORCE;
11194
DPFPRINTF(PF_DEBUG_MISC,
11195
"pf: dropping overly long packet");
11196
}
11197
11198
if (s) {
11199
uint8_t log = pd.act.log;
11200
memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions));
11201
pd.act.log |= log;
11202
tag = s->tag;
11203
} else {
11204
tag = r->tag;
11205
}
11206
11207
if (tag > 0 && pf_tag_packet(&pd, tag)) {
11208
action = PF_DROP;
11209
REASON_SET(&reason, PFRES_MEMORY);
11210
}
11211
11212
pf_scrub(&pd);
11213
if (pd.proto == IPPROTO_TCP && pd.act.max_mss)
11214
pf_normalize_mss(&pd);
11215
11216
if (pd.act.rtableid >= 0)
11217
M_SETFIB(pd.m, pd.act.rtableid);
11218
11219
if (pd.act.flags & PFSTATE_SETPRIO) {
11220
if (pd.tos & IPTOS_LOWDELAY)
11221
use_2nd_queue = 1;
11222
if (vlan_set_pcp(pd.m, pd.act.set_prio[use_2nd_queue])) {
11223
action = PF_DROP;
11224
REASON_SET(&reason, PFRES_MEMORY);
11225
pd.act.log = PF_LOG_FORCE;
11226
DPFPRINTF(PF_DEBUG_MISC,
11227
"pf: failed to allocate 802.1q mtag");
11228
}
11229
}
11230
11231
#ifdef ALTQ
11232
if (action == PF_PASS && pd.act.qid) {
11233
if (pd.pf_mtag == NULL &&
11234
((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
11235
action = PF_DROP;
11236
REASON_SET(&reason, PFRES_MEMORY);
11237
} else {
11238
if (s != NULL)
11239
pd.pf_mtag->qid_hash = pf_state_hash(s);
11240
if (use_2nd_queue || (pd.tos & IPTOS_LOWDELAY))
11241
pd.pf_mtag->qid = pd.act.pqid;
11242
else
11243
pd.pf_mtag->qid = pd.act.qid;
11244
/* Add hints for ecn. */
11245
pd.pf_mtag->hdr = mtod(pd.m, void *);
11246
}
11247
}
11248
#endif /* ALTQ */
11249
11250
/*
11251
* connections redirected to loopback should not match sockets
11252
* bound specifically to loopback due to security implications,
11253
* see tcp_input() and in_pcblookup_listen().
11254
*/
11255
if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
11256
pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule != NULL &&
11257
(s->nat_rule->action == PF_RDR ||
11258
s->nat_rule->action == PF_BINAT) &&
11259
pf_is_loopback(af, pd.dst))
11260
pd.m->m_flags |= M_SKIP_FIREWALL;
11261
11262
if (af == AF_INET && __predict_false(ip_divert_ptr != NULL) &&
11263
action == PF_PASS && r->divert.port && !PACKET_LOOPED(&pd)) {
11264
mtag = m_tag_alloc(MTAG_PF_DIVERT, 0,
11265
sizeof(struct pf_divert_mtag), M_NOWAIT | M_ZERO);
11266
if (mtag != NULL) {
11267
((struct pf_divert_mtag *)(mtag+1))->port =
11268
ntohs(r->divert.port);
11269
((struct pf_divert_mtag *)(mtag+1))->idir =
11270
(dir == PF_IN) ? PF_DIVERT_MTAG_DIR_IN :
11271
PF_DIVERT_MTAG_DIR_OUT;
11272
11273
pf_counters_inc(action, &pd, s, r, a, &match_rules);
11274
11275
if (s)
11276
PF_STATE_UNLOCK(s);
11277
11278
m_tag_prepend(pd.m, mtag);
11279
if (pd.m->m_flags & M_FASTFWD_OURS) {
11280
if (pd.pf_mtag == NULL &&
11281
((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
11282
action = PF_DROP;
11283
REASON_SET(&reason, PFRES_MEMORY);
11284
pd.act.log = PF_LOG_FORCE;
11285
DPFPRINTF(PF_DEBUG_MISC,
11286
"pf: failed to allocate tag");
11287
} else {
11288
pd.pf_mtag->flags |=
11289
PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
11290
pd.m->m_flags &= ~M_FASTFWD_OURS;
11291
}
11292
}
11293
ip_divert_ptr(*m0, dir == PF_IN);
11294
*m0 = NULL;
11295
11296
return (action);
11297
} else {
11298
/* XXX: ipfw has the same behaviour! */
11299
action = PF_DROP;
11300
REASON_SET(&reason, PFRES_MEMORY);
11301
pd.act.log = PF_LOG_FORCE;
11302
DPFPRINTF(PF_DEBUG_MISC,
11303
"pf: failed to allocate divert tag");
11304
}
11305
}
11306
/* XXX: Anybody working on it?! */
11307
if (af == AF_INET6 && r->divert.port)
11308
printf("pf: divert(9) is not supported for IPv6\n");
11309
11310
/* this flag will need revising if the pkt is forwarded */
11311
if (pd.pf_mtag)
11312
pd.pf_mtag->flags &= ~PF_MTAG_FLAG_PACKET_LOOPED;
11313
11314
if (pd.act.log) {
11315
struct pf_krule *lr;
11316
11317
if (s != NULL && s->nat_rule != NULL &&
11318
s->nat_rule->log & PF_LOG_ALL)
11319
lr = s->nat_rule;
11320
else
11321
lr = r;
11322
11323
if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL)
11324
PFLOG_PACKET(action, reason, lr, a,
11325
ruleset, &pd, (s == NULL), NULL);
11326
if (s) {
11327
SLIST_FOREACH(ri, &s->match_rules, entry)
11328
if (ri->r->log & PF_LOG_ALL)
11329
PFLOG_PACKET(action,
11330
reason, ri->r, a, ruleset, &pd, 0, NULL);
11331
}
11332
}
11333
11334
pf_counters_inc(action, &pd, s, r, a, &match_rules);
11335
11336
switch (action) {
11337
case PF_SYNPROXY_DROP:
11338
m_freem(*m0);
/* FALLTHROUGH */
11339
case PF_DEFER:
11340
*m0 = NULL;
11341
action = PF_PASS;
11342
break;
11343
case PF_DROP:
11344
m_freem(*m0);
11345
*m0 = NULL;
11346
break;
11347
case PF_AFRT:
11348
if (pf_translate_af(&pd)) {
11349
*m0 = pd.m;
11350
action = PF_DROP;
11351
break;
11352
}
11353
#ifdef INET
11354
if (pd.naf == AF_INET) {
11355
action = pf_route(r, kif->pfik_ifp, s, &pd,
11356
inp);
11357
}
11358
#endif /* INET */
11359
#ifdef INET6
11360
if (pd.naf == AF_INET6) {
11361
action = pf_route6(r, kif->pfik_ifp, s, &pd,
11362
inp);
11363
}
11364
#endif /* INET6 */
11365
*m0 = pd.m;
11366
goto out;
11367
break;
11368
default:
11369
if (pd.act.rt) {
11370
switch (af) {
11371
#ifdef INET
11372
case AF_INET:
11373
/* pf_route() returns unlocked. */
11374
action = pf_route(r, kif->pfik_ifp, s, &pd,
11375
inp);
11376
break;
11377
#endif /* INET */
11378
#ifdef INET6
11379
case AF_INET6:
11380
/* pf_route6() returns unlocked. */
11381
action = pf_route6(r, kif->pfik_ifp, s, &pd,
11382
inp);
11383
break;
11384
#endif /* INET6 */
11385
}
11386
*m0 = pd.m;
11387
goto out;
11388
}
11389
if (pf_dummynet(&pd, s, r, m0) != 0) {
11390
action = PF_DROP;
11391
REASON_SET(&reason, PFRES_MEMORY);
11392
}
11393
break;
11394
}
11395
11396
eat_pkt:
11397
SDT_PROBE4(pf, ip, test, done, action, reason, r, s);
11398
11399
if (s && action != PF_DROP) {
11400
if (!s->if_index_in && dir == PF_IN)
11401
s->if_index_in = ifp->if_index;
11402
else if (!s->if_index_out && dir == PF_OUT)
11403
s->if_index_out = ifp->if_index;
11404
}
11405
11406
if (s)
11407
PF_STATE_UNLOCK(s);
11408
11409
out:
11410
#ifdef INET6
11411
/* If reassembled packet passed, create new fragments. */
11412
if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT &&
11413
(! (pflags & PF_PFIL_NOREFRAGMENT)) &&
11414
(mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
11415
action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD);
11416
#endif /* INET6 */
11417
11418
pf_sctp_multihome_delayed(&pd, kif, s, action);
11419
11420
return (action);
11421
}
11422
#endif /* INET || INET6 */
11423
11424