GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/pf/pf.c
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2001 Daniel Hartmeier
5
* Copyright (c) 2002 - 2008 Henning Brauer
6
* Copyright (c) 2012 Gleb Smirnoff <[email protected]>
7
* All rights reserved.
8
*
9
* Redistribution and use in source and binary forms, with or without
10
* modification, are permitted provided that the following conditions
11
* are met:
12
*
13
* - Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* - Redistributions in binary form must reproduce the above
16
* copyright notice, this list of conditions and the following
17
* disclaimer in the documentation and/or other materials provided
18
* with the distribution.
19
*
20
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31
* POSSIBILITY OF SUCH DAMAGE.
32
*
33
* Effort sponsored in part by the Defense Advanced Research Projects
34
* Agency (DARPA) and Air Force Research Laboratory, Air Force
35
* Materiel Command, USAF, under agreement number F30602-01-2-0537.
36
*
37
* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
38
*/
39
40
#include <sys/cdefs.h>
41
#include "opt_bpf.h"
42
#include "opt_inet.h"
43
#include "opt_inet6.h"
44
#include "opt_pf.h"
45
#include "opt_sctp.h"
46
47
#include <sys/param.h>
48
#include <sys/bus.h>
49
#include <sys/endian.h>
50
#include <sys/gsb_crc32.h>
51
#include <sys/hash.h>
52
#include <sys/interrupt.h>
53
#include <sys/kernel.h>
54
#include <sys/kthread.h>
55
#include <sys/limits.h>
56
#include <sys/mbuf.h>
57
#include <sys/random.h>
58
#include <sys/refcount.h>
59
#include <sys/sdt.h>
60
#include <sys/socket.h>
61
#include <sys/sysctl.h>
62
#include <sys/taskqueue.h>
63
#include <sys/ucred.h>
64
65
#include <crypto/sha2/sha512.h>
66
67
#include <net/if.h>
68
#include <net/if_var.h>
69
#include <net/if_private.h>
70
#include <net/if_types.h>
71
#include <net/if_vlan_var.h>
72
#include <net/route.h>
73
#include <net/route/nhop.h>
74
#include <net/vnet.h>
75
76
#include <net/pfil.h>
77
#include <net/pfvar.h>
78
#include <net/if_pflog.h>
79
#include <net/if_pfsync.h>
80
81
#include <netinet/in_pcb.h>
82
#include <netinet/in_var.h>
83
#include <netinet/in_fib.h>
84
#include <netinet/ip.h>
85
#include <netinet/ip_fw.h>
86
#include <netinet/ip_icmp.h>
87
#include <netinet/icmp_var.h>
88
#include <netinet/ip_var.h>
89
#include <netinet/tcp.h>
90
#include <netinet/tcp_fsm.h>
91
#include <netinet/tcp_seq.h>
92
#include <netinet/tcp_timer.h>
93
#include <netinet/tcp_var.h>
94
#include <netinet/udp.h>
95
#include <netinet/udp_var.h>
96
97
/* dummynet */
98
#include <netinet/ip_dummynet.h>
99
#include <netinet/ip_fw.h>
100
#include <netpfil/ipfw/dn_heap.h>
101
#include <netpfil/ipfw/ip_fw_private.h>
102
#include <netpfil/ipfw/ip_dn_private.h>
103
104
#ifdef INET6
105
#include <netinet/ip6.h>
106
#include <netinet/icmp6.h>
107
#include <netinet6/nd6.h>
108
#include <netinet6/ip6_var.h>
109
#include <netinet6/in6_pcb.h>
110
#include <netinet6/in6_fib.h>
111
#include <netinet6/scope6_var.h>
112
#endif /* INET6 */
113
114
#include <netinet/sctp_header.h>
115
#include <netinet/sctp_crc32.h>
116
117
#include <netipsec/ah.h>
118
119
#include <machine/in_cksum.h>
120
#include <security/mac/mac_framework.h>
121
122
SDT_PROVIDER_DEFINE(pf);
123
SDT_PROBE_DEFINE2(pf, , test, reason_set, "int", "int");
124
SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *",
125
"struct pf_kstate *");
126
SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *",
127
"struct pf_state_key_cmp *", "int", "struct pf_pdesc *",
128
"struct pf_kstate *");
129
SDT_PROBE_DEFINE2(pf, ip, , bound_iface, "struct pf_kstate *",
130
"struct pfi_kkif *");
131
SDT_PROBE_DEFINE4(pf, ip, route_to, entry, "struct mbuf *",
132
"struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
133
SDT_PROBE_DEFINE1(pf, ip, route_to, drop, "int");
134
SDT_PROBE_DEFINE2(pf, ip, route_to, output, "struct ifnet *", "int");
135
SDT_PROBE_DEFINE4(pf, ip6, route_to, entry, "struct mbuf *",
136
"struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
137
SDT_PROBE_DEFINE1(pf, ip6, route_to, drop, "int");
138
SDT_PROBE_DEFINE2(pf, ip6, route_to, output, "struct ifnet *", "int");
139
SDT_PROBE_DEFINE4(pf, sctp, multihome, test, "struct pfi_kkif *",
140
"struct pf_krule *", "struct mbuf *", "int");
141
SDT_PROBE_DEFINE2(pf, sctp, multihome, add, "uint32_t",
142
"struct pf_sctp_source *");
143
SDT_PROBE_DEFINE3(pf, sctp, multihome, remove, "uint32_t",
144
"struct pf_kstate *", "struct pf_sctp_source *");
145
SDT_PROBE_DEFINE4(pf, sctp, multihome_scan, entry, "int",
146
"int", "struct pf_pdesc *", "int");
147
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, param, "uint16_t", "uint16_t");
148
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv4, "struct in_addr *",
149
"int");
150
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv6, "struct in_addr6 *",
151
"int");
152
153
SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *",
154
"struct mbuf *");
155
SDT_PROBE_DEFINE2(pf, eth, test_rule, test, "int", "struct pf_keth_rule *");
156
SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch,
157
"int", "struct pf_keth_rule *", "char *");
158
SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *");
159
SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match,
160
"int", "struct pf_keth_rule *");
161
SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t");
162
SDT_PROBE_DEFINE2(pf, , log, log, "int", "const char *");
163
164
/*
165
* Global variables
166
*/
167
168
/* state tables */
169
VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]);
170
VNET_DEFINE(struct pf_kpalist, pf_pabuf[3]);
171
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
172
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active);
173
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
174
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive);
175
VNET_DEFINE(struct pf_kstatus, pf_status);
176
177
VNET_DEFINE(u_int32_t, ticket_altqs_active);
178
VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
179
VNET_DEFINE(int, altqs_inactive_open);
180
VNET_DEFINE(u_int32_t, ticket_pabuf);
181
182
static const int PF_HDR_LIMIT = 20; /* arbitrary limit */
183
184
VNET_DEFINE(SHA512_CTX, pf_tcp_secret_ctx);
185
#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx)
186
VNET_DEFINE(u_char, pf_tcp_secret[16]);
187
#define V_pf_tcp_secret VNET(pf_tcp_secret)
188
VNET_DEFINE(int, pf_tcp_secret_init);
189
#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init)
190
VNET_DEFINE(int, pf_tcp_iss_off);
191
#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off)
192
VNET_DECLARE(int, pf_vnet_active);
193
#define V_pf_vnet_active VNET(pf_vnet_active)
194
195
VNET_DEFINE_STATIC(uint32_t, pf_purge_idx);
196
#define V_pf_purge_idx VNET(pf_purge_idx)
197
198
#ifdef PF_WANT_32_TO_64_COUNTER
199
VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter);
200
#define V_pf_counter_periodic_iter VNET(pf_counter_periodic_iter)
201
202
VNET_DEFINE(struct allrulelist_head, pf_allrulelist);
203
VNET_DEFINE(size_t, pf_allrulecount);
204
VNET_DEFINE(struct pf_krule *, pf_rulemarker);
205
#endif
206
207
#define PF_SCTP_MAX_ENDPOINTS 8
208
209
struct pf_sctp_endpoint;
210
RB_HEAD(pf_sctp_endpoints, pf_sctp_endpoint);
211
struct pf_sctp_source {
212
sa_family_t af;
213
struct pf_addr addr;
214
TAILQ_ENTRY(pf_sctp_source) entry;
215
};
216
TAILQ_HEAD(pf_sctp_sources, pf_sctp_source);
217
struct pf_sctp_endpoint
218
{
219
uint32_t v_tag;
220
struct pf_sctp_sources sources;
221
RB_ENTRY(pf_sctp_endpoint) entry;
222
};
223
static int
224
pf_sctp_endpoint_compare(struct pf_sctp_endpoint *a, struct pf_sctp_endpoint *b)
225
{
226
return (a->v_tag - b->v_tag);
227
}
228
RB_PROTOTYPE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
229
RB_GENERATE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
230
VNET_DEFINE_STATIC(struct pf_sctp_endpoints, pf_sctp_endpoints);
231
#define V_pf_sctp_endpoints VNET(pf_sctp_endpoints)
232
static struct mtx_padalign pf_sctp_endpoints_mtx;
233
MTX_SYSINIT(pf_sctp_endpoints_mtx, &pf_sctp_endpoints_mtx, "SCTP endpoints", MTX_DEF);
234
#define PF_SCTP_ENDPOINTS_LOCK() mtx_lock(&pf_sctp_endpoints_mtx)
235
#define PF_SCTP_ENDPOINTS_UNLOCK() mtx_unlock(&pf_sctp_endpoints_mtx)
236
237
/*
238
* Queue for pf_intr() sends.
239
*/
240
static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
241
struct pf_send_entry {
242
STAILQ_ENTRY(pf_send_entry) pfse_next;
243
struct mbuf *pfse_m;
244
enum {
245
PFSE_IP,
246
PFSE_IP6,
247
PFSE_ICMP,
248
PFSE_ICMP6,
249
} pfse_type;
250
struct {
251
int type;
252
int code;
253
int mtu;
254
} icmpopts;
255
};
256
257
STAILQ_HEAD(pf_send_head, pf_send_entry);
258
VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
259
#define V_pf_sendqueue VNET(pf_sendqueue)
260
261
static struct mtx_padalign pf_sendqueue_mtx;
262
MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
263
#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx)
264
#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)
265
266
/*
267
* Queue for pf_overload_task() tasks.
268
*/
269
struct pf_overload_entry {
270
SLIST_ENTRY(pf_overload_entry) next;
271
struct pf_addr addr;
272
sa_family_t af;
273
uint8_t dir;
274
struct pf_krule *rule;
275
};
276
277
SLIST_HEAD(pf_overload_head, pf_overload_entry);
278
VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
279
#define V_pf_overloadqueue VNET(pf_overloadqueue)
280
VNET_DEFINE_STATIC(struct task, pf_overloadtask);
281
#define V_pf_overloadtask VNET(pf_overloadtask)
282
283
static struct mtx_padalign pf_overloadqueue_mtx;
284
MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
285
"pf overload/flush queue", MTX_DEF);
286
#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx)
287
#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)
288
289
VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules);
290
struct mtx_padalign pf_unlnkdrules_mtx;
291
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
292
MTX_DEF);
293
294
struct sx pf_config_lock;
295
SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config");
296
297
struct mtx_padalign pf_table_stats_lock;
298
MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats",
299
MTX_DEF);
300
301
VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z);
302
#define V_pf_sources_z VNET(pf_sources_z)
303
uma_zone_t pf_mtag_z;
304
VNET_DEFINE(uma_zone_t, pf_state_z);
305
VNET_DEFINE(uma_zone_t, pf_state_key_z);
306
VNET_DEFINE(uma_zone_t, pf_udp_mapping_z);
307
308
VNET_DEFINE(struct unrhdr64, pf_stateid);
309
310
static void pf_src_tree_remove_state(struct pf_kstate *);
311
static int pf_check_threshold(struct pf_kthreshold *);
312
313
static void pf_change_ap(struct pf_pdesc *, struct pf_addr *, u_int16_t *,
314
struct pf_addr *, u_int16_t);
315
static int pf_modulate_sack(struct pf_pdesc *,
316
struct tcphdr *, struct pf_state_peer *);
317
int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
318
u_int16_t *, u_int16_t *);
319
static void pf_change_icmp(struct pf_addr *, u_int16_t *,
320
struct pf_addr *, struct pf_addr *, u_int16_t,
321
u_int16_t *, u_int16_t *, u_int16_t *,
322
u_int16_t *, u_int8_t, sa_family_t);
323
int pf_change_icmp_af(struct mbuf *, int,
324
struct pf_pdesc *, struct pf_pdesc *,
325
struct pf_addr *, struct pf_addr *, sa_family_t,
326
sa_family_t);
327
int pf_translate_icmp_af(int, void *);
328
static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
329
int, sa_family_t, struct pf_krule *, int);
330
static void pf_detach_state(struct pf_kstate *);
331
static int pf_state_key_attach(struct pf_state_key *,
332
struct pf_state_key *, struct pf_kstate *);
333
static void pf_state_key_detach(struct pf_kstate *, int);
334
static int pf_state_key_ctor(void *, int, void *, int);
335
static u_int32_t pf_tcp_iss(struct pf_pdesc *);
336
static __inline void pf_dummynet_flag_remove(struct mbuf *m,
337
struct pf_mtag *pf_mtag);
338
static int pf_dummynet(struct pf_pdesc *, struct pf_kstate *,
339
struct pf_krule *, struct mbuf **);
340
static int pf_dummynet_route(struct pf_pdesc *,
341
struct pf_kstate *, struct pf_krule *,
342
struct ifnet *, const struct sockaddr *, struct mbuf **);
343
static int pf_test_eth_rule(int, struct pfi_kkif *,
344
struct mbuf **);
345
static enum pf_test_status pf_match_rule(struct pf_test_ctx *, struct pf_kruleset *);
346
static int pf_test_rule(struct pf_krule **, struct pf_kstate **,
347
struct pf_pdesc *, struct pf_krule **,
348
struct pf_kruleset **, u_short *, struct inpcb *,
349
struct pf_krule_slist *);
350
static int pf_create_state(struct pf_krule *,
351
struct pf_test_ctx *,
352
struct pf_kstate **, u_int16_t, u_int16_t);
353
static int pf_state_key_addr_setup(struct pf_pdesc *,
354
struct pf_state_key_cmp *, int);
355
static int pf_tcp_track_full(struct pf_kstate *,
356
struct pf_pdesc *, u_short *, int *,
357
struct pf_state_peer *, struct pf_state_peer *,
358
u_int8_t, u_int8_t);
359
static int pf_tcp_track_sloppy(struct pf_kstate *,
360
struct pf_pdesc *, u_short *,
361
struct pf_state_peer *, struct pf_state_peer *,
362
u_int8_t, u_int8_t);
363
static __inline int pf_synproxy_ack(struct pf_krule *, struct pf_pdesc *,
364
struct pf_kstate **, struct pf_rule_actions *);
365
static int pf_test_state(struct pf_kstate **, struct pf_pdesc *,
366
u_short *);
367
int pf_icmp_state_lookup(struct pf_state_key_cmp *,
368
struct pf_pdesc *, struct pf_kstate **,
369
u_int16_t, u_int16_t, int, int *, int, int);
370
static int pf_test_state_icmp(struct pf_kstate **,
371
struct pf_pdesc *, u_short *);
372
static int pf_sctp_track(struct pf_kstate *, struct pf_pdesc *,
373
u_short *);
374
static void pf_sctp_multihome_detach_addr(const struct pf_kstate *);
375
static void pf_sctp_multihome_delayed(struct pf_pdesc *,
376
struct pfi_kkif *, struct pf_kstate *, int);
377
static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
378
int, u_int16_t);
379
static int pf_check_proto_cksum(struct mbuf *, int, int,
380
u_int8_t, sa_family_t);
381
static int pf_walk_option(struct pf_pdesc *, struct ip *,
382
int, int, u_short *);
383
static int pf_walk_header(struct pf_pdesc *, struct ip *, u_short *);
384
#ifdef INET6
385
static int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
386
int, int, u_short *);
387
static int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
388
u_short *);
389
#endif
390
static void pf_print_state_parts(struct pf_kstate *,
391
struct pf_state_key *, struct pf_state_key *);
392
static int pf_patch_8(struct pf_pdesc *, u_int8_t *, u_int8_t,
393
bool);
394
static int pf_find_state(struct pf_pdesc *,
395
const struct pf_state_key_cmp *, struct pf_kstate **);
396
static bool pf_src_connlimit(struct pf_kstate *);
397
static int pf_match_rcvif(struct mbuf *, struct pf_krule *);
398
static void pf_counters_inc(int, struct pf_pdesc *,
399
struct pf_kstate *, struct pf_krule *,
400
struct pf_krule *, struct pf_krule_slist *);
401
static void pf_log_matches(struct pf_pdesc *, struct pf_krule *,
402
struct pf_krule *, struct pf_kruleset *,
403
struct pf_krule_slist *);
404
static void pf_overload_task(void *v, int pending);
405
static u_short pf_insert_src_node(struct pf_ksrc_node *[PF_SN_MAX],
406
struct pf_srchash *[PF_SN_MAX], struct pf_krule *,
407
struct pf_addr *, sa_family_t, struct pf_addr *,
408
struct pfi_kkif *, sa_family_t, pf_sn_types_t);
409
static u_int pf_purge_expired_states(u_int, int);
410
static void pf_purge_unlinked_rules(void);
411
static int pf_mtag_uminit(void *, int, int);
412
static void pf_mtag_free(struct m_tag *);
413
static void pf_packet_rework_nat(struct pf_pdesc *, int,
414
struct pf_state_key *);
415
#ifdef INET
416
static int pf_route(struct pf_krule *,
417
struct ifnet *, struct pf_kstate *,
418
struct pf_pdesc *, struct inpcb *);
419
#endif /* INET */
420
#ifdef INET6
421
static void pf_change_a6(struct pf_addr *, u_int16_t *,
422
struct pf_addr *, u_int8_t);
423
static int pf_route6(struct pf_krule *,
424
struct ifnet *, struct pf_kstate *,
425
struct pf_pdesc *, struct inpcb *);
426
#endif /* INET6 */
427
static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t);
428
429
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
430
431
static inline int
432
pf_statelim_id_cmp(const struct pf_statelim *a, const struct pf_statelim *b)
433
{
434
if (a->pfstlim_id > b->pfstlim_id)
435
return (1);
436
if (a->pfstlim_id < b->pfstlim_id)
437
return (-1);
438
439
return (0);
440
}
441
442
RB_GENERATE(pf_statelim_id_tree, pf_statelim, pfstlim_id_tree,
443
pf_statelim_id_cmp);
444
445
static inline int
446
pf_statelim_nm_cmp(const struct pf_statelim *a, const struct pf_statelim *b)
447
{
448
return (strncmp(a->pfstlim_nm, b->pfstlim_nm, sizeof(a->pfstlim_nm)));
449
}
450
451
RB_GENERATE(pf_statelim_nm_tree, pf_statelim, pfstlim_nm_tree,
452
pf_statelim_nm_cmp);
453
454
VNET_DEFINE(struct pf_statelim_id_tree, pf_statelim_id_tree_active);
455
VNET_DEFINE(struct pf_statelim_list, pf_statelim_list_active);
456
VNET_DEFINE(struct pf_statelim_id_tree, pf_statelim_id_tree_inactive);
457
VNET_DEFINE(struct pf_statelim_nm_tree, pf_statelim_nm_tree_inactive);
458
VNET_DEFINE(struct pf_statelim_list, pf_statelim_list_inactive);
459
460
static inline int
461
pf_sourcelim_id_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b)
462
{
463
if (a->pfsrlim_id > b->pfsrlim_id)
464
return (1);
465
if (a->pfsrlim_id < b->pfsrlim_id)
466
return (-1);
467
468
return (0);
469
}
470
471
RB_GENERATE(pf_sourcelim_id_tree, pf_sourcelim, pfsrlim_id_tree,
472
pf_sourcelim_id_cmp);
473
474
static inline int
475
pf_sourcelim_nm_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b)
476
{
477
return (strncmp(a->pfsrlim_nm, b->pfsrlim_nm, sizeof(a->pfsrlim_nm)));
478
}
479
480
RB_GENERATE(pf_sourcelim_nm_tree, pf_sourcelim, pfsrlim_nm_tree,
481
pf_sourcelim_nm_cmp);
482
483
static inline int
484
pf_source_cmp(const struct pf_source *a, const struct pf_source *b)
485
{
486
if (a->pfsr_af > b->pfsr_af)
487
return (1);
488
if (a->pfsr_af < b->pfsr_af)
489
return (-1);
490
if (a->pfsr_rdomain > b->pfsr_rdomain)
491
return (1);
492
if (a->pfsr_rdomain < b->pfsr_rdomain)
493
return (-1);
494
495
return (pf_addr_cmp(&a->pfsr_addr, &b->pfsr_addr, a->pfsr_af));
496
}
497
498
RB_GENERATE(pf_source_tree, pf_source, pfsr_tree, pf_source_cmp);
499
500
static inline int
501
pf_source_ioc_cmp(const struct pf_source *a, const struct pf_source *b)
502
{
503
size_t i;
504
505
if (a->pfsr_af > b->pfsr_af)
506
return (1);
507
if (a->pfsr_af < b->pfsr_af)
508
return (-1);
509
if (a->pfsr_rdomain > b->pfsr_rdomain)
510
return (1);
511
if (a->pfsr_rdomain < b->pfsr_rdomain)
512
return (-1);
513
514
for (i = 0; i < nitems(a->pfsr_addr.addr32); i++) {
515
uint32_t wa = ntohl(a->pfsr_addr.addr32[i]);
516
uint32_t wb = ntohl(b->pfsr_addr.addr32[i]);
517
518
if (wa > wb)
519
return (1);
520
if (wa < wb)
521
return (-1);
522
}
523
524
return (0);
525
}
526
527
RB_GENERATE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, pf_source_ioc_cmp);
528
529
VNET_DEFINE(struct pf_sourcelim_id_tree, pf_sourcelim_id_tree_active);
530
VNET_DEFINE(struct pf_sourcelim_list, pf_sourcelim_list_active);
531
532
VNET_DEFINE(struct pf_sourcelim_id_tree, pf_sourcelim_id_tree_inactive);
533
VNET_DEFINE(struct pf_sourcelim_nm_tree, pf_sourcelim_nm_tree_inactive);
534
VNET_DEFINE(struct pf_sourcelim_list, pf_sourcelim_list_inactive);
535
536
static inline struct pf_statelim *
537
pf_statelim_find(uint32_t id)
538
{
539
struct pf_statelim key;
540
541
/* only the id is used in cmp, so the rest need not be zeroed */
542
key.pfstlim_id = id;
543
544
return (RB_FIND(pf_statelim_id_tree,
545
&V_pf_statelim_id_tree_active, &key));
546
}
547
548
static inline struct pf_sourcelim *
549
pf_sourcelim_find(uint32_t id)
550
{
551
struct pf_sourcelim key;
552
553
/* only the id is used in cmp, so the rest need not be zeroed */
554
key.pfsrlim_id = id;
555
556
return (RB_FIND(pf_sourcelim_id_tree,
557
&V_pf_sourcelim_id_tree_active, &key));
558
}
559
560
struct pf_source_list pf_source_gc = TAILQ_HEAD_INITIALIZER(pf_source_gc);
561
562
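/*
* Sources whose pfsr_inuse count drops to zero are parked on the
* pf_source_gc list by pf_source_rele(). pf_source_purge() frees an
* entry once it has stayed idle for longer than the limiter's rate
* window.
*/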
static void
563
pf_source_purge(void)
564
{
565
struct pf_source *sr, *nsr;
566
567
TAILQ_FOREACH_SAFE(sr, &pf_source_gc, pfsr_empty_gc, nsr) {
568
struct pf_sourcelim *srlim = sr->pfsr_parent;
569
570
if (time_uptime <= sr->pfsr_empty_ts +
571
srlim->pfsrlim_rate.seconds + 1)
572
continue;
573
574
TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc);
575
576
RB_REMOVE(pf_source_tree, &srlim->pfsrlim_sources, sr);
577
RB_REMOVE(pf_source_ioc_tree, &srlim->pfsrlim_ioc_sources, sr);
578
srlim->pfsrlim_nsources--;
579
580
free(sr, M_PF_SOURCE_LIM);
581
}
582
}
583
584
static void
585
pf_source_pfr_addr(struct pfr_addr *p, const struct pf_source *sr)
586
{
587
struct pf_sourcelim *srlim = sr->pfsr_parent;
588
589
memset(p, 0, sizeof(*p));
590
591
p->pfra_af = sr->pfsr_af;
592
switch (sr->pfsr_af) {
593
case AF_INET:
594
p->pfra_net = srlim->pfsrlim_ipv4_prefix;
595
p->pfra_ip4addr = sr->pfsr_addr.v4;
596
break;
597
#ifdef INET6
598
case AF_INET6:
599
p->pfra_net = srlim->pfsrlim_ipv6_prefix;
600
p->pfra_ip6addr = sr->pfsr_addr.v6;
601
break;
602
#endif /* INET6 */
603
}
604
}
605
606
static void
607
pf_source_used(struct pf_source *sr)
608
{
609
struct pf_sourcelim *srlim = sr->pfsr_parent;
610
struct pfr_ktable *t;
611
unsigned int used;
612
613
used = sr->pfsr_inuse++;
614
sr->pfsr_rate_ts += srlim->pfsrlim_rate_token;
615
616
if (used == 0)
617
TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc);
618
else if ((t = srlim->pfsrlim_overload.table) != NULL &&
619
used >= srlim->pfsrlim_overload.hwm && !sr->pfsr_intable) {
620
struct pfr_addr p;
621
622
pf_source_pfr_addr(&p, sr);
623
624
pfr_insert_kentry(t, &p, time_second);
625
sr->pfsr_intable = 1;
626
}
627
}
628
629
static void
630
pf_source_rele(struct pf_source *sr)
631
{
632
struct pf_sourcelim *srlim = sr->pfsr_parent;
633
struct pfr_ktable *t;
634
unsigned int used;
635
636
used = --sr->pfsr_inuse;
637
638
t = srlim->pfsrlim_overload.table;
639
if (t != NULL && sr->pfsr_intable &&
640
used < srlim->pfsrlim_overload.lwm) {
641
struct pfr_addr p;
642
643
pf_source_pfr_addr(&p, sr);
644
645
pfr_remove_kentry(t, &p);
646
sr->pfsr_intable = 0;
647
}
648
649
if (used == 0) {
650
TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc);
651
sr->pfsr_empty_ts = time_uptime + srlim->pfsrlim_rate.seconds;
652
}
653
}
654
655
static inline void
656
pf_source_key(struct pf_sourcelim *srlim, struct pf_source *key,
657
sa_family_t af, const struct pf_addr *addr)
658
{
659
size_t i;
660
661
/* only af+addr is used for lookup. */
662
key->pfsr_af = af;
663
key->pfsr_rdomain = 0;
664
switch (af) {
665
case AF_INET:
666
key->pfsr_addr.addr32[0] =
667
srlim->pfsrlim_ipv4_mask.v4.s_addr &
668
addr->v4.s_addr;
669
670
for (i = 1; i < nitems(key->pfsr_addr.addr32); i++)
671
key->pfsr_addr.addr32[i] = htonl(0);
672
break;
673
#ifdef INET6
674
case AF_INET6:
675
for (i = 0; i < nitems(key->pfsr_addr.addr32); i++) {
676
key->pfsr_addr.addr32[i] =
677
srlim->pfsrlim_ipv6_mask.addr32[i] &
678
addr->addr32[i];
679
}
680
break;
681
#endif
682
default:
683
unhandled_af(af);
684
/* NOTREACHED */
685
}
686
}
687
688
static inline struct pf_source *
689
pf_source_find(struct pf_sourcelim *srlim, struct pf_source *key)
690
{
691
return (RB_FIND(pf_source_tree, &srlim->pfsrlim_sources, key));
692
}
693
694
extern int pf_end_threads;
695
extern struct proc *pf_purge_proc;
696
697
VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
698
699
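/*
* PACKET_UNDO_NAT() rewrites a packet back to its pre-translation
* addresses and ports: for an outbound packet the original tuple is
* recorded in the stack-side state key, for an inbound packet in the
* wire-side key.
*/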
#define PACKET_UNDO_NAT(_pd, _off, _s) \
700
do { \
701
struct pf_state_key *nk; \
702
if ((pd->dir) == PF_OUT) \
703
nk = (_s)->key[PF_SK_STACK]; \
704
else \
705
nk = (_s)->key[PF_SK_WIRE]; \
706
pf_packet_rework_nat(_pd, _off, nk); \
707
} while (0)
708
709
#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \
710
(pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED)
711
712
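/*
* BOUND_IFACE() picks the interface a new state is bound to. States
* are floating (V_pfi_all) unless the rule is interface-bound;
* reply-to, af-translated inbound and related states also start out
* floating. Otherwise the state stays on pd->kif, except that
* outbound route-to states bind to the route-to interface.
*/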
static struct pfi_kkif *
713
BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd)
714
{
715
struct pfi_kkif *k = pd->kif;
716
717
SDT_PROBE2(pf, ip, , bound_iface, st, k);
718
719
/* Floating unless otherwise specified. */
720
if (! (st->rule->rule_flag & PFRULE_IFBOUND))
721
return (V_pfi_all);
722
723
/*
724
* Initially set to all, because we don't know what interface we'll be
725
* sending this out on when we create the state.
726
*/
727
if (st->rule->rt == PF_REPLYTO || (pd->af != pd->naf && st->direction == PF_IN))
728
return (V_pfi_all);
729
730
/*
731
* If this state is created based on another state (e.g. SCTP
732
* multihome) always set it floating initially. We can't know for sure
733
* what interface the actual traffic for this state will come in on.
734
*/
735
if (pd->related_rule)
736
return (V_pfi_all);
737
738
/* Don't overrule the interface for states created on incoming packets. */
739
if (st->direction == PF_IN)
740
return (k);
741
742
/* No route-to, so don't overrule. */
743
if (st->act.rt != PF_ROUTETO)
744
return (k);
745
746
/* Bind to the route-to interface. */
747
return (st->act.rt_kif);
748
}
749
750
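/*
* Bump or drop the per-rule state counters for the rule that created
* the state, its anchor, the NAT rule and any recorded match rules.
* The inequality checks keep a rule that appears in more than one of
* those roles from being counted twice.
*/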
#define STATE_INC_COUNTERS(s) \
751
do { \
752
struct pf_krule_item *mrm; \
753
counter_u64_add(s->rule->states_cur, 1); \
754
counter_u64_add(s->rule->states_tot, 1); \
755
if (s->anchor != NULL) { \
756
counter_u64_add(s->anchor->states_cur, 1); \
757
counter_u64_add(s->anchor->states_tot, 1); \
758
} \
759
if (s->nat_rule != NULL && s->nat_rule != s->rule) { \
760
counter_u64_add(s->nat_rule->states_cur, 1); \
761
counter_u64_add(s->nat_rule->states_tot, 1); \
762
} \
763
SLIST_FOREACH(mrm, &s->match_rules, entry) { \
764
if (s->nat_rule != mrm->r) { \
765
counter_u64_add(mrm->r->states_cur, 1); \
766
counter_u64_add(mrm->r->states_tot, 1); \
767
} \
768
} \
769
} while (0)
770
771
#define STATE_DEC_COUNTERS(s) \
772
do { \
773
struct pf_krule_item *mrm; \
774
counter_u64_add(s->rule->states_cur, -1); \
775
if (s->anchor != NULL) \
776
counter_u64_add(s->anchor->states_cur, -1); \
777
if (s->nat_rule != NULL && s->nat_rule != s->rule) \
778
counter_u64_add(s->nat_rule->states_cur, -1); \
779
SLIST_FOREACH(mrm, &s->match_rules, entry) \
780
if (s->nat_rule != mrm->r) { \
781
counter_u64_add(mrm->r->states_cur, -1);\
782
} \
783
} while (0)
784
785
MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
786
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
787
MALLOC_DEFINE(M_PF_STATE_LINK, "pf_state_link", "pf(4) state links");
788
MALLOC_DEFINE(M_PF_SOURCE_LIM, "pf_source_lim", "pf(4) source limiter");
789
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
790
VNET_DEFINE(struct pf_idhash *, pf_idhash);
791
VNET_DEFINE(struct pf_srchash *, pf_srchash);
792
VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
793
VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping);
794
795
SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
796
"pf(4)");
797
798
VNET_DEFINE(u_long, pf_hashmask);
799
VNET_DEFINE(u_long, pf_srchashmask);
800
VNET_DEFINE(u_long, pf_udpendpointhashmask);
801
VNET_DEFINE_STATIC(u_long, pf_hashsize);
802
#define V_pf_hashsize VNET(pf_hashsize)
803
VNET_DEFINE_STATIC(u_long, pf_srchashsize);
804
#define V_pf_srchashsize VNET(pf_srchashsize)
805
VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize);
806
#define V_pf_udpendpointhashsize VNET(pf_udpendpointhashsize)
807
u_long pf_ioctl_maxcount = 65535;
808
809
SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
810
&VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
811
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
812
&VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
813
SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
814
&VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable");
815
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
816
&pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");
817
818
VNET_DEFINE(void *, pf_swi_cookie);
819
VNET_DEFINE(struct intr_event *, pf_swi_ie);
820
821
VNET_DEFINE(uint32_t, pf_hashseed);
822
#define V_pf_hashseed VNET(pf_hashseed)
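/*
* pf_sctp_checksum() recomputes the SCTP CRC32c in place: the checksum
* field is zeroed in the mbuf, sctp_calculate_cksum() is run from the
* start of the SCTP header, and the result is copied back.
*/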
823
824
static void
825
pf_sctp_checksum(struct mbuf *m, int off)
826
{
827
uint32_t sum = 0;
828
829
/* Zero out the checksum, to enable recalculation. */
830
m_copyback(m, off + offsetof(struct sctphdr, checksum),
831
sizeof(sum), (caddr_t)&sum);
832
833
sum = sctp_calculate_cksum(m, off);
834
835
m_copyback(m, off + offsetof(struct sctphdr, checksum),
836
sizeof(sum), (caddr_t)&sum);
837
}
838
839
int
840
pf_addr_cmp(const struct pf_addr *a, const struct pf_addr *b, sa_family_t af)
841
{
842
843
switch (af) {
844
#ifdef INET
845
case AF_INET:
846
if (a->addr32[0] > b->addr32[0])
847
return (1);
848
if (a->addr32[0] < b->addr32[0])
849
return (-1);
850
break;
851
#endif /* INET */
852
#ifdef INET6
853
case AF_INET6:
854
if (a->addr32[3] > b->addr32[3])
855
return (1);
856
if (a->addr32[3] < b->addr32[3])
857
return (-1);
858
if (a->addr32[2] > b->addr32[2])
859
return (1);
860
if (a->addr32[2] < b->addr32[2])
861
return (-1);
862
if (a->addr32[1] > b->addr32[1])
863
return (1);
864
if (a->addr32[1] < b->addr32[1])
865
return (-1);
866
if (a->addr32[0] > b->addr32[0])
867
return (1);
868
if (a->addr32[0] < b->addr32[0])
869
return (-1);
870
break;
871
#endif /* INET6 */
872
default:
873
unhandled_af(af);
874
}
875
return (0);
876
}
877
878
static bool
879
pf_is_loopback(sa_family_t af, struct pf_addr *addr)
880
{
881
switch (af) {
882
#ifdef INET
883
case AF_INET:
884
return IN_LOOPBACK(ntohl(addr->v4.s_addr));
885
#endif /* INET */
886
case AF_INET6:
887
return IN6_IS_ADDR_LOOPBACK(&addr->v6);
888
default:
889
unhandled_af(af);
890
}
891
}
892
893
static void
894
pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
895
{
896
897
switch (pd->virtual_proto) {
898
case IPPROTO_TCP: {
899
struct tcphdr *th = &pd->hdr.tcp;
900
901
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
902
pf_change_ap(pd, pd->src, &th->th_sport,
903
&nk->addr[pd->sidx], nk->port[pd->sidx]);
904
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
905
pf_change_ap(pd, pd->dst, &th->th_dport,
906
&nk->addr[pd->didx], nk->port[pd->didx]);
907
m_copyback(pd->m, off, sizeof(*th), (caddr_t)th);
908
break;
909
}
910
case IPPROTO_UDP: {
911
struct udphdr *uh = &pd->hdr.udp;
912
913
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
914
pf_change_ap(pd, pd->src, &uh->uh_sport,
915
&nk->addr[pd->sidx], nk->port[pd->sidx]);
916
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
917
pf_change_ap(pd, pd->dst, &uh->uh_dport,
918
&nk->addr[pd->didx], nk->port[pd->didx]);
919
m_copyback(pd->m, off, sizeof(*uh), (caddr_t)uh);
920
break;
921
}
922
case IPPROTO_SCTP: {
923
struct sctphdr *sh = &pd->hdr.sctp;
924
925
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
926
pf_change_ap(pd, pd->src, &sh->src_port,
927
&nk->addr[pd->sidx], nk->port[pd->sidx]);
928
}
929
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
930
pf_change_ap(pd, pd->dst, &sh->dest_port,
931
&nk->addr[pd->didx], nk->port[pd->didx]);
932
}
933
934
break;
935
}
936
case IPPROTO_ICMP: {
937
struct icmp *ih = &pd->hdr.icmp;
938
939
if (nk->port[pd->sidx] != ih->icmp_id) {
940
pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
941
ih->icmp_cksum, ih->icmp_id,
942
nk->port[pd->sidx], 0);
943
ih->icmp_id = nk->port[pd->sidx];
944
pd->sport = &ih->icmp_id;
945
946
m_copyback(pd->m, off, ICMP_MINLEN, (caddr_t)ih);
947
}
948
/* FALLTHROUGH */
949
}
950
default:
951
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
952
switch (pd->af) {
953
case AF_INET:
954
pf_change_a(&pd->src->v4.s_addr,
955
pd->ip_sum, nk->addr[pd->sidx].v4.s_addr,
956
0);
957
break;
958
case AF_INET6:
959
pf_addrcpy(pd->src, &nk->addr[pd->sidx],
960
pd->af);
961
break;
962
default:
963
unhandled_af(pd->af);
964
}
965
}
966
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
967
switch (pd->af) {
968
case AF_INET:
969
pf_change_a(&pd->dst->v4.s_addr,
970
pd->ip_sum, nk->addr[pd->didx].v4.s_addr,
971
0);
972
break;
973
case AF_INET6:
974
pf_addrcpy(pd->dst, &nk->addr[pd->didx],
975
pd->af);
976
break;
977
default:
978
unhandled_af(pd->af);
979
}
980
}
981
break;
982
}
983
}
984
985
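/*
* State key and source hashing use murmur3 seeded with the per-vnet
* V_pf_hashseed. Only the pf_state_key_cmp prefix of a state key is
* hashed, so a lookup key built as a pf_state_key_cmp lands in the
* same bucket as the full pf_state_key it is compared against.
*/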
static __inline uint32_t
986
pf_hashkey(const struct pf_state_key *sk)
987
{
988
uint32_t h;
989
990
h = murmur3_32_hash32((const uint32_t *)sk,
991
sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
992
V_pf_hashseed);
993
994
return (h & V_pf_hashmask);
995
}
996
997
__inline uint32_t
998
pf_hashsrc(struct pf_addr *addr, sa_family_t af)
999
{
1000
uint32_t h;
1001
1002
switch (af) {
1003
case AF_INET:
1004
h = murmur3_32_hash32((uint32_t *)&addr->v4,
1005
sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
1006
break;
1007
case AF_INET6:
1008
h = murmur3_32_hash32((uint32_t *)&addr->v6,
1009
sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
1010
break;
1011
default:
1012
unhandled_af(af);
1013
}
1014
1015
return (h & V_pf_srchashmask);
1016
}
1017
1018
static inline uint32_t
1019
pf_hashudpendpoint(struct pf_udp_endpoint *endpoint)
1020
{
1021
uint32_t h;
1022
1023
h = murmur3_32_hash32((uint32_t *)endpoint,
1024
sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t),
1025
V_pf_hashseed);
1026
return (h & V_pf_udpendpointhashmask);
1027
}
1028
1029
#ifdef ALTQ
1030
static int
1031
pf_state_hash(struct pf_kstate *s)
1032
{
1033
u_int32_t hv = (intptr_t)s / sizeof(*s);
1034
1035
hv ^= crc32(&s->src, sizeof(s->src));
1036
hv ^= crc32(&s->dst, sizeof(s->dst));
1037
if (hv == 0)
1038
hv = 1;
1039
return (hv);
1040
}
1041
#endif /* ALTQ */
1042
1043
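/*
* pf_set_protostate() updates the tracked protocol state of one or
* both peers. For TCP states created by this host it also keeps
* V_pf_status.states_halfopen in sync, decrementing the counter when
* the source peer leaves the half-open range.
*/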
static __inline void
1044
pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate)
1045
{
1046
if (which == PF_PEER_DST || which == PF_PEER_BOTH)
1047
s->dst.state = newstate;
1048
if (which == PF_PEER_DST)
1049
return;
1050
if (s->src.state == newstate)
1051
return;
1052
if (s->creatorid == V_pf_status.hostid &&
1053
s->key[PF_SK_STACK] != NULL &&
1054
s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
1055
!(TCPS_HAVEESTABLISHED(s->src.state) ||
1056
s->src.state == TCPS_CLOSED) &&
1057
(TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
1058
atomic_add_32(&V_pf_status.states_halfopen, -1);
1059
1060
s->src.state = newstate;
1061
}
1062
1063
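/*
* Thresholds (e.g. max-src-conn-rate) are backed by counter_rate
* objects: pf_init_threshold() allocates one for "limit" events per
* "seconds", and pf_check_threshold() registers an event and returns
* non-zero once that limit has been exceeded within the window.
*/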
bool
1064
pf_init_threshold(struct pf_kthreshold *threshold,
1065
u_int32_t limit, u_int32_t seconds)
1066
{
1067
threshold->limit = limit;
1068
threshold->seconds = seconds;
1069
threshold->cr = counter_rate_alloc(M_NOWAIT, seconds);
1070
1071
return (threshold->cr != NULL);
1072
}
1073
1074
static int
1075
pf_check_threshold(struct pf_kthreshold *threshold)
1076
{
1077
return (counter_ratecheck(threshold->cr, threshold->limit) < 0);
1078
}
1079
1080
static bool
1081
pf_src_connlimit(struct pf_kstate *state)
1082
{
1083
struct pf_overload_entry *pfoe;
1084
struct pf_ksrc_node *src_node = state->sns[PF_SN_LIMIT];
1085
bool limited = false;
1086
1087
PF_STATE_LOCK_ASSERT(state);
1088
PF_SRC_NODE_LOCK(src_node);
1089
1090
src_node->conn++;
1091
state->src.tcp_est = 1;
1092
1093
if (state->rule->max_src_conn &&
1094
state->rule->max_src_conn <
1095
src_node->conn) {
1096
counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
1097
limited = true;
1098
}
1099
1100
if (state->rule->max_src_conn_rate.limit &&
1101
pf_check_threshold(&src_node->conn_rate)) {
1102
counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
1103
limited = true;
1104
}
1105
1106
if (!limited)
1107
goto done;
1108
1109
/* Kill this state. */
1110
state->timeout = PFTM_PURGE;
1111
pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED);
1112
1113
if (state->rule->overload_tbl == NULL)
1114
goto done;
1115
1116
/* Schedule overloading and flushing task. */
1117
pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
1118
if (pfoe == NULL)
1119
goto done; /* too bad :( */
1120
1121
bcopy(&src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
1122
pfoe->af = state->key[PF_SK_WIRE]->af;
1123
pfoe->rule = state->rule;
1124
pfoe->dir = state->direction;
1125
PF_OVERLOADQ_LOCK();
1126
SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
1127
PF_OVERLOADQ_UNLOCK();
1128
taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
1129
1130
done:
1131
PF_SRC_NODE_UNLOCK(src_node);
1132
return (limited);
1133
}
1134
1135
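/*
* pf_overload_task() drains the overload queue filled by
* pf_src_connlimit(): each offending address is inserted into the
* rule's overload table and, when the rule requests flushing, states
* from that source are marked PFTM_PURGE so they are expired.
*/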
static void
1136
pf_overload_task(void *v, int pending)
1137
{
1138
struct pf_overload_head queue;
1139
struct pfr_addr p;
1140
struct pf_overload_entry *pfoe, *pfoe1;
1141
uint32_t killed = 0;
1142
1143
CURVNET_SET((struct vnet *)v);
1144
1145
PF_OVERLOADQ_LOCK();
1146
queue = V_pf_overloadqueue;
1147
SLIST_INIT(&V_pf_overloadqueue);
1148
PF_OVERLOADQ_UNLOCK();
1149
1150
bzero(&p, sizeof(p));
1151
SLIST_FOREACH(pfoe, &queue, next) {
1152
counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
1153
if (V_pf_status.debug >= PF_DEBUG_MISC) {
1154
printf("%s: blocking address ", __func__);
1155
pf_print_host(&pfoe->addr, 0, pfoe->af);
1156
printf("\n");
1157
}
1158
1159
p.pfra_af = pfoe->af;
1160
switch (pfoe->af) {
1161
#ifdef INET
1162
case AF_INET:
1163
p.pfra_net = 32;
1164
p.pfra_ip4addr = pfoe->addr.v4;
1165
break;
1166
#endif /* INET */
1167
#ifdef INET6
1168
case AF_INET6:
1169
p.pfra_net = 128;
1170
p.pfra_ip6addr = pfoe->addr.v6;
1171
break;
1172
#endif /* INET6 */
1173
default:
1174
unhandled_af(pfoe->af);
1175
}
1176
1177
PF_RULES_WLOCK();
1178
pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
1179
PF_RULES_WUNLOCK();
1180
}
1181
1182
/*
1183
* Remove those entries that don't need flushing.
1184
*/
1185
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
1186
if (pfoe->rule->flush == 0) {
1187
SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
1188
free(pfoe, M_PFTEMP);
1189
} else
1190
counter_u64_add(
1191
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);
1192
1193
/* If nothing to flush, return. */
1194
if (SLIST_EMPTY(&queue)) {
1195
CURVNET_RESTORE();
1196
return;
1197
}
1198
1199
for (int i = 0; i <= V_pf_hashmask; i++) {
1200
struct pf_idhash *ih = &V_pf_idhash[i];
1201
struct pf_state_key *sk;
1202
struct pf_kstate *s;
1203
1204
PF_HASHROW_LOCK(ih);
1205
LIST_FOREACH(s, &ih->states, entry) {
1206
sk = s->key[PF_SK_WIRE];
1207
SLIST_FOREACH(pfoe, &queue, next)
1208
if (sk->af == pfoe->af &&
1209
((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
1210
pfoe->rule == s->rule) &&
1211
((pfoe->dir == PF_OUT &&
1212
PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
1213
(pfoe->dir == PF_IN &&
1214
PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
1215
s->timeout = PFTM_PURGE;
1216
pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
1217
killed++;
1218
}
1219
}
1220
PF_HASHROW_UNLOCK(ih);
1221
}
1222
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
1223
free(pfoe, M_PFTEMP);
1224
if (V_pf_status.debug >= PF_DEBUG_MISC)
1225
printf("%s: %u states killed", __func__, killed);
1226
1227
CURVNET_RESTORE();
1228
}
1229
1230
/*
1231
* On node found, always returns locked. On not found, locking is configurable.
1232
*/
1233
struct pf_ksrc_node *
1234
pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af,
1235
struct pf_srchash **sh, pf_sn_types_t sn_type, bool returnlocked)
1236
{
1237
struct pf_ksrc_node *n;
1238
1239
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
1240
1241
*sh = &V_pf_srchash[pf_hashsrc(src, af)];
1242
PF_HASHROW_LOCK(*sh);
1243
LIST_FOREACH(n, &(*sh)->nodes, entry)
1244
if (n->rule == rule && n->af == af && n->type == sn_type &&
1245
((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
1246
(af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
1247
break;
1248
1249
if (n == NULL && !returnlocked)
1250
PF_HASHROW_UNLOCK(*sh);
1251
1252
return (n);
1253
}
1254
1255
bool
1256
pf_src_node_exists(struct pf_ksrc_node **sn, struct pf_srchash *sh)
1257
{
1258
struct pf_ksrc_node *cur;
1259
1260
if ((*sn) == NULL)
1261
return (false);
1262
1263
KASSERT(sh != NULL, ("%s: sh is NULL", __func__));
1264
1265
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
1266
PF_HASHROW_LOCK(sh);
1267
LIST_FOREACH(cur, &(sh->nodes), entry) {
1268
if (cur == (*sn) &&
1269
cur->expire != 1) /* Ignore nodes being killed */
1270
return (true);
1271
}
1272
PF_HASHROW_UNLOCK(sh);
1273
(*sn) = NULL;
1274
return (false);
1275
}
1276
1277
void
1278
pf_free_src_node(struct pf_ksrc_node *sn)
1279
{
1280
1281
for (int i = 0; i < 2; i++) {
1282
counter_u64_free(sn->bytes[i]);
1283
counter_u64_free(sn->packets[i]);
1284
}
1285
counter_rate_free(sn->conn_rate.cr);
1286
uma_zfree(V_pf_sources_z, sn);
1287
}
1288
1289
static u_short
1290
pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX],
1291
struct pf_srchash *snhs[PF_SN_MAX], struct pf_krule *rule,
1292
struct pf_addr *src, sa_family_t af, struct pf_addr *raddr,
1293
struct pfi_kkif *rkif, sa_family_t raf, pf_sn_types_t sn_type)
1294
{
1295
u_short reason = 0;
1296
struct pf_krule *r_track = rule;
1297
struct pf_ksrc_node **sn = &(sns[sn_type]);
1298
struct pf_srchash **sh = &(snhs[sn_type]);
1299
1300
KASSERT(sn_type != PF_SN_LIMIT || (raddr == NULL && rkif == NULL),
1301
("%s: raddr and rkif must be NULL for PF_SN_LIMIT", __func__));
1302
1303
KASSERT(sn_type != PF_SN_LIMIT || (rule->rule_flag & PFRULE_SRCTRACK),
1304
("%s: PF_SN_LIMIT only valid for rules with PFRULE_SRCTRACK", __func__));
1305
1306
/*
1307
* XXX: There could be a KASSERT for
1308
* sn_type == PF_SN_LIMIT || (pool->opts & PF_POOL_STICKYADDR)
1309
* but we'd need to pass pool *only* for this KASSERT.
1310
*/
1311
1312
if ( (rule->rule_flag & PFRULE_SRCTRACK) &&
1313
!(rule->rule_flag & PFRULE_RULESRCTRACK))
1314
r_track = &V_pf_default_rule;
1315
1316
/*
1317
* Request the sh to always be locked, as we might insert a new sn.
1318
*/
1319
if (*sn == NULL)
1320
*sn = pf_find_src_node(src, r_track, af, sh, sn_type, true);
1321
1322
if (*sn == NULL) {
1323
PF_HASHROW_ASSERT(*sh);
1324
1325
if (sn_type == PF_SN_LIMIT && rule->max_src_nodes &&
1326
counter_u64_fetch(r_track->src_nodes[sn_type]) >= rule->max_src_nodes) {
1327
counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1);
1328
reason = PFRES_SRCLIMIT;
1329
goto done;
1330
}
1331
1332
(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
1333
if ((*sn) == NULL) {
1334
reason = PFRES_MEMORY;
1335
goto done;
1336
}
1337
1338
for (int i = 0; i < 2; i++) {
1339
(*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT);
1340
(*sn)->packets[i] = counter_u64_alloc(M_NOWAIT);
1341
1342
if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) {
1343
pf_free_src_node(*sn);
1344
reason = PFRES_MEMORY;
1345
goto done;
1346
}
1347
}
1348
1349
if (sn_type == PF_SN_LIMIT)
1350
if (! pf_init_threshold(&(*sn)->conn_rate,
1351
rule->max_src_conn_rate.limit,
1352
rule->max_src_conn_rate.seconds)) {
1353
pf_free_src_node(*sn);
1354
reason = PFRES_MEMORY;
1355
goto done;
1356
}
1357
1358
MPASS((*sn)->lock == NULL);
1359
(*sn)->lock = &(*sh)->lock;
1360
1361
(*sn)->af = af;
1362
(*sn)->rule = r_track;
1363
pf_addrcpy(&(*sn)->addr, src, af);
1364
if (raddr != NULL)
1365
pf_addrcpy(&(*sn)->raddr, raddr, raf);
1366
(*sn)->rkif = rkif;
1367
(*sn)->raf = raf;
1368
LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry);
1369
(*sn)->creation = time_uptime;
1370
(*sn)->ruletype = rule->action;
1371
(*sn)->type = sn_type;
1372
counter_u64_add(r_track->src_nodes[sn_type], 1);
1373
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
1374
} else {
1375
if (sn_type == PF_SN_LIMIT && rule->max_src_states &&
1376
(*sn)->states >= rule->max_src_states) {
1377
counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
1378
1);
1379
reason = PFRES_SRCLIMIT;
1380
goto done;
1381
}
1382
}
1383
done:
1384
if (reason == 0)
1385
(*sn)->states++;
1386
else
1387
(*sn) = NULL;
1388
1389
PF_HASHROW_UNLOCK(*sh);
1390
return (reason);
1391
}
1392
1393
void
1394
pf_unlink_src_node(struct pf_ksrc_node *src)
1395
{
1396
PF_SRC_NODE_LOCK_ASSERT(src);
1397
1398
LIST_REMOVE(src, entry);
1399
if (src->rule)
1400
counter_u64_add(src->rule->src_nodes[src->type], -1);
1401
}
1402
1403
u_int
1404
pf_free_src_nodes(struct pf_ksrc_node_list *head)
1405
{
1406
struct pf_ksrc_node *sn, *tmp;
1407
u_int count = 0;
1408
1409
LIST_FOREACH_SAFE(sn, head, entry, tmp) {
1410
pf_free_src_node(sn);
1411
count++;
1412
}
1413
1414
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);
1415
1416
return (count);
1417
}
1418
1419
void
1420
pf_mtag_initialize(void)
1421
{
1422
1423
pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
1424
sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
1425
UMA_ALIGN_PTR, 0);
1426
}
1427
1428
/* Per-vnet data storage structures initialization. */
1429
void
1430
pf_initialize(void)
1431
{
1432
struct pf_keyhash *kh;
1433
struct pf_idhash *ih;
1434
struct pf_srchash *sh;
1435
struct pf_udpendpointhash *uh;
1436
u_int i;
1437
1438
if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
1439
V_pf_hashsize = PF_HASHSIZ;
1440
if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
1441
V_pf_srchashsize = PF_SRCHASHSIZ;
1442
if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize))
1443
V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
1444
1445
V_pf_hashseed = arc4random();
1446
1447
/* States and state keys storage. */
1448
V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate),
1449
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1450
V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
1451
uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
1452
uma_zone_set_warning(V_pf_state_z, "PF states limit reached");
1453
1454
V_pf_state_key_z = uma_zcreate("pf state keys",
1455
sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
1456
UMA_ALIGN_PTR, 0);
1457
1458
V_pf_keyhash = mallocarray(V_pf_hashsize, sizeof(struct pf_keyhash),
1459
M_PFHASH, M_NOWAIT | M_ZERO);
1460
V_pf_idhash = mallocarray(V_pf_hashsize, sizeof(struct pf_idhash),
1461
M_PFHASH, M_NOWAIT | M_ZERO);
1462
if (V_pf_keyhash == NULL || V_pf_idhash == NULL) {
1463
printf("pf: Unable to allocate memory for "
1464
"state_hashsize %lu.\n", V_pf_hashsize);
1465
1466
free(V_pf_keyhash, M_PFHASH);
1467
free(V_pf_idhash, M_PFHASH);
1468
1469
V_pf_hashsize = PF_HASHSIZ;
1470
V_pf_keyhash = mallocarray(V_pf_hashsize,
1471
sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO);
1472
V_pf_idhash = mallocarray(V_pf_hashsize,
1473
sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO);
1474
}
1475
1476
V_pf_hashmask = V_pf_hashsize - 1;
1477
for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
1478
i++, kh++, ih++) {
1479
mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
1480
mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
1481
}
1482
1483
/* Source nodes. */
1484
V_pf_sources_z = uma_zcreate("pf source nodes",
1485
sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1486
0);
1487
V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
1488
uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
1489
uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
1490
1491
V_pf_srchash = mallocarray(V_pf_srchashsize,
1492
sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO);
1493
if (V_pf_srchash == NULL) {
1494
printf("pf: Unable to allocate memory for "
1495
"source_hashsize %lu.\n", V_pf_srchashsize);
1496
1497
V_pf_srchashsize = PF_SRCHASHSIZ;
1498
V_pf_srchash = mallocarray(V_pf_srchashsize,
1499
sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO);
1500
}
1501
1502
V_pf_srchashmask = V_pf_srchashsize - 1;
1503
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
1504
mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
1505
1506
1507
/* UDP endpoint mappings. */
1508
V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings",
1509
sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL,
1510
UMA_ALIGN_PTR, 0);
1511
V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
1512
sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO);
1513
if (V_pf_udpendpointhash == NULL) {
1514
printf("pf: Unable to allocate memory for "
1515
"udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize);
1516
1517
V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
1518
V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
1519
sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO);
1520
}
1521
1522
V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1;
1523
for (i = 0, uh = V_pf_udpendpointhash;
1524
i <= V_pf_udpendpointhashmask;
1525
i++, uh++) {
1526
mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
1527
MTX_DEF | MTX_DUPOK);
1528
}
1529
1530
/* Anchors */
1531
V_pf_anchor_z = uma_zcreate("pf anchors",
1532
sizeof(struct pf_kanchor), NULL, NULL, NULL, NULL,
1533
UMA_ALIGN_PTR, 0);
1534
V_pf_limits[PF_LIMIT_ANCHORS].zone = V_pf_anchor_z;
1535
uma_zone_set_max(V_pf_anchor_z, PF_ANCHOR_HIWAT);
1536
uma_zone_set_warning(V_pf_anchor_z, "PF anchor limit reached");
1537
1538
V_pf_eth_anchor_z = uma_zcreate("pf Ethernet anchors",
1539
sizeof(struct pf_keth_anchor), NULL, NULL, NULL, NULL,
1540
UMA_ALIGN_PTR, 0);
1541
V_pf_limits[PF_LIMIT_ETH_ANCHORS].zone = V_pf_eth_anchor_z;
1542
uma_zone_set_max(V_pf_eth_anchor_z, PF_ANCHOR_HIWAT);
1543
uma_zone_set_warning(V_pf_eth_anchor_z, "PF Ethernet anchor limit reached");
1544
1545
/* ALTQ */
1546
TAILQ_INIT(&V_pf_altqs[0]);
1547
TAILQ_INIT(&V_pf_altqs[1]);
1548
TAILQ_INIT(&V_pf_altqs[2]);
1549
TAILQ_INIT(&V_pf_altqs[3]);
1550
TAILQ_INIT(&V_pf_pabuf[0]);
1551
TAILQ_INIT(&V_pf_pabuf[1]);
1552
TAILQ_INIT(&V_pf_pabuf[2]);
1553
V_pf_altqs_active = &V_pf_altqs[0];
1554
V_pf_altq_ifs_active = &V_pf_altqs[1];
1555
V_pf_altqs_inactive = &V_pf_altqs[2];
1556
V_pf_altq_ifs_inactive = &V_pf_altqs[3];
1557
1558
/* Send & overload+flush queues. */
1559
STAILQ_INIT(&V_pf_sendqueue);
1560
SLIST_INIT(&V_pf_overloadqueue);
1561
TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);
1562
1563
/* Unlinked, but may be referenced rules. */
1564
TAILQ_INIT(&V_pf_unlinked_rules);
1565
1566
/* State limiters */
1567
RB_INIT(&V_pf_statelim_id_tree_inactive);
1568
RB_INIT(&V_pf_statelim_nm_tree_inactive);
1569
TAILQ_INIT(&V_pf_statelim_list_inactive);
1570
1571
RB_INIT(&V_pf_statelim_id_tree_active);
1572
TAILQ_INIT(&V_pf_statelim_list_active);
1573
1574
/* Source limiters */
1575
RB_INIT(&V_pf_sourcelim_id_tree_active);
1576
TAILQ_INIT(&V_pf_sourcelim_list_active);
1577
1578
RB_INIT(&V_pf_sourcelim_id_tree_inactive);
1579
RB_INIT(&V_pf_sourcelim_nm_tree_inactive);
1580
TAILQ_INIT(&V_pf_sourcelim_list_inactive);
1581
}
1582
1583
void
1584
pf_mtag_cleanup(void)
1585
{
1586
1587
uma_zdestroy(pf_mtag_z);
1588
}
1589
1590
void
1591
pf_cleanup(void)
1592
{
1593
struct pf_keyhash *kh;
1594
struct pf_idhash *ih;
1595
struct pf_srchash *sh;
1596
struct pf_udpendpointhash *uh;
1597
struct pf_send_entry *pfse, *next;
1598
u_int i;
1599
1600
for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash;
1601
i <= V_pf_hashmask;
1602
i++, kh++, ih++) {
1603
KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
1604
__func__));
1605
KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
1606
__func__));
1607
mtx_destroy(&kh->lock);
1608
mtx_destroy(&ih->lock);
1609
}
1610
free(V_pf_keyhash, M_PFHASH);
1611
free(V_pf_idhash, M_PFHASH);
1612
1613
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
1614
KASSERT(LIST_EMPTY(&sh->nodes),
1615
("%s: source node hash not empty", __func__));
1616
mtx_destroy(&sh->lock);
1617
}
1618
free(V_pf_srchash, M_PFHASH);
1619
1620
for (i = 0, uh = V_pf_udpendpointhash;
1621
i <= V_pf_udpendpointhashmask;
1622
i++, uh++) {
1623
KASSERT(LIST_EMPTY(&uh->endpoints),
1624
("%s: udp endpoint hash not empty", __func__));
1625
mtx_destroy(&uh->lock);
1626
}
1627
free(V_pf_udpendpointhash, M_PFHASH);
1628
1629
STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
1630
m_freem(pfse->pfse_m);
1631
free(pfse, M_PFTEMP);
1632
}
1633
MPASS(RB_EMPTY(&V_pf_sctp_endpoints));
1634
1635
uma_zdestroy(V_pf_sources_z);
1636
uma_zdestroy(V_pf_state_z);
1637
uma_zdestroy(V_pf_state_key_z);
1638
uma_zdestroy(V_pf_udp_mapping_z);
1639
uma_zdestroy(V_pf_anchor_z);
1640
uma_zdestroy(V_pf_eth_anchor_z);
1641
}
1642
1643
static int
1644
pf_mtag_uminit(void *mem, int size, int how)
1645
{
1646
struct m_tag *t;
1647
1648
t = (struct m_tag *)mem;
1649
t->m_tag_cookie = MTAG_ABI_COMPAT;
1650
t->m_tag_id = PACKET_TAG_PF;
1651
t->m_tag_len = sizeof(struct pf_mtag);
1652
t->m_tag_free = pf_mtag_free;
1653
1654
return (0);
1655
}
1656
1657
static void
1658
pf_mtag_free(struct m_tag *t)
1659
{
1660
1661
uma_zfree(pf_mtag_z, t);
1662
}
1663
1664
struct pf_mtag *
1665
pf_get_mtag(struct mbuf *m)
1666
{
1667
struct m_tag *mtag;
1668
1669
if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
1670
return ((struct pf_mtag *)(mtag + 1));
1671
1672
mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
1673
if (mtag == NULL)
1674
return (NULL);
1675
bzero(mtag + 1, sizeof(struct pf_mtag));
1676
m_tag_prepend(m, mtag);
1677
1678
return ((struct pf_mtag *)(mtag + 1));
1679
}
1680
1681
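/*
* Attach a state to its wire- and stack-side keys. skw describes the
* connection as seen on the wire, sks as seen by the local stack;
* callers pass the same key when no translation applies. On success
* the state is linked into the key hash (reusing an existing key where
* one matches) and the function returns with the state's ID hash row
* locked.
*/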
static int
1682
pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
1683
struct pf_kstate *s)
1684
{
1685
struct pf_keyhash *khs, *khw, *kh;
1686
struct pf_state_key *sk, *cur;
1687
struct pf_kstate *si, *olds = NULL;
1688
int idx;
1689
1690
NET_EPOCH_ASSERT();
1691
KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
1692
KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
1693
KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
1694
1695
/*
1696
* We need to lock hash slots of both keys. To avoid deadlock
1697
* we always lock the slot with lower address first. Unlock order
1698
* isn't important.
1699
*
1700
* We also need to lock ID hash slot before dropping key
1701
* locks. On success we return with ID hash slot locked.
1702
*/
1703
1704
if (skw == sks) {
1705
khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
1706
PF_HASHROW_LOCK(khs);
1707
} else {
1708
khs = &V_pf_keyhash[pf_hashkey(sks)];
1709
khw = &V_pf_keyhash[pf_hashkey(skw)];
1710
if (khs == khw) {
1711
PF_HASHROW_LOCK(khs);
1712
} else if (khs < khw) {
1713
PF_HASHROW_LOCK(khs);
1714
PF_HASHROW_LOCK(khw);
1715
} else {
1716
PF_HASHROW_LOCK(khw);
1717
PF_HASHROW_LOCK(khs);
1718
}
1719
}
1720
1721
#define KEYS_UNLOCK() do { \
1722
if (khs != khw) { \
1723
PF_HASHROW_UNLOCK(khs); \
1724
PF_HASHROW_UNLOCK(khw); \
1725
} else \
1726
PF_HASHROW_UNLOCK(khs); \
1727
} while (0)
1728
1729
/*
1730
* First run: start with wire key.
1731
*/
1732
sk = skw;
1733
kh = khw;
1734
idx = PF_SK_WIRE;
1735
1736
MPASS(s->lock == NULL);
1737
s->lock = &V_pf_idhash[PF_IDHASH(s)].lock;
1738
1739
keyattach:
1740
LIST_FOREACH(cur, &kh->keys, entry)
1741
if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
1742
break;
1743
1744
if (cur != NULL) {
1745
/* Key exists. Check for same kif, if none, add to key. */
1746
TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
1747
struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
1748
1749
PF_HASHROW_LOCK(ih);
1750
if (si->kif == s->kif &&
1751
((si->key[PF_SK_WIRE]->af == sk->af &&
1752
si->direction == s->direction) ||
1753
(si->key[PF_SK_WIRE]->af !=
1754
si->key[PF_SK_STACK]->af &&
1755
sk->af == si->key[PF_SK_STACK]->af &&
1756
si->direction != s->direction))) {
1757
bool reuse = false;
1758
1759
if (sk->proto == IPPROTO_TCP &&
1760
si->src.state >= TCPS_FIN_WAIT_2 &&
1761
si->dst.state >= TCPS_FIN_WAIT_2)
1762
reuse = true;
1763
1764
if (V_pf_status.debug >= PF_DEBUG_MISC) {
1765
printf("pf: %s key attach "
1766
"%s on %s: ",
1767
(idx == PF_SK_WIRE) ?
1768
"wire" : "stack",
1769
reuse ? "reuse" : "failed",
1770
s->kif->pfik_name);
1771
pf_print_state_parts(s,
1772
(idx == PF_SK_WIRE) ?
1773
sk : NULL,
1774
(idx == PF_SK_STACK) ?
1775
sk : NULL);
1776
printf(", existing: ");
1777
pf_print_state_parts(si,
1778
(idx == PF_SK_WIRE) ?
1779
sk : NULL,
1780
(idx == PF_SK_STACK) ?
1781
sk : NULL);
1782
printf("\n");
1783
}
1784
1785
if (reuse) {
1786
/*
1787
* New state matches an old >FIN_WAIT_2
1788
* state. We can't drop key hash locks,
1789
* thus we can't unlink it properly.
1790
*
1791
* As a workaround we drop it into
1792
* TCPS_CLOSED state, schedule purge
1793
* ASAP and push it into the very end
1794
* of the slot TAILQ, so that it won't
1795
* conflict with our new state.
1796
*/
1797
pf_set_protostate(si, PF_PEER_BOTH,
1798
TCPS_CLOSED);
1799
si->timeout = PFTM_PURGE;
1800
olds = si;
1801
} else {
1802
s->timeout = PFTM_UNLINKED;
1803
if (idx == PF_SK_STACK)
1804
/*
1805
* Remove the wire key from
1806
* the hash. Other threads
1807
* can't be referencing it
1808
* because we still hold the
1809
* hash lock.
1810
*/
1811
pf_state_key_detach(s,
1812
PF_SK_WIRE);
1813
PF_HASHROW_UNLOCK(ih);
1814
KEYS_UNLOCK();
1815
if (idx == PF_SK_WIRE)
1816
/*
1817
* We've not inserted either key.
1818
* Free both.
1819
*/
1820
uma_zfree(V_pf_state_key_z, skw);
1821
if (skw != sks)
1822
uma_zfree(
1823
V_pf_state_key_z,
1824
sks);
1825
return (EEXIST); /* collision! */
1826
}
1827
}
1828
PF_HASHROW_UNLOCK(ih);
1829
}
1830
uma_zfree(V_pf_state_key_z, sk);
1831
s->key[idx] = cur;
1832
} else {
1833
LIST_INSERT_HEAD(&kh->keys, sk, entry);
1834
s->key[idx] = sk;
1835
}
1836
1837
stateattach:
1838
/* List is sorted, if-bound states before floating. */
1839
if (s->kif == V_pfi_all)
1840
TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
1841
else
1842
TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
1843
1844
if (olds) {
1845
TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
1846
TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
1847
key_list[idx]);
1848
olds = NULL;
1849
}
1850
1851
/*
1852
* Attach done. Now decide whether (and how) we should
1853
* attach a second key.
1854
*/
1855
if (sks == skw) {
1856
s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
1857
idx = PF_SK_STACK;
1858
sks = NULL;
1859
goto stateattach;
1860
} else if (sks != NULL) {
1861
/*
1862
* Continue attaching with stack key.
1863
*/
1864
sk = sks;
1865
kh = khs;
1866
idx = PF_SK_STACK;
1867
sks = NULL;
1868
goto keyattach;
1869
}
1870
1871
PF_STATE_LOCK(s);
1872
KEYS_UNLOCK();
1873
1874
KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
1875
("%s failure", __func__));
1876
1877
return (0);
1878
#undef KEYS_UNLOCK
1879
}
1880
1881
static void
1882
pf_detach_state(struct pf_kstate *s)
1883
{
1884
struct pf_state_key *sks = s->key[PF_SK_STACK];
1885
struct pf_keyhash *kh;
1886
1887
NET_EPOCH_ASSERT();
1888
MPASS(s->timeout >= PFTM_MAX);
1889
1890
pf_sctp_multihome_detach_addr(s);
1891
1892
if ((s->state_flags & PFSTATE_PFLOW) && V_pflow_export_state_ptr)
1893
V_pflow_export_state_ptr(s);
1894
1895
if (sks != NULL) {
1896
kh = &V_pf_keyhash[pf_hashkey(sks)];
1897
PF_HASHROW_LOCK(kh);
1898
if (s->key[PF_SK_STACK] != NULL)
1899
pf_state_key_detach(s, PF_SK_STACK);
1900
/*
1901
* If both point to same key, then we are done.
1902
*/
1903
if (sks == s->key[PF_SK_WIRE]) {
1904
pf_state_key_detach(s, PF_SK_WIRE);
1905
PF_HASHROW_UNLOCK(kh);
1906
return;
1907
}
1908
PF_HASHROW_UNLOCK(kh);
1909
}
1910
1911
if (s->key[PF_SK_WIRE] != NULL) {
1912
kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
1913
PF_HASHROW_LOCK(kh);
1914
if (s->key[PF_SK_WIRE] != NULL)
1915
pf_state_key_detach(s, PF_SK_WIRE);
1916
PF_HASHROW_UNLOCK(kh);
1917
}
1918
}
1919
1920
static void
1921
pf_state_key_detach(struct pf_kstate *s, int idx)
1922
{
1923
struct pf_state_key *sk = s->key[idx];
1924
#ifdef INVARIANTS
1925
struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];
1926
1927
PF_HASHROW_ASSERT(kh);
1928
#endif /* INVARIANTS */
1929
TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
1930
s->key[idx] = NULL;
1931
1932
if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
1933
LIST_REMOVE(sk, entry);
1934
uma_zfree(V_pf_state_key_z, sk);
1935
}
1936
}
1937
1938
static int
1939
pf_state_key_ctor(void *mem, int size, void *arg, int flags)
1940
{
1941
struct pf_state_key *sk = mem;
1942
1943
bzero(sk, sizeof(struct pf_state_key_cmp));
1944
TAILQ_INIT(&sk->states[PF_SK_WIRE]);
1945
TAILQ_INIT(&sk->states[PF_SK_STACK]);
1946
1947
return (0);
1948
}
1949
1950
static int
1951
pf_state_key_addr_setup(struct pf_pdesc *pd,
1952
struct pf_state_key_cmp *key, int multi)
1953
{
1954
struct pf_addr *saddr = pd->src;
1955
struct pf_addr *daddr = pd->dst;
1956
#ifdef INET6
1957
struct nd_neighbor_solicit nd;
1958
struct pf_addr *target;
1959
1960
if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6)
1961
goto copy;
1962
1963
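/*
 * Descriptive note (not from upstream): neighbor solicitations and
 * advertisements are keyed on the ND target address rather than the
 * (possibly multicast) packet addresses, so a solicitation and the
 * corresponding advertisement can match the same state.
 */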
switch (pd->hdr.icmp6.icmp6_type) {
1964
case ND_NEIGHBOR_SOLICIT:
1965
if (multi)
1966
return (-1);
1967
if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
1968
pd->af))
1969
return (-1);
1970
target = (struct pf_addr *)&nd.nd_ns_target;
1971
daddr = target;
1972
break;
1973
case ND_NEIGHBOR_ADVERT:
1974
if (multi)
1975
return (-1);
1976
if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
1977
pd->af))
1978
return (-1);
1979
target = (struct pf_addr *)&nd.nd_ns_target;
1980
saddr = target;
1981
if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
1982
key->addr[pd->didx].addr32[0] = 0;
1983
key->addr[pd->didx].addr32[1] = 0;
1984
key->addr[pd->didx].addr32[2] = 0;
1985
key->addr[pd->didx].addr32[3] = 0;
1986
daddr = NULL; /* overwritten */
1987
}
1988
break;
1989
default:
1990
if (multi) {
1991
key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL;
1992
key->addr[pd->sidx].addr32[1] = 0;
1993
key->addr[pd->sidx].addr32[2] = 0;
1994
key->addr[pd->sidx].addr32[3] = IPV6_ADDR_INT32_ONE;
1995
saddr = NULL; /* overwritten */
1996
}
1997
}
1998
copy:
1999
#endif /* INET6 */
2000
if (saddr)
2001
pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af);
2002
if (daddr)
2003
pf_addrcpy(&key->addr[pd->didx], daddr, pd->af);
2004
2005
return (0);
2006
}
2007
2008
int
2009
pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport,
2010
struct pf_state_key **sk, struct pf_state_key **nk)
2011
{
2012
*sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
2013
if (*sk == NULL)
2014
return (ENOMEM);
2015
2016
if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)*sk,
2017
0)) {
2018
uma_zfree(V_pf_state_key_z, *sk);
2019
*sk = NULL;
2020
return (ENOMEM);
2021
}
2022
2023
(*sk)->port[pd->sidx] = sport;
2024
(*sk)->port[pd->didx] = dport;
2025
(*sk)->proto = pd->proto;
2026
(*sk)->af = pd->af;
2027
2028
*nk = pf_state_key_clone(*sk);
2029
if (*nk == NULL) {
2030
uma_zfree(V_pf_state_key_z, *sk);
2031
*sk = NULL;
2032
return (ENOMEM);
2033
}
2034
2035
if (pd->af != pd->naf) {
2036
(*sk)->port[pd->sidx] = pd->osport;
2037
(*sk)->port[pd->didx] = pd->odport;
2038
2039
(*nk)->af = pd->naf;
2040
2041
/*
2042
* We're overwriting an address here, so there may be bits of an IPv6
2043
* address left in here. Clear that out first.
2044
*/
2045
bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0]));
2046
bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1]));
2047
if (pd->dir == PF_IN) {
2048
pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr,
2049
pd->naf);
2050
pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr,
2051
pd->naf);
2052
(*nk)->port[pd->didx] = pd->nsport;
2053
(*nk)->port[pd->sidx] = pd->ndport;
2054
} else {
2055
pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr,
2056
pd->naf);
2057
pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr,
2058
pd->naf);
2059
(*nk)->port[pd->sidx] = pd->nsport;
2060
(*nk)->port[pd->didx] = pd->ndport;
2061
}
2062
2063
switch (pd->proto) {
2064
case IPPROTO_ICMP:
2065
(*nk)->proto = IPPROTO_ICMPV6;
2066
break;
2067
case IPPROTO_ICMPV6:
2068
(*nk)->proto = IPPROTO_ICMP;
2069
break;
2070
default:
2071
(*nk)->proto = pd->proto;
2072
}
2073
}
2074
2075
return (0);
2076
}
2077
2078
struct pf_state_key *
2079
pf_state_key_clone(const struct pf_state_key *orig)
2080
{
2081
struct pf_state_key *sk;
2082
2083
sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
2084
if (sk == NULL)
2085
return (NULL);
2086
2087
bcopy(orig, sk, sizeof(struct pf_state_key_cmp));
2088
2089
return (sk);
2090
}
2091
2092
int
2093
pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif,
2094
struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s)
2095
{
2096
struct pf_idhash *ih;
2097
struct pf_kstate *cur;
2098
int error;
2099
2100
NET_EPOCH_ASSERT();
2101
2102
KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
2103
("%s: sks not pristine", __func__));
2104
KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
2105
("%s: skw not pristine", __func__));
2106
KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
2107
2108
s->kif = kif;
2109
s->orig_kif = orig_kif;
2110
2111
if (s->id == 0 && s->creatorid == 0) {
2112
s->id = alloc_unr64(&V_pf_stateid);
2113
s->id = htobe64(s->id);
2114
s->creatorid = V_pf_status.hostid;
2115
}
2116
2117
/* Returns with ID locked on success. */
2118
if ((error = pf_state_key_attach(skw, sks, s)) != 0)
2119
return (error);
2120
skw = sks = NULL;
2121
2122
ih = &V_pf_idhash[PF_IDHASH(s)];
2123
PF_HASHROW_ASSERT(ih);
2124
LIST_FOREACH(cur, &ih->states, entry)
2125
if (cur->id == s->id && cur->creatorid == s->creatorid)
2126
break;
2127
2128
if (cur != NULL) {
2129
s->timeout = PFTM_UNLINKED;
2130
PF_HASHROW_UNLOCK(ih);
2131
if (V_pf_status.debug >= PF_DEBUG_MISC) {
2132
printf("pf: state ID collision: "
2133
"id: %016llx creatorid: %08x\n",
2134
(unsigned long long)be64toh(s->id),
2135
ntohl(s->creatorid));
2136
}
2137
pf_detach_state(s);
2138
return (EEXIST);
2139
}
2140
LIST_INSERT_HEAD(&ih->states, s, entry);
2141
/* One for keys, one for ID hash. */
2142
refcount_init(&s->refs, 2);
2143
2144
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
2145
if (V_pfsync_insert_state_ptr != NULL)
2146
V_pfsync_insert_state_ptr(s);
2147
2148
/* Returns locked. */
2149
return (0);
2150
}
2151
2152
/*
2153
* Find state by ID: returns with locked row on success.
2154
*/
2155
struct pf_kstate *
2156
pf_find_state_byid(uint64_t id, uint32_t creatorid)
2157
{
2158
struct pf_idhash *ih;
2159
struct pf_kstate *s;
2160
2161
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
2162
2163
ih = &V_pf_idhash[PF_IDHASHID(id)];
2164
2165
PF_HASHROW_LOCK(ih);
2166
LIST_FOREACH(s, &ih->states, entry)
2167
if (s->id == id && s->creatorid == creatorid)
2168
break;
2169
2170
if (s == NULL)
2171
PF_HASHROW_UNLOCK(ih);
2172
2173
return (s);
2174
}
2175
2176
/*
2177
* Find state by key.
2178
* Returns with ID hash slot locked on success.
2179
*/
2180
static int
2181
pf_find_state(struct pf_pdesc *pd, const struct pf_state_key_cmp *key,
2182
struct pf_kstate **state)
2183
{
2184
struct pf_keyhash *kh;
2185
struct pf_state_key *sk;
2186
struct pf_kstate *s;
2187
int idx;
2188
2189
*state = NULL;
2190
2191
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
2192
2193
kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];
2194
2195
PF_HASHROW_LOCK(kh);
2196
LIST_FOREACH(sk, &kh->keys, entry)
2197
if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
2198
break;
2199
if (sk == NULL) {
2200
PF_HASHROW_UNLOCK(kh);
2201
return (PF_DROP);
2202
}
2203
2204
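/* Inbound packets match on the wire-side key, outbound on the stack-side key. */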
idx = (pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
2205
2206
/* List is sorted, if-bound states before floating ones. */
2207
TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
2208
if (s->kif == V_pfi_all || s->kif == pd->kif ||
2209
s->orig_kif == pd->kif) {
2210
PF_STATE_LOCK(s);
2211
PF_HASHROW_UNLOCK(kh);
2212
if (__predict_false(s->timeout >= PFTM_MAX)) {
2213
/*
2214
* State is either being processed by
2215
* pf_remove_state() in an other thread, or
2216
* is scheduled for immediate expiry.
2217
*/
2218
PF_STATE_UNLOCK(s);
2219
SDT_PROBE5(pf, ip, state, lookup, pd->kif,
2220
key, (pd->dir), pd, *state);
2221
return (PF_DROP);
2222
}
2223
goto out;
2224
}
2225
2226
/* Look through the other list, in case of AF-TO */
2227
idx = idx == PF_SK_WIRE ? PF_SK_STACK : PF_SK_WIRE;
2228
TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
2229
if (s->key[PF_SK_WIRE]->af == s->key[PF_SK_STACK]->af)
2230
continue;
2231
if (s->kif == V_pfi_all || s->kif == pd->kif ||
2232
s->orig_kif == pd->kif) {
2233
PF_STATE_LOCK(s);
2234
PF_HASHROW_UNLOCK(kh);
2235
if (__predict_false(s->timeout >= PFTM_MAX)) {
2236
/*
2237
* State is either being processed by
2238
* pf_remove_state() in another thread, or
2239
* is scheduled for immediate expiry.
2240
*/
2241
PF_STATE_UNLOCK(s);
2242
SDT_PROBE5(pf, ip, state, lookup, pd->kif,
2243
key, (pd->dir), pd, NULL);
2244
return (PF_DROP);
2245
}
2246
goto out;
2247
}
2248
}
2249
2250
PF_HASHROW_UNLOCK(kh);
2251
2252
out:
2253
SDT_PROBE5(pf, ip, state, lookup, pd->kif, key, (pd->dir), pd, *state);
2254
2255
if (s == NULL || s->timeout == PFTM_PURGE) {
2256
if (s)
2257
PF_STATE_UNLOCK(s);
2258
return (PF_DROP);
2259
}
2260
2261
if ((s)->rule->pktrate.limit && pd->dir == (s)->direction) {
2262
if (pf_check_threshold(&(s)->rule->pktrate)) {
2263
PF_STATE_UNLOCK(s);
2264
return (PF_DROP);
2265
}
2266
}
2267
if (PACKET_LOOPED(pd)) {
2268
PF_STATE_UNLOCK(s);
2269
return (PF_PASS);
2270
}
2271
2272
*state = s;
2273
2274
return (PF_MATCH);
2275
}
2276
2277
/*
2278
* Returns with ID hash slot locked on success.
2279
*/
2280
struct pf_kstate *
2281
pf_find_state_all(const struct pf_state_key_cmp *key, u_int dir, int *more)
2282
{
2283
struct pf_keyhash *kh;
2284
struct pf_state_key *sk;
2285
struct pf_kstate *s, *ret = NULL;
2286
int idx, inout = 0;
2287
2288
if (more != NULL)
2289
*more = 0;
2290
2291
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
2292
2293
kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];
2294
2295
PF_HASHROW_LOCK(kh);
2296
LIST_FOREACH(sk, &kh->keys, entry)
2297
if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
2298
break;
2299
if (sk == NULL) {
2300
PF_HASHROW_UNLOCK(kh);
2301
return (NULL);
2302
}
2303
switch (dir) {
2304
case PF_IN:
2305
idx = PF_SK_WIRE;
2306
break;
2307
case PF_OUT:
2308
idx = PF_SK_STACK;
2309
break;
2310
case PF_INOUT:
2311
idx = PF_SK_WIRE;
2312
inout = 1;
2313
break;
2314
default:
2315
panic("%s: dir %u", __func__, dir);
2316
}
2317
second_run:
2318
TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
2319
if (more == NULL) {
2320
PF_STATE_LOCK(s);
2321
PF_HASHROW_UNLOCK(kh);
2322
return (s);
2323
}
2324
2325
if (ret)
2326
(*more)++;
2327
else {
2328
ret = s;
2329
PF_STATE_LOCK(s);
2330
}
2331
}
2332
if (inout == 1) {
2333
inout = 0;
2334
idx = PF_SK_STACK;
2335
goto second_run;
2336
}
2337
PF_HASHROW_UNLOCK(kh);
2338
2339
return (ret);
2340
}
2341
2342
/*
2343
* FIXME
2344
* This routine is inefficient -- locks the state only to unlock immediately on
2345
* return.
2346
* It is racy -- after the state is unlocked nothing stops other threads from
2347
* removing it.
2348
*/
2349
bool
2350
pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir)
2351
{
2352
struct pf_kstate *s;
2353
2354
s = pf_find_state_all(key, dir, NULL);
2355
if (s != NULL) {
2356
PF_STATE_UNLOCK(s);
2357
return (true);
2358
}
2359
return (false);
2360
}
2361
2362
void
2363
pf_state_peer_hton(const struct pf_state_peer *s, struct pf_state_peer_export *d)
2364
{
2365
d->seqlo = htonl(s->seqlo);
2366
d->seqhi = htonl(s->seqhi);
2367
d->seqdiff = htonl(s->seqdiff);
2368
d->max_win = htons(s->max_win);
2369
d->mss = htons(s->mss);
2370
d->state = s->state;
2371
d->wscale = s->wscale;
2372
if (s->scrub) {
2373
d->scrub.pfss_flags = htons(
2374
s->scrub->pfss_flags & PFSS_TIMESTAMP);
2375
d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
2376
d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
2377
d->scrub.scrub_flag = PF_SCRUB_FLAG_VALID;
2378
}
2379
}
2380
2381
void
2382
pf_state_peer_ntoh(const struct pf_state_peer_export *s, struct pf_state_peer *d)
2383
{
2384
d->seqlo = ntohl(s->seqlo);
2385
d->seqhi = ntohl(s->seqhi);
2386
d->seqdiff = ntohl(s->seqdiff);
2387
d->max_win = ntohs(s->max_win);
2388
d->mss = ntohs(s->mss);
2389
d->state = s->state;
2390
d->wscale = s->wscale;
2391
if (s->scrub.scrub_flag == PF_SCRUB_FLAG_VALID &&
2392
d->scrub != NULL) {
2393
d->scrub->pfss_flags = ntohs(s->scrub.pfss_flags) &
2394
PFSS_TIMESTAMP;
2395
d->scrub->pfss_ttl = s->scrub.pfss_ttl;
2396
d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
2397
}
2398
}
2399
2400
struct pf_udp_mapping *
2401
pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
2402
struct pf_addr *nat_addr, uint16_t nat_port)
2403
{
2404
struct pf_udp_mapping *mapping;
2405
2406
mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
2407
if (mapping == NULL)
2408
return (NULL);
2409
pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af);
2410
mapping->endpoints[0].port = src_port;
2411
mapping->endpoints[0].af = af;
2412
mapping->endpoints[0].mapping = mapping;
2413
pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af);
2414
mapping->endpoints[1].port = nat_port;
2415
mapping->endpoints[1].af = af;
2416
mapping->endpoints[1].mapping = mapping;
2417
refcount_init(&mapping->refs, 1);
2418
return (mapping);
2419
}
2420
2421
int
2422
pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
2423
{
2424
struct pf_udpendpointhash *h0, *h1;
2425
struct pf_udp_endpoint *endpoint;
2426
int ret = EEXIST;
2427
2428
h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
2429
h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
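/*
 * Lock both endpoint hash rows; take them in ascending address order
 * (as with the state key hash) so concurrent inserts cannot deadlock.
 */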
2430
if (h0 == h1) {
2431
PF_HASHROW_LOCK(h0);
2432
} else if (h0 < h1) {
2433
PF_HASHROW_LOCK(h0);
2434
PF_HASHROW_LOCK(h1);
2435
} else {
2436
PF_HASHROW_LOCK(h1);
2437
PF_HASHROW_LOCK(h0);
2438
}
2439
2440
LIST_FOREACH(endpoint, &h0->endpoints, entry) {
2441
if (bcmp(endpoint, &mapping->endpoints[0],
2442
sizeof(struct pf_udp_endpoint_cmp)) == 0)
2443
break;
2444
}
2445
if (endpoint != NULL)
2446
goto cleanup;
2447
LIST_FOREACH(endpoint, &h1->endpoints, entry) {
2448
if (bcmp(endpoint, &mapping->endpoints[1],
2449
sizeof(struct pf_udp_endpoint_cmp)) == 0)
2450
break;
2451
}
2452
if (endpoint != NULL)
2453
goto cleanup;
2454
LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
2455
LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
2456
ret = 0;
2457
2458
cleanup:
2459
if (h0 != h1) {
2460
PF_HASHROW_UNLOCK(h0);
2461
PF_HASHROW_UNLOCK(h1);
2462
} else {
2463
PF_HASHROW_UNLOCK(h0);
2464
}
2465
return (ret);
2466
}
2467
2468
void
2469
pf_udp_mapping_release(struct pf_udp_mapping *mapping)
2470
{
2471
/* refcount is synchronized on the source endpoint's row lock */
2472
struct pf_udpendpointhash *h0, *h1;
2473
2474
if (mapping == NULL)
2475
return;
2476
2477
h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
2478
PF_HASHROW_LOCK(h0);
2479
if (refcount_release(&mapping->refs)) {
2480
LIST_REMOVE(&mapping->endpoints[0], entry);
2481
PF_HASHROW_UNLOCK(h0);
2482
h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
2483
PF_HASHROW_LOCK(h1);
2484
LIST_REMOVE(&mapping->endpoints[1], entry);
2485
PF_HASHROW_UNLOCK(h1);
2486
2487
uma_zfree(V_pf_udp_mapping_z, mapping);
2488
} else {
2489
PF_HASHROW_UNLOCK(h0);
2490
}
2491
}
2492
2493
2494
struct pf_udp_mapping *
2495
pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
2496
{
2497
struct pf_udpendpointhash *uh;
2498
struct pf_udp_endpoint *endpoint;
2499
2500
uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];
2501
2502
PF_HASHROW_LOCK(uh);
2503
LIST_FOREACH(endpoint, &uh->endpoints, entry) {
2504
if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
2505
bcmp(endpoint, &endpoint->mapping->endpoints[0],
2506
sizeof(struct pf_udp_endpoint_cmp)) == 0)
2507
break;
2508
}
2509
if (endpoint == NULL) {
2510
PF_HASHROW_UNLOCK(uh);
2511
return (NULL);
2512
}
2513
refcount_acquire(&endpoint->mapping->refs);
2514
PF_HASHROW_UNLOCK(uh);
2515
return (endpoint->mapping);
2516
}
2517
/* END state table stuff */
2518
2519
static void
2520
pf_send(struct pf_send_entry *pfse)
2521
{
2522
2523
PF_SENDQ_LOCK();
2524
STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
2525
PF_SENDQ_UNLOCK();
2526
swi_sched(V_pf_swi_cookie, 0);
2527
}
2528
2529
static bool
2530
pf_isforlocal(struct mbuf *m, int af)
2531
{
2532
switch (af) {
2533
#ifdef INET
2534
case AF_INET: {
2535
struct ip *ip = mtod(m, struct ip *);
2536
2537
return (in_localip(ip->ip_dst));
2538
}
2539
#endif /* INET */
2540
#ifdef INET6
2541
case AF_INET6: {
2542
struct ip6_hdr *ip6;
2543
struct in6_ifaddr *ia;
2544
ip6 = mtod(m, struct ip6_hdr *);
2545
ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
2546
if (ia == NULL)
2547
return (false);
2548
return (! (ia->ia6_flags & IN6_IFF_NOTREADY));
2549
}
2550
#endif /* INET6 */
2551
default:
2552
unhandled_af(af);
2553
}
2554
2555
return (false);
2556
}
2557
2558
int
2559
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type,
2560
int *icmp_dir, u_int16_t *virtual_id, u_int16_t *virtual_type)
2561
{
2562
/*
2563
* ICMP types marked with PF_OUT are typically responses to
2564
* PF_IN, and will match states in the opposite direction.
2565
* PF_IN ICMP types need to match a state with that type.
2566
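*
* The virtual type and id give a query and its reply a common key,
* so both directions of an ICMP exchange map onto a single state.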
*/
2567
*icmp_dir = PF_OUT;
2568
2569
/* Queries (and responses) */
2570
switch (pd->af) {
2571
#ifdef INET
2572
case AF_INET:
2573
switch (type) {
2574
case ICMP_ECHO:
2575
*icmp_dir = PF_IN;
2576
/* FALLTHROUGH */
2577
case ICMP_ECHOREPLY:
2578
*virtual_type = ICMP_ECHO;
2579
*virtual_id = pd->hdr.icmp.icmp_id;
2580
break;
2581
2582
case ICMP_TSTAMP:
2583
*icmp_dir = PF_IN;
2584
/* FALLTHROUGH */
2585
case ICMP_TSTAMPREPLY:
2586
*virtual_type = ICMP_TSTAMP;
2587
*virtual_id = pd->hdr.icmp.icmp_id;
2588
break;
2589
2590
case ICMP_IREQ:
2591
*icmp_dir = PF_IN;
2592
/* FALLTHROUGH */
2593
case ICMP_IREQREPLY:
2594
*virtual_type = ICMP_IREQ;
2595
*virtual_id = pd->hdr.icmp.icmp_id;
2596
break;
2597
2598
case ICMP_MASKREQ:
2599
*icmp_dir = PF_IN;
2600
/* FALLTHROUGH */
2601
case ICMP_MASKREPLY:
2602
*virtual_type = ICMP_MASKREQ;
2603
*virtual_id = pd->hdr.icmp.icmp_id;
2604
break;
2605
2606
case ICMP_IPV6_WHEREAREYOU:
2607
*icmp_dir = PF_IN;
2608
/* FALLTHROUGH */
2609
case ICMP_IPV6_IAMHERE:
2610
*virtual_type = ICMP_IPV6_WHEREAREYOU;
2611
*virtual_id = 0; /* Nothing sane to match on! */
2612
break;
2613
2614
case ICMP_MOBILE_REGREQUEST:
2615
*icmp_dir = PF_IN;
2616
/* FALLTHROUGH */
2617
case ICMP_MOBILE_REGREPLY:
2618
*virtual_type = ICMP_MOBILE_REGREQUEST;
2619
*virtual_id = 0; /* Nothing sane to match on! */
2620
break;
2621
2622
case ICMP_ROUTERSOLICIT:
2623
*icmp_dir = PF_IN;
2624
/* FALLTHROUGH */
2625
case ICMP_ROUTERADVERT:
2626
*virtual_type = ICMP_ROUTERSOLICIT;
2627
*virtual_id = 0; /* Nothing sane to match on! */
2628
break;
2629
2630
/* These ICMP types map to other connections */
2631
case ICMP_UNREACH:
2632
case ICMP_SOURCEQUENCH:
2633
case ICMP_REDIRECT:
2634
case ICMP_TIMXCEED:
2635
case ICMP_PARAMPROB:
2636
/* These will not be used, but set them anyway */
2637
*icmp_dir = PF_IN;
2638
*virtual_type = type;
2639
*virtual_id = 0;
2640
*virtual_type = htons(*virtual_type);
2641
return (1); /* These types match to another state */
2642
2643
/*
2644
* All remaining ICMP types get their own states,
2645
* and will only match in one direction.
2646
*/
2647
default:
2648
*icmp_dir = PF_IN;
2649
*virtual_type = type;
2650
*virtual_id = 0;
2651
break;
2652
}
2653
break;
2654
#endif /* INET */
2655
#ifdef INET6
2656
case AF_INET6:
2657
switch (type) {
2658
case ICMP6_ECHO_REQUEST:
2659
*icmp_dir = PF_IN;
2660
/* FALLTHROUGH */
2661
case ICMP6_ECHO_REPLY:
2662
*virtual_type = ICMP6_ECHO_REQUEST;
2663
*virtual_id = pd->hdr.icmp6.icmp6_id;
2664
break;
2665
2666
case MLD_LISTENER_QUERY:
2667
case MLD_LISTENER_REPORT: {
2668
/*
2669
* Listener Report can be sent by clients
2670
* without an associated Listener Query.
2671
* In addition to that, when Report is sent as a
2672
* reply to a Query its source and destination
2673
* address are different.
2674
*/
2675
*icmp_dir = PF_IN;
2676
*virtual_type = MLD_LISTENER_QUERY;
2677
*virtual_id = 0;
2678
break;
2679
}
2680
case MLD_MTRACE:
2681
*icmp_dir = PF_IN;
2682
/* FALLTHROUGH */
2683
case MLD_MTRACE_RESP:
2684
*virtual_type = MLD_MTRACE;
2685
*virtual_id = 0; /* Nothing sane to match on! */
2686
break;
2687
2688
case ND_NEIGHBOR_SOLICIT:
2689
*icmp_dir = PF_IN;
2690
/* FALLTHROUGH */
2691
case ND_NEIGHBOR_ADVERT: {
2692
*virtual_type = ND_NEIGHBOR_SOLICIT;
2693
*virtual_id = 0;
2694
break;
2695
}
2696
2697
/*
2698
* These ICMP types map to other connections.
2699
* ND_REDIRECT can't be in this list because the triggering
2700
* packet header is optional.
2701
*/
2702
case ICMP6_DST_UNREACH:
2703
case ICMP6_PACKET_TOO_BIG:
2704
case ICMP6_TIME_EXCEEDED:
2705
case ICMP6_PARAM_PROB:
2706
/* These will not be used, but set them anyway */
2707
*icmp_dir = PF_IN;
2708
*virtual_type = type;
2709
*virtual_id = 0;
2710
*virtual_type = htons(*virtual_type);
2711
return (1); /* These types match to another state */
2712
/*
2713
* All remaining ICMP6 types get their own states,
2714
* and will only match in one direction.
2715
*/
2716
default:
2717
*icmp_dir = PF_IN;
2718
*virtual_type = type;
2719
*virtual_id = 0;
2720
break;
2721
}
2722
break;
2723
#endif /* INET6 */
2724
default:
2725
unhandled_af(pd->af);
2726
}
2727
*virtual_type = htons(*virtual_type);
2728
return (0); /* These types match to their own state */
2729
}
2730
2731
void
2732
pf_intr(void *v)
2733
{
2734
struct epoch_tracker et;
2735
struct pf_send_head queue;
2736
struct pf_send_entry *pfse, *next;
2737
2738
CURVNET_SET((struct vnet *)v);
2739
2740
PF_SENDQ_LOCK();
2741
queue = V_pf_sendqueue;
2742
STAILQ_INIT(&V_pf_sendqueue);
2743
PF_SENDQ_UNLOCK();
2744
2745
NET_EPOCH_ENTER(et);
2746
2747
STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
2748
switch (pfse->pfse_type) {
2749
#ifdef INET
2750
case PFSE_IP: {
2751
if (pf_isforlocal(pfse->pfse_m, AF_INET)) {
2752
KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
2753
("%s: rcvif != loif", __func__));
2754
2755
pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
2756
pfse->pfse_m->m_pkthdr.csum_flags |=
2757
CSUM_IP_VALID | CSUM_IP_CHECKED |
2758
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2759
pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
2760
ip_input(pfse->pfse_m);
2761
} else {
2762
ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
2763
NULL);
2764
}
2765
break;
2766
}
2767
case PFSE_ICMP:
2768
icmp_error(pfse->pfse_m, pfse->icmpopts.type,
2769
pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
2770
break;
2771
#endif /* INET */
2772
#ifdef INET6
2773
case PFSE_IP6:
2774
if (pf_isforlocal(pfse->pfse_m, AF_INET6)) {
2775
KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
2776
("%s: rcvif != loif", __func__));
2777
2778
pfse->pfse_m->m_flags |= M_SKIP_FIREWALL |
2779
M_LOOP;
2780
pfse->pfse_m->m_pkthdr.csum_flags |=
2781
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2782
pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
2783
ip6_input(pfse->pfse_m);
2784
} else {
2785
ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
2786
NULL, NULL);
2787
}
2788
break;
2789
case PFSE_ICMP6:
2790
icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
2791
pfse->icmpopts.code, pfse->icmpopts.mtu);
2792
break;
2793
#endif /* INET6 */
2794
default:
2795
panic("%s: unknown type", __func__);
2796
}
2797
free(pfse, M_PFTEMP);
2798
}
2799
NET_EPOCH_EXIT(et);
2800
CURVNET_RESTORE();
2801
}
2802
2803
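/* The purge thread wakes up ten times per second (every hz / 10 ticks). */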
#define pf_purge_thread_period (hz / 10)
2804
2805
#ifdef PF_WANT_32_TO_64_COUNTER
2806
static void
2807
pf_status_counter_u64_periodic(void)
2808
{
2809
2810
PF_RULES_RASSERT();
2811
2812
if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) {
2813
return;
2814
}
2815
2816
for (int i = 0; i < FCNT_MAX; i++) {
2817
pf_counter_u64_periodic(&V_pf_status.fcounters[i]);
2818
}
2819
}
2820
2821
static void
2822
pf_kif_counter_u64_periodic(void)
2823
{
2824
struct pfi_kkif *kif;
2825
size_t r, run;
2826
2827
PF_RULES_RASSERT();
2828
2829
if (__predict_false(V_pf_allkifcount == 0)) {
2830
return;
2831
}
2832
2833
if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
2834
return;
2835
}
2836
2837
run = V_pf_allkifcount / 10;
2838
if (run < 5)
2839
run = 5;
2840
2841
for (r = 0; r < run; r++) {
2842
kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist);
2843
if (kif == NULL) {
2844
LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
2845
LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
2846
break;
2847
}
2848
2849
LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
2850
LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist);
2851
2852
for (int i = 0; i < 2; i++) {
2853
for (int j = 0; j < 2; j++) {
2854
for (int k = 0; k < 2; k++) {
2855
pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]);
2856
pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]);
2857
}
2858
}
2859
}
2860
}
2861
}
2862
2863
static void
2864
pf_rule_counter_u64_periodic(void)
2865
{
2866
struct pf_krule *rule;
2867
size_t r, run;
2868
2869
PF_RULES_RASSERT();
2870
2871
if (__predict_false(V_pf_allrulecount == 0)) {
2872
return;
2873
}
2874
2875
if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
2876
return;
2877
}
2878
2879
run = V_pf_allrulecount / 10;
2880
if (run < 5)
2881
run = 5;
2882
2883
for (r = 0; r < run; r++) {
2884
rule = LIST_NEXT(V_pf_rulemarker, allrulelist);
2885
if (rule == NULL) {
2886
LIST_REMOVE(V_pf_rulemarker, allrulelist);
2887
LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
2888
break;
2889
}
2890
2891
LIST_REMOVE(V_pf_rulemarker, allrulelist);
2892
LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist);
2893
2894
pf_counter_u64_periodic(&rule->evaluations);
2895
for (int i = 0; i < 2; i++) {
2896
pf_counter_u64_periodic(&rule->packets[i]);
2897
pf_counter_u64_periodic(&rule->bytes[i]);
2898
}
2899
}
2900
}
2901
2902
static void
2903
pf_counter_u64_periodic_main(void)
2904
{
2905
PF_RULES_RLOCK_TRACKER;
2906
2907
V_pf_counter_periodic_iter++;
2908
2909
PF_RULES_RLOCK();
2910
pf_counter_u64_critical_enter();
2911
pf_status_counter_u64_periodic();
2912
pf_kif_counter_u64_periodic();
2913
pf_rule_counter_u64_periodic();
2914
pf_counter_u64_critical_exit();
2915
PF_RULES_RUNLOCK();
2916
}
2917
#else
2918
#define pf_counter_u64_periodic_main() do { } while (0)
2919
#endif
2920
2921
void
2922
pf_purge_thread(void *unused __unused)
2923
{
2924
struct epoch_tracker et;
2925
2926
VNET_ITERATOR_DECL(vnet_iter);
2927
2928
sx_xlock(&pf_end_lock);
2929
while (pf_end_threads == 0) {
2930
sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period);
2931
2932
VNET_LIST_RLOCK();
2933
NET_EPOCH_ENTER(et);
2934
VNET_FOREACH(vnet_iter) {
2935
CURVNET_SET(vnet_iter);
2936
2937
/* Wait until V_pf_default_rule is initialized. */
2938
if (V_pf_vnet_active == 0) {
2939
CURVNET_RESTORE();
2940
continue;
2941
}
2942
2943
pf_counter_u64_periodic_main();
2944
2945
/*
2946
* Process 1/interval fraction of the state
2947
* table every run.
2948
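* With the thread waking ten times per second, the whole
* hash is covered once every PFTM_INTERVAL seconds.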
*/
2949
V_pf_purge_idx =
2950
pf_purge_expired_states(V_pf_purge_idx, V_pf_hashmask /
2951
(V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
2952
2953
/*
2954
* Purge other expired types every
2955
* PFTM_INTERVAL seconds.
2956
*/
2957
if (V_pf_purge_idx == 0) {
2958
/*
2959
* Order is important:
2960
* - states and src nodes reference rules
2961
* - states and rules reference kifs
2962
*/
2963
pf_purge_expired_fragments();
2964
pf_purge_expired_src_nodes();
2965
pf_purge_unlinked_rules();
2966
pf_source_purge();
2967
pfi_kkif_purge();
2968
}
2969
CURVNET_RESTORE();
2970
}
2971
NET_EPOCH_EXIT(et);
2972
VNET_LIST_RUNLOCK();
2973
}
2974
2975
pf_end_threads++;
2976
sx_xunlock(&pf_end_lock);
2977
kproc_exit(0);
2978
}
2979
2980
void
2981
pf_unload_vnet_purge(void)
2982
{
2983
2984
/*
2985
* To clean up all kifs and rules we need
2986
* two runs: the first one clears reference flags,
2987
* then pf_purge_expired_states() doesn't
2988
* raise them again, and the second run frees.
2989
*/
2990
pf_purge_unlinked_rules();
2991
pfi_kkif_purge();
2992
2993
/*
2994
* Now purge everything.
2995
*/
2996
pf_purge_expired_states(0, V_pf_hashmask);
2997
pf_purge_fragments(UINT_MAX);
2998
pf_purge_expired_src_nodes();
2999
pf_source_purge();
3000
3001
/*
3002
* Now all kifs & rules should be unreferenced,
3003
* thus should be successfully freed.
3004
*/
3005
pf_purge_unlinked_rules();
3006
pfi_kkif_purge();
3007
}
3008
3009
u_int32_t
3010
pf_state_expires(const struct pf_kstate *state)
3011
{
3012
u_int32_t timeout;
3013
u_int32_t start;
3014
u_int32_t end;
3015
u_int32_t states;
3016
3017
/* handle all PFTM_* > PFTM_MAX here */
3018
if (state->timeout == PFTM_PURGE)
3019
return (time_uptime);
3020
KASSERT(state->timeout != PFTM_UNLINKED,
3021
("pf_state_expires: timeout == PFTM_UNLINKED"));
3022
KASSERT((state->timeout < PFTM_MAX),
3023
("pf_state_expires: timeout > PFTM_MAX"));
3024
timeout = state->rule->timeout[state->timeout];
3025
if (!timeout)
3026
timeout = V_pf_default_rule.timeout[state->timeout];
3027
start = state->rule->timeout[PFTM_ADAPTIVE_START];
3028
if (start && state->rule != &V_pf_default_rule) {
3029
end = state->rule->timeout[PFTM_ADAPTIVE_END];
3030
states = counter_u64_fetch(state->rule->states_cur);
3031
} else {
3032
start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
3033
end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
3034
states = V_pf_status.states;
3035
}
3036
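/*
 * Adaptive timeouts: once the state count exceeds adaptive.start the
 * timeout is scaled down linearly, reaching zero at adaptive.end.
 * E.g. with start 6000, end 12000 and 9000 states the remaining
 * timeout is halved; at or above 12000 states it expires immediately.
 */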
if (end && states > start && start < end) {
3037
if (states < end) {
3038
timeout = (u_int64_t)timeout * (end - states) /
3039
(end - start);
3040
return ((state->expire / 1000) + timeout);
3041
}
3042
else
3043
return (time_uptime);
3044
}
3045
return ((state->expire / 1000) + timeout);
3046
}
3047
3048
void
3049
pf_purge_expired_src_nodes(void)
3050
{
3051
struct pf_ksrc_node_list freelist;
3052
struct pf_srchash *sh;
3053
struct pf_ksrc_node *cur, *next;
3054
int i;
3055
3056
LIST_INIT(&freelist);
3057
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
3058
PF_HASHROW_LOCK(sh);
3059
LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
3060
if (cur->states == 0 && cur->expire <= time_uptime) {
3061
pf_unlink_src_node(cur);
3062
LIST_INSERT_HEAD(&freelist, cur, entry);
3063
} else if (cur->rule != NULL)
3064
cur->rule->rule_ref |= PFRULE_REFS;
3065
PF_HASHROW_UNLOCK(sh);
3066
}
3067
3068
pf_free_src_nodes(&freelist);
3069
3070
V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
3071
}
3072
3073
static void
3074
pf_src_tree_remove_state(struct pf_kstate *s)
3075
{
3076
uint32_t timeout;
3077
3078
timeout = s->rule->timeout[PFTM_SRC_NODE] ?
3079
s->rule->timeout[PFTM_SRC_NODE] :
3080
V_pf_default_rule.timeout[PFTM_SRC_NODE];
3081
3082
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
3083
if (s->sns[sn_type] == NULL)
3084
continue;
3085
PF_SRC_NODE_LOCK(s->sns[sn_type]);
3086
if (sn_type == PF_SN_LIMIT && s->src.tcp_est)
3087
--(s->sns[sn_type]->conn);
3088
if (--(s->sns[sn_type]->states) == 0)
3089
s->sns[sn_type]->expire = time_uptime + timeout;
3090
PF_SRC_NODE_UNLOCK(s->sns[sn_type]);
3091
s->sns[sn_type] = NULL;
3092
}
3093
3094
}
3095
3096
/*
3097
* Unlink and potentially free a state. The function may be
3098
* called with ID hash row locked, but always returns
3099
* unlocked, since it needs to go through key hash locking.
3100
*/
3101
int
3102
pf_remove_state(struct pf_kstate *s)
3103
{
3104
struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
3105
struct pf_state_link *pfl;
3106
3107
NET_EPOCH_ASSERT();
3108
PF_HASHROW_ASSERT(ih);
3109
3110
if (s->timeout == PFTM_UNLINKED) {
3111
/*
3112
* State is being processed
3113
* by pf_remove_state() in
3114
* another thread.
3115
*/
3116
PF_HASHROW_UNLOCK(ih);
3117
return (0); /* XXXGL: undefined actually */
3118
}
3119
3120
if (s->src.state == PF_TCPS_PROXY_DST) {
3121
/* XXX wire key the right one? */
3122
pf_send_tcp(s->rule, s->key[PF_SK_WIRE]->af,
3123
&s->key[PF_SK_WIRE]->addr[1],
3124
&s->key[PF_SK_WIRE]->addr[0],
3125
s->key[PF_SK_WIRE]->port[1],
3126
s->key[PF_SK_WIRE]->port[0],
3127
s->src.seqhi, s->src.seqlo + 1,
3128
TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0,
3129
s->act.rtableid, NULL);
3130
}
3131
3132
LIST_REMOVE(s, entry);
3133
pf_src_tree_remove_state(s);
3134
3135
if (V_pfsync_delete_state_ptr != NULL)
3136
V_pfsync_delete_state_ptr(s);
3137
3138
STATE_DEC_COUNTERS(s);
3139
3140
s->timeout = PFTM_UNLINKED;
3141
3142
/* Ensure we remove it from the list of halfopen states, if needed. */
3143
if (s->key[PF_SK_STACK] != NULL &&
3144
s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
3145
pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
3146
3147
while ((pfl = SLIST_FIRST(&s->linkage)) != NULL) {
3148
struct pf_state_link_list *list;
3149
unsigned int gen;
3150
3151
SLIST_REMOVE_HEAD(&s->linkage, pfl_linkage);
3152
3153
switch (pfl->pfl_type) {
3154
case PF_STATE_LINK_TYPE_STATELIM: {
3155
struct pf_statelim *stlim;
3156
3157
stlim = pf_statelim_find(s->statelim);
3158
KASSERT(stlim != NULL,
3159
("pf_state %p pfl %p cannot find statelim %u", s,
3160
pfl, s->statelim));
3161
3162
gen = pf_statelim_enter(stlim);
3163
stlim->pfstlim_inuse--;
3164
pf_statelim_leave(stlim, gen);
3165
3166
list = &stlim->pfstlim_states;
3167
break;
3168
}
3169
case PF_STATE_LINK_TYPE_SOURCELIM: {
3170
struct pf_sourcelim *srlim;
3171
struct pf_source key, *sr;
3172
3173
srlim = pf_sourcelim_find(s->sourcelim);
3174
KASSERT(srlim != NULL,
3175
("pf_state %p pfl %p cannot find sourcelim %u", s,
3176
pfl, s->sourcelim));
3177
3178
pf_source_key(srlim, &key, s->key[PF_SK_WIRE]->af,
3179
&s->key[PF_SK_WIRE]->addr[0 /* XXX or 1? */]);
3180
3181
sr = pf_source_find(srlim, &key);
3182
KASSERT(sr != NULL,
3183
("pf_state %p pfl %p cannot find source in %u", s,
3184
pfl, s->sourcelim));
3185
3186
gen = pf_sourcelim_enter(srlim);
3187
srlim->pfsrlim_counters.inuse--;
3188
pf_sourcelim_leave(srlim, gen);
3189
pf_source_rele(sr);
3190
3191
list = &sr->pfsr_states;
3192
break;
3193
}
3194
default:
3195
panic("%s: unexpected link type on pfl %p", __func__,
3196
pfl);
3197
}
3198
3199
PF_STATE_LOCK_ASSERT(s);
3200
TAILQ_REMOVE(list, pfl, pfl_link);
3201
free(pfl, M_PF_STATE_LINK);
3202
}
3203
3204
PF_HASHROW_UNLOCK(ih);
3205
3206
pf_detach_state(s);
3207
3208
pf_udp_mapping_release(s->udp_mapping);
3209
3210
/* pf_state_insert() initialises refs to 2 */
3211
return (pf_release_staten(s, 2));
3212
}
3213
3214
struct pf_kstate *
3215
pf_alloc_state(int flags)
3216
{
3217
3218
return (uma_zalloc(V_pf_state_z, flags | M_ZERO));
3219
}
3220
3221
static __inline void
3222
pf_free_match_rules(struct pf_krule_slist *match_rules) {
3223
struct pf_krule_item *ri;
3224
3225
while ((ri = SLIST_FIRST(match_rules))) {
3226
SLIST_REMOVE_HEAD(match_rules, entry);
3227
free(ri, M_PF_RULE_ITEM);
3228
}
3229
}
3230
3231
void
3232
pf_free_state(struct pf_kstate *cur)
3233
{
3234
KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
3235
KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
3236
cur->timeout));
3237
3238
pf_free_match_rules(&(cur->match_rules));
3239
pf_normalize_tcp_cleanup(cur);
3240
uma_zfree(V_pf_state_z, cur);
3241
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
3242
}
3243
3244
/*
3245
* Called only from pf_purge_thread(), thus serialized.
3246
*/
3247
static u_int
3248
pf_purge_expired_states(u_int i, int maxcheck)
3249
{
3250
struct pf_idhash *ih;
3251
struct pf_kstate *s;
3252
struct pf_krule_item *mrm;
3253
size_t count __unused;
3254
3255
V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
3256
3257
/*
3258
* Go through hash and unlink states that expire now.
3259
*/
3260
while (maxcheck > 0) {
3261
count = 0;
3262
ih = &V_pf_idhash[i];
3263
3264
/* only take the lock if we expect to do work */
3265
if (!LIST_EMPTY(&ih->states)) {
3266
relock:
3267
PF_HASHROW_LOCK(ih);
3268
LIST_FOREACH(s, &ih->states, entry) {
3269
if (pf_state_expires(s) <= time_uptime) {
3270
V_pf_status.states -=
3271
pf_remove_state(s);
3272
goto relock;
3273
}
3274
s->rule->rule_ref |= PFRULE_REFS;
3275
if (s->nat_rule != NULL)
3276
s->nat_rule->rule_ref |= PFRULE_REFS;
3277
if (s->anchor != NULL)
3278
s->anchor->rule_ref |= PFRULE_REFS;
3279
s->kif->pfik_flags |= PFI_IFLAG_REFS;
3280
SLIST_FOREACH(mrm, &s->match_rules, entry)
3281
mrm->r->rule_ref |= PFRULE_REFS;
3282
if (s->act.rt_kif)
3283
s->act.rt_kif->pfik_flags |= PFI_IFLAG_REFS;
3284
count++;
3285
}
3286
PF_HASHROW_UNLOCK(ih);
3287
}
3288
3289
SDT_PROBE2(pf, purge, state, rowcount, i, count);
3290
3291
/* Return when we hit end of hash. */
3292
if (++i > V_pf_hashmask) {
3293
V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
3294
return (0);
3295
}
3296
3297
maxcheck--;
3298
}
3299
3300
V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
3301
3302
return (i);
3303
}
3304
3305
static void
3306
pf_purge_unlinked_rules(void)
3307
{
3308
struct pf_krulequeue tmpq;
3309
struct pf_krule *r, *r1;
3310
3311
/*
3312
* If we have an overloading task pending, then we'd
3313
* better skip purging this time. There is a tiny
3314
* probability that the overloading task references
3315
* an already unlinked rule.
3316
*/
3317
PF_OVERLOADQ_LOCK();
3318
if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
3319
PF_OVERLOADQ_UNLOCK();
3320
return;
3321
}
3322
PF_OVERLOADQ_UNLOCK();
3323
3324
/*
3325
* Do naive mark-and-sweep garbage collecting of old rules.
3326
* Reference flag is raised by pf_purge_expired_states()
3327
* and pf_purge_expired_src_nodes().
3328
*
3329
* To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
3330
* use a temporary queue.
3331
*/
3332
TAILQ_INIT(&tmpq);
3333
PF_UNLNKDRULES_LOCK();
3334
TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
3335
if (!(r->rule_ref & PFRULE_REFS)) {
3336
TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
3337
TAILQ_INSERT_TAIL(&tmpq, r, entries);
3338
} else
3339
r->rule_ref &= ~PFRULE_REFS;
3340
}
3341
PF_UNLNKDRULES_UNLOCK();
3342
3343
if (!TAILQ_EMPTY(&tmpq)) {
3344
PF_CONFIG_LOCK();
3345
PF_RULES_WLOCK();
3346
TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
3347
TAILQ_REMOVE(&tmpq, r, entries);
3348
pf_free_rule(r);
3349
}
3350
PF_RULES_WUNLOCK();
3351
PF_CONFIG_UNLOCK();
3352
}
3353
}
3354
3355
void
3356
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
3357
{
3358
switch (af) {
3359
#ifdef INET
3360
case AF_INET: {
3361
u_int32_t a = ntohl(addr->addr32[0]);
3362
printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
3363
(a>>8)&255, a&255);
3364
if (p) {
3365
p = ntohs(p);
3366
printf(":%u", p);
3367
}
3368
break;
3369
}
3370
#endif /* INET */
3371
#ifdef INET6
3372
case AF_INET6: {
3373
u_int16_t b;
3374
u_int8_t i, curstart, curend, maxstart, maxend;
3375
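/*
 * Find the longest run of zero 16-bit groups so it can be printed
 * in the compressed "::" form below.
 */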
curstart = curend = maxstart = maxend = 255;
3376
for (i = 0; i < 8; i++) {
3377
if (!addr->addr16[i]) {
3378
if (curstart == 255)
3379
curstart = i;
3380
curend = i;
3381
} else {
3382
if ((curend - curstart) >
3383
(maxend - maxstart)) {
3384
maxstart = curstart;
3385
maxend = curend;
3386
}
3387
curstart = curend = 255;
3388
}
3389
}
3390
if ((curend - curstart) >
3391
(maxend - maxstart)) {
3392
maxstart = curstart;
3393
maxend = curend;
3394
}
3395
for (i = 0; i < 8; i++) {
3396
if (i >= maxstart && i <= maxend) {
3397
if (i == 0)
3398
printf(":");
3399
if (i == maxend)
3400
printf(":");
3401
} else {
3402
b = ntohs(addr->addr16[i]);
3403
printf("%x", b);
3404
if (i < 7)
3405
printf(":");
3406
}
3407
}
3408
if (p) {
3409
p = ntohs(p);
3410
printf("[%u]", p);
3411
}
3412
break;
3413
}
3414
#endif /* INET6 */
3415
default:
3416
unhandled_af(af);
3417
}
3418
}
3419
3420
void
3421
pf_print_state(struct pf_kstate *s)
3422
{
3423
pf_print_state_parts(s, NULL, NULL);
3424
}
3425
3426
static void
3427
pf_print_state_parts(struct pf_kstate *s,
3428
struct pf_state_key *skwp, struct pf_state_key *sksp)
3429
{
3430
struct pf_state_key *skw, *sks;
3431
u_int8_t proto, dir;
3432
3433
/* Do our best to fill these, but they're skipped if NULL */
3434
skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
3435
sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
3436
proto = skw ? skw->proto : (sks ? sks->proto : 0);
3437
dir = s ? s->direction : 0;
3438
3439
switch (proto) {
3440
case IPPROTO_IPV4:
3441
printf("IPv4");
3442
break;
3443
case IPPROTO_IPV6:
3444
printf("IPv6");
3445
break;
3446
case IPPROTO_TCP:
3447
printf("TCP");
3448
break;
3449
case IPPROTO_UDP:
3450
printf("UDP");
3451
break;
3452
case IPPROTO_ICMP:
3453
printf("ICMP");
3454
break;
3455
case IPPROTO_ICMPV6:
3456
printf("ICMPv6");
3457
break;
3458
default:
3459
printf("%u", proto);
3460
break;
3461
}
3462
switch (dir) {
3463
case PF_IN:
3464
printf(" in");
3465
break;
3466
case PF_OUT:
3467
printf(" out");
3468
break;
3469
}
3470
if (skw) {
3471
printf(" wire: ");
3472
pf_print_host(&skw->addr[0], skw->port[0], skw->af);
3473
printf(" ");
3474
pf_print_host(&skw->addr[1], skw->port[1], skw->af);
3475
}
3476
if (sks) {
3477
printf(" stack: ");
3478
if (sks != skw) {
3479
pf_print_host(&sks->addr[0], sks->port[0], sks->af);
3480
printf(" ");
3481
pf_print_host(&sks->addr[1], sks->port[1], sks->af);
3482
} else
3483
printf("-");
3484
}
3485
if (s) {
3486
if (proto == IPPROTO_TCP) {
3487
printf(" [lo=%u high=%u win=%u modulator=%u",
3488
s->src.seqlo, s->src.seqhi,
3489
s->src.max_win, s->src.seqdiff);
3490
if (s->src.wscale && s->dst.wscale)
3491
printf(" wscale=%u",
3492
s->src.wscale & PF_WSCALE_MASK);
3493
printf("]");
3494
printf(" [lo=%u high=%u win=%u modulator=%u",
3495
s->dst.seqlo, s->dst.seqhi,
3496
s->dst.max_win, s->dst.seqdiff);
3497
if (s->src.wscale && s->dst.wscale)
3498
printf(" wscale=%u",
3499
s->dst.wscale & PF_WSCALE_MASK);
3500
printf("]");
3501
}
3502
printf(" %u:%u", s->src.state, s->dst.state);
3503
if (s->rule)
3504
printf(" @%d", s->rule->nr);
3505
}
3506
}
3507
3508
void
3509
pf_print_flags(uint16_t f)
3510
{
3511
if (f)
3512
printf(" ");
3513
if (f & TH_FIN)
3514
printf("F");
3515
if (f & TH_SYN)
3516
printf("S");
3517
if (f & TH_RST)
3518
printf("R");
3519
if (f & TH_PUSH)
3520
printf("P");
3521
if (f & TH_ACK)
3522
printf("A");
3523
if (f & TH_URG)
3524
printf("U");
3525
if (f & TH_ECE)
3526
printf("E");
3527
if (f & TH_CWR)
3528
printf("W");
3529
if (f & TH_AE)
3530
printf("e");
3531
}
3532
3533
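/*
 * Compute skip steps for a rule queue: for each skip field (interface,
 * direction, af, proto, src/dst address and port) every rule points at
 * the next rule with a different value in that field, so the evaluator
 * can jump over an entire run of rules once that field fails to match.
 */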
#define PF_SET_SKIP_STEPS(i) \
3534
do { \
3535
while (head[i] != cur) { \
3536
head[i]->skip[i] = cur; \
3537
head[i] = TAILQ_NEXT(head[i], entries); \
3538
} \
3539
} while (0)
3540
3541
void
3542
pf_calc_skip_steps(struct pf_krulequeue *rules)
3543
{
3544
struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT];
3545
int i;
3546
3547
cur = TAILQ_FIRST(rules);
3548
prev = cur;
3549
for (i = 0; i < PF_SKIP_COUNT; ++i)
3550
head[i] = cur;
3551
while (cur != NULL) {
3552
if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
3553
PF_SET_SKIP_STEPS(PF_SKIP_IFP);
3554
if (cur->direction != prev->direction)
3555
PF_SET_SKIP_STEPS(PF_SKIP_DIR);
3556
if (cur->af != prev->af)
3557
PF_SET_SKIP_STEPS(PF_SKIP_AF);
3558
if (cur->proto != prev->proto)
3559
PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
3560
if (cur->src.neg != prev->src.neg ||
3561
pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
3562
PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
3563
if (cur->dst.neg != prev->dst.neg ||
3564
pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
3565
PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
3566
if (cur->src.port[0] != prev->src.port[0] ||
3567
cur->src.port[1] != prev->src.port[1] ||
3568
cur->src.port_op != prev->src.port_op)
3569
PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
3570
if (cur->dst.port[0] != prev->dst.port[0] ||
3571
cur->dst.port[1] != prev->dst.port[1] ||
3572
cur->dst.port_op != prev->dst.port_op)
3573
PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
3574
3575
prev = cur;
3576
cur = TAILQ_NEXT(cur, entries);
3577
}
3578
for (i = 0; i < PF_SKIP_COUNT; ++i)
3579
PF_SET_SKIP_STEPS(i);
3580
}
3581
3582
int
3583
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
3584
{
3585
if (aw1->type != aw2->type)
3586
return (1);
3587
switch (aw1->type) {
3588
case PF_ADDR_ADDRMASK:
3589
case PF_ADDR_RANGE:
3590
if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
3591
return (1);
3592
if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
3593
return (1);
3594
return (0);
3595
case PF_ADDR_DYNIFTL:
3596
return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
3597
case PF_ADDR_NONE:
3598
case PF_ADDR_NOROUTE:
3599
case PF_ADDR_URPFFAILED:
3600
return (0);
3601
case PF_ADDR_TABLE:
3602
return (aw1->p.tbl != aw2->p.tbl);
3603
default:
3604
printf("invalid address type: %d\n", aw1->type);
3605
return (1);
3606
}
3607
}
3608
3609
/**
3610
* Checksum updates are a little complicated because the checksum in the TCP/UDP
3611
* header isn't always a full checksum. In some cases (i.e. output) it's a
3612
* pseudo-header checksum, which is a partial checksum over src/dst IP
3613
* addresses, protocol number and length.
3614
*
3615
* That means we have the following cases:
3616
* * Input or forwarding: we don't have TSO, the checksum fields are full
3617
* checksums, we need to update the checksum whenever we change anything.
3618
* * Output (i.e. the checksum is a pseudo-header checksum):
3619
* x The field being updated is src/dst address or affects the length of
3620
* the packet. We need to update the pseudo-header checksum (note that this
3621
* checksum is not ones' complement).
3622
* x Some other field is being modified (e.g. src/dst port numbers): We
3623
* don't have to update anything.
3624
**/
3625
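/*
 * Incrementally patch a ones' complement checksum when a single 16-bit
 * word changes from 'old' to 'new': add the old word, subtract the new
 * one and fold the carry back in.  For UDP a checksum of 0x0000 means
 * "no checksum" and is left alone, and a computed 0x0000 is written as
 * 0xffff instead.
 */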
u_int16_t
3626
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
3627
{
3628
u_int32_t x;
3629
3630
x = cksum + old - new;
3631
x = (x + (x >> 16)) & 0xffff;
3632
3633
/* optimise: eliminate a branch when not udp */
3634
if (udp && cksum == 0x0000)
3635
return cksum;
3636
if (udp && x == 0x0000)
3637
x = 0xffff;
3638
3639
return (u_int16_t)(x);
3640
}
3641
3642
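/*
 * Patch one byte in the packet and keep the protocol checksum in sync.
 * The 'hi' flag tells whether the byte is the high or low octet of its
 * 16-bit checksum word; the 16- and 32-bit variants below are composed
 * of byte patches with alternating 'hi'.
 */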
static int
3643
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
3644
{
3645
int rewrite = 0;
3646
3647
if (*f != v) {
3648
uint16_t old = htons(hi ? (*f << 8) : *f);
3649
uint16_t new = htons(hi ? ( v << 8) : v);
3650
3651
*f = v;
3652
3653
if (! (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
3654
CSUM_DELAY_DATA_IPV6)))
3655
*pd->pcksum = pf_cksum_fixup(*pd->pcksum, old, new,
3656
pd->proto == IPPROTO_UDP);
3657
3658
rewrite = 1;
3659
}
3660
3661
return (rewrite);
3662
}
3663
3664
int
3665
pf_patch_16(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
3666
{
3667
int rewrite = 0;
3668
u_int8_t *fb = (u_int8_t *)f;
3669
u_int8_t *vb = (u_int8_t *)&v;
3670
3671
rewrite += pf_patch_8(pd, fb++, *vb++, hi);
3672
rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
3673
3674
return (rewrite);
3675
}
3676
3677
int
3678
pf_patch_32(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
3679
{
3680
int rewrite = 0;
3681
u_int8_t *fb = (u_int8_t *)f;
3682
u_int8_t *vb = (u_int8_t *)&v;
3683
3684
rewrite += pf_patch_8(pd, fb++, *vb++, hi);
3685
rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
3686
rewrite += pf_patch_8(pd, fb++, *vb++, hi);
3687
rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
3688
3689
return (rewrite);
3690
}
3691
3692
u_int16_t
3693
pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
3694
u_int16_t new, u_int8_t udp)
3695
{
3696
if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
3697
return (cksum);
3698
3699
return (pf_cksum_fixup(cksum, old, new, udp));
3700
}
3701
3702
static void
3703
pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
3704
struct pf_addr *an, u_int16_t pn)
3705
{
3706
struct pf_addr ao;
3707
u_int16_t po;
3708
uint8_t u = pd->virtual_proto == IPPROTO_UDP;
3709
3710
MPASS(pd->pcksum != NULL);
3711
if (pd->af == AF_INET) {
3712
MPASS(pd->ip_sum);
3713
}
3714
3715
pf_addrcpy(&ao, a, pd->af);
3716
if (pd->af == pd->naf)
3717
pf_addrcpy(a, an, pd->af);
3718
3719
if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
3720
*pd->pcksum = ~*pd->pcksum;
3721
3722
if (p == NULL) /* no port -> done. no cksum to worry about. */
3723
return;
3724
po = *p;
3725
*p = pn;
3726
3727
switch (pd->af) {
3728
#ifdef INET
3729
case AF_INET:
3730
switch (pd->naf) {
3731
case AF_INET:
3732
*pd->ip_sum = pf_cksum_fixup(pf_cksum_fixup(*pd->ip_sum,
3733
ao.addr16[0], an->addr16[0], 0),
3734
ao.addr16[1], an->addr16[1], 0);
3735
*p = pn;
3736
3737
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
3738
ao.addr16[0], an->addr16[0], u),
3739
ao.addr16[1], an->addr16[1], u);
3740
3741
*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
3742
break;
3743
#ifdef INET6
3744
case AF_INET6:
3745
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3746
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3747
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
3748
ao.addr16[0], an->addr16[0], u),
3749
ao.addr16[1], an->addr16[1], u),
3750
0, an->addr16[2], u),
3751
0, an->addr16[3], u),
3752
0, an->addr16[4], u),
3753
0, an->addr16[5], u),
3754
0, an->addr16[6], u),
3755
0, an->addr16[7], u),
3756
po, pn, u);
3757
break;
3758
#endif /* INET6 */
3759
default:
3760
unhandled_af(pd->naf);
3761
}
3762
break;
3763
#endif /* INET */
3764
#ifdef INET6
3765
case AF_INET6:
3766
switch (pd->naf) {
3767
#ifdef INET
3768
case AF_INET:
3769
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3770
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3771
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
3772
ao.addr16[0], an->addr16[0], u),
3773
ao.addr16[1], an->addr16[1], u),
3774
ao.addr16[2], 0, u),
3775
ao.addr16[3], 0, u),
3776
ao.addr16[4], 0, u),
3777
ao.addr16[5], 0, u),
3778
ao.addr16[6], 0, u),
3779
ao.addr16[7], 0, u),
3780
po, pn, u);
3781
break;
3782
#endif /* INET */
3783
case AF_INET6:
3784
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3785
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3786
pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
3787
ao.addr16[0], an->addr16[0], u),
3788
ao.addr16[1], an->addr16[1], u),
3789
ao.addr16[2], an->addr16[2], u),
3790
ao.addr16[3], an->addr16[3], u),
3791
ao.addr16[4], an->addr16[4], u),
3792
ao.addr16[5], an->addr16[5], u),
3793
ao.addr16[6], an->addr16[6], u),
3794
ao.addr16[7], an->addr16[7], u);
3795
3796
*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
3797
break;
3798
default:
3799
unhandled_af(pd->naf);
3800
}
3801
break;
3802
#endif /* INET6 */
3803
default:
3804
unhandled_af(pd->af);
3805
}
3806
3807
if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
3808
CSUM_DELAY_DATA_IPV6)) {
3809
*pd->pcksum = ~*pd->pcksum;
3810
if (! *pd->pcksum)
3811
*pd->pcksum = 0xffff;
3812
}
3813
}
3814
3815
/* Changes a u_int32_t. Uses a void * so there are no align restrictions */
3816
void
3817
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
3818
{
3819
u_int32_t ao;
3820
3821
memcpy(&ao, a, sizeof(ao));
3822
memcpy(a, &an, sizeof(u_int32_t));
3823
*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
3824
ao % 65536, an % 65536, u);
3825
}
3826
3827
void
3828
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
3829
{
3830
u_int32_t ao;
3831
3832
memcpy(&ao, a, sizeof(ao));
3833
memcpy(a, &an, sizeof(u_int32_t));
3834
3835
*c = pf_proto_cksum_fixup(m,
3836
pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
3837
ao % 65536, an % 65536, udp);
3838
}
3839
3840
#ifdef INET6
3841
static void
3842
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
3843
{
3844
struct pf_addr ao;
3845
3846
pf_addrcpy(&ao, a, AF_INET6);
3847
pf_addrcpy(a, an, AF_INET6);
3848
3849
*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3850
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3851
pf_cksum_fixup(pf_cksum_fixup(*c,
3852
ao.addr16[0], an->addr16[0], u),
3853
ao.addr16[1], an->addr16[1], u),
3854
ao.addr16[2], an->addr16[2], u),
3855
ao.addr16[3], an->addr16[3], u),
3856
ao.addr16[4], an->addr16[4], u),
3857
ao.addr16[5], an->addr16[5], u),
3858
ao.addr16[6], an->addr16[6], u),
3859
ao.addr16[7], an->addr16[7], u);
3860
}
3861
#endif /* INET6 */
3862
3863
static void
3864
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
3865
struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
3866
u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
3867
{
3868
struct pf_addr oia, ooa;
3869
3870
pf_addrcpy(&oia, ia, af);
3871
if (oa)
3872
pf_addrcpy(&ooa, oa, af);
3873
3874
/* Change inner protocol port, fix inner protocol checksum. */
3875
if (ip != NULL) {
3876
u_int16_t oip = *ip;
3877
u_int16_t opc;
3878
3879
if (pc != NULL)
3880
opc = *pc;
3881
*ip = np;
3882
if (pc != NULL)
3883
*pc = pf_cksum_fixup(*pc, oip, *ip, u);
3884
*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
3885
if (pc != NULL)
3886
*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
3887
}
3888
/* Change inner ip address, fix inner ip and icmp checksums. */
3889
pf_addrcpy(ia, na, af);
3890
switch (af) {
3891
#ifdef INET
3892
case AF_INET: {
3893
u_int16_t oh2c = *h2c;
3894
3895
*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
3896
oia.addr16[0], ia->addr16[0], 0),
3897
oia.addr16[1], ia->addr16[1], 0);
3898
*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
3899
oia.addr16[0], ia->addr16[0], 0),
3900
oia.addr16[1], ia->addr16[1], 0);
3901
*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
3902
break;
3903
}
3904
#endif /* INET */
3905
#ifdef INET6
3906
case AF_INET6:
3907
*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3908
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3909
pf_cksum_fixup(pf_cksum_fixup(*ic,
3910
oia.addr16[0], ia->addr16[0], u),
3911
oia.addr16[1], ia->addr16[1], u),
3912
oia.addr16[2], ia->addr16[2], u),
3913
oia.addr16[3], ia->addr16[3], u),
3914
oia.addr16[4], ia->addr16[4], u),
3915
oia.addr16[5], ia->addr16[5], u),
3916
oia.addr16[6], ia->addr16[6], u),
3917
oia.addr16[7], ia->addr16[7], u);
3918
break;
3919
#endif /* INET6 */
3920
}
3921
/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
3922
if (oa) {
3923
pf_addrcpy(oa, na, af);
3924
switch (af) {
3925
#ifdef INET
3926
case AF_INET:
3927
*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
3928
ooa.addr16[0], oa->addr16[0], 0),
3929
ooa.addr16[1], oa->addr16[1], 0);
3930
break;
3931
#endif /* INET */
3932
#ifdef INET6
3933
case AF_INET6:
3934
*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3935
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3936
pf_cksum_fixup(pf_cksum_fixup(*ic,
3937
ooa.addr16[0], oa->addr16[0], u),
3938
ooa.addr16[1], oa->addr16[1], u),
3939
ooa.addr16[2], oa->addr16[2], u),
3940
ooa.addr16[3], oa->addr16[3], u),
3941
ooa.addr16[4], oa->addr16[4], u),
3942
ooa.addr16[5], oa->addr16[5], u),
3943
ooa.addr16[6], oa->addr16[6], u),
3944
ooa.addr16[7], oa->addr16[7], u);
3945
break;
3946
#endif /* INET6 */
3947
}
3948
}
3949
}
3950
3951
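/*
 * Rewrite the outer IP header of a packet to the other address family
 * (af-to translation): strip the old header, prepend the new one, adjust
 * checksum offload flags and recalculate the ICMP/ICMP6 checksum.
 * Returns 0 on success, -1 on failure.
 */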
static int
3952
pf_translate_af(struct pf_pdesc *pd, struct pf_krule *r)
3953
{
3954
#if defined(INET) && defined(INET6)
3955
struct mbuf *mp;
3956
struct ip *ip4;
3957
struct ip6_hdr *ip6;
3958
struct icmp6_hdr *icmp;
3959
struct m_tag *mtag;
3960
struct pf_fragment_tag *ftag;
3961
int hlen;
3962
3963
if (pd->ttl == 1) {
3964
/* We'd generate an ICMP error. Do so now rather than after af translation. */
3965
if (pd->af == AF_INET) {
3966
pf_send_icmp(pd->m, ICMP_TIMXCEED,
3967
ICMP_TIMXCEED_INTRANS, 0, pd->af, r,
3968
pd->act.rtableid);
3969
} else {
3970
pf_send_icmp(pd->m, ICMP6_TIME_EXCEEDED,
3971
ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r,
3972
pd->act.rtableid);
3973
}
3974
3975
return (-1);
3976
}
3977
3978
hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
3979
3980
/* trim the old header */
3981
m_adj(pd->m, pd->off);
3982
3983
/* prepend a new one */
3984
M_PREPEND(pd->m, hlen, M_NOWAIT);
3985
if (pd->m == NULL)
3986
return (-1);
3987
3988
switch (pd->naf) {
3989
case AF_INET:
3990
ip4 = mtod(pd->m, struct ip *);
3991
bzero(ip4, hlen);
3992
ip4->ip_v = IPVERSION;
3993
ip4->ip_hl = hlen >> 2;
3994
ip4->ip_tos = pd->tos;
3995
ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
3996
ip_fillid(ip4, V_ip_random_id);
3997
ip4->ip_ttl = pd->ttl;
3998
ip4->ip_p = pd->proto;
3999
ip4->ip_src = pd->nsaddr.v4;
4000
ip4->ip_dst = pd->ndaddr.v4;
4001
pd->src = (struct pf_addr *)&ip4->ip_src;
4002
pd->dst = (struct pf_addr *)&ip4->ip_dst;
4003
pd->off = sizeof(struct ip);
4004
if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
4005
pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6;
4006
pd->m->m_pkthdr.csum_flags |= CSUM_TCP;
4007
}
4008
if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
4009
pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6;
4010
pd->m->m_pkthdr.csum_flags |= CSUM_UDP;
4011
}
4012
if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
4013
pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
4014
pd->m->m_pkthdr.csum_flags |= CSUM_SCTP;
4015
}
4016
break;
4017
case AF_INET6:
4018
ip6 = mtod(pd->m, struct ip6_hdr *);
4019
bzero(ip6, hlen);
4020
ip6->ip6_vfc = IPV6_VERSION;
4021
ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
4022
ip6->ip6_plen = htons(pd->tot_len - pd->off);
4023
ip6->ip6_nxt = pd->proto;
4024
if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
4025
ip6->ip6_hlim = IPV6_DEFHLIM;
4026
else
4027
ip6->ip6_hlim = pd->ttl;
4028
ip6->ip6_src = pd->nsaddr.v6;
4029
ip6->ip6_dst = pd->ndaddr.v6;
4030
pd->src = (struct pf_addr *)&ip6->ip6_src;
4031
pd->dst = (struct pf_addr *)&ip6->ip6_dst;
4032
pd->off = sizeof(struct ip6_hdr);
4033
if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) {
4034
pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP;
4035
pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
4036
}
4037
if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) {
4038
pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP;
4039
pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6;
4040
}
4041
if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) {
4042
pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
4043
pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6;
4044
}
4045
4046
/*
4047
* If we're dealing with a reassembled packet we need to adjust
4048
* the header length from the IPv4 header size to IPv6 header
4049
* size.
4050
*/
4051
mtag = m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL);
4052
if (mtag) {
4053
ftag = (struct pf_fragment_tag *)(mtag + 1);
4054
ftag->ft_hdrlen = sizeof(*ip6);
4055
ftag->ft_maxlen -= sizeof(struct ip6_hdr) -
4056
sizeof(struct ip) + sizeof(struct ip6_frag);
4057
}
4058
break;
4059
default:
4060
return (-1);
4061
}
4062
4063
/* recalculate icmp/icmp6 checksums */
4064
if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
4065
int off;
4066
if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
4067
NULL) {
4068
pd->m = NULL;
4069
return (-1);
4070
}
4071
icmp = (struct icmp6_hdr *)(mp->m_data + off);
4072
icmp->icmp6_cksum = 0;
4073
icmp->icmp6_cksum = pd->naf == AF_INET ?
4074
in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
4075
in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
4076
ntohs(ip6->ip6_plen));
4077
}
4078
#endif /* INET && INET6 */
4079
4080
return (0);
4081
}
4082
4083
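/*
 * Translate the inner IP header carried inside an ICMP error message to the
 * other address family and splice it back into the packet.
 */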
int
4084
pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
4085
struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
4086
sa_family_t af, sa_family_t naf)
4087
{
4088
#if defined(INET) && defined(INET6)
4089
struct mbuf *n = NULL;
4090
struct ip *ip4;
4091
struct ip6_hdr *ip6;
4092
int hlen, olen, mlen;
4093
4094
if (af == naf || (af != AF_INET && af != AF_INET6) ||
4095
(naf != AF_INET && naf != AF_INET6))
4096
return (-1);
4097
4098
/* split the mbuf chain on the inner ip/ip6 header boundary */
4099
if ((n = m_split(m, off, M_NOWAIT)) == NULL)
4100
return (-1);
4101
4102
/* old header */
4103
olen = pd2->off - off;
4104
/* new header */
4105
hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
4106
4107
/* trim old header */
4108
m_adj(n, olen);
4109
4110
/* prepend a new one */
4111
M_PREPEND(n, hlen, M_NOWAIT);
4112
if (n == NULL)
4113
return (-1);
4114
4115
/* translate inner ip/ip6 header */
4116
switch (naf) {
4117
case AF_INET:
4118
ip4 = mtod(n, struct ip *);
4119
bzero(ip4, sizeof(*ip4));
4120
ip4->ip_v = IPVERSION;
4121
ip4->ip_hl = sizeof(*ip4) >> 2;
4122
ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
4123
ip_fillid(ip4, V_ip_random_id);
4124
ip4->ip_off = htons(IP_DF);
4125
ip4->ip_ttl = pd2->ttl;
4126
if (pd2->proto == IPPROTO_ICMPV6)
4127
ip4->ip_p = IPPROTO_ICMP;
4128
else
4129
ip4->ip_p = pd2->proto;
4130
ip4->ip_src = src->v4;
4131
ip4->ip_dst = dst->v4;
4132
ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
4133
break;
4134
case AF_INET6:
4135
ip6 = mtod(n, struct ip6_hdr *);
4136
bzero(ip6, sizeof(*ip6));
4137
ip6->ip6_vfc = IPV6_VERSION;
4138
ip6->ip6_plen = htons(pd2->tot_len - olen);
4139
if (pd2->proto == IPPROTO_ICMP)
4140
ip6->ip6_nxt = IPPROTO_ICMPV6;
4141
else
4142
ip6->ip6_nxt = pd2->proto;
4143
if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
4144
ip6->ip6_hlim = IPV6_DEFHLIM;
4145
else
4146
ip6->ip6_hlim = pd2->ttl;
4147
ip6->ip6_src = src->v6;
4148
ip6->ip6_dst = dst->v6;
4149
break;
4150
default:
4151
unhandled_af(naf);
4152
}
4153
4154
/* adjust payload offset and total packet length */
4155
pd2->off += hlen - olen;
4156
pd->tot_len += hlen - olen;
4157
4158
/* merge modified inner packet with the original header */
4159
mlen = n->m_pkthdr.len;
4160
m_cat(m, n);
4161
m->m_pkthdr.len += mlen;
4162
#endif /* INET && INET6 */
4163
4164
return (0);
4165
}
4166
4167
#define PTR_IP(field) (offsetof(struct ip, field))
4168
#define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
4169
4170
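/*
 * Map ICMP type, code, MTU and pointer values between ICMPv4 and ICMPv6,
 * fixing up the checksum for each field that changes. Returns -1 for
 * messages that have no equivalent in the other family.
 */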
int
4171
pf_translate_icmp_af(int af, void *arg)
4172
{
4173
#if defined(INET) && defined(INET6)
4174
struct icmp *icmp4;
4175
struct icmp6_hdr *icmp6;
4176
u_int32_t mtu;
4177
int32_t ptr = -1;
4178
u_int8_t type;
4179
u_int8_t code;
4180
4181
switch (af) {
4182
case AF_INET:
4183
icmp6 = arg;
4184
type = icmp6->icmp6_type;
4185
code = icmp6->icmp6_code;
4186
mtu = ntohl(icmp6->icmp6_mtu);
4187
4188
switch (type) {
4189
case ICMP6_ECHO_REQUEST:
4190
type = ICMP_ECHO;
4191
break;
4192
case ICMP6_ECHO_REPLY:
4193
type = ICMP_ECHOREPLY;
4194
break;
4195
case ICMP6_DST_UNREACH:
4196
type = ICMP_UNREACH;
4197
switch (code) {
4198
case ICMP6_DST_UNREACH_NOROUTE:
4199
case ICMP6_DST_UNREACH_BEYONDSCOPE:
4200
case ICMP6_DST_UNREACH_ADDR:
4201
code = ICMP_UNREACH_HOST;
4202
break;
4203
case ICMP6_DST_UNREACH_ADMIN:
4204
code = ICMP_UNREACH_HOST_PROHIB;
4205
break;
4206
case ICMP6_DST_UNREACH_NOPORT:
4207
code = ICMP_UNREACH_PORT;
4208
break;
4209
default:
4210
return (-1);
4211
}
4212
break;
4213
case ICMP6_PACKET_TOO_BIG:
4214
type = ICMP_UNREACH;
4215
code = ICMP_UNREACH_NEEDFRAG;
4216
mtu -= 20;
4217
break;
4218
case ICMP6_TIME_EXCEEDED:
4219
type = ICMP_TIMXCEED;
4220
break;
4221
case ICMP6_PARAM_PROB:
4222
switch (code) {
4223
case ICMP6_PARAMPROB_HEADER:
4224
type = ICMP_PARAMPROB;
4225
code = ICMP_PARAMPROB_ERRATPTR;
4226
ptr = ntohl(icmp6->icmp6_pptr);
4227
4228
if (ptr == PTR_IP6(ip6_vfc))
4229
; /* preserve */
4230
else if (ptr == PTR_IP6(ip6_vfc) + 1)
4231
ptr = PTR_IP(ip_tos);
4232
else if (ptr == PTR_IP6(ip6_plen) ||
4233
ptr == PTR_IP6(ip6_plen) + 1)
4234
ptr = PTR_IP(ip_len);
4235
else if (ptr == PTR_IP6(ip6_nxt))
4236
ptr = PTR_IP(ip_p);
4237
else if (ptr == PTR_IP6(ip6_hlim))
4238
ptr = PTR_IP(ip_ttl);
4239
else if (ptr >= PTR_IP6(ip6_src) &&
4240
ptr < PTR_IP6(ip6_dst))
4241
ptr = PTR_IP(ip_src);
4242
else if (ptr >= PTR_IP6(ip6_dst) &&
4243
ptr < sizeof(struct ip6_hdr))
4244
ptr = PTR_IP(ip_dst);
4245
else {
4246
return (-1);
4247
}
4248
break;
4249
case ICMP6_PARAMPROB_NEXTHEADER:
4250
type = ICMP_UNREACH;
4251
code = ICMP_UNREACH_PROTOCOL;
4252
break;
4253
default:
4254
return (-1);
4255
}
4256
break;
4257
default:
4258
return (-1);
4259
}
4260
if (icmp6->icmp6_type != type) {
4261
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
4262
icmp6->icmp6_type, type, 0);
4263
icmp6->icmp6_type = type;
4264
}
4265
if (icmp6->icmp6_code != code) {
4266
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
4267
icmp6->icmp6_code, code, 0);
4268
icmp6->icmp6_code = code;
4269
}
4270
if (icmp6->icmp6_mtu != htonl(mtu)) {
4271
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
4272
htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0);
4273
/* aligns well with an icmpv4 nextmtu */
4274
icmp6->icmp6_mtu = htonl(mtu);
4275
}
4276
if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) {
4277
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
4278
htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0);
4279
/* icmpv4 pptr is a single byte, carried in the most significant byte */
4280
icmp6->icmp6_pptr = htonl(ptr << 24);
4281
}
4282
break;
4283
case AF_INET6:
4284
icmp4 = arg;
4285
type = icmp4->icmp_type;
4286
code = icmp4->icmp_code;
4287
mtu = ntohs(icmp4->icmp_nextmtu);
4288
4289
switch (type) {
4290
case ICMP_ECHO:
4291
type = ICMP6_ECHO_REQUEST;
4292
break;
4293
case ICMP_ECHOREPLY:
4294
type = ICMP6_ECHO_REPLY;
4295
break;
4296
case ICMP_UNREACH:
4297
type = ICMP6_DST_UNREACH;
4298
switch (code) {
4299
case ICMP_UNREACH_NET:
4300
case ICMP_UNREACH_HOST:
4301
case ICMP_UNREACH_NET_UNKNOWN:
4302
case ICMP_UNREACH_HOST_UNKNOWN:
4303
case ICMP_UNREACH_ISOLATED:
4304
case ICMP_UNREACH_TOSNET:
4305
case ICMP_UNREACH_TOSHOST:
4306
code = ICMP6_DST_UNREACH_NOROUTE;
4307
break;
4308
case ICMP_UNREACH_PORT:
4309
code = ICMP6_DST_UNREACH_NOPORT;
4310
break;
4311
case ICMP_UNREACH_NET_PROHIB:
4312
case ICMP_UNREACH_HOST_PROHIB:
4313
case ICMP_UNREACH_FILTER_PROHIB:
4314
case ICMP_UNREACH_PRECEDENCE_CUTOFF:
4315
code = ICMP6_DST_UNREACH_ADMIN;
4316
break;
4317
case ICMP_UNREACH_PROTOCOL:
4318
type = ICMP6_PARAM_PROB;
4319
code = ICMP6_PARAMPROB_NEXTHEADER;
4320
ptr = offsetof(struct ip6_hdr, ip6_nxt);
4321
break;
4322
case ICMP_UNREACH_NEEDFRAG:
4323
type = ICMP6_PACKET_TOO_BIG;
4324
code = 0;
4325
mtu += 20;
4326
break;
4327
default:
4328
return (-1);
4329
}
4330
break;
4331
case ICMP_TIMXCEED:
4332
type = ICMP6_TIME_EXCEEDED;
4333
break;
4334
case ICMP_PARAMPROB:
4335
type = ICMP6_PARAM_PROB;
4336
switch (code) {
4337
case ICMP_PARAMPROB_ERRATPTR:
4338
code = ICMP6_PARAMPROB_HEADER;
4339
break;
4340
case ICMP_PARAMPROB_LENGTH:
4341
code = ICMP6_PARAMPROB_HEADER;
4342
break;
4343
default:
4344
return (-1);
4345
}
4346
4347
ptr = icmp4->icmp_pptr;
4348
if (ptr == 0 || ptr == PTR_IP(ip_tos))
4349
; /* preserve */
4350
else if (ptr == PTR_IP(ip_len) ||
4351
ptr == PTR_IP(ip_len) + 1)
4352
ptr = PTR_IP6(ip6_plen);
4353
else if (ptr == PTR_IP(ip_ttl))
4354
ptr = PTR_IP6(ip6_hlim);
4355
else if (ptr == PTR_IP(ip_p))
4356
ptr = PTR_IP6(ip6_nxt);
4357
else if (ptr >= PTR_IP(ip_src) && ptr < PTR_IP(ip_dst))
4358
ptr = PTR_IP6(ip6_src);
4359
else if (ptr >= PTR_IP(ip_dst) &&
4360
ptr < sizeof(struct ip))
4361
ptr = PTR_IP6(ip6_dst);
4362
else {
4363
return (-1);
4364
}
4365
break;
4366
default:
4367
return (-1);
4368
}
4369
if (icmp4->icmp_type != type) {
4370
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
4371
icmp4->icmp_type, type, 0);
4372
icmp4->icmp_type = type;
4373
}
4374
if (icmp4->icmp_code != code) {
4375
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
4376
icmp4->icmp_code, code, 0);
4377
icmp4->icmp_code = code;
4378
}
4379
if (icmp4->icmp_nextmtu != htons(mtu)) {
4380
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
4381
icmp4->icmp_nextmtu, htons(mtu), 0);
4382
icmp4->icmp_nextmtu = htons(mtu);
4383
}
4384
if (ptr >= 0 && icmp4->icmp_void != ptr) {
4385
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
4386
htons(icmp4->icmp_pptr), htons(ptr), 0);
4387
icmp4->icmp_void = htonl(ptr);
4388
}
4389
break;
4390
default:
4391
unhandled_af(af);
4392
}
4393
#endif /* INET && INET6 */
4394
4395
return (0);
4396
}
4397
4398
/*
4399
* Need to modulate the sequence numbers in the TCP SACK option
4400
* (credits to Krzysztof Pfaff for report and patch)
4401
*/
4402
static int
4403
pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th,
4404
struct pf_state_peer *dst)
4405
{
4406
struct sackblk sack;
4407
int copyback = 0, i;
4408
int olen, optsoff;
4409
uint8_t opts[MAX_TCPOPTLEN], *opt, *eoh;
4410
4411
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
4412
optsoff = pd->off + sizeof(struct tcphdr);
4413
#define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
4414
if (olen < TCPOLEN_MINSACK ||
4415
!pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
4416
return (0);
4417
4418
eoh = opts + olen;
4419
opt = opts;
4420
while ((opt = pf_find_tcpopt(opt, opts, olen,
4421
TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL)
4422
{
4423
size_t safelen = MIN(opt[1], (eoh - opt));
4424
for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) {
4425
size_t startoff = (opt + i) - opts;
4426
memcpy(&sack, &opt[i], sizeof(sack));
4427
pf_patch_32(pd, &sack.start,
4428
htonl(ntohl(sack.start) - dst->seqdiff),
4429
PF_ALGNMNT(startoff));
4430
pf_patch_32(pd, &sack.end,
4431
htonl(ntohl(sack.end) - dst->seqdiff),
4432
PF_ALGNMNT(startoff + sizeof(sack.start)));
4433
memcpy(&opt[i], &sack, sizeof(sack));
4434
}
4435
copyback = 1;
4436
opt += opt[1];
4437
}
4438
4439
if (copyback)
4440
m_copyback(pd->m, optsoff, olen, (caddr_t)opts);
4441
4442
return (copyback);
4443
}
4444
4445
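/*
 * Allocate an mbuf and construct a TCP segment with the given addresses,
 * ports, sequence numbers, flags and optional MSS and SACK-permitted options.
 */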
struct mbuf *
4446
pf_build_tcp(const struct pf_krule *r, sa_family_t af,
4447
const struct pf_addr *saddr, const struct pf_addr *daddr,
4448
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
4449
u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
4450
int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, u_int sack,
4451
int rtableid, u_short *reason)
4452
{
4453
struct mbuf *m;
4454
int len, tlen;
4455
#ifdef INET
4456
struct ip *h = NULL;
4457
#endif /* INET */
4458
#ifdef INET6
4459
struct ip6_hdr *h6 = NULL;
4460
#endif /* INET6 */
4461
struct tcphdr *th;
4462
char *opt;
4463
struct pf_mtag *pf_mtag;
4464
4465
len = 0;
4466
th = NULL;
4467
4468
/* maximum segment size tcp option */
4469
tlen = sizeof(struct tcphdr);
4470
if (mss)
4471
tlen += 4;
4472
if (sack)
4473
tlen += 2;
4474
4475
switch (af) {
4476
#ifdef INET
4477
case AF_INET:
4478
len = sizeof(struct ip) + tlen;
4479
break;
4480
#endif /* INET */
4481
#ifdef INET6
4482
case AF_INET6:
4483
len = sizeof(struct ip6_hdr) + tlen;
4484
break;
4485
#endif /* INET6 */
4486
default:
4487
unhandled_af(af);
4488
}
4489
4490
m = m_gethdr(M_NOWAIT, MT_DATA);
4491
if (m == NULL) {
4492
REASON_SET(reason, PFRES_MEMORY);
4493
return (NULL);
4494
}
4495
4496
#ifdef MAC
4497
mac_netinet_firewall_send(m);
4498
#endif
4499
if ((pf_mtag = pf_get_mtag(m)) == NULL) {
4500
REASON_SET(reason, PFRES_MEMORY);
4501
m_freem(m);
4502
return (NULL);
4503
}
4504
m->m_flags |= mbuf_flags;
4505
pf_mtag->tag = mtag_tag;
4506
pf_mtag->flags = mtag_flags;
4507
4508
if (rtableid >= 0)
4509
M_SETFIB(m, rtableid);
4510
4511
#ifdef ALTQ
4512
if (r != NULL && r->qid) {
4513
pf_mtag->qid = r->qid;
4514
4515
/* add hints for ecn */
4516
pf_mtag->hdr = mtod(m, struct ip *);
4517
}
4518
#endif /* ALTQ */
4519
m->m_data += max_linkhdr;
4520
m->m_pkthdr.len = m->m_len = len;
4521
/* The rest of the stack assumes a rcvif, so provide one.
4522
* This is a locally generated packet, so .. close enough. */
4523
m->m_pkthdr.rcvif = V_loif;
4524
bzero(m->m_data, len);
4525
switch (af) {
4526
#ifdef INET
4527
case AF_INET:
4528
m->m_pkthdr.csum_flags |= CSUM_TCP;
4529
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4530
4531
h = mtod(m, struct ip *);
4532
4533
h->ip_p = IPPROTO_TCP;
4534
h->ip_len = htons(tlen);
4535
h->ip_v = 4;
4536
h->ip_hl = sizeof(*h) >> 2;
4537
h->ip_tos = IPTOS_LOWDELAY;
4538
h->ip_len = htons(len);
4539
h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
4540
h->ip_ttl = ttl ? ttl : V_ip_defttl;
4541
h->ip_sum = 0;
4542
h->ip_src.s_addr = saddr->v4.s_addr;
4543
h->ip_dst.s_addr = daddr->v4.s_addr;
4544
4545
th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
4546
th->th_sum = in_pseudo(h->ip_src.s_addr, h->ip_dst.s_addr,
4547
htons(len - sizeof(struct ip) + IPPROTO_TCP));
4548
break;
4549
#endif /* INET */
4550
#ifdef INET6
4551
case AF_INET6:
4552
m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
4553
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4554
4555
h6 = mtod(m, struct ip6_hdr *);
4556
4557
/* IP header fields included in the TCP checksum */
4558
h6->ip6_nxt = IPPROTO_TCP;
4559
h6->ip6_plen = htons(tlen);
4560
h6->ip6_vfc |= IPV6_VERSION;
4561
h6->ip6_hlim = V_ip6_defhlim;
4562
memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
4563
memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
4564
4565
th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
4566
th->th_sum = in6_cksum_pseudo(h6, len - sizeof(struct ip6_hdr),
4567
IPPROTO_TCP, 0);
4568
break;
4569
#endif /* INET6 */
4570
}
4571
4572
/* TCP header */
4573
th->th_sport = sport;
4574
th->th_dport = dport;
4575
th->th_seq = htonl(seq);
4576
th->th_ack = htonl(ack);
4577
th->th_off = tlen >> 2;
4578
tcp_set_flags(th, tcp_flags);
4579
th->th_win = htons(win);
4580
4581
opt = (char *)(th + 1);
4582
if (mss) {
4583
opt = (char *)(th + 1);
4584
opt[0] = TCPOPT_MAXSEG;
4585
opt[1] = 4;
4586
mss = htons(mss);
4587
memcpy((opt + 2), &mss, 2);
4588
opt += 4;
4589
}
4590
if (sack) {
4591
opt[0] = TCPOPT_SACK_PERMITTED;
4592
opt[1] = 2;
4593
opt += 2;
4594
}
4595
4596
return (m);
4597
}
4598
4599
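/* Build and transmit an SCTP ABORT in reply to the packet described by pd */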
static void
4600
pf_send_sctp_abort(sa_family_t af, struct pf_pdesc *pd,
4601
uint8_t ttl, int rtableid)
4602
{
4603
struct mbuf *m;
4604
#ifdef INET
4605
struct ip *h = NULL;
4606
#endif /* INET */
4607
#ifdef INET6
4608
struct ip6_hdr *h6 = NULL;
4609
#endif /* INET6 */
4610
struct sctphdr *hdr;
4611
struct sctp_chunkhdr *chunk;
4612
struct pf_send_entry *pfse;
4613
int off = 0;
4614
4615
MPASS(af == pd->af);
4616
4617
m = m_gethdr(M_NOWAIT, MT_DATA);
4618
if (m == NULL)
4619
return;
4620
4621
m->m_data += max_linkhdr;
4622
m->m_flags |= M_SKIP_FIREWALL;
4623
/* The rest of the stack assumes a rcvif, so provide one.
4624
* This is a locally generated packet, so .. close enough. */
4625
m->m_pkthdr.rcvif = V_loif;
4626
4627
/* IPv4|6 header */
4628
switch (af) {
4629
#ifdef INET
4630
case AF_INET:
4631
bzero(m->m_data, sizeof(struct ip) + sizeof(*hdr) + sizeof(*chunk));
4632
4633
h = mtod(m, struct ip *);
4634
4635
/* IPv4 header */
4636
4637
h->ip_p = IPPROTO_SCTP;
4638
h->ip_len = htons(sizeof(*h) + sizeof(*hdr) + sizeof(*chunk));
4639
h->ip_ttl = ttl ? ttl : V_ip_defttl;
4640
h->ip_src = pd->dst->v4;
4641
h->ip_dst = pd->src->v4;
4642
4643
off += sizeof(struct ip);
4644
break;
4645
#endif /* INET */
4646
#ifdef INET6
4647
case AF_INET6:
4648
bzero(m->m_data, sizeof(struct ip6_hdr) + sizeof(*hdr) + sizeof(*chunk));
4649
4650
h6 = mtod(m, struct ip6_hdr *);
4651
4652
/* IPv6 header */
4653
h6->ip6_vfc |= IPV6_VERSION;
4654
h6->ip6_nxt = IPPROTO_SCTP;
4655
h6->ip6_plen = htons(sizeof(*h6) + sizeof(*hdr) + sizeof(*chunk));
4656
h6->ip6_hlim = ttl ? ttl : V_ip6_defhlim;
4657
memcpy(&h6->ip6_src, &pd->dst->v6, sizeof(struct in6_addr));
4658
memcpy(&h6->ip6_dst, &pd->src->v6, sizeof(struct in6_addr));
4659
4660
off += sizeof(struct ip6_hdr);
4661
break;
4662
#endif /* INET6 */
4663
default:
4664
unhandled_af(af);
4665
}
4666
4667
/* SCTP header */
4668
hdr = mtodo(m, off);
4669
4670
hdr->src_port = pd->hdr.sctp.dest_port;
4671
hdr->dest_port = pd->hdr.sctp.src_port;
4672
hdr->v_tag = pd->sctp_initiate_tag;
4673
hdr->checksum = 0;
4674
4675
/* Abort chunk. */
4676
off += sizeof(struct sctphdr);
4677
chunk = mtodo(m, off);
4678
4679
chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
4680
chunk->chunk_length = htons(sizeof(*chunk));
4681
4682
/* SCTP checksum */
4683
off += sizeof(*chunk);
4684
m->m_pkthdr.len = m->m_len = off;
4685
4686
pf_sctp_checksum(m, off - sizeof(*hdr) - sizeof(*chunk));
4687
4688
if (rtableid >= 0)
4689
M_SETFIB(m, rtableid);
4690
4691
/* Allocate outgoing queue entry, mbuf and mbuf tag. */
4692
pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
4693
if (pfse == NULL) {
4694
m_freem(m);
4695
return;
4696
}
4697
4698
switch (af) {
4699
#ifdef INET
4700
case AF_INET:
4701
pfse->pfse_type = PFSE_IP;
4702
break;
4703
#endif /* INET */
4704
#ifdef INET6
4705
case AF_INET6:
4706
pfse->pfse_type = PFSE_IP6;
4707
break;
4708
#endif /* INET6 */
4709
}
4710
4711
pfse->pfse_m = m;
4712
pf_send(pfse);
4713
}
4714
4715
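/* Build a TCP segment via pf_build_tcp() and queue it for transmission */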
void
4716
pf_send_tcp(const struct pf_krule *r, sa_family_t af,
4717
const struct pf_addr *saddr, const struct pf_addr *daddr,
4718
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
4719
u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
4720
int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid,
4721
u_short *reason)
4722
{
4723
struct pf_send_entry *pfse;
4724
struct mbuf *m;
4725
4726
m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags,
4727
win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid, reason);
4728
if (m == NULL)
4729
return;
4730
4731
/* Allocate outgoing queue entry, mbuf and mbuf tag. */
4732
pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
4733
if (pfse == NULL) {
4734
m_freem(m);
4735
REASON_SET(reason, PFRES_MEMORY);
4736
return;
4737
}
4738
4739
switch (af) {
4740
#ifdef INET
4741
case AF_INET:
4742
pfse->pfse_type = PFSE_IP;
4743
break;
4744
#endif /* INET */
4745
#ifdef INET6
4746
case AF_INET6:
4747
pfse->pfse_type = PFSE_IP6;
4748
break;
4749
#endif /* INET6 */
4750
default:
4751
unhandled_af(af);
4752
}
4753
4754
pfse->pfse_m = m;
4755
pf_send(pfse);
4756
}
4757
4758
static void
4759
pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum)
4760
{
4761
/* undo NAT changes, if they have taken place */
4762
if (nr != NULL) {
4763
pf_addrcpy(pd->src, &pd->osrc, pd->af);
4764
pf_addrcpy(pd->dst, &pd->odst, pd->af);
4765
if (pd->sport)
4766
*pd->sport = pd->osport;
4767
if (pd->dport)
4768
*pd->dport = pd->odport;
4769
if (pd->ip_sum)
4770
*pd->ip_sum = bip_sum;
4771
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
4772
}
4773
}
4774
4775
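/*
 * Implement the return/return-rst/return-icmp rule actions: undo any NAT
 * rewrite and answer with a TCP RST, SCTP ABORT or ICMP error, as appropriate
 * for the protocol and rule flags.
 */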
static void
4776
pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd,
4777
struct tcphdr *th, u_int16_t bproto_sum, u_int16_t bip_sum,
4778
u_short *reason, int rtableid)
4779
{
4780
pf_undo_nat(nr, pd, bip_sum);
4781
4782
if (pd->proto == IPPROTO_TCP &&
4783
((r->rule_flag & PFRULE_RETURNRST) ||
4784
(r->rule_flag & PFRULE_RETURN)) &&
4785
!(tcp_get_flags(th) & TH_RST)) {
4786
u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
4787
4788
if (pf_check_proto_cksum(pd->m, pd->off, pd->tot_len - pd->off,
4789
IPPROTO_TCP, pd->af))
4790
REASON_SET(reason, PFRES_PROTCKSUM);
4791
else {
4792
if (tcp_get_flags(th) & TH_SYN)
4793
ack++;
4794
if (tcp_get_flags(th) & TH_FIN)
4795
ack++;
4796
pf_send_tcp(r, pd->af, pd->dst,
4797
pd->src, th->th_dport, th->th_sport,
4798
ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
4799
r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid,
4800
reason);
4801
}
4802
} else if (pd->proto == IPPROTO_SCTP &&
4803
(r->rule_flag & PFRULE_RETURN)) {
4804
pf_send_sctp_abort(pd->af, pd, r->return_ttl, rtableid);
4805
} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
4806
r->return_icmp)
4807
pf_send_icmp(pd->m, r->return_icmp >> 8,
4808
r->return_icmp & 255, 0, pd->af, r, rtableid);
4809
else if (pd->proto != IPPROTO_ICMPV6 && pd->af == AF_INET6 &&
4810
r->return_icmp6)
4811
pf_send_icmp(pd->m, r->return_icmp6 >> 8,
4812
r->return_icmp6 & 255, 0, pd->af, r, rtableid);
4813
}
4814
4815
static int
4816
pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m)
4817
{
4818
struct m_tag *mtag;
4819
u_int8_t mpcp;
4820
4821
mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
4822
if (mtag == NULL)
4823
return (0);
4824
4825
if (prio == PF_PRIO_ZERO)
4826
prio = 0;
4827
4828
mpcp = *(uint8_t *)(mtag + 1);
4829
4830
return (mpcp == prio);
4831
}
4832
4833
static int
4834
pf_icmp_to_bandlim(uint8_t type)
4835
{
4836
switch (type) {
4837
case ICMP_ECHO:
4838
case ICMP_ECHOREPLY:
4839
return (BANDLIM_ICMP_ECHO);
4840
case ICMP_TSTAMP:
4841
case ICMP_TSTAMPREPLY:
4842
return (BANDLIM_ICMP_TSTAMP);
4843
case ICMP_UNREACH:
4844
default:
4845
return (BANDLIM_ICMP_UNREACH);
4846
}
4847
}
4848
4849
static void
4850
pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s,
4851
struct pf_state_peer *src, struct pf_state_peer *dst,
4852
u_short *reason)
4853
{
4854
/*
4855
* We are sending a challenge ACK as a response to a SYN packet, which
4856
* matches an existing state (modulo the TCP window check). Therefore the
4857
* packet must be sent on behalf of the destination.
4858
*
4859
* We expect the sender to either remain silent or send an RST packet,
4860
* so that both the firewall and the remote peer can purge dead state from
4861
* memory.
4862
*/
4863
pf_send_tcp(s->rule, pd->af, pd->dst, pd->src,
4864
pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
4865
src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0,
4866
s->rule->rtableid, reason);
4867
}
4868
4869
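/* Rate-limit, build and queue an ICMP or ICMP6 error in response to mbuf m */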
static void
4870
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int mtu,
4871
sa_family_t af, struct pf_krule *r, int rtableid)
4872
{
4873
struct pf_send_entry *pfse;
4874
struct mbuf *m0;
4875
struct pf_mtag *pf_mtag;
4876
4877
/* ICMP packet rate limitation. */
4878
switch (af) {
4879
#ifdef INET6
4880
case AF_INET6:
4881
if (icmp6_ratelimit(NULL, type, code))
4882
return;
4883
break;
4884
#endif /* INET6 */
4885
#ifdef INET
4886
case AF_INET:
4887
if (badport_bandlim(pf_icmp_to_bandlim(type)) != 0)
4888
return;
4889
break;
4890
#endif /* INET */
4891
}
4892
4893
/* Allocate outgoing queue entry, mbuf and mbuf tag. */
4894
pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
4895
if (pfse == NULL)
4896
return;
4897
4898
if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
4899
free(pfse, M_PFTEMP);
4900
return;
4901
}
4902
4903
if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
4904
free(pfse, M_PFTEMP);
4905
return;
4906
}
4907
/* XXX: revisit */
4908
m0->m_flags |= M_SKIP_FIREWALL;
4909
4910
if (rtableid >= 0)
4911
M_SETFIB(m0, rtableid);
4912
4913
#ifdef ALTQ
4914
if (r->qid) {
4915
pf_mtag->qid = r->qid;
4916
/* add hints for ecn */
4917
pf_mtag->hdr = mtod(m0, struct ip *);
4918
}
4919
#endif /* ALTQ */
4920
4921
switch (af) {
4922
#ifdef INET
4923
case AF_INET:
4924
pfse->pfse_type = PFSE_ICMP;
4925
break;
4926
#endif /* INET */
4927
#ifdef INET6
4928
case AF_INET6:
4929
pfse->pfse_type = PFSE_ICMP6;
4930
break;
4931
#endif /* INET6 */
4932
}
4933
pfse->pfse_m = m0;
4934
pfse->icmpopts.type = type;
4935
pfse->icmpopts.code = code;
4936
pfse->icmpopts.mtu = mtu;
4937
pf_send(pfse);
4938
}
4939
4940
/*
4941
* Return ((n = 0) == (a = b [with mask m]))
4942
* Note: n != 0 => returns (a != b [with mask m])
4943
*/
4944
int
4945
pf_match_addr(u_int8_t n, const struct pf_addr *a, const struct pf_addr *m,
4946
const struct pf_addr *b, sa_family_t af)
4947
{
4948
switch (af) {
4949
#ifdef INET
4950
case AF_INET:
4951
if (IN_ARE_MASKED_ADDR_EQUAL(a->v4, b->v4, m->v4))
4952
return (n == 0);
4953
break;
4954
#endif /* INET */
4955
#ifdef INET6
4956
case AF_INET6:
4957
if (IN6_ARE_MASKED_ADDR_EQUAL(&a->v6, &b->v6, &m->v6))
4958
return (n == 0);
4959
break;
4960
#endif /* INET6 */
4961
}
4962
4963
return (n != 0);
4964
}
4965
4966
/*
4967
* Return 1 if b <= a <= e, otherwise return 0.
4968
*/
4969
int
4970
pf_match_addr_range(const struct pf_addr *b, const struct pf_addr *e,
4971
const struct pf_addr *a, sa_family_t af)
4972
{
4973
switch (af) {
4974
#ifdef INET
4975
case AF_INET:
4976
if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
4977
(ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
4978
return (0);
4979
break;
4980
#endif /* INET */
4981
#ifdef INET6
4982
case AF_INET6: {
4983
int i;
4984
4985
/* check a >= b */
4986
for (i = 0; i < 4; ++i)
4987
if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
4988
break;
4989
else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
4990
return (0);
4991
/* check a <= e */
4992
for (i = 0; i < 4; ++i)
4993
if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
4994
break;
4995
else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
4996
return (0);
4997
break;
4998
}
4999
#endif /* INET6 */
5000
}
5001
return (1);
5002
}
5003
5004
static int
5005
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
5006
{
5007
switch (op) {
5008
case PF_OP_IRG:
5009
return ((p > a1) && (p < a2));
5010
case PF_OP_XRG:
5011
return ((p < a1) || (p > a2));
5012
case PF_OP_RRG:
5013
return ((p >= a1) && (p <= a2));
5014
case PF_OP_EQ:
5015
return (p == a1);
5016
case PF_OP_NE:
5017
return (p != a1);
5018
case PF_OP_LT:
5019
return (p < a1);
5020
case PF_OP_LE:
5021
return (p <= a1);
5022
case PF_OP_GT:
5023
return (p > a1);
5024
case PF_OP_GE:
5025
return (p >= a1);
5026
}
5027
return (0); /* never reached */
5028
}
5029
5030
int
5031
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
5032
{
5033
return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
5034
}
5035
5036
static int
5037
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
5038
{
5039
if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
5040
return (0);
5041
return (pf_match(op, a1, a2, u));
5042
}
5043
5044
static int
5045
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
5046
{
5047
if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
5048
return (0);
5049
return (pf_match(op, a1, a2, g));
5050
}
5051
5052
int
5053
pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag)
5054
{
5055
if (*tag == -1)
5056
*tag = mtag;
5057
5058
return ((!r->match_tag_not && r->match_tag == *tag) ||
5059
(r->match_tag_not && r->match_tag != *tag));
5060
}
5061
5062
static int
5063
pf_match_rcvif(struct mbuf *m, struct pf_krule *r)
5064
{
5065
struct ifnet *ifp = m->m_pkthdr.rcvif;
5066
struct pfi_kkif *kif;
5067
5068
if (ifp == NULL)
5069
return (0);
5070
5071
kif = (struct pfi_kkif *)ifp->if_pf_kif;
5072
5073
if (kif == NULL) {
5074
DPFPRINTF(PF_DEBUG_URGENT,
5075
"%s: kif == NULL, @%d via %s", __func__, r->nr,
5076
r->rcv_ifname);
5077
return (0);
5078
}
5079
5080
return (pfi_kkif_match(r->rcv_kif, kif));
5081
}
5082
5083
int
5084
pf_tag_packet(struct pf_pdesc *pd, int tag)
5085
{
5086
5087
KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
5088
5089
if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL))
5090
return (ENOMEM);
5091
5092
pd->pf_mtag->tag = tag;
5093
5094
return (0);
5095
}
5096
5097
/*
5098
* XXX: We rely on malloc(9) returning pointer aligned addresses.
5099
*/
5100
#define PF_ANCHORSTACK_MATCH 0x00000001
5101
#define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH)
5102
5103
#define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
5104
#define PF_ANCHOR_RULE(f) (struct pf_krule *) \
5105
((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
5106
#define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \
5107
((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
5108
} while (0)
5109
5110
enum pf_test_status
5111
pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r)
5112
{
5113
enum pf_test_status rv;
5114
5115
PF_RULES_RASSERT();
5116
5117
if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
5118
printf("%s: anchor stack overflow on %s\n",
5119
__func__, r->anchor->name);
5120
return (PF_TEST_FAIL);
5121
}
5122
5123
ctx->depth++;
5124
5125
if (r->anchor_wildcard) {
5126
struct pf_kanchor *child;
5127
rv = PF_TEST_OK;
5128
RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
5129
rv = pf_match_rule(ctx, &child->ruleset);
5130
if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
5131
/*
5132
* we either hit a rule with quick action
5133
* (more likely), or hit some runtime
5134
* error (e.g. pool_get() failure).
5135
*/
5136
break;
5137
}
5138
}
5139
} else {
5140
rv = pf_match_rule(ctx, &r->anchor->ruleset);
5141
/*
5142
* Unless errors occurred, stop iff any rule matched
5143
* within quick anchors.
5144
*/
5145
if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
5146
*ctx->am == r)
5147
rv = PF_TEST_QUICK;
5148
}
5149
5150
ctx->depth--;
5151
5152
return (rv);
5153
}
5154
5155
struct pf_keth_anchor_stackframe {
5156
struct pf_keth_ruleset *rs;
5157
struct pf_keth_rule *r; /* XXX: + match bit */
5158
struct pf_keth_anchor *child;
5159
};
5160
5161
#define PF_ETH_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
5162
#define PF_ETH_ANCHOR_RULE(f) (struct pf_keth_rule *) \
5163
((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
5164
#define PF_ETH_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \
5165
((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
5166
} while (0)
5167
5168
void
5169
pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
5170
struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
5171
struct pf_keth_rule **a, int *match)
5172
{
5173
struct pf_keth_anchor_stackframe *f;
5174
5175
NET_EPOCH_ASSERT();
5176
5177
if (match)
5178
*match = 0;
5179
if (*depth >= PF_ANCHOR_STACK_MAX) {
5180
printf("%s: anchor stack overflow on %s\n",
5181
__func__, (*r)->anchor->name);
5182
*r = TAILQ_NEXT(*r, entries);
5183
return;
5184
} else if (*depth == 0 && a != NULL)
5185
*a = *r;
5186
f = stack + (*depth)++;
5187
f->rs = *rs;
5188
f->r = *r;
5189
if ((*r)->anchor_wildcard) {
5190
struct pf_keth_anchor_node *parent = &(*r)->anchor->children;
5191
5192
if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) {
5193
*r = NULL;
5194
return;
5195
}
5196
*rs = &f->child->ruleset;
5197
} else {
5198
f->child = NULL;
5199
*rs = &(*r)->anchor->ruleset;
5200
}
5201
*r = TAILQ_FIRST((*rs)->active.rules);
5202
}
5203
5204
int
5205
pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
5206
struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
5207
struct pf_keth_rule **a, int *match)
5208
{
5209
struct pf_keth_anchor_stackframe *f;
5210
struct pf_keth_rule *fr;
5211
int quick = 0;
5212
5213
NET_EPOCH_ASSERT();
5214
5215
do {
5216
if (*depth <= 0)
5217
break;
5218
f = stack + *depth - 1;
5219
fr = PF_ETH_ANCHOR_RULE(f);
5220
if (f->child != NULL) {
5221
/*
5222
* This block traverses through
5223
* a wildcard anchor.
5224
*/
5225
if (match != NULL && *match) {
5226
/*
5227
* If any of "*" matched, then
5228
* "foo/ *" matched, mark frame
5229
* appropriately.
5230
*/
5231
PF_ETH_ANCHOR_SET_MATCH(f);
5232
*match = 0;
5233
}
5234
f->child = RB_NEXT(pf_keth_anchor_node,
5235
&fr->anchor->children, f->child);
5236
if (f->child != NULL) {
5237
*rs = &f->child->ruleset;
5238
*r = TAILQ_FIRST((*rs)->active.rules);
5239
if (*r == NULL)
5240
continue;
5241
else
5242
break;
5243
}
5244
}
5245
(*depth)--;
5246
if (*depth == 0 && a != NULL)
5247
*a = NULL;
5248
*rs = f->rs;
5249
if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match))
5250
quick = fr->quick;
5251
*r = TAILQ_NEXT(fr, entries);
5252
} while (*r == NULL);
5253
5254
return (quick);
5255
}
5256
5257
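/* Merge two addresses under a mask: network bits come from raddr, host bits from saddr */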
void
5258
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
5259
struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
5260
{
5261
switch (af) {
5262
#ifdef INET
5263
case AF_INET:
5264
naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
5265
((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
5266
break;
5267
#endif /* INET */
5268
#ifdef INET6
5269
case AF_INET6:
5270
naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
5271
((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
5272
naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
5273
((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
5274
naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
5275
((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
5276
naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
5277
((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
5278
break;
5279
#endif /* INET6 */
5280
}
5281
}
5282
5283
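/* Increment an address by one, carrying across the 32-bit words of an IPv6 address */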
void
5284
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
5285
{
5286
switch (af) {
5287
#ifdef INET
5288
case AF_INET:
5289
addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
5290
break;
5291
#endif /* INET */
5292
#ifdef INET6
5293
case AF_INET6:
5294
if (addr->addr32[3] == 0xffffffff) {
5295
addr->addr32[3] = 0;
5296
if (addr->addr32[2] == 0xffffffff) {
5297
addr->addr32[2] = 0;
5298
if (addr->addr32[1] == 0xffffffff) {
5299
addr->addr32[1] = 0;
5300
addr->addr32[0] =
5301
htonl(ntohl(addr->addr32[0]) + 1);
5302
} else
5303
addr->addr32[1] =
5304
htonl(ntohl(addr->addr32[1]) + 1);
5305
} else
5306
addr->addr32[2] =
5307
htonl(ntohl(addr->addr32[2]) + 1);
5308
} else
5309
addr->addr32[3] =
5310
htonl(ntohl(addr->addr32[3]) + 1);
5311
break;
5312
#endif /* INET6 */
5313
}
5314
}
5315
5316
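/*
 * Copy the action parameters (queues, dummynet pipes, TOS, priority, ...)
 * of a matching rule into the per-packet action structure.
 */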
void
5317
pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
5318
{
5319
/*
5320
* Modern rules use the same flags in rules as they do in states.
5321
*/
5322
a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
5323
PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
5324
5325
/*
5326
* Old-style scrub rules have different flags which need to be translated.
5327
*/
5328
if (r->rule_flag & PFRULE_RANDOMID)
5329
a->flags |= PFSTATE_RANDOMID;
5330
if (r->scrub_flags & PFSTATE_SETTOS || r->rule_flag & PFRULE_SET_TOS ) {
5331
a->flags |= PFSTATE_SETTOS;
5332
a->set_tos = r->set_tos;
5333
}
5334
5335
if (r->qid)
5336
a->qid = r->qid;
5337
if (r->pqid)
5338
a->pqid = r->pqid;
5339
if (r->rtableid >= 0)
5340
a->rtableid = r->rtableid;
5341
a->log |= r->log;
5342
if (r->min_ttl)
5343
a->min_ttl = r->min_ttl;
5344
if (r->max_mss)
5345
a->max_mss = r->max_mss;
5346
if (r->dnpipe)
5347
a->dnpipe = r->dnpipe;
5348
if (r->dnrpipe)
5349
a->dnrpipe = r->dnrpipe;
5350
if (r->dnpipe || r->dnrpipe) {
5351
if (r->free_flags & PFRULE_DN_IS_PIPE)
5352
a->flags |= PFSTATE_DN_IS_PIPE;
5353
else
5354
a->flags &= ~PFSTATE_DN_IS_PIPE;
5355
}
5356
if (r->scrub_flags & PFSTATE_SETPRIO) {
5357
a->set_prio[0] = r->set_prio[0];
5358
a->set_prio[1] = r->set_prio[1];
5359
}
5360
if (r->allow_opts)
5361
a->allow_opts = r->allow_opts;
5362
if (r->max_pkt_size)
5363
a->max_pkt_size = r->max_pkt_size;
5364
}
5365
5366
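/*
 * Find the local socket for this connection and record its uid/gid in
 * pd->lookup. Returns 1 on success, -1 if no matching PCB is found.
 */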
int
5367
pf_socket_lookup(struct pf_pdesc *pd)
5368
{
5369
struct pf_addr *saddr, *daddr;
5370
u_int16_t sport, dport;
5371
struct inpcbinfo *pi;
5372
struct inpcb *inp;
5373
5374
pd->lookup.uid = -1;
5375
pd->lookup.gid = -1;
5376
5377
switch (pd->proto) {
5378
case IPPROTO_TCP:
5379
sport = pd->hdr.tcp.th_sport;
5380
dport = pd->hdr.tcp.th_dport;
5381
pi = &V_tcbinfo;
5382
break;
5383
case IPPROTO_UDP:
5384
sport = pd->hdr.udp.uh_sport;
5385
dport = pd->hdr.udp.uh_dport;
5386
pi = &V_udbinfo;
5387
break;
5388
default:
5389
return (-1);
5390
}
5391
if (pd->dir == PF_IN) {
5392
saddr = pd->src;
5393
daddr = pd->dst;
5394
} else {
5395
u_int16_t p;
5396
5397
p = sport;
5398
sport = dport;
5399
dport = p;
5400
saddr = pd->dst;
5401
daddr = pd->src;
5402
}
5403
switch (pd->af) {
5404
#ifdef INET
5405
case AF_INET:
5406
inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
5407
dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
5408
if (inp == NULL) {
5409
inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
5410
daddr->v4, dport, INPLOOKUP_WILDCARD |
5411
INPLOOKUP_RLOCKPCB, NULL, pd->m);
5412
if (inp == NULL)
5413
return (-1);
5414
}
5415
break;
5416
#endif /* INET */
5417
#ifdef INET6
5418
case AF_INET6:
5419
inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
5420
dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
5421
if (inp == NULL) {
5422
inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
5423
&daddr->v6, dport, INPLOOKUP_WILDCARD |
5424
INPLOOKUP_RLOCKPCB, NULL, pd->m);
5425
if (inp == NULL)
5426
return (-1);
5427
}
5428
break;
5429
#endif /* INET6 */
5430
default:
5431
unhandled_af(pd->af);
5432
}
5433
INP_RLOCK_ASSERT(inp);
5434
pd->lookup.uid = inp->inp_cred->cr_uid;
5435
pd->lookup.gid = inp->inp_cred->cr_gid;
5436
INP_RUNLOCK(inp);
5437
5438
return (1);
5439
}
5440
5441
/* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity"
5442
* /\ (eoh - r) >= min_typelen >= 2 "safety" )
5443
*
5444
* warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
5445
*/
5446
uint8_t*
5447
pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
5448
u_int8_t min_typelen)
5449
{
5450
uint8_t *eoh = opts + hlen;
5451
5452
if (min_typelen < 2)
5453
return (NULL);
5454
5455
while ((eoh - opt) >= min_typelen) {
5456
switch (*opt) {
5457
case TCPOPT_EOL:
5458
/* FALLTHROUGH - Workaround the failure of some
5459
systems to NOP-pad their bzero'd option buffers,
5460
producing spurious EOLs */
5461
case TCPOPT_NOP:
5462
opt++;
5463
continue;
5464
default:
5465
if (opt[0] == type &&
5466
opt[1] >= min_typelen)
5467
return (opt);
5468
}
5469
5470
opt += MAX(opt[1], 2); /* evade infinite loops */
5471
}
5472
5473
return (NULL);
5474
}
5475
5476
u_int8_t
5477
pf_get_wscale(struct pf_pdesc *pd)
5478
{
5479
int olen;
5480
uint8_t opts[MAX_TCPOPTLEN], *opt;
5481
uint8_t wscale = 0;
5482
5483
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
5484
if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
5485
pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
5486
return (0);
5487
5488
opt = opts;
5489
while ((opt = pf_find_tcpopt(opt, opts, olen,
5490
TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
5491
wscale = opt[2];
5492
wscale = MIN(wscale, TCP_MAX_WINSHIFT);
5493
wscale |= PF_WSCALE_FLAG;
5494
5495
opt += opt[1];
5496
}
5497
5498
return (wscale);
5499
}
5500
5501
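/*
 * Extract the MSS option from a TCP header; returns V_tcp_mssdflt when the
 * option is absent and 0 when the options cannot be pulled.
 */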
u_int16_t
5502
pf_get_mss(struct pf_pdesc *pd)
5503
{
5504
int olen;
5505
uint8_t opts[MAX_TCPOPTLEN], *opt;
5506
u_int16_t mss = V_tcp_mssdflt;
5507
5508
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
5509
if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
5510
pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
5511
return (0);
5512
5513
opt = opts;
5514
while ((opt = pf_find_tcpopt(opt, opts, olen,
5515
TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
5516
memcpy(&mss, (opt + 2), 2);
5517
mss = ntohs(mss);
5518
opt += opt[1];
5519
}
5520
5521
return (mss);
5522
}
5523
5524
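/*
 * Derive an MSS for the peer from the route MTU towards addr, clamped to the
 * offered MSS and to a 64-byte minimum.
 */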
static u_int16_t
5525
pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
5526
{
5527
struct nhop_object *nh;
5528
#ifdef INET6
5529
struct in6_addr dst6;
5530
uint32_t scopeid;
5531
#endif /* INET6 */
5532
int hlen = 0;
5533
uint16_t mss = 0;
5534
5535
NET_EPOCH_ASSERT();
5536
5537
switch (af) {
5538
#ifdef INET
5539
case AF_INET:
5540
hlen = sizeof(struct ip);
5541
nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0);
5542
if (nh != NULL)
5543
mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
5544
break;
5545
#endif /* INET */
5546
#ifdef INET6
5547
case AF_INET6:
5548
hlen = sizeof(struct ip6_hdr);
5549
in6_splitscope(&addr->v6, &dst6, &scopeid);
5550
nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0);
5551
if (nh != NULL)
5552
mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
5553
break;
5554
#endif /* INET6 */
5555
}
5556
5557
mss = max(V_tcp_mssdflt, mss);
5558
mss = min(mss, offer);
5559
mss = max(mss, 64); /* sanity - at least max opt space */
5560
return (mss);
5561
}
5562
5563
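/*
 * Generate a TCP initial sequence number from a keyed SHA-512 hash of the
 * connection tuple, plus a random increment.
 */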
static u_int32_t
5564
pf_tcp_iss(struct pf_pdesc *pd)
5565
{
5566
SHA512_CTX ctx;
5567
union {
5568
uint8_t bytes[SHA512_DIGEST_LENGTH];
5569
uint32_t words[1];
5570
} digest;
5571
5572
if (V_pf_tcp_secret_init == 0) {
5573
arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
5574
SHA512_Init(&V_pf_tcp_secret_ctx);
5575
SHA512_Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
5576
sizeof(V_pf_tcp_secret));
5577
V_pf_tcp_secret_init = 1;
5578
}
5579
5580
ctx = V_pf_tcp_secret_ctx;
5581
5582
SHA512_Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
5583
SHA512_Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
5584
switch (pd->af) {
5585
case AF_INET6:
5586
SHA512_Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
5587
SHA512_Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
5588
break;
5589
case AF_INET:
5590
SHA512_Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
5591
SHA512_Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
5592
break;
5593
}
5594
SHA512_Final(digest.bytes, &ctx);
5595
V_pf_tcp_iss_off += 4096;
5596
#define ISN_RANDOM_INCREMENT (4096 - 1)
5597
return (digest.words[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
5598
V_pf_tcp_iss_off);
5599
#undef ISN_RANDOM_INCREMENT
5600
}
5601
5602
static bool
5603
pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r)
5604
{
5605
bool match = true;
5606
5607
/* Always matches if not set */
5608
if (! r->isset)
5609
return (!r->neg);
5610
5611
for (int i = 0; i < ETHER_ADDR_LEN; i++) {
5612
if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) {
5613
match = false;
5614
break;
5615
}
5616
}
5617
5618
return (match ^ r->neg);
5619
}
5620
5621
static int
5622
pf_match_eth_tag(struct mbuf *m, struct pf_keth_rule *r, int *tag, int mtag)
5623
{
5624
if (*tag == -1)
5625
*tag = mtag;
5626
5627
return ((!r->match_tag_not && r->match_tag == *tag) ||
5628
(r->match_tag_not && r->match_tag != *tag));
5629
}
5630
5631
static void
5632
pf_bridge_to(struct ifnet *ifp, struct mbuf *m)
5633
{
5634
/* If we don't have the interface, drop the packet. */
5635
if (ifp == NULL) {
5636
m_freem(m);
5637
return;
5638
}
5639
5640
switch (ifp->if_type) {
5641
case IFT_ETHER:
5642
case IFT_XETHER:
5643
case IFT_L2VLAN:
5644
case IFT_BRIDGE:
5645
case IFT_IEEE8023ADLAG:
5646
break;
5647
default:
5648
m_freem(m);
5649
return;
5650
}
5651
5652
ifp->if_transmit(ifp, m);
5653
}
5654
5655
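/*
 * Evaluate the Ethernet (layer 2) ruleset against a packet and carry out the
 * resulting action: tagging, queue assignment, dummynet and bridge-to.
 */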
static int
5656
pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
5657
{
5658
#ifdef INET
5659
struct ip ip;
5660
#endif /* INET */
5661
#ifdef INET6
5662
struct ip6_hdr ip6;
5663
#endif /* INET6 */
5664
struct mbuf *m = *m0;
5665
struct ether_header *e;
5666
struct pf_keth_rule *r, *rm, *a = NULL;
5667
struct pf_keth_ruleset *ruleset = NULL;
5668
struct pf_mtag *mtag;
5669
struct pf_keth_ruleq *rules;
5670
struct pf_addr *src = NULL, *dst = NULL;
5671
struct pfi_kkif *bridge_to;
5672
sa_family_t af = 0;
5673
uint16_t proto;
5674
int asd = 0, match = 0;
5675
int tag = -1;
5676
uint8_t action;
5677
struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACK_MAX];
5678
5679
MPASS(kif->pfik_ifp->if_vnet == curvnet);
5680
NET_EPOCH_ASSERT();
5681
5682
PF_RULES_RLOCK_TRACKER;
5683
5684
SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m);
5685
5686
mtag = pf_find_mtag(m);
5687
if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
5688
/* Dummynet re-injects packets after they've
5689
* completed their delay. We've already
5690
* processed them, so pass unconditionally. */
5691
5692
/* But only once. We may see the packet multiple times (e.g.
5693
* PFIL_IN/PFIL_OUT). */
5694
pf_dummynet_flag_remove(m, mtag);
5695
5696
return (PF_PASS);
5697
}
5698
5699
if (__predict_false(m->m_len < sizeof(struct ether_header)) &&
5700
(m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) {
5701
DPFPRINTF(PF_DEBUG_URGENT,
5702
"%s: m_len < sizeof(struct ether_header)"
5703
", pullup failed", __func__);
5704
return (PF_DROP);
5705
}
5706
e = mtod(m, struct ether_header *);
5707
proto = ntohs(e->ether_type);
5708
5709
switch (proto) {
5710
#ifdef INET
5711
case ETHERTYPE_IP: {
5712
if (m_length(m, NULL) < (sizeof(struct ether_header) +
5713
sizeof(ip)))
5714
return (PF_DROP);
5715
5716
af = AF_INET;
5717
m_copydata(m, sizeof(struct ether_header), sizeof(ip),
5718
(caddr_t)&ip);
5719
src = (struct pf_addr *)&ip.ip_src;
5720
dst = (struct pf_addr *)&ip.ip_dst;
5721
break;
5722
}
5723
#endif /* INET */
5724
#ifdef INET6
5725
case ETHERTYPE_IPV6: {
5726
if (m_length(m, NULL) < (sizeof(struct ether_header) +
5727
sizeof(ip6)))
5728
return (PF_DROP);
5729
5730
af = AF_INET6;
5731
m_copydata(m, sizeof(struct ether_header), sizeof(ip6),
5732
(caddr_t)&ip6);
5733
src = (struct pf_addr *)&ip6.ip6_src;
5734
dst = (struct pf_addr *)&ip6.ip6_dst;
5735
break;
5736
}
5737
#endif /* INET6 */
5738
}
5739
5740
PF_RULES_RLOCK();
5741
5742
ruleset = V_pf_keth;
5743
rules = atomic_load_ptr(&ruleset->active.rules);
5744
for (r = TAILQ_FIRST(rules), rm = NULL; r != NULL;) {
5745
counter_u64_add(r->evaluations, 1);
5746
SDT_PROBE2(pf, eth, test_rule, test, r->nr, r);
5747
5748
if (pfi_kkif_match(r->kif, kif) == r->ifnot) {
5749
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5750
"kif");
5751
r = r->skip[PFE_SKIP_IFP].ptr;
5752
}
5753
else if (r->direction && r->direction != dir) {
5754
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5755
"dir");
5756
r = r->skip[PFE_SKIP_DIR].ptr;
5757
}
5758
else if (r->proto && r->proto != proto) {
5759
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5760
"proto");
5761
r = r->skip[PFE_SKIP_PROTO].ptr;
5762
}
5763
else if (! pf_match_eth_addr(e->ether_shost, &r->src)) {
5764
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5765
"src");
5766
r = r->skip[PFE_SKIP_SRC_ADDR].ptr;
5767
}
5768
else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) {
5769
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5770
"dst");
5771
r = r->skip[PFE_SKIP_DST_ADDR].ptr;
5772
}
5773
else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af,
5774
r->ipsrc.neg, kif, M_GETFIB(m))) {
5775
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5776
"ip_src");
5777
r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr;
5778
}
5779
else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af,
5780
r->ipdst.neg, kif, M_GETFIB(m))) {
5781
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5782
"ip_dst");
5783
r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr;
5784
}
5785
else if (r->match_tag && !pf_match_eth_tag(m, r, &tag,
5786
mtag ? mtag->tag : 0)) {
5787
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5788
"match_tag");
5789
r = TAILQ_NEXT(r, entries);
5790
}
5791
else {
5792
if (r->tag)
5793
tag = r->tag;
5794
if (r->anchor == NULL) {
5795
/* Rule matches */
5796
rm = r;
5797
5798
SDT_PROBE2(pf, eth, test_rule, match, r->nr, r);
5799
5800
if (r->quick)
5801
break;
5802
5803
r = TAILQ_NEXT(r, entries);
5804
} else {
5805
pf_step_into_keth_anchor(anchor_stack, &asd,
5806
&ruleset, &r, &a, &match);
5807
}
5808
}
5809
if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd,
5810
&ruleset, &r, &a, &match))
5811
break;
5812
}
5813
5814
r = rm;
5815
5816
SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r);
5817
5818
/* Default to pass. */
5819
if (r == NULL) {
5820
PF_RULES_RUNLOCK();
5821
return (PF_PASS);
5822
}
5823
5824
/* Execute action. */
5825
counter_u64_add(r->packets[dir == PF_OUT], 1);
5826
counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
5827
pf_update_timestamp(r);
5828
5829
/* Shortcut. Don't tag if we're just going to drop anyway. */
5830
if (r->action == PF_DROP) {
5831
PF_RULES_RUNLOCK();
5832
return (PF_DROP);
5833
}
5834
5835
if (tag > 0) {
5836
if (mtag == NULL)
5837
mtag = pf_get_mtag(m);
5838
if (mtag == NULL) {
5839
PF_RULES_RUNLOCK();
5840
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5841
return (PF_DROP);
5842
}
5843
mtag->tag = tag;
5844
}
5845
5846
if (r->qid != 0) {
5847
if (mtag == NULL)
5848
mtag = pf_get_mtag(m);
5849
if (mtag == NULL) {
5850
PF_RULES_RUNLOCK();
5851
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5852
return (PF_DROP);
5853
}
5854
mtag->qid = r->qid;
5855
}
5856
5857
action = r->action;
5858
bridge_to = r->bridge_to;
5859
5860
/* Dummynet */
5861
if (r->dnpipe) {
5862
struct ip_fw_args dnflow;
5863
5864
/* Drop packet if dummynet is not loaded. */
5865
if (ip_dn_io_ptr == NULL) {
5866
PF_RULES_RUNLOCK();
5867
m_freem(m);
5868
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5869
return (PF_DROP);
5870
}
5871
if (mtag == NULL)
5872
mtag = pf_get_mtag(m);
5873
if (mtag == NULL) {
5874
PF_RULES_RUNLOCK();
5875
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5876
return (PF_DROP);
5877
}
5878
5879
bzero(&dnflow, sizeof(dnflow));
5880
5881
/* We don't have port numbers here, so we set 0. That means
5882
* that we'll be somewhat limited in distinguishing flows (i.e.
5883
* only based on IP addresses, not based on port numbers), but
5884
* it's better than nothing. */
5885
dnflow.f_id.dst_port = 0;
5886
dnflow.f_id.src_port = 0;
5887
dnflow.f_id.proto = 0;
5888
5889
dnflow.rule.info = r->dnpipe;
5890
dnflow.rule.info |= IPFW_IS_DUMMYNET;
5891
if (r->dnflags & PFRULE_DN_IS_PIPE)
5892
dnflow.rule.info |= IPFW_IS_PIPE;
5893
5894
dnflow.f_id.extra = dnflow.rule.info;
5895
5896
dnflow.flags = dir == PF_IN ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
5897
dnflow.flags |= IPFW_ARGS_ETHER;
5898
dnflow.ifp = kif->pfik_ifp;
5899
5900
switch (af) {
5901
case AF_INET:
5902
dnflow.f_id.addr_type = 4;
5903
dnflow.f_id.src_ip = src->v4.s_addr;
5904
dnflow.f_id.dst_ip = dst->v4.s_addr;
5905
break;
5906
case AF_INET6:
5907
dnflow.flags |= IPFW_ARGS_IP6;
5908
dnflow.f_id.addr_type = 6;
5909
dnflow.f_id.src_ip6 = src->v6;
5910
dnflow.f_id.dst_ip6 = dst->v6;
5911
break;
5912
}
5913
5914
PF_RULES_RUNLOCK();
5915
5916
mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
5917
ip_dn_io_ptr(m0, &dnflow);
5918
if (*m0 != NULL)
5919
pf_dummynet_flag_remove(m, mtag);
5920
} else {
5921
PF_RULES_RUNLOCK();
5922
}
5923
5924
if (action == PF_PASS && bridge_to) {
5925
pf_bridge_to(bridge_to->pfik_ifp, *m0);
5926
*m0 = NULL; /* We've eaten the packet. */
5927
}
5928
5929
return (action);
5930
}
5931
5932
#define PF_TEST_ATTRIB(t, a) \
5933
if (t) { \
5934
r = a; \
5935
continue; \
5936
} else do { \
5937
} while (0)
5938
5939
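/*
 * Apply a rule's af-to or nat-to/rdr-to translation and remember it as the
 * packet's translation rule. Returns PFRES_MATCH on success, PFRES_MAX when
 * the rule specifies no translation.
 */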
static __inline u_short
5940
pf_rule_apply_nat(struct pf_test_ctx *ctx, struct pf_krule *r)
5941
{
5942
struct pf_pdesc *pd = ctx->pd;
5943
u_short transerror;
5944
u_int8_t nat_action;
5945
5946
if (r->rule_flag & PFRULE_AFTO) {
5947
/* Don't translate if there was an old style NAT rule */
5948
if (ctx->nr != NULL)
5949
return (PFRES_TRANSLATE);
5950
5951
/* pass af-to rules, unsupported on match rules */
5952
KASSERT(r->action != PF_MATCH, ("%s: af-to on match rule", __func__));
5953
/* XXX I can imagine scenarios where we have both NAT and RDR source tracking */
5954
ctx->nat_pool = &(r->nat);
5955
ctx->nr = r;
5956
pd->naf = r->naf;
5957
if (pf_get_transaddr_af(ctx->nr, pd) == -1) {
5958
return (PFRES_TRANSLATE);
5959
}
5960
return (PFRES_MATCH);
5961
} else if (r->rdr.cur || r->nat.cur) {
5962
/* Don't translate if there was an old style NAT rule */
5963
if (ctx->nr != NULL)
5964
return (PFRES_TRANSLATE);
5965
5966
/* match/pass nat-to/rdr-to rules */
5967
ctx->nr = r;
5968
if (r->nat.cur) {
5969
nat_action = PF_NAT;
5970
ctx->nat_pool = &(r->nat);
5971
} else {
5972
nat_action = PF_RDR;
5973
ctx->nat_pool = &(r->rdr);
5974
}
5975
5976
transerror = pf_get_transaddr(ctx, ctx->nr,
5977
nat_action, ctx->nat_pool);
5978
if (transerror == PFRES_MATCH) {
5979
ctx->rewrite += pf_translate_compat(ctx);
5980
return(PFRES_MATCH);
5981
}
5982
return (transerror);
5983
}
5984
5985
return (PFRES_MAX);
5986
}
5987
5988
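/*
 * Evaluate the given filter ruleset against the packet described in ctx,
 * using the rule skip steps to bypass rules that cannot match.
 */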
enum pf_test_status
5989
pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
5990
{
5991
struct pf_krule_item *ri;
5992
struct pf_krule *r;
5993
struct pf_krule *save_a;
5994
struct pf_kruleset *save_aruleset;
5995
struct pf_pdesc *pd = ctx->pd;
5996
u_short transerror;
5997
5998
r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr);
5999
while (r != NULL) {
6000
struct pf_statelim *stlim = NULL;
6001
struct pf_sourcelim *srlim = NULL;
6002
struct pf_source *sr = NULL;
6003
unsigned int gen;
6004
6005
if (ctx->pd->related_rule) {
6006
*ctx->rm = ctx->pd->related_rule;
6007
break;
6008
}
6009
PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
6010
TAILQ_NEXT(r, entries));
6011
/* Don't count expired rule evaluations. */
6012
pf_counter_u64_add(&r->evaluations, 1);
6013
PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
6014
r->skip[PF_SKIP_IFP]);
6015
PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
6016
r->skip[PF_SKIP_DIR]);
6017
PF_TEST_ATTRIB(r->af && r->af != pd->af,
6018
r->skip[PF_SKIP_AF]);
6019
PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
6020
r->skip[PF_SKIP_PROTO]);
6021
PF_TEST_ATTRIB(PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, pd->naf,
6022
r->src.neg, pd->kif, M_GETFIB(pd->m)),
6023
r->skip[PF_SKIP_SRC_ADDR]);
6024
PF_TEST_ATTRIB(PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
6025
r->dst.neg, NULL, M_GETFIB(pd->m)),
6026
r->skip[PF_SKIP_DST_ADDR]);
6027
switch (pd->virtual_proto) {
6028
case PF_VPROTO_FRAGMENT:
6029
/* tcp/udp only. port_op always 0 in other cases */
6030
PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
6031
TAILQ_NEXT(r, entries));
6032
PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
6033
TAILQ_NEXT(r, entries));
6034
/* icmp only. type/code always 0 in other cases */
6035
PF_TEST_ATTRIB((r->type || r->code),
6036
TAILQ_NEXT(r, entries));
6037
/* tcp/udp only. {uid|gid}.op always 0 in other cases */
6038
PF_TEST_ATTRIB((r->gid.op || r->uid.op),
6039
TAILQ_NEXT(r, entries));
6040
break;
6041
6042
case IPPROTO_TCP:
6043
PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th))
6044
!= r->flags,
6045
TAILQ_NEXT(r, entries));
6046
/* FALLTHROUGH */
6047
case IPPROTO_SCTP:
6048
case IPPROTO_UDP:
6049
/* tcp/udp only. port_op always 0 in other cases */
6050
PF_TEST_ATTRIB(r->src.port_op && !pf_match_port(r->src.port_op,
6051
r->src.port[0], r->src.port[1], pd->nsport),
6052
r->skip[PF_SKIP_SRC_PORT]);
6053
/* tcp/udp only. port_op always 0 in other cases */
6054
PF_TEST_ATTRIB(r->dst.port_op && !pf_match_port(r->dst.port_op,
6055
r->dst.port[0], r->dst.port[1], pd->ndport),
6056
r->skip[PF_SKIP_DST_PORT]);
6057
/* tcp/udp only. uid.op always 0 in other cases */
6058
PF_TEST_ATTRIB(r->uid.op && (pd->lookup.done || (pd->lookup.done =
6059
pf_socket_lookup(pd), 1)) &&
6060
!pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
6061
pd->lookup.uid),
6062
TAILQ_NEXT(r, entries));
6063
/* tcp/udp only. gid.op always 0 in other cases */
6064
PF_TEST_ATTRIB(r->gid.op && (pd->lookup.done || (pd->lookup.done =
6065
pf_socket_lookup(pd), 1)) &&
6066
!pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
6067
pd->lookup.gid),
6068
TAILQ_NEXT(r, entries));
6069
break;
6070
6071
case IPPROTO_ICMP:
6072
case IPPROTO_ICMPV6:
6073
/* icmp only. type always 0 in other cases */
6074
PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1,
6075
TAILQ_NEXT(r, entries));
6076
/* icmp only. code always 0 in other cases */
6077
PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1,
6078
TAILQ_NEXT(r, entries));
6079
break;
6080
6081
default:
6082
break;
6083
}
6084
PF_TEST_ATTRIB(r->tos && !(r->tos == pd->tos),
6085
TAILQ_NEXT(r, entries));
6086
PF_TEST_ATTRIB(r->prio &&
6087
!pf_match_ieee8021q_pcp(r->prio, pd->m),
6088
TAILQ_NEXT(r, entries));
6089
PF_TEST_ATTRIB(r->prob &&
6090
r->prob <= arc4random(),
6091
TAILQ_NEXT(r, entries));
6092
PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r,
6093
&ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0),
6094
TAILQ_NEXT(r, entries));
6095
PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
6096
r->rcvifnot),
6097
TAILQ_NEXT(r, entries));
6098
PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
6099
pd->virtual_proto != PF_VPROTO_FRAGMENT),
6100
TAILQ_NEXT(r, entries));
6101
PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY &&
6102
(pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match(
6103
pf_osfp_fingerprint(pd, ctx->th),
6104
r->os_fingerprint)),
6105
TAILQ_NEXT(r, entries));
6106
if (r->statelim.id != PF_STATELIM_ID_NONE) {
6107
stlim = pf_statelim_find(r->statelim.id);
6108
6109
/*
 * Treat a missing limiter like an exhausted limiter.
 * There is no "backend" to get a resource out of,
 * so the rule can't create state.
 */
6114
PF_TEST_ATTRIB(stlim == NULL, TAILQ_NEXT(r, entries));
6115
6116
/*
6117
* An overcommitted pool means this rule
6118
* can't create state.
6119
*/
6120
if (stlim->pfstlim_inuse >= stlim->pfstlim_limit) {
6121
gen = pf_statelim_enter(stlim);
6122
stlim->pfstlim_counters.hardlimited++;
6123
pf_statelim_leave(stlim, gen);
6124
if (r->statelim.limiter_action == PF_LIMITER_BLOCK) {
6125
ctx->limiter_drop = 1;
6126
REASON_SET(&ctx->reason, PFRES_MAXSTATES);
6127
break; /* stop rule processing */
6128
}
6129
r = TAILQ_NEXT(r, entries);
6130
continue;
6131
}
6132
6133
/*
6134
* Is access to the pool rate limited?
6135
*/
6136
if (stlim->pfstlim_rate.limit != 0) {
6137
struct timespec ts;
6138
getnanouptime(&ts);
6139
uint64_t diff = SEC_TO_NSEC(ts.tv_sec) +
6140
ts.tv_nsec - stlim->pfstlim_rate_ts;
6141
6142
if (diff < stlim->pfstlim_rate_token) {
6143
gen = pf_statelim_enter(stlim);
6144
stlim->pfstlim_counters.ratelimited++;
6145
pf_statelim_leave(stlim, gen);
6146
if (r->statelim.limiter_action ==
6147
PF_LIMITER_BLOCK) {
6148
ctx->limiter_drop = 1;
6149
REASON_SET(&ctx->reason,
6150
PFRES_MAXSTATES);
6151
/* stop rule processing */
6152
break;
6153
}
6154
r = TAILQ_NEXT(r, entries);
6155
continue;
6156
}
6157
6158
if (diff > stlim->pfstlim_rate_bucket) {
6159
stlim->pfstlim_rate_ts =
6160
SEC_TO_NSEC(ts.tv_sec) + ts.tv_nsec -
6161
stlim->pfstlim_rate_bucket;
6162
}
6163
}
6164
}
6165
6166
if (r->sourcelim.id != PF_SOURCELIM_ID_NONE) {
6167
struct pf_source key;
6168
6169
srlim = pf_sourcelim_find(r->sourcelim.id);
6170
6171
/*
 * Treat a missing pool like an overcommitted pool.
 * There is no "backend" to get a resource out of,
 * so the rule can't create state.
 */
6176
PF_TEST_ATTRIB(srlim == NULL, TAILQ_NEXT(r, entries));
6177
6178
pf_source_key(srlim, &key, ctx->pd->af,
6179
ctx->pd->src);
6180
sr = pf_source_find(srlim, &key);
6181
if (sr != NULL) {
6182
/*
6183
* An overcommitted limiter means this rule
6184
* can't create state.
6185
*/
6186
if (sr->pfsr_inuse >= srlim->pfsrlim_limit) {
6187
sr->pfsr_counters.hardlimited++;
6188
gen = pf_sourcelim_enter(srlim);
6189
srlim->pfsrlim_counters.hardlimited++;
6190
pf_sourcelim_leave(srlim, gen);
6191
if (r->sourcelim.limiter_action ==
6192
PF_LIMITER_BLOCK) {
6193
ctx->limiter_drop = 1;
6194
REASON_SET(&ctx->reason,
6195
PFRES_SRCLIMIT);
6196
/* stop rule processing */
6197
break;
6198
}
6199
r = TAILQ_NEXT(r, entries);
6200
continue;
6201
}
6202
6203
/*
6204
* Is access to the pool rate limited?
6205
*/
6206
if (srlim->pfsrlim_rate.limit != 0) {
6207
struct timespec ts;
6208
getnanouptime(&ts);
6209
uint64_t diff = SEC_TO_NSEC(ts.tv_sec) +
6210
ts.tv_nsec - sr->pfsr_rate_ts;
6211
6212
if (diff < srlim->pfsrlim_rate_token) {
6213
sr->pfsr_counters.ratelimited++;
6214
gen = pf_sourcelim_enter(srlim);
6215
srlim->pfsrlim_counters
6216
.ratelimited++;
6217
pf_sourcelim_leave(srlim, gen);
6218
if (r->sourcelim.limiter_action ==
6219
PF_LIMITER_BLOCK) {
6220
ctx->limiter_drop = 1;
6221
REASON_SET(&ctx->reason,
6222
PFRES_SRCLIMIT);
6223
/* stop rules */
6224
break;
6225
}
6226
r = TAILQ_NEXT(r, entries);
6227
continue;
6228
}
6229
6230
if (diff > srlim->pfsrlim_rate_bucket) {
6231
sr->pfsr_rate_ts =
6232
SEC_TO_NSEC(ts.tv_sec) + ts.tv_nsec -
6233
srlim->pfsrlim_rate_bucket;
6234
}
6235
}
6236
} else {
6237
/*
 * There is no source entry yet; a freshly created one
 * should be able to admit a state, provided we are
 * still allowed to allocate another source entry.
 */
6241
6242
if (srlim->pfsrlim_nsources >=
6243
srlim->pfsrlim_entries) {
6244
gen = pf_sourcelim_enter(srlim);
6245
srlim->pfsrlim_counters.addrlimited++;
6246
pf_sourcelim_leave(srlim, gen);
6247
r = TAILQ_NEXT(r, entries);
6248
continue;
6249
}
6250
}
6251
}
6252
6253
/* must be last! */
6254
if (r->pktrate.limit) {
6255
PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
6256
TAILQ_NEXT(r, entries));
6257
}
6258
/* FALLTHROUGH */
6259
if (r->tag)
6260
ctx->tag = r->tag;
6261
if (r->anchor == NULL) {
6262
6263
if (r->rule_flag & PFRULE_ONCE) {
6264
uint32_t rule_flag;
6265
6266
rule_flag = r->rule_flag;
6267
if ((rule_flag & PFRULE_EXPIRED) == 0 &&
6268
atomic_cmpset_int(&r->rule_flag, rule_flag,
6269
rule_flag | PFRULE_EXPIRED)) {
6270
r->exptime = time_uptime;
6271
} else {
6272
r = TAILQ_NEXT(r, entries);
6273
continue;
6274
}
6275
}
6276
6277
if (r->action == PF_MATCH) {
6278
/*
 * Apply translations before increasing counters,
 * in case the translation fails.
 */
6282
transerror = pf_rule_apply_nat(ctx, r);
6283
switch (transerror) {
6284
case PFRES_MATCH:
6285
/* Translation action found in rule and applied successfully */
6286
case PFRES_MAX:
6287
/* No translation action found in rule */
6288
break;
6289
default:
6290
/* Translation action found in rule but failed to apply */
6291
REASON_SET(&ctx->reason, transerror);
6292
return (PF_TEST_FAIL);
6293
}
6294
ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO);
6295
if (ri == NULL) {
6296
REASON_SET(&ctx->reason, PFRES_MEMORY);
6297
return (PF_TEST_FAIL);
6298
}
6299
ri->r = r;
6300
6301
if (SLIST_EMPTY(ctx->match_rules)) {
6302
SLIST_INSERT_HEAD(ctx->match_rules, ri, entry);
6303
} else {
6304
SLIST_INSERT_AFTER(ctx->last_match_rule, ri, entry);
6305
}
6306
ctx->last_match_rule = ri;
6307
6308
pf_rule_to_actions(r, &pd->act);
6309
if (r->log)
6310
PFLOG_PACKET(r->action, PFRES_MATCH, r,
6311
ctx->a, ruleset, pd, 1, NULL);
6312
} else {
6313
/* found matching rule r */
*ctx->rm = r;
/* the anchor rule (with its ruleset) that r belongs to */
*ctx->am = ctx->a;
/* the ruleset that r belongs to */
*ctx->rsm = ruleset;
/* the ruleset that the anchor belongs to */
ctx->arsm = ctx->aruleset;
/* state/source limiter pools */
6332
6333
ctx->statelim = stlim;
6334
ctx->sourcelim = srlim;
6335
ctx->source = sr;
6336
}
6337
if (pd->act.log & PF_LOG_MATCHES)
6338
pf_log_matches(pd, r, ctx->a, ruleset, ctx->match_rules);
6339
if (r->quick) {
6340
ctx->test_status = PF_TEST_QUICK;
6341
break;
6342
}
6343
} else {
6344
save_a = ctx->a;
6345
save_aruleset = ctx->aruleset;
6346
6347
ctx->a = r; /* remember anchor */
6348
ctx->aruleset = ruleset; /* and its ruleset */
6349
if (ctx->a->quick)
6350
ctx->test_status = PF_TEST_QUICK;
6351
/*
6352
* Note: we don't need to restore if we are not going
6353
* to continue with ruleset evaluation.
6354
*/
6355
if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
6356
break;
6357
}
6358
ctx->a = save_a;
6359
ctx->aruleset = save_aruleset;
6360
}
6361
r = TAILQ_NEXT(r, entries);
6362
}
6363
6364
6365
return (ctx->test_status);
6366
}
6367
6368
static int
6369
pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
6370
struct pf_pdesc *pd, struct pf_krule **am,
6371
struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp,
6372
struct pf_krule_slist *match_rules)
6373
{
6374
struct pf_krule *r = NULL;
6375
struct pf_kruleset *ruleset = NULL;
6376
struct pf_test_ctx ctx;
6377
u_short transerror;
6378
int action = PF_PASS;
6379
u_int16_t bproto_sum = 0, bip_sum = 0;
6380
enum pf_test_status rv;
6381
6382
PF_RULES_RASSERT();
6383
6384
bzero(&ctx, sizeof(ctx));
6385
ctx.tag = -1;
6386
ctx.pd = pd;
6387
ctx.rm = rm;
6388
ctx.am = am;
6389
ctx.rsm = rsm;
6390
ctx.th = &pd->hdr.tcp;
6391
ctx.reason = *reason;
6392
ctx.match_rules = match_rules;
6393
6394
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
6395
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
6396
6397
if (inp != NULL) {
6398
INP_LOCK_ASSERT(inp);
6399
pd->lookup.uid = inp->inp_cred->cr_uid;
6400
pd->lookup.gid = inp->inp_cred->cr_gid;
6401
pd->lookup.done = 1;
6402
}
6403
6404
if (pd->ip_sum)
6405
bip_sum = *pd->ip_sum;
6406
6407
switch (pd->virtual_proto) {
6408
case IPPROTO_TCP:
6409
bproto_sum = ctx.th->th_sum;
6410
pd->nsport = ctx.th->th_sport;
6411
pd->ndport = ctx.th->th_dport;
6412
break;
6413
case IPPROTO_UDP:
6414
bproto_sum = pd->hdr.udp.uh_sum;
6415
pd->nsport = pd->hdr.udp.uh_sport;
6416
pd->ndport = pd->hdr.udp.uh_dport;
6417
break;
6418
case IPPROTO_SCTP:
6419
pd->nsport = pd->hdr.sctp.src_port;
6420
pd->ndport = pd->hdr.sctp.dest_port;
6421
break;
6422
#ifdef INET
6423
case IPPROTO_ICMP:
6424
MPASS(pd->af == AF_INET);
6425
ctx.icmptype = pd->hdr.icmp.icmp_type;
6426
ctx.icmpcode = pd->hdr.icmp.icmp_code;
6427
ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
6428
&ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
6429
if (ctx.icmp_dir == PF_IN) {
6430
pd->nsport = ctx.virtual_id;
6431
pd->ndport = ctx.virtual_type;
6432
} else {
6433
pd->nsport = ctx.virtual_type;
6434
pd->ndport = ctx.virtual_id;
6435
}
6436
break;
6437
#endif /* INET */
6438
#ifdef INET6
6439
case IPPROTO_ICMPV6:
6440
MPASS(pd->af == AF_INET6);
6441
ctx.icmptype = pd->hdr.icmp6.icmp6_type;
6442
ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
6443
ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
6444
&ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
6445
if (ctx.icmp_dir == PF_IN) {
6446
pd->nsport = ctx.virtual_id;
6447
pd->ndport = ctx.virtual_type;
6448
} else {
6449
pd->nsport = ctx.virtual_type;
6450
pd->ndport = ctx.virtual_id;
6451
}
6452
6453
break;
6454
#endif /* INET6 */
6455
default:
6456
pd->nsport = pd->ndport = 0;
6457
break;
6458
}
6459
pd->osport = pd->nsport;
6460
pd->odport = pd->ndport;
6461
6462
/* check packet for BINAT/NAT/RDR */
6463
transerror = pf_get_translation(&ctx);
6464
switch (transerror) {
6465
default:
6466
/* A translation error occurred. */
6467
REASON_SET(&ctx.reason, transerror);
6468
goto cleanup;
6469
case PFRES_MAX:
6470
/* No match. */
6471
break;
6472
case PFRES_MATCH:
6473
KASSERT(ctx.sk != NULL, ("%s: null sk", __func__));
6474
KASSERT(ctx.nk != NULL, ("%s: null nk", __func__));
6475
if (ctx.nr->log) {
6476
PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a,
6477
ruleset, pd, 1, NULL);
6478
}
6479
6480
ctx.rewrite += pf_translate_compat(&ctx);
6481
ctx.nat_pool = &(ctx.nr->rdr);
6482
}
6483
6484
*ctx.rm = &V_pf_default_rule;
6485
if (ctx.nr && ctx.nr->natpass) {
6486
r = ctx.nr;
6487
ruleset = *ctx.rsm;
6488
} else {
6489
ruleset = &pf_main_ruleset;
6490
rv = pf_match_rule(&ctx, ruleset);
6491
if (rv == PF_TEST_FAIL || ctx.limiter_drop == 1) {
6492
REASON_SET(reason, ctx.reason);
6493
goto cleanup;
6494
}
6495
6496
r = *ctx.rm; /* matching rule */
6497
ctx.a = *ctx.am; /* rule that defines an anchor containing 'r' */
6498
ruleset = *ctx.rsm; /* ruleset of the anchor defined by the rule 'a' */
6499
ctx.aruleset = ctx.arsm; /* ruleset of the 'a' rule itself */
6500
6501
/* apply actions for last matching pass/block rule */
6502
pf_rule_to_actions(r, &pd->act);
6503
transerror = pf_rule_apply_nat(&ctx, r);
6504
switch (transerror) {
6505
case PFRES_MATCH:
6506
/* Translation action found in rule and applied successfully */
6507
case PFRES_MAX:
6508
/* No translation action found in rule */
6509
break;
6510
default:
6511
/* Translation action found in rule but failed to apply */
6512
REASON_SET(&ctx.reason, transerror);
6513
goto cleanup;
6514
}
6515
}
6516
6517
REASON_SET(&ctx.reason, PFRES_MATCH);
6518
6519
if (r->log) {
6520
if (ctx.rewrite)
6521
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
6522
PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL);
6523
}
6524
if (pd->act.log & PF_LOG_MATCHES)
6525
pf_log_matches(pd, r, ctx.a, ruleset, ctx.match_rules);
6526
if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
6527
(r->action == PF_DROP) &&
6528
((r->rule_flag & PFRULE_RETURNRST) ||
6529
(r->rule_flag & PFRULE_RETURNICMP) ||
6530
(r->rule_flag & PFRULE_RETURN))) {
6531
pf_return(r, ctx.nr, pd, ctx.th, bproto_sum,
6532
bip_sum, &ctx.reason, r->rtableid);
6533
}
6534
6535
if (r->action == PF_DROP)
6536
goto cleanup;
6537
6538
if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) {
6539
REASON_SET(&ctx.reason, PFRES_MEMORY);
6540
goto cleanup;
6541
}
6542
if (pd->act.rtableid >= 0)
6543
M_SETFIB(pd->m, pd->act.rtableid);
6544
6545
if (r->rt) {
6546
/*
 * Set act.rt here instead of in pf_rule_to_actions() because
 * it is applied only from the last pass rule. For rules
 * with the prefer-ipv6-nexthop option act.rt_af is a hint
 * about the AF of the forwarded packet and might be changed.
 */
6552
pd->act.rt = r->rt;
6553
if (r->rt == PF_REPLYTO)
6554
pd->act.rt_af = pd->af;
6555
else
6556
pd->act.rt_af = pd->naf;
6557
if ((transerror = pf_map_addr_sn(pd->af, r, pd->src,
6558
&pd->act.rt_addr, &pd->act.rt_af, &pd->act.rt_kif, NULL,
6559
&(r->route), PF_SN_ROUTE)) != PFRES_MATCH) {
6560
REASON_SET(&ctx.reason, transerror);
6561
goto cleanup;
6562
}
6563
}
6564
6565
if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
6566
(!ctx.state_icmp && (r->keep_state || ctx.nr != NULL ||
6567
(pd->flags & PFDESC_TCP_NORM)))) {
6568
bool nat64;
6569
6570
action = pf_create_state(r, &ctx, sm, bproto_sum, bip_sum);
6571
ctx.sk = ctx.nk = NULL;
6572
if (action != PF_PASS) {
6573
pf_udp_mapping_release(ctx.udp_mapping);
6574
if (r->log || (ctx.nr != NULL && ctx.nr->log) ||
6575
ctx.reason == PFRES_MEMORY)
6576
pd->act.log |= PF_LOG_FORCE;
6577
if (action == PF_DROP &&
6578
(r->rule_flag & PFRULE_RETURN))
6579
pf_return(r, ctx.nr, pd, ctx.th,
6580
bproto_sum, bip_sum, &ctx.reason,
6581
pd->act.rtableid);
6582
*reason = ctx.reason;
6583
return (action);
6584
}
6585
6586
if (pd->proto == IPPROTO_TCP &&
6587
r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
6588
action = pf_synproxy_ack(r, pd, sm, &ctx.act);
6589
if (action != PF_PASS)
6590
goto cleanup; /* PF_SYNPROXY_DROP */
6591
}
6592
6593
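/*
 * af-to (NAT64/NAT46): the post-translation address family differs
 * from the wire family, so rewrite the headers towards the state key
 * addresses and set the action to PF_AFRT so the caller handles the
 * address family change.
 */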
nat64 = pd->af != pd->naf;
6594
if (nat64) {
6595
int ret;
6596
6597
if (ctx.sk == NULL)
6598
ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
6599
if (ctx.nk == NULL)
6600
ctx.nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
6601
6602
if (pd->dir == PF_IN) {
6603
ret = pf_translate(pd, &ctx.sk->addr[pd->didx],
6604
ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx],
6605
ctx.sk->port[pd->sidx], ctx.virtual_type,
6606
ctx.icmp_dir);
6607
} else {
6608
ret = pf_translate(pd, &ctx.sk->addr[pd->sidx],
6609
ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx],
6610
ctx.sk->port[pd->didx], ctx.virtual_type,
6611
ctx.icmp_dir);
6612
}
6613
6614
if (ret < 0)
6615
goto cleanup;
6616
6617
ctx.rewrite += ret;
6618
6619
if (ctx.rewrite && ctx.sk->af != ctx.nk->af)
6620
action = PF_AFRT;
6621
}
6622
} else {
6623
uma_zfree(V_pf_state_key_z, ctx.sk);
6624
uma_zfree(V_pf_state_key_z, ctx.nk);
6625
ctx.sk = ctx.nk = NULL;
6626
pf_udp_mapping_release(ctx.udp_mapping);
6627
}
6628
6629
/* copy back packet headers if we performed NAT operations */
6630
if (ctx.rewrite)
6631
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
6632
6633
if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
6634
pd->dir == PF_OUT &&
6635
V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) {
6636
/*
 * We want the state created, but we don't want to
 * forward the packet yet, in case a partner firewall
 * has to learn about the state first in order to
 * allow replies through.
 */
6642
*reason = ctx.reason;
6643
return (PF_DEFER);
6644
}
6645
6646
*reason = ctx.reason;
6647
return (action);
6648
6649
cleanup:
6650
uma_zfree(V_pf_state_key_z, ctx.sk);
6651
uma_zfree(V_pf_state_key_z, ctx.nk);
6652
pf_udp_mapping_release(ctx.udp_mapping);
6653
*reason = ctx.reason;
6654
6655
return (PF_DROP);
6656
}
6657
6658
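/*
 * Create a state for the rule matched in pf_test_rule(): allocate
 * source nodes and limiter links as needed, initialize the
 * protocol-specific peer tracking, and insert the state keys.
 * Returns PF_PASS, PF_SYNPROXY_DROP or PF_DROP.
 */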
static int
6659
pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx,
6660
struct pf_kstate **sm, u_int16_t bproto_sum, u_int16_t bip_sum)
6661
{
6662
struct pf_pdesc *pd = ctx->pd;
6663
struct pf_kstate *s = NULL;
6664
struct pf_statelim *stlim = NULL;
6665
struct pf_sourcelim *srlim = NULL;
6666
struct pf_source *sr = NULL;
6667
struct pf_state_link *pfl;
6668
struct pf_ksrc_node *sns[PF_SN_MAX] = { NULL };
6669
/*
6670
* XXXKS: The hash for PF_SN_LIMIT and PF_SN_ROUTE should be the same
6671
* but for PF_SN_NAT it is different. Don't try optimizing it,
6672
* just store all 3 hashes.
6673
*/
6674
struct pf_srchash *snhs[PF_SN_MAX] = { NULL };
6675
struct tcphdr *th = &pd->hdr.tcp;
6676
u_int16_t mss = V_tcp_mssdflt;
6677
u_short sn_reason;
6678
6679
/* check maximums */
6680
if (r->max_states &&
6681
(counter_u64_fetch(r->states_cur) >= r->max_states)) {
6682
counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
6683
REASON_SET(&ctx->reason, PFRES_MAXSTATES);
6684
goto csfailed;
6685
}
6686
/* src node for limits */
6687
if ((r->rule_flag & PFRULE_SRCTRACK) &&
6688
(sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, pd->af,
6689
NULL, NULL, pd->af, PF_SN_LIMIT)) != 0) {
6690
REASON_SET(&ctx->reason, sn_reason);
6691
goto csfailed;
6692
}
6693
/* src node for route-to rule */
6694
if (r->rt) {
6695
if ((r->route.opts & PF_POOL_STICKYADDR) &&
6696
(sn_reason = pf_insert_src_node(sns, snhs, r, pd->src,
6697
pd->af, &pd->act.rt_addr, pd->act.rt_kif, pd->act.rt_af,
6698
PF_SN_ROUTE)) != 0) {
6699
REASON_SET(&ctx->reason, sn_reason);
6700
goto csfailed;
6701
}
6702
}
6703
/* src node for translation rule */
6704
if (ctx->nr != NULL) {
6705
KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__));
6706
/*
6707
* The NAT addresses are chosen during ruleset parsing.
6708
* The new afto code stores post-nat addresses in nsaddr.
6709
* The old nat code (also used for new nat-to rules) creates
6710
* state keys and stores addresses in them.
6711
*/
6712
if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) &&
6713
(sn_reason = pf_insert_src_node(sns, snhs, ctx->nr,
6714
ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af,
6715
ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL,
6716
pd->naf, PF_SN_NAT)) != 0) {
6717
REASON_SET(&ctx->reason, sn_reason);
6718
goto csfailed;
6719
}
6720
}
6721
s = pf_alloc_state(M_NOWAIT);
6722
if (s == NULL) {
6723
REASON_SET(&ctx->reason, PFRES_MEMORY);
6724
goto csfailed;
6725
}
6726
s->rule = r;
6727
s->nat_rule = ctx->nr;
6728
s->anchor = ctx->a;
6729
s->match_rules = *ctx->match_rules;
6730
SLIST_INIT(&s->linkage);
6731
memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions));
6732
6733
if (pd->act.allow_opts)
6734
s->state_flags |= PFSTATE_ALLOWOPTS;
6735
if (r->rule_flag & PFRULE_STATESLOPPY)
6736
s->state_flags |= PFSTATE_SLOPPY;
6737
if (pd->flags & PFDESC_TCP_NORM) /* Set by old-style scrub rules */
6738
s->state_flags |= PFSTATE_SCRUB_TCP;
6739
if ((r->rule_flag & PFRULE_PFLOW) ||
6740
(ctx->nr != NULL && ctx->nr->rule_flag & PFRULE_PFLOW))
6741
s->state_flags |= PFSTATE_PFLOW;
6742
6743
s->act.log = pd->act.log & PF_LOG_ALL;
6744
s->sync_state = PFSYNC_S_NONE;
6745
s->state_flags |= pd->act.flags; /* Only needed for pfsync and state export */
6746
6747
if (ctx->nr != NULL)
6748
s->act.log |= ctx->nr->log & PF_LOG_ALL;
6749
switch (pd->proto) {
6750
case IPPROTO_TCP:
6751
s->src.seqlo = ntohl(th->th_seq);
6752
s->src.seqhi = s->src.seqlo + pd->p_len + 1;
6753
if ((tcp_get_flags(th) & (TH_SYN|TH_ACK)) == TH_SYN &&
6754
r->keep_state == PF_STATE_MODULATE) {
6755
/* Generate sequence number modulator */
6756
if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
6757
0)
6758
s->src.seqdiff = 1;
6759
pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum,
6760
htonl(s->src.seqlo + s->src.seqdiff), 0);
6761
ctx->rewrite = 1;
6762
} else
6763
s->src.seqdiff = 0;
6764
if (tcp_get_flags(th) & TH_SYN) {
6765
s->src.seqhi++;
6766
s->src.wscale = pf_get_wscale(pd);
6767
}
6768
s->src.max_win = MAX(ntohs(th->th_win), 1);
6769
if (s->src.wscale & PF_WSCALE_MASK) {
6770
/* Remove scale factor from initial window */
6771
int win = s->src.max_win;
6772
win += 1 << (s->src.wscale & PF_WSCALE_MASK);
6773
s->src.max_win = (win - 1) >>
6774
(s->src.wscale & PF_WSCALE_MASK);
6775
}
6776
if (tcp_get_flags(th) & TH_FIN)
6777
s->src.seqhi++;
6778
s->dst.seqhi = 1;
6779
s->dst.max_win = 1;
6780
pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
6781
pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
6782
s->timeout = PFTM_TCP_FIRST_PACKET;
6783
atomic_add_32(&V_pf_status.states_halfopen, 1);
6784
break;
6785
case IPPROTO_UDP:
6786
pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
6787
pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
6788
s->timeout = PFTM_UDP_FIRST_PACKET;
6789
break;
6790
case IPPROTO_SCTP:
6791
pf_set_protostate(s, PF_PEER_SRC, SCTP_COOKIE_WAIT);
6792
pf_set_protostate(s, PF_PEER_DST, SCTP_CLOSED);
6793
s->timeout = PFTM_SCTP_FIRST_PACKET;
6794
break;
6795
case IPPROTO_ICMP:
6796
#ifdef INET6
6797
case IPPROTO_ICMPV6:
6798
#endif /* INET6 */
6799
s->timeout = PFTM_ICMP_FIRST_PACKET;
6800
break;
6801
default:
6802
pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
6803
pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
6804
s->timeout = PFTM_OTHER_FIRST_PACKET;
6805
}
6806
6807
s->creation = s->expire = pf_get_uptime();
6808
6809
if (pd->proto == IPPROTO_TCP) {
6810
if (s->state_flags & PFSTATE_SCRUB_TCP &&
6811
pf_normalize_tcp_init(pd, th, &s->src)) {
6812
REASON_SET(&ctx->reason, PFRES_MEMORY);
6813
goto csfailed;
6814
}
6815
if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
6816
pf_normalize_tcp_stateful(pd, &ctx->reason, th, s,
6817
&s->src, &s->dst, &ctx->rewrite)) {
6818
/* This really shouldn't happen!!! */
6819
DPFPRINTF(PF_DEBUG_URGENT,
6820
"%s: tcp normalize failed on first "
6821
"pkt", __func__);
6822
goto csfailed;
6823
}
6824
} else if (pd->proto == IPPROTO_SCTP) {
6825
if (pf_normalize_sctp_init(pd, &s->src, &s->dst))
6826
goto csfailed;
6827
if (! (pd->sctp_flags & (PFDESC_SCTP_INIT | PFDESC_SCTP_ADD_IP)))
6828
goto csfailed;
6829
}
6830
s->direction = pd->dir;
6831
6832
/*
 * sk/nk may already have been set up by pf_get_translation().
 */
6835
if (ctx->sk == NULL && ctx->nk == NULL) {
6836
MPASS(pd->sport == NULL || (pd->osport == *pd->sport));
6837
MPASS(pd->dport == NULL || (pd->odport == *pd->dport));
6838
if (pf_state_key_setup(pd, pd->nsport, pd->ndport,
6839
&ctx->sk, &ctx->nk)) {
6840
goto csfailed;
6841
}
6842
} else
6843
KASSERT((ctx->sk != NULL && ctx->nk != NULL), ("%s: nr %p sk %p, nk %p",
6844
__func__, ctx->nr, ctx->sk, ctx->nk));
6845
6846
stlim = ctx->statelim;
6847
if (stlim != NULL) {
6848
unsigned int gen;
6849
6850
pfl = malloc(sizeof(*pfl), M_PF_STATE_LINK, M_NOWAIT);
6851
if (pfl == NULL) {
6852
REASON_SET(&ctx->reason, PFRES_MEMORY);
6853
goto csfailed;
6854
}
6855
6856
gen = pf_statelim_enter(stlim);
6857
stlim->pfstlim_counters.admitted++;
6858
stlim->pfstlim_inuse++;
6859
pf_statelim_leave(stlim, gen);
6860
6861
stlim->pfstlim_rate_ts += stlim->pfstlim_rate_token;
6862
6863
s->statelim = stlim->pfstlim_id;
6864
pfl->pfl_state = s;
6865
pfl->pfl_type = PF_STATE_LINK_TYPE_STATELIM;
6866
6867
TAILQ_INSERT_TAIL(&stlim->pfstlim_states, pfl, pfl_link);
6868
SLIST_INSERT_HEAD(&s->linkage, pfl, pfl_linkage);
6869
}
6870
6871
srlim = ctx->sourcelim;
6872
if (srlim != NULL) {
6873
unsigned int gen;
6874
6875
sr = ctx->source;
6876
if (sr == NULL) {
6877
sr = malloc(sizeof(*sr), M_PF_SOURCE_LIM, M_NOWAIT | M_ZERO);
6878
if (sr == NULL) {
6879
gen = pf_sourcelim_enter(srlim);
6880
srlim->pfsrlim_counters.addrnomem++;
6881
pf_sourcelim_leave(srlim, gen);
6882
REASON_SET(&ctx->reason, PFRES_MEMORY);
6883
goto csfailed;
6884
}
6885
6886
sr->pfsr_parent = srlim;
6887
pf_source_key(srlim, sr, ctx->pd->af, ctx->pd->src);
6888
TAILQ_INIT(&sr->pfsr_states);
6889
6890
if (RB_INSERT(pf_source_tree, &srlim->pfsrlim_sources,
6891
sr) != NULL) {
6892
panic("%s: source pool %u (%p) "
6893
"insert collision %p?!",
6894
__func__, srlim->pfsrlim_id, srlim, sr);
6895
}
6896
6897
if (RB_INSERT(pf_source_ioc_tree,
6898
&srlim->pfsrlim_ioc_sources, sr) != NULL) {
6899
panic("%s: source pool %u (%p) ioc "
6900
"insert collision (%p)?!",
6901
__func__, srlim->pfsrlim_id, srlim, sr);
6902
}
6903
6904
sr->pfsr_empty_ts = time_uptime;
6905
TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc);
6906
6907
gen = pf_sourcelim_enter(srlim);
6908
srlim->pfsrlim_nsources++;
6909
srlim->pfsrlim_counters.addrallocs++;
6910
pf_sourcelim_leave(srlim, gen);
6911
} else {
6912
MPASS(sr->pfsr_parent == srlim);
6913
}
6914
6915
pfl = malloc(sizeof(*pfl), M_PF_STATE_LINK, M_NOWAIT);
6916
if (pfl == NULL) {
6917
REASON_SET(&ctx->reason, PFRES_MEMORY);
6918
goto csfailed;
6919
}
6920
6921
pf_source_used(sr);
6922
6923
sr->pfsr_counters.admitted++;
6924
6925
gen = pf_sourcelim_enter(srlim);
6926
srlim->pfsrlim_counters.inuse++;
6927
srlim->pfsrlim_counters.admitted++;
6928
pf_sourcelim_leave(srlim, gen);
6929
6930
s->sourcelim = srlim->pfsrlim_id;
6931
pfl->pfl_state = s;
6932
pfl->pfl_type = PF_STATE_LINK_TYPE_SOURCELIM;
6933
6934
TAILQ_INSERT_TAIL(&sr->pfsr_states, pfl, pfl_link);
6935
SLIST_INSERT_HEAD(&s->linkage, pfl, pfl_linkage);
6936
}
6937
6938
/* Swap sk/nk for PF_OUT. */
6939
if (pf_state_insert(BOUND_IFACE(s, pd), pd->kif,
6940
(pd->dir == PF_IN) ? ctx->sk : ctx->nk,
6941
(pd->dir == PF_IN) ? ctx->nk : ctx->sk, s)) {
6942
REASON_SET(&ctx->reason, PFRES_STATEINS);
6943
goto drop;
6944
} else
6945
*sm = s;
6946
ctx->sk = ctx->nk = NULL;
6947
6948
STATE_INC_COUNTERS(s);
6949
6950
/*
6951
* Lock order is important: first state, then source node.
6952
*/
6953
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
6954
if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) {
6955
s->sns[sn_type] = sns[sn_type];
6956
PF_HASHROW_UNLOCK(snhs[sn_type]);
6957
}
6958
}
6959
6960
if (ctx->tag > 0)
6961
s->tag = ctx->tag;
6962
if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) ==
6963
TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
6964
pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
6965
pf_undo_nat(ctx->nr, pd, bip_sum);
6966
s->src.seqhi = arc4random();
6967
/* Find mss option */
6968
int rtid = M_GETFIB(pd->m);
6969
mss = pf_get_mss(pd);
6970
mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
6971
mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
6972
s->src.mss = mss;
6973
pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
6974
th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6975
TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0,
6976
pd->act.rtableid, &ctx->reason);
6977
REASON_SET(&ctx->reason, PFRES_SYNPROXY);
6978
return (PF_SYNPROXY_DROP);
6979
}
6980
6981
s->udp_mapping = ctx->udp_mapping;
6982
6983
return (PF_PASS);
6984
6985
csfailed:
6986
uma_zfree(V_pf_state_key_z, ctx->sk);
6987
uma_zfree(V_pf_state_key_z, ctx->nk);
6988
6989
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
6990
if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) {
6991
if (--sns[sn_type]->states == 0 &&
6992
sns[sn_type]->expire == 0) {
6993
pf_unlink_src_node(sns[sn_type]);
6994
pf_free_src_node(sns[sn_type]);
6995
counter_u64_add(
6996
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
6997
}
6998
PF_HASHROW_UNLOCK(snhs[sn_type]);
6999
}
7000
}
7001
7002
drop:
7003
if (s != NULL) {
7004
struct pf_state_link *npfl;
7005
7006
SLIST_FOREACH_SAFE(pfl, &s->linkage, pfl_linkage, npfl) {
7007
struct pf_state_link_list *list;
7008
unsigned int gen;
7009
7010
/* who needs KASSERTS when we have NULL derefs */
7011
7012
switch (pfl->pfl_type) {
7013
case PF_STATE_LINK_TYPE_STATELIM:
7014
gen = pf_statelim_enter(stlim);
7015
stlim->pfstlim_inuse--;
7016
pf_statelim_leave(stlim, gen);
7017
7018
stlim->pfstlim_rate_ts -=
7019
stlim->pfstlim_rate_token;
7020
list = &stlim->pfstlim_states;
7021
break;
7022
case PF_STATE_LINK_TYPE_SOURCELIM:
7023
gen = pf_sourcelim_enter(srlim);
7024
srlim->pfsrlim_counters.inuse--;
7025
pf_sourcelim_leave(srlim, gen);
7026
7027
sr->pfsr_rate_ts -= srlim->pfsrlim_rate_token;
7028
pf_source_rele(sr);
7029
7030
list = &sr->pfsr_states;
7031
break;
7032
default:
7033
panic("%s: unexpected link type on pfl %p",
7034
__func__, pfl);
7035
}
7036
7037
TAILQ_REMOVE(list, pfl, pfl_link);
7038
PF_STATE_LOCK_ASSERT(s);
7039
free(pfl, M_PF_STATE_LINK);
7040
}
7041
7042
pf_src_tree_remove_state(s);
7043
s->timeout = PFTM_UNLINKED;
7044
pf_free_state(s);
7045
}
7046
7047
return (PF_DROP);
7048
}
7049
7050
int
7051
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
7052
struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
7053
int icmp_dir)
7054
{
7055
/*
 * pf_translate() implements OpenBSD's "new" NAT approach.
 * We don't follow it, because it involves a breaking syntax change
 * (removing nat/rdr rules, moving them into regular pf rules).
 * It also moves NAT processing to after normal rule evaluation,
 * whereas in FreeBSD that's done before rule processing.
 *
 * We adopt the function only for nat64, and keep other NAT processing
 * before rule processing.
 */
7065
int rewrite = 0;
7066
int afto = pd->af != pd->naf;
7067
7068
MPASS(afto);
7069
7070
switch (pd->proto) {
7071
case IPPROTO_TCP:
7072
case IPPROTO_UDP:
7073
case IPPROTO_SCTP:
7074
if (afto || *pd->sport != sport) {
7075
pf_change_ap(pd, pd->src, pd->sport,
7076
saddr, sport);
7077
rewrite = 1;
7078
}
7079
if (afto || *pd->dport != dport) {
7080
pf_change_ap(pd, pd->dst, pd->dport,
7081
daddr, dport);
7082
rewrite = 1;
7083
}
7084
break;
7085
7086
#ifdef INET
7087
case IPPROTO_ICMP:
7088
/* pf_translate() is also used when logging invalid packets */
7089
if (pd->af != AF_INET)
7090
return (0);
7091
7092
if (afto) {
7093
if (pf_translate_icmp_af(AF_INET6, &pd->hdr.icmp))
7094
return (-1);
7095
pd->proto = IPPROTO_ICMPV6;
7096
rewrite = 1;
7097
}
7098
if (virtual_type == htons(ICMP_ECHO)) {
7099
u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
7100
7101
if (icmpid != pd->hdr.icmp.icmp_id) {
7102
pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
7103
pd->hdr.icmp.icmp_cksum,
7104
pd->hdr.icmp.icmp_id, icmpid, 0);
7105
pd->hdr.icmp.icmp_id = icmpid;
7106
/* XXX TODO copyback. */
7107
rewrite = 1;
7108
}
7109
}
7110
break;
7111
#endif /* INET */
7112
7113
#ifdef INET6
7114
case IPPROTO_ICMPV6:
7115
/* pf_translate() is also used when logging invalid packets */
7116
if (pd->af != AF_INET6)
7117
return (0);
7118
7119
if (afto) {
7120
/* ip_sum will be recalculated in pf_translate_af */
7121
if (pf_translate_icmp_af(AF_INET, &pd->hdr.icmp6))
7122
return (0);
7123
pd->proto = IPPROTO_ICMP;
7124
rewrite = 1;
7125
}
7126
break;
7127
#endif /* INET6 */
7128
7129
default:
7130
break;
7131
}
7132
7133
return (rewrite);
7134
}
7135
7136
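/*
 * Rewrite the packet headers according to the state key 'nk' chosen by
 * the pre-rules translation code.  Returns non-zero if the rewritten
 * protocol header still needs to be copied back into the mbuf by the
 * caller.
 */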
int
7137
pf_translate_compat(struct pf_test_ctx *ctx)
7138
{
7139
struct pf_pdesc *pd = ctx->pd;
7140
struct pf_state_key *nk = ctx->nk;
7141
struct tcphdr *th = &pd->hdr.tcp;
7142
int rewrite = 0;
7143
7144
KASSERT(ctx->sk != NULL, ("%s: null sk", __func__));
7145
KASSERT(ctx->nk != NULL, ("%s: null nk", __func__));
7146
7147
switch (pd->virtual_proto) {
7148
case IPPROTO_TCP:
7149
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
7150
nk->port[pd->sidx] != pd->nsport) {
7151
pf_change_ap(pd, pd->src, &th->th_sport,
7152
&nk->addr[pd->sidx], nk->port[pd->sidx]);
7153
pd->sport = &th->th_sport;
7154
pd->nsport = th->th_sport;
7155
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7156
}
7157
7158
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
7159
nk->port[pd->didx] != pd->ndport) {
7160
pf_change_ap(pd, pd->dst, &th->th_dport,
7161
&nk->addr[pd->didx], nk->port[pd->didx]);
7162
pd->dport = &th->th_dport;
7163
pd->ndport = th->th_dport;
7164
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7165
}
7166
rewrite++;
7167
break;
7168
case IPPROTO_UDP:
7169
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
7170
nk->port[pd->sidx] != pd->nsport) {
7171
pf_change_ap(pd, pd->src,
7172
&pd->hdr.udp.uh_sport,
7173
&nk->addr[pd->sidx],
7174
nk->port[pd->sidx]);
7175
pd->sport = &pd->hdr.udp.uh_sport;
7176
pd->nsport = pd->hdr.udp.uh_sport;
7177
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7178
}
7179
7180
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
7181
nk->port[pd->didx] != pd->ndport) {
7182
pf_change_ap(pd, pd->dst,
7183
&pd->hdr.udp.uh_dport,
7184
&nk->addr[pd->didx],
7185
nk->port[pd->didx]);
7186
pd->dport = &pd->hdr.udp.uh_dport;
7187
pd->ndport = pd->hdr.udp.uh_dport;
7188
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7189
}
7190
rewrite++;
7191
break;
7192
case IPPROTO_SCTP: {
7193
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
7194
nk->port[pd->sidx] != pd->nsport) {
7195
pf_change_ap(pd, pd->src,
7196
&pd->hdr.sctp.src_port,
7197
&nk->addr[pd->sidx],
7198
nk->port[pd->sidx]);
7199
pd->sport = &pd->hdr.sctp.src_port;
7200
pd->nsport = pd->hdr.sctp.src_port;
7201
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7202
}
7203
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
7204
nk->port[pd->didx] != pd->ndport) {
7205
pf_change_ap(pd, pd->dst,
7206
&pd->hdr.sctp.dest_port,
7207
&nk->addr[pd->didx],
7208
nk->port[pd->didx]);
7209
pd->dport = &pd->hdr.sctp.dest_port;
7210
pd->ndport = pd->hdr.sctp.dest_port;
7211
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7212
}
7213
break;
7214
}
7215
#ifdef INET
7216
case IPPROTO_ICMP:
7217
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) {
7218
pf_change_a(&pd->src->v4.s_addr, pd->ip_sum,
7219
nk->addr[pd->sidx].v4.s_addr, 0);
7220
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7221
}
7222
7223
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) {
7224
pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum,
7225
nk->addr[pd->didx].v4.s_addr, 0);
7226
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7227
}
7228
7229
if (ctx->virtual_type == htons(ICMP_ECHO) &&
7230
nk->port[pd->sidx] != pd->hdr.icmp.icmp_id) {
7231
pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
7232
pd->hdr.icmp.icmp_cksum, pd->nsport,
7233
nk->port[pd->sidx], 0);
7234
pd->hdr.icmp.icmp_id = nk->port[pd->sidx];
7235
pd->sport = &pd->hdr.icmp.icmp_id;
7236
}
7237
m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
7238
break;
7239
#endif /* INET */
7240
#ifdef INET6
7241
case IPPROTO_ICMPV6:
7242
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) {
7243
pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum,
7244
&nk->addr[pd->sidx], 0);
7245
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7246
}
7247
7248
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) {
7249
pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum,
7250
&nk->addr[pd->didx], 0);
7251
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7252
}
7253
rewrite++;
7254
break;
7255
#endif /* INET6 */
7256
default:
7257
switch (pd->af) {
7258
#ifdef INET
7259
case AF_INET:
7260
if (PF_ANEQ(&pd->nsaddr,
7261
&nk->addr[pd->sidx], AF_INET)) {
7262
pf_change_a(&pd->src->v4.s_addr,
7263
pd->ip_sum,
7264
nk->addr[pd->sidx].v4.s_addr, 0);
7265
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
7266
}
7267
7268
if (PF_ANEQ(&pd->ndaddr,
7269
&nk->addr[pd->didx], AF_INET)) {
7270
pf_change_a(&pd->dst->v4.s_addr,
7271
pd->ip_sum,
7272
nk->addr[pd->didx].v4.s_addr, 0);
7273
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
7274
}
7275
break;
7276
#endif /* INET */
7277
#ifdef INET6
7278
case AF_INET6:
7279
if (PF_ANEQ(&pd->nsaddr,
7280
&nk->addr[pd->sidx], AF_INET6)) {
7281
pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx],
7282
pd->af);
7283
pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af);
7284
}
7285
7286
if (PF_ANEQ(&pd->ndaddr,
7287
&nk->addr[pd->didx], AF_INET6)) {
7288
pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx],
7289
pd->af);
7290
pf_addrcpy(pd->dst, &nk->addr[pd->didx],
7291
pd->af);
7292
}
7293
break;
7294
#endif /* INET6 */
7295
}
7296
break;
7297
}
7298
return (rewrite);
7299
}
7300
7301
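/*
 * Full TCP tracking: validate sequence numbers, ACKs and advertised
 * windows for both peers before advancing the connection state, and
 * fall back to a loose match for connections picked up mid-stream.
 */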
static int
7302
pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd,
7303
u_short *reason, int *copyback, struct pf_state_peer *src,
7304
struct pf_state_peer *dst, u_int8_t psrc, u_int8_t pdst)
7305
{
7306
struct tcphdr *th = &pd->hdr.tcp;
7307
u_int16_t win = ntohs(th->th_win);
7308
u_int32_t ack, end, data_end, seq, orig_seq;
7309
u_int8_t sws, dws;
7310
int ackskew;
7311
7312
if (src->wscale && dst->wscale && !(tcp_get_flags(th) & TH_SYN)) {
7313
sws = src->wscale & PF_WSCALE_MASK;
7314
dws = dst->wscale & PF_WSCALE_MASK;
7315
} else
7316
sws = dws = 0;
7317
7318
/*
7319
* Sequence tracking algorithm from Guido van Rooij's paper:
7320
* http://www.madison-gurkha.com/publications/tcp_filtering/
7321
* tcp_filtering.ps
7322
*/
7323
7324
orig_seq = seq = ntohl(th->th_seq);
7325
if (src->seqlo == 0) {
7326
/* First packet from this end. Set its state */
7327
7328
if ((state->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
7329
src->scrub == NULL) {
7330
if (pf_normalize_tcp_init(pd, th, src)) {
7331
REASON_SET(reason, PFRES_MEMORY);
7332
return (PF_DROP);
7333
}
7334
}
7335
7336
/* Deferred generation of sequence number modulator */
7337
if (dst->seqdiff && !src->seqdiff) {
7338
/* use random iss for the TCP server */
7339
while ((src->seqdiff = arc4random() - seq) == 0)
7340
;
7341
ack = ntohl(th->th_ack) - dst->seqdiff;
7342
pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
7343
src->seqdiff), 0);
7344
pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
7345
*copyback = 1;
7346
} else {
7347
ack = ntohl(th->th_ack);
7348
}
7349
7350
end = seq + pd->p_len;
7351
if (tcp_get_flags(th) & TH_SYN) {
7352
end++;
7353
if (dst->wscale & PF_WSCALE_FLAG) {
7354
src->wscale = pf_get_wscale(pd);
7355
if (src->wscale & PF_WSCALE_FLAG) {
7356
/* Remove scale factor from initial
7357
* window */
7358
sws = src->wscale & PF_WSCALE_MASK;
7359
win = ((u_int32_t)win + (1 << sws) - 1)
7360
>> sws;
7361
dws = dst->wscale & PF_WSCALE_MASK;
7362
} else {
7363
/* fixup other window */
7364
dst->max_win = MIN(TCP_MAXWIN,
7365
(u_int32_t)dst->max_win <<
7366
(dst->wscale & PF_WSCALE_MASK));
7367
/* in case of a retrans SYN|ACK */
7368
dst->wscale = 0;
7369
}
7370
}
7371
}
7372
data_end = end;
7373
if (tcp_get_flags(th) & TH_FIN)
7374
end++;
7375
7376
src->seqlo = seq;
7377
if (src->state < TCPS_SYN_SENT)
7378
pf_set_protostate(state, psrc, TCPS_SYN_SENT);
7379
7380
/*
7381
* May need to slide the window (seqhi may have been set by
7382
* the crappy stack check or if we picked up the connection
7383
* after establishment)
7384
*/
7385
if (src->seqhi == 1 ||
7386
SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
7387
src->seqhi = end + MAX(1, dst->max_win << dws);
7388
if (win > src->max_win)
7389
src->max_win = win;
7390
7391
} else {
7392
ack = ntohl(th->th_ack) - dst->seqdiff;
7393
if (src->seqdiff) {
7394
/* Modulate sequence numbers */
7395
pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
7396
src->seqdiff), 0);
7397
pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
7398
*copyback = 1;
7399
}
7400
end = seq + pd->p_len;
7401
if (tcp_get_flags(th) & TH_SYN)
7402
end++;
7403
data_end = end;
7404
if (tcp_get_flags(th) & TH_FIN)
7405
end++;
7406
}
7407
7408
if ((tcp_get_flags(th) & TH_ACK) == 0) {
7409
/* Let it pass through the ack skew check */
7410
ack = dst->seqlo;
7411
} else if ((ack == 0 &&
7412
(tcp_get_flags(th) & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
7413
/* broken tcp stacks do not set ack */
7414
(dst->state < TCPS_SYN_SENT)) {
7415
/*
7416
* Many stacks (ours included) will set the ACK number in an
7417
* FIN|ACK if the SYN times out -- no sequence to ACK.
7418
*/
7419
ack = dst->seqlo;
7420
}
7421
7422
if (seq == end) {
7423
/* Ease sequencing restrictions on no data packets */
7424
seq = src->seqlo;
7425
data_end = end = seq;
7426
}
7427
7428
ackskew = dst->seqlo - ack;
7429
7430
/*
7431
* Need to demodulate the sequence numbers in any TCP SACK options
7432
* (Selective ACK). We could optionally validate the SACK values
7433
* against the current ACK window, either forwards or backwards, but
7434
* I'm not confident that SACK has been implemented properly
7435
* everywhere. It wouldn't surprise me if several stacks accidentally
7436
* SACK too far backwards of previously ACKed data. There really aren't
7437
* any security implications of bad SACKing unless the target stack
7438
* doesn't validate the option length correctly. Someone trying to
7439
* spoof into a TCP connection won't bother blindly sending SACK
7440
* options anyway.
7441
*/
7442
if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
7443
if (pf_modulate_sack(pd, th, dst))
7444
*copyback = 1;
7445
}
7446
7447
#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
7448
if (SEQ_GEQ(src->seqhi, data_end) &&
7449
/* Last octet inside other's window space */
7450
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
7451
/* Retrans: not more than one window back */
7452
(ackskew >= -MAXACKWINDOW) &&
7453
/* Acking not more than one reassembled fragment backwards */
7454
(ackskew <= (MAXACKWINDOW << sws)) &&
7455
/* Acking not more than one window forward */
7456
((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo ||
7457
(orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
7458
/* Require an exact/+1 sequence match on resets when possible */
7459
(SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) &&
7460
SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 &&
7461
(th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) {
7462
/* Allow resets to match sequence window if ack is perfect match */
7463
7464
if (dst->scrub || src->scrub) {
7465
if (pf_normalize_tcp_stateful(pd, reason, th,
7466
state, src, dst, copyback))
7467
return (PF_DROP);
7468
}
7469
7470
/* update max window */
7471
if (src->max_win < win)
7472
src->max_win = win;
7473
/* synchronize sequencing */
7474
if (SEQ_GT(end, src->seqlo))
7475
src->seqlo = end;
7476
/* slide the window of what the other end can send */
7477
if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
7478
dst->seqhi = ack + MAX((win << sws), 1);
7479
7480
/* update states */
7481
if (tcp_get_flags(th) & TH_SYN)
7482
if (src->state < TCPS_SYN_SENT)
7483
pf_set_protostate(state, psrc, TCPS_SYN_SENT);
7484
if (tcp_get_flags(th) & TH_FIN)
7485
if (src->state < TCPS_CLOSING)
7486
pf_set_protostate(state, psrc, TCPS_CLOSING);
7487
if (tcp_get_flags(th) & TH_ACK) {
7488
if (dst->state == TCPS_SYN_SENT) {
7489
pf_set_protostate(state, pdst,
7490
TCPS_ESTABLISHED);
7491
if (src->state == TCPS_ESTABLISHED &&
7492
state->sns[PF_SN_LIMIT] != NULL &&
7493
pf_src_connlimit(state)) {
7494
REASON_SET(reason, PFRES_SRCLIMIT);
7495
return (PF_DROP);
7496
}
7497
} else if (dst->state == TCPS_CLOSING)
7498
pf_set_protostate(state, pdst,
7499
TCPS_FIN_WAIT_2);
7500
}
7501
if (tcp_get_flags(th) & TH_RST)
7502
pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);
7503
7504
/* update expire time */
7505
state->expire = pf_get_uptime();
7506
if (src->state >= TCPS_FIN_WAIT_2 &&
7507
dst->state >= TCPS_FIN_WAIT_2)
7508
state->timeout = PFTM_TCP_CLOSED;
7509
else if (src->state >= TCPS_CLOSING &&
7510
dst->state >= TCPS_CLOSING)
7511
state->timeout = PFTM_TCP_FIN_WAIT;
7512
else if (src->state < TCPS_ESTABLISHED ||
7513
dst->state < TCPS_ESTABLISHED)
7514
state->timeout = PFTM_TCP_OPENING;
7515
else if (src->state >= TCPS_CLOSING ||
7516
dst->state >= TCPS_CLOSING)
7517
state->timeout = PFTM_TCP_CLOSING;
7518
else
7519
state->timeout = PFTM_TCP_ESTABLISHED;
7520
7521
/* Fall through to PASS packet */
7522
7523
} else if ((dst->state < TCPS_SYN_SENT ||
7524
dst->state >= TCPS_FIN_WAIT_2 ||
7525
src->state >= TCPS_FIN_WAIT_2) &&
7526
SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
7527
/* Within a window forward of the originating packet */
7528
SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
7529
/* Within a window backward of the originating packet */
7530
7531
/*
7532
* This currently handles three situations:
7533
* 1) Stupid stacks will shotgun SYNs before their peer
7534
* replies.
7535
* 2) When PF catches an already established stream (the
7536
* firewall rebooted, the state table was flushed, routes
7537
* changed...)
7538
* 3) Packets get funky immediately after the connection
7539
* closes (this should catch Solaris spurious ACK|FINs
7540
* that web servers like to spew after a close)
7541
*
7542
* This must be a little more careful than the above code
7543
* since packet floods will also be caught here. We don't
7544
* update the TTL here to mitigate the damage of a packet
7545
* flood and so the same code can handle awkward establishment
7546
* and a loosened connection close.
7547
* In the establishment case, a correct peer response will
7548
* validate the connection, go through the normal state code
7549
* and keep updating the state TTL.
7550
*/
7551
7552
if (V_pf_status.debug >= PF_DEBUG_MISC) {
7553
printf("pf: loose state match: ");
7554
pf_print_state(state);
7555
pf_print_flags(tcp_get_flags(th));
7556
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
7557
"pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
7558
pd->p_len, ackskew, (unsigned long long)state->packets[0],
7559
(unsigned long long)state->packets[1],
7560
pd->dir == PF_IN ? "in" : "out",
7561
pd->dir == state->direction ? "fwd" : "rev");
7562
}
7563
7564
if (dst->scrub || src->scrub) {
7565
if (pf_normalize_tcp_stateful(pd, reason, th,
7566
state, src, dst, copyback))
7567
return (PF_DROP);
7568
}
7569
7570
/* update max window */
7571
if (src->max_win < win)
7572
src->max_win = win;
7573
/* synchronize sequencing */
7574
if (SEQ_GT(end, src->seqlo))
7575
src->seqlo = end;
7576
/* slide the window of what the other end can send */
7577
if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
7578
dst->seqhi = ack + MAX((win << sws), 1);
7579
7580
/*
7581
* Cannot set dst->seqhi here since this could be a shotgunned
7582
* SYN and not an already established connection.
7583
*/
7584
7585
if (tcp_get_flags(th) & TH_FIN)
7586
if (src->state < TCPS_CLOSING)
7587
pf_set_protostate(state, psrc, TCPS_CLOSING);
7588
if (tcp_get_flags(th) & TH_RST)
7589
pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);
7590
7591
/* Fall through to PASS packet */
7592
7593
} else {
7594
if (state->dst.state == TCPS_SYN_SENT &&
7595
state->src.state == TCPS_SYN_SENT) {
7596
/* Send RST for state mismatches during handshake */
7597
if (!(tcp_get_flags(th) & TH_RST))
7598
pf_send_tcp(state->rule, pd->af,
7599
pd->dst, pd->src, th->th_dport,
7600
th->th_sport, ntohl(th->th_ack), 0,
7601
TH_RST, 0, 0,
7602
state->rule->return_ttl, M_SKIP_FIREWALL,
7603
0, 0, state->act.rtableid, reason);
7604
src->seqlo = 0;
7605
src->seqhi = 1;
7606
src->max_win = 1;
7607
} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
7608
printf("pf: BAD state: ");
7609
pf_print_state(state);
7610
pf_print_flags(tcp_get_flags(th));
7611
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
7612
"pkts=%llu:%llu dir=%s,%s\n",
7613
seq, orig_seq, ack, pd->p_len, ackskew,
7614
(unsigned long long)state->packets[0],
7615
(unsigned long long)state->packets[1],
7616
pd->dir == PF_IN ? "in" : "out",
7617
pd->dir == state->direction ? "fwd" : "rev");
7618
printf("pf: State failure on: %c %c %c %c | %c %c\n",
7619
SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
7620
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
7621
' ': '2',
7622
(ackskew >= -MAXACKWINDOW) ? ' ' : '3',
7623
(ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
7624
SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?' ' :'5',
7625
SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
7626
}
7627
REASON_SET(reason, PFRES_BADSTATE);
7628
return (PF_DROP);
7629
}
7630
7631
return (PF_PASS);
7632
}
7633
7634
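/*
 * Sloppy TCP tracking: advance the peer states and timeouts based on
 * the TCP flags only, without sequence number or window validation.
 */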
static int
7635
pf_tcp_track_sloppy(struct pf_kstate *state, struct pf_pdesc *pd,
7636
u_short *reason, struct pf_state_peer *src, struct pf_state_peer *dst,
7637
u_int8_t psrc, u_int8_t pdst)
7638
{
7639
struct tcphdr *th = &pd->hdr.tcp;
7640
7641
if (tcp_get_flags(th) & TH_SYN)
7642
if (src->state < TCPS_SYN_SENT)
7643
pf_set_protostate(state, psrc, TCPS_SYN_SENT);
7644
if (tcp_get_flags(th) & TH_FIN)
7645
if (src->state < TCPS_CLOSING)
7646
pf_set_protostate(state, psrc, TCPS_CLOSING);
7647
if (tcp_get_flags(th) & TH_ACK) {
7648
if (dst->state == TCPS_SYN_SENT) {
7649
pf_set_protostate(state, pdst, TCPS_ESTABLISHED);
7650
if (src->state == TCPS_ESTABLISHED &&
7651
state->sns[PF_SN_LIMIT] != NULL &&
7652
pf_src_connlimit(state)) {
7653
REASON_SET(reason, PFRES_SRCLIMIT);
7654
return (PF_DROP);
7655
}
7656
} else if (dst->state == TCPS_CLOSING) {
7657
pf_set_protostate(state, pdst, TCPS_FIN_WAIT_2);
7658
} else if (src->state == TCPS_SYN_SENT &&
7659
dst->state < TCPS_SYN_SENT) {
7660
/*
7661
* Handle a special sloppy case where we only see one
7662
* half of the connection. If there is a ACK after
7663
* the initial SYN without ever seeing a packet from
7664
* the destination, set the connection to established.
7665
*/
7666
pf_set_protostate(state, PF_PEER_BOTH,
7667
TCPS_ESTABLISHED);
7668
dst->state = src->state = TCPS_ESTABLISHED;
7669
if (state->sns[PF_SN_LIMIT] != NULL &&
7670
pf_src_connlimit(state)) {
7671
REASON_SET(reason, PFRES_SRCLIMIT);
7672
return (PF_DROP);
7673
}
7674
} else if (src->state == TCPS_CLOSING &&
7675
dst->state == TCPS_ESTABLISHED &&
7676
dst->seqlo == 0) {
7677
/*
7678
* Handle the closing of half connections where we
7679
* don't see the full bidirectional FIN/ACK+ACK
7680
* handshake.
7681
*/
7682
pf_set_protostate(state, pdst, TCPS_CLOSING);
7683
}
7684
}
7685
if (tcp_get_flags(th) & TH_RST)
7686
pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);
7687
7688
/* update expire time */
7689
state->expire = pf_get_uptime();
7690
if (src->state >= TCPS_FIN_WAIT_2 &&
7691
dst->state >= TCPS_FIN_WAIT_2)
7692
state->timeout = PFTM_TCP_CLOSED;
7693
else if (src->state >= TCPS_CLOSING &&
7694
dst->state >= TCPS_CLOSING)
7695
state->timeout = PFTM_TCP_FIN_WAIT;
7696
else if (src->state < TCPS_ESTABLISHED ||
7697
dst->state < TCPS_ESTABLISHED)
7698
state->timeout = PFTM_TCP_OPENING;
7699
else if (src->state >= TCPS_CLOSING ||
7700
dst->state >= TCPS_CLOSING)
7701
state->timeout = PFTM_TCP_CLOSING;
7702
else
7703
state->timeout = PFTM_TCP_ESTABLISHED;
7704
7705
return (PF_PASS);
7706
}
7707
7708
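/*
 * Drive the synproxy handshake: first complete the three-way handshake
 * with the original source (PF_TCPS_PROXY_SRC), then open the
 * connection to the destination and splice the two halves together by
 * recording the sequence number offsets (PF_TCPS_PROXY_DST).
 */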
static int
7709
pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason)
7710
{
7711
struct pf_state_key *sk = state->key[pd->didx];
7712
struct tcphdr *th = &pd->hdr.tcp;
7713
7714
if (state->src.state == PF_TCPS_PROXY_SRC) {
7715
if (pd->dir != state->direction) {
7716
REASON_SET(reason, PFRES_SYNPROXY);
7717
return (PF_SYNPROXY_DROP);
7718
}
7719
if (tcp_get_flags(th) & TH_SYN) {
7720
if (ntohl(th->th_seq) != state->src.seqlo) {
7721
REASON_SET(reason, PFRES_SYNPROXY);
7722
return (PF_DROP);
7723
}
7724
pf_send_tcp(state->rule, pd->af, pd->dst,
7725
pd->src, th->th_dport, th->th_sport,
7726
state->src.seqhi, ntohl(th->th_seq) + 1,
7727
TH_SYN|TH_ACK, 0, state->src.mss, 0,
7728
M_SKIP_FIREWALL, 0, 0, state->act.rtableid,
7729
reason);
7730
REASON_SET(reason, PFRES_SYNPROXY);
7731
return (PF_SYNPROXY_DROP);
7732
} else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
7733
(ntohl(th->th_ack) != state->src.seqhi + 1) ||
7734
(ntohl(th->th_seq) != state->src.seqlo + 1)) {
7735
REASON_SET(reason, PFRES_SYNPROXY);
7736
return (PF_DROP);
7737
} else if (state->sns[PF_SN_LIMIT] != NULL &&
7738
pf_src_connlimit(state)) {
7739
REASON_SET(reason, PFRES_SRCLIMIT);
7740
return (PF_DROP);
7741
} else
7742
pf_set_protostate(state, PF_PEER_SRC,
7743
PF_TCPS_PROXY_DST);
7744
}
7745
if (state->src.state == PF_TCPS_PROXY_DST) {
7746
if (pd->dir == state->direction) {
7747
if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != TH_ACK) ||
7748
(ntohl(th->th_ack) != state->src.seqhi + 1) ||
7749
(ntohl(th->th_seq) != state->src.seqlo + 1)) {
7750
REASON_SET(reason, PFRES_SYNPROXY);
7751
return (PF_DROP);
7752
}
7753
state->src.max_win = MAX(ntohs(th->th_win), 1);
7754
if (state->dst.seqhi == 1)
7755
state->dst.seqhi = arc4random();
7756
pf_send_tcp(state->rule, pd->af,
7757
&sk->addr[pd->sidx], &sk->addr[pd->didx],
7758
sk->port[pd->sidx], sk->port[pd->didx],
7759
state->dst.seqhi, 0, TH_SYN, 0,
7760
state->src.mss, 0,
7761
state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0,
7762
state->tag, 0, state->act.rtableid,
7763
reason);
7764
REASON_SET(reason, PFRES_SYNPROXY);
7765
return (PF_SYNPROXY_DROP);
7766
} else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) !=
7767
(TH_SYN|TH_ACK)) ||
7768
(ntohl(th->th_ack) != state->dst.seqhi + 1)) {
7769
REASON_SET(reason, PFRES_SYNPROXY);
7770
return (PF_DROP);
7771
} else {
7772
state->dst.max_win = MAX(ntohs(th->th_win), 1);
7773
state->dst.seqlo = ntohl(th->th_seq);
7774
pf_send_tcp(state->rule, pd->af, pd->dst,
7775
pd->src, th->th_dport, th->th_sport,
7776
ntohl(th->th_ack), ntohl(th->th_seq) + 1,
7777
TH_ACK, state->src.max_win, 0, 0, 0,
7778
state->tag, 0, state->act.rtableid,
7779
reason);
7780
pf_send_tcp(state->rule, pd->af,
7781
&sk->addr[pd->sidx], &sk->addr[pd->didx],
7782
sk->port[pd->sidx], sk->port[pd->didx],
7783
state->src.seqhi + 1, state->src.seqlo + 1,
7784
TH_ACK, state->dst.max_win, 0, 0,
7785
M_SKIP_FIREWALL, 0, 0, state->act.rtableid,
7786
reason);
7787
state->src.seqdiff = state->dst.seqhi -
7788
state->src.seqlo;
7789
state->dst.seqdiff = state->src.seqhi -
7790
state->dst.seqlo;
7791
state->src.seqhi = state->src.seqlo +
7792
state->dst.max_win;
7793
state->dst.seqhi = state->dst.seqlo +
7794
state->src.max_win;
7795
state->src.wscale = state->dst.wscale = 0;
7796
pf_set_protostate(state, PF_PEER_BOTH,
7797
TCPS_ESTABLISHED);
7798
REASON_SET(reason, PFRES_SYNPROXY);
7799
return (PF_SYNPROXY_DROP);
7800
}
7801
}
7802
7803
return (PF_PASS);
7804
}
7805
7806
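/*
 * Begin a synproxy exchange for a freshly created state: on the
 * initial SYN we move the source peer to PF_TCPS_PROXY_SRC, pick a
 * random ISN, derive an MSS and answer with our own SYN|ACK while the
 * original segment is dropped.
 */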
static __inline int
7807
pf_synproxy_ack(struct pf_krule *r, struct pf_pdesc *pd, struct pf_kstate **sm,
7808
struct pf_rule_actions *act)
7809
{
7810
struct tcphdr *th = &pd->hdr.tcp;
7811
struct pf_kstate *s;
7812
u_int16_t mss;
7813
int rtid;
7814
u_short reason;
7815
7816
if ((th->th_flags & (TH_SYN | TH_ACK)) != TH_SYN)
7817
return (PF_PASS);
7818
7819
s = *sm;
7820
rtid = act->rtableid;
7821
7822
pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
7823
s->src.seqhi = arc4random();
7824
/* Find mss option */
7825
mss = pf_get_mss(pd);
7826
mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
7827
mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
7828
s->src.mss = mss;
7829
7830
pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
7831
th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
7832
TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, 0, r->rtableid, NULL);
7833
7834
REASON_SET(&reason, PFRES_SYNPROXY);
7835
return (PF_SYNPROXY_DROP);
7836
}
7837
7838
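/*
 * Match a packet against an existing state and run the per-protocol
 * state machine (TCP full or sloppy tracking, UDP, SCTP, other).
 * The state's expire/timeout values are refreshed and any translation
 * stored in the state keys (NAT or af-to) is applied to the packet
 * headers before returning the verdict.
 */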
static int
7839
pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
7840
{
7841
struct pf_state_key_cmp key;
7842
int copyback = 0;
7843
struct pf_state_peer *src, *dst;
7844
uint8_t psrc, pdst;
7845
int action;
7846
7847
bzero(&key, sizeof(key));
7848
key.af = pd->af;
7849
key.proto = pd->virtual_proto;
7850
pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
7851
pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
7852
key.port[pd->sidx] = pd->osport;
7853
key.port[pd->didx] = pd->odport;
7854
7855
action = pf_find_state(pd, &key, state);
7856
if (action != PF_MATCH)
7857
return (action);
7858
7859
action = PF_PASS;
7860
if (pd->dir == (*state)->direction) {
7861
if (PF_REVERSED_KEY(*state, pd->af)) {
7862
src = &(*state)->dst;
7863
dst = &(*state)->src;
7864
psrc = PF_PEER_DST;
7865
pdst = PF_PEER_SRC;
7866
} else {
7867
src = &(*state)->src;
7868
dst = &(*state)->dst;
7869
psrc = PF_PEER_SRC;
7870
pdst = PF_PEER_DST;
7871
}
7872
} else {
7873
if (PF_REVERSED_KEY(*state, pd->af)) {
7874
src = &(*state)->src;
7875
dst = &(*state)->dst;
7876
psrc = PF_PEER_SRC;
7877
pdst = PF_PEER_DST;
7878
} else {
7879
src = &(*state)->dst;
7880
dst = &(*state)->src;
7881
psrc = PF_PEER_DST;
7882
pdst = PF_PEER_SRC;
7883
}
7884
}
7885
7886
switch (pd->virtual_proto) {
7887
case IPPROTO_TCP: {
7888
struct tcphdr *th = &pd->hdr.tcp;
7889
7890
if ((action = pf_synproxy(pd, *state, reason)) != PF_PASS)
7891
return (action);
7892
if (((tcp_get_flags(th) & (TH_SYN | TH_ACK)) == TH_SYN) ||
7893
((th->th_flags & (TH_SYN | TH_ACK | TH_RST)) == TH_ACK &&
7894
pf_syncookie_check(pd) && pd->dir == PF_IN)) {
7895
if ((*state)->src.state >= TCPS_FIN_WAIT_2 &&
7896
(*state)->dst.state >= TCPS_FIN_WAIT_2) {
7897
if (V_pf_status.debug >= PF_DEBUG_MISC) {
7898
printf("pf: state reuse ");
7899
pf_print_state(*state);
7900
pf_print_flags(tcp_get_flags(th));
7901
printf("\n");
7902
}
7903
/* XXX make sure it's the same direction ?? */
7904
pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
7905
pf_remove_state(*state);
7906
*state = NULL;
7907
return (PF_DROP);
7908
} else if ((*state)->src.state >= TCPS_ESTABLISHED &&
7909
(*state)->dst.state >= TCPS_ESTABLISHED) {
7910
/*
7911
* SYN matches existing state???
7912
* Typically happens when sender boots up after
7913
* sudden panic. Certain protocols (NFSv3) are
7914
* always using same port numbers. Challenge
7915
* ACK enables all parties (firewall and peers)
7916
* to get in sync again.
7917
*/
7918
pf_send_challenge_ack(pd, *state, src, dst, reason);
7919
return (PF_DROP);
7920
}
7921
}
7922
if ((*state)->state_flags & PFSTATE_SLOPPY) {
7923
if (pf_tcp_track_sloppy(*state, pd, reason, src, dst,
7924
psrc, pdst) == PF_DROP)
7925
return (PF_DROP);
7926
} else {
7927
int ret;
7928
7929
ret = pf_tcp_track_full(*state, pd, reason,
7930
&copyback, src, dst, psrc, pdst);
7931
if (ret == PF_DROP)
7932
return (PF_DROP);
7933
}
7934
break;
7935
}
7936
case IPPROTO_UDP:
7937
/* update states */
7938
if (src->state < PFUDPS_SINGLE)
7939
pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
7940
if (dst->state == PFUDPS_SINGLE)
7941
pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);
7942
7943
/* update expire time */
7944
(*state)->expire = pf_get_uptime();
7945
if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
7946
(*state)->timeout = PFTM_UDP_MULTIPLE;
7947
else
7948
(*state)->timeout = PFTM_UDP_SINGLE;
7949
break;
7950
case IPPROTO_SCTP:
7951
if ((src->state >= SCTP_SHUTDOWN_SENT || src->state == SCTP_CLOSED) &&
7952
(dst->state >= SCTP_SHUTDOWN_SENT || dst->state == SCTP_CLOSED) &&
7953
pd->sctp_flags & PFDESC_SCTP_INIT) {
7954
pf_set_protostate(*state, PF_PEER_BOTH, SCTP_CLOSED);
7955
pf_remove_state(*state);
7956
*state = NULL;
7957
return (PF_DROP);
7958
}
7959
7960
if (pf_sctp_track(*state, pd, reason) != PF_PASS)
7961
return (PF_DROP);
7962
7963
/* Track state. */
7964
if (pd->sctp_flags & PFDESC_SCTP_INIT) {
7965
if (src->state < SCTP_COOKIE_WAIT) {
7966
pf_set_protostate(*state, psrc, SCTP_COOKIE_WAIT);
7967
(*state)->timeout = PFTM_SCTP_OPENING;
7968
}
7969
}
7970
if (pd->sctp_flags & PFDESC_SCTP_INIT_ACK) {
7971
MPASS(dst->scrub != NULL);
7972
if (dst->scrub->pfss_v_tag == 0)
7973
dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;
7974
}
7975
7976
/*
7977
* Bind to the correct interface if we're if-bound. For multihomed
7978
* extra associations we don't know which interface that will be until
7979
* here, so we've inserted the state on V_pf_all. Fix that now.
7980
*/
7981
if ((*state)->kif == V_pfi_all &&
7982
(*state)->rule->rule_flag & PFRULE_IFBOUND)
7983
(*state)->kif = pd->kif;
7984
7985
if (pd->sctp_flags & (PFDESC_SCTP_COOKIE | PFDESC_SCTP_HEARTBEAT_ACK)) {
7986
if (src->state < SCTP_ESTABLISHED) {
7987
pf_set_protostate(*state, psrc, SCTP_ESTABLISHED);
7988
(*state)->timeout = PFTM_SCTP_ESTABLISHED;
7989
}
7990
}
7991
if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN |
7992
PFDESC_SCTP_SHUTDOWN_COMPLETE)) {
7993
if (src->state < SCTP_SHUTDOWN_PENDING) {
7994
pf_set_protostate(*state, psrc, SCTP_SHUTDOWN_PENDING);
7995
(*state)->timeout = PFTM_SCTP_CLOSING;
7996
}
7997
}
7998
if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN_COMPLETE | PFDESC_SCTP_ABORT)) {
7999
pf_set_protostate(*state, psrc, SCTP_CLOSED);
8000
(*state)->timeout = PFTM_SCTP_CLOSED;
8001
}
8002
8003
(*state)->expire = pf_get_uptime();
8004
break;
8005
default:
8006
/* update states */
8007
if (src->state < PFOTHERS_SINGLE)
8008
pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
8009
if (dst->state == PFOTHERS_SINGLE)
8010
pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);
8011
8012
/* update expire time */
8013
(*state)->expire = pf_get_uptime();
8014
if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
8015
(*state)->timeout = PFTM_OTHER_MULTIPLE;
8016
else
8017
(*state)->timeout = PFTM_OTHER_SINGLE;
8018
break;
8019
}
8020
8021
/* translate source/destination address, if necessary */
8022
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
8023
struct pf_state_key *nk;
8024
int afto, sidx, didx;
8025
8026
if (PF_REVERSED_KEY(*state, pd->af))
8027
nk = (*state)->key[pd->sidx];
8028
else
8029
nk = (*state)->key[pd->didx];
8030
8031
afto = pd->af != nk->af;
8032
8033
if (afto && (*state)->direction == PF_IN) {
8034
sidx = pd->didx;
8035
didx = pd->sidx;
8036
} else {
8037
sidx = pd->sidx;
8038
didx = pd->didx;
8039
}
8040
8041
if (afto) {
8042
pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
8043
pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
8044
pd->naf = nk->af;
8045
action = PF_AFRT;
8046
}
8047
8048
if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) ||
8049
nk->port[sidx] != pd->osport)
8050
pf_change_ap(pd, pd->src, pd->sport,
8051
&nk->addr[sidx], nk->port[sidx]);
8052
8053
if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
8054
nk->port[didx] != pd->odport)
8055
pf_change_ap(pd, pd->dst, pd->dport,
8056
&nk->addr[didx], nk->port[didx]);
8057
8058
copyback = 1;
8059
}
8060
8061
if (copyback && pd->hdrlen > 0)
8062
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
8063
8064
return (action);
8065
}
8066
8067
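/*
 * Check the SCTP verification tag of the sending peer against the
 * value recorded in its scrub state.  While the association is still
 * being set up the tag may be (re)learned, to cope with retransmitted
 * INIT/INIT_ACK chunks.
 */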
static int
8068
pf_sctp_track(struct pf_kstate *state, struct pf_pdesc *pd,
8069
u_short *reason)
8070
{
8071
struct pf_state_peer *src;
8072
if (pd->dir == state->direction) {
8073
if (PF_REVERSED_KEY(state, pd->af))
8074
src = &state->dst;
8075
else
8076
src = &state->src;
8077
} else {
8078
if (PF_REVERSED_KEY(state, pd->af))
8079
src = &state->src;
8080
else
8081
src = &state->dst;
8082
}
8083
8084
if (src->scrub != NULL) {
8085
/*
8086
* Allow tags to be updated, in case of retransmission of
8087
* INIT/INIT_ACK chunks.
8088
*/
8089
if (src->state <= SCTP_COOKIE_WAIT)
8090
src->scrub->pfss_v_tag = pd->hdr.sctp.v_tag;
8091
else if (src->scrub->pfss_v_tag != pd->hdr.sctp.v_tag)
8092
return (PF_DROP);
8093
}
8094
8095
return (PF_PASS);
8096
}
8097
8098
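/*
 * Remove this state's wire address from the global SCTP endpoint
 * table, for the verification tags of both peers, and free endpoint
 * entries that end up without any sources.
 */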
static void
8099
pf_sctp_multihome_detach_addr(const struct pf_kstate *s)
8100
{
8101
struct pf_sctp_endpoint key;
8102
struct pf_sctp_endpoint *ep;
8103
struct pf_state_key *sks = s->key[PF_SK_STACK];
8104
struct pf_sctp_source *i, *tmp;
8105
8106
if (sks == NULL || sks->proto != IPPROTO_SCTP || s->dst.scrub == NULL)
8107
return;
8108
8109
PF_SCTP_ENDPOINTS_LOCK();
8110
8111
key.v_tag = s->dst.scrub->pfss_v_tag;
8112
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
8113
if (ep != NULL) {
8114
TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
8115
if (pf_addr_cmp(&i->addr,
8116
&s->key[PF_SK_WIRE]->addr[s->direction == PF_OUT],
8117
s->key[PF_SK_WIRE]->af) == 0) {
8118
SDT_PROBE3(pf, sctp, multihome, remove,
8119
key.v_tag, s, i);
8120
TAILQ_REMOVE(&ep->sources, i, entry);
8121
free(i, M_PFTEMP);
8122
break;
8123
}
8124
}
8125
8126
if (TAILQ_EMPTY(&ep->sources)) {
8127
RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
8128
free(ep, M_PFTEMP);
8129
}
8130
}
8131
8132
/* Other direction. */
8133
key.v_tag = s->src.scrub->pfss_v_tag;
8134
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
8135
if (ep != NULL) {
8136
TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
8137
if (pf_addr_cmp(&i->addr,
8138
&s->key[PF_SK_WIRE]->addr[s->direction == PF_IN],
8139
s->key[PF_SK_WIRE]->af) == 0) {
8140
SDT_PROBE3(pf, sctp, multihome, remove,
8141
key.v_tag, s, i);
8142
TAILQ_REMOVE(&ep->sources, i, entry);
8143
free(i, M_PFTEMP);
8144
break;
8145
}
8146
}
8147
8148
if (TAILQ_EMPTY(&ep->sources)) {
8149
RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
8150
free(ep, M_PFTEMP);
8151
}
8152
}
8153
8154
PF_SCTP_ENDPOINTS_UNLOCK();
8155
}
8156
8157
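/*
 * Record an additional source address for the SCTP endpoint
 * identified by v_tag.  Duplicate addresses are ignored and the
 * number of addresses per endpoint is capped.
 */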
static void
8158
pf_sctp_multihome_add_addr(struct pf_pdesc *pd, struct pf_addr *a, uint32_t v_tag)
8159
{
8160
struct pf_sctp_endpoint key = {
8161
.v_tag = v_tag,
8162
};
8163
struct pf_sctp_source *i;
8164
struct pf_sctp_endpoint *ep;
8165
int count;
8166
8167
PF_SCTP_ENDPOINTS_LOCK();
8168
8169
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
8170
if (ep == NULL) {
8171
ep = malloc(sizeof(struct pf_sctp_endpoint),
8172
M_PFTEMP, M_NOWAIT);
8173
if (ep == NULL) {
8174
PF_SCTP_ENDPOINTS_UNLOCK();
8175
return;
8176
}
8177
8178
ep->v_tag = v_tag;
8179
TAILQ_INIT(&ep->sources);
8180
RB_INSERT(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
8181
}
8182
8183
/* Avoid inserting duplicates. */
8184
count = 0;
8185
TAILQ_FOREACH(i, &ep->sources, entry) {
8186
count++;
8187
if (pf_addr_cmp(&i->addr, a, pd->af) == 0) {
8188
PF_SCTP_ENDPOINTS_UNLOCK();
8189
return;
8190
}
8191
}
8192
8193
/* Limit the number of addresses per endpoint. */
8194
if (count >= PF_SCTP_MAX_ENDPOINTS) {
8195
PF_SCTP_ENDPOINTS_UNLOCK();
8196
return;
8197
}
8198
8199
i = malloc(sizeof(*i), M_PFTEMP, M_NOWAIT);
8200
if (i == NULL) {
8201
PF_SCTP_ENDPOINTS_UNLOCK();
8202
return;
8203
}
8204
8205
i->af = pd->af;
8206
memcpy(&i->addr, a, sizeof(*a));
8207
TAILQ_INSERT_TAIL(&ep->sources, i, entry);
8208
SDT_PROBE2(pf, sctp, multihome, add, v_tag, i);
8209
8210
PF_SCTP_ENDPOINTS_UNLOCK();
8211
}
8212
8213
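/*
 * Work off the multihome jobs queued by pf_multihome_scan().  For
 * SCTP_ADD_IP_ADDRESS we run the ruleset to create states for the
 * advertised address (and queue further jobs for the other addresses
 * already known for this endpoint); for SCTP_DEL_IP_ADDRESS we push
 * the matching state towards shutdown.
 */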
static void
8214
pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif,
8215
struct pf_kstate *s, int action)
8216
{
8217
struct pf_krule_slist match_rules;
8218
struct pf_sctp_multihome_job *j, *tmp;
8219
struct pf_sctp_source *i;
8220
int ret;
8221
struct pf_kstate *sm = NULL;
8222
struct pf_krule *ra = NULL;
8223
struct pf_krule *r = &V_pf_default_rule;
8224
struct pf_kruleset *rs = NULL;
8225
u_short reason;
8226
bool do_extra = true;
8227
8228
PF_RULES_RLOCK_TRACKER;
8229
8230
again:
8231
TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) {
8232
if (s == NULL || action != PF_PASS)
8233
goto free;
8234
8235
/* Confirm we don't recurse here. */
8236
MPASS(! (pd->sctp_flags & PFDESC_SCTP_ADD_IP));
8237
8238
switch (j->op) {
8239
case SCTP_ADD_IP_ADDRESS: {
8240
uint32_t v_tag = pd->sctp_initiate_tag;
8241
8242
if (v_tag == 0) {
8243
if (s->direction == pd->dir)
8244
v_tag = s->src.scrub->pfss_v_tag;
8245
else
8246
v_tag = s->dst.scrub->pfss_v_tag;
8247
}
8248
8249
/*
8250
* Avoid duplicating states. We'll already have
8251
* created a state based on the source address of
8252
* the packet, but SCTP endpoints may also list this
8253
* address again in the INIT(_ACK) parameters.
8254
*/
8255
if (pf_addr_cmp(&j->src, pd->src, pd->af) == 0) {
8256
break;
8257
}
8258
8259
j->pd.sctp_flags |= PFDESC_SCTP_ADD_IP;
8260
PF_RULES_RLOCK();
8261
sm = NULL;
8262
if (s->rule->rule_flag & PFRULE_ALLOW_RELATED) {
8263
j->pd.related_rule = s->rule;
8264
}
8265
SLIST_INIT(&match_rules);
8266
ret = pf_test_rule(&r, &sm,
8267
&j->pd, &ra, &rs, &reason, NULL, &match_rules);
8268
/*
8269
* Nothing to do about match rules, the processed
8270
* packet has already increased the counters.
8271
*/
8272
pf_free_match_rules(&match_rules);
8273
PF_RULES_RUNLOCK();
8274
SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->pd.m, ret);
8275
if (ret != PF_DROP && sm != NULL) {
8276
/* Inherit v_tag values. */
8277
if (sm->direction == s->direction) {
8278
sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
8279
sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
8280
} else {
8281
sm->src.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
8282
sm->dst.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
8283
}
8284
PF_STATE_UNLOCK(sm);
8285
} else {
8286
/* Rule lookup dropped the packet or no state was created, e.g. on a duplicate insert; skip this address. */
8287
break;
8288
}
8289
8290
/* Only add the address if we've actually allowed the state. */
8291
pf_sctp_multihome_add_addr(pd, &j->src, v_tag);
8292
8293
if (! do_extra) {
8294
break;
8295
}
8296
/*
8297
* We need to do this for each of our source addresses.
8298
* Find those based on the verification tag.
8299
*/
8300
struct pf_sctp_endpoint key = {
8301
.v_tag = pd->hdr.sctp.v_tag,
8302
};
8303
struct pf_sctp_endpoint *ep;
8304
8305
PF_SCTP_ENDPOINTS_LOCK();
8306
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
8307
if (ep == NULL) {
8308
PF_SCTP_ENDPOINTS_UNLOCK();
8309
break;
8310
}
8311
MPASS(ep != NULL);
8312
8313
TAILQ_FOREACH(i, &ep->sources, entry) {
8314
struct pf_sctp_multihome_job *nj;
8315
8316
/* SCTP can intermingle IPv4 and IPv6. */
8317
if (i->af != pd->af)
8318
continue;
8319
8320
nj = malloc(sizeof(*nj), M_PFTEMP, M_NOWAIT | M_ZERO);
8321
if (! nj) {
8322
continue;
8323
}
8324
memcpy(&nj->pd, &j->pd, sizeof(j->pd));
8325
memcpy(&nj->src, &j->src, sizeof(nj->src));
8326
nj->pd.src = &nj->src;
8327
/* New destination address. */
8328
memcpy(&nj->dst, &i->addr, sizeof(nj->dst));
8329
nj->pd.dst = &nj->dst;
8330
nj->pd.m = j->pd.m;
8331
nj->op = j->op;
8332
8333
MPASS(nj->pd.pcksum);
8334
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next);
8335
}
8336
PF_SCTP_ENDPOINTS_UNLOCK();
8337
8338
break;
8339
}
8340
case SCTP_DEL_IP_ADDRESS: {
8341
struct pf_state_key_cmp key;
8342
uint8_t psrc;
8343
int action;
8344
8345
bzero(&key, sizeof(key));
8346
key.af = j->pd.af;
8347
key.proto = IPPROTO_SCTP;
8348
if (j->pd.dir == PF_IN) { /* wire side, straight */
8349
pf_addrcpy(&key.addr[0], j->pd.src, key.af);
8350
pf_addrcpy(&key.addr[1], j->pd.dst, key.af);
8351
key.port[0] = j->pd.hdr.sctp.src_port;
8352
key.port[1] = j->pd.hdr.sctp.dest_port;
8353
} else { /* stack side, reverse */
8354
pf_addrcpy(&key.addr[1], j->pd.src, key.af);
8355
pf_addrcpy(&key.addr[0], j->pd.dst, key.af);
8356
key.port[1] = j->pd.hdr.sctp.src_port;
8357
key.port[0] = j->pd.hdr.sctp.dest_port;
8358
}
8359
8360
action = pf_find_state(&j->pd, &key, &sm);
8361
if (action == PF_MATCH) {
8362
PF_STATE_LOCK_ASSERT(sm);
8363
if (j->pd.dir == sm->direction) {
8364
psrc = PF_PEER_SRC;
8365
} else {
8366
psrc = PF_PEER_DST;
8367
}
8368
pf_set_protostate(sm, psrc, SCTP_SHUTDOWN_PENDING);
8369
sm->timeout = PFTM_SCTP_CLOSING;
8370
PF_STATE_UNLOCK(sm);
8371
}
8372
break;
8373
default:
8374
panic("Unknown op %#x", j->op);
8375
}
8376
}
8377
8378
free:
8379
TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next);
8380
free(j, M_PFTEMP);
8381
}
8382
8383
/* We may have inserted extra work while processing the list. */
8384
if (! TAILQ_EMPTY(&pd->sctp_multihome_jobs)) {
8385
do_extra = false;
8386
goto again;
8387
}
8388
}
8389
8390
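/*
 * Walk the SCTP parameters of an INIT/INIT_ACK or ASCONF chunk and
 * queue a multihome job for every IPv4/IPv6 address parameter found;
 * ADD/DEL address parameters are scanned recursively.  The jobs are
 * processed later by pf_sctp_multihome_delayed(), because the state
 * lock is still held at this point.
 */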
static int
8391
pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
8392
{
8393
int off = 0;
8394
struct pf_sctp_multihome_job *job;
8395
8396
SDT_PROBE4(pf, sctp, multihome_scan, entry, start, len, pd, op);
8397
8398
while (off < len) {
8399
struct sctp_paramhdr h;
8400
8401
if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL,
8402
pd->af))
8403
return (PF_DROP);
8404
8405
/* Parameters are at least 4 bytes. */
8406
if (ntohs(h.param_length) < 4)
8407
return (PF_DROP);
8408
8409
SDT_PROBE2(pf, sctp, multihome_scan, param, ntohs(h.param_type),
8410
ntohs(h.param_length));
8411
8412
switch (ntohs(h.param_type)) {
8413
case SCTP_IPV4_ADDRESS: {
8414
struct in_addr t;
8415
8416
if (ntohs(h.param_length) !=
8417
(sizeof(struct sctp_paramhdr) + sizeof(t)))
8418
return (PF_DROP);
8419
8420
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
8421
NULL, pd->af))
8422
return (PF_DROP);
8423
8424
if (in_nullhost(t))
8425
t.s_addr = pd->src->v4.s_addr;
8426
8427
/*
8428
* We hold the state lock (idhash) here, which means
8429
* that we can't acquire the keyhash, or we'll get a
8430
* LOR (and potentially double-lock things too). We also
8431
* can't release the state lock here, so instead we'll
8432
* enqueue this for async handling.
8433
* There's a relatively small race here, in that a
8434
* packet using the new addresses could arrive already,
8435
* but that's just tough luck for it.
8436
*/
8437
job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
8438
if (! job)
8439
return (PF_DROP);
8440
8441
SDT_PROBE2(pf, sctp, multihome_scan, ipv4, &t, op);
8442
8443
memcpy(&job->pd, pd, sizeof(*pd));
8444
8445
/* New source address. */
8446
memcpy(&job->src, &t, sizeof(t));
8447
job->pd.src = &job->src;
8448
memcpy(&job->dst, pd->dst, sizeof(job->dst));
8449
job->pd.dst = &job->dst;
8450
job->pd.m = pd->m;
8451
job->op = op;
8452
8453
MPASS(job->pd.pcksum);
8454
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
8455
break;
8456
}
8457
#ifdef INET6
8458
case SCTP_IPV6_ADDRESS: {
8459
struct in6_addr t;
8460
8461
if (ntohs(h.param_length) !=
8462
(sizeof(struct sctp_paramhdr) + sizeof(t)))
8463
return (PF_DROP);
8464
8465
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
8466
NULL, pd->af))
8467
return (PF_DROP);
8468
if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
8469
break;
8470
if (memcmp(&t, &in6addr_any, sizeof(t)) == 0)
8471
memcpy(&t, &pd->src->v6, sizeof(t));
8472
8473
job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
8474
if (! job)
8475
return (PF_DROP);
8476
8477
SDT_PROBE2(pf, sctp, multihome_scan, ipv6, &t, op);
8478
8479
memcpy(&job->pd, pd, sizeof(*pd));
8480
memcpy(&job->src, &t, sizeof(t));
8481
job->pd.src = &job->src;
8482
memcpy(&job->dst, pd->dst, sizeof(job->dst));
8483
job->pd.dst = &job->dst;
8484
job->pd.m = pd->m;
8485
job->op = op;
8486
8487
MPASS(job->pd.pcksum);
8488
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
8489
break;
8490
}
8491
#endif /* INET6 */
8492
case SCTP_ADD_IP_ADDRESS: {
8493
int ret;
8494
struct sctp_asconf_paramhdr ah;
8495
8496
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
8497
NULL, pd->af))
8498
return (PF_DROP);
8499
8500
ret = pf_multihome_scan(start + off + sizeof(ah),
8501
ntohs(ah.ph.param_length) - sizeof(ah), pd,
8502
SCTP_ADD_IP_ADDRESS);
8503
if (ret != PF_PASS)
8504
return (ret);
8505
break;
8506
}
8507
case SCTP_DEL_IP_ADDRESS: {
8508
int ret;
8509
struct sctp_asconf_paramhdr ah;
8510
8511
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
8512
NULL, pd->af))
8513
return (PF_DROP);
8514
ret = pf_multihome_scan(start + off + sizeof(ah),
8515
ntohs(ah.ph.param_length) - sizeof(ah), pd,
8516
SCTP_DEL_IP_ADDRESS);
8517
if (ret != PF_PASS)
8518
return (ret);
8519
break;
8520
}
8521
default:
8522
break;
8523
}
8524
8525
off += roundup(ntohs(h.param_length), 4);
8526
}
8527
8528
return (PF_PASS);
8529
}
8530
8531
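/*
 * Entry points for the SCTP parameter scan: skip the fixed INIT or
 * ASCONF chunk header and hand the remaining parameters to
 * pf_multihome_scan().
 */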
int
8532
pf_multihome_scan_init(int start, int len, struct pf_pdesc *pd)
8533
{
8534
start += sizeof(struct sctp_init_chunk);
8535
len -= sizeof(struct sctp_init_chunk);
8536
8537
return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
8538
}
8539
8540
int
8541
pf_multihome_scan_asconf(int start, int len, struct pf_pdesc *pd)
8542
{
8543
start += sizeof(struct sctp_asconf_chunk);
8544
len -= sizeof(struct sctp_asconf_chunk);
8545
8546
return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
8547
}
8548
8549
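/*
 * Look up the state for an ICMP query/reply using the virtual id and
 * type computed by pf_icmp_mapping() and verify that the message
 * flows in the expected direction.  Returns -1 when a state was found
 * and the caller should continue processing it, otherwise a verdict
 * such as PF_DROP.
 */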
int
8550
pf_icmp_state_lookup(struct pf_state_key_cmp *key, struct pf_pdesc *pd,
8551
struct pf_kstate **state, u_int16_t icmpid, u_int16_t type, int icmp_dir,
8552
int *iidx, int multi, int inner)
8553
{
8554
int action, direction = pd->dir;
8555
8556
key->af = pd->af;
8557
key->proto = pd->proto;
8558
if (icmp_dir == PF_IN) {
8559
*iidx = pd->sidx;
8560
key->port[pd->sidx] = icmpid;
8561
key->port[pd->didx] = type;
8562
} else {
8563
*iidx = pd->didx;
8564
key->port[pd->sidx] = type;
8565
key->port[pd->didx] = icmpid;
8566
}
8567
if (pf_state_key_addr_setup(pd, key, multi))
8568
return (PF_DROP);
8569
8570
action = pf_find_state(pd, key, state);
8571
if (action != PF_MATCH)
8572
return (action);
8573
8574
if ((*state)->state_flags & PFSTATE_SLOPPY)
8575
return (-1);
8576
8577
/* Is this ICMP message flowing in right direction? */
8578
if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af)
8579
direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ?
8580
PF_IN : PF_OUT;
8581
else
8582
direction = (*state)->direction;
8583
if ((*state)->rule->type &&
8584
(((!inner && direction == pd->dir) ||
8585
(inner && direction != pd->dir)) ?
8586
PF_IN : PF_OUT) != icmp_dir) {
8587
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8588
printf("pf: icmp type %d in wrong direction (%d): ",
8589
ntohs(type), icmp_dir);
8590
pf_print_state(*state);
8591
printf("\n");
8592
}
8593
PF_STATE_UNLOCK(*state);
8594
*state = NULL;
8595
return (PF_DROP);
8596
}
8597
return (-1);
8598
}
8599
8600
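/*
 * State matching for ICMP and ICMPv6.  Queries and replies are
 * matched directly against an ICMP state; error messages are matched
 * by pulling up the embedded header of the offending packet and
 * looking up the state of that inner connection, rewriting the quoted
 * header when the state carries a translation.
 */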
static int
8601
pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
8602
u_short *reason)
8603
{
8604
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
8605
u_int16_t *icmpsum, virtual_id, virtual_type;
8606
u_int8_t icmptype, icmpcode;
8607
int icmp_dir, iidx, ret;
8608
struct pf_state_key_cmp key;
8609
#ifdef INET
8610
u_int16_t icmpid;
8611
#endif /* INET */
8612
8613
MPASS(*state == NULL);
8614
8615
bzero(&key, sizeof(key));
8616
switch (pd->proto) {
8617
#ifdef INET
8618
case IPPROTO_ICMP:
8619
icmptype = pd->hdr.icmp.icmp_type;
8620
icmpcode = pd->hdr.icmp.icmp_code;
8621
icmpid = pd->hdr.icmp.icmp_id;
8622
icmpsum = &pd->hdr.icmp.icmp_cksum;
8623
break;
8624
#endif /* INET */
8625
#ifdef INET6
8626
case IPPROTO_ICMPV6:
8627
icmptype = pd->hdr.icmp6.icmp6_type;
8628
icmpcode = pd->hdr.icmp6.icmp6_code;
8629
#ifdef INET
8630
icmpid = pd->hdr.icmp6.icmp6_id;
8631
#endif /* INET */
8632
icmpsum = &pd->hdr.icmp6.icmp6_cksum;
8633
break;
8634
#endif /* INET6 */
8635
default:
8636
panic("unhandled proto %d", pd->proto);
8637
}
8638
8639
if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
8640
&virtual_type) == 0) {
8641
/*
8642
* ICMP query/reply message not related to a TCP/UDP/SCTP
8643
* packet. Search for an ICMP state.
8644
*/
8645
ret = pf_icmp_state_lookup(&key, pd, state, virtual_id,
8646
virtual_type, icmp_dir, &iidx, 0, 0);
8647
/* IPv6? try matching a multicast address */
8648
if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) {
8649
MPASS(*state == NULL);
8650
ret = pf_icmp_state_lookup(&key, pd, state,
8651
virtual_id, virtual_type,
8652
icmp_dir, &iidx, 1, 0);
8653
}
8654
if (ret >= 0) {
8655
MPASS(*state == NULL);
8656
return (ret);
8657
}
8658
8659
(*state)->expire = pf_get_uptime();
8660
(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
8661
8662
/* translate source/destination address, if necessary */
8663
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
8664
struct pf_state_key *nk;
8665
int afto, sidx, didx;
8666
8667
if (PF_REVERSED_KEY(*state, pd->af))
8668
nk = (*state)->key[pd->sidx];
8669
else
8670
nk = (*state)->key[pd->didx];
8671
8672
afto = pd->af != nk->af;
8673
8674
if (afto && (*state)->direction == PF_IN) {
8675
sidx = pd->didx;
8676
didx = pd->sidx;
8677
iidx = !iidx;
8678
} else {
8679
sidx = pd->sidx;
8680
didx = pd->didx;
8681
}
8682
8683
switch (pd->af) {
8684
#ifdef INET
8685
case AF_INET:
8686
#ifdef INET6
8687
if (afto) {
8688
if (pf_translate_icmp_af(AF_INET6,
8689
&pd->hdr.icmp))
8690
return (PF_DROP);
8691
pd->proto = IPPROTO_ICMPV6;
8692
}
8693
#endif /* INET6 */
8694
if (!afto &&
8695
PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET))
8696
pf_change_a(&saddr->v4.s_addr,
8697
pd->ip_sum,
8698
nk->addr[sidx].v4.s_addr,
8699
0);
8700
8701
if (!afto && PF_ANEQ(pd->dst,
8702
&nk->addr[didx], AF_INET))
8703
pf_change_a(&daddr->v4.s_addr,
8704
pd->ip_sum,
8705
nk->addr[didx].v4.s_addr, 0);
8706
8707
if (nk->port[iidx] !=
8708
pd->hdr.icmp.icmp_id) {
8709
pd->hdr.icmp.icmp_cksum =
8710
pf_cksum_fixup(
8711
pd->hdr.icmp.icmp_cksum, icmpid,
8712
nk->port[iidx], 0);
8713
pd->hdr.icmp.icmp_id =
8714
nk->port[iidx];
8715
}
8716
8717
m_copyback(pd->m, pd->off, ICMP_MINLEN,
8718
(caddr_t )&pd->hdr.icmp);
8719
break;
8720
#endif /* INET */
8721
#ifdef INET6
8722
case AF_INET6:
8723
#ifdef INET
8724
if (afto) {
8725
if (pf_translate_icmp_af(AF_INET,
8726
&pd->hdr.icmp6))
8727
return (PF_DROP);
8728
pd->proto = IPPROTO_ICMP;
8729
}
8730
#endif /* INET */
8731
if (!afto &&
8732
PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET6))
8733
pf_change_a6(saddr,
8734
&pd->hdr.icmp6.icmp6_cksum,
8735
&nk->addr[sidx], 0);
8736
8737
if (!afto && PF_ANEQ(pd->dst,
8738
&nk->addr[didx], AF_INET6))
8739
pf_change_a6(daddr,
8740
&pd->hdr.icmp6.icmp6_cksum,
8741
&nk->addr[didx], 0);
8742
8743
if (nk->port[iidx] != pd->hdr.icmp6.icmp6_id)
8744
pd->hdr.icmp6.icmp6_id =
8745
nk->port[iidx];
8746
8747
m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
8748
(caddr_t )&pd->hdr.icmp6);
8749
break;
8750
#endif /* INET6 */
8751
}
8752
if (afto) {
8753
pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
8754
nk->af);
8755
pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
8756
nk->af);
8757
pd->naf = nk->af;
8758
return (PF_AFRT);
8759
}
8760
}
8761
return (PF_PASS);
8762
8763
} else {
8764
/*
8765
* ICMP error message in response to a TCP/UDP packet.
8766
* Extract the inner TCP/UDP header and search for that state.
8767
*/
8768
8769
struct pf_pdesc pd2;
8770
bzero(&pd2, sizeof pd2);
8771
#ifdef INET
8772
struct ip h2;
8773
#endif /* INET */
8774
#ifdef INET6
8775
struct ip6_hdr h2_6;
8776
#endif /* INET6 */
8777
int ipoff2 = 0;
8778
8779
pd2.af = pd->af;
8780
pd2.dir = pd->dir;
8781
/* Payload packet is from the opposite direction. */
8782
pd2.sidx = (pd->dir == PF_IN) ? 1 : 0;
8783
pd2.didx = (pd->dir == PF_IN) ? 0 : 1;
8784
pd2.m = pd->m;
8785
pd2.pf_mtag = pd->pf_mtag;
8786
pd2.kif = pd->kif;
8787
switch (pd->af) {
8788
#ifdef INET
8789
case AF_INET:
8790
/* offset of h2 in mbuf chain */
8791
ipoff2 = pd->off + ICMP_MINLEN;
8792
8793
if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2),
8794
reason, pd2.af)) {
8795
DPFPRINTF(PF_DEBUG_MISC,
8796
"pf: ICMP error message too short "
8797
"(ip)");
8798
return (PF_DROP);
8799
}
8800
/*
8801
* ICMP error messages don't refer to non-first
8802
* fragments
8803
*/
8804
if (h2.ip_off & htons(IP_OFFMASK)) {
8805
REASON_SET(reason, PFRES_FRAG);
8806
return (PF_DROP);
8807
}
8808
8809
/* offset of protocol header that follows h2 */
8810
pd2.off = ipoff2;
8811
if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
8812
return (PF_DROP);
8813
8814
pd2.tot_len = ntohs(h2.ip_len);
8815
pd2.ttl = h2.ip_ttl;
8816
pd2.src = (struct pf_addr *)&h2.ip_src;
8817
pd2.dst = (struct pf_addr *)&h2.ip_dst;
8818
pd2.ip_sum = &h2.ip_sum;
8819
break;
8820
#endif /* INET */
8821
#ifdef INET6
8822
case AF_INET6:
8823
ipoff2 = pd->off + sizeof(struct icmp6_hdr);
8824
8825
if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6),
8826
reason, pd2.af)) {
8827
DPFPRINTF(PF_DEBUG_MISC,
8828
"pf: ICMP error message too short "
8829
"(ip6)");
8830
return (PF_DROP);
8831
}
8832
pd2.off = ipoff2;
8833
if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
8834
return (PF_DROP);
8835
8836
pd2.tot_len = ntohs(h2_6.ip6_plen) +
8837
sizeof(struct ip6_hdr);
8838
pd2.ttl = h2_6.ip6_hlim;
8839
pd2.src = (struct pf_addr *)&h2_6.ip6_src;
8840
pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
8841
pd2.ip_sum = NULL;
8842
break;
8843
#endif /* INET6 */
8844
default:
8845
unhandled_af(pd->af);
8846
}
8847
8848
if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
8849
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8850
printf("pf: BAD ICMP %d:%d outer dst: ",
8851
icmptype, icmpcode);
8852
pf_print_host(pd->src, 0, pd->af);
8853
printf(" -> ");
8854
pf_print_host(pd->dst, 0, pd->af);
8855
printf(" inner src: ");
8856
pf_print_host(pd2.src, 0, pd2.af);
8857
printf(" -> ");
8858
pf_print_host(pd2.dst, 0, pd2.af);
8859
printf("\n");
8860
}
8861
REASON_SET(reason, PFRES_BADSTATE);
8862
return (PF_DROP);
8863
}
8864
8865
switch (pd2.proto) {
8866
case IPPROTO_TCP: {
8867
struct tcphdr *th = &pd2.hdr.tcp;
8868
u_int32_t seq;
8869
struct pf_state_peer *src, *dst;
8870
u_int8_t dws;
8871
int copyback = 0;
8872
int action;
8873
8874
/*
8875
* Only the first 8 bytes of the TCP header can be
8876
* expected. Don't access any TCP header fields after
8877
* th_seq, so an ackskew test is not possible.
8878
*/
8879
if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason,
8880
pd2.af)) {
8881
DPFPRINTF(PF_DEBUG_MISC,
8882
"pf: ICMP error message too short "
8883
"(tcp)");
8884
return (PF_DROP);
8885
}
8886
pd2.pcksum = &pd2.hdr.tcp.th_sum;
8887
8888
key.af = pd2.af;
8889
key.proto = IPPROTO_TCP;
8890
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
8891
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
8892
key.port[pd2.sidx] = th->th_sport;
8893
key.port[pd2.didx] = th->th_dport;
8894
8895
action = pf_find_state(&pd2, &key, state);
8896
if (action != PF_MATCH)
8897
return (action);
8898
8899
if (pd->dir == (*state)->direction) {
8900
if (PF_REVERSED_KEY(*state, pd->af)) {
8901
src = &(*state)->src;
8902
dst = &(*state)->dst;
8903
} else {
8904
src = &(*state)->dst;
8905
dst = &(*state)->src;
8906
}
8907
} else {
8908
if (PF_REVERSED_KEY(*state, pd->af)) {
8909
src = &(*state)->dst;
8910
dst = &(*state)->src;
8911
} else {
8912
src = &(*state)->src;
8913
dst = &(*state)->dst;
8914
}
8915
}
8916
8917
if (src->wscale && dst->wscale)
8918
dws = dst->wscale & PF_WSCALE_MASK;
8919
else
8920
dws = 0;
8921
8922
/* Demodulate sequence number */
8923
seq = ntohl(th->th_seq) - src->seqdiff;
8924
if (src->seqdiff) {
8925
pf_change_a(&th->th_seq, icmpsum,
8926
htonl(seq), 0);
8927
copyback = 1;
8928
}
8929
8930
if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
8931
(!SEQ_GEQ(src->seqhi, seq) ||
8932
!SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
8933
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8934
printf("pf: BAD ICMP %d:%d ",
8935
icmptype, icmpcode);
8936
pf_print_host(pd->src, 0, pd->af);
8937
printf(" -> ");
8938
pf_print_host(pd->dst, 0, pd->af);
8939
printf(" state: ");
8940
pf_print_state(*state);
8941
printf(" seq=%u\n", seq);
8942
}
8943
REASON_SET(reason, PFRES_BADSTATE);
8944
return (PF_DROP);
8945
} else {
8946
if (V_pf_status.debug >= PF_DEBUG_MISC) {
8947
printf("pf: OK ICMP %d:%d ",
8948
icmptype, icmpcode);
8949
pf_print_host(pd->src, 0, pd->af);
8950
printf(" -> ");
8951
pf_print_host(pd->dst, 0, pd->af);
8952
printf(" state: ");
8953
pf_print_state(*state);
8954
printf(" seq=%u\n", seq);
8955
}
8956
}
8957
8958
/* translate source/destination address, if necessary */
8959
if ((*state)->key[PF_SK_WIRE] !=
8960
(*state)->key[PF_SK_STACK]) {
8961
8962
struct pf_state_key *nk;
8963
8964
if (PF_REVERSED_KEY(*state, pd->af))
8965
nk = (*state)->key[pd->sidx];
8966
else
8967
nk = (*state)->key[pd->didx];
8968
8969
#if defined(INET) && defined(INET6)
8970
int afto, sidx, didx;
8971
8972
afto = pd->af != nk->af;
8973
8974
if (afto && (*state)->direction == PF_IN) {
8975
sidx = pd2.didx;
8976
didx = pd2.sidx;
8977
} else {
8978
sidx = pd2.sidx;
8979
didx = pd2.didx;
8980
}
8981
8982
if (afto) {
8983
if (pf_translate_icmp_af(nk->af,
8984
&pd->hdr.icmp))
8985
return (PF_DROP);
8986
m_copyback(pd->m, pd->off,
8987
sizeof(struct icmp6_hdr),
8988
(c_caddr_t)&pd->hdr.icmp6);
8989
if (pf_change_icmp_af(pd->m, ipoff2, pd,
8990
&pd2, &nk->addr[sidx],
8991
&nk->addr[didx], pd->af,
8992
nk->af))
8993
return (PF_DROP);
8994
pf_addrcpy(&pd->nsaddr,
8995
&nk->addr[pd2.sidx], nk->af);
8996
pf_addrcpy(&pd->ndaddr,
8997
&nk->addr[pd2.didx], nk->af);
8998
if (nk->af == AF_INET) {
8999
pd->proto = IPPROTO_ICMP;
9000
} else {
9001
pd->proto = IPPROTO_ICMPV6;
9002
/*
9003
* IPv4 becomes IPv6 so we must
9004
* copy IPv4 src addr to least
9005
* 32bits in IPv6 address to
9006
* keep traceroute/icmp
9007
* working.
9008
*/
9009
pd->nsaddr.addr32[3] =
9010
pd->src->addr32[0];
9011
}
9012
pd->naf = pd2.naf = nk->af;
9013
pf_change_ap(&pd2, pd2.src, &th->th_sport,
9014
&nk->addr[pd2.sidx], nk->port[sidx]);
9015
pf_change_ap(&pd2, pd2.dst, &th->th_dport,
9016
&nk->addr[pd2.didx], nk->port[didx]);
9017
m_copyback(pd2.m, pd2.off, 8, (c_caddr_t)th);
9018
return (PF_AFRT);
9019
}
9020
#endif /* INET && INET6 */
9021
9022
if (PF_ANEQ(pd2.src,
9023
&nk->addr[pd2.sidx], pd2.af) ||
9024
nk->port[pd2.sidx] != th->th_sport)
9025
pf_change_icmp(pd2.src, &th->th_sport,
9026
daddr, &nk->addr[pd2.sidx],
9027
nk->port[pd2.sidx], NULL,
9028
pd2.ip_sum, icmpsum,
9029
pd->ip_sum, 0, pd2.af);
9030
9031
if (PF_ANEQ(pd2.dst,
9032
&nk->addr[pd2.didx], pd2.af) ||
9033
nk->port[pd2.didx] != th->th_dport)
9034
pf_change_icmp(pd2.dst, &th->th_dport,
9035
saddr, &nk->addr[pd2.didx],
9036
nk->port[pd2.didx], NULL,
9037
pd2.ip_sum, icmpsum,
9038
pd->ip_sum, 0, pd2.af);
9039
copyback = 1;
9040
}
9041
9042
if (copyback) {
9043
switch (pd2.af) {
9044
#ifdef INET
9045
case AF_INET:
9046
m_copyback(pd->m, pd->off, ICMP_MINLEN,
9047
(caddr_t )&pd->hdr.icmp);
9048
m_copyback(pd->m, ipoff2, sizeof(h2),
9049
(caddr_t )&h2);
9050
break;
9051
#endif /* INET */
9052
#ifdef INET6
9053
case AF_INET6:
9054
m_copyback(pd->m, pd->off,
9055
sizeof(struct icmp6_hdr),
9056
(caddr_t )&pd->hdr.icmp6);
9057
m_copyback(pd->m, ipoff2, sizeof(h2_6),
9058
(caddr_t )&h2_6);
9059
break;
9060
#endif /* INET6 */
9061
default:
9062
unhandled_af(pd->af);
9063
}
9064
m_copyback(pd->m, pd2.off, 8, (caddr_t)th);
9065
}
9066
9067
return (PF_PASS);
9068
break;
9069
}
9070
case IPPROTO_UDP: {
9071
struct udphdr *uh = &pd2.hdr.udp;
9072
int action;
9073
9074
if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh),
9075
reason, pd2.af)) {
9076
DPFPRINTF(PF_DEBUG_MISC,
9077
"pf: ICMP error message too short "
9078
"(udp)");
9079
return (PF_DROP);
9080
}
9081
pd2.pcksum = &pd2.hdr.udp.uh_sum;
9082
9083
key.af = pd2.af;
9084
key.proto = IPPROTO_UDP;
9085
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
9086
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
9087
key.port[pd2.sidx] = uh->uh_sport;
9088
key.port[pd2.didx] = uh->uh_dport;
9089
9090
action = pf_find_state(&pd2, &key, state);
9091
if (action != PF_MATCH)
9092
return (action);
9093
9094
/* translate source/destination address, if necessary */
9095
if ((*state)->key[PF_SK_WIRE] !=
9096
(*state)->key[PF_SK_STACK]) {
9097
struct pf_state_key *nk;
9098
9099
if (PF_REVERSED_KEY(*state, pd->af))
9100
nk = (*state)->key[pd->sidx];
9101
else
9102
nk = (*state)->key[pd->didx];
9103
9104
#if defined(INET) && defined(INET6)
9105
int afto, sidx, didx;
9106
9107
afto = pd->af != nk->af;
9108
9109
if (afto && (*state)->direction == PF_IN) {
9110
sidx = pd2.didx;
9111
didx = pd2.sidx;
9112
} else {
9113
sidx = pd2.sidx;
9114
didx = pd2.didx;
9115
}
9116
9117
if (afto) {
9118
if (pf_translate_icmp_af(nk->af,
9119
&pd->hdr.icmp))
9120
return (PF_DROP);
9121
m_copyback(pd->m, pd->off,
9122
sizeof(struct icmp6_hdr),
9123
(c_caddr_t)&pd->hdr.icmp6);
9124
if (pf_change_icmp_af(pd->m, ipoff2, pd,
9125
&pd2, &nk->addr[sidx],
9126
&nk->addr[didx], pd->af,
9127
nk->af))
9128
return (PF_DROP);
9129
pf_addrcpy(&pd->nsaddr,
9130
&nk->addr[pd2.sidx], nk->af);
9131
pf_addrcpy(&pd->ndaddr,
9132
&nk->addr[pd2.didx], nk->af);
9133
if (nk->af == AF_INET) {
9134
pd->proto = IPPROTO_ICMP;
9135
} else {
9136
pd->proto = IPPROTO_ICMPV6;
9137
/*
9138
* IPv4 becomes IPv6 so we must
9139
* copy IPv4 src addr to least
9140
* 32bits in IPv6 address to
9141
* keep traceroute/icmp
9142
* working.
9143
*/
9144
pd->nsaddr.addr32[3] =
9145
pd->src->addr32[0];
9146
}
9147
pd->naf = pd2.naf = nk->af;
9148
pf_change_ap(&pd2, pd2.src, &uh->uh_sport,
9149
&nk->addr[pd2.sidx], nk->port[sidx]);
9150
pf_change_ap(&pd2, pd2.dst, &uh->uh_dport,
9151
&nk->addr[pd2.didx], nk->port[didx]);
9152
m_copyback(pd2.m, pd2.off, sizeof(*uh),
9153
(c_caddr_t)uh);
9154
return (PF_AFRT);
9155
}
9156
#endif /* INET && INET6 */
9157
9158
if (PF_ANEQ(pd2.src,
9159
&nk->addr[pd2.sidx], pd2.af) ||
9160
nk->port[pd2.sidx] != uh->uh_sport)
9161
pf_change_icmp(pd2.src, &uh->uh_sport,
9162
daddr, &nk->addr[pd2.sidx],
9163
nk->port[pd2.sidx], &uh->uh_sum,
9164
pd2.ip_sum, icmpsum,
9165
pd->ip_sum, 1, pd2.af);
9166
9167
if (PF_ANEQ(pd2.dst,
9168
&nk->addr[pd2.didx], pd2.af) ||
9169
nk->port[pd2.didx] != uh->uh_dport)
9170
pf_change_icmp(pd2.dst, &uh->uh_dport,
9171
saddr, &nk->addr[pd2.didx],
9172
nk->port[pd2.didx], &uh->uh_sum,
9173
pd2.ip_sum, icmpsum,
9174
pd->ip_sum, 1, pd2.af);
9175
9176
switch (pd2.af) {
9177
#ifdef INET
9178
case AF_INET:
9179
m_copyback(pd->m, pd->off, ICMP_MINLEN,
9180
(caddr_t )&pd->hdr.icmp);
9181
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
9182
break;
9183
#endif /* INET */
9184
#ifdef INET6
9185
case AF_INET6:
9186
m_copyback(pd->m, pd->off,
9187
sizeof(struct icmp6_hdr),
9188
(caddr_t )&pd->hdr.icmp6);
9189
m_copyback(pd->m, ipoff2, sizeof(h2_6),
9190
(caddr_t )&h2_6);
9191
break;
9192
#endif /* INET6 */
9193
}
9194
m_copyback(pd->m, pd2.off, sizeof(*uh), (caddr_t)uh);
9195
}
9196
return (PF_PASS);
9197
break;
9198
}
9199
#ifdef INET
9200
case IPPROTO_SCTP: {
9201
struct sctphdr *sh = &pd2.hdr.sctp;
9202
struct pf_state_peer *src;
9203
int copyback = 0;
9204
int action;
9205
9206
if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason,
9207
pd2.af)) {
9208
DPFPRINTF(PF_DEBUG_MISC,
9209
"pf: ICMP error message too short "
9210
"(sctp)");
9211
return (PF_DROP);
9212
}
9213
pd2.pcksum = &pd2.sctp_dummy_sum;
9214
9215
key.af = pd2.af;
9216
key.proto = IPPROTO_SCTP;
9217
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
9218
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
9219
key.port[pd2.sidx] = sh->src_port;
9220
key.port[pd2.didx] = sh->dest_port;
9221
9222
action = pf_find_state(&pd2, &key, state);
9223
if (action != PF_MATCH)
9224
return (action);
9225
9226
if (pd->dir == (*state)->direction) {
9227
if (PF_REVERSED_KEY(*state, pd->af))
9228
src = &(*state)->src;
9229
else
9230
src = &(*state)->dst;
9231
} else {
9232
if (PF_REVERSED_KEY(*state, pd->af))
9233
src = &(*state)->dst;
9234
else
9235
src = &(*state)->src;
9236
}
9237
9238
if (src->scrub->pfss_v_tag != sh->v_tag) {
9239
DPFPRINTF(PF_DEBUG_MISC,
9240
"pf: ICMP error message has incorrect "
9241
"SCTP v_tag");
9242
return (PF_DROP);
9243
}
9244
9245
/* translate source/destination address, if necessary */
9246
if ((*state)->key[PF_SK_WIRE] !=
9247
(*state)->key[PF_SK_STACK]) {
9248
9249
struct pf_state_key *nk;
9250
9251
if (PF_REVERSED_KEY(*state, pd->af))
9252
nk = (*state)->key[pd->sidx];
9253
else
9254
nk = (*state)->key[pd->didx];
9255
9256
#if defined(INET) && defined(INET6)
9257
int afto, sidx, didx;
9258
9259
afto = pd->af != nk->af;
9260
9261
if (afto && (*state)->direction == PF_IN) {
9262
sidx = pd2.didx;
9263
didx = pd2.sidx;
9264
} else {
9265
sidx = pd2.sidx;
9266
didx = pd2.didx;
9267
}
9268
9269
if (afto) {
9270
if (pf_translate_icmp_af(nk->af,
9271
&pd->hdr.icmp))
9272
return (PF_DROP);
9273
m_copyback(pd->m, pd->off,
9274
sizeof(struct icmp6_hdr),
9275
(c_caddr_t)&pd->hdr.icmp6);
9276
if (pf_change_icmp_af(pd->m, ipoff2, pd,
9277
&pd2, &nk->addr[sidx],
9278
&nk->addr[didx], pd->af,
9279
nk->af))
9280
return (PF_DROP);
9281
sh->src_port = nk->port[sidx];
9282
sh->dest_port = nk->port[didx];
9283
m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh);
9284
pf_addrcpy(&pd->nsaddr,
9285
&nk->addr[pd2.sidx], nk->af);
9286
pf_addrcpy(&pd->ndaddr,
9287
&nk->addr[pd2.didx], nk->af);
9288
if (nk->af == AF_INET) {
9289
pd->proto = IPPROTO_ICMP;
9290
} else {
9291
pd->proto = IPPROTO_ICMPV6;
9292
/*
9293
* IPv4 becomes IPv6 so we must
9294
* copy IPv4 src addr to least
9295
* 32bits in IPv6 address to
9296
* keep traceroute/icmp
9297
* working.
9298
*/
9299
pd->nsaddr.addr32[3] =
9300
pd->src->addr32[0];
9301
}
9302
pd->naf = nk->af;
9303
return (PF_AFRT);
9304
}
9305
#endif /* INET && INET6 */
9306
9307
if (PF_ANEQ(pd2.src,
9308
&nk->addr[pd2.sidx], pd2.af) ||
9309
nk->port[pd2.sidx] != sh->src_port)
9310
pf_change_icmp(pd2.src, &sh->src_port,
9311
daddr, &nk->addr[pd2.sidx],
9312
nk->port[pd2.sidx], NULL,
9313
pd2.ip_sum, icmpsum,
9314
pd->ip_sum, 0, pd2.af);
9315
9316
if (PF_ANEQ(pd2.dst,
9317
&nk->addr[pd2.didx], pd2.af) ||
9318
nk->port[pd2.didx] != sh->dest_port)
9319
pf_change_icmp(pd2.dst, &sh->dest_port,
9320
saddr, &nk->addr[pd2.didx],
9321
nk->port[pd2.didx], NULL,
9322
pd2.ip_sum, icmpsum,
9323
pd->ip_sum, 0, pd2.af);
9324
copyback = 1;
9325
}
9326
9327
if (copyback) {
9328
switch (pd2.af) {
9329
#ifdef INET
9330
case AF_INET:
9331
m_copyback(pd->m, pd->off, ICMP_MINLEN,
9332
(caddr_t )&pd->hdr.icmp);
9333
m_copyback(pd->m, ipoff2, sizeof(h2),
9334
(caddr_t )&h2);
9335
break;
9336
#endif /* INET */
9337
#ifdef INET6
9338
case AF_INET6:
9339
m_copyback(pd->m, pd->off,
9340
sizeof(struct icmp6_hdr),
9341
(caddr_t )&pd->hdr.icmp6);
9342
m_copyback(pd->m, ipoff2, sizeof(h2_6),
9343
(caddr_t )&h2_6);
9344
break;
9345
#endif /* INET6 */
9346
}
9347
m_copyback(pd->m, pd2.off, sizeof(*sh), (caddr_t)sh);
9348
}
9349
9350
return (PF_PASS);
9351
break;
9352
}
9353
case IPPROTO_ICMP: {
9354
struct icmp *iih = &pd2.hdr.icmp;
9355
9356
if (pd2.af != AF_INET) {
9357
REASON_SET(reason, PFRES_NORM);
9358
return (PF_DROP);
9359
}
9360
9361
if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN,
9362
reason, pd2.af)) {
9363
DPFPRINTF(PF_DEBUG_MISC,
9364
"pf: ICMP error message too short i"
9365
"(icmp)");
9366
return (PF_DROP);
9367
}
9368
pd2.pcksum = &pd2.hdr.icmp.icmp_cksum;
9369
9370
icmpid = iih->icmp_id;
9371
pf_icmp_mapping(&pd2, iih->icmp_type,
9372
&icmp_dir, &virtual_id, &virtual_type);
9373
9374
ret = pf_icmp_state_lookup(&key, &pd2, state,
9375
virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
9376
if (ret >= 0) {
9377
MPASS(*state == NULL);
9378
return (ret);
9379
}
9380
9381
/* translate source/destination address, if necessary */
9382
if ((*state)->key[PF_SK_WIRE] !=
9383
(*state)->key[PF_SK_STACK]) {
9384
struct pf_state_key *nk;
9385
9386
if (PF_REVERSED_KEY(*state, pd->af))
9387
nk = (*state)->key[pd->sidx];
9388
else
9389
nk = (*state)->key[pd->didx];
9390
9391
#if defined(INET) && defined(INET6)
9392
int afto, sidx, didx;
9393
9394
afto = pd->af != nk->af;
9395
9396
if (afto && (*state)->direction == PF_IN) {
9397
sidx = pd2.didx;
9398
didx = pd2.sidx;
9399
iidx = !iidx;
9400
} else {
9401
sidx = pd2.sidx;
9402
didx = pd2.didx;
9403
}
9404
9405
if (afto) {
9406
if (nk->af != AF_INET6)
9407
return (PF_DROP);
9408
if (pf_translate_icmp_af(nk->af,
9409
&pd->hdr.icmp))
9410
return (PF_DROP);
9411
m_copyback(pd->m, pd->off,
9412
sizeof(struct icmp6_hdr),
9413
(c_caddr_t)&pd->hdr.icmp6);
9414
if (pf_change_icmp_af(pd->m, ipoff2, pd,
9415
&pd2, &nk->addr[sidx],
9416
&nk->addr[didx], pd->af,
9417
nk->af))
9418
return (PF_DROP);
9419
pd->proto = IPPROTO_ICMPV6;
9420
if (pf_translate_icmp_af(nk->af, iih))
9421
return (PF_DROP);
9422
if (virtual_type == htons(ICMP_ECHO) &&
9423
nk->port[iidx] != iih->icmp_id)
9424
iih->icmp_id = nk->port[iidx];
9425
m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
9426
(c_caddr_t)iih);
9427
pf_addrcpy(&pd->nsaddr,
9428
&nk->addr[pd2.sidx], nk->af);
9429
pf_addrcpy(&pd->ndaddr,
9430
&nk->addr[pd2.didx], nk->af);
9431
/*
9432
* IPv4 becomes IPv6 so we must copy
9433
* IPv4 src addr to least 32bits in
9434
* IPv6 address to keep traceroute
9435
* working.
9436
*/
9437
pd->nsaddr.addr32[3] =
9438
pd->src->addr32[0];
9439
pd->naf = nk->af;
9440
return (PF_AFRT);
9441
}
9442
#endif /* INET && INET6 */
9443
9444
if (PF_ANEQ(pd2.src,
9445
&nk->addr[pd2.sidx], pd2.af) ||
9446
(virtual_type == htons(ICMP_ECHO) &&
9447
nk->port[iidx] != iih->icmp_id))
9448
pf_change_icmp(pd2.src,
9449
(virtual_type == htons(ICMP_ECHO)) ?
9450
&iih->icmp_id : NULL,
9451
daddr, &nk->addr[pd2.sidx],
9452
(virtual_type == htons(ICMP_ECHO)) ?
9453
nk->port[iidx] : 0, NULL,
9454
pd2.ip_sum, icmpsum,
9455
pd->ip_sum, 0, AF_INET);
9456
9457
if (PF_ANEQ(pd2.dst,
9458
&nk->addr[pd2.didx], pd2.af))
9459
pf_change_icmp(pd2.dst, NULL, NULL,
9460
&nk->addr[pd2.didx], 0, NULL,
9461
pd2.ip_sum, icmpsum, pd->ip_sum, 0,
9462
AF_INET);
9463
9464
m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
9465
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
9466
m_copyback(pd->m, pd2.off, ICMP_MINLEN, (caddr_t)iih);
9467
}
9468
return (PF_PASS);
9469
break;
9470
}
9471
#endif /* INET */
9472
#ifdef INET6
9473
case IPPROTO_ICMPV6: {
9474
struct icmp6_hdr *iih = &pd2.hdr.icmp6;
9475
9476
if (pd2.af != AF_INET6) {
9477
REASON_SET(reason, PFRES_NORM);
9478
return (PF_DROP);
9479
}
9480
9481
if (!pf_pull_hdr(pd->m, pd2.off, iih,
9482
sizeof(struct icmp6_hdr), reason, pd2.af)) {
9483
DPFPRINTF(PF_DEBUG_MISC,
9484
"pf: ICMP error message too short "
9485
"(icmp6)");
9486
return (PF_DROP);
9487
}
9488
pd2.pcksum = &pd2.hdr.icmp6.icmp6_cksum;
9489
9490
pf_icmp_mapping(&pd2, iih->icmp6_type,
9491
&icmp_dir, &virtual_id, &virtual_type);
9492
9493
ret = pf_icmp_state_lookup(&key, &pd2, state,
9494
virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
9495
/* IPv6? try matching a multicast address */
9496
if (ret == PF_DROP && pd2.af == AF_INET6 &&
9497
icmp_dir == PF_OUT) {
9498
MPASS(*state == NULL);
9499
ret = pf_icmp_state_lookup(&key, &pd2,
9500
state, virtual_id, virtual_type,
9501
icmp_dir, &iidx, 1, 1);
9502
}
9503
if (ret >= 0) {
9504
MPASS(*state == NULL);
9505
return (ret);
9506
}
9507
9508
/* translate source/destination address, if necessary */
9509
if ((*state)->key[PF_SK_WIRE] !=
9510
(*state)->key[PF_SK_STACK]) {
9511
struct pf_state_key *nk;
9512
9513
if (PF_REVERSED_KEY(*state, pd->af))
9514
nk = (*state)->key[pd->sidx];
9515
else
9516
nk = (*state)->key[pd->didx];
9517
9518
#if defined(INET) && defined(INET6)
9519
int afto, sidx, didx;
9520
9521
afto = pd->af != nk->af;
9522
9523
if (afto && (*state)->direction == PF_IN) {
9524
sidx = pd2.didx;
9525
didx = pd2.sidx;
9526
iidx = !iidx;
9527
} else {
9528
sidx = pd2.sidx;
9529
didx = pd2.didx;
9530
}
9531
9532
if (afto) {
9533
if (nk->af != AF_INET)
9534
return (PF_DROP);
9535
if (pf_translate_icmp_af(nk->af,
9536
&pd->hdr.icmp))
9537
return (PF_DROP);
9538
m_copyback(pd->m, pd->off,
9539
sizeof(struct icmp6_hdr),
9540
(c_caddr_t)&pd->hdr.icmp6);
9541
if (pf_change_icmp_af(pd->m, ipoff2, pd,
9542
&pd2, &nk->addr[sidx],
9543
&nk->addr[didx], pd->af,
9544
nk->af))
9545
return (PF_DROP);
9546
pd->proto = IPPROTO_ICMP;
9547
if (pf_translate_icmp_af(nk->af, iih))
9548
return (PF_DROP);
9549
if (virtual_type ==
9550
htons(ICMP6_ECHO_REQUEST) &&
9551
nk->port[iidx] != iih->icmp6_id)
9552
iih->icmp6_id = nk->port[iidx];
9553
m_copyback(pd2.m, pd2.off,
9554
sizeof(struct icmp6_hdr), (c_caddr_t)iih);
9555
pf_addrcpy(&pd->nsaddr,
9556
&nk->addr[pd2.sidx], nk->af);
9557
pf_addrcpy(&pd->ndaddr,
9558
&nk->addr[pd2.didx], nk->af);
9559
pd->naf = nk->af;
9560
return (PF_AFRT);
9561
}
9562
#endif /* INET && INET6 */
9563
9564
if (PF_ANEQ(pd2.src,
9565
&nk->addr[pd2.sidx], pd2.af) ||
9566
((virtual_type == htons(ICMP6_ECHO_REQUEST)) &&
9567
nk->port[pd2.sidx] != iih->icmp6_id))
9568
pf_change_icmp(pd2.src,
9569
(virtual_type == htons(ICMP6_ECHO_REQUEST))
9570
? &iih->icmp6_id : NULL,
9571
daddr, &nk->addr[pd2.sidx],
9572
(virtual_type == htons(ICMP6_ECHO_REQUEST))
9573
? nk->port[iidx] : 0, NULL,
9574
pd2.ip_sum, icmpsum,
9575
pd->ip_sum, 0, AF_INET6);
9576
9577
if (PF_ANEQ(pd2.dst,
9578
&nk->addr[pd2.didx], pd2.af))
9579
pf_change_icmp(pd2.dst, NULL, NULL,
9580
&nk->addr[pd2.didx], 0, NULL,
9581
pd2.ip_sum, icmpsum,
9582
pd->ip_sum, 0, AF_INET6);
9583
9584
m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
9585
(caddr_t)&pd->hdr.icmp6);
9586
m_copyback(pd->m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
9587
m_copyback(pd->m, pd2.off, sizeof(struct icmp6_hdr),
9588
(caddr_t)iih);
9589
}
9590
return (PF_PASS);
9591
break;
9592
}
9593
#endif /* INET6 */
9594
default: {
9595
int action;
9596
9597
/*
9598
* Placeholder value, so future calls to pf_change_ap()
9599
* don't try to update a NULL checksum pointer.
9600
*/
9601
pd->pcksum = &pd->sctp_dummy_sum;
9602
key.af = pd2.af;
9603
key.proto = pd2.proto;
9604
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
9605
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
9606
key.port[0] = key.port[1] = 0;
9607
9608
action = pf_find_state(&pd2, &key, state);
9609
if (action != PF_MATCH)
9610
return (action);
9611
9612
/* translate source/destination address, if necessary */
9613
if ((*state)->key[PF_SK_WIRE] !=
9614
(*state)->key[PF_SK_STACK]) {
9615
struct pf_state_key *nk =
9616
(*state)->key[pd->didx];
9617
9618
if (PF_ANEQ(pd2.src,
9619
&nk->addr[pd2.sidx], pd2.af))
9620
pf_change_icmp(pd2.src, NULL, daddr,
9621
&nk->addr[pd2.sidx], 0, NULL,
9622
pd2.ip_sum, icmpsum,
9623
pd->ip_sum, 0, pd2.af);
9624
9625
if (PF_ANEQ(pd2.dst,
9626
&nk->addr[pd2.didx], pd2.af))
9627
pf_change_icmp(pd2.dst, NULL, saddr,
9628
&nk->addr[pd2.didx], 0, NULL,
9629
pd2.ip_sum, icmpsum,
9630
pd->ip_sum, 0, pd2.af);
9631
9632
switch (pd2.af) {
9633
#ifdef INET
9634
case AF_INET:
9635
m_copyback(pd->m, pd->off, ICMP_MINLEN,
9636
(caddr_t)&pd->hdr.icmp);
9637
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
9638
break;
9639
#endif /* INET */
9640
#ifdef INET6
9641
case AF_INET6:
9642
m_copyback(pd->m, pd->off,
9643
sizeof(struct icmp6_hdr),
9644
(caddr_t )&pd->hdr.icmp6);
9645
m_copyback(pd->m, ipoff2, sizeof(h2_6),
9646
(caddr_t )&h2_6);
9647
break;
9648
#endif /* INET6 */
9649
}
9650
}
9651
return (PF_PASS);
9652
break;
9653
}
9654
}
9655
}
9656
}
9657
9658
/*
9659
* off is measured from the start of the mbuf chain;
9660
* the IP header must be at the start of the first mbuf.
9661
*/
9662
void *
9663
pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
9664
u_short *reasonp, sa_family_t af)
9665
{
9666
int iplen = 0;
9667
switch (af) {
9668
#ifdef INET
9669
case AF_INET: {
9670
const struct ip *h = mtod(m, struct ip *);
9671
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
9672
9673
if (fragoff) {
9674
REASON_SET(reasonp, PFRES_FRAG);
9675
return (NULL);
9676
}
9677
iplen = ntohs(h->ip_len);
9678
break;
9679
}
9680
#endif /* INET */
9681
#ifdef INET6
9682
case AF_INET6: {
9683
const struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
9684
9685
iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
9686
break;
9687
}
9688
#endif /* INET6 */
9689
}
9690
if (m->m_pkthdr.len < off + len || iplen < off + len) {
9691
REASON_SET(reasonp, PFRES_SHORT);
9692
return (NULL);
9693
}
9694
m_copydata(m, off, len, p);
9695
return (p);
9696
}
9697
9698
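/*
 * Reverse-path check: is the address reachable in the given routing
 * table, optionally restricted to the interface behind 'kif'?
 * Addresses with embedded scope, ipsec (enc) interfaces and the
 * "any" kif always pass.
 */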
int
9699
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif,
9700
int rtableid)
9701
{
9702
struct ifnet *ifp;
9703
9704
/*
9705
* Skip check for addresses with embedded interface scope,
9706
* as they would always match anyway.
9707
*/
9708
if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
9709
return (1);
9710
9711
if (af != AF_INET && af != AF_INET6)
9712
return (0);
9713
9714
if (kif == V_pfi_all)
9715
return (1);
9716
9717
/* Skip checks for ipsec interfaces */
9718
if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
9719
return (1);
9720
9721
ifp = (kif != NULL) ? kif->pfik_ifp : NULL;
9722
9723
switch (af) {
9724
#ifdef INET6
9725
case AF_INET6:
9726
return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE,
9727
ifp));
9728
#endif /* INET6 */
9729
#ifdef INET
9730
case AF_INET:
9731
return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE,
9732
ifp));
9733
#endif /* INET */
9734
}
9735
9736
return (0);
9737
}
9738
9739
#ifdef INET
9740
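/*
 * IPv4 output path for route-to, reply-to and dup-to rules as well as
 * for af-to translated packets: select the outgoing interface and
 * gateway, decrement the TTL on forwarded packets and, unless the
 * check is skipped, run the packet through pf_test() once more in the
 * outbound direction before sending it.
 */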
static int
9741
pf_route(struct pf_krule *r, struct ifnet *oifp,
9742
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
9743
{
9744
struct mbuf *m0, *m1, *md;
9745
struct route_in6 ro;
9746
union sockaddr_union rt_gw;
9747
const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst;
9748
union sockaddr_union *dst;
9749
struct ip *ip;
9750
struct ifnet *ifp = NULL;
9751
int error = 0;
9752
uint16_t ip_len, ip_off;
9753
uint16_t tmp;
9754
int r_dir;
9755
bool skip_test = false;
9756
int action = PF_PASS;
9757
9758
KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
9759
9760
SDT_PROBE4(pf, ip, route_to, entry, pd->m, pd, s, oifp);
9761
9762
if (s) {
9763
r_dir = s->direction;
9764
} else {
9765
r_dir = r->direction;
9766
}
9767
9768
KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
9769
r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
9770
__func__));
9771
9772
if ((pd->pf_mtag == NULL &&
9773
((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
9774
pd->pf_mtag->routed++ > 3) {
9775
m0 = pd->m;
9776
pd->m = NULL;
9777
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9778
action = PF_DROP;
9779
goto bad_locked;
9780
}
9781
9782
if (pd->act.rt_kif != NULL)
9783
ifp = pd->act.rt_kif->pfik_ifp;
9784
9785
if (pd->act.rt == PF_DUPTO) {
9786
if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
9787
if (s != NULL) {
9788
PF_STATE_UNLOCK(s);
9789
}
9790
if (ifp == oifp) {
9791
/* When the 2nd interface is not skipped */
9792
return (action);
9793
} else {
9794
m0 = pd->m;
9795
pd->m = NULL;
9796
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9797
action = PF_DROP;
9798
goto bad;
9799
}
9800
} else {
9801
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
9802
if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
9803
if (s)
9804
PF_STATE_UNLOCK(s);
9805
return (action);
9806
}
9807
}
9808
} else {
9809
if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
9810
if (pd->af == pd->naf) {
9811
pf_dummynet(pd, s, r, &pd->m);
9812
if (s)
9813
PF_STATE_UNLOCK(s);
9814
return (action);
9815
} else {
9816
if (r_dir == PF_IN) {
9817
skip_test = true;
9818
}
9819
}
9820
}
9821
9822
/*
9823
* If we're actually doing route-to and af-to and are in the
9824
* reply direction.
9825
*/
9826
if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
9827
pd->af != pd->naf) {
9828
if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET) {
9829
/* Un-set ifp so we do a plain route lookup. */
9830
ifp = NULL;
9831
}
9832
if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET6) {
9833
/* Un-set ifp so we do a plain route lookup. */
9834
ifp = NULL;
9835
}
9836
}
9837
m0 = pd->m;
9838
}
9839
9840
ip = mtod(m0, struct ip *);
9841
9842
bzero(&ro, sizeof(ro));
9843
dst = (union sockaddr_union *)&ro.ro_dst;
9844
dst->sin.sin_family = AF_INET;
9845
dst->sin.sin_len = sizeof(struct sockaddr_in);
9846
dst->sin.sin_addr = ip->ip_dst;
9847
if (ifp) { /* Only needed in forward direction and route-to */
9848
bzero(&rt_gw, sizeof(rt_gw));
9849
ro.ro_flags |= RT_HAS_GW;
9850
gw = &rt_gw;
9851
switch (pd->act.rt_af) {
9852
#ifdef INET
9853
case AF_INET:
9854
rt_gw.sin.sin_family = AF_INET;
9855
rt_gw.sin.sin_len = sizeof(struct sockaddr_in);
9856
rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
9857
break;
9858
#endif /* INET */
9859
#ifdef INET6
9860
case AF_INET6:
9861
rt_gw.sin6.sin6_family = AF_INET6;
9862
rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6);
9863
pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr,
9864
&pd->act.rt_addr, AF_INET6);
9865
break;
9866
#endif /* INET6 */
9867
default:
9868
/* Normal af-to without route-to */
9869
break;
9870
}
9871
}
9872
9873
if (pd->dir == PF_IN) {
9874
if (ip->ip_ttl <= IPTTLDEC) {
9875
if (r->rt != PF_DUPTO && pd->naf == pd->af)
9876
pf_send_icmp(m0, ICMP_TIMXCEED,
9877
ICMP_TIMXCEED_INTRANS, 0, pd->af, r,
9878
pd->act.rtableid);
9879
action = PF_DROP;
9880
goto bad_locked;
9881
}
9882
ip->ip_ttl -= IPTTLDEC;
9883
}
9884
9885
if (s != NULL) {
9886
if (ifp == NULL && (pd->af != pd->naf)) {
9887
/* We're in the AFTO case. Do a route lookup. */
9888
const struct nhop_object *nh;
9889
nh = fib4_lookup(M_GETFIB(m0), ip->ip_dst, 0, NHR_NONE, 0);
9890
if (nh) {
9891
ifp = nh->nh_ifp;
9892
9893
/* Use the gateway if needed. */
9894
if (nh->nh_flags & NHF_GATEWAY) {
9895
gw = (const union sockaddr_union *)&nh->gw_sa;
9896
ro.ro_flags |= RT_HAS_GW;
9897
} else {
9898
dst->sin.sin_addr = ip->ip_dst;
9899
}
9900
}
9901
}
9902
PF_STATE_UNLOCK(s);
9903
}
9904
9905
/* It must have been either set from rt_af or from fib4_lookup */
9906
KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__));
9907
9908
if (ifp == NULL) {
9909
m0 = pd->m;
9910
pd->m = NULL;
9911
action = PF_DROP;
9912
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9913
goto bad;
9914
}
9915
9916
/*
9917
* Bind to the correct interface if we're if-bound. We don't know which
9918
* interface that will be until here, so we've inserted the state
9919
* on V_pfi_all. Fix that now.
9920
*/
9921
if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
9922
/* Verify that we're here because of BOUND_IFACE */
9923
MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN));
9924
s->kif = ifp->if_pf_kif;
9925
if (pd->act.rt == PF_REPLYTO) {
9926
s->orig_kif = oifp->if_pf_kif;
9927
}
9928
}
9929
9930
if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN))
9931
skip_test = true;
9932
9933
if (pd->dir == PF_IN) {
9934
if (skip_test) {
9935
struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif;
9936
MPASS(s != NULL);
9937
pf_counter_u64_critical_enter();
9938
pf_counter_u64_add_protected(
9939
&out_kif->pfik_bytes[pd->naf == AF_INET6][1]
9940
[action != PF_PASS && action != PF_AFRT], pd->tot_len);
9941
pf_counter_u64_add_protected(
9942
&out_kif->pfik_packets[pd->naf == AF_INET6][1]
9943
[action != PF_PASS && action != PF_AFRT], 1);
9944
pf_counter_u64_critical_exit();
9945
} else {
9946
if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp,
9947
&pd->act) != PF_PASS) {
9948
action = PF_DROP;
9949
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9950
goto bad;
9951
} else if (m0 == NULL) {
9952
action = PF_DROP;
9953
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9954
goto done;
9955
}
9956
if (m0->m_len < sizeof(struct ip)) {
9957
DPFPRINTF(PF_DEBUG_URGENT,
9958
"%s: m0->m_len < sizeof(struct ip)", __func__);
9959
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
9960
action = PF_DROP;
9961
goto bad;
9962
}
9963
ip = mtod(m0, struct ip *);
9964
}
9965
}
9966
9967
if (ifp->if_flags & IFF_LOOPBACK)
9968
m0->m_flags |= M_SKIP_FIREWALL;
9969
9970
ip_len = ntohs(ip->ip_len);
9971
ip_off = ntohs(ip->ip_off);
9972
9973
/* Copied from FreeBSD 10.0-CURRENT ip_output. */
9974
m0->m_pkthdr.csum_flags |= CSUM_IP;
9975
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
9976
in_delayed_cksum(m0);
9977
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9978
}
9979
if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
9980
pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2));
9981
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
9982
}
9983
9984
if (pd->dir == PF_IN) {
9985
/*
9986
* Make sure dummynet gets the correct direction, in case it needs to
9987
* re-inject later.
9988
*/
9989
pd->dir = PF_OUT;
9990
9991
/*
9992
* The following processing is actually the rest of the inbound processing, even
9993
* though we've marked it as outbound (so we don't look through dummynet) and it
9994
* happens after the outbound processing (pf_test(PF_OUT) above).
9995
* Swap the dummynet pipe numbers, because it's going to come to the wrong
9996
* conclusion about what direction it's processing, and we can't fix it or it
9997
* will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
9998
* decision will pick the right pipe, and everything will mostly work as expected.
9999
*/
10000
tmp = pd->act.dnrpipe;
10001
pd->act.dnrpipe = pd->act.dnpipe;
10002
pd->act.dnpipe = tmp;
10003
}
10004
10005
/*
10006
* If small enough for interface, or the interface will take
10007
* care of the fragmentation for us, we can just send directly.
10008
*/
10009
if (ip_len <= ifp->if_mtu ||
10010
(m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
10011
ip->ip_sum = 0;
10012
if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
10013
ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
10014
m0->m_pkthdr.csum_flags &= ~CSUM_IP;
10015
}
10016
m_clrprotoflags(m0); /* Avoid confusing lower layers. */
10017
10018
md = m0;
10019
error = pf_dummynet_route(pd, s, r, ifp,
10020
(const struct sockaddr *)gw, &md);
10021
if (md != NULL) {
10022
error = (*ifp->if_output)(ifp, md,
10023
(const struct sockaddr *)gw, (struct route *)&ro);
10024
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
10025
}
10026
goto done;
10027
}
10028
10029
/* Balk when the DF bit is set or the interface doesn't support TSO. */
10030
if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
10031
error = EMSGSIZE;
10032
KMOD_IPSTAT_INC(ips_cantfrag);
10033
if (pd->act.rt != PF_DUPTO) {
10034
if (s && s->nat_rule != NULL) {
10035
MPASS(m0 == pd->m);
10036
PACKET_UNDO_NAT(pd,
10037
(ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
10038
s);
10039
}
10040
10041
pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
10042
ifp->if_mtu, pd->af, r, pd->act.rtableid);
10043
}
10044
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
10045
/* Return pass, so we return PFIL_CONSUMED to the stack. */
10046
action = PF_PASS;
10047
goto bad;
10048
}
10049
10050
error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
10051
if (error) {
10052
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
10053
action = PF_DROP;
10054
goto bad;
10055
}
10056
10057
for (; m0; m0 = m1) {
10058
m1 = m0->m_nextpkt;
10059
m0->m_nextpkt = NULL;
10060
if (error == 0) {
10061
m_clrprotoflags(m0);
10062
md = m0;
10063
pd->pf_mtag = pf_find_mtag(md);
10064
error = pf_dummynet_route(pd, s, r, ifp,
10065
(const struct sockaddr *)gw, &md);
10066
if (md != NULL) {
10067
error = (*ifp->if_output)(ifp, md,
10068
(const struct sockaddr *)gw,
10069
(struct route *)&ro);
10070
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
10071
}
10072
} else
10073
m_freem(m0);
10074
}
10075
10076
if (error == 0)
10077
KMOD_IPSTAT_INC(ips_fragmented);
10078
10079
done:
10080
if (pd->act.rt != PF_DUPTO)
10081
pd->m = NULL;
10082
else
10083
action = PF_PASS;
10084
return (action);
10085
10086
bad_locked:
10087
if (s)
10088
PF_STATE_UNLOCK(s);
10089
bad:
10090
m_freem(m0);
10091
goto done;
10092
}
10093
#endif /* INET */
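/*
 * Illustrative sketch, not part of the original source: the dummynet
 * pipe swap done in the PF_IN path of pf_route() above (and in
 * pf_route6() below).  Because the packet has already been re-marked as
 * outbound before it reaches dummynet, the forward and reverse pipe
 * numbers are exchanged so that dummynet's (now wrong) idea of the
 * direction still selects the pipe the rule intended.  The helper name
 * below is hypothetical.
 */
#if 0
static void
example_swap_dummynet_pipes(struct pf_rule_actions *act)
{
	uint16_t tmp;

	/* Exchange the forward and reverse pipe/queue numbers. */
	tmp = act->dnrpipe;
	act->dnrpipe = act->dnpipe;
	act->dnpipe = tmp;
}
#endif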
10094
10095
#ifdef INET6
10096
static int
10097
pf_route6(struct pf_krule *r, struct ifnet *oifp,
10098
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
10099
{
10100
struct mbuf *m0, *md;
10101
struct m_tag *mtag;
10102
struct sockaddr_in6 dst;
10103
struct ip6_hdr *ip6;
10104
struct ifnet *ifp = NULL;
10105
int r_dir;
10106
bool skip_test = false;
10107
int action = PF_PASS;
10108
10109
KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
10110
10111
SDT_PROBE4(pf, ip6, route_to, entry, pd->m, pd, s, oifp);
10112
10113
if (s) {
10114
r_dir = s->direction;
10115
} else {
10116
r_dir = r->direction;
10117
}
10118
10119
KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
10120
r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
10121
__func__));
10122
10123
if ((pd->pf_mtag == NULL &&
10124
((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
10125
pd->pf_mtag->routed++ > 3) {
10126
m0 = pd->m;
10127
pd->m = NULL;
10128
action = PF_DROP;
10129
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10130
goto bad_locked;
10131
}
10132
10133
if (pd->act.rt_kif != NULL)
10134
ifp = pd->act.rt_kif->pfik_ifp;
10135
10136
if (pd->act.rt == PF_DUPTO) {
10137
if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
10138
if (s != NULL) {
10139
PF_STATE_UNLOCK(s);
10140
}
10141
if (ifp == oifp) {
10142
/* When the 2nd interface is not skipped */
10143
return (action);
10144
} else {
10145
m0 = pd->m;
10146
pd->m = NULL;
10147
action = PF_DROP;
10148
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10149
goto bad;
10150
}
10151
} else {
10152
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
10153
if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
10154
if (s)
10155
PF_STATE_UNLOCK(s);
10156
return (action);
10157
}
10158
}
10159
} else {
10160
if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
10161
if (pd->af == pd->naf) {
10162
pf_dummynet(pd, s, r, &pd->m);
10163
if (s)
10164
PF_STATE_UNLOCK(s);
10165
return (action);
10166
} else {
10167
if (r_dir == PF_IN) {
10168
skip_test = true;
10169
}
10170
}
10171
}
10172
10173
/*
10174
* Handle the case where we're actually doing route-to and af-to
10175
* while in the reply direction.
10176
*/
10177
if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
10178
pd->af != pd->naf) {
10179
if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET6) {
10180
/* Un-set ifp so we do a plain route lookup. */
10181
ifp = NULL;
10182
}
10183
if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET) {
10184
/* Un-set ifp so we do a plain route lookup. */
10185
ifp = NULL;
10186
}
10187
}
10188
m0 = pd->m;
10189
}
10190
10191
ip6 = mtod(m0, struct ip6_hdr *);
10192
10193
bzero(&dst, sizeof(dst));
10194
dst.sin6_family = AF_INET6;
10195
dst.sin6_len = sizeof(dst);
10196
pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr,
10197
AF_INET6);
10198
10199
if (pd->dir == PF_IN) {
10200
if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
10201
if (r->rt != PF_DUPTO && pd->naf == pd->af)
10202
pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
10203
ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r,
10204
pd->act.rtableid);
10205
action = PF_DROP;
10206
goto bad_locked;
10207
}
10208
ip6->ip6_hlim -= IPV6_HLIMDEC;
10209
}
10210
10211
if (s != NULL) {
10212
if (ifp == NULL && (pd->af != pd->naf)) {
10213
const struct nhop_object *nh;
10214
nh = fib6_lookup(M_GETFIB(m0), &ip6->ip6_dst, 0, NHR_NONE, 0);
10215
if (nh) {
10216
ifp = nh->nh_ifp;
10217
10218
/* Use the gateway if needed. */
10219
if (nh->nh_flags & NHF_GATEWAY)
10220
bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr,
10221
sizeof(dst.sin6_addr));
10222
else
10223
dst.sin6_addr = ip6->ip6_dst;
10224
}
10225
}
10226
PF_STATE_UNLOCK(s);
10227
}
10228
10229
if (pd->af != pd->naf) {
10230
struct udphdr *uh = &pd->hdr.udp;
10231
10232
if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) {
10233
uh->uh_sum = in6_cksum_pseudo(ip6,
10234
ntohs(uh->uh_ulen), IPPROTO_UDP, 0);
10235
m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any);
10236
}
10237
}
10238
10239
if (ifp == NULL) {
10240
m0 = pd->m;
10241
pd->m = NULL;
10242
action = PF_DROP;
10243
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10244
goto bad;
10245
}
10246
10247
/*
10248
* Bind to the correct interface if we're if-bound. We don't know which
10249
* interface that will be until here, so we've inserted the state
10250
* on V_pfi_all. Fix that now.
10251
*/
10252
if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
10253
/* Verify that we're here because of BOUND_IFACE */
10254
MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN));
10255
s->kif = ifp->if_pf_kif;
10256
if (pd->act.rt == PF_REPLYTO) {
10257
s->orig_kif = oifp->if_pf_kif;
10258
}
10259
}
10260
10261
if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN))
10262
skip_test = true;
10263
10264
if (pd->dir == PF_IN) {
10265
if (skip_test) {
10266
struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif;
10267
MPASS(s != NULL);
10268
pf_counter_u64_critical_enter();
10269
pf_counter_u64_add_protected(
10270
&out_kif->pfik_bytes[pd->naf == AF_INET6][1]
10271
[action != PF_PASS && action != PF_AFRT], pd->tot_len);
10272
pf_counter_u64_add_protected(
10273
&out_kif->pfik_packets[pd->naf == AF_INET6][1]
10274
[action != PF_PASS && action != PF_AFRT], 1);
10275
pf_counter_u64_critical_exit();
10276
} else {
10277
if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT,
10278
ifp, &m0, inp, &pd->act) != PF_PASS) {
10279
action = PF_DROP;
10280
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10281
goto bad;
10282
} else if (m0 == NULL) {
10283
action = PF_DROP;
10284
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10285
goto done;
10286
}
10287
if (m0->m_len < sizeof(struct ip6_hdr)) {
10288
DPFPRINTF(PF_DEBUG_URGENT,
10289
"%s: m0->m_len < sizeof(struct ip6_hdr)",
10290
__func__);
10291
action = PF_DROP;
10292
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10293
goto bad;
10294
}
10295
ip6 = mtod(m0, struct ip6_hdr *);
10296
}
10297
}
10298
10299
if (ifp->if_flags & IFF_LOOPBACK)
10300
m0->m_flags |= M_SKIP_FIREWALL;
10301
10302
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
10303
~ifp->if_hwassist) {
10304
uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
10305
in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
10306
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
10307
}
10308
10309
if (pd->dir == PF_IN) {
10310
uint16_t tmp;
10311
/*
10312
* Make sure dummynet gets the correct direction, in case it needs to
10313
* re-inject later.
10314
*/
10315
pd->dir = PF_OUT;
10316
10317
/*
10318
* The following processing is actually the rest of the inbound processing, even
10319
* though we've marked it as outbound (so we don't look through dummynet) and it
10320
* happens after the outbound processing (pf_test(PF_OUT) above).
10321
* Swap the dummynet pipe numbers, because it's going to come to the wrong
10322
* conclusion about what direction it's processing, and we can't fix it or it
10323
* will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
10324
* decision will pick the right pipe, and everything will mostly work as expected.
10325
*/
10326
tmp = pd->act.dnrpipe;
10327
pd->act.dnrpipe = pd->act.dnpipe;
10328
pd->act.dnpipe = tmp;
10329
}
10330
10331
/*
10332
* If the packet is too large for the outgoing interface,
10333
* send back an icmp6 error.
10334
*/
10335
if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
10336
dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
10337
mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL);
10338
if (mtag != NULL) {
10339
int ret __sdt_used;
10340
ret = pf_refragment6(ifp, &m0, mtag, ifp, true);
10341
SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
10342
goto done;
10343
}
10344
10345
if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
10346
md = m0;
10347
pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
10348
if (md != NULL) {
10349
int ret __sdt_used;
10350
ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL);
10351
SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
10352
}
10353
}
10354
else {
10355
in6_ifstat_inc(ifp, ifs6_in_toobig);
10356
if (pd->act.rt != PF_DUPTO) {
10357
if (s && s->nat_rule != NULL) {
10358
MPASS(m0 == pd->m);
10359
PACKET_UNDO_NAT(pd,
10360
((caddr_t)ip6 - m0->m_data) +
10361
sizeof(struct ip6_hdr), s);
10362
}
10363
10364
if (r->rt != PF_DUPTO)
10365
pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
10366
ifp->if_mtu, pd->af, r, pd->act.rtableid);
10367
}
10368
/* Return pass, so we return PFIL_CONSUMED to the stack. */
10369
action = PF_PASS;
10370
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
10371
goto bad;
10372
}
10373
10374
done:
10375
if (pd->act.rt != PF_DUPTO)
10376
pd->m = NULL;
10377
else
10378
action = PF_PASS;
10379
return (action);
10380
10381
bad_locked:
10382
if (s)
10383
PF_STATE_UNLOCK(s);
10384
bad:
10385
m_freem(m0);
10386
goto done;
10387
}
10388
#endif /* INET6 */
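/*
 * Illustrative sketch, not part of the original source: why pf_route6()
 * fills in a zero UDP checksum during af-to.  IPv6 forbids UDP datagrams
 * with a checksum of zero, so a packet translated from IPv4 (where a
 * zero checksum is legal) must at least carry the pseudo-header sum
 * before it is emitted as IPv6.  The helper name is hypothetical; the
 * kernel routine in6_cksum_pseudo() is the one used above.
 */
#if 0
static void
example_fix_udp_cksum_for_afto(struct ip6_hdr *ip6, struct udphdr *uh)
{
	/* Only datagrams that legally carried no checksum need fixing. */
	if (uh->uh_sum == 0)
		uh->uh_sum = in6_cksum_pseudo(ip6, ntohs(uh->uh_ulen),
		    IPPROTO_UDP, 0);
}
#endif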
10389
10390
/*
10391
* FreeBSD supports cksum offloads for the following drivers.
10392
* em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4)
10393
*
10394
* CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
10395
* network driver performed cksum including pseudo header; we need to verify
10396
* csum_data
10397
* CSUM_DATA_VALID :
10398
* network driver performed cksum, but an additional pseudo-header
10399
* cksum computation with the partial csum_data is needed (i.e. lack of
10400
* H/W support for the pseudo header, for instance sk(4) and possibly gem(4))
10401
*
10402
* After validating the cksum of packet, set both flag CSUM_DATA_VALID and
10403
* CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
10404
* TCP/UDP layer.
10405
* Also, set csum_data to 0xffff to force cksum validation.
10406
*/
10407
static int
10408
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
10409
{
10410
u_int16_t sum = 0;
10411
int hw_assist = 0;
10412
struct ip *ip;
10413
10414
if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
10415
return (1);
10416
if (m->m_pkthdr.len < off + len)
10417
return (1);
10418
10419
switch (p) {
10420
case IPPROTO_TCP:
10421
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
10422
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
10423
sum = m->m_pkthdr.csum_data;
10424
} else {
10425
ip = mtod(m, struct ip *);
10426
sum = in_pseudo(ip->ip_src.s_addr,
10427
ip->ip_dst.s_addr, htonl((u_short)len +
10428
m->m_pkthdr.csum_data + IPPROTO_TCP));
10429
}
10430
sum ^= 0xffff;
10431
++hw_assist;
10432
}
10433
break;
10434
case IPPROTO_UDP:
10435
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
10436
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
10437
sum = m->m_pkthdr.csum_data;
10438
} else {
10439
ip = mtod(m, struct ip *);
10440
sum = in_pseudo(ip->ip_src.s_addr,
10441
ip->ip_dst.s_addr, htonl((u_short)len +
10442
m->m_pkthdr.csum_data + IPPROTO_UDP));
10443
}
10444
sum ^= 0xffff;
10445
++hw_assist;
10446
}
10447
break;
10448
case IPPROTO_ICMP:
10449
#ifdef INET6
10450
case IPPROTO_ICMPV6:
10451
#endif /* INET6 */
10452
break;
10453
default:
10454
return (1);
10455
}
10456
10457
if (!hw_assist) {
10458
switch (af) {
10459
case AF_INET:
10460
if (m->m_len < sizeof(struct ip))
10461
return (1);
10462
sum = in4_cksum(m, (p == IPPROTO_ICMP ? 0 : p), off, len);
10463
break;
10464
#ifdef INET6
10465
case AF_INET6:
10466
if (m->m_len < sizeof(struct ip6_hdr))
10467
return (1);
10468
sum = in6_cksum(m, p, off, len);
10469
break;
10470
#endif /* INET6 */
10471
}
10472
}
10473
if (sum) {
10474
switch (p) {
10475
case IPPROTO_TCP:
10476
{
10477
KMOD_TCPSTAT_INC(tcps_rcvbadsum);
10478
break;
10479
}
10480
case IPPROTO_UDP:
10481
{
10482
KMOD_UDPSTAT_INC(udps_badsum);
10483
break;
10484
}
10485
#ifdef INET
10486
case IPPROTO_ICMP:
10487
{
10488
KMOD_ICMPSTAT_INC(icps_checksum);
10489
break;
10490
}
10491
#endif
10492
#ifdef INET6
10493
case IPPROTO_ICMPV6:
10494
{
10495
KMOD_ICMP6STAT_INC(icp6s_checksum);
10496
break;
10497
}
10498
#endif /* INET6 */
10499
}
10500
return (1);
10501
} else {
10502
if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
10503
m->m_pkthdr.csum_flags |=
10504
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
10505
m->m_pkthdr.csum_data = 0xffff;
10506
}
10507
}
10508
return (0);
10509
}
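/*
 * Illustrative sketch, not part of the original source: how a partial
 * hardware checksum (CSUM_DATA_VALID without CSUM_PSEUDO_HDR) is
 * completed in pf_check_proto_cksum() above.  The driver has summed the
 * TCP/UDP data only, so the pseudo header is folded in with in_pseudo()
 * and the result is inverted; a final value of zero means the checksum
 * verifies.  The helper name is hypothetical.
 */
#if 0
static int
example_complete_partial_cksum(struct mbuf *m, int len, u_int8_t proto)
{
	struct ip *ip = mtod(m, struct ip *);
	u_int16_t sum;

	/* Fold the pseudo header into the driver-provided partial sum. */
	sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htonl((u_short)len + m->m_pkthdr.csum_data + proto));
	sum ^= 0xffff;

	return (sum == 0);	/* a non-zero residue means a bad checksum */
}
#endif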
10510
10511
static bool
10512
pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r,
10513
const struct pf_kstate *s, struct ip_fw_args *dnflow)
10514
{
10515
int dndir = r->direction;
10516
sa_family_t af = pd->naf;
10517
10518
if (s && dndir == PF_INOUT) {
10519
dndir = s->direction;
10520
} else if (dndir == PF_INOUT) {
10521
/* Assume primary direction. Happens when we've set dnpipe in
10522
* the ethernet level code. */
10523
dndir = pd->dir;
10524
}
10525
10526
if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED)
10527
return (false);
10528
10529
memset(dnflow, 0, sizeof(*dnflow));
10530
10531
if (pd->dport != NULL)
10532
dnflow->f_id.dst_port = ntohs(*pd->dport);
10533
if (pd->sport != NULL)
10534
dnflow->f_id.src_port = ntohs(*pd->sport);
10535
10536
if (pd->dir == PF_IN)
10537
dnflow->flags |= IPFW_ARGS_IN;
10538
else
10539
dnflow->flags |= IPFW_ARGS_OUT;
10540
10541
if (pd->dir != dndir && pd->act.dnrpipe) {
10542
dnflow->rule.info = pd->act.dnrpipe;
10543
}
10544
else if (pd->dir == dndir && pd->act.dnpipe) {
10545
dnflow->rule.info = pd->act.dnpipe;
10546
}
10547
else {
10548
return (false);
10549
}
10550
10551
dnflow->rule.info |= IPFW_IS_DUMMYNET;
10552
if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFSTATE_DN_IS_PIPE)
10553
dnflow->rule.info |= IPFW_IS_PIPE;
10554
10555
dnflow->f_id.proto = pd->proto;
10556
dnflow->f_id.extra = dnflow->rule.info;
10557
if (s)
10558
af = s->key[PF_SK_STACK]->af;
10559
10560
switch (af) {
10561
case AF_INET:
10562
dnflow->f_id.addr_type = 4;
10563
if (s) {
10564
dnflow->f_id.src_ip = htonl(
10565
s->key[PF_SK_STACK]->addr[pd->sidx].v4.s_addr);
10566
dnflow->f_id.dst_ip = htonl(
10567
s->key[PF_SK_STACK]->addr[pd->didx].v4.s_addr);
10568
} else {
10569
dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr);
10570
dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr);
10571
}
10572
break;
10573
case AF_INET6:
10574
dnflow->f_id.addr_type = 6;
10575
10576
if (s) {
10577
dnflow->f_id.src_ip6 =
10578
s->key[PF_SK_STACK]->addr[pd->sidx].v6;
10579
dnflow->f_id.dst_ip6 =
10580
s->key[PF_SK_STACK]->addr[pd->didx].v6;
10581
} else {
10582
dnflow->f_id.src_ip6 = pd->src->v6;
10583
dnflow->f_id.dst_ip6 = pd->dst->v6;
10584
}
10585
break;
10586
}
10587
10588
/*
10589
* Separate this out, because while we pass the pre-NAT addresses to
10590
* dummynet we want the post-nat address family in case of nat64.
10591
* Dummynet may call ip_output/ip6_output itself, and we need it to
10592
* call the correct one.
10593
*/
10594
if (pd->naf == AF_INET6)
10595
dnflow->flags |= IPFW_ARGS_IP6;
10596
10597
return (true);
10598
}
10599
10600
int
10601
pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
10602
struct inpcb *inp)
10603
{
10604
struct pfi_kkif *kif;
10605
struct mbuf *m = *m0;
10606
10607
M_ASSERTPKTHDR(m);
10608
MPASS(ifp->if_vnet == curvnet);
10609
NET_EPOCH_ASSERT();
10610
10611
if (!V_pf_status.running)
10612
return (PF_PASS);
10613
10614
kif = (struct pfi_kkif *)ifp->if_pf_kif;
10615
10616
if (kif == NULL) {
10617
DPFPRINTF(PF_DEBUG_URGENT,
10618
"%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
10619
return (PF_DROP);
10620
}
10621
if (kif->pfik_flags & PFI_IFLAG_SKIP)
10622
return (PF_PASS);
10623
10624
if (m->m_flags & M_SKIP_FIREWALL)
10625
return (PF_PASS);
10626
10627
if (__predict_false(! M_WRITABLE(*m0))) {
10628
m = *m0 = m_unshare(*m0, M_NOWAIT);
10629
if (*m0 == NULL)
10630
return (PF_DROP);
10631
}
10632
10633
/* Stateless! */
10634
return (pf_test_eth_rule(dir, kif, m0));
10635
}
10636
10637
static __inline void
10638
pf_dummynet_flag_remove(struct mbuf *m, struct pf_mtag *pf_mtag)
10639
{
10640
struct m_tag *mtag;
10641
10642
pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET;
10643
10644
/* dummynet adds this tag, but pf does not need it,
10645
* and keeping it creates unexpected behavior,
10646
* e.g. in case of divert(4) usage right after dummynet. */
10647
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
10648
if (mtag != NULL)
10649
m_tag_delete(m, mtag);
10650
}
10651
10652
static int
10653
pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s,
10654
struct pf_krule *r, struct mbuf **m0)
10655
{
10656
return (pf_dummynet_route(pd, s, r, NULL, NULL, m0));
10657
}
10658
10659
static int
10660
pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s,
10661
struct pf_krule *r, struct ifnet *ifp, const struct sockaddr *sa,
10662
struct mbuf **m0)
10663
{
10664
struct ip_fw_args dnflow;
10665
10666
NET_EPOCH_ASSERT();
10667
10668
if (pd->act.dnpipe == 0 && pd->act.dnrpipe == 0)
10669
return (0);
10670
10671
if (ip_dn_io_ptr == NULL) {
10672
m_freem(*m0);
10673
*m0 = NULL;
10674
return (ENOMEM);
10675
}
10676
10677
if (pd->pf_mtag == NULL &&
10678
((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) {
10679
m_freem(*m0);
10680
*m0 = NULL;
10681
return (ENOMEM);
10682
}
10683
10684
if (ifp != NULL) {
10685
pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
10686
10687
pd->pf_mtag->if_index = ifp->if_index;
10688
pd->pf_mtag->if_idxgen = ifp->if_idxgen;
10689
10690
MPASS(sa != NULL);
10691
10692
switch (sa->sa_family) {
10693
case AF_INET:
10694
memcpy(&pd->pf_mtag->dst, sa,
10695
sizeof(struct sockaddr_in));
10696
break;
10697
case AF_INET6:
10698
memcpy(&pd->pf_mtag->dst, sa,
10699
sizeof(struct sockaddr_in6));
10700
break;
10701
}
10702
}
10703
10704
if (s != NULL && s->nat_rule != NULL &&
10705
s->nat_rule->action == PF_RDR &&
10706
(
10707
#ifdef INET
10708
(pd->af == AF_INET && IN_LOOPBACK(ntohl(pd->dst->v4.s_addr))) ||
10709
#endif /* INET */
10710
(pd->af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd->dst->v6)))) {
10711
/*
10712
* If we're redirecting to loopback mark this packet
10713
* as being local. Otherwise it might get dropped
10714
* if dummynet re-injects.
10715
*/
10716
(*m0)->m_pkthdr.rcvif = V_loif;
10717
}
10718
10719
if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) {
10720
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
10721
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED;
10722
ip_dn_io_ptr(m0, &dnflow);
10723
if (*m0 != NULL) {
10724
pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
10725
pf_dummynet_flag_remove(*m0, pd->pf_mtag);
10726
}
10727
}
10728
10729
return (0);
10730
}
10731
10732
static int
10733
pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end,
10734
u_short *reason)
10735
{
10736
uint8_t type, length, opts[15 * 4 - sizeof(struct ip)];
10737
10738
/* IP header in payload of ICMP packet may be too short */
10739
if (pd->m->m_pkthdr.len < end) {
10740
DPFPRINTF(PF_DEBUG_MISC, "IP option too short");
10741
REASON_SET(reason, PFRES_SHORT);
10742
return (PF_DROP);
10743
}
10744
10745
MPASS(end - off <= sizeof(opts));
10746
m_copydata(pd->m, off, end - off, opts);
10747
end -= off;
10748
off = 0;
10749
10750
while (off < end) {
10751
type = opts[off];
10752
if (type == IPOPT_EOL)
10753
break;
10754
if (type == IPOPT_NOP) {
10755
off++;
10756
continue;
10757
}
10758
if (off + 2 > end) {
10759
DPFPRINTF(PF_DEBUG_MISC, "IP length opt");
10760
REASON_SET(reason, PFRES_IPOPTIONS);
10761
return (PF_DROP);
10762
}
10763
length = opts[off + 1];
10764
if (length < 2) {
10765
DPFPRINTF(PF_DEBUG_MISC, "IP short opt");
10766
REASON_SET(reason, PFRES_IPOPTIONS);
10767
return (PF_DROP);
10768
}
10769
if (off + length > end) {
10770
DPFPRINTF(PF_DEBUG_MISC, "IP long opt");
10771
REASON_SET(reason, PFRES_IPOPTIONS);
10772
return (PF_DROP);
10773
}
10774
switch (type) {
10775
case IPOPT_RA:
10776
pd->badopts |= PF_OPT_ROUTER_ALERT;
10777
break;
10778
default:
10779
pd->badopts |= PF_OPT_OTHER;
10780
break;
10781
}
10782
off += length;
10783
}
10784
10785
return (PF_PASS);
10786
}
10787
10788
static int
10789
pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
10790
{
10791
struct ah ext;
10792
u_int32_t hlen, end;
10793
int hdr_cnt;
10794
10795
hlen = h->ip_hl << 2;
10796
if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
10797
REASON_SET(reason, PFRES_SHORT);
10798
return (PF_DROP);
10799
}
10800
if (hlen != sizeof(struct ip)) {
10801
if (pf_walk_option(pd, h, pd->off + sizeof(struct ip),
10802
pd->off + hlen, reason) != PF_PASS)
10803
return (PF_DROP);
10804
/* header options which contain only padding are fishy */
10805
if (pd->badopts == 0)
10806
pd->badopts |= PF_OPT_OTHER;
10807
}
10808
end = pd->off + ntohs(h->ip_len);
10809
pd->off += hlen;
10810
pd->proto = h->ip_p;
10811
/* IGMP packets have router alert options, allow them */
10812
if (pd->proto == IPPROTO_IGMP) {
10813
/*
10814
* According to RFC 1112 ttl must be set to 1 in all IGMP
10815
* packets sent to 224.0.0.1
10816
*/
10817
if ((h->ip_ttl != 1) &&
10818
(h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) {
10819
DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP");
10820
REASON_SET(reason, PFRES_IPOPTIONS);
10821
return (PF_DROP);
10822
}
10823
pd->badopts &= ~PF_OPT_ROUTER_ALERT;
10824
}
10825
/* stop walking over non initial fragments */
10826
if ((h->ip_off & htons(IP_OFFMASK)) != 0)
10827
return (PF_PASS);
10828
for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
10829
switch (pd->proto) {
10830
case IPPROTO_AH:
10831
/* fragments may be short */
10832
if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
10833
end < pd->off + sizeof(ext))
10834
return (PF_PASS);
10835
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
10836
reason, AF_INET)) {
10837
DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr");
10838
return (PF_DROP);
10839
}
10840
pd->off += (ext.ah_len + 2) * 4;
10841
pd->proto = ext.ah_nxt;
10842
break;
10843
default:
10844
return (PF_PASS);
10845
}
10846
}
10847
DPFPRINTF(PF_DEBUG_MISC, "IPv4 nested authentication header limit");
10848
REASON_SET(reason, PFRES_IPOPTIONS);
10849
return (PF_DROP);
10850
}
10851
10852
#ifdef INET6
10853
static int
10854
pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
10855
u_short *reason)
10856
{
10857
struct ip6_opt opt;
10858
struct ip6_opt_jumbo jumbo;
10859
10860
while (off < end) {
10861
if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
10862
sizeof(opt.ip6o_type), reason, AF_INET6)) {
10863
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type");
10864
return (PF_DROP);
10865
}
10866
if (opt.ip6o_type == IP6OPT_PAD1) {
10867
off++;
10868
continue;
10869
}
10870
if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
10871
reason, AF_INET6)) {
10872
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt");
10873
return (PF_DROP);
10874
}
10875
if (off + sizeof(opt) + opt.ip6o_len > end) {
10876
DPFPRINTF(PF_DEBUG_MISC, "IPv6 long opt");
10877
REASON_SET(reason, PFRES_IPOPTIONS);
10878
return (PF_DROP);
10879
}
10880
switch (opt.ip6o_type) {
10881
case IP6OPT_PADN:
10882
break;
10883
case IP6OPT_JUMBO:
10884
pd->badopts |= PF_OPT_JUMBO;
10885
if (pd->jumbolen != 0) {
10886
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple jumbo");
10887
REASON_SET(reason, PFRES_IPOPTIONS);
10888
return (PF_DROP);
10889
}
10890
if (ntohs(h->ip6_plen) != 0) {
10891
DPFPRINTF(PF_DEBUG_MISC, "IPv6 bad jumbo plen");
10892
REASON_SET(reason, PFRES_IPOPTIONS);
10893
return (PF_DROP);
10894
}
10895
if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
10896
reason, AF_INET6)) {
10897
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo");
10898
return (PF_DROP);
10899
}
10900
memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
10901
sizeof(pd->jumbolen));
10902
pd->jumbolen = ntohl(pd->jumbolen);
10903
if (pd->jumbolen < IPV6_MAXPACKET) {
10904
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbolen");
10905
REASON_SET(reason, PFRES_IPOPTIONS);
10906
return (PF_DROP);
10907
}
10908
break;
10909
case IP6OPT_ROUTER_ALERT:
10910
pd->badopts |= PF_OPT_ROUTER_ALERT;
10911
break;
10912
default:
10913
pd->badopts |= PF_OPT_OTHER;
10914
break;
10915
}
10916
off += sizeof(opt) + opt.ip6o_len;
10917
}
10918
10919
return (PF_PASS);
10920
}
10921
10922
int
10923
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
10924
{
10925
struct ip6_frag frag;
10926
struct ip6_ext ext;
10927
struct icmp6_hdr icmp6;
10928
struct ip6_rthdr rthdr;
10929
uint32_t end;
10930
int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
10931
10932
pd->off += sizeof(struct ip6_hdr);
10933
end = pd->off + ntohs(h->ip6_plen);
10934
pd->fragoff = pd->extoff = pd->jumbolen = 0;
10935
pd->proto = h->ip6_nxt;
10936
for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
10937
switch (pd->proto) {
10938
case IPPROTO_ROUTING:
10939
case IPPROTO_DSTOPTS:
10940
pd->badopts |= PF_OPT_OTHER;
10941
break;
10942
case IPPROTO_HOPOPTS:
10943
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
10944
reason, AF_INET6)) {
10945
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
10946
return (PF_DROP);
10947
}
10948
if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
10949
pd->off + (ext.ip6e_len + 1) * 8,
10950
reason) != PF_PASS)
10951
return (PF_DROP);
10952
/* option header which contains only padding is fishy */
10953
if (pd->badopts == 0)
10954
pd->badopts |= PF_OPT_OTHER;
10955
break;
10956
}
10957
switch (pd->proto) {
10958
case IPPROTO_FRAGMENT:
10959
if (fraghdr_cnt++) {
10960
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple fragment");
10961
REASON_SET(reason, PFRES_FRAG);
10962
return (PF_DROP);
10963
}
10964
/* jumbo payload packets cannot be fragmented */
10965
if (pd->jumbolen != 0) {
10966
DPFPRINTF(PF_DEBUG_MISC, "IPv6 fragmented jumbo");
10967
REASON_SET(reason, PFRES_FRAG);
10968
return (PF_DROP);
10969
}
10970
if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
10971
reason, AF_INET6)) {
10972
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment");
10973
return (PF_DROP);
10974
}
10975
/* stop walking over non initial fragments */
10976
if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
10977
pd->fragoff = pd->off;
10978
return (PF_PASS);
10979
}
10980
/* RFC6946: reassemble only non atomic fragments */
10981
if (frag.ip6f_offlg & IP6F_MORE_FRAG)
10982
pd->fragoff = pd->off;
10983
pd->off += sizeof(frag);
10984
pd->proto = frag.ip6f_nxt;
10985
break;
10986
case IPPROTO_ROUTING:
10987
if (rthdr_cnt++) {
10988
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple rthdr");
10989
REASON_SET(reason, PFRES_IPOPTIONS);
10990
return (PF_DROP);
10991
}
10992
/* fragments may be short */
10993
if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
10994
pd->off = pd->fragoff;
10995
pd->proto = IPPROTO_FRAGMENT;
10996
return (PF_PASS);
10997
}
10998
if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
10999
reason, AF_INET6)) {
11000
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr");
11001
return (PF_DROP);
11002
}
11003
if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
11004
DPFPRINTF(PF_DEBUG_MISC, "IPv6 rthdr0");
11005
REASON_SET(reason, PFRES_IPOPTIONS);
11006
return (PF_DROP);
11007
}
11008
/* FALLTHROUGH */
11009
case IPPROTO_HOPOPTS:
11010
/* RFC2460 4.1: Hop-by-Hop only after IPv6 header */
11011
if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
11012
DPFPRINTF(PF_DEBUG_MISC, "IPv6 hopopts not first");
11013
REASON_SET(reason, PFRES_IPOPTIONS);
11014
return (PF_DROP);
11015
}
11016
/* FALLTHROUGH */
11017
case IPPROTO_AH:
11018
case IPPROTO_DSTOPTS:
11019
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
11020
reason, AF_INET6)) {
11021
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
11022
return (PF_DROP);
11023
}
11024
/* fragments may be short */
11025
if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
11026
pd->off = pd->fragoff;
11027
pd->proto = IPPROTO_FRAGMENT;
11028
return (PF_PASS);
11029
}
11030
/* reassembly needs the ext header before the frag */
11031
if (pd->fragoff == 0)
11032
pd->extoff = pd->off;
11033
if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
11034
ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
11035
DPFPRINTF(PF_DEBUG_MISC, "IPv6 missing jumbo");
11036
REASON_SET(reason, PFRES_IPOPTIONS);
11037
return (PF_DROP);
11038
}
11039
if (pd->proto == IPPROTO_AH)
11040
pd->off += (ext.ip6e_len + 2) * 4;
11041
else
11042
pd->off += (ext.ip6e_len + 1) * 8;
11043
pd->proto = ext.ip6e_nxt;
11044
break;
11045
case IPPROTO_ICMPV6:
11046
/* fragments may be short, ignore inner header then */
11047
if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
11048
pd->off = pd->fragoff;
11049
pd->proto = IPPROTO_FRAGMENT;
11050
return (PF_PASS);
11051
}
11052
if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
11053
reason, AF_INET6)) {
11054
DPFPRINTF(PF_DEBUG_MISC,
11055
"IPv6 short icmp6hdr");
11056
return (PF_DROP);
11057
}
11058
/* ICMP multicast packets have router alert options */
11059
switch (icmp6.icmp6_type) {
11060
case MLD_LISTENER_QUERY:
11061
case MLD_LISTENER_REPORT:
11062
case MLD_LISTENER_DONE:
11063
case MLDV2_LISTENER_REPORT:
11064
/*
11065
* According to RFC 2710 all MLD messages are
11066
* sent with hop-limit (ttl) set to 1, and a link-
11067
* local source address. If either one is
11068
* missing then the MLD message is invalid and
11069
* should be discarded.
11070
*/
11071
if ((h->ip6_hlim != 1) ||
11072
!IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
11073
DPFPRINTF(PF_DEBUG_MISC, "Invalid MLD");
11074
REASON_SET(reason, PFRES_IPOPTIONS);
11075
return (PF_DROP);
11076
}
11077
pd->badopts &= ~PF_OPT_ROUTER_ALERT;
11078
break;
11079
}
11080
return (PF_PASS);
11081
case IPPROTO_TCP:
11082
case IPPROTO_UDP:
11083
case IPPROTO_SCTP:
11084
/* fragments may be short, ignore inner header then */
11085
if (pd->fragoff != 0 && end < pd->off +
11086
(pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
11087
pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
11088
pd->proto == IPPROTO_SCTP ? sizeof(struct sctphdr) :
11089
sizeof(struct icmp6_hdr))) {
11090
pd->off = pd->fragoff;
11091
pd->proto = IPPROTO_FRAGMENT;
11092
}
11093
/* FALLTHROUGH */
11094
default:
11095
return (PF_PASS);
11096
}
11097
}
11098
DPFPRINTF(PF_DEBUG_MISC, "IPv6 nested extension header limit");
11099
REASON_SET(reason, PFRES_IPOPTIONS);
11100
return (PF_DROP);
11101
}
11102
#endif /* INET6 */
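/*
 * Illustrative sketch, not part of the original source: the two length
 * units used by the header walkers above.  An AH header's length field
 * counts 32-bit words minus two, while the generic IPv6 extension
 * header length counts 8-octet units minus one, which is why the code
 * advances by (ah_len + 2) * 4 and (ip6e_len + 1) * 8 respectively.
 * The helper name is hypothetical.
 */
#if 0
static u_int
example_ext_hdr_octets(u_int8_t proto, u_int8_t len_field)
{
	/* AH: (len + 2) 32-bit words; other extensions: (len + 1) * 8 octets. */
	return (proto == IPPROTO_AH ?
	    (len_field + 2) * 4 : (len_field + 1) * 8);
}
#endif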
11103
11104
static void
11105
pf_init_pdesc(struct pf_pdesc *pd, struct mbuf *m)
11106
{
11107
memset(pd, 0, sizeof(*pd));
11108
pd->pf_mtag = pf_find_mtag(m);
11109
pd->m = m;
11110
}
11111
11112
static int
11113
pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
11114
u_short *action, u_short *reason, struct pfi_kkif *kif,
11115
struct pf_rule_actions *default_actions)
11116
{
11117
pd->dir = dir;
11118
pd->kif = kif;
11119
pd->m = *m0;
11120
pd->sidx = (dir == PF_IN) ? 0 : 1;
11121
pd->didx = (dir == PF_IN) ? 1 : 0;
11122
pd->af = pd->naf = af;
11123
11124
PF_RULES_ASSERT();
11125
11126
TAILQ_INIT(&pd->sctp_multihome_jobs);
11127
if (default_actions != NULL)
11128
memcpy(&pd->act, default_actions, sizeof(pd->act));
11129
11130
if (pd->pf_mtag && pd->pf_mtag->dnpipe) {
11131
pd->act.dnpipe = pd->pf_mtag->dnpipe;
11132
pd->act.flags = pd->pf_mtag->dnflags;
11133
}
11134
11135
switch (af) {
11136
#ifdef INET
11137
case AF_INET: {
11138
struct ip *h;
11139
11140
if (__predict_false((*m0)->m_len < sizeof(struct ip)) &&
11141
(pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) {
11142
DPFPRINTF(PF_DEBUG_URGENT,
11143
"%s: m_len < sizeof(struct ip), pullup failed",
11144
__func__);
11145
*action = PF_DROP;
11146
REASON_SET(reason, PFRES_SHORT);
11147
return (PF_DROP);
11148
}
11149
11150
h = mtod(pd->m, struct ip *);
11151
if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
11152
*action = PF_DROP;
11153
REASON_SET(reason, PFRES_SHORT);
11154
return (PF_DROP);
11155
}
11156
11157
if (pf_normalize_ip(reason, pd) != PF_PASS) {
11158
/* We do IP header normalization and packet reassembly here */
11159
*m0 = pd->m;
11160
*action = PF_DROP;
11161
return (PF_DROP);
11162
}
11163
*m0 = pd->m;
11164
h = mtod(pd->m, struct ip *);
11165
11166
if (pf_walk_header(pd, h, reason) != PF_PASS) {
11167
*action = PF_DROP;
11168
return (PF_DROP);
11169
}
11170
11171
pd->src = (struct pf_addr *)&h->ip_src;
11172
pd->dst = (struct pf_addr *)&h->ip_dst;
11173
pf_addrcpy(&pd->osrc, pd->src, af);
11174
pf_addrcpy(&pd->odst, pd->dst, af);
11175
pd->ip_sum = &h->ip_sum;
11176
pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
11177
pd->ttl = h->ip_ttl;
11178
pd->tot_len = ntohs(h->ip_len);
11179
pd->act.rtableid = -1;
11180
pd->df = h->ip_off & htons(IP_DF);
11181
pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
11182
PF_VPROTO_FRAGMENT : pd->proto;
11183
11184
break;
11185
}
11186
#endif /* INET */
11187
#ifdef INET6
11188
case AF_INET6: {
11189
struct ip6_hdr *h;
11190
11191
if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) &&
11192
(pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) {
11193
DPFPRINTF(PF_DEBUG_URGENT,
11194
"%s: m_len < sizeof(struct ip6_hdr)"
11195
", pullup failed", __func__);
11196
*action = PF_DROP;
11197
REASON_SET(reason, PFRES_SHORT);
11198
return (PF_DROP);
11199
}
11200
11201
h = mtod(pd->m, struct ip6_hdr *);
11202
if (pd->m->m_pkthdr.len <
11203
sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
11204
*action = PF_DROP;
11205
REASON_SET(reason, PFRES_SHORT);
11206
return (PF_DROP);
11207
}
11208
11209
/*
11210
* We do not support jumbograms. If we keep going, a zero ip6_plen
11211
* will do something bad, so drop the packet for now.
11212
*/
11213
if (htons(h->ip6_plen) == 0) {
11214
*action = PF_DROP;
11215
return (PF_DROP);
11216
}
11217
11218
if (pf_walk_header6(pd, h, reason) != PF_PASS) {
11219
*action = PF_DROP;
11220
return (PF_DROP);
11221
}
11222
11223
h = mtod(pd->m, struct ip6_hdr *);
11224
pd->src = (struct pf_addr *)&h->ip6_src;
11225
pd->dst = (struct pf_addr *)&h->ip6_dst;
11226
pf_addrcpy(&pd->osrc, pd->src, af);
11227
pf_addrcpy(&pd->odst, pd->dst, af);
11228
pd->ip_sum = NULL;
11229
pd->tos = IPV6_DSCP(h);
11230
pd->ttl = h->ip6_hlim;
11231
pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
11232
pd->act.rtableid = -1;
11233
11234
pd->virtual_proto = (pd->fragoff != 0) ?
11235
PF_VPROTO_FRAGMENT : pd->proto;
11236
11237
/* We do IP header normalization and packet reassembly here */
11238
if (pf_normalize_ip6(pd->fragoff, reason, pd) !=
11239
PF_PASS) {
11240
*m0 = pd->m;
11241
*action = PF_DROP;
11242
return (PF_DROP);
11243
}
11244
*m0 = pd->m;
11245
if (pd->m == NULL) {
11246
/* packet sits in reassembly queue, no error */
11247
*action = PF_PASS;
11248
return (PF_DROP);
11249
}
11250
11251
/* Update pointers into the packet. */
11252
h = mtod(pd->m, struct ip6_hdr *);
11253
pd->src = (struct pf_addr *)&h->ip6_src;
11254
pd->dst = (struct pf_addr *)&h->ip6_dst;
11255
11256
pd->off = 0;
11257
11258
if (pf_walk_header6(pd, h, reason) != PF_PASS) {
11259
*action = PF_DROP;
11260
return (PF_DROP);
11261
}
11262
11263
if (m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL) != NULL) {
11264
/*
11265
* Reassembly may have changed the next protocol from
11266
* fragment to something else, so update.
11267
*/
11268
pd->virtual_proto = pd->proto;
11269
MPASS(pd->fragoff == 0);
11270
}
11271
11272
if (pd->fragoff != 0)
11273
pd->virtual_proto = PF_VPROTO_FRAGMENT;
11274
11275
break;
11276
}
11277
#endif /* INET6 */
11278
default:
11279
panic("pf_setup_pdesc called with illegal af %u", af);
11280
}
11281
11282
switch (pd->virtual_proto) {
11283
case IPPROTO_TCP: {
11284
struct tcphdr *th = &pd->hdr.tcp;
11285
11286
if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
11287
reason, af)) {
11288
*action = PF_DROP;
11289
REASON_SET(reason, PFRES_SHORT);
11290
return (PF_DROP);
11291
}
11292
pd->hdrlen = sizeof(*th);
11293
pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
11294
pd->sport = &th->th_sport;
11295
pd->dport = &th->th_dport;
11296
pd->pcksum = &th->th_sum;
11297
break;
11298
}
11299
case IPPROTO_UDP: {
11300
struct udphdr *uh = &pd->hdr.udp;
11301
11302
if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
11303
reason, af)) {
11304
*action = PF_DROP;
11305
REASON_SET(reason, PFRES_SHORT);
11306
return (PF_DROP);
11307
}
11308
pd->hdrlen = sizeof(*uh);
11309
if (uh->uh_dport == 0 ||
11310
ntohs(uh->uh_ulen) > pd->m->m_pkthdr.len - pd->off ||
11311
ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
11312
*action = PF_DROP;
11313
REASON_SET(reason, PFRES_SHORT);
11314
return (PF_DROP);
11315
}
11316
pd->sport = &uh->uh_sport;
11317
pd->dport = &uh->uh_dport;
11318
pd->pcksum = &uh->uh_sum;
11319
break;
11320
}
11321
case IPPROTO_SCTP: {
11322
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp),
11323
reason, af)) {
11324
*action = PF_DROP;
11325
REASON_SET(reason, PFRES_SHORT);
11326
return (PF_DROP);
11327
}
11328
pd->hdrlen = sizeof(pd->hdr.sctp);
11329
pd->p_len = pd->tot_len - pd->off;
11330
11331
pd->sport = &pd->hdr.sctp.src_port;
11332
pd->dport = &pd->hdr.sctp.dest_port;
11333
if (pd->hdr.sctp.src_port == 0 || pd->hdr.sctp.dest_port == 0) {
11334
*action = PF_DROP;
11335
REASON_SET(reason, PFRES_SHORT);
11336
return (PF_DROP);
11337
}
11338
11339
/*
11340
* Placeholder. The SCTP checksum is 32-bits, but
11341
* pf_test_state() expects to update a 16-bit checksum.
11342
* Provide a dummy value which we'll subsequently ignore.
11343
* Do this before pf_scan_sctp() so any jobs we enqueue
11344
* have a pcksum set.
11345
*/
11346
pd->pcksum = &pd->sctp_dummy_sum;
11347
11348
if (pf_scan_sctp(pd) != PF_PASS) {
11349
*action = PF_DROP;
11350
REASON_SET(reason, PFRES_SHORT);
11351
return (PF_DROP);
11352
}
11353
break;
11354
}
11355
case IPPROTO_ICMP: {
11356
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
11357
reason, af)) {
11358
*action = PF_DROP;
11359
REASON_SET(reason, PFRES_SHORT);
11360
return (PF_DROP);
11361
}
11362
pd->pcksum = &pd->hdr.icmp.icmp_cksum;
11363
pd->hdrlen = ICMP_MINLEN;
11364
break;
11365
}
11366
#ifdef INET6
11367
case IPPROTO_ICMPV6: {
11368
size_t icmp_hlen = sizeof(struct icmp6_hdr);
11369
11370
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
11371
reason, af)) {
11372
*action = PF_DROP;
11373
REASON_SET(reason, PFRES_SHORT);
11374
return (PF_DROP);
11375
}
11376
/* ICMP headers we look further into to match state */
11377
switch (pd->hdr.icmp6.icmp6_type) {
11378
case MLD_LISTENER_QUERY:
11379
case MLD_LISTENER_REPORT:
11380
icmp_hlen = sizeof(struct mld_hdr);
11381
break;
11382
case ND_NEIGHBOR_SOLICIT:
11383
case ND_NEIGHBOR_ADVERT:
11384
icmp_hlen = sizeof(struct nd_neighbor_solicit);
11385
/* FALLTHROUGH */
11386
case ND_ROUTER_SOLICIT:
11387
case ND_ROUTER_ADVERT:
11388
case ND_REDIRECT:
11389
if (pd->ttl != 255) {
11390
REASON_SET(reason, PFRES_NORM);
11391
return (PF_DROP);
11392
}
11393
break;
11394
}
11395
if (icmp_hlen > sizeof(struct icmp6_hdr) &&
11396
!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
11397
reason, af)) {
11398
*action = PF_DROP;
11399
REASON_SET(reason, PFRES_SHORT);
11400
return (PF_DROP);
11401
}
11402
pd->hdrlen = icmp_hlen;
11403
pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
11404
break;
11405
}
11406
#endif /* INET6 */
11407
default:
11408
/*
11409
* Placeholder value, so future calls to pf_change_ap() don't
11410
* try to update a NULL checksum pointer.
11411
*/
11412
pd->pcksum = &pd->sctp_dummy_sum;
11413
break;
11414
}
11415
11416
if (pd->sport)
11417
pd->osport = pd->nsport = *pd->sport;
11418
if (pd->dport)
11419
pd->odport = pd->ndport = *pd->dport;
11420
11421
MPASS(pd->pcksum != NULL);
11422
11423
return (PF_PASS);
11424
}
11425
11426
static __inline void
11427
pf_rule_counters_inc(struct pf_pdesc *pd, struct pf_krule *r, int dir_out,
11428
int op_pass, sa_family_t af, struct pf_addr *src_host,
11429
struct pf_addr *dst_host)
11430
{
11431
pf_counter_u64_add_protected(&(r->packets[dir_out]), 1);
11432
pf_counter_u64_add_protected(&(r->bytes[dir_out]), pd->tot_len);
11433
pf_update_timestamp(r);
11434
11435
if (r->src.addr.type == PF_ADDR_TABLE)
11436
pfr_update_stats(r->src.addr.p.tbl, src_host, af,
11437
pd->tot_len, dir_out, op_pass, r->src.neg);
11438
if (r->dst.addr.type == PF_ADDR_TABLE)
11439
pfr_update_stats(r->dst.addr.p.tbl, dst_host, af,
11440
pd->tot_len, dir_out, op_pass, r->dst.neg);
11441
}
11442
11443
static void
11444
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_kstate *s,
11445
struct pf_krule *r, struct pf_krule *a, struct pf_krule_slist *match_rules)
11446
{
11447
struct pf_krule_slist *mr = match_rules;
11448
struct pf_krule_item *ri;
11449
struct pf_krule *nr = NULL;
11450
struct pf_addr *src_host = pd->src;
11451
struct pf_addr *dst_host = pd->dst;
11452
struct pf_state_key *key;
11453
int dir_out = (pd->dir == PF_OUT);
11454
int op_r_pass = (r->action == PF_PASS);
11455
int op_pass = (action == PF_PASS || action == PF_AFRT);
11456
int s_dir_in, s_dir_out, s_dir_rev;
11457
sa_family_t af = pd->af;
11458
11459
pf_counter_u64_critical_enter();
11460
11461
/*
11462
* Set the AF for interface counters; it will later be overwritten for
11463
* rule and state counters with the value from the proper state key.
11464
*/
11465
if (action == PF_AFRT) {
11466
MPASS(s != NULL);
11467
if (s->direction == PF_OUT && dir_out)
11468
af = pd->naf;
11469
}
11470
11471
pf_counter_u64_add_protected(
11472
&pd->kif->pfik_bytes[af == AF_INET6][dir_out][!op_pass],
11473
pd->tot_len);
11474
pf_counter_u64_add_protected(
11475
&pd->kif->pfik_packets[af == AF_INET6][dir_out][!op_pass],
11476
1);
11477
11478
/* If the rule has failed to apply, don't increase its counters */
11479
if (!(op_pass || r->action == PF_DROP)) {
11480
pf_counter_u64_critical_exit();
11481
return;
11482
}
11483
11484
if (s != NULL) {
11485
PF_STATE_LOCK_ASSERT(s);
11486
mr = &(s->match_rules);
11487
11488
/*
11489
* For af-to on the inbound direction we can determine
11490
* the direction of the passing packet only by checking the direction
11491
* of AF translation. The af-to in "in" direction covers both
11492
* the inbound and the outbound side of state tracking,
11493
* so pd->dir is always PF_IN. We set dir_out and s_dir_rev
11494
* in a way to count packets as if the state was outbound,
11495
* because pfctl -ss shows the state with "->", as if it was
11496
* outbound.
11497
*/
11498
if (action == PF_AFRT && s->direction == PF_IN) {
11499
dir_out = (pd->naf == s->rule->naf);
11500
s_dir_in = 1;
11501
s_dir_out = 0;
11502
s_dir_rev = (pd->naf == s->rule->af);
11503
} else {
11504
dir_out = (pd->dir == PF_OUT);
11505
s_dir_in = (s->direction == PF_IN);
11506
s_dir_out = (s->direction == PF_OUT);
11507
s_dir_rev = (pd->dir != s->direction);
11508
}
11509
11510
/* pd->tot_len is problematic with af-to rules. Sure, we can
11511
* agree that it's the post-af-to packet length that was
11512
* forwarded through a state, but what about tables which match
11513
* on pre-af-to addresses? We don't have access to the original
11514
* packet length anymore.
11515
*/
11516
s->packets[s_dir_rev]++;
11517
s->bytes[s_dir_rev] += pd->tot_len;
11518
11519
/*
11520
* Source nodes are accessed unlocked here. But since we are
11521
* operating with stateful tracking and the state is locked,
11522
* those SNs could not have been freed.
11523
*/
11524
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
11525
if (s->sns[sn_type] != NULL) {
11526
counter_u64_add(
11527
s->sns[sn_type]->packets[dir_out],
11528
1);
11529
counter_u64_add(
11530
s->sns[sn_type]->bytes[dir_out],
11531
pd->tot_len);
11532
}
11533
}
11534
11535
/* Start with pre-NAT addresses */
11536
key = s->key[(s->direction == PF_OUT)];
11537
src_host = &(key->addr[s_dir_out]);
11538
dst_host = &(key->addr[s_dir_in]);
11539
af = key->af;
11540
if (s->nat_rule) {
11541
/* Old-style NAT rules */
11542
if (s->nat_rule->action == PF_NAT ||
11543
s->nat_rule->action == PF_RDR ||
11544
s->nat_rule->action == PF_BINAT) {
11545
nr = s->nat_rule;
11546
pf_rule_counters_inc(pd, s->nat_rule, dir_out,
11547
op_r_pass, af, src_host, dst_host);
11548
/* Use post-NAT addresses from now on */
11549
key = s->key[s_dir_in];
11550
src_host = &(key->addr[s_dir_out]);
11551
dst_host = &(key->addr[s_dir_in]);
11552
af = key->af;
11553
}
11554
}
11555
}
11556
11557
SLIST_FOREACH(ri, mr, entry) {
11558
pf_rule_counters_inc(pd, ri->r, dir_out, op_r_pass, af,
11559
src_host, dst_host);
11560
if (s && s->nat_rule == ri->r) {
11561
/* Use post-NAT addresses after a match NAT rule */
11562
key = s->key[s_dir_in];
11563
src_host = &(key->addr[s_dir_out]);
11564
dst_host = &(key->addr[s_dir_in]);
11565
af = key->af;
11566
}
11567
}
11568
11569
if (s == NULL) {
11570
pf_free_match_rules(mr);
11571
}
11572
11573
if (a != NULL) {
11574
pf_rule_counters_inc(pd, a, dir_out, op_r_pass, af,
11575
src_host, dst_host);
11576
}
11577
11578
if (r != nr) {
11579
pf_rule_counters_inc(pd, r, dir_out, op_r_pass, af,
11580
src_host, dst_host);
11581
}
11582
11583
pf_counter_u64_critical_exit();
11584
}
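/*
 * Illustrative sketch, not part of the original source: the interface
 * counters updated in pf_counters_inc() are small multi-dimensional
 * arrays indexed by address family (0 = IPv4, 1 = IPv6), direction
 * (0 = in, 1 = out) and outcome (0 = pass, 1 = block), which is why
 * boolean expressions are used directly as indices.  The helper name
 * is hypothetical.
 */
#if 0
static void
example_kif_account(struct pfi_kkif *kif, sa_family_t af, int dir_out,
    int op_pass, uint64_t len)
{
	pf_counter_u64_critical_enter();
	pf_counter_u64_add_protected(
	    &kif->pfik_bytes[af == AF_INET6][dir_out][!op_pass], len);
	pf_counter_u64_add_protected(
	    &kif->pfik_packets[af == AF_INET6][dir_out][!op_pass], 1);
	pf_counter_u64_critical_exit();
}
#endif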
11585
11586
static void
11587
pf_log_matches(struct pf_pdesc *pd, struct pf_krule *rm,
11588
struct pf_krule *am, struct pf_kruleset *ruleset,
11589
struct pf_krule_slist *match_rules)
11590
{
11591
struct pf_krule_item *ri;
11592
11593
/* if this is the log(matches) rule, packet has been logged already */
11594
if (rm->log & PF_LOG_MATCHES)
11595
return;
11596
11597
SLIST_FOREACH(ri, match_rules, entry)
11598
if (ri->r->log & PF_LOG_MATCHES)
11599
PFLOG_PACKET(rm->action, PFRES_MATCH, rm, am,
11600
ruleset, pd, 1, ri->r);
11601
}
11602
11603
#if defined(INET) || defined(INET6)
11604
int
11605
pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
11606
struct inpcb *inp, struct pf_rule_actions *default_actions)
11607
{
11608
struct pfi_kkif *kif;
11609
u_short action, reason = 0;
11610
struct m_tag *mtag;
11611
struct pf_krule *a = NULL, *r = &V_pf_default_rule;
11612
struct pf_kstate *s = NULL;
11613
struct pf_kruleset *ruleset = NULL;
11614
struct pf_krule_item *ri;
11615
struct pf_krule_slist match_rules;
11616
struct pf_pdesc pd;
11617
int use_2nd_queue = 0;
11618
uint16_t tag;
11619
11620
PF_RULES_RLOCK_TRACKER;
11621
KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir));
11622
M_ASSERTPKTHDR(*m0);
11623
NET_EPOCH_ASSERT();
11624
11625
if (!V_pf_status.running)
11626
		return (PF_PASS);

	kif = (struct pfi_kkif *)ifp->if_pf_kif;

	if (__predict_false(kif == NULL)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    "%s: kif == NULL, if_xname %s",
		    __func__, ifp->if_xname);
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
		return (PF_PASS);
	}

	if ((*m0)->m_flags & M_SKIP_FIREWALL) {
		return (PF_PASS);
	}

	if (__predict_false(! M_WRITABLE(*m0))) {
		*m0 = m_unshare(*m0, M_NOWAIT);
		if (*m0 == NULL) {
			return (PF_DROP);
		}
	}

	pf_init_pdesc(&pd, *m0);
	SLIST_INIT(&match_rules);

	if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) {
		pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;

		ifp = ifnet_byindexgen(pd.pf_mtag->if_index,
		    pd.pf_mtag->if_idxgen);
		if (ifp == NULL || ifp->if_flags & IFF_DYING) {
			m_freem(*m0);
			*m0 = NULL;
			return (PF_PASS);
		}
		(ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL);
		*m0 = NULL;
		return (PF_PASS);
	}

	if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
	    pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
		/* Dummynet re-injects packets after they've
		 * completed their delay. We've already
		 * processed them, so pass unconditionally. */

		/* But only once. We may see the packet multiple times (e.g.
		 * PFIL_IN/PFIL_OUT). */
		pf_dummynet_flag_remove(pd.m, pd.pf_mtag);

		return (PF_PASS);
	}

	PF_RULES_RLOCK();

	if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason,
	    kif, default_actions) != PF_PASS) {
		if (action != PF_PASS)
			pd.act.log |= PF_LOG_FORCE;
		goto done;
	}

#ifdef INET
	if (af == AF_INET && dir == PF_OUT && pflags & PFIL_FWD &&
	    pd.df && (*m0)->m_pkthdr.len > ifp->if_mtu) {
		PF_RULES_RUNLOCK();
		icmp_error(*m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
		    0, ifp->if_mtu);
		*m0 = NULL;
		return (PF_DROP);
	}
#endif /* INET */
#ifdef INET6
	/*
	 * If we end up changing IP addresses (e.g. binat) the stack may get
	 * confused and fail to send the icmp6 packet too big error. Just send
	 * it here, before we do any NAT.
	 */
	if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD &&
	    in6_ifmtu(ifp) < pf_max_frag_size(*m0)) {
		PF_RULES_RUNLOCK();
		icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, in6_ifmtu(ifp));
		*m0 = NULL;
		return (PF_DROP);
	}
#endif /* INET6 */

	if (__predict_false(ip_divert_ptr != NULL) &&
	    ((mtag = m_tag_locate(pd.m, MTAG_PF_DIVERT, 0, NULL)) != NULL)) {
		struct pf_divert_mtag *dt = (struct pf_divert_mtag *)(mtag+1);
		if ((dt->idir == PF_DIVERT_MTAG_DIR_IN && dir == PF_IN) ||
		    (dt->idir == PF_DIVERT_MTAG_DIR_OUT && dir == PF_OUT)) {
			if (pd.pf_mtag == NULL &&
			    ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
				action = PF_DROP;
				goto done;
			}
			pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED;
		}
		if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) {
			pd.m->m_flags |= M_FASTFWD_OURS;
			pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
		}
		m_tag_delete(pd.m, mtag);

		mtag = m_tag_locate(pd.m, MTAG_IPFW_RULE, 0, NULL);
		if (mtag != NULL)
			m_tag_delete(pd.m, mtag);
	}

	switch (pd.virtual_proto) {
	case PF_VPROTO_FRAGMENT:
		/*
		 * handle fragments that aren't reassembled by
		 * normalization
		 */
		if (kif == NULL || r == NULL) /* pflog */
			action = PF_DROP;
		else
			action = pf_test_rule(&r, &s, &pd, &a,
			    &ruleset, &reason, inp, &match_rules);
		if (action != PF_PASS)
			REASON_SET(&reason, PFRES_FRAG);
		break;

	case IPPROTO_TCP: {
		/* Respond to SYN with a syncookie. */
		if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN &&
		    pd.dir == PF_IN && pf_synflood_check(&pd)) {
			pf_syncookie_send(&pd, &reason);
			action = PF_DROP;
			break;
		}

		if ((tcp_get_flags(&pd.hdr.tcp) & TH_ACK) && pd.p_len == 0)
			use_2nd_queue = 1;
		action = pf_normalize_tcp(&pd);
		if (action == PF_DROP)
			break;
		action = pf_test_state(&s, &pd, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
			if (s != NULL) {
				if (V_pfsync_update_state_ptr != NULL)
					V_pfsync_update_state_ptr(s);
				r = s->rule;
				a = s->anchor;
			}
		} else if (s == NULL) {
			/* Validate remote SYN|ACK, re-create original SYN if
			 * valid. */
			if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) ==
			    TH_ACK && pf_syncookie_validate(&pd) &&
			    pd.dir == PF_IN) {
				struct mbuf *msyn;

				msyn = pf_syncookie_recreate_syn(&pd, &reason);
				if (msyn == NULL) {
					action = PF_DROP;
					break;
				}

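				/*
				 * Run the rebuilt SYN back through pf_test()
				 * so state is created as if the original SYN
				 * had been seen, then discard the copy.
				 */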
				action = pf_test(af, dir, pflags, ifp, &msyn, inp,
				    &pd.act);
				m_freem(msyn);
				if (action != PF_PASS)
					break;

				action = pf_test_state(&s, &pd, &reason);
				if (action != PF_PASS || s == NULL) {
					action = PF_DROP;
					break;
				}

				s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1;
				s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1;
				pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST);
				action = pf_synproxy(&pd, s, &reason);
				break;
			} else {
				action = pf_test_rule(&r, &s, &pd,
				    &a, &ruleset, &reason, inp, &match_rules);
			}
		}
		break;
	}

	case IPPROTO_SCTP:
		action = pf_normalize_sctp(&pd);
		if (action == PF_DROP)
			break;
		/* fallthrough */
	case IPPROTO_UDP:
	default:
		action = pf_test_state(&s, &pd, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
			if (s != NULL) {
				if (V_pfsync_update_state_ptr != NULL)
					V_pfsync_update_state_ptr(s);
				r = s->rule;
				a = s->anchor;
			}
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s,
			    &pd, &a, &ruleset, &reason, inp, &match_rules);
		}
		break;

	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6: {
		if (pd.virtual_proto == IPPROTO_ICMP && af != AF_INET) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_NORM);
			DPFPRINTF(PF_DEBUG_MISC,
			    "pf: dropping IPv6 packet with ICMPv4 payload");
			break;
		}
		if (pd.virtual_proto == IPPROTO_ICMPV6 && af != AF_INET6) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_NORM);
			DPFPRINTF(PF_DEBUG_MISC,
			    "pf: dropping IPv4 packet with ICMPv6 payload");
			break;
		}
		action = pf_test_state_icmp(&s, &pd, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
			if (s != NULL) {
				if (V_pfsync_update_state_ptr != NULL)
					V_pfsync_update_state_ptr(s);
				r = s->rule;
				a = s->anchor;
			}
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, &pd,
			    &a, &ruleset, &reason, inp, &match_rules);
		break;
	}

	}

done:
	PF_RULES_RUNLOCK();

	/* if packet sits in reassembly queue, return without error */
	if (pd.m == NULL) {
		pf_free_match_rules(&match_rules);
		goto eat_pkt;
	}

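	/*
	 * If a state was found, use the rule actions cached in it, so the
	 * checks below (IP options, maximum packet size) see the options of
	 * the rule that created the state.
	 */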
	if (s)
		memcpy(&pd.act, &s->act, sizeof(s->act));

	if (action == PF_PASS && pd.badopts != 0 && !pd.act.allow_opts) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		pd.act.log = PF_LOG_FORCE;
		DPFPRINTF(PF_DEBUG_MISC,
		    "pf: dropping packet with dangerous headers");
	}

	if (pd.act.max_pkt_size && pd.tot_len > pd.act.max_pkt_size) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);
		pd.act.log = PF_LOG_FORCE;
		DPFPRINTF(PF_DEBUG_MISC,
		    "pf: dropping overly long packet");
	}

	if (s) {
		uint8_t log = pd.act.log;
		memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions));
		pd.act.log |= log;
		tag = s->tag;
	} else {
		tag = r->tag;
	}

	if (tag > 0 && pf_tag_packet(&pd, tag)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_MEMORY);
	}

	pf_scrub(&pd);
	if (pd.proto == IPPROTO_TCP && pd.act.max_mss)
		pf_normalize_mss(&pd);

	if (pd.act.rtableid >= 0)
		M_SETFIB(pd.m, pd.act.rtableid);

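	/*
	 * "set prio" maps to the 802.1q priority (PCP); low-delay TOS selects
	 * the alternate priority value.
	 */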
	if (pd.act.flags & PFSTATE_SETPRIO) {
		if (pd.tos & IPTOS_LOWDELAY)
			use_2nd_queue = 1;
		if (vlan_set_pcp(pd.m, pd.act.set_prio[use_2nd_queue])) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_MEMORY);
			pd.act.log = PF_LOG_FORCE;
			DPFPRINTF(PF_DEBUG_MISC,
			    "pf: failed to allocate 802.1q mtag");
		}
	}

#ifdef ALTQ
	if (action == PF_PASS && pd.act.qid) {
		if (pd.pf_mtag == NULL &&
		    ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_MEMORY);
		} else {
			if (s != NULL)
				pd.pf_mtag->qid_hash = pf_state_hash(s);
			if (use_2nd_queue || (pd.tos & IPTOS_LOWDELAY))
				pd.pf_mtag->qid = pd.act.pqid;
			else
				pd.pf_mtag->qid = pd.act.qid;
			/* Add hints for ecn. */
			pd.pf_mtag->hdr = mtod(pd.m, void *);
		}
	}
#endif /* ALTQ */

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule != NULL &&
	    (s->nat_rule->action == PF_RDR ||
	    s->nat_rule->action == PF_BINAT) &&
	    pf_is_loopback(af, pd.dst))
		pd.m->m_flags |= M_SKIP_FIREWALL;

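	/*
	 * Hand matching packets to divert(4), unless they have already been
	 * looped through it (PACKET_LOOPED).
	 */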
	if (action == PF_PASS && r->divert.port && !PACKET_LOOPED(&pd)) {
		mtag = m_tag_alloc(MTAG_PF_DIVERT, 0,
		    sizeof(struct pf_divert_mtag), M_NOWAIT | M_ZERO);
		if (__predict_true(mtag != NULL && ip_divert_ptr != NULL)) {
			((struct pf_divert_mtag *)(mtag+1))->port =
			    ntohs(r->divert.port);
			((struct pf_divert_mtag *)(mtag+1))->idir =
			    (dir == PF_IN) ? PF_DIVERT_MTAG_DIR_IN :
			    PF_DIVERT_MTAG_DIR_OUT;

			pf_counters_inc(action, &pd, s, r, a, &match_rules);

			if (s)
				PF_STATE_UNLOCK(s);

			m_tag_prepend(pd.m, mtag);
			if (pd.m->m_flags & M_FASTFWD_OURS) {
				if (pd.pf_mtag == NULL &&
				    ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
					action = PF_DROP;
					REASON_SET(&reason, PFRES_MEMORY);
					pd.act.log = PF_LOG_FORCE;
					DPFPRINTF(PF_DEBUG_MISC,
					    "pf: failed to allocate tag");
				} else {
					pd.pf_mtag->flags |=
					    PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
					pd.m->m_flags &= ~M_FASTFWD_OURS;
				}
			}
			ip_divert_ptr(*m0, dir == PF_IN);
			*m0 = NULL;
			return (action);
		} else if (mtag == NULL) {
			/* XXX: ipfw has the same behaviour! */
			action = PF_DROP;
			REASON_SET(&reason, PFRES_MEMORY);
			pd.act.log = PF_LOG_FORCE;
			DPFPRINTF(PF_DEBUG_MISC,
			    "pf: failed to allocate divert tag");
		} else {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_MATCH);
			pd.act.log = PF_LOG_FORCE;
			DPFPRINTF(PF_DEBUG_MISC,
			    "pf: divert(4) is not loaded");
		}
	}

	/* this flag will need revising if the pkt is forwarded */
	if (pd.pf_mtag)
		pd.pf_mtag->flags &= ~PF_MTAG_FLAG_PACKET_LOOPED;

	if (pd.act.log) {
		struct pf_krule *lr;

		if (s != NULL && s->nat_rule != NULL &&
		    s->nat_rule->log & PF_LOG_ALL)
			lr = s->nat_rule;
		else
			lr = r;

		if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL)
			PFLOG_PACKET(action, reason, lr, a,
			    ruleset, &pd, (s == NULL), NULL);
		if (s) {
			SLIST_FOREACH(ri, &s->match_rules, entry)
				if (ri->r->log & PF_LOG_ALL)
					PFLOG_PACKET(action,
					    reason, ri->r, a, ruleset, &pd, 0, NULL);
		}
	}

	pf_counters_inc(action, &pd, s, r, a, &match_rules);

	switch (action) {
	case PF_SYNPROXY_DROP:
		m_freem(*m0);
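		/* FALLTHROUGH: PF_DEFER also clears *m0 and reports PF_PASS. */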
	case PF_DEFER:
		*m0 = NULL;
		action = PF_PASS;
		break;
	case PF_DROP:
		m_freem(*m0);
		*m0 = NULL;
		break;
	case PF_AFRT:
		if (pf_translate_af(&pd, r)) {
			*m0 = pd.m;
			action = PF_DROP;
			break;
		}
#ifdef INET
		if (pd.naf == AF_INET) {
			action = pf_route(r, kif->pfik_ifp, s, &pd,
			    inp);
		}
#endif /* INET */
#ifdef INET6
		if (pd.naf == AF_INET6) {
			action = pf_route6(r, kif->pfik_ifp, s, &pd,
			    inp);
		}
#endif /* INET6 */
		*m0 = pd.m;
		goto out;
		break;
	default:
		if (pd.act.rt) {
			switch (af) {
#ifdef INET
			case AF_INET:
				/* pf_route() returns unlocked. */
				action = pf_route(r, kif->pfik_ifp, s, &pd,
				    inp);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/* pf_route6() returns unlocked. */
				action = pf_route6(r, kif->pfik_ifp, s, &pd,
				    inp);
				break;
#endif /* INET6 */
			}
			*m0 = pd.m;
			goto out;
		}
		if (pf_dummynet(&pd, s, r, m0) != 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_MEMORY);
		}
		break;
	}

eat_pkt:
	SDT_PROBE4(pf, ip, test, done, action, reason, r, s);

	if (s && action != PF_DROP) {
		if (!s->if_index_in && dir == PF_IN)
			s->if_index_in = ifp->if_index;
		else if (!s->if_index_out && dir == PF_OUT)
			s->if_index_out = ifp->if_index;
	}

	if (s)
		PF_STATE_UNLOCK(s);

out:
#ifdef INET6
	/* If reassembled packet passed, create new fragments. */
	if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT &&
	    (! (pflags & PF_PFIL_NOREFRAGMENT)) &&
	    (mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
		action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD);
#endif /* INET6 */

	pf_sctp_multihome_delayed(&pd, kif, s, action);

	return (action);
}
#endif /* INET || INET6 */
