#include <sys/cdefs.h>
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"
#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/gsb_crc32.h>
#include <sys/hash.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/random.h>
#include <sys/refcount.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/ucred.h>
#include <crypto/sha2/sha512.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/vnet.h>
#include <net/pfil.h>
#include <net/pfvar.h>
#include <net/if_pflog.h>
#include <net/if_pfsync.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_dummynet.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_dn_private.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
#endif
#include <netinet/sctp_header.h>
#include <netinet/sctp_crc32.h>
#include <netipsec/ah.h>
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
SDT_PROVIDER_DEFINE(pf);
SDT_PROBE_DEFINE2(pf, , test, reason_set, "int", "int");
SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *",
"struct pf_kstate *");
SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *",
"struct pf_state_key_cmp *", "int", "struct pf_pdesc *",
"struct pf_kstate *");
SDT_PROBE_DEFINE2(pf, ip, , bound_iface, "struct pf_kstate *",
"struct pfi_kkif *");
SDT_PROBE_DEFINE4(pf, ip, route_to, entry, "struct mbuf *",
"struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
SDT_PROBE_DEFINE1(pf, ip, route_to, drop, "int");
SDT_PROBE_DEFINE2(pf, ip, route_to, output, "struct ifnet *", "int");
SDT_PROBE_DEFINE4(pf, ip6, route_to, entry, "struct mbuf *",
"struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
SDT_PROBE_DEFINE1(pf, ip6, route_to, drop, "int");
SDT_PROBE_DEFINE2(pf, ip6, route_to, output, "struct ifnet *", "int");
SDT_PROBE_DEFINE4(pf, sctp, multihome, test, "struct pfi_kkif *",
"struct pf_krule *", "struct mbuf *", "int");
SDT_PROBE_DEFINE2(pf, sctp, multihome, add, "uint32_t",
"struct pf_sctp_source *");
SDT_PROBE_DEFINE3(pf, sctp, multihome, remove, "uint32_t",
"struct pf_kstate *", "struct pf_sctp_source *");
SDT_PROBE_DEFINE4(pf, sctp, multihome_scan, entry, "int",
"int", "struct pf_pdesc *", "int");
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, param, "uint16_t", "uint16_t");
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv4, "struct in_addr *",
"int");
SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv6, "struct in_addr6 *",
"int");
SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *",
"struct mbuf *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, test, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch,
"int", "struct pf_keth_rule *", "char *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match,
"int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t");
SDT_PROBE_DEFINE2(pf, , log, log, "int", "const char *");
VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]);
VNET_DEFINE(struct pf_kpalist, pf_pabuf[3]);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive);
VNET_DEFINE(struct pf_kstatus, pf_status);
VNET_DEFINE(u_int32_t, ticket_altqs_active);
VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
VNET_DEFINE(int, altqs_inactive_open);
VNET_DEFINE(u_int32_t, ticket_pabuf);
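/* Presumably an arbitrary cap on the header chains walked by pf_walk_header*(). */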
static const int PF_HDR_LIMIT = 20;
VNET_DEFINE(SHA512_CTX, pf_tcp_secret_ctx);
#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx)
VNET_DEFINE(u_char, pf_tcp_secret[16]);
#define V_pf_tcp_secret VNET(pf_tcp_secret)
VNET_DEFINE(int, pf_tcp_secret_init);
#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init)
VNET_DEFINE(int, pf_tcp_iss_off);
#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off)
VNET_DECLARE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)
VNET_DEFINE_STATIC(uint32_t, pf_purge_idx);
#define V_pf_purge_idx VNET(pf_purge_idx)
#ifdef PF_WANT_32_TO_64_COUNTER
VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter);
#define V_pf_counter_periodic_iter VNET(pf_counter_periodic_iter)
VNET_DEFINE(struct allrulelist_head, pf_allrulelist);
VNET_DEFINE(size_t, pf_allrulecount);
VNET_DEFINE(struct pf_krule *, pf_rulemarker);
#endif
#define PF_SCTP_MAX_ENDPOINTS 8
struct pf_sctp_endpoint;
RB_HEAD(pf_sctp_endpoints, pf_sctp_endpoint);
struct pf_sctp_source {
sa_family_t af;
struct pf_addr addr;
TAILQ_ENTRY(pf_sctp_source) entry;
};
TAILQ_HEAD(pf_sctp_sources, pf_sctp_source);
struct pf_sctp_endpoint
{
uint32_t v_tag;
struct pf_sctp_sources sources;
RB_ENTRY(pf_sctp_endpoint) entry;
};
static int
pf_sctp_endpoint_compare(struct pf_sctp_endpoint *a, struct pf_sctp_endpoint *b)
{
return (a->v_tag - b->v_tag);
}
RB_PROTOTYPE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
RB_GENERATE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
VNET_DEFINE_STATIC(struct pf_sctp_endpoints, pf_sctp_endpoints);
#define V_pf_sctp_endpoints VNET(pf_sctp_endpoints)
static struct mtx_padalign pf_sctp_endpoints_mtx;
MTX_SYSINIT(pf_sctp_endpoints_mtx, &pf_sctp_endpoints_mtx, "SCTP endpoints", MTX_DEF);
#define PF_SCTP_ENDPOINTS_LOCK() mtx_lock(&pf_sctp_endpoints_mtx)
#define PF_SCTP_ENDPOINTS_UNLOCK() mtx_unlock(&pf_sctp_endpoints_mtx)
static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
struct pf_send_entry {
STAILQ_ENTRY(pf_send_entry) pfse_next;
struct mbuf *pfse_m;
enum {
PFSE_IP,
PFSE_IP6,
PFSE_ICMP,
PFSE_ICMP6,
} pfse_type;
struct {
int type;
int code;
int mtu;
} icmpopts;
};
STAILQ_HEAD(pf_send_head, pf_send_entry);
VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
#define V_pf_sendqueue VNET(pf_sendqueue)
static struct mtx_padalign pf_sendqueue_mtx;
MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx)
#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)
struct pf_overload_entry {
SLIST_ENTRY(pf_overload_entry) next;
struct pf_addr addr;
sa_family_t af;
uint8_t dir;
struct pf_krule *rule;
};
SLIST_HEAD(pf_overload_head, pf_overload_entry);
VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue VNET(pf_overloadqueue)
VNET_DEFINE_STATIC(struct task, pf_overloadtask);
#define V_pf_overloadtask VNET(pf_overloadtask)
static struct mtx_padalign pf_overloadqueue_mtx;
MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
"pf overload/flush queue", MTX_DEF);
#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx)
#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)
VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules);
struct mtx_padalign pf_unlnkdrules_mtx;
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
MTX_DEF);
struct sx pf_config_lock;
SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config");
struct mtx_padalign pf_table_stats_lock;
MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats",
MTX_DEF);
VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z);
#define V_pf_sources_z VNET(pf_sources_z)
uma_zone_t pf_mtag_z;
VNET_DEFINE(uma_zone_t, pf_state_z);
VNET_DEFINE(uma_zone_t, pf_state_key_z);
VNET_DEFINE(uma_zone_t, pf_udp_mapping_z);
VNET_DEFINE(struct unrhdr64, pf_stateid);
static void pf_src_tree_remove_state(struct pf_kstate *);
static int pf_check_threshold(struct pf_kthreshold *);
static void pf_change_ap(struct pf_pdesc *, struct pf_addr *, u_int16_t *,
struct pf_addr *, u_int16_t);
static int pf_modulate_sack(struct pf_pdesc *,
struct tcphdr *, struct pf_state_peer *);
int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
u_int16_t *, u_int16_t *);
static void pf_change_icmp(struct pf_addr *, u_int16_t *,
struct pf_addr *, struct pf_addr *, u_int16_t,
u_int16_t *, u_int16_t *, u_int16_t *,
u_int16_t *, u_int8_t, sa_family_t);
int pf_change_icmp_af(struct mbuf *, int,
struct pf_pdesc *, struct pf_pdesc *,
struct pf_addr *, struct pf_addr *, sa_family_t,
sa_family_t);
int pf_translate_icmp_af(int, void *);
static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
int, sa_family_t, struct pf_krule *, int);
static void pf_detach_state(struct pf_kstate *);
static int pf_state_key_attach(struct pf_state_key *,
struct pf_state_key *, struct pf_kstate *);
static void pf_state_key_detach(struct pf_kstate *, int);
static int pf_state_key_ctor(void *, int, void *, int);
static u_int32_t pf_tcp_iss(struct pf_pdesc *);
static __inline void pf_dummynet_flag_remove(struct mbuf *m,
struct pf_mtag *pf_mtag);
static int pf_dummynet(struct pf_pdesc *, struct pf_kstate *,
struct pf_krule *, struct mbuf **);
static int pf_dummynet_route(struct pf_pdesc *,
struct pf_kstate *, struct pf_krule *,
struct ifnet *, const struct sockaddr *, struct mbuf **);
static int pf_test_eth_rule(int, struct pfi_kkif *,
struct mbuf **);
static int pf_test_rule(struct pf_krule **, struct pf_kstate **,
struct pf_pdesc *, struct pf_krule **,
struct pf_kruleset **, u_short *, struct inpcb *,
struct pf_krule_slist *);
static int pf_create_state(struct pf_krule *,
struct pf_test_ctx *,
struct pf_kstate **, u_int16_t, u_int16_t,
struct pf_krule_slist *match_rules);
static int pf_state_key_addr_setup(struct pf_pdesc *,
struct pf_state_key_cmp *, int);
static int pf_tcp_track_full(struct pf_kstate *,
struct pf_pdesc *, u_short *, int *,
struct pf_state_peer *, struct pf_state_peer *,
u_int8_t, u_int8_t);
static int pf_tcp_track_sloppy(struct pf_kstate *,
struct pf_pdesc *, u_short *,
struct pf_state_peer *, struct pf_state_peer *,
u_int8_t, u_int8_t);
static int pf_test_state(struct pf_kstate **, struct pf_pdesc *,
u_short *);
int pf_icmp_state_lookup(struct pf_state_key_cmp *,
struct pf_pdesc *, struct pf_kstate **,
u_int16_t, u_int16_t, int, int *, int, int);
static int pf_test_state_icmp(struct pf_kstate **,
struct pf_pdesc *, u_short *);
static int pf_sctp_track(struct pf_kstate *, struct pf_pdesc *,
u_short *);
static void pf_sctp_multihome_detach_addr(const struct pf_kstate *);
static void pf_sctp_multihome_delayed(struct pf_pdesc *,
struct pfi_kkif *, struct pf_kstate *, int);
static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
int, u_int16_t);
static int pf_check_proto_cksum(struct mbuf *, int, int,
u_int8_t, sa_family_t);
static int pf_walk_option(struct pf_pdesc *, struct ip *,
int, int, u_short *);
static int pf_walk_header(struct pf_pdesc *, struct ip *, u_short *);
#ifdef INET6
static int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
int, int, u_short *);
static int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
u_short *);
#endif
static void pf_print_state_parts(struct pf_kstate *,
struct pf_state_key *, struct pf_state_key *);
static int pf_patch_8(struct pf_pdesc *, u_int8_t *, u_int8_t,
bool);
static int pf_find_state(struct pf_pdesc *,
const struct pf_state_key_cmp *, struct pf_kstate **);
static bool pf_src_connlimit(struct pf_kstate *);
static int pf_match_rcvif(struct mbuf *, struct pf_krule *);
static void pf_counters_inc(int, struct pf_pdesc *,
struct pf_kstate *, struct pf_krule *,
struct pf_krule *, struct pf_krule_slist *);
static void pf_log_matches(struct pf_pdesc *, struct pf_krule *,
struct pf_krule *, struct pf_kruleset *,
struct pf_krule_slist *);
static void pf_overload_task(void *v, int pending);
static u_short pf_insert_src_node(struct pf_ksrc_node *[PF_SN_MAX],
struct pf_srchash *[PF_SN_MAX], struct pf_krule *,
struct pf_addr *, sa_family_t, struct pf_addr *,
struct pfi_kkif *, sa_family_t, pf_sn_types_t);
static u_int pf_purge_expired_states(u_int, int);
static void pf_purge_unlinked_rules(void);
static int pf_mtag_uminit(void *, int, int);
static void pf_mtag_free(struct m_tag *);
static void pf_packet_rework_nat(struct pf_pdesc *, int,
struct pf_state_key *);
#ifdef INET
static int pf_route(struct pf_krule *,
struct ifnet *, struct pf_kstate *,
struct pf_pdesc *, struct inpcb *);
#endif
#ifdef INET6
static void pf_change_a6(struct pf_addr *, u_int16_t *,
struct pf_addr *, u_int8_t);
static int pf_route6(struct pf_krule *,
struct ifnet *, struct pf_kstate *,
struct pf_pdesc *, struct inpcb *);
#endif
static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t);
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
extern int pf_end_threads;
extern struct proc *pf_purge_proc;
VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
#define PACKET_UNDO_NAT(_pd, _off, _s) \
do { \
struct pf_state_key *nk; \
if ((_pd)->dir == PF_OUT) \
nk = (_s)->key[PF_SK_STACK]; \
else \
nk = (_s)->key[PF_SK_WIRE]; \
pf_packet_rework_nat(_pd, _off, nk); \
} while (0)
#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \
(pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED)
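/*
 * Decide which interface a state is bound to.  States float (V_pfi_all)
 * unless the rule is interface-bound; even then, reply-to routing,
 * inbound af-translation and related rules keep the state floating,
 * since the outbound interface is not yet known at creation time.
 */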
static struct pfi_kkif *
BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd)
{
struct pfi_kkif *k = pd->kif;
SDT_PROBE2(pf, ip, , bound_iface, st, k);
if (! (st->rule->rule_flag & PFRULE_IFBOUND))
return (V_pfi_all);
if (st->rule->rt == PF_REPLYTO || (pd->af != pd->naf && st->direction == PF_IN))
return (V_pfi_all);
if (pd->related_rule)
return (V_pfi_all);
if (st->direction == PF_IN)
return (k);
if (st->act.rt != PF_ROUTETO)
return (k);
return (st->act.rt_kif);
}
#define STATE_INC_COUNTERS(s) \
do { \
struct pf_krule_item *mrm; \
counter_u64_add(s->rule->states_cur, 1); \
counter_u64_add(s->rule->states_tot, 1); \
if (s->anchor != NULL) { \
counter_u64_add(s->anchor->states_cur, 1); \
counter_u64_add(s->anchor->states_tot, 1); \
} \
if (s->nat_rule != NULL && s->nat_rule != s->rule) { \
counter_u64_add(s->nat_rule->states_cur, 1); \
counter_u64_add(s->nat_rule->states_tot, 1); \
} \
SLIST_FOREACH(mrm, &s->match_rules, entry) { \
if (s->nat_rule != mrm->r) { \
counter_u64_add(mrm->r->states_cur, 1); \
counter_u64_add(mrm->r->states_tot, 1); \
} \
} \
} while (0)
#define STATE_DEC_COUNTERS(s) \
do { \
struct pf_krule_item *mrm; \
counter_u64_add(s->rule->states_cur, -1); \
if (s->anchor != NULL) \
counter_u64_add(s->anchor->states_cur, -1); \
if (s->nat_rule != NULL && s->nat_rule != s->rule) \
counter_u64_add(s->nat_rule->states_cur, -1); \
SLIST_FOREACH(mrm, &s->match_rules, entry) \
if (s->nat_rule != mrm->r) { \
counter_u64_add(mrm->r->states_cur, -1);\
} \
} while (0)
MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping);
SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"pf(4)");
VNET_DEFINE(u_long, pf_hashmask);
VNET_DEFINE(u_long, pf_srchashmask);
VNET_DEFINE(u_long, pf_udpendpointhashmask);
VNET_DEFINE_STATIC(u_long, pf_hashsize);
#define V_pf_hashsize VNET(pf_hashsize)
VNET_DEFINE_STATIC(u_long, pf_srchashsize);
#define V_pf_srchashsize VNET(pf_srchashsize)
VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize);
#define V_pf_udpendpointhashsize VNET(pf_udpendpointhashsize)
u_long pf_ioctl_maxcount = 65535;
SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
&pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");
VNET_DEFINE(void *, pf_swi_cookie);
VNET_DEFINE(struct intr_event *, pf_swi_ie);
VNET_DEFINE(uint32_t, pf_hashseed);
#define V_pf_hashseed VNET(pf_hashseed)
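/*
 * Recompute the SCTP CRC32c: zero the checksum field first so it does
 * not contribute to the digest, then write the new value back.
 */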
static void
pf_sctp_checksum(struct mbuf *m, int off)
{
uint32_t sum = 0;
m_copyback(m, off + offsetof(struct sctphdr, checksum),
sizeof(sum), (caddr_t)&sum);
sum = sctp_calculate_cksum(m, off);
m_copyback(m, off + offsetof(struct sctphdr, checksum),
sizeof(sum), (caddr_t)&sum);
}
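/*
 * Three-way comparison of two addresses, word by word as stored;
 * returns -1, 0 or 1 and provides a consistent total order for lookups.
 */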
int
pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET:
if (a->addr32[0] > b->addr32[0])
return (1);
if (a->addr32[0] < b->addr32[0])
return (-1);
break;
#endif
#ifdef INET6
case AF_INET6:
if (a->addr32[3] > b->addr32[3])
return (1);
if (a->addr32[3] < b->addr32[3])
return (-1);
if (a->addr32[2] > b->addr32[2])
return (1);
if (a->addr32[2] < b->addr32[2])
return (-1);
if (a->addr32[1] > b->addr32[1])
return (1);
if (a->addr32[1] < b->addr32[1])
return (-1);
if (a->addr32[0] > b->addr32[0])
return (1);
if (a->addr32[0] < b->addr32[0])
return (-1);
break;
#endif
default:
unhandled_af(af);
}
return (0);
}
static bool
pf_is_loopback(sa_family_t af, struct pf_addr *addr)
{
switch (af) {
#ifdef INET
case AF_INET:
return IN_LOOPBACK(ntohl(addr->v4.s_addr));
#endif
#ifdef INET6
case AF_INET6:
return IN6_IS_ADDR_LOOPBACK(&addr->v6);
#endif
default:
unhandled_af(af);
}
}
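/*
 * Rewrite the packet's addresses and ports to match the given state key,
 * fixing up the affected checksums.  Used to (re)apply or undo a NAT
 * translation on a packet that is being reprocessed.
 */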
static void
pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
{
switch (pd->virtual_proto) {
case IPPROTO_TCP: {
struct tcphdr *th = &pd->hdr.tcp;
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
pf_change_ap(pd, pd->src, &th->th_sport,
&nk->addr[pd->sidx], nk->port[pd->sidx]);
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
pf_change_ap(pd, pd->dst, &th->th_dport,
&nk->addr[pd->didx], nk->port[pd->didx]);
m_copyback(pd->m, off, sizeof(*th), (caddr_t)th);
break;
}
case IPPROTO_UDP: {
struct udphdr *uh = &pd->hdr.udp;
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
pf_change_ap(pd, pd->src, &uh->uh_sport,
&nk->addr[pd->sidx], nk->port[pd->sidx]);
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
pf_change_ap(pd, pd->dst, &uh->uh_dport,
&nk->addr[pd->didx], nk->port[pd->didx]);
m_copyback(pd->m, off, sizeof(*uh), (caddr_t)uh);
break;
}
case IPPROTO_SCTP: {
struct sctphdr *sh = &pd->hdr.sctp;
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
pf_change_ap(pd, pd->src, &sh->src_port,
&nk->addr[pd->sidx], nk->port[pd->sidx]);
}
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
pf_change_ap(pd, pd->dst, &sh->dest_port,
&nk->addr[pd->didx], nk->port[pd->didx]);
}
break;
}
case IPPROTO_ICMP: {
struct icmp *ih = &pd->hdr.icmp;
if (nk->port[pd->sidx] != ih->icmp_id) {
pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
ih->icmp_cksum, ih->icmp_id,
nk->port[pd->sidx], 0);
ih->icmp_id = nk->port[pd->sidx];
pd->sport = &ih->icmp_id;
m_copyback(pd->m, off, ICMP_MINLEN, (caddr_t)ih);
}
}
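/* FALLTHROUGH: the address rewrite below also applies to ICMP. */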
default:
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
switch (pd->af) {
case AF_INET:
pf_change_a(&pd->src->v4.s_addr,
pd->ip_sum, nk->addr[pd->sidx].v4.s_addr,
0);
break;
case AF_INET6:
pf_addrcpy(pd->src, &nk->addr[pd->sidx],
pd->af);
break;
default:
unhandled_af(pd->af);
}
}
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
switch (pd->af) {
case AF_INET:
pf_change_a(&pd->dst->v4.s_addr,
pd->ip_sum, nk->addr[pd->didx].v4.s_addr,
0);
break;
case AF_INET6:
pf_addrcpy(pd->dst, &nk->addr[pd->didx],
pd->af);
break;
default:
unhandled_af(pd->af);
}
}
break;
}
}
static __inline uint32_t
pf_hashkey(const struct pf_state_key *sk)
{
uint32_t h;
h = murmur3_32_hash32((const uint32_t *)sk,
sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
V_pf_hashseed);
return (h & V_pf_hashmask);
}
__inline uint32_t
pf_hashsrc(struct pf_addr *addr, sa_family_t af)
{
uint32_t h;
switch (af) {
case AF_INET:
h = murmur3_32_hash32((uint32_t *)&addr->v4,
sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
break;
case AF_INET6:
h = murmur3_32_hash32((uint32_t *)&addr->v6,
sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
break;
default:
unhandled_af(af);
}
return (h & V_pf_srchashmask);
}
static inline uint32_t
pf_hashudpendpoint(struct pf_udp_endpoint *endpoint)
{
uint32_t h;
h = murmur3_32_hash32((uint32_t *)endpoint,
sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t),
V_pf_hashseed);
return (h & V_pf_udpendpointhashmask);
}
#ifdef ALTQ
static int
pf_state_hash(struct pf_kstate *s)
{
u_int32_t hv = (intptr_t)s / sizeof(*s);
hv ^= crc32(&s->src, sizeof(s->src));
hv ^= crc32(&s->dst, sizeof(s->dst));
if (hv == 0)
hv = 1;
return (hv);
}
#endif
static __inline void
pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate)
{
if (which == PF_PEER_DST || which == PF_PEER_BOTH)
s->dst.state = newstate;
if (which == PF_PEER_DST)
return;
if (s->src.state == newstate)
return;
if (s->creatorid == V_pf_status.hostid &&
s->key[PF_SK_STACK] != NULL &&
s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
!(TCPS_HAVEESTABLISHED(s->src.state) ||
s->src.state == TCPS_CLOSED) &&
(TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
atomic_add_32(&V_pf_status.states_halfopen, -1);
s->src.state = newstate;
}
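/*
 * Per-source rate limiting: pf_init_threshold() allocates the rate
 * counter, pf_check_threshold() returns non-zero once the limit has
 * been exceeded within the configured window.
 */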
bool
pf_init_threshold(struct pf_kthreshold *threshold,
u_int32_t limit, u_int32_t seconds)
{
threshold->limit = limit;
threshold->seconds = seconds;
threshold->cr = counter_rate_alloc(M_NOWAIT, seconds);
return (threshold->cr != NULL);
}
static int
pf_check_threshold(struct pf_kthreshold *threshold)
{
return (counter_ratecheck(threshold->cr, threshold->limit) < 0);
}
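/*
 * Enforce max-src-conn and max-src-conn-rate for a state's limit source
 * node.  On violation the state is scheduled for purging and, if an
 * overload table is configured, the offending address is queued for
 * insertion by pf_overload_task().  Returns true if a limit was hit.
 */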
static bool
pf_src_connlimit(struct pf_kstate *state)
{
struct pf_overload_entry *pfoe;
struct pf_ksrc_node *src_node = state->sns[PF_SN_LIMIT];
bool limited = false;
PF_STATE_LOCK_ASSERT(state);
PF_SRC_NODE_LOCK(src_node);
src_node->conn++;
state->src.tcp_est = 1;
if (state->rule->max_src_conn &&
state->rule->max_src_conn <
src_node->conn) {
counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
limited = true;
}
if (state->rule->max_src_conn_rate.limit &&
pf_check_threshold(&src_node->conn_rate)) {
counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
limited = true;
}
if (!limited)
goto done;
state->timeout = PFTM_PURGE;
pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED);
if (state->rule->overload_tbl == NULL)
goto done;
pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
if (pfoe == NULL)
goto done;
bcopy(&src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
pfoe->af = state->key[PF_SK_WIRE]->af;
pfoe->rule = state->rule;
pfoe->dir = state->direction;
PF_OVERLOADQ_LOCK();
SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
PF_OVERLOADQ_UNLOCK();
taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
done:
PF_SRC_NODE_UNLOCK(src_node);
return (limited);
}
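/*
 * Deferred overload processing: drain the overload queue, insert the
 * offending addresses into their overload tables and, for rules with
 * "flush", kill the matching states.
 */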
static void
pf_overload_task(void *v, int pending)
{
struct pf_overload_head queue;
struct pfr_addr p;
struct pf_overload_entry *pfoe, *pfoe1;
uint32_t killed = 0;
CURVNET_SET((struct vnet *)v);
PF_OVERLOADQ_LOCK();
queue = V_pf_overloadqueue;
SLIST_INIT(&V_pf_overloadqueue);
PF_OVERLOADQ_UNLOCK();
bzero(&p, sizeof(p));
SLIST_FOREACH(pfoe, &queue, next) {
counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("%s: blocking address ", __func__);
pf_print_host(&pfoe->addr, 0, pfoe->af);
printf("\n");
}
p.pfra_af = pfoe->af;
switch (pfoe->af) {
#ifdef INET
case AF_INET:
p.pfra_net = 32;
p.pfra_ip4addr = pfoe->addr.v4;
break;
#endif
#ifdef INET6
case AF_INET6:
p.pfra_net = 128;
p.pfra_ip6addr = pfoe->addr.v6;
break;
#endif
default:
unhandled_af(pfoe->af);
}
PF_RULES_WLOCK();
pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
PF_RULES_WUNLOCK();
}
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
if (pfoe->rule->flush == 0) {
SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
free(pfoe, M_PFTEMP);
} else
counter_u64_add(
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);
if (SLIST_EMPTY(&queue)) {
CURVNET_RESTORE();
return;
}
for (int i = 0; i <= V_pf_hashmask; i++) {
struct pf_idhash *ih = &V_pf_idhash[i];
struct pf_state_key *sk;
struct pf_kstate *s;
PF_HASHROW_LOCK(ih);
LIST_FOREACH(s, &ih->states, entry) {
sk = s->key[PF_SK_WIRE];
SLIST_FOREACH(pfoe, &queue, next)
if (sk->af == pfoe->af &&
((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
pfoe->rule == s->rule) &&
((pfoe->dir == PF_OUT &&
PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
(pfoe->dir == PF_IN &&
PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
s->timeout = PFTM_PURGE;
pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
killed++;
}
}
PF_HASHROW_UNLOCK(ih);
}
SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
free(pfoe, M_PFTEMP);
if (V_pf_status.debug >= PF_DEBUG_MISC)
printf("%s: %u states killed", __func__, killed);
CURVNET_RESTORE();
}
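/*
 * Look up a source node.  The hash row remains locked if the node was
 * found, or if returnlocked was requested by the caller.
 */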
struct pf_ksrc_node *
pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af,
struct pf_srchash **sh, pf_sn_types_t sn_type, bool returnlocked)
{
struct pf_ksrc_node *n;
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
*sh = &V_pf_srchash[pf_hashsrc(src, af)];
PF_HASHROW_LOCK(*sh);
LIST_FOREACH(n, &(*sh)->nodes, entry)
if (n->rule == rule && n->af == af && n->type == sn_type &&
((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
(af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
break;
if (n == NULL && !returnlocked)
PF_HASHROW_UNLOCK(*sh);
return (n);
}
bool
pf_src_node_exists(struct pf_ksrc_node **sn, struct pf_srchash *sh)
{
struct pf_ksrc_node *cur;
if ((*sn) == NULL)
return (false);
KASSERT(sh != NULL, ("%s: sh is NULL", __func__));
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
PF_HASHROW_LOCK(sh);
LIST_FOREACH(cur, &(sh->nodes), entry) {
if (cur == (*sn) &&
cur->expire != 1)
return (true);
}
PF_HASHROW_UNLOCK(sh);
(*sn) = NULL;
return (false);
}
static void
pf_free_src_node(struct pf_ksrc_node *sn)
{
for (int i = 0; i < 2; i++) {
counter_u64_free(sn->bytes[i]);
counter_u64_free(sn->packets[i]);
}
counter_rate_free(sn->conn_rate.cr);
uma_zfree(V_pf_sources_z, sn);
}
static u_short
pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX],
struct pf_srchash *snhs[PF_SN_MAX], struct pf_krule *rule,
struct pf_addr *src, sa_family_t af, struct pf_addr *raddr,
struct pfi_kkif *rkif, sa_family_t raf, pf_sn_types_t sn_type)
{
u_short reason = 0;
struct pf_krule *r_track = rule;
struct pf_ksrc_node **sn = &(sns[sn_type]);
struct pf_srchash **sh = &(snhs[sn_type]);
KASSERT(sn_type != PF_SN_LIMIT || (raddr == NULL && rkif == NULL),
("%s: raddr and rkif must be NULL for PF_SN_LIMIT", __func__));
KASSERT(sn_type != PF_SN_LIMIT || (rule->rule_flag & PFRULE_SRCTRACK),
("%s: PF_SN_LIMIT only valid for rules with PFRULE_SRCTRACK", __func__));
if ((rule->rule_flag & PFRULE_SRCTRACK) &&
!(rule->rule_flag & PFRULE_RULESRCTRACK))
r_track = &V_pf_default_rule;
if (*sn == NULL)
*sn = pf_find_src_node(src, r_track, af, sh, sn_type, true);
if (*sn == NULL) {
PF_HASHROW_ASSERT(*sh);
if (sn_type == PF_SN_LIMIT && rule->max_src_nodes &&
counter_u64_fetch(r_track->src_nodes[sn_type]) >= rule->max_src_nodes) {
counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1);
reason = PFRES_SRCLIMIT;
goto done;
}
(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
if ((*sn) == NULL) {
reason = PFRES_MEMORY;
goto done;
}
for (int i = 0; i < 2; i++) {
(*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT);
(*sn)->packets[i] = counter_u64_alloc(M_NOWAIT);
if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) {
pf_free_src_node(*sn);
reason = PFRES_MEMORY;
goto done;
}
}
if (sn_type == PF_SN_LIMIT)
if (! pf_init_threshold(&(*sn)->conn_rate,
rule->max_src_conn_rate.limit,
rule->max_src_conn_rate.seconds)) {
pf_free_src_node(*sn);
reason = PFRES_MEMORY;
goto done;
}
MPASS((*sn)->lock == NULL);
(*sn)->lock = &(*sh)->lock;
(*sn)->af = af;
(*sn)->rule = r_track;
pf_addrcpy(&(*sn)->addr, src, af);
if (raddr != NULL)
pf_addrcpy(&(*sn)->raddr, raddr, raf);
(*sn)->rkif = rkif;
(*sn)->raf = raf;
LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry);
(*sn)->creation = time_uptime;
(*sn)->ruletype = rule->action;
(*sn)->type = sn_type;
counter_u64_add(r_track->src_nodes[sn_type], 1);
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
} else {
if (sn_type == PF_SN_LIMIT && rule->max_src_states &&
(*sn)->states >= rule->max_src_states) {
counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
1);
reason = PFRES_SRCLIMIT;
goto done;
}
}
done:
if (reason == 0)
(*sn)->states++;
else
(*sn) = NULL;
PF_HASHROW_UNLOCK(*sh);
return (reason);
}
void
pf_unlink_src_node(struct pf_ksrc_node *src)
{
PF_SRC_NODE_LOCK_ASSERT(src);
LIST_REMOVE(src, entry);
if (src->rule)
counter_u64_add(src->rule->src_nodes[src->type], -1);
}
u_int
pf_free_src_nodes(struct pf_ksrc_node_list *head)
{
struct pf_ksrc_node *sn, *tmp;
u_int count = 0;
LIST_FOREACH_SAFE(sn, head, entry, tmp) {
pf_free_src_node(sn);
count++;
}
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);
return (count);
}
void
pf_mtag_initialize(void)
{
pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
UMA_ALIGN_PTR, 0);
}
void
pf_initialize(void)
{
struct pf_keyhash *kh;
struct pf_idhash *ih;
struct pf_srchash *sh;
struct pf_udpendpointhash *uh;
u_int i;
if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
V_pf_hashsize = PF_HASHSIZ;
if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
V_pf_srchashsize = PF_SRCHASHSIZ;
if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize))
V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
V_pf_hashseed = arc4random();
V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
uma_zone_set_warning(V_pf_state_z, "PF states limit reached");
V_pf_state_key_z = uma_zcreate("pf state keys",
sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
V_pf_keyhash = mallocarray(V_pf_hashsize, sizeof(struct pf_keyhash),
M_PFHASH, M_NOWAIT | M_ZERO);
V_pf_idhash = mallocarray(V_pf_hashsize, sizeof(struct pf_idhash),
M_PFHASH, M_NOWAIT | M_ZERO);
if (V_pf_keyhash == NULL || V_pf_idhash == NULL) {
printf("pf: Unable to allocate memory for "
"state_hashsize %lu.\n", V_pf_hashsize);
free(V_pf_keyhash, M_PFHASH);
free(V_pf_idhash, M_PFHASH);
V_pf_hashsize = PF_HASHSIZ;
V_pf_keyhash = mallocarray(V_pf_hashsize,
sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO);
V_pf_idhash = mallocarray(V_pf_hashsize,
sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO);
}
V_pf_hashmask = V_pf_hashsize - 1;
for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
i++, kh++, ih++) {
mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
}
V_pf_sources_z = uma_zcreate("pf source nodes",
sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
0);
V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
V_pf_srchash = mallocarray(V_pf_srchashsize,
sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO);
if (V_pf_srchash == NULL) {
printf("pf: Unable to allocate memory for "
"source_hashsize %lu.\n", V_pf_srchashsize);
V_pf_srchashsize = PF_SRCHASHSIZ;
V_pf_srchash = mallocarray(V_pf_srchashsize,
sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO);
}
V_pf_srchashmask = V_pf_srchashsize - 1;
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings",
sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO);
if (V_pf_udpendpointhash == NULL) {
printf("pf: Unable to allocate memory for "
"udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize);
V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO);
}
V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1;
for (i = 0, uh = V_pf_udpendpointhash;
i <= V_pf_udpendpointhashmask;
i++, uh++) {
mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
MTX_DEF | MTX_DUPOK);
}
V_pf_anchor_z = uma_zcreate("pf anchors",
sizeof(struct pf_kanchor), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
V_pf_limits[PF_LIMIT_ANCHORS].zone = V_pf_anchor_z;
uma_zone_set_max(V_pf_anchor_z, PF_ANCHOR_HIWAT);
uma_zone_set_warning(V_pf_anchor_z, "PF anchor limit reached");
V_pf_eth_anchor_z = uma_zcreate("pf Ethernet anchors",
sizeof(struct pf_keth_anchor), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
V_pf_limits[PF_LIMIT_ETH_ANCHORS].zone = V_pf_eth_anchor_z;
uma_zone_set_max(V_pf_eth_anchor_z, PF_ANCHOR_HIWAT);
uma_zone_set_warning(V_pf_eth_anchor_z, "PF Ethernet anchor limit reached");
TAILQ_INIT(&V_pf_altqs[0]);
TAILQ_INIT(&V_pf_altqs[1]);
TAILQ_INIT(&V_pf_altqs[2]);
TAILQ_INIT(&V_pf_altqs[3]);
TAILQ_INIT(&V_pf_pabuf[0]);
TAILQ_INIT(&V_pf_pabuf[1]);
TAILQ_INIT(&V_pf_pabuf[2]);
V_pf_altqs_active = &V_pf_altqs[0];
V_pf_altq_ifs_active = &V_pf_altqs[1];
V_pf_altqs_inactive = &V_pf_altqs[2];
V_pf_altq_ifs_inactive = &V_pf_altqs[3];
STAILQ_INIT(&V_pf_sendqueue);
SLIST_INIT(&V_pf_overloadqueue);
TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);
TAILQ_INIT(&V_pf_unlinked_rules);
}
void
pf_mtag_cleanup(void)
{
uma_zdestroy(pf_mtag_z);
}
void
pf_cleanup(void)
{
struct pf_keyhash *kh;
struct pf_idhash *ih;
struct pf_srchash *sh;
struct pf_udpendpointhash *uh;
struct pf_send_entry *pfse, *next;
u_int i;
for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash;
i <= V_pf_hashmask;
i++, kh++, ih++) {
KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
__func__));
KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
__func__));
mtx_destroy(&kh->lock);
mtx_destroy(&ih->lock);
}
free(V_pf_keyhash, M_PFHASH);
free(V_pf_idhash, M_PFHASH);
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
KASSERT(LIST_EMPTY(&sh->nodes),
("%s: source node hash not empty", __func__));
mtx_destroy(&sh->lock);
}
free(V_pf_srchash, M_PFHASH);
for (i = 0, uh = V_pf_udpendpointhash;
i <= V_pf_udpendpointhashmask;
i++, uh++) {
KASSERT(LIST_EMPTY(&uh->endpoints),
("%s: udp endpoint hash not empty", __func__));
mtx_destroy(&uh->lock);
}
free(V_pf_udpendpointhash, M_PFHASH);
STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
m_freem(pfse->pfse_m);
free(pfse, M_PFTEMP);
}
MPASS(RB_EMPTY(&V_pf_sctp_endpoints));
uma_zdestroy(V_pf_sources_z);
uma_zdestroy(V_pf_state_z);
uma_zdestroy(V_pf_state_key_z);
uma_zdestroy(V_pf_udp_mapping_z);
uma_zdestroy(V_pf_anchor_z);
uma_zdestroy(V_pf_eth_anchor_z);
}
static int
pf_mtag_uminit(void *mem, int size, int how)
{
struct m_tag *t;
t = (struct m_tag *)mem;
t->m_tag_cookie = MTAG_ABI_COMPAT;
t->m_tag_id = PACKET_TAG_PF;
t->m_tag_len = sizeof(struct pf_mtag);
t->m_tag_free = pf_mtag_free;
return (0);
}
static void
pf_mtag_free(struct m_tag *t)
{
uma_zfree(pf_mtag_z, t);
}
struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
struct m_tag *mtag;
if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
return ((struct pf_mtag *)(mtag + 1));
mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
if (mtag == NULL)
return (NULL);
bzero(mtag + 1, sizeof(struct pf_mtag));
m_tag_prepend(m, mtag);
return ((struct pf_mtag *)(mtag + 1));
}
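/*
 * Attach the wire and stack keys to a state.  When the two keys hash to
 * different rows, the rows are always locked in ascending address order
 * to avoid deadlock.  On success the state is returned locked; on a key
 * collision that cannot be reused, the keys are freed and EEXIST is
 * returned.
 */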
static int
pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
struct pf_kstate *s)
{
struct pf_keyhash *khs, *khw, *kh;
struct pf_state_key *sk, *cur;
struct pf_kstate *si, *olds = NULL;
int idx;
NET_EPOCH_ASSERT();
KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
if (skw == sks) {
khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
PF_HASHROW_LOCK(khs);
} else {
khs = &V_pf_keyhash[pf_hashkey(sks)];
khw = &V_pf_keyhash[pf_hashkey(skw)];
if (khs == khw) {
PF_HASHROW_LOCK(khs);
} else if (khs < khw) {
PF_HASHROW_LOCK(khs);
PF_HASHROW_LOCK(khw);
} else {
PF_HASHROW_LOCK(khw);
PF_HASHROW_LOCK(khs);
}
}
#define KEYS_UNLOCK() do { \
if (khs != khw) { \
PF_HASHROW_UNLOCK(khs); \
PF_HASHROW_UNLOCK(khw); \
} else \
PF_HASHROW_UNLOCK(khs); \
} while (0)
sk = skw;
kh = khw;
idx = PF_SK_WIRE;
MPASS(s->lock == NULL);
s->lock = &V_pf_idhash[PF_IDHASH(s)].lock;
keyattach:
LIST_FOREACH(cur, &kh->keys, entry)
if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
break;
if (cur != NULL) {
TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
PF_HASHROW_LOCK(ih);
if (si->kif == s->kif &&
((si->key[PF_SK_WIRE]->af == sk->af &&
si->direction == s->direction) ||
(si->key[PF_SK_WIRE]->af !=
si->key[PF_SK_STACK]->af &&
sk->af == si->key[PF_SK_STACK]->af &&
si->direction != s->direction))) {
bool reuse = false;
if (sk->proto == IPPROTO_TCP &&
si->src.state >= TCPS_FIN_WAIT_2 &&
si->dst.state >= TCPS_FIN_WAIT_2)
reuse = true;
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: %s key attach "
"%s on %s: ",
(idx == PF_SK_WIRE) ?
"wire" : "stack",
reuse ? "reuse" : "failed",
s->kif->pfik_name);
pf_print_state_parts(s,
(idx == PF_SK_WIRE) ?
sk : NULL,
(idx == PF_SK_STACK) ?
sk : NULL);
printf(", existing: ");
pf_print_state_parts(si,
(idx == PF_SK_WIRE) ?
sk : NULL,
(idx == PF_SK_STACK) ?
sk : NULL);
printf("\n");
}
if (reuse) {
pf_set_protostate(si, PF_PEER_BOTH,
TCPS_CLOSED);
si->timeout = PFTM_PURGE;
olds = si;
} else {
s->timeout = PFTM_UNLINKED;
if (idx == PF_SK_STACK)
pf_state_key_detach(s,
PF_SK_WIRE);
PF_HASHROW_UNLOCK(ih);
KEYS_UNLOCK();
if (idx == PF_SK_WIRE)
uma_zfree(V_pf_state_key_z, skw);
if (skw != sks)
uma_zfree(
V_pf_state_key_z,
sks);
return (EEXIST);
}
}
PF_HASHROW_UNLOCK(ih);
}
uma_zfree(V_pf_state_key_z, sk);
s->key[idx] = cur;
} else {
LIST_INSERT_HEAD(&kh->keys, sk, entry);
s->key[idx] = sk;
}
stateattach:
if (s->kif == V_pfi_all)
TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
else
TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
if (olds) {
TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
key_list[idx]);
olds = NULL;
}
if (sks == skw) {
s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
idx = PF_SK_STACK;
sks = NULL;
goto stateattach;
} else if (sks != NULL) {
sk = sks;
kh = khs;
idx = PF_SK_STACK;
sks = NULL;
goto keyattach;
}
PF_STATE_LOCK(s);
KEYS_UNLOCK();
KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
("%s failure", __func__));
return (0);
#undef KEYS_UNLOCK
}
static void
pf_detach_state(struct pf_kstate *s)
{
struct pf_state_key *sks = s->key[PF_SK_STACK];
struct pf_keyhash *kh;
NET_EPOCH_ASSERT();
MPASS(s->timeout >= PFTM_MAX);
pf_sctp_multihome_detach_addr(s);
if ((s->state_flags & PFSTATE_PFLOW) && V_pflow_export_state_ptr)
V_pflow_export_state_ptr(s);
if (sks != NULL) {
kh = &V_pf_keyhash[pf_hashkey(sks)];
PF_HASHROW_LOCK(kh);
if (s->key[PF_SK_STACK] != NULL)
pf_state_key_detach(s, PF_SK_STACK);
if (sks == s->key[PF_SK_WIRE]) {
pf_state_key_detach(s, PF_SK_WIRE);
PF_HASHROW_UNLOCK(kh);
return;
}
PF_HASHROW_UNLOCK(kh);
}
if (s->key[PF_SK_WIRE] != NULL) {
kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
PF_HASHROW_LOCK(kh);
if (s->key[PF_SK_WIRE] != NULL)
pf_state_key_detach(s, PF_SK_WIRE);
PF_HASHROW_UNLOCK(kh);
}
}
static void
pf_state_key_detach(struct pf_kstate *s, int idx)
{
struct pf_state_key *sk = s->key[idx];
#ifdef INVARIANTS
struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];
PF_HASHROW_ASSERT(kh);
#endif
TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
s->key[idx] = NULL;
if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
LIST_REMOVE(sk, entry);
uma_zfree(V_pf_state_key_z, sk);
}
}
static int
pf_state_key_ctor(void *mem, int size, void *arg, int flags)
{
struct pf_state_key *sk = mem;
bzero(sk, sizeof(struct pf_state_key_cmp));
TAILQ_INIT(&sk->states[PF_SK_WIRE]);
TAILQ_INIT(&sk->states[PF_SK_STACK]);
return (0);
}
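/*
 * Fill in the key addresses.  IPv6 neighbor discovery is special-cased:
 * solicitations and advertisements are keyed on the ND target address,
 * presumably so the solicited-node multicast request and the unicast
 * reply match the same state.
 */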
static int
pf_state_key_addr_setup(struct pf_pdesc *pd,
struct pf_state_key_cmp *key, int multi)
{
struct pf_addr *saddr = pd->src;
struct pf_addr *daddr = pd->dst;
#ifdef INET6
struct nd_neighbor_solicit nd;
struct pf_addr *target;
if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6)
goto copy;
switch (pd->hdr.icmp6.icmp6_type) {
case ND_NEIGHBOR_SOLICIT:
if (multi)
return (-1);
if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
pd->af))
return (-1);
target = (struct pf_addr *)&nd.nd_ns_target;
daddr = target;
break;
case ND_NEIGHBOR_ADVERT:
if (multi)
return (-1);
if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
pd->af))
return (-1);
target = (struct pf_addr *)&nd.nd_ns_target;
saddr = target;
if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
key->addr[pd->didx].addr32[0] = 0;
key->addr[pd->didx].addr32[1] = 0;
key->addr[pd->didx].addr32[2] = 0;
key->addr[pd->didx].addr32[3] = 0;
daddr = NULL;
}
break;
default:
if (multi) {
key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL;
key->addr[pd->sidx].addr32[1] = 0;
key->addr[pd->sidx].addr32[2] = 0;
key->addr[pd->sidx].addr32[3] = IPV6_ADDR_INT32_ONE;
saddr = NULL;
}
}
copy:
#endif
if (saddr)
pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af);
if (daddr)
pf_addrcpy(&key->addr[pd->didx], daddr, pd->af);
return (0);
}
int
pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport,
struct pf_state_key **sk, struct pf_state_key **nk)
{
*sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
if (*sk == NULL)
return (ENOMEM);
if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)*sk,
0)) {
uma_zfree(V_pf_state_key_z, *sk);
*sk = NULL;
return (ENOMEM);
}
(*sk)->port[pd->sidx] = sport;
(*sk)->port[pd->didx] = dport;
(*sk)->proto = pd->proto;
(*sk)->af = pd->af;
*nk = pf_state_key_clone(*sk);
if (*nk == NULL) {
uma_zfree(V_pf_state_key_z, *sk);
*sk = NULL;
return (ENOMEM);
}
if (pd->af != pd->naf) {
(*sk)->port[pd->sidx] = pd->osport;
(*sk)->port[pd->didx] = pd->odport;
(*nk)->af = pd->naf;
bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0]));
bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1]));
if (pd->dir == PF_IN) {
pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr,
pd->naf);
pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr,
pd->naf);
(*nk)->port[pd->didx] = pd->nsport;
(*nk)->port[pd->sidx] = pd->ndport;
} else {
pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr,
pd->naf);
pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr,
pd->naf);
(*nk)->port[pd->sidx] = pd->nsport;
(*nk)->port[pd->didx] = pd->ndport;
}
switch (pd->proto) {
case IPPROTO_ICMP:
(*nk)->proto = IPPROTO_ICMPV6;
break;
case IPPROTO_ICMPV6:
(*nk)->proto = IPPROTO_ICMP;
break;
default:
(*nk)->proto = pd->proto;
}
}
return (0);
}
struct pf_state_key *
pf_state_key_clone(const struct pf_state_key *orig)
{
struct pf_state_key *sk;
sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
if (sk == NULL)
return (NULL);
bcopy(orig, sk, sizeof(struct pf_state_key_cmp));
return (sk);
}
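/*
 * Insert a fully set up state into the id hash.  On success the state
 * is returned locked; on an ID collision it is detached and EEXIST is
 * returned.
 */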
int
pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif,
struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s)
{
struct pf_idhash *ih;
struct pf_kstate *cur;
int error;
NET_EPOCH_ASSERT();
KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
("%s: sks not pristine", __func__));
KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
("%s: skw not pristine", __func__));
KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
s->kif = kif;
s->orig_kif = orig_kif;
if (s->id == 0 && s->creatorid == 0) {
s->id = alloc_unr64(&V_pf_stateid);
s->id = htobe64(s->id);
s->creatorid = V_pf_status.hostid;
}
if ((error = pf_state_key_attach(skw, sks, s)) != 0)
return (error);
skw = sks = NULL;
ih = &V_pf_idhash[PF_IDHASH(s)];
PF_HASHROW_ASSERT(ih);
LIST_FOREACH(cur, &ih->states, entry)
if (cur->id == s->id && cur->creatorid == s->creatorid)
break;
if (cur != NULL) {
s->timeout = PFTM_UNLINKED;
PF_HASHROW_UNLOCK(ih);
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: state ID collision: "
"id: %016llx creatorid: %08x\n",
(unsigned long long)be64toh(s->id),
ntohl(s->creatorid));
}
pf_detach_state(s);
return (EEXIST);
}
LIST_INSERT_HEAD(&ih->states, s, entry);
refcount_init(&s->refs, 2);
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
if (V_pfsync_insert_state_ptr != NULL)
V_pfsync_insert_state_ptr(s);
return (0);
}
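/* Find a state by ID.  Returns with the state locked if found. */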
struct pf_kstate *
pf_find_state_byid(uint64_t id, uint32_t creatorid)
{
struct pf_idhash *ih;
struct pf_kstate *s;
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
ih = &V_pf_idhash[PF_IDHASHID(id)];
PF_HASHROW_LOCK(ih);
LIST_FOREACH(s, &ih->states, entry)
if (s->id == id && s->creatorid == creatorid)
break;
if (s == NULL)
PF_HASHROW_UNLOCK(ih);
return (s);
}
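/*
 * Look up the state for a packet.  Returns PF_MATCH with *state set and
 * locked, PF_PASS for packets looped back by pf itself, or PF_DROP if
 * no usable state exists or a per-rule packet rate limit was exceeded.
 */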
static int
pf_find_state(struct pf_pdesc *pd, const struct pf_state_key_cmp *key,
struct pf_kstate **state)
{
struct pf_keyhash *kh;
struct pf_state_key *sk;
struct pf_kstate *s;
int idx;
*state = NULL;
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];
PF_HASHROW_LOCK(kh);
LIST_FOREACH(sk, &kh->keys, entry)
if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
break;
if (sk == NULL) {
PF_HASHROW_UNLOCK(kh);
return (PF_DROP);
}
idx = (pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
if (s->kif == V_pfi_all || s->kif == pd->kif ||
s->orig_kif == pd->kif) {
PF_STATE_LOCK(s);
PF_HASHROW_UNLOCK(kh);
if (__predict_false(s->timeout >= PFTM_MAX)) {
PF_STATE_UNLOCK(s);
SDT_PROBE5(pf, ip, state, lookup, pd->kif,
key, (pd->dir), pd, *state);
return (PF_DROP);
}
goto out;
}
idx = idx == PF_SK_WIRE ? PF_SK_STACK : PF_SK_WIRE;
TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
if (s->key[PF_SK_WIRE]->af == s->key[PF_SK_STACK]->af)
continue;
if (s->kif == V_pfi_all || s->kif == pd->kif ||
s->orig_kif == pd->kif) {
PF_STATE_LOCK(s);
PF_HASHROW_UNLOCK(kh);
if (__predict_false(s->timeout >= PFTM_MAX)) {
PF_STATE_UNLOCK(s);
SDT_PROBE5(pf, ip, state, lookup, pd->kif,
key, (pd->dir), pd, NULL);
return (PF_DROP);
}
goto out;
}
}
PF_HASHROW_UNLOCK(kh);
out:
SDT_PROBE5(pf, ip, state, lookup, pd->kif, key, (pd->dir), pd, *state);
if (s == NULL || s->timeout == PFTM_PURGE) {
if (s)
PF_STATE_UNLOCK(s);
return (PF_DROP);
}
if (s->rule->pktrate.limit && pd->dir == s->direction) {
if (pf_check_threshold(&s->rule->pktrate)) {
PF_STATE_UNLOCK(s);
return (PF_DROP);
}
}
if (PACKET_LOOPED(pd)) {
PF_STATE_UNLOCK(s);
return (PF_PASS);
}
*state = s;
return (PF_MATCH);
}
struct pf_kstate *
pf_find_state_all(const struct pf_state_key_cmp *key, u_int dir, int *more)
{
struct pf_keyhash *kh;
struct pf_state_key *sk;
struct pf_kstate *s, *ret = NULL;
int idx, inout = 0;
if (more != NULL)
*more = 0;
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];
PF_HASHROW_LOCK(kh);
LIST_FOREACH(sk, &kh->keys, entry)
if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
break;
if (sk == NULL) {
PF_HASHROW_UNLOCK(kh);
return (NULL);
}
switch (dir) {
case PF_IN:
idx = PF_SK_WIRE;
break;
case PF_OUT:
idx = PF_SK_STACK;
break;
case PF_INOUT:
idx = PF_SK_WIRE;
inout = 1;
break;
default:
panic("%s: dir %u", __func__, dir);
}
second_run:
TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
if (more == NULL) {
PF_STATE_LOCK(s);
PF_HASHROW_UNLOCK(kh);
return (s);
}
if (ret)
(*more)++;
else {
ret = s;
PF_STATE_LOCK(s);
}
}
if (inout == 1) {
inout = 0;
idx = PF_SK_STACK;
goto second_run;
}
PF_HASHROW_UNLOCK(kh);
return (ret);
}
bool
pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir)
{
struct pf_kstate *s;
s = pf_find_state_all(key, dir, NULL);
if (s != NULL) {
PF_STATE_UNLOCK(s);
return (true);
}
return (false);
}
void
pf_state_peer_hton(const struct pf_state_peer *s, struct pf_state_peer_export *d)
{
d->seqlo = htonl(s->seqlo);
d->seqhi = htonl(s->seqhi);
d->seqdiff = htonl(s->seqdiff);
d->max_win = htons(s->max_win);
d->mss = htons(s->mss);
d->state = s->state;
d->wscale = s->wscale;
if (s->scrub) {
d->scrub.pfss_flags = htons(
s->scrub->pfss_flags & PFSS_TIMESTAMP);
d->scrub.pfss_ttl = s->scrub->pfss_ttl;
d->scrub.pfss_ts_mod = htonl(s->scrub->pfss_ts_mod);
d->scrub.scrub_flag = PF_SCRUB_FLAG_VALID;
}
}
void
pf_state_peer_ntoh(const struct pf_state_peer_export *s, struct pf_state_peer *d)
{
d->seqlo = ntohl(s->seqlo);
d->seqhi = ntohl(s->seqhi);
d->seqdiff = ntohl(s->seqdiff);
d->max_win = ntohs(s->max_win);
d->mss = ntohs(s->mss);
d->state = s->state;
d->wscale = s->wscale;
if (s->scrub.scrub_flag == PF_SCRUB_FLAG_VALID &&
d->scrub != NULL) {
d->scrub->pfss_flags = ntohs(s->scrub.pfss_flags) &
PFSS_TIMESTAMP;
d->scrub->pfss_ttl = s->scrub.pfss_ttl;
d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
}
}
struct pf_udp_mapping *
pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
struct pf_addr *nat_addr, uint16_t nat_port)
{
struct pf_udp_mapping *mapping;
mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
if (mapping == NULL)
return (NULL);
pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af);
mapping->endpoints[0].port = src_port;
mapping->endpoints[0].af = af;
mapping->endpoints[0].mapping = mapping;
pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af);
mapping->endpoints[1].port = nat_port;
mapping->endpoints[1].af = af;
mapping->endpoints[1].mapping = mapping;
refcount_init(&mapping->refs, 1);
return (mapping);
}
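/*
 * Insert both endpoints of a UDP mapping, taking the two hash rows in
 * ascending address order (the same deadlock-avoidance scheme as state
 * key attachment).  Returns EEXIST if either endpoint already exists.
 */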
int
pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
{
struct pf_udpendpointhash *h0, *h1;
struct pf_udp_endpoint *endpoint;
int ret = EEXIST;
h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
if (h0 == h1) {
PF_HASHROW_LOCK(h0);
} else if (h0 < h1) {
PF_HASHROW_LOCK(h0);
PF_HASHROW_LOCK(h1);
} else {
PF_HASHROW_LOCK(h1);
PF_HASHROW_LOCK(h0);
}
LIST_FOREACH(endpoint, &h0->endpoints, entry) {
if (bcmp(endpoint, &mapping->endpoints[0],
sizeof(struct pf_udp_endpoint_cmp)) == 0)
break;
}
if (endpoint != NULL)
goto cleanup;
LIST_FOREACH(endpoint, &h1->endpoints, entry) {
if (bcmp(endpoint, &mapping->endpoints[1],
sizeof(struct pf_udp_endpoint_cmp)) == 0)
break;
}
if (endpoint != NULL)
goto cleanup;
LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
ret = 0;
cleanup:
if (h0 != h1) {
PF_HASHROW_UNLOCK(h0);
PF_HASHROW_UNLOCK(h1);
} else {
PF_HASHROW_UNLOCK(h0);
}
return (ret);
}
void
pf_udp_mapping_release(struct pf_udp_mapping *mapping)
{
struct pf_udpendpointhash *h0, *h1;
if (mapping == NULL)
return;
h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
PF_HASHROW_LOCK(h0);
if (refcount_release(&mapping->refs)) {
LIST_REMOVE(&mapping->endpoints[0], entry);
PF_HASHROW_UNLOCK(h0);
h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
PF_HASHROW_LOCK(h1);
LIST_REMOVE(&mapping->endpoints[1], entry);
PF_HASHROW_UNLOCK(h1);
uma_zfree(V_pf_udp_mapping_z, mapping);
} else {
PF_HASHROW_UNLOCK(h0);
}
}
struct pf_udp_mapping *
pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
{
struct pf_udpendpointhash *uh;
struct pf_udp_endpoint *endpoint;
uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];
PF_HASHROW_LOCK(uh);
LIST_FOREACH(endpoint, &uh->endpoints, entry) {
if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
bcmp(endpoint, &endpoint->mapping->endpoints[0],
sizeof(struct pf_udp_endpoint_cmp)) == 0)
break;
}
if (endpoint == NULL) {
PF_HASHROW_UNLOCK(uh);
return (NULL);
}
refcount_acquire(&endpoint->mapping->refs);
PF_HASHROW_UNLOCK(uh);
return (endpoint->mapping);
}
static void
pf_send(struct pf_send_entry *pfse)
{
PF_SENDQ_LOCK();
STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
PF_SENDQ_UNLOCK();
swi_sched(V_pf_swi_cookie, 0);
}
static bool
pf_isforlocal(struct mbuf *m, int af)
{
switch (af) {
#ifdef INET
case AF_INET: {
struct ip *ip = mtod(m, struct ip *);
return (in_localip(ip->ip_dst));
}
#endif
#ifdef INET6
case AF_INET6: {
struct ip6_hdr *ip6;
struct in6_ifaddr *ia;
ip6 = mtod(m, struct ip6_hdr *);
ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0, false);
if (ia == NULL)
return (false);
return (! (ia->ia6_flags & IN6_IFF_NOTREADY));
}
#endif
default:
unhandled_af(af);
}
return (false);
}
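/*
 * Classify an ICMP type into a direction, virtual type and virtual ID
 * for state matching.  Request types set PF_IN and deliberately fall
 * through to their reply type so both ends share the same virtual
 * mapping.  Returns 1 for ICMP error messages, which must instead be
 * matched against the state of the packet quoted inside them.
 */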
int
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type,
int *icmp_dir, u_int16_t *virtual_id, u_int16_t *virtual_type)
{
*icmp_dir = PF_OUT;
switch (pd->af) {
#ifdef INET
case AF_INET:
switch (type) {
case ICMP_ECHO:
*icmp_dir = PF_IN;
case ICMP_ECHOREPLY:
*virtual_type = ICMP_ECHO;
*virtual_id = pd->hdr.icmp.icmp_id;
break;
case ICMP_TSTAMP:
*icmp_dir = PF_IN;
case ICMP_TSTAMPREPLY:
*virtual_type = ICMP_TSTAMP;
*virtual_id = pd->hdr.icmp.icmp_id;
break;
case ICMP_IREQ:
*icmp_dir = PF_IN;
case ICMP_IREQREPLY:
*virtual_type = ICMP_IREQ;
*virtual_id = pd->hdr.icmp.icmp_id;
break;
case ICMP_MASKREQ:
*icmp_dir = PF_IN;
case ICMP_MASKREPLY:
*virtual_type = ICMP_MASKREQ;
*virtual_id = pd->hdr.icmp.icmp_id;
break;
case ICMP_IPV6_WHEREAREYOU:
*icmp_dir = PF_IN;
case ICMP_IPV6_IAMHERE:
*virtual_type = ICMP_IPV6_WHEREAREYOU;
*virtual_id = 0;
break;
case ICMP_MOBILE_REGREQUEST:
*icmp_dir = PF_IN;
case ICMP_MOBILE_REGREPLY:
*virtual_type = ICMP_MOBILE_REGREQUEST;
*virtual_id = 0;
break;
case ICMP_ROUTERSOLICIT:
*icmp_dir = PF_IN;
case ICMP_ROUTERADVERT:
*virtual_type = ICMP_ROUTERSOLICIT;
*virtual_id = 0;
break;
case ICMP_UNREACH:
case ICMP_SOURCEQUENCH:
case ICMP_REDIRECT:
case ICMP_TIMXCEED:
case ICMP_PARAMPROB:
*icmp_dir = PF_IN;
*virtual_type = type;
*virtual_id = 0;
*virtual_type = htons(*virtual_type);
return (1);
default:
*icmp_dir = PF_IN;
*virtual_type = type;
*virtual_id = 0;
break;
}
break;
#endif
#ifdef INET6
case AF_INET6:
switch (type) {
case ICMP6_ECHO_REQUEST:
*icmp_dir = PF_IN;
case ICMP6_ECHO_REPLY:
*virtual_type = ICMP6_ECHO_REQUEST;
*virtual_id = pd->hdr.icmp6.icmp6_id;
break;
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT: {
*icmp_dir = PF_IN;
*virtual_type = MLD_LISTENER_QUERY;
*virtual_id = 0;
break;
}
case MLD_MTRACE:
*icmp_dir = PF_IN;
case MLD_MTRACE_RESP:
*virtual_type = MLD_MTRACE;
*virtual_id = 0;
break;
case ND_NEIGHBOR_SOLICIT:
*icmp_dir = PF_IN;
case ND_NEIGHBOR_ADVERT: {
*virtual_type = ND_NEIGHBOR_SOLICIT;
*virtual_id = 0;
break;
}
case ICMP6_DST_UNREACH:
case ICMP6_PACKET_TOO_BIG:
case ICMP6_TIME_EXCEEDED:
case ICMP6_PARAM_PROB:
*icmp_dir = PF_IN;
*virtual_type = type;
*virtual_id = 0;
*virtual_type = htons(*virtual_type);
return (1);
default:
*icmp_dir = PF_IN;
*virtual_type = type;
*virtual_id = 0;
break;
}
break;
#endif
default:
unhandled_af(pd->af);
}
*virtual_type = htons(*virtual_type);
return (0);
}
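/*
 * Software interrupt handler scheduled by pf_send(): drain the send
 * queue and hand each packet to the stack, either as local input for
 * loopback destinations or via ip{,6}_output()/icmp{,6}_error().
 */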
void
pf_intr(void *v)
{
struct epoch_tracker et;
struct pf_send_head queue;
struct pf_send_entry *pfse, *next;
CURVNET_SET((struct vnet *)v);
PF_SENDQ_LOCK();
queue = V_pf_sendqueue;
STAILQ_INIT(&V_pf_sendqueue);
PF_SENDQ_UNLOCK();
NET_EPOCH_ENTER(et);
STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
switch (pfse->pfse_type) {
#ifdef INET
case PFSE_IP: {
if (pf_isforlocal(pfse->pfse_m, AF_INET)) {
KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
("%s: rcvif != loif", __func__));
pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
pfse->pfse_m->m_pkthdr.csum_flags |=
CSUM_IP_VALID | CSUM_IP_CHECKED |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
ip_input(pfse->pfse_m);
} else {
ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
NULL);
}
break;
}
case PFSE_ICMP:
icmp_error(pfse->pfse_m, pfse->icmpopts.type,
pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
break;
#endif
#ifdef INET6
case PFSE_IP6:
if (pf_isforlocal(pfse->pfse_m, AF_INET6)) {
KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
("%s: rcvif != loif", __func__));
pfse->pfse_m->m_flags |= M_SKIP_FIREWALL |
M_LOOP;
pfse->pfse_m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
ip6_input(pfse->pfse_m);
} else {
ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
NULL, NULL);
}
break;
case PFSE_ICMP6:
icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
pfse->icmpopts.code, pfse->icmpopts.mtu);
break;
#endif
default:
panic("%s: unknown type", __func__);
}
free(pfse, M_PFTEMP);
}
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
#define pf_purge_thread_period (hz / 10)
#ifdef PF_WANT_32_TO_64_COUNTER
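/*
 * On platforms without cheap 64-bit atomics, pf_counter_u64 is split into
 * per-CPU 32-bit counters.  The helpers below run from the purge thread and
 * periodically fold those 32-bit counters into the 64-bit totals before
 * they can wrap.
 */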
static void
pf_status_counter_u64_periodic(void)
{
PF_RULES_RASSERT();
if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) {
return;
}
for (int i = 0; i < FCNT_MAX; i++) {
pf_counter_u64_periodic(&V_pf_status.fcounters[i]);
}
}
static void
pf_kif_counter_u64_periodic(void)
{
struct pfi_kkif *kif;
size_t r, run;
PF_RULES_RASSERT();
if (__predict_false(V_pf_allkifcount == 0)) {
return;
}
if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
return;
}
run = V_pf_allkifcount / 10;
if (run < 5)
run = 5;
for (r = 0; r < run; r++) {
kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist);
if (kif == NULL) {
LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
break;
}
LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist);
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
for (int k = 0; k < 2; k++) {
pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]);
pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]);
}
}
}
}
}
static void
pf_rule_counter_u64_periodic(void)
{
struct pf_krule *rule;
size_t r, run;
PF_RULES_RASSERT();
if (__predict_false(V_pf_allrulecount == 0)) {
return;
}
if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
return;
}
run = V_pf_allrulecount / 10;
if (run < 5)
run = 5;
for (r = 0; r < run; r++) {
rule = LIST_NEXT(V_pf_rulemarker, allrulelist);
if (rule == NULL) {
LIST_REMOVE(V_pf_rulemarker, allrulelist);
LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
break;
}
LIST_REMOVE(V_pf_rulemarker, allrulelist);
LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist);
pf_counter_u64_periodic(&rule->evaluations);
for (int i = 0; i < 2; i++) {
pf_counter_u64_periodic(&rule->packets[i]);
pf_counter_u64_periodic(&rule->bytes[i]);
}
}
}
static void
pf_counter_u64_periodic_main(void)
{
PF_RULES_RLOCK_TRACKER;
V_pf_counter_periodic_iter++;
PF_RULES_RLOCK();
pf_counter_u64_critical_enter();
pf_status_counter_u64_periodic();
pf_kif_counter_u64_periodic();
pf_rule_counter_u64_periodic();
pf_counter_u64_critical_exit();
PF_RULES_RUNLOCK();
}
#else
#define pf_counter_u64_periodic_main() do { } while (0)
#endif
void
pf_purge_thread(void *unused __unused)
{
struct epoch_tracker et;
VNET_ITERATOR_DECL(vnet_iter);
sx_xlock(&pf_end_lock);
while (pf_end_threads == 0) {
sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period);
VNET_LIST_RLOCK();
NET_EPOCH_ENTER(et);
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
if (V_pf_vnet_active == 0) {
CURVNET_RESTORE();
continue;
}
pf_counter_u64_periodic_main();
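			/*
			 * Process a fraction of the state table every run,
			 * so that a full sweep completes once per
			 * PFTM_INTERVAL.
			 */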
V_pf_purge_idx =
pf_purge_expired_states(V_pf_purge_idx, V_pf_hashmask /
(V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
if (V_pf_purge_idx == 0) {
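				/*
				 * Purge other expired types every
				 * PFTM_INTERVAL seconds.
				 */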
pf_purge_expired_fragments();
pf_purge_expired_src_nodes();
pf_purge_unlinked_rules();
pfi_kkif_purge();
}
CURVNET_RESTORE();
}
NET_EPOCH_EXIT(et);
VNET_LIST_RUNLOCK();
}
pf_end_threads++;
sx_xunlock(&pf_end_lock);
kproc_exit(0);
}
void
pf_unload_vnet_purge(void)
{
pf_purge_unlinked_rules();
pfi_kkif_purge();
pf_purge_expired_states(0, V_pf_hashmask);
pf_purge_fragments(UINT_MAX);
pf_purge_expired_src_nodes();
pf_purge_unlinked_rules();
pfi_kkif_purge();
}
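/*
 * Return the uptime at which a state expires.  Under adaptive timeouts the
 * configured timeout is scaled down linearly as the state count grows from
 * the adaptive.start threshold towards adaptive.end.
 */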
u_int32_t
pf_state_expires(const struct pf_kstate *state)
{
u_int32_t timeout;
u_int32_t start;
u_int32_t end;
u_int32_t states;
if (state->timeout == PFTM_PURGE)
return (time_uptime);
KASSERT(state->timeout != PFTM_UNLINKED,
("pf_state_expires: timeout == PFTM_UNLINKED"));
KASSERT((state->timeout < PFTM_MAX),
("pf_state_expires: timeout > PFTM_MAX"));
timeout = state->rule->timeout[state->timeout];
if (!timeout)
timeout = V_pf_default_rule.timeout[state->timeout];
start = state->rule->timeout[PFTM_ADAPTIVE_START];
if (start && state->rule != &V_pf_default_rule) {
end = state->rule->timeout[PFTM_ADAPTIVE_END];
states = counter_u64_fetch(state->rule->states_cur);
} else {
start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
states = V_pf_status.states;
}
if (end && states > start && start < end) {
if (states < end) {
timeout = (u_int64_t)timeout * (end - states) /
(end - start);
return ((state->expire / 1000) + timeout);
		} else
			return (time_uptime);
}
return ((state->expire / 1000) + timeout);
}
void
pf_purge_expired_src_nodes(void)
{
struct pf_ksrc_node_list freelist;
struct pf_srchash *sh;
struct pf_ksrc_node *cur, *next;
int i;
LIST_INIT(&freelist);
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
PF_HASHROW_LOCK(sh);
LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
if (cur->states == 0 && cur->expire <= time_uptime) {
pf_unlink_src_node(cur);
LIST_INSERT_HEAD(&freelist, cur, entry);
} else if (cur->rule != NULL)
cur->rule->rule_ref |= PFRULE_REFS;
PF_HASHROW_UNLOCK(sh);
}
pf_free_src_nodes(&freelist);
V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
}
static void
pf_src_tree_remove_state(struct pf_kstate *s)
{
uint32_t timeout;
timeout = s->rule->timeout[PFTM_SRC_NODE] ?
s->rule->timeout[PFTM_SRC_NODE] :
V_pf_default_rule.timeout[PFTM_SRC_NODE];
	for (pf_sn_types_t sn_type = 0; sn_type < PF_SN_MAX; sn_type++) {
if (s->sns[sn_type] == NULL)
continue;
PF_SRC_NODE_LOCK(s->sns[sn_type]);
if (sn_type == PF_SN_LIMIT && s->src.tcp_est)
--(s->sns[sn_type]->conn);
if (--(s->sns[sn_type]->states) == 0)
s->sns[sn_type]->expire = time_uptime + timeout;
PF_SRC_NODE_UNLOCK(s->sns[sn_type]);
s->sns[sn_type] = NULL;
}
}
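/*
 * Unlink a state and drop the references it holds.  The ID hash row must be
 * locked on entry and is always unlocked on return.  For synproxy states in
 * PF_TCPS_PROXY_DST an RST is sent to tear down the half-open connection.
 */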
int
pf_remove_state(struct pf_kstate *s)
{
struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
NET_EPOCH_ASSERT();
PF_HASHROW_ASSERT(ih);
if (s->timeout == PFTM_UNLINKED) {
PF_HASHROW_UNLOCK(ih);
return (0);
}
if (s->src.state == PF_TCPS_PROXY_DST) {
pf_send_tcp(s->rule, s->key[PF_SK_WIRE]->af,
&s->key[PF_SK_WIRE]->addr[1],
&s->key[PF_SK_WIRE]->addr[0],
s->key[PF_SK_WIRE]->port[1],
s->key[PF_SK_WIRE]->port[0],
s->src.seqhi, s->src.seqlo + 1,
TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0,
s->act.rtableid, NULL);
}
LIST_REMOVE(s, entry);
pf_src_tree_remove_state(s);
if (V_pfsync_delete_state_ptr != NULL)
V_pfsync_delete_state_ptr(s);
STATE_DEC_COUNTERS(s);
s->timeout = PFTM_UNLINKED;
if (s->key[PF_SK_STACK] != NULL &&
s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
PF_HASHROW_UNLOCK(ih);
pf_detach_state(s);
pf_udp_mapping_release(s->udp_mapping);
return (pf_release_staten(s, 2));
}
struct pf_kstate *
pf_alloc_state(int flags)
{
return (uma_zalloc(V_pf_state_z, flags | M_ZERO));
}
static __inline void
pf_free_match_rules(struct pf_krule_slist *match_rules)
{
struct pf_krule_item *ri;
while ((ri = SLIST_FIRST(match_rules))) {
SLIST_REMOVE_HEAD(match_rules, entry);
free(ri, M_PF_RULE_ITEM);
}
}
void
pf_free_state(struct pf_kstate *cur)
{
KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
cur->timeout));
pf_free_match_rules(&(cur->match_rules));
pf_normalize_tcp_cleanup(cur);
uma_zfree(V_pf_state_z, cur);
pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}
static u_int
pf_purge_expired_states(u_int i, int maxcheck)
{
struct pf_idhash *ih;
struct pf_kstate *s;
struct pf_krule_item *mrm;
size_t count __unused;
V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
while (maxcheck > 0) {
count = 0;
ih = &V_pf_idhash[i];
if (!LIST_EMPTY(&ih->states)) {
relock:
PF_HASHROW_LOCK(ih);
LIST_FOREACH(s, &ih->states, entry) {
if (pf_state_expires(s) <= time_uptime) {
V_pf_status.states -=
pf_remove_state(s);
goto relock;
}
s->rule->rule_ref |= PFRULE_REFS;
if (s->nat_rule != NULL)
s->nat_rule->rule_ref |= PFRULE_REFS;
if (s->anchor != NULL)
s->anchor->rule_ref |= PFRULE_REFS;
s->kif->pfik_flags |= PFI_IFLAG_REFS;
SLIST_FOREACH(mrm, &s->match_rules, entry)
mrm->r->rule_ref |= PFRULE_REFS;
if (s->act.rt_kif)
s->act.rt_kif->pfik_flags |= PFI_IFLAG_REFS;
count++;
}
PF_HASHROW_UNLOCK(ih);
}
SDT_PROBE2(pf, purge, state, rowcount, i, count);
if (++i > V_pf_hashmask) {
V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
return (0);
}
maxcheck--;
}
V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
return (i);
}
static void
pf_purge_unlinked_rules(void)
{
struct pf_krulequeue tmpq;
struct pf_krule *r, *r1;
PF_OVERLOADQ_LOCK();
if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
PF_OVERLOADQ_UNLOCK();
return;
}
PF_OVERLOADQ_UNLOCK();
TAILQ_INIT(&tmpq);
PF_UNLNKDRULES_LOCK();
TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
if (!(r->rule_ref & PFRULE_REFS)) {
TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
TAILQ_INSERT_TAIL(&tmpq, r, entries);
} else
r->rule_ref &= ~PFRULE_REFS;
}
PF_UNLNKDRULES_UNLOCK();
if (!TAILQ_EMPTY(&tmpq)) {
PF_CONFIG_LOCK();
PF_RULES_WLOCK();
TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
TAILQ_REMOVE(&tmpq, r, entries);
pf_free_rule(r);
}
PF_RULES_WUNLOCK();
PF_CONFIG_UNLOCK();
}
}
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET: {
u_int32_t a = ntohl(addr->addr32[0]);
printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
(a>>8)&255, a&255);
if (p) {
p = ntohs(p);
printf(":%u", p);
}
break;
}
#endif
#ifdef INET6
case AF_INET6: {
u_int16_t b;
u_int8_t i, curstart, curend, maxstart, maxend;
curstart = curend = maxstart = maxend = 255;
for (i = 0; i < 8; i++) {
if (!addr->addr16[i]) {
if (curstart == 255)
curstart = i;
curend = i;
} else {
if ((curend - curstart) >
(maxend - maxstart)) {
maxstart = curstart;
maxend = curend;
}
curstart = curend = 255;
}
}
if ((curend - curstart) >
(maxend - maxstart)) {
maxstart = curstart;
maxend = curend;
}
for (i = 0; i < 8; i++) {
if (i >= maxstart && i <= maxend) {
if (i == 0)
printf(":");
if (i == maxend)
printf(":");
} else {
b = ntohs(addr->addr16[i]);
printf("%x", b);
if (i < 7)
printf(":");
}
}
if (p) {
p = ntohs(p);
printf("[%u]", p);
}
break;
}
#endif
default:
unhandled_af(af);
}
}
void
pf_print_state(struct pf_kstate *s)
{
pf_print_state_parts(s, NULL, NULL);
}
static void
pf_print_state_parts(struct pf_kstate *s,
struct pf_state_key *skwp, struct pf_state_key *sksp)
{
struct pf_state_key *skw, *sks;
u_int8_t proto, dir;
skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
proto = skw ? skw->proto : (sks ? sks->proto : 0);
dir = s ? s->direction : 0;
switch (proto) {
case IPPROTO_IPV4:
printf("IPv4");
break;
case IPPROTO_IPV6:
printf("IPv6");
break;
case IPPROTO_TCP:
printf("TCP");
break;
case IPPROTO_UDP:
printf("UDP");
break;
case IPPROTO_ICMP:
printf("ICMP");
break;
case IPPROTO_ICMPV6:
printf("ICMPv6");
break;
default:
printf("%u", proto);
break;
}
switch (dir) {
case PF_IN:
printf(" in");
break;
case PF_OUT:
printf(" out");
break;
}
if (skw) {
printf(" wire: ");
pf_print_host(&skw->addr[0], skw->port[0], skw->af);
printf(" ");
pf_print_host(&skw->addr[1], skw->port[1], skw->af);
}
if (sks) {
printf(" stack: ");
if (sks != skw) {
pf_print_host(&sks->addr[0], sks->port[0], sks->af);
printf(" ");
pf_print_host(&sks->addr[1], sks->port[1], sks->af);
} else
printf("-");
}
if (s) {
if (proto == IPPROTO_TCP) {
printf(" [lo=%u high=%u win=%u modulator=%u",
s->src.seqlo, s->src.seqhi,
s->src.max_win, s->src.seqdiff);
if (s->src.wscale && s->dst.wscale)
printf(" wscale=%u",
s->src.wscale & PF_WSCALE_MASK);
printf("]");
printf(" [lo=%u high=%u win=%u modulator=%u",
s->dst.seqlo, s->dst.seqhi,
s->dst.max_win, s->dst.seqdiff);
if (s->src.wscale && s->dst.wscale)
printf(" wscale=%u",
s->dst.wscale & PF_WSCALE_MASK);
printf("]");
}
printf(" %u:%u", s->src.state, s->dst.state);
if (s->rule)
printf(" @%d", s->rule->nr);
}
}
void
pf_print_flags(uint16_t f)
{
if (f)
printf(" ");
if (f & TH_FIN)
printf("F");
if (f & TH_SYN)
printf("S");
if (f & TH_RST)
printf("R");
if (f & TH_PUSH)
printf("P");
if (f & TH_ACK)
printf("A");
if (f & TH_URG)
printf("U");
if (f & TH_ECE)
printf("E");
if (f & TH_CWR)
printf("W");
if (f & TH_AE)
printf("e");
}
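/*
 * Skip steps let rule evaluation jump over runs of consecutive rules that
 * are identical in one criterion.  For each PF_SKIP_* field a rule points
 * at the next rule that differs in that field: if rules 1-5 all match on
 * the same interface and a packet arrives on a different one, evaluation
 * skips from rule 1 directly to rule 6.
 */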
#define PF_SET_SKIP_STEPS(i) \
do { \
while (head[i] != cur) { \
head[i]->skip[i] = cur; \
head[i] = TAILQ_NEXT(head[i], entries); \
} \
} while (0)
void
pf_calc_skip_steps(struct pf_krulequeue *rules)
{
struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT];
int i;
cur = TAILQ_FIRST(rules);
prev = cur;
for (i = 0; i < PF_SKIP_COUNT; ++i)
head[i] = cur;
while (cur != NULL) {
if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
PF_SET_SKIP_STEPS(PF_SKIP_IFP);
if (cur->direction != prev->direction)
PF_SET_SKIP_STEPS(PF_SKIP_DIR);
if (cur->af != prev->af)
PF_SET_SKIP_STEPS(PF_SKIP_AF);
if (cur->proto != prev->proto)
PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
if (cur->src.neg != prev->src.neg ||
pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
if (cur->dst.neg != prev->dst.neg ||
pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
if (cur->src.port[0] != prev->src.port[0] ||
cur->src.port[1] != prev->src.port[1] ||
cur->src.port_op != prev->src.port_op)
PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
if (cur->dst.port[0] != prev->dst.port[0] ||
cur->dst.port[1] != prev->dst.port[1] ||
cur->dst.port_op != prev->dst.port_op)
PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
prev = cur;
cur = TAILQ_NEXT(cur, entries);
}
for (i = 0; i < PF_SKIP_COUNT; ++i)
PF_SET_SKIP_STEPS(i);
}
int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
if (aw1->type != aw2->type)
return (1);
switch (aw1->type) {
case PF_ADDR_ADDRMASK:
case PF_ADDR_RANGE:
if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
return (1);
if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
return (1);
return (0);
case PF_ADDR_DYNIFTL:
return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
case PF_ADDR_NONE:
case PF_ADDR_NOROUTE:
case PF_ADDR_URPFFAILED:
return (0);
case PF_ADDR_TABLE:
return (aw1->p.tbl != aw2->p.tbl);
default:
printf("invalid address type: %d\n", aw1->type);
return (1);
}
}
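/*
 * Incrementally update an Internet checksum when a 16-bit word changes from
 * 'old' to 'new' (cf. RFC 1624: HC' = HC + old - new with end-around carry).
 * UDP is special-cased: checksum 0 means "no checksum" and is preserved,
 * and a computed result of 0 must be transmitted as 0xffff.
 */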
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
u_int32_t x;
x = cksum + old - new;
x = (x + (x >> 16)) & 0xffff;
if (udp && cksum == 0x0000)
		return (cksum);
if (udp && x == 0x0000)
x = 0xffff;
return (u_int16_t)(x);
}
static int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
int rewrite = 0;
if (*f != v) {
uint16_t old = htons(hi ? (*f << 8) : *f);
uint16_t new = htons(hi ? ( v << 8) : v);
*f = v;
if (! (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
CSUM_DELAY_DATA_IPV6)))
*pd->pcksum = pf_cksum_fixup(*pd->pcksum, old, new,
pd->proto == IPPROTO_UDP);
rewrite = 1;
}
return (rewrite);
}
int
pf_patch_16(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
{
int rewrite = 0;
u_int8_t *fb = (u_int8_t *)f;
u_int8_t *vb = (u_int8_t *)&v;
rewrite += pf_patch_8(pd, fb++, *vb++, hi);
rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
return (rewrite);
}
int
pf_patch_32(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
{
int rewrite = 0;
u_int8_t *fb = (u_int8_t *)f;
u_int8_t *vb = (u_int8_t *)&v;
rewrite += pf_patch_8(pd, fb++, *vb++, hi);
rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
rewrite += pf_patch_8(pd, fb++, *vb++, hi);
rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
return (rewrite);
}
u_int16_t
pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
u_int16_t new, u_int8_t udp)
{
if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
return (cksum);
return (pf_cksum_fixup(cksum, old, new, udp));
}
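/*
 * Rewrite an address/port pair in place and patch the transport checksum
 * (plus the IPv4 header checksum where one exists).  When pd->af differs
 * from pd->naf, the address words that the af translation adds or removes
 * are folded into the checksum as well.
 */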
static void
pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
struct pf_addr *an, u_int16_t pn)
{
struct pf_addr ao;
u_int16_t po;
uint8_t u = pd->virtual_proto == IPPROTO_UDP;
MPASS(pd->pcksum != NULL);
if (pd->af == AF_INET) {
MPASS(pd->ip_sum);
}
pf_addrcpy(&ao, a, pd->af);
if (pd->af == pd->naf)
pf_addrcpy(a, an, pd->af);
if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
*pd->pcksum = ~*pd->pcksum;
if (p == NULL)
return;
po = *p;
*p = pn;
switch (pd->af) {
#ifdef INET
case AF_INET:
switch (pd->naf) {
case AF_INET:
*pd->ip_sum = pf_cksum_fixup(pf_cksum_fixup(*pd->ip_sum,
ao.addr16[0], an->addr16[0], 0),
ao.addr16[1], an->addr16[1], 0);
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
ao.addr16[0], an->addr16[0], u),
ao.addr16[1], an->addr16[1], u);
*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
break;
#ifdef INET6
case AF_INET6:
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
ao.addr16[0], an->addr16[0], u),
ao.addr16[1], an->addr16[1], u),
0, an->addr16[2], u),
0, an->addr16[3], u),
0, an->addr16[4], u),
0, an->addr16[5], u),
0, an->addr16[6], u),
0, an->addr16[7], u),
po, pn, u);
break;
#endif
default:
unhandled_af(pd->naf);
}
break;
#endif
#ifdef INET6
case AF_INET6:
switch (pd->naf) {
#ifdef INET
case AF_INET:
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
ao.addr16[0], an->addr16[0], u),
ao.addr16[1], an->addr16[1], u),
ao.addr16[2], 0, u),
ao.addr16[3], 0, u),
ao.addr16[4], 0, u),
ao.addr16[5], 0, u),
ao.addr16[6], 0, u),
ao.addr16[7], 0, u),
po, pn, u);
break;
#endif
case AF_INET6:
*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
ao.addr16[0], an->addr16[0], u),
ao.addr16[1], an->addr16[1], u),
ao.addr16[2], an->addr16[2], u),
ao.addr16[3], an->addr16[3], u),
ao.addr16[4], an->addr16[4], u),
ao.addr16[5], an->addr16[5], u),
ao.addr16[6], an->addr16[6], u),
ao.addr16[7], an->addr16[7], u);
*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
break;
default:
unhandled_af(pd->naf);
}
break;
#endif
default:
unhandled_af(pd->af);
}
if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
CSUM_DELAY_DATA_IPV6)) {
*pd->pcksum = ~*pd->pcksum;
if (! *pd->pcksum)
*pd->pcksum = 0xffff;
}
}
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
u_int32_t ao;
memcpy(&ao, a, sizeof(ao));
memcpy(a, &an, sizeof(u_int32_t));
*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
ao % 65536, an % 65536, u);
}
void
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
{
u_int32_t ao;
memcpy(&ao, a, sizeof(ao));
memcpy(a, &an, sizeof(u_int32_t));
*c = pf_proto_cksum_fixup(m,
pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
ao % 65536, an % 65536, udp);
}
#ifdef INET6
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
struct pf_addr ao;
pf_addrcpy(&ao, a, AF_INET6);
pf_addrcpy(a, an, AF_INET6);
*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(*c,
ao.addr16[0], an->addr16[0], u),
ao.addr16[1], an->addr16[1], u),
ao.addr16[2], an->addr16[2], u),
ao.addr16[3], an->addr16[3], u),
ao.addr16[4], an->addr16[4], u),
ao.addr16[5], an->addr16[5], u),
ao.addr16[6], an->addr16[6], u),
ao.addr16[7], an->addr16[7], u);
}
#endif
static void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
struct pf_addr oia, ooa;
pf_addrcpy(&oia, ia, af);
if (oa)
pf_addrcpy(&ooa, oa, af);
if (ip != NULL) {
u_int16_t oip = *ip;
u_int32_t opc;
if (pc != NULL)
opc = *pc;
*ip = np;
if (pc != NULL)
*pc = pf_cksum_fixup(*pc, oip, *ip, u);
*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
if (pc != NULL)
*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
}
pf_addrcpy(ia, na, af);
switch (af) {
#ifdef INET
case AF_INET: {
u_int32_t oh2c = *h2c;
*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
oia.addr16[0], ia->addr16[0], 0),
oia.addr16[1], ia->addr16[1], 0);
*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
oia.addr16[0], ia->addr16[0], 0),
oia.addr16[1], ia->addr16[1], 0);
*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
break;
}
#endif
#ifdef INET6
case AF_INET6:
*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(*ic,
oia.addr16[0], ia->addr16[0], u),
oia.addr16[1], ia->addr16[1], u),
oia.addr16[2], ia->addr16[2], u),
oia.addr16[3], ia->addr16[3], u),
oia.addr16[4], ia->addr16[4], u),
oia.addr16[5], ia->addr16[5], u),
oia.addr16[6], ia->addr16[6], u),
oia.addr16[7], ia->addr16[7], u);
break;
#endif
}
if (oa) {
pf_addrcpy(oa, na, af);
switch (af) {
#ifdef INET
case AF_INET:
*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
ooa.addr16[0], oa->addr16[0], 0),
ooa.addr16[1], oa->addr16[1], 0);
break;
#endif
#ifdef INET6
case AF_INET6:
*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(*ic,
ooa.addr16[0], oa->addr16[0], u),
ooa.addr16[1], oa->addr16[1], u),
ooa.addr16[2], oa->addr16[2], u),
ooa.addr16[3], oa->addr16[3], u),
ooa.addr16[4], oa->addr16[4], u),
ooa.addr16[5], oa->addr16[5], u),
ooa.addr16[6], oa->addr16[6], u),
ooa.addr16[7], oa->addr16[7], u);
break;
#endif
}
}
}
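/*
 * Translate the network header in place between IPv4 and IPv6 (NAT64-style):
 * strip the old header, prepend the new one, migrate the checksum-offload
 * flags, and recompute the ICMP/ICMPv6 checksum, which covers a different
 * pseudo-header in each family.
 */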
int
pf_translate_af(struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
struct mbuf *mp;
struct ip *ip4;
struct ip6_hdr *ip6;
struct icmp6_hdr *icmp;
struct m_tag *mtag;
struct pf_fragment_tag *ftag;
int hlen;
hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
m_adj(pd->m, pd->off);
M_PREPEND(pd->m, hlen, M_NOWAIT);
if (pd->m == NULL)
return (-1);
switch (pd->naf) {
case AF_INET:
ip4 = mtod(pd->m, struct ip *);
bzero(ip4, hlen);
ip4->ip_v = IPVERSION;
ip4->ip_hl = hlen >> 2;
ip4->ip_tos = pd->tos;
ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
ip_fillid(ip4, V_ip_random_id);
ip4->ip_ttl = pd->ttl;
ip4->ip_p = pd->proto;
ip4->ip_src = pd->nsaddr.v4;
ip4->ip_dst = pd->ndaddr.v4;
pd->src = (struct pf_addr *)&ip4->ip_src;
pd->dst = (struct pf_addr *)&ip4->ip_dst;
pd->off = sizeof(struct ip);
if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6;
pd->m->m_pkthdr.csum_flags |= CSUM_TCP;
}
if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6;
pd->m->m_pkthdr.csum_flags |= CSUM_UDP;
}
if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
pd->m->m_pkthdr.csum_flags |= CSUM_SCTP;
}
break;
case AF_INET6:
ip6 = mtod(pd->m, struct ip6_hdr *);
bzero(ip6, hlen);
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
ip6->ip6_plen = htons(pd->tot_len - pd->off);
ip6->ip6_nxt = pd->proto;
if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
ip6->ip6_hlim = IPV6_DEFHLIM;
else
ip6->ip6_hlim = pd->ttl;
ip6->ip6_src = pd->nsaddr.v6;
ip6->ip6_dst = pd->ndaddr.v6;
pd->src = (struct pf_addr *)&ip6->ip6_src;
pd->dst = (struct pf_addr *)&ip6->ip6_dst;
pd->off = sizeof(struct ip6_hdr);
if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) {
pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP;
pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
}
if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) {
pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP;
pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6;
}
if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) {
pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6;
}
mtag = m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL);
if (mtag) {
ftag = (struct pf_fragment_tag *)(mtag + 1);
ftag->ft_hdrlen = sizeof(*ip6);
ftag->ft_maxlen -= sizeof(struct ip6_hdr) -
sizeof(struct ip) + sizeof(struct ip6_frag);
}
break;
default:
return (-1);
}
if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
int off;
if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
NULL) {
pd->m = NULL;
return (-1);
}
icmp = (struct icmp6_hdr *)(mp->m_data + off);
icmp->icmp6_cksum = 0;
icmp->icmp6_cksum = pd->naf == AF_INET ?
in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
ntohs(ip6->ip6_plen));
}
#endif
return (0);
}
int
pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
sa_family_t af, sa_family_t naf)
{
#if defined(INET) && defined(INET6)
struct mbuf *n = NULL;
struct ip *ip4;
struct ip6_hdr *ip6;
int hlen, olen, mlen;
if (af == naf || (af != AF_INET && af != AF_INET6) ||
(naf != AF_INET && naf != AF_INET6))
return (-1);
if ((n = m_split(m, off, M_NOWAIT)) == NULL)
return (-1);
olen = pd2->off - off;
hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
m_adj(n, olen);
M_PREPEND(n, hlen, M_NOWAIT);
if (n == NULL)
return (-1);
switch (naf) {
case AF_INET:
ip4 = mtod(n, struct ip *);
bzero(ip4, sizeof(*ip4));
ip4->ip_v = IPVERSION;
ip4->ip_hl = sizeof(*ip4) >> 2;
ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
ip_fillid(ip4, V_ip_random_id);
ip4->ip_off = htons(IP_DF);
ip4->ip_ttl = pd2->ttl;
if (pd2->proto == IPPROTO_ICMPV6)
ip4->ip_p = IPPROTO_ICMP;
else
ip4->ip_p = pd2->proto;
ip4->ip_src = src->v4;
ip4->ip_dst = dst->v4;
ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
break;
case AF_INET6:
ip6 = mtod(n, struct ip6_hdr *);
bzero(ip6, sizeof(*ip6));
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_plen = htons(pd2->tot_len - olen);
if (pd2->proto == IPPROTO_ICMP)
ip6->ip6_nxt = IPPROTO_ICMPV6;
else
ip6->ip6_nxt = pd2->proto;
if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
ip6->ip6_hlim = IPV6_DEFHLIM;
else
ip6->ip6_hlim = pd2->ttl;
ip6->ip6_src = src->v6;
ip6->ip6_dst = dst->v6;
break;
default:
unhandled_af(naf);
}
pd2->off += hlen - olen;
pd->tot_len += hlen - olen;
mlen = n->m_pkthdr.len;
m_cat(m, n);
m->m_pkthdr.len += mlen;
#endif
return (0);
}
#define PTR_IP(field) (offsetof(struct ip, field))
#define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
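/*
 * Translate an ICMP header between ICMPv4 and ICMPv6: map types, codes, the
 * reported MTU and the param-problem pointer, much as a SIIT translator
 * would (cf. RFC 7915), fixing the checksum incrementally as fields change.
 */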
int
pf_translate_icmp_af(int af, void *arg)
{
#if defined(INET) && defined(INET6)
struct icmp *icmp4;
struct icmp6_hdr *icmp6;
u_int32_t mtu;
int32_t ptr = -1;
u_int8_t type;
u_int8_t code;
switch (af) {
case AF_INET:
icmp6 = arg;
type = icmp6->icmp6_type;
code = icmp6->icmp6_code;
mtu = ntohl(icmp6->icmp6_mtu);
switch (type) {
case ICMP6_ECHO_REQUEST:
type = ICMP_ECHO;
break;
case ICMP6_ECHO_REPLY:
type = ICMP_ECHOREPLY;
break;
case ICMP6_DST_UNREACH:
type = ICMP_UNREACH;
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
case ICMP6_DST_UNREACH_BEYONDSCOPE:
case ICMP6_DST_UNREACH_ADDR:
code = ICMP_UNREACH_HOST;
break;
case ICMP6_DST_UNREACH_ADMIN:
code = ICMP_UNREACH_HOST_PROHIB;
break;
case ICMP6_DST_UNREACH_NOPORT:
code = ICMP_UNREACH_PORT;
break;
default:
return (-1);
}
break;
case ICMP6_PACKET_TOO_BIG:
type = ICMP_UNREACH;
code = ICMP_UNREACH_NEEDFRAG;
			mtu -= 20;	/* IPv4 header is 20 bytes smaller */
break;
case ICMP6_TIME_EXCEEDED:
type = ICMP_TIMXCEED;
break;
case ICMP6_PARAM_PROB:
switch (code) {
case ICMP6_PARAMPROB_HEADER:
type = ICMP_PARAMPROB;
code = ICMP_PARAMPROB_ERRATPTR;
ptr = ntohl(icmp6->icmp6_pptr);
if (ptr == PTR_IP6(ip6_vfc))
					; /* preserve */
else if (ptr == PTR_IP6(ip6_vfc) + 1)
ptr = PTR_IP(ip_tos);
else if (ptr == PTR_IP6(ip6_plen) ||
ptr == PTR_IP6(ip6_plen) + 1)
ptr = PTR_IP(ip_len);
else if (ptr == PTR_IP6(ip6_nxt))
ptr = PTR_IP(ip_p);
else if (ptr == PTR_IP6(ip6_hlim))
ptr = PTR_IP(ip_ttl);
else if (ptr >= PTR_IP6(ip6_src) &&
ptr < PTR_IP6(ip6_dst))
ptr = PTR_IP(ip_src);
else if (ptr >= PTR_IP6(ip6_dst) &&
ptr < sizeof(struct ip6_hdr))
ptr = PTR_IP(ip_dst);
else {
return (-1);
}
break;
case ICMP6_PARAMPROB_NEXTHEADER:
type = ICMP_UNREACH;
code = ICMP_UNREACH_PROTOCOL;
break;
default:
return (-1);
}
break;
default:
return (-1);
}
if (icmp6->icmp6_type != type) {
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
icmp6->icmp6_type, type, 0);
icmp6->icmp6_type = type;
}
if (icmp6->icmp6_code != code) {
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
icmp6->icmp6_code, code, 0);
icmp6->icmp6_code = code;
}
if (icmp6->icmp6_mtu != htonl(mtu)) {
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0);
icmp6->icmp6_mtu = htonl(mtu);
}
if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) {
icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0);
			icmp6->icmp6_pptr = htonl(ptr);
}
break;
case AF_INET6:
icmp4 = arg;
type = icmp4->icmp_type;
code = icmp4->icmp_code;
mtu = ntohs(icmp4->icmp_nextmtu);
switch (type) {
case ICMP_ECHO:
type = ICMP6_ECHO_REQUEST;
break;
case ICMP_ECHOREPLY:
type = ICMP6_ECHO_REPLY;
break;
case ICMP_UNREACH:
type = ICMP6_DST_UNREACH;
switch (code) {
case ICMP_UNREACH_NET:
case ICMP_UNREACH_HOST:
case ICMP_UNREACH_NET_UNKNOWN:
case ICMP_UNREACH_HOST_UNKNOWN:
case ICMP_UNREACH_ISOLATED:
case ICMP_UNREACH_TOSNET:
case ICMP_UNREACH_TOSHOST:
code = ICMP6_DST_UNREACH_NOROUTE;
break;
case ICMP_UNREACH_PORT:
code = ICMP6_DST_UNREACH_NOPORT;
break;
case ICMP_UNREACH_NET_PROHIB:
case ICMP_UNREACH_HOST_PROHIB:
case ICMP_UNREACH_FILTER_PROHIB:
case ICMP_UNREACH_PRECEDENCE_CUTOFF:
code = ICMP6_DST_UNREACH_ADMIN;
break;
case ICMP_UNREACH_PROTOCOL:
type = ICMP6_PARAM_PROB;
code = ICMP6_PARAMPROB_NEXTHEADER;
ptr = offsetof(struct ip6_hdr, ip6_nxt);
break;
case ICMP_UNREACH_NEEDFRAG:
type = ICMP6_PACKET_TOO_BIG;
code = 0;
				mtu += 20;	/* IPv6 header is 20 bytes larger */
break;
default:
return (-1);
}
break;
case ICMP_TIMXCEED:
type = ICMP6_TIME_EXCEEDED;
break;
case ICMP_PARAMPROB:
type = ICMP6_PARAM_PROB;
switch (code) {
case ICMP_PARAMPROB_ERRATPTR:
code = ICMP6_PARAMPROB_HEADER;
break;
case ICMP_PARAMPROB_LENGTH:
code = ICMP6_PARAMPROB_HEADER;
break;
default:
return (-1);
}
ptr = icmp4->icmp_pptr;
if (ptr == 0 || ptr == PTR_IP(ip_tos))
				; /* preserve */
else if (ptr == PTR_IP(ip_len) ||
ptr == PTR_IP(ip_len) + 1)
ptr = PTR_IP6(ip6_plen);
else if (ptr == PTR_IP(ip_ttl))
ptr = PTR_IP6(ip6_hlim);
else if (ptr == PTR_IP(ip_p))
ptr = PTR_IP6(ip6_nxt);
else if (ptr >= PTR_IP(ip_src) && ptr < PTR_IP(ip_dst))
ptr = PTR_IP6(ip6_src);
else if (ptr >= PTR_IP(ip_dst) &&
ptr < sizeof(struct ip))
ptr = PTR_IP6(ip6_dst);
else {
return (-1);
}
break;
default:
return (-1);
}
if (icmp4->icmp_type != type) {
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
icmp4->icmp_type, type, 0);
icmp4->icmp_type = type;
}
if (icmp4->icmp_code != code) {
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
icmp4->icmp_code, code, 0);
icmp4->icmp_code = code;
}
if (icmp4->icmp_nextmtu != htons(mtu)) {
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
icmp4->icmp_nextmtu, htons(mtu), 0);
icmp4->icmp_nextmtu = htons(mtu);
}
if (ptr >= 0 && icmp4->icmp_void != ptr) {
icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
htons(icmp4->icmp_pptr), htons(ptr), 0);
icmp4->icmp_void = htonl(ptr);
}
break;
default:
unhandled_af(af);
}
#endif
return (0);
}
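/*
 * Apply the state's sequence number modulation (dst->seqdiff) to the edges
 * of every SACK block in the TCP options, keeping scrubbed sequence numbers
 * consistent with what the peer sees.
 */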
static int
pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th,
struct pf_state_peer *dst)
{
struct sackblk sack;
int copyback = 0, i;
int olen, optsoff;
uint8_t opts[MAX_TCPOPTLEN], *opt, *eoh;
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
optsoff = pd->off + sizeof(struct tcphdr);
#define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
if (olen < TCPOLEN_MINSACK ||
!pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
return (0);
eoh = opts + olen;
opt = opts;
while ((opt = pf_find_tcpopt(opt, opts, olen,
TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL)
{
size_t safelen = MIN(opt[1], (eoh - opt));
for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) {
size_t startoff = (opt + i) - opts;
memcpy(&sack, &opt[i], sizeof(sack));
pf_patch_32(pd, &sack.start,
htonl(ntohl(sack.start) - dst->seqdiff),
PF_ALGNMNT(startoff));
pf_patch_32(pd, &sack.end,
htonl(ntohl(sack.end) - dst->seqdiff),
PF_ALGNMNT(startoff + sizeof(sack.start)));
memcpy(&opt[i], &sack, sizeof(sack));
}
copyback = 1;
opt += opt[1];
}
if (copyback)
m_copyback(pd->m, optsoff, olen, (caddr_t)opts);
return (copyback);
}
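/*
 * Build a TCP segment from scratch (used for RSTs and synproxy), optionally
 * carrying MSS and SACK-permitted options.  Only the pseudo-header checksum
 * is filled in; the rest is left to the stack via CSUM_TCP/CSUM_TCP_IPV6.
 */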
struct mbuf *
pf_build_tcp(const struct pf_krule *r, sa_family_t af,
const struct pf_addr *saddr, const struct pf_addr *daddr,
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, u_int sack,
int rtableid, u_short *reason)
{
struct mbuf *m;
int len, tlen;
#ifdef INET
struct ip *h = NULL;
#endif
#ifdef INET6
struct ip6_hdr *h6 = NULL;
#endif
struct tcphdr *th;
char *opt;
struct pf_mtag *pf_mtag;
len = 0;
th = NULL;
tlen = sizeof(struct tcphdr);
if (mss)
tlen += 4;
if (sack)
tlen += 2;
switch (af) {
#ifdef INET
case AF_INET:
len = sizeof(struct ip) + tlen;
break;
#endif
#ifdef INET6
case AF_INET6:
len = sizeof(struct ip6_hdr) + tlen;
break;
#endif
default:
unhandled_af(af);
}
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
REASON_SET(reason, PFRES_MEMORY);
return (NULL);
}
#ifdef MAC
mac_netinet_firewall_send(m);
#endif
if ((pf_mtag = pf_get_mtag(m)) == NULL) {
REASON_SET(reason, PFRES_MEMORY);
m_freem(m);
return (NULL);
}
m->m_flags |= mbuf_flags;
pf_mtag->tag = mtag_tag;
pf_mtag->flags = mtag_flags;
if (rtableid >= 0)
M_SETFIB(m, rtableid);
#ifdef ALTQ
if (r != NULL && r->qid) {
pf_mtag->qid = r->qid;
pf_mtag->hdr = mtod(m, struct ip *);
}
#endif
m->m_data += max_linkhdr;
m->m_pkthdr.len = m->m_len = len;
m->m_pkthdr.rcvif = V_loif;
bzero(m->m_data, len);
switch (af) {
#ifdef INET
case AF_INET:
m->m_pkthdr.csum_flags |= CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
h = mtod(m, struct ip *);
h->ip_p = IPPROTO_TCP;
h->ip_v = 4;
h->ip_hl = sizeof(*h) >> 2;
h->ip_tos = IPTOS_LOWDELAY;
h->ip_len = htons(len);
h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
h->ip_ttl = ttl ? ttl : V_ip_defttl;
h->ip_sum = 0;
h->ip_src.s_addr = saddr->v4.s_addr;
h->ip_dst.s_addr = daddr->v4.s_addr;
th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
th->th_sum = in_pseudo(h->ip_src.s_addr, h->ip_dst.s_addr,
htons(len - sizeof(struct ip) + IPPROTO_TCP));
break;
#endif
#ifdef INET6
case AF_INET6:
m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
h6 = mtod(m, struct ip6_hdr *);
h6->ip6_nxt = IPPROTO_TCP;
h6->ip6_plen = htons(tlen);
h6->ip6_vfc |= IPV6_VERSION;
h6->ip6_hlim = V_ip6_defhlim;
memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
th->th_sum = in6_cksum_pseudo(h6, len - sizeof(struct ip6_hdr),
IPPROTO_TCP, 0);
break;
#endif
}
th->th_sport = sport;
th->th_dport = dport;
th->th_seq = htonl(seq);
th->th_ack = htonl(ack);
th->th_off = tlen >> 2;
tcp_set_flags(th, tcp_flags);
th->th_win = htons(win);
opt = (char *)(th + 1);
if (mss) {
opt[0] = TCPOPT_MAXSEG;
opt[1] = 4;
mss = htons(mss);
memcpy((opt + 2), &mss, 2);
opt += 4;
}
if (sack) {
opt[0] = TCPOPT_SACK_PERMITTED;
opt[1] = 2;
opt += 2;
}
return (m);
}
static void
pf_send_sctp_abort(sa_family_t af, struct pf_pdesc *pd,
uint8_t ttl, int rtableid)
{
struct mbuf *m;
#ifdef INET
struct ip *h = NULL;
#endif
#ifdef INET6
struct ip6_hdr *h6 = NULL;
#endif
struct sctphdr *hdr;
struct sctp_chunkhdr *chunk;
struct pf_send_entry *pfse;
int off = 0;
MPASS(af == pd->af);
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
m->m_data += max_linkhdr;
m->m_flags |= M_SKIP_FIREWALL;
m->m_pkthdr.rcvif = V_loif;
switch (af) {
#ifdef INET
case AF_INET:
bzero(m->m_data, sizeof(struct ip) + sizeof(*hdr) + sizeof(*chunk));
h = mtod(m, struct ip *);
h->ip_p = IPPROTO_SCTP;
h->ip_len = htons(sizeof(*h) + sizeof(*hdr) + sizeof(*chunk));
h->ip_ttl = ttl ? ttl : V_ip_defttl;
h->ip_src = pd->dst->v4;
h->ip_dst = pd->src->v4;
off += sizeof(struct ip);
break;
#endif
#ifdef INET6
case AF_INET6:
bzero(m->m_data, sizeof(struct ip6_hdr) + sizeof(*hdr) + sizeof(*chunk));
h6 = mtod(m, struct ip6_hdr *);
h6->ip6_vfc |= IPV6_VERSION;
h6->ip6_nxt = IPPROTO_SCTP;
		h6->ip6_plen = htons(sizeof(*hdr) + sizeof(*chunk));
h6->ip6_hlim = ttl ? ttl : V_ip6_defhlim;
memcpy(&h6->ip6_src, &pd->dst->v6, sizeof(struct in6_addr));
memcpy(&h6->ip6_dst, &pd->src->v6, sizeof(struct in6_addr));
off += sizeof(struct ip6_hdr);
break;
#endif
default:
unhandled_af(af);
}
hdr = mtodo(m, off);
hdr->src_port = pd->hdr.sctp.dest_port;
hdr->dest_port = pd->hdr.sctp.src_port;
hdr->v_tag = pd->sctp_initiate_tag;
hdr->checksum = 0;
off += sizeof(struct sctphdr);
chunk = mtodo(m, off);
chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
chunk->chunk_length = htons(sizeof(*chunk));
off += sizeof(*chunk);
m->m_pkthdr.len = m->m_len = off;
pf_sctp_checksum(m, off - sizeof(*hdr) - sizeof(*chunk));
if (rtableid >= 0)
M_SETFIB(m, rtableid);
pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
if (pfse == NULL) {
m_freem(m);
return;
}
switch (af) {
#ifdef INET
case AF_INET:
pfse->pfse_type = PFSE_IP;
break;
#endif
#ifdef INET6
case AF_INET6:
pfse->pfse_type = PFSE_IP6;
break;
#endif
}
pfse->pfse_m = m;
pf_send(pfse);
}
void
pf_send_tcp(const struct pf_krule *r, sa_family_t af,
const struct pf_addr *saddr, const struct pf_addr *daddr,
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid,
u_short *reason)
{
struct pf_send_entry *pfse;
struct mbuf *m;
m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags,
win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid, reason);
if (m == NULL)
return;
pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
if (pfse == NULL) {
m_freem(m);
REASON_SET(reason, PFRES_MEMORY);
return;
}
switch (af) {
#ifdef INET
case AF_INET:
pfse->pfse_type = PFSE_IP;
break;
#endif
#ifdef INET6
case AF_INET6:
pfse->pfse_type = PFSE_IP6;
break;
#endif
default:
unhandled_af(af);
}
pfse->pfse_m = m;
pf_send(pfse);
}
static void
pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum)
{
if (nr != NULL) {
pf_addrcpy(pd->src, &pd->osrc, pd->af);
pf_addrcpy(pd->dst, &pd->odst, pd->af);
if (pd->sport)
*pd->sport = pd->osport;
if (pd->dport)
*pd->dport = pd->odport;
if (pd->ip_sum)
*pd->ip_sum = bip_sum;
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
}
}
static void
pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd,
struct tcphdr *th, u_int16_t bproto_sum, u_int16_t bip_sum,
u_short *reason, int rtableid)
{
pf_undo_nat(nr, pd, bip_sum);
if (pd->proto == IPPROTO_TCP &&
((r->rule_flag & PFRULE_RETURNRST) ||
(r->rule_flag & PFRULE_RETURN)) &&
!(tcp_get_flags(th) & TH_RST)) {
u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
if (pf_check_proto_cksum(pd->m, pd->off, pd->tot_len - pd->off,
IPPROTO_TCP, pd->af))
REASON_SET(reason, PFRES_PROTCKSUM);
else {
if (tcp_get_flags(th) & TH_SYN)
ack++;
if (tcp_get_flags(th) & TH_FIN)
ack++;
pf_send_tcp(r, pd->af, pd->dst,
pd->src, th->th_dport, th->th_sport,
ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid,
reason);
}
} else if (pd->proto == IPPROTO_SCTP &&
(r->rule_flag & PFRULE_RETURN)) {
pf_send_sctp_abort(pd->af, pd, r->return_ttl, rtableid);
} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
r->return_icmp)
pf_send_icmp(pd->m, r->return_icmp >> 8,
r->return_icmp & 255, 0, pd->af, r, rtableid);
else if (pd->proto != IPPROTO_ICMPV6 && pd->af == AF_INET6 &&
r->return_icmp6)
pf_send_icmp(pd->m, r->return_icmp6 >> 8,
r->return_icmp6 & 255, 0, pd->af, r, rtableid);
}
static int
pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m)
{
struct m_tag *mtag;
u_int8_t mpcp;
mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
if (mtag == NULL)
return (0);
if (prio == PF_PRIO_ZERO)
prio = 0;
mpcp = *(uint8_t *)(mtag + 1);
return (mpcp == prio);
}
static int
pf_icmp_to_bandlim(uint8_t type)
{
switch (type) {
case ICMP_ECHO:
case ICMP_ECHOREPLY:
return (BANDLIM_ICMP_ECHO);
case ICMP_TSTAMP:
case ICMP_TSTAMPREPLY:
return (BANDLIM_ICMP_TSTAMP);
case ICMP_UNREACH:
default:
return (BANDLIM_ICMP_UNREACH);
}
}
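/*
 * Send a challenge ACK (cf. RFC 5961), forcing the legitimate peer to prove
 * ownership of the connection before we act on a suspicious segment.
 */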
static void
pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s,
struct pf_state_peer *src, struct pf_state_peer *dst,
u_short *reason)
{
pf_send_tcp(s->rule, pd->af, pd->dst, pd->src,
pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0,
s->rule->rtableid, reason);
}
static void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int mtu,
sa_family_t af, struct pf_krule *r, int rtableid)
{
struct pf_send_entry *pfse;
struct mbuf *m0;
struct pf_mtag *pf_mtag;
switch (af) {
#ifdef INET6
case AF_INET6:
if (icmp6_ratelimit(NULL, type, code))
return;
break;
#endif
#ifdef INET
case AF_INET:
if (badport_bandlim(pf_icmp_to_bandlim(type)) != 0)
return;
break;
#endif
}
pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
if (pfse == NULL)
return;
if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
free(pfse, M_PFTEMP);
return;
}
if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
free(pfse, M_PFTEMP);
return;
}
m0->m_flags |= M_SKIP_FIREWALL;
if (rtableid >= 0)
M_SETFIB(m0, rtableid);
#ifdef ALTQ
if (r->qid) {
pf_mtag->qid = r->qid;
pf_mtag->hdr = mtod(m0, struct ip *);
}
#endif
switch (af) {
#ifdef INET
case AF_INET:
pfse->pfse_type = PFSE_ICMP;
break;
#endif
#ifdef INET6
case AF_INET6:
pfse->pfse_type = PFSE_ICMP6;
break;
#endif
}
pfse->pfse_m = m0;
pfse->icmpopts.type = type;
pfse->icmpopts.code = code;
pfse->icmpopts.mtu = mtu;
pf_send(pfse);
}
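/*
 * Compare addresses 'a' and 'b' under mask 'm'.  'n' is the rule's negation
 * flag, so the result is inverted for negated ("! address") matches.
 */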
int
pf_match_addr(u_int8_t n, const struct pf_addr *a, const struct pf_addr *m,
const struct pf_addr *b, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET:
if (IN_ARE_MASKED_ADDR_EQUAL(a->v4, b->v4, m->v4))
return (n == 0);
break;
#endif
#ifdef INET6
case AF_INET6:
if (IN6_ARE_MASKED_ADDR_EQUAL(&a->v6, &b->v6, &m->v6))
return (n == 0);
break;
#endif
}
return (n != 0);
}
int
pf_match_addr_range(const struct pf_addr *b, const struct pf_addr *e,
const struct pf_addr *a, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET:
if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
(ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
return (0);
break;
#endif
#ifdef INET6
case AF_INET6: {
int i;
for (i = 0; i < 4; ++i)
if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
break;
else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
return (0);
for (i = 0; i < 4; ++i)
if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
break;
else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
return (0);
break;
}
#endif
}
return (1);
}
static int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
switch (op) {
case PF_OP_IRG:
return ((p > a1) && (p < a2));
case PF_OP_XRG:
return ((p < a1) || (p > a2));
case PF_OP_RRG:
return ((p >= a1) && (p <= a2));
case PF_OP_EQ:
return (p == a1);
case PF_OP_NE:
return (p != a1);
case PF_OP_LT:
return (p < a1);
case PF_OP_LE:
return (p <= a1);
case PF_OP_GT:
return (p > a1);
case PF_OP_GE:
return (p >= a1);
}
return (0);
}
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
}
static int
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
{
if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
return (0);
return (pf_match(op, a1, a2, u));
}
static int
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
{
if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
return (0);
return (pf_match(op, a1, a2, g));
}
int
pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag)
{
if (*tag == -1)
*tag = mtag;
return ((!r->match_tag_not && r->match_tag == *tag) ||
(r->match_tag_not && r->match_tag != *tag));
}
static int
pf_match_rcvif(struct mbuf *m, struct pf_krule *r)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct pfi_kkif *kif;
if (ifp == NULL)
return (0);
kif = (struct pfi_kkif *)ifp->if_pf_kif;
if (kif == NULL) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: kif == NULL, @%d via %s", __func__, r->nr,
r->rcv_ifname);
return (0);
}
return (pfi_kkif_match(r->rcv_kif, kif));
}
int
pf_tag_packet(struct pf_pdesc *pd, int tag)
{
KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL))
return (ENOMEM);
pd->pf_mtag->tag = tag;
return (0);
}
#define PF_ANCHORSTACK_MATCH 0x00000001
#define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH)
#define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
#define PF_ANCHOR_RULE(f) (struct pf_krule *) \
((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
#define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \
((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
} while (0)
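/*
 * Recurse into an anchor's ruleset, or into every child ruleset for
 * wildcard anchors.  Recursion depth is bounded by PF_ANCHOR_STACK_MAX to
 * protect the kernel stack.
 */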
enum pf_test_status
pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r,
struct pf_krule_slist *match_rules)
{
enum pf_test_status rv;
PF_RULES_RASSERT();
if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
printf("%s: anchor stack overflow on %s\n",
__func__, r->anchor->name);
return (PF_TEST_FAIL);
}
ctx->depth++;
if (r->anchor_wildcard) {
struct pf_kanchor *child;
rv = PF_TEST_OK;
RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
rv = pf_match_rule(ctx, &child->ruleset, match_rules);
if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
break;
}
}
} else {
rv = pf_match_rule(ctx, &r->anchor->ruleset, match_rules);
if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
*ctx->am == r)
rv = PF_TEST_QUICK;
}
ctx->depth--;
return (rv);
}
struct pf_keth_anchor_stackframe {
struct pf_keth_ruleset *rs;
struct pf_keth_rule *r;
struct pf_keth_anchor *child;
};
#define PF_ETH_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
#define PF_ETH_ANCHOR_RULE(f) (struct pf_keth_rule *) \
((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
#define PF_ETH_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \
((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
} while (0)
void
pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
struct pf_keth_rule **a, int *match)
{
struct pf_keth_anchor_stackframe *f;
NET_EPOCH_ASSERT();
if (match)
*match = 0;
if (*depth >= PF_ANCHOR_STACK_MAX) {
printf("%s: anchor stack overflow on %s\n",
__func__, (*r)->anchor->name);
*r = TAILQ_NEXT(*r, entries);
return;
} else if (*depth == 0 && a != NULL)
*a = *r;
f = stack + (*depth)++;
f->rs = *rs;
f->r = *r;
if ((*r)->anchor_wildcard) {
struct pf_keth_anchor_node *parent = &(*r)->anchor->children;
if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) {
*r = NULL;
return;
}
*rs = &f->child->ruleset;
} else {
f->child = NULL;
*rs = &(*r)->anchor->ruleset;
}
*r = TAILQ_FIRST((*rs)->active.rules);
}
int
pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
struct pf_keth_rule **a, int *match)
{
struct pf_keth_anchor_stackframe *f;
struct pf_keth_rule *fr;
int quick = 0;
NET_EPOCH_ASSERT();
do {
if (*depth <= 0)
break;
f = stack + *depth - 1;
fr = PF_ETH_ANCHOR_RULE(f);
if (f->child != NULL) {
if (match != NULL && *match) {
PF_ETH_ANCHOR_SET_MATCH(f);
*match = 0;
}
f->child = RB_NEXT(pf_keth_anchor_node,
&fr->anchor->children, f->child);
if (f->child != NULL) {
*rs = &f->child->ruleset;
*r = TAILQ_FIRST((*rs)->active.rules);
if (*r == NULL)
continue;
else
break;
}
}
(*depth)--;
if (*depth == 0 && a != NULL)
*a = NULL;
*rs = f->rs;
if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match))
quick = fr->quick;
*r = TAILQ_NEXT(fr, entries);
} while (*r == NULL);
return (quick);
}
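/*
 * Compose a pool address: network bits come from 'raddr', host bits from
 * 'saddr', as selected by the pool mask 'rmask'.
 */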
void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
break;
#endif
}
}
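/*
 * Increment an address numerically; for IPv6 the carry ripples up through
 * the four 32-bit words.
 */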
void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
switch (af) {
#ifdef INET
case AF_INET:
addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
break;
#endif
#ifdef INET6
case AF_INET6:
if (addr->addr32[3] == 0xffffffff) {
addr->addr32[3] = 0;
if (addr->addr32[2] == 0xffffffff) {
addr->addr32[2] = 0;
if (addr->addr32[1] == 0xffffffff) {
addr->addr32[1] = 0;
addr->addr32[0] =
htonl(ntohl(addr->addr32[0]) + 1);
} else
addr->addr32[1] =
htonl(ntohl(addr->addr32[1]) + 1);
} else
addr->addr32[2] =
htonl(ntohl(addr->addr32[2]) + 1);
} else
addr->addr32[3] =
htonl(ntohl(addr->addr32[3]) + 1);
break;
#endif
}
}
void
pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
{
a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
if (r->rule_flag & PFRULE_RANDOMID)
a->flags |= PFSTATE_RANDOMID;
	if (r->scrub_flags & PFSTATE_SETTOS || r->rule_flag & PFRULE_SET_TOS) {
a->flags |= PFSTATE_SETTOS;
a->set_tos = r->set_tos;
}
if (r->qid)
a->qid = r->qid;
if (r->pqid)
a->pqid = r->pqid;
if (r->rtableid >= 0)
a->rtableid = r->rtableid;
a->log |= r->log;
if (r->min_ttl)
a->min_ttl = r->min_ttl;
if (r->max_mss)
a->max_mss = r->max_mss;
if (r->dnpipe)
a->dnpipe = r->dnpipe;
if (r->dnrpipe)
a->dnrpipe = r->dnrpipe;
if (r->dnpipe || r->dnrpipe) {
if (r->free_flags & PFRULE_DN_IS_PIPE)
a->flags |= PFSTATE_DN_IS_PIPE;
else
a->flags &= ~PFSTATE_DN_IS_PIPE;
}
if (r->scrub_flags & PFSTATE_SETPRIO) {
a->set_prio[0] = r->set_prio[0];
a->set_prio[1] = r->set_prio[1];
}
if (r->allow_opts)
a->allow_opts = r->allow_opts;
if (r->max_pkt_size)
a->max_pkt_size = r->max_pkt_size;
}
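/*
 * Look up the inpcb owning this TCP/UDP packet and record the socket
 * credentials in pd->lookup for "user"/"group" rule matching.  Returns 1 on
 * success, -1 if no matching socket was found.
 */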
int
pf_socket_lookup(struct pf_pdesc *pd)
{
struct pf_addr *saddr, *daddr;
u_int16_t sport, dport;
struct inpcbinfo *pi;
struct inpcb *inp;
pd->lookup.uid = -1;
pd->lookup.gid = -1;
switch (pd->proto) {
case IPPROTO_TCP:
sport = pd->hdr.tcp.th_sport;
dport = pd->hdr.tcp.th_dport;
pi = &V_tcbinfo;
break;
case IPPROTO_UDP:
sport = pd->hdr.udp.uh_sport;
dport = pd->hdr.udp.uh_dport;
pi = &V_udbinfo;
break;
default:
return (-1);
}
if (pd->dir == PF_IN) {
saddr = pd->src;
daddr = pd->dst;
} else {
u_int16_t p;
p = sport;
sport = dport;
dport = p;
saddr = pd->dst;
daddr = pd->src;
}
switch (pd->af) {
#ifdef INET
case AF_INET:
inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
if (inp == NULL) {
inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
daddr->v4, dport, INPLOOKUP_WILDCARD |
INPLOOKUP_RLOCKPCB, NULL, pd->m);
if (inp == NULL)
return (-1);
}
break;
#endif
#ifdef INET6
case AF_INET6:
inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
if (inp == NULL) {
inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
&daddr->v6, dport, INPLOOKUP_WILDCARD |
INPLOOKUP_RLOCKPCB, NULL, pd->m);
if (inp == NULL)
return (-1);
}
break;
#endif
default:
unhandled_af(pd->af);
}
INP_RLOCK_ASSERT(inp);
pd->lookup.uid = inp->inp_cred->cr_uid;
pd->lookup.gid = inp->inp_cred->cr_gid;
INP_RUNLOCK(inp);
return (1);
}
uint8_t *
pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
u_int8_t min_typelen)
{
uint8_t *eoh = opts + hlen;
if (min_typelen < 2)
return (NULL);
while ((eoh - opt) >= min_typelen) {
switch (*opt) {
case TCPOPT_EOL:
case TCPOPT_NOP:
opt++;
continue;
default:
if (opt[0] == type &&
opt[1] >= min_typelen)
return (opt);
}
opt += MAX(opt[1], 2);
}
return (NULL);
}
u_int8_t
pf_get_wscale(struct pf_pdesc *pd)
{
int olen;
uint8_t opts[MAX_TCPOPTLEN], *opt;
uint8_t wscale = 0;
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
return (0);
opt = opts;
while ((opt = pf_find_tcpopt(opt, opts, olen,
TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
wscale = opt[2];
wscale = MIN(wscale, TCP_MAX_WINSHIFT);
wscale |= PF_WSCALE_FLAG;
opt += opt[1];
}
return (wscale);
}
u_int16_t
pf_get_mss(struct pf_pdesc *pd)
{
int olen;
uint8_t opts[MAX_TCPOPTLEN], *opt;
u_int16_t mss = V_tcp_mssdflt;
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
return (0);
opt = opts;
while ((opt = pf_find_tcpopt(opt, opts, olen,
TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
memcpy(&mss, (opt + 2), 2);
mss = ntohs(mss);
opt += opt[1];
}
return (mss);
}
static u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
{
struct nhop_object *nh;
#ifdef INET6
struct in6_addr dst6;
uint32_t scopeid;
#endif
int hlen = 0;
uint16_t mss = 0;
NET_EPOCH_ASSERT();
switch (af) {
#ifdef INET
case AF_INET:
hlen = sizeof(struct ip);
nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0);
if (nh != NULL)
mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
break;
#endif
#ifdef INET6
case AF_INET6:
hlen = sizeof(struct ip6_hdr);
in6_splitscope(&addr->v6, &dst6, &scopeid);
nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0);
if (nh != NULL)
mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
break;
#endif
}
mss = max(V_tcp_mssdflt, mss);
mss = min(mss, offer);
mss = max(mss, 64);
return (mss);
}
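/*
 * Generate an initial sequence number for synproxy: hash the connection
 * 4-tuple with a lazily initialized per-VNET secret (similar in spirit to
 * RFC 6528) and add a monotonically increasing randomized offset.
 */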
static u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
SHA512_CTX ctx;
union {
uint8_t bytes[SHA512_DIGEST_LENGTH];
uint32_t words[1];
} digest;
if (V_pf_tcp_secret_init == 0) {
arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
SHA512_Init(&V_pf_tcp_secret_ctx);
SHA512_Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
sizeof(V_pf_tcp_secret));
V_pf_tcp_secret_init = 1;
}
ctx = V_pf_tcp_secret_ctx;
SHA512_Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
SHA512_Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
switch (pd->af) {
case AF_INET6:
SHA512_Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
SHA512_Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
break;
case AF_INET:
SHA512_Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
SHA512_Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
break;
}
SHA512_Final(digest.bytes, &ctx);
V_pf_tcp_iss_off += 4096;
#define ISN_RANDOM_INCREMENT (4096 - 1)
return (digest.words[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
V_pf_tcp_iss_off);
#undef ISN_RANDOM_INCREMENT
}
static bool
pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r)
{
bool match = true;
if (! r->isset)
return (!r->neg);
for (int i = 0; i < ETHER_ADDR_LEN; i++) {
if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) {
match = false;
break;
}
}
return (match ^ r->neg);
}
static int
pf_match_eth_tag(struct mbuf *m, struct pf_keth_rule *r, int *tag, int mtag)
{
if (*tag == -1)
*tag = mtag;
return ((!r->match_tag_not && r->match_tag == *tag) ||
(r->match_tag_not && r->match_tag != *tag));
}
static void
pf_bridge_to(struct ifnet *ifp, struct mbuf *m)
{
if (ifp == NULL) {
m_freem(m);
return;
}
switch (ifp->if_type) {
case IFT_ETHER:
case IFT_XETHER:
case IFT_L2VLAN:
case IFT_BRIDGE:
case IFT_IEEE8023ADLAG:
break;
default:
m_freem(m);
return;
}
ifp->if_transmit(ifp, m);
}
static int
pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
{
#ifdef INET
struct ip ip;
#endif
#ifdef INET6
struct ip6_hdr ip6;
#endif
struct mbuf *m = *m0;
struct ether_header *e;
struct pf_keth_rule *r, *rm, *a = NULL;
struct pf_keth_ruleset *ruleset = NULL;
struct pf_mtag *mtag;
struct pf_keth_ruleq *rules;
struct pf_addr *src = NULL, *dst = NULL;
struct pfi_kkif *bridge_to;
sa_family_t af = 0;
uint16_t proto;
int asd = 0, match = 0;
int tag = -1;
uint8_t action;
struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACK_MAX];
MPASS(kif->pfik_ifp->if_vnet == curvnet);
NET_EPOCH_ASSERT();
PF_RULES_RLOCK_TRACKER;
SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m);
mtag = pf_find_mtag(m);
if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
pf_dummynet_flag_remove(m, mtag);
return (PF_PASS);
}
if (__predict_false(m->m_len < sizeof(struct ether_header)) &&
(m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: m_len < sizeof(struct ether_header)"
", pullup failed", __func__);
return (PF_DROP);
}
e = mtod(m, struct ether_header *);
proto = ntohs(e->ether_type);
switch (proto) {
#ifdef INET
case ETHERTYPE_IP: {
if (m_length(m, NULL) < (sizeof(struct ether_header) +
sizeof(ip)))
return (PF_DROP);
af = AF_INET;
m_copydata(m, sizeof(struct ether_header), sizeof(ip),
(caddr_t)&ip);
src = (struct pf_addr *)&ip.ip_src;
dst = (struct pf_addr *)&ip.ip_dst;
break;
}
#endif
#ifdef INET6
case ETHERTYPE_IPV6: {
if (m_length(m, NULL) < (sizeof(struct ether_header) +
sizeof(ip6)))
return (PF_DROP);
af = AF_INET6;
m_copydata(m, sizeof(struct ether_header), sizeof(ip6),
(caddr_t)&ip6);
src = (struct pf_addr *)&ip6.ip6_src;
dst = (struct pf_addr *)&ip6.ip6_dst;
break;
}
#endif
}
PF_RULES_RLOCK();
ruleset = V_pf_keth;
rules = atomic_load_ptr(&ruleset->active.rules);
for (r = TAILQ_FIRST(rules), rm = NULL; r != NULL;) {
counter_u64_add(r->evaluations, 1);
SDT_PROBE2(pf, eth, test_rule, test, r->nr, r);
if (pfi_kkif_match(r->kif, kif) == r->ifnot) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"kif");
r = r->skip[PFE_SKIP_IFP].ptr;
}
else if (r->direction && r->direction != dir) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"dir");
r = r->skip[PFE_SKIP_DIR].ptr;
}
else if (r->proto && r->proto != proto) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"proto");
r = r->skip[PFE_SKIP_PROTO].ptr;
}
else if (! pf_match_eth_addr(e->ether_shost, &r->src)) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"src");
r = r->skip[PFE_SKIP_SRC_ADDR].ptr;
}
else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"dst");
r = r->skip[PFE_SKIP_DST_ADDR].ptr;
}
else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af,
r->ipsrc.neg, kif, M_GETFIB(m))) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"ip_src");
r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr;
}
else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af,
r->ipdst.neg, kif, M_GETFIB(m))) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"ip_dst");
r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr;
}
else if (r->match_tag && !pf_match_eth_tag(m, r, &tag,
mtag ? mtag->tag : 0)) {
SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
"match_tag");
r = TAILQ_NEXT(r, entries);
}
else {
if (r->tag)
tag = r->tag;
if (r->anchor == NULL) {
rm = r;
SDT_PROBE2(pf, eth, test_rule, match, r->nr, r);
if (r->quick)
break;
r = TAILQ_NEXT(r, entries);
} else {
pf_step_into_keth_anchor(anchor_stack, &asd,
&ruleset, &r, &a, &match);
}
}
if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd,
&ruleset, &r, &a, &match))
break;
}
r = rm;
SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r);
if (r == NULL) {
PF_RULES_RUNLOCK();
return (PF_PASS);
}
counter_u64_add(r->packets[dir == PF_OUT], 1);
counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
pf_update_timestamp(r);
if (r->action == PF_DROP) {
PF_RULES_RUNLOCK();
return (PF_DROP);
}
if (tag > 0) {
if (mtag == NULL)
mtag = pf_get_mtag(m);
if (mtag == NULL) {
PF_RULES_RUNLOCK();
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
return (PF_DROP);
}
mtag->tag = tag;
}
if (r->qid != 0) {
if (mtag == NULL)
mtag = pf_get_mtag(m);
if (mtag == NULL) {
PF_RULES_RUNLOCK();
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
return (PF_DROP);
}
mtag->qid = r->qid;
}
action = r->action;
bridge_to = r->bridge_to;
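/* Shape the packet through dummynet if the rule configured a pipe/queue. */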
if (r->dnpipe) {
struct ip_fw_args dnflow;
if (ip_dn_io_ptr == NULL) {
PF_RULES_RUNLOCK();
m_freem(m);
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
return (PF_DROP);
}
if (mtag == NULL)
mtag = pf_get_mtag(m);
if (mtag == NULL) {
PF_RULES_RUNLOCK();
counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
return (PF_DROP);
}
bzero(&dnflow, sizeof(dnflow));
dnflow.f_id.dst_port = 0;
dnflow.f_id.src_port = 0;
dnflow.f_id.proto = 0;
dnflow.rule.info = r->dnpipe;
dnflow.rule.info |= IPFW_IS_DUMMYNET;
if (r->dnflags & PFRULE_DN_IS_PIPE)
dnflow.rule.info |= IPFW_IS_PIPE;
dnflow.f_id.extra = dnflow.rule.info;
dnflow.flags = dir == PF_IN ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
dnflow.flags |= IPFW_ARGS_ETHER;
dnflow.ifp = kif->pfik_ifp;
switch (af) {
case AF_INET:
dnflow.f_id.addr_type = 4;
dnflow.f_id.src_ip = src->v4.s_addr;
dnflow.f_id.dst_ip = dst->v4.s_addr;
break;
case AF_INET6:
dnflow.flags |= IPFW_ARGS_IP6;
dnflow.f_id.addr_type = 6;
dnflow.f_id.src_ip6 = src->v6;
dnflow.f_id.dst_ip6 = dst->v6;
break;
}
PF_RULES_RUNLOCK();
mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
ip_dn_io_ptr(m0, &dnflow);
if (*m0 != NULL)
pf_dummynet_flag_remove(m, mtag);
} else {
PF_RULES_RUNLOCK();
}
if (action == PF_PASS && bridge_to) {
pf_bridge_to(bridge_to->pfik_ifp, *m0);
*m0 = NULL;
}
return (action);
}
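/*
 * On a mismatch (t != 0) advance to rule 'a' and restart the match
 * loop.  The empty else arm swallows the trailing semicolon so the
 * macro expands to a single complete statement.
 */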
#define PF_TEST_ATTRIB(t, a) \
if (t) { \
r = a; \
continue; \
} else do { \
} while (0)
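/*
 * Set up the translation carried by rule 'r': af-to or NAT/RDR.
 * Returns PFRES_MATCH when a translation was installed, PFRES_MAX
 * when the rule carries none, or an error reason otherwise.
 */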
static __inline u_short
pf_rule_apply_nat(struct pf_test_ctx *ctx, struct pf_krule *r)
{
struct pf_pdesc *pd = ctx->pd;
u_short transerror;
u_int8_t nat_action;
if (r->rule_flag & PFRULE_AFTO) {
if (ctx->nr != NULL)
return (PFRES_TRANSLATE);
KASSERT(r->action != PF_MATCH, ("%s: af-to on match rule", __func__));
ctx->nat_pool = &(r->nat);
ctx->nr = r;
pd->naf = r->naf;
if (pf_get_transaddr_af(ctx->nr, pd) == -1) {
return (PFRES_TRANSLATE);
}
return (PFRES_MATCH);
} else if (r->rdr.cur || r->nat.cur) {
if (ctx->nr != NULL)
return (PFRES_TRANSLATE);
ctx->nr = r;
if (r->nat.cur) {
nat_action = PF_NAT;
ctx->nat_pool = &(r->nat);
} else {
nat_action = PF_RDR;
ctx->nat_pool = &(r->rdr);
}
transerror = pf_get_transaddr(ctx, ctx->nr,
nat_action, ctx->nat_pool);
if (transerror == PFRES_MATCH) {
ctx->rewrite += pf_translate_compat(ctx);
return (PFRES_MATCH);
}
return (transerror);
}
return (PFRES_MAX);
}
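/*
 * Walk a filter ruleset and find the last (or first quick) matching
 * rule, descending into anchors and recording every "match" rule in
 * match_rules along the way.
 */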
enum pf_test_status
pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset,
struct pf_krule_slist *match_rules)
{
struct pf_krule_item *ri, *rt;
struct pf_krule *r;
struct pf_krule *save_a;
struct pf_kruleset *save_aruleset;
struct pf_pdesc *pd = ctx->pd;
u_short transerror;
r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr);
while (r != NULL) {
if (ctx->pd->related_rule) {
*ctx->rm = ctx->pd->related_rule;
break;
}
PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
TAILQ_NEXT(r, entries));
pf_counter_u64_add(&r->evaluations, 1);
PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
r->skip[PF_SKIP_IFP]);
PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
r->skip[PF_SKIP_DIR]);
PF_TEST_ATTRIB(r->af && r->af != pd->af,
r->skip[PF_SKIP_AF]);
PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
r->skip[PF_SKIP_PROTO]);
PF_TEST_ATTRIB(PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, pd->naf,
r->src.neg, pd->kif, M_GETFIB(pd->m)),
r->skip[PF_SKIP_SRC_ADDR]);
PF_TEST_ATTRIB(PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
r->dst.neg, NULL, M_GETFIB(pd->m)),
r->skip[PF_SKIP_DST_ADDR]);
switch (pd->virtual_proto) {
case PF_VPROTO_FRAGMENT:
PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB((r->type || r->code),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB((r->gid.op || r->uid.op),
TAILQ_NEXT(r, entries));
break;
case IPPROTO_TCP:
PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th))
!= r->flags,
TAILQ_NEXT(r, entries));
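/* FALLTHROUGH */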
case IPPROTO_SCTP:
case IPPROTO_UDP:
PF_TEST_ATTRIB(r->src.port_op && !pf_match_port(r->src.port_op,
r->src.port[0], r->src.port[1], pd->nsport),
r->skip[PF_SKIP_SRC_PORT]);
PF_TEST_ATTRIB(r->dst.port_op && !pf_match_port(r->dst.port_op,
r->dst.port[0], r->dst.port[1], pd->ndport),
r->skip[PF_SKIP_DST_PORT]);
PF_TEST_ATTRIB(r->uid.op && (pd->lookup.done || (pd->lookup.done =
pf_socket_lookup(pd), 1)) &&
!pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
pd->lookup.uid),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->gid.op && (pd->lookup.done || (pd->lookup.done =
pf_socket_lookup(pd), 1)) &&
!pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
pd->lookup.gid),
TAILQ_NEXT(r, entries));
break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1,
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1,
TAILQ_NEXT(r, entries));
break;
default:
break;
}
PF_TEST_ATTRIB(r->tos && !(r->tos == pd->tos),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->prio &&
!pf_match_ieee8021q_pcp(r->prio, pd->m),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->prob &&
r->prob <= arc4random(),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r,
&ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
r->rcvifnot),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
pd->virtual_proto != PF_VPROTO_FRAGMENT),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY &&
(pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match(
pf_osfp_fingerprint(pd, ctx->th),
r->os_fingerprint)),
TAILQ_NEXT(r, entries));
if (r->pktrate.limit) {
PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
TAILQ_NEXT(r, entries));
}
if (r->tag)
ctx->tag = r->tag;
if (r->anchor == NULL) {
if (r->rule_flag & PFRULE_ONCE) {
uint32_t rule_flag;
rule_flag = r->rule_flag;
if ((rule_flag & PFRULE_EXPIRED) == 0 &&
atomic_cmpset_int(&r->rule_flag, rule_flag,
rule_flag | PFRULE_EXPIRED)) {
r->exptime = time_uptime;
} else {
r = TAILQ_NEXT(r, entries);
continue;
}
}
if (r->action == PF_MATCH) {
transerror = pf_rule_apply_nat(ctx, r);
switch (transerror) {
case PFRES_MATCH:
case PFRES_MAX:
break;
default:
REASON_SET(&ctx->reason, transerror);
return (PF_TEST_FAIL);
}
ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO);
if (ri == NULL) {
REASON_SET(&ctx->reason, PFRES_MEMORY);
return (PF_TEST_FAIL);
}
ri->r = r;
if (SLIST_EMPTY(match_rules)) {
SLIST_INSERT_HEAD(match_rules, ri, entry);
} else {
SLIST_INSERT_AFTER(rt, ri, entry);
}
rt = ri;
pf_rule_to_actions(r, &pd->act);
if (r->log)
PFLOG_PACKET(r->action, PFRES_MATCH, r,
ctx->a, ruleset, pd, 1, NULL);
} else {
*ctx->rm = r;
*ctx->am = ctx->a;
*ctx->rsm = ruleset;
ctx->arsm = ctx->aruleset;
}
if (pd->act.log & PF_LOG_MATCHES)
pf_log_matches(pd, r, ctx->a, ruleset, match_rules);
if (r->quick) {
ctx->test_status = PF_TEST_QUICK;
break;
}
} else {
save_a = ctx->a;
save_aruleset = ctx->aruleset;
ctx->a = r;
ctx->aruleset = ruleset;
if (ctx->a->quick)
ctx->test_status = PF_TEST_QUICK;
if (pf_step_into_anchor(ctx, r, match_rules) != PF_TEST_OK) {
break;
}
ctx->a = save_a;
ctx->aruleset = save_aruleset;
}
r = TAILQ_NEXT(r, entries);
}
return (ctx->test_status);
}
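/*
 * Run a packet through the filter rules: apply any translation,
 * evaluate the main ruleset and, when the verdict is pass and state
 * is to be kept, create the state entry.
 */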
static int
pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
struct pf_pdesc *pd, struct pf_krule **am,
struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp,
struct pf_krule_slist *match_rules)
{
struct pf_krule *r = NULL;
struct pf_kruleset *ruleset = NULL;
struct pf_test_ctx ctx;
u_short transerror;
int action = PF_PASS;
u_int16_t bproto_sum = 0, bip_sum = 0;
enum pf_test_status rv;
PF_RULES_RASSERT();
bzero(&ctx, sizeof(ctx));
ctx.tag = -1;
ctx.pd = pd;
ctx.rm = rm;
ctx.am = am;
ctx.rsm = rsm;
ctx.th = &pd->hdr.tcp;
ctx.reason = *reason;
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
if (inp != NULL) {
INP_LOCK_ASSERT(inp);
pd->lookup.uid = inp->inp_cred->cr_uid;
pd->lookup.gid = inp->inp_cred->cr_gid;
pd->lookup.done = 1;
}
if (pd->ip_sum)
bip_sum = *pd->ip_sum;
switch (pd->virtual_proto) {
case IPPROTO_TCP:
bproto_sum = ctx.th->th_sum;
pd->nsport = ctx.th->th_sport;
pd->ndport = ctx.th->th_dport;
break;
case IPPROTO_UDP:
bproto_sum = pd->hdr.udp.uh_sum;
pd->nsport = pd->hdr.udp.uh_sport;
pd->ndport = pd->hdr.udp.uh_dport;
break;
case IPPROTO_SCTP:
pd->nsport = pd->hdr.sctp.src_port;
pd->ndport = pd->hdr.sctp.dest_port;
break;
#ifdef INET
case IPPROTO_ICMP:
MPASS(pd->af == AF_INET);
ctx.icmptype = pd->hdr.icmp.icmp_type;
ctx.icmpcode = pd->hdr.icmp.icmp_code;
ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
&ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
if (ctx.icmp_dir == PF_IN) {
pd->nsport = ctx.virtual_id;
pd->ndport = ctx.virtual_type;
} else {
pd->nsport = ctx.virtual_type;
pd->ndport = ctx.virtual_id;
}
break;
#endif
#ifdef INET6
case IPPROTO_ICMPV6:
MPASS(pd->af == AF_INET6);
ctx.icmptype = pd->hdr.icmp6.icmp6_type;
ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
&ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
if (ctx.icmp_dir == PF_IN) {
pd->nsport = ctx.virtual_id;
pd->ndport = ctx.virtual_type;
} else {
pd->nsport = ctx.virtual_type;
pd->ndport = ctx.virtual_id;
}
break;
#endif
default:
pd->nsport = pd->ndport = 0;
break;
}
pd->osport = pd->nsport;
pd->odport = pd->ndport;
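/* Look up a NAT/RDR/BINAT translation before evaluating the rules. */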
transerror = pf_get_translation(&ctx);
switch (transerror) {
default:
REASON_SET(&ctx.reason, transerror);
goto cleanup;
case PFRES_MAX:
break;
case PFRES_MATCH:
KASSERT(ctx.sk != NULL, ("%s: null sk", __func__));
KASSERT(ctx.nk != NULL, ("%s: null nk", __func__));
if (ctx.nr->log) {
PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a,
ruleset, pd, 1, NULL);
}
ctx.rewrite += pf_translate_compat(&ctx);
ctx.nat_pool = &(ctx.nr->rdr);
}
if (ctx.nr && ctx.nr->natpass) {
r = ctx.nr;
ruleset = *ctx.rsm;
} else {
ruleset = &pf_main_ruleset;
rv = pf_match_rule(&ctx, ruleset, match_rules);
if (rv == PF_TEST_FAIL) {
goto cleanup;
}
r = *ctx.rm;
ctx.a = *ctx.am;
ruleset = *ctx.rsm;
ctx.aruleset = ctx.arsm;
pf_rule_to_actions(r, &pd->act);
transerror = pf_rule_apply_nat(&ctx, r);
switch (transerror) {
case PFRES_MATCH:
case PFRES_MAX:
break;
default:
REASON_SET(&ctx.reason, transerror);
goto cleanup;
}
}
REASON_SET(&ctx.reason, PFRES_MATCH);
if (r->log) {
if (ctx.rewrite)
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL);
}
if (pd->act.log & PF_LOG_MATCHES)
pf_log_matches(pd, r, ctx.a, ruleset, match_rules);
if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
(r->action == PF_DROP) &&
((r->rule_flag & PFRULE_RETURNRST) ||
(r->rule_flag & PFRULE_RETURNICMP) ||
(r->rule_flag & PFRULE_RETURN))) {
pf_return(r, ctx.nr, pd, ctx.th, bproto_sum,
bip_sum, &ctx.reason, r->rtableid);
}
if (r->action == PF_DROP)
goto cleanup;
if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) {
REASON_SET(&ctx.reason, PFRES_MEMORY);
goto cleanup;
}
if (pd->act.rtableid >= 0)
M_SETFIB(pd->m, pd->act.rtableid);
if (r->rt) {
pd->act.rt = r->rt;
if (r->rt == PF_REPLYTO)
pd->act.rt_af = pd->af;
else
pd->act.rt_af = pd->naf;
if ((transerror = pf_map_addr_sn(pd->af, r, pd->src,
&pd->act.rt_addr, &pd->act.rt_af, &pd->act.rt_kif, NULL,
&(r->route), PF_SN_ROUTE)) != PFRES_MATCH) {
REASON_SET(&ctx.reason, transerror);
goto cleanup;
}
}
if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
(!ctx.state_icmp && (r->keep_state || ctx.nr != NULL ||
(pd->flags & PFDESC_TCP_NORM)))) {
bool nat64;
action = pf_create_state(r, &ctx, sm, bproto_sum, bip_sum,
match_rules);
ctx.sk = ctx.nk = NULL;
if (action != PF_PASS) {
pf_udp_mapping_release(ctx.udp_mapping);
if (r->log || (ctx.nr != NULL && ctx.nr->log) ||
ctx.reason == PFRES_MEMORY)
pd->act.log |= PF_LOG_FORCE;
if (action == PF_DROP &&
(r->rule_flag & PFRULE_RETURN))
pf_return(r, ctx.nr, pd, ctx.th,
bproto_sum, bip_sum, &ctx.reason,
pd->act.rtableid);
*reason = ctx.reason;
return (action);
}
nat64 = pd->af != pd->naf;
if (nat64) {
int ret;
if (ctx.sk == NULL)
ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
if (ctx.nk == NULL)
ctx.nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
if (pd->dir == PF_IN) {
ret = pf_translate(pd, &ctx.sk->addr[pd->didx],
ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx],
ctx.sk->port[pd->sidx], ctx.virtual_type,
ctx.icmp_dir);
} else {
ret = pf_translate(pd, &ctx.sk->addr[pd->sidx],
ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx],
ctx.sk->port[pd->didx], ctx.virtual_type,
ctx.icmp_dir);
}
if (ret < 0)
goto cleanup;
ctx.rewrite += ret;
if (ctx.rewrite && ctx.sk->af != ctx.nk->af)
action = PF_AFRT;
}
} else {
uma_zfree(V_pf_state_key_z, ctx.sk);
uma_zfree(V_pf_state_key_z, ctx.nk);
ctx.sk = ctx.nk = NULL;
pf_udp_mapping_release(ctx.udp_mapping);
}
if (ctx.rewrite)
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
pd->dir == PF_OUT &&
V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) {
*reason = ctx.reason;
return (PF_DEFER);
}
*reason = ctx.reason;
return (action);
cleanup:
uma_zfree(V_pf_state_key_z, ctx.sk);
uma_zfree(V_pf_state_key_z, ctx.nk);
pf_udp_mapping_release(ctx.udp_mapping);
*reason = ctx.reason;
return (PF_DROP);
}
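/*
 * Create a state for a passed connection: enforce state and
 * source-node limits, initialize per-protocol peer tracking, insert
 * the state keys and, for SYN proxy rules, answer the initial SYN
 * ourselves.
 */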
static int
pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx,
struct pf_kstate **sm, u_int16_t bproto_sum, u_int16_t bip_sum,
struct pf_krule_slist *match_rules)
{
struct pf_pdesc *pd = ctx->pd;
struct pf_kstate *s = NULL;
struct pf_ksrc_node *sns[PF_SN_MAX] = { NULL };
struct pf_srchash *snhs[PF_SN_MAX] = { NULL };
struct tcphdr *th = &pd->hdr.tcp;
u_int16_t mss = V_tcp_mssdflt;
u_short sn_reason;
if (r->max_states &&
(counter_u64_fetch(r->states_cur) >= r->max_states)) {
counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
REASON_SET(&ctx->reason, PFRES_MAXSTATES);
goto csfailed;
}
if ((r->rule_flag & PFRULE_SRCTRACK) &&
(sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, pd->af,
NULL, NULL, pd->af, PF_SN_LIMIT)) != 0) {
REASON_SET(&ctx->reason, sn_reason);
goto csfailed;
}
if (r->rt) {
if ((r->route.opts & PF_POOL_STICKYADDR) &&
(sn_reason = pf_insert_src_node(sns, snhs, r, pd->src,
pd->af, &pd->act.rt_addr, pd->act.rt_kif, pd->act.rt_af,
PF_SN_ROUTE)) != 0) {
REASON_SET(&ctx->reason, sn_reason);
goto csfailed;
}
}
if (ctx->nr != NULL) {
KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__));
if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) &&
(sn_reason = pf_insert_src_node(sns, snhs, ctx->nr,
ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af,
ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL,
pd->naf, PF_SN_NAT)) != 0 ) {
REASON_SET(&ctx->reason, sn_reason);
goto csfailed;
}
}
s = pf_alloc_state(M_NOWAIT);
if (s == NULL) {
REASON_SET(&ctx->reason, PFRES_MEMORY);
goto csfailed;
}
s->rule = r;
s->nat_rule = ctx->nr;
s->anchor = ctx->a;
s->match_rules = *match_rules;
memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions));
if (pd->act.allow_opts)
s->state_flags |= PFSTATE_ALLOWOPTS;
if (r->rule_flag & PFRULE_STATESLOPPY)
s->state_flags |= PFSTATE_SLOPPY;
if (pd->flags & PFDESC_TCP_NORM)
s->state_flags |= PFSTATE_SCRUB_TCP;
if ((r->rule_flag & PFRULE_PFLOW) ||
(ctx->nr != NULL && ctx->nr->rule_flag & PFRULE_PFLOW))
s->state_flags |= PFSTATE_PFLOW;
s->act.log = pd->act.log & PF_LOG_ALL;
s->sync_state = PFSYNC_S_NONE;
s->state_flags |= pd->act.flags;
if (ctx->nr != NULL)
s->act.log |= ctx->nr->log & PF_LOG_ALL;
switch (pd->proto) {
case IPPROTO_TCP:
s->src.seqlo = ntohl(th->th_seq);
s->src.seqhi = s->src.seqlo + pd->p_len + 1;
if ((tcp_get_flags(th) & (TH_SYN|TH_ACK)) == TH_SYN &&
r->keep_state == PF_STATE_MODULATE) {
if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
0)
s->src.seqdiff = 1;
pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum,
htonl(s->src.seqlo + s->src.seqdiff), 0);
ctx->rewrite = 1;
} else
s->src.seqdiff = 0;
if (tcp_get_flags(th) & TH_SYN) {
s->src.seqhi++;
s->src.wscale = pf_get_wscale(pd);
}
s->src.max_win = MAX(ntohs(th->th_win), 1);
if (s->src.wscale & PF_WSCALE_MASK) {
int win = s->src.max_win;
win += 1 << (s->src.wscale & PF_WSCALE_MASK);
s->src.max_win = (win - 1) >>
(s->src.wscale & PF_WSCALE_MASK);
}
if (tcp_get_flags(th) & TH_FIN)
s->src.seqhi++;
s->dst.seqhi = 1;
s->dst.max_win = 1;
pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
s->timeout = PFTM_TCP_FIRST_PACKET;
atomic_add_32(&V_pf_status.states_halfopen, 1);
break;
case IPPROTO_UDP:
pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
s->timeout = PFTM_UDP_FIRST_PACKET;
break;
case IPPROTO_SCTP:
pf_set_protostate(s, PF_PEER_SRC, SCTP_COOKIE_WAIT);
pf_set_protostate(s, PF_PEER_DST, SCTP_CLOSED);
s->timeout = PFTM_SCTP_FIRST_PACKET;
break;
case IPPROTO_ICMP:
#ifdef INET6
case IPPROTO_ICMPV6:
#endif
s->timeout = PFTM_ICMP_FIRST_PACKET;
break;
default:
pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
s->timeout = PFTM_OTHER_FIRST_PACKET;
}
s->creation = s->expire = pf_get_uptime();
if (pd->proto == IPPROTO_TCP) {
if (s->state_flags & PFSTATE_SCRUB_TCP &&
pf_normalize_tcp_init(pd, th, &s->src)) {
REASON_SET(&ctx->reason, PFRES_MEMORY);
goto csfailed;
}
if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
pf_normalize_tcp_stateful(pd, &ctx->reason, th, s,
&s->src, &s->dst, &ctx->rewrite)) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: tcp normalize failed on first "
"pkt", __func__);
goto csfailed;
}
} else if (pd->proto == IPPROTO_SCTP) {
if (pf_normalize_sctp_init(pd, &s->src, &s->dst))
goto csfailed;
if (! (pd->sctp_flags & (PFDESC_SCTP_INIT | PFDESC_SCTP_ADD_IP)))
goto csfailed;
}
s->direction = pd->dir;
if (ctx->sk == NULL && ctx->nk == NULL) {
MPASS(pd->sport == NULL || (pd->osport == *pd->sport));
MPASS(pd->dport == NULL || (pd->odport == *pd->dport));
if (pf_state_key_setup(pd, pd->nsport, pd->ndport,
&ctx->sk, &ctx->nk)) {
goto csfailed;
}
} else
KASSERT((ctx->sk != NULL && ctx->nk != NULL), ("%s: nr %p sk %p, nk %p",
__func__, ctx->nr, ctx->sk, ctx->nk));
if (pf_state_insert(BOUND_IFACE(s, pd), pd->kif,
(pd->dir == PF_IN) ? ctx->sk : ctx->nk,
(pd->dir == PF_IN) ? ctx->nk : ctx->sk, s)) {
REASON_SET(&ctx->reason, PFRES_STATEINS);
goto drop;
} else
*sm = s;
ctx->sk = ctx->nk = NULL;
STATE_INC_COUNTERS(s);
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) {
s->sns[sn_type] = sns[sn_type];
PF_HASHROW_UNLOCK(snhs[sn_type]);
}
}
if (ctx->tag > 0)
s->tag = ctx->tag;
if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) ==
TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
pf_undo_nat(ctx->nr, pd, bip_sum);
s->src.seqhi = arc4random();
int rtid = M_GETFIB(pd->m);
mss = pf_get_mss(pd);
mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
s->src.mss = mss;
pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0,
pd->act.rtableid, &ctx->reason);
REASON_SET(&ctx->reason, PFRES_SYNPROXY);
return (PF_SYNPROXY_DROP);
}
s->udp_mapping = ctx->udp_mapping;
return (PF_PASS);
csfailed:
uma_zfree(V_pf_state_key_z, ctx->sk);
uma_zfree(V_pf_state_key_z, ctx->nk);
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) {
if (--sns[sn_type]->states == 0 &&
sns[sn_type]->expire == 0) {
pf_unlink_src_node(sns[sn_type]);
pf_free_src_node(sns[sn_type]);
counter_u64_add(
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
}
PF_HASHROW_UNLOCK(snhs[sn_type]);
}
}
drop:
if (s != NULL) {
pf_src_tree_remove_state(s);
s->timeout = PFTM_UNLINKED;
pf_free_state(s);
}
return (PF_DROP);
}
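/*
 * Rewrite addresses, ports and ICMP ids for a translated packet.
 * In this implementation the function is only used when the address
 * family changes (af-to), hence the MPASS(afto) below.
 */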
int
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
int icmp_dir)
{
int rewrite = 0;
int afto = pd->af != pd->naf;
MPASS(afto);
switch (pd->proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
if (afto || *pd->sport != sport) {
pf_change_ap(pd, pd->src, pd->sport,
saddr, sport);
rewrite = 1;
}
if (afto || *pd->dport != dport) {
pf_change_ap(pd, pd->dst, pd->dport,
daddr, dport);
rewrite = 1;
}
break;
#ifdef INET
case IPPROTO_ICMP:
if (pd->af != AF_INET)
return (0);
if (afto) {
if (pf_translate_icmp_af(AF_INET6, &pd->hdr.icmp))
return (-1);
pd->proto = IPPROTO_ICMPV6;
rewrite = 1;
}
if (virtual_type == htons(ICMP_ECHO)) {
u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
if (icmpid != pd->hdr.icmp.icmp_id) {
pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
pd->hdr.icmp.icmp_cksum,
pd->hdr.icmp.icmp_id, icmpid, 0);
pd->hdr.icmp.icmp_id = icmpid;
rewrite = 1;
}
}
break;
#endif
#ifdef INET6
case IPPROTO_ICMPV6:
if (pd->af != AF_INET6)
return (0);
if (afto) {
if (pf_translate_icmp_af(AF_INET, &pd->hdr.icmp6))
return (0);
pd->proto = IPPROTO_ICMP;
rewrite = 1;
}
break;
#endif
default:
break;
}
return (rewrite);
}
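/*
 * Compat translation path: rewrite the packet headers from the state
 * key 'nk' computed by the NAT/RDR rule, keeping pd's cached
 * addresses and ports in sync with the rewrite.
 */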
int
pf_translate_compat(struct pf_test_ctx *ctx)
{
struct pf_pdesc *pd = ctx->pd;
struct pf_state_key *nk = ctx->nk;
struct tcphdr *th = &pd->hdr.tcp;
int rewrite = 0;
KASSERT(ctx->sk != NULL, ("%s: null sk", __func__));
KASSERT(ctx->nk != NULL, ("%s: null nk", __func__));
switch (pd->virtual_proto) {
case IPPROTO_TCP:
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
nk->port[pd->sidx] != pd->nsport) {
pf_change_ap(pd, pd->src, &th->th_sport,
&nk->addr[pd->sidx], nk->port[pd->sidx]);
pd->sport = &th->th_sport;
pd->nsport = th->th_sport;
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
nk->port[pd->didx] != pd->ndport) {
pf_change_ap(pd, pd->dst, &th->th_dport,
&nk->addr[pd->didx], nk->port[pd->didx]);
pd->dport = &th->th_dport;
pd->ndport = th->th_dport;
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
rewrite++;
break;
case IPPROTO_UDP:
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
nk->port[pd->sidx] != pd->nsport) {
pf_change_ap(pd, pd->src,
&pd->hdr.udp.uh_sport,
&nk->addr[pd->sidx],
nk->port[pd->sidx]);
pd->sport = &pd->hdr.udp.uh_sport;
pd->nsport = pd->hdr.udp.uh_sport;
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
nk->port[pd->didx] != pd->ndport) {
pf_change_ap(pd, pd->dst,
&pd->hdr.udp.uh_dport,
&nk->addr[pd->didx],
nk->port[pd->didx]);
pd->dport = &pd->hdr.udp.uh_dport;
pd->ndport = pd->hdr.udp.uh_dport;
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
rewrite++;
break;
case IPPROTO_SCTP: {
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
nk->port[pd->sidx] != pd->nsport) {
pf_change_ap(pd, pd->src,
&pd->hdr.sctp.src_port,
&nk->addr[pd->sidx],
nk->port[pd->sidx]);
pd->sport = &pd->hdr.sctp.src_port;
pd->nsport = pd->hdr.sctp.src_port;
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
nk->port[pd->didx] != pd->ndport) {
pf_change_ap(pd, pd->dst,
&pd->hdr.sctp.dest_port,
&nk->addr[pd->didx],
nk->port[pd->didx]);
pd->dport = &pd->hdr.sctp.dest_port;
pd->ndport = pd->hdr.sctp.dest_port;
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
break;
}
#ifdef INET
case IPPROTO_ICMP:
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) {
pf_change_a(&pd->src->v4.s_addr, pd->ip_sum,
nk->addr[pd->sidx].v4.s_addr, 0);
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) {
pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum,
nk->addr[pd->didx].v4.s_addr, 0);
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
if (ctx->virtual_type == htons(ICMP_ECHO) &&
nk->port[pd->sidx] != pd->hdr.icmp.icmp_id) {
pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
pd->hdr.icmp.icmp_cksum, pd->nsport,
nk->port[pd->sidx], 0);
pd->hdr.icmp.icmp_id = nk->port[pd->sidx];
pd->sport = &pd->hdr.icmp.icmp_id;
}
m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
break;
#endif
#ifdef INET6
case IPPROTO_ICMPV6:
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) {
pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum,
&nk->addr[pd->sidx], 0);
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) {
pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum,
&nk->addr[pd->didx], 0);
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
rewrite++;
break;
#endif
default:
switch (pd->af) {
#ifdef INET
case AF_INET:
if (PF_ANEQ(&pd->nsaddr,
&nk->addr[pd->sidx], AF_INET)) {
pf_change_a(&pd->src->v4.s_addr,
pd->ip_sum,
nk->addr[pd->sidx].v4.s_addr, 0);
pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr,
&nk->addr[pd->didx], AF_INET)) {
pf_change_a(&pd->dst->v4.s_addr,
pd->ip_sum,
nk->addr[pd->didx].v4.s_addr, 0);
pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
break;
#endif
#ifdef INET6
case AF_INET6:
if (PF_ANEQ(&pd->nsaddr,
&nk->addr[pd->sidx], AF_INET6)) {
pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx],
pd->af);
pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af);
}
if (PF_ANEQ(&pd->ndaddr,
&nk->addr[pd->didx], AF_INET6)) {
pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx],
pd->af);
pf_addrcpy(pd->dst, &nk->addr[pd->didx],
pd->af);
}
break;
#endif
}
break;
}
return (rewrite);
}
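/*
 * Full TCP tracking: enforce the sequence/ACK windows (after Guido
 * van Rooij's stateful filtering algorithm), apply sequence number
 * modulation and SACK rewriting, and advance both peers' states.
 */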
static int
pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd,
u_short *reason, int *copyback, struct pf_state_peer *src,
struct pf_state_peer *dst, u_int8_t psrc, u_int8_t pdst)
{
struct tcphdr *th = &pd->hdr.tcp;
u_int16_t win = ntohs(th->th_win);
u_int32_t ack, end, data_end, seq, orig_seq;
u_int8_t sws, dws;
int ackskew;
if (src->wscale && dst->wscale && !(tcp_get_flags(th) & TH_SYN)) {
sws = src->wscale & PF_WSCALE_MASK;
dws = dst->wscale & PF_WSCALE_MASK;
} else
sws = dws = 0;
orig_seq = seq = ntohl(th->th_seq);
if (src->seqlo == 0) {
if ((state->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
src->scrub == NULL) {
if (pf_normalize_tcp_init(pd, th, src)) {
REASON_SET(reason, PFRES_MEMORY);
return (PF_DROP);
}
}
if (dst->seqdiff && !src->seqdiff) {
while ((src->seqdiff = arc4random() - seq) == 0)
;
ack = ntohl(th->th_ack) - dst->seqdiff;
pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
src->seqdiff), 0);
pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
*copyback = 1;
} else {
ack = ntohl(th->th_ack);
}
end = seq + pd->p_len;
if (tcp_get_flags(th) & TH_SYN) {
end++;
if (dst->wscale & PF_WSCALE_FLAG) {
src->wscale = pf_get_wscale(pd);
if (src->wscale & PF_WSCALE_FLAG) {
sws = src->wscale & PF_WSCALE_MASK;
win = ((u_int32_t)win + (1 << sws) - 1)
>> sws;
dws = dst->wscale & PF_WSCALE_MASK;
} else {
dst->max_win = MIN(TCP_MAXWIN,
(u_int32_t)dst->max_win <<
(dst->wscale & PF_WSCALE_MASK));
dst->wscale = 0;
}
}
}
data_end = end;
if (tcp_get_flags(th) & TH_FIN)
end++;
src->seqlo = seq;
if (src->state < TCPS_SYN_SENT)
pf_set_protostate(state, psrc, TCPS_SYN_SENT);
if (src->seqhi == 1 ||
SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
src->seqhi = end + MAX(1, dst->max_win << dws);
if (win > src->max_win)
src->max_win = win;
} else {
ack = ntohl(th->th_ack) - dst->seqdiff;
if (src->seqdiff) {
pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
src->seqdiff), 0);
pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
*copyback = 1;
}
end = seq + pd->p_len;
if (tcp_get_flags(th) & TH_SYN)
end++;
data_end = end;
if (tcp_get_flags(th) & TH_FIN)
end++;
}
if ((tcp_get_flags(th) & TH_ACK) == 0) {
ack = dst->seqlo;
} else if ((ack == 0 &&
(tcp_get_flags(th) & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
(dst->state < TCPS_SYN_SENT)) {
ack = dst->seqlo;
}
if (seq == end) {
seq = src->seqlo;
data_end = end = seq;
}
ackskew = dst->seqlo - ack;
if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
if (pf_modulate_sack(pd, th, dst))
*copyback = 1;
}
#define MAXACKWINDOW (0xffff + 1500)
if (SEQ_GEQ(src->seqhi, data_end) &&
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
(ackskew >= -MAXACKWINDOW) &&
(ackskew <= (MAXACKWINDOW << sws)) &&
((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo ||
(orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
(SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) &&
SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 &&
(tcp_get_flags(th) & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) {
if (dst->scrub || src->scrub) {
if (pf_normalize_tcp_stateful(pd, reason, th,
state, src, dst, copyback))
return (PF_DROP);
}
if (src->max_win < win)
src->max_win = win;
if (SEQ_GT(end, src->seqlo))
src->seqlo = end;
if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
dst->seqhi = ack + MAX((win << sws), 1);
if (tcp_get_flags(th) & TH_SYN)
if (src->state < TCPS_SYN_SENT)
pf_set_protostate(state, psrc, TCPS_SYN_SENT);
if (tcp_get_flags(th) & TH_FIN)
if (src->state < TCPS_CLOSING)
pf_set_protostate(state, psrc, TCPS_CLOSING);
if (tcp_get_flags(th) & TH_ACK) {
if (dst->state == TCPS_SYN_SENT) {
pf_set_protostate(state, pdst,
TCPS_ESTABLISHED);
if (src->state == TCPS_ESTABLISHED &&
state->sns[PF_SN_LIMIT] != NULL &&
pf_src_connlimit(state)) {
REASON_SET(reason, PFRES_SRCLIMIT);
return (PF_DROP);
}
} else if (dst->state == TCPS_CLOSING)
pf_set_protostate(state, pdst,
TCPS_FIN_WAIT_2);
}
if (tcp_get_flags(th) & TH_RST)
pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);
state->expire = pf_get_uptime();
if (src->state >= TCPS_FIN_WAIT_2 &&
dst->state >= TCPS_FIN_WAIT_2)
state->timeout = PFTM_TCP_CLOSED;
else if (src->state >= TCPS_CLOSING &&
dst->state >= TCPS_CLOSING)
state->timeout = PFTM_TCP_FIN_WAIT;
else if (src->state < TCPS_ESTABLISHED ||
dst->state < TCPS_ESTABLISHED)
state->timeout = PFTM_TCP_OPENING;
else if (src->state >= TCPS_CLOSING ||
dst->state >= TCPS_CLOSING)
state->timeout = PFTM_TCP_CLOSING;
else
state->timeout = PFTM_TCP_ESTABLISHED;
} else if ((dst->state < TCPS_SYN_SENT ||
dst->state >= TCPS_FIN_WAIT_2 ||
src->state >= TCPS_FIN_WAIT_2) &&
SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: loose state match: ");
pf_print_state(state);
pf_print_flags(tcp_get_flags(th));
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
"pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
pd->p_len, ackskew, (unsigned long long)state->packets[0],
(unsigned long long)state->packets[1],
pd->dir == PF_IN ? "in" : "out",
pd->dir == state->direction ? "fwd" : "rev");
}
if (dst->scrub || src->scrub) {
if (pf_normalize_tcp_stateful(pd, reason, th,
state, src, dst, copyback))
return (PF_DROP);
}
if (src->max_win < win)
src->max_win = win;
if (SEQ_GT(end, src->seqlo))
src->seqlo = end;
if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
dst->seqhi = ack + MAX((win << sws), 1);
if (tcp_get_flags(th) & TH_FIN)
if (src->state < TCPS_CLOSING)
pf_set_protostate(state, psrc, TCPS_CLOSING);
if (tcp_get_flags(th) & TH_RST)
pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);
} else {
if (state->dst.state == TCPS_SYN_SENT &&
state->src.state == TCPS_SYN_SENT) {
if (!(tcp_get_flags(th) & TH_RST))
pf_send_tcp(state->rule, pd->af,
pd->dst, pd->src, th->th_dport,
th->th_sport, ntohl(th->th_ack), 0,
TH_RST, 0, 0,
state->rule->return_ttl, M_SKIP_FIREWALL,
0, 0, state->act.rtableid, reason);
src->seqlo = 0;
src->seqhi = 1;
src->max_win = 1;
} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: BAD state: ");
pf_print_state(state);
pf_print_flags(tcp_get_flags(th));
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
"pkts=%llu:%llu dir=%s,%s\n",
seq, orig_seq, ack, pd->p_len, ackskew,
(unsigned long long)state->packets[0],
(unsigned long long)state->packets[1],
pd->dir == PF_IN ? "in" : "out",
pd->dir == state->direction ? "fwd" : "rev");
printf("pf: State failure on: %c %c %c %c | %c %c\n",
SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
' ': '2',
(ackskew >= -MAXACKWINDOW) ? ' ' : '3',
(ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?' ' :'5',
SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
}
REASON_SET(reason, PFRES_BADSTATE);
return (PF_DROP);
}
return (PF_PASS);
}
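/*
 * Sloppy TCP tracking: advance the peer states on TCP flags alone,
 * without enforcing sequence windows.
 */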
static int
pf_tcp_track_sloppy(struct pf_kstate *state, struct pf_pdesc *pd,
u_short *reason, struct pf_state_peer *src, struct pf_state_peer *dst,
u_int8_t psrc, u_int8_t pdst)
{
struct tcphdr *th = &pd->hdr.tcp;
if (tcp_get_flags(th) & TH_SYN)
if (src->state < TCPS_SYN_SENT)
pf_set_protostate(state, psrc, TCPS_SYN_SENT);
if (tcp_get_flags(th) & TH_FIN)
if (src->state < TCPS_CLOSING)
pf_set_protostate(state, psrc, TCPS_CLOSING);
if (tcp_get_flags(th) & TH_ACK) {
if (dst->state == TCPS_SYN_SENT) {
pf_set_protostate(state, pdst, TCPS_ESTABLISHED);
if (src->state == TCPS_ESTABLISHED &&
state->sns[PF_SN_LIMIT] != NULL &&
pf_src_connlimit(state)) {
REASON_SET(reason, PFRES_SRCLIMIT);
return (PF_DROP);
}
} else if (dst->state == TCPS_CLOSING) {
pf_set_protostate(state, pdst, TCPS_FIN_WAIT_2);
} else if (src->state == TCPS_SYN_SENT &&
dst->state < TCPS_SYN_SENT) {
pf_set_protostate(state, PF_PEER_BOTH,
TCPS_ESTABLISHED);
dst->state = src->state = TCPS_ESTABLISHED;
if (state->sns[PF_SN_LIMIT] != NULL &&
pf_src_connlimit(state)) {
REASON_SET(reason, PFRES_SRCLIMIT);
return (PF_DROP);
}
} else if (src->state == TCPS_CLOSING &&
dst->state == TCPS_ESTABLISHED &&
dst->seqlo == 0) {
pf_set_protostate(state, pdst, TCPS_CLOSING);
}
}
if (tcp_get_flags(th) & TH_RST)
pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT);
state->expire = pf_get_uptime();
if (src->state >= TCPS_FIN_WAIT_2 &&
dst->state >= TCPS_FIN_WAIT_2)
state->timeout = PFTM_TCP_CLOSED;
else if (src->state >= TCPS_CLOSING &&
dst->state >= TCPS_CLOSING)
state->timeout = PFTM_TCP_FIN_WAIT;
else if (src->state < TCPS_ESTABLISHED ||
dst->state < TCPS_ESTABLISHED)
state->timeout = PFTM_TCP_OPENING;
else if (src->state >= TCPS_CLOSING ||
dst->state >= TCPS_CLOSING)
state->timeout = PFTM_TCP_CLOSING;
else
state->timeout = PFTM_TCP_ESTABLISHED;
return (PF_PASS);
}
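/*
 * SYN proxy: complete the three-way handshake with the client first,
 * then replay it towards the server and splice the two half
 * connections together by offsetting sequence numbers.
 */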
static int
pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason)
{
struct pf_state_key *sk = state->key[pd->didx];
struct tcphdr *th = &pd->hdr.tcp;
if (state->src.state == PF_TCPS_PROXY_SRC) {
if (pd->dir != state->direction) {
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_SYNPROXY_DROP);
}
if (tcp_get_flags(th) & TH_SYN) {
if (ntohl(th->th_seq) != state->src.seqlo) {
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_DROP);
}
pf_send_tcp(state->rule, pd->af, pd->dst,
pd->src, th->th_dport, th->th_sport,
state->src.seqhi, ntohl(th->th_seq) + 1,
TH_SYN|TH_ACK, 0, state->src.mss, 0,
M_SKIP_FIREWALL, 0, 0, state->act.rtableid,
reason);
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_SYNPROXY_DROP);
} else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
(ntohl(th->th_ack) != state->src.seqhi + 1) ||
(ntohl(th->th_seq) != state->src.seqlo + 1)) {
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_DROP);
} else if (state->sns[PF_SN_LIMIT] != NULL &&
pf_src_connlimit(state)) {
REASON_SET(reason, PFRES_SRCLIMIT);
return (PF_DROP);
} else
pf_set_protostate(state, PF_PEER_SRC,
PF_TCPS_PROXY_DST);
}
if (state->src.state == PF_TCPS_PROXY_DST) {
if (pd->dir == state->direction) {
if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != TH_ACK) ||
(ntohl(th->th_ack) != state->src.seqhi + 1) ||
(ntohl(th->th_seq) != state->src.seqlo + 1)) {
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_DROP);
}
state->src.max_win = MAX(ntohs(th->th_win), 1);
if (state->dst.seqhi == 1)
state->dst.seqhi = arc4random();
pf_send_tcp(state->rule, pd->af,
&sk->addr[pd->sidx], &sk->addr[pd->didx],
sk->port[pd->sidx], sk->port[pd->didx],
state->dst.seqhi, 0, TH_SYN, 0,
state->src.mss, 0,
state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0,
state->tag, 0, state->act.rtableid,
reason);
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_SYNPROXY_DROP);
} else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) !=
(TH_SYN|TH_ACK)) ||
(ntohl(th->th_ack) != state->dst.seqhi + 1)) {
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_DROP);
} else {
state->dst.max_win = MAX(ntohs(th->th_win), 1);
state->dst.seqlo = ntohl(th->th_seq);
pf_send_tcp(state->rule, pd->af, pd->dst,
pd->src, th->th_dport, th->th_sport,
ntohl(th->th_ack), ntohl(th->th_seq) + 1,
TH_ACK, state->src.max_win, 0, 0, 0,
state->tag, 0, state->act.rtableid,
reason);
pf_send_tcp(state->rule, pd->af,
&sk->addr[pd->sidx], &sk->addr[pd->didx],
sk->port[pd->sidx], sk->port[pd->didx],
state->src.seqhi + 1, state->src.seqlo + 1,
TH_ACK, state->dst.max_win, 0, 0,
M_SKIP_FIREWALL, 0, 0, state->act.rtableid,
reason);
state->src.seqdiff = state->dst.seqhi -
state->src.seqlo;
state->dst.seqdiff = state->src.seqhi -
state->dst.seqlo;
state->src.seqhi = state->src.seqlo +
state->dst.max_win;
state->dst.seqhi = state->dst.seqlo +
state->src.max_win;
state->src.wscale = state->dst.wscale = 0;
pf_set_protostate(state, PF_PEER_BOTH,
TCPS_ESTABLISHED);
REASON_SET(reason, PFRES_SYNPROXY);
return (PF_SYNPROXY_DROP);
}
}
return (PF_PASS);
}
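/*
 * Look up the state for a packet and run the per-protocol tracking;
 * any NAT or af-to rewrite recorded in the state keys is applied on
 * the way out.
 */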
static int
pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
{
struct pf_state_key_cmp key;
int copyback = 0;
struct pf_state_peer *src, *dst;
uint8_t psrc, pdst;
int action;
bzero(&key, sizeof(key));
key.af = pd->af;
key.proto = pd->virtual_proto;
pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
key.port[pd->sidx] = pd->osport;
key.port[pd->didx] = pd->odport;
action = pf_find_state(pd, &key, state);
if (action != PF_MATCH)
return (action);
action = PF_PASS;
if (pd->dir == (*state)->direction) {
if (PF_REVERSED_KEY(*state, pd->af)) {
src = &(*state)->dst;
dst = &(*state)->src;
psrc = PF_PEER_DST;
pdst = PF_PEER_SRC;
} else {
src = &(*state)->src;
dst = &(*state)->dst;
psrc = PF_PEER_SRC;
pdst = PF_PEER_DST;
}
} else {
if (PF_REVERSED_KEY(*state, pd->af)) {
src = &(*state)->src;
dst = &(*state)->dst;
psrc = PF_PEER_SRC;
pdst = PF_PEER_DST;
} else {
src = &(*state)->dst;
dst = &(*state)->src;
psrc = PF_PEER_DST;
pdst = PF_PEER_SRC;
}
}
switch (pd->virtual_proto) {
case IPPROTO_TCP: {
struct tcphdr *th = &pd->hdr.tcp;
if ((action = pf_synproxy(pd, *state, reason)) != PF_PASS)
return (action);
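/*
 * A new handshake on a state that has shut down both directions
 * means the endpoints are reusing the connection: discard the old
 * state so the SYN can create a fresh one.  On a still-established
 * state answer with a challenge ACK instead.
 */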
if (((tcp_get_flags(th) & (TH_SYN | TH_ACK)) == TH_SYN) ||
((tcp_get_flags(th) & (TH_SYN | TH_ACK | TH_RST)) == TH_ACK &&
pf_syncookie_check(pd) && pd->dir == PF_IN)) {
if ((*state)->src.state >= TCPS_FIN_WAIT_2 &&
(*state)->dst.state >= TCPS_FIN_WAIT_2) {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: state reuse ");
pf_print_state(*state);
pf_print_flags(tcp_get_flags(th));
printf("\n");
}
pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
pf_remove_state(*state);
*state = NULL;
return (PF_DROP);
} else if ((*state)->src.state >= TCPS_ESTABLISHED &&
(*state)->dst.state >= TCPS_ESTABLISHED) {
pf_send_challenge_ack(pd, *state, src, dst, reason);
return (PF_DROP);
}
}
if ((*state)->state_flags & PFSTATE_SLOPPY) {
if (pf_tcp_track_sloppy(*state, pd, reason, src, dst,
psrc, pdst) == PF_DROP)
return (PF_DROP);
} else {
int ret;
ret = pf_tcp_track_full(*state, pd, reason,
&copyback, src, dst, psrc, pdst);
if (ret == PF_DROP)
return (PF_DROP);
}
break;
}
case IPPROTO_UDP:
if (src->state < PFUDPS_SINGLE)
pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
if (dst->state == PFUDPS_SINGLE)
pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);
(*state)->expire = pf_get_uptime();
if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
(*state)->timeout = PFTM_UDP_MULTIPLE;
else
(*state)->timeout = PFTM_UDP_SINGLE;
break;
case IPPROTO_SCTP:
if ((src->state >= SCTP_SHUTDOWN_SENT || src->state == SCTP_CLOSED) &&
(dst->state >= SCTP_SHUTDOWN_SENT || dst->state == SCTP_CLOSED) &&
pd->sctp_flags & PFDESC_SCTP_INIT) {
pf_set_protostate(*state, PF_PEER_BOTH, SCTP_CLOSED);
pf_remove_state(*state);
*state = NULL;
return (PF_DROP);
}
if (pf_sctp_track(*state, pd, reason) != PF_PASS)
return (PF_DROP);
if (pd->sctp_flags & PFDESC_SCTP_INIT) {
if (src->state < SCTP_COOKIE_WAIT) {
pf_set_protostate(*state, psrc, SCTP_COOKIE_WAIT);
(*state)->timeout = PFTM_SCTP_OPENING;
}
}
if (pd->sctp_flags & PFDESC_SCTP_INIT_ACK) {
MPASS(dst->scrub != NULL);
if (dst->scrub->pfss_v_tag == 0)
dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;
}
if ((*state)->kif == V_pfi_all &&
(*state)->rule->rule_flag & PFRULE_IFBOUND)
(*state)->kif = pd->kif;
if (pd->sctp_flags & (PFDESC_SCTP_COOKIE | PFDESC_SCTP_HEARTBEAT_ACK)) {
if (src->state < SCTP_ESTABLISHED) {
pf_set_protostate(*state, psrc, SCTP_ESTABLISHED);
(*state)->timeout = PFTM_SCTP_ESTABLISHED;
}
}
if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN |
PFDESC_SCTP_SHUTDOWN_COMPLETE)) {
if (src->state < SCTP_SHUTDOWN_PENDING) {
pf_set_protostate(*state, psrc, SCTP_SHUTDOWN_PENDING);
(*state)->timeout = PFTM_SCTP_CLOSING;
}
}
if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN_COMPLETE | PFDESC_SCTP_ABORT)) {
pf_set_protostate(*state, psrc, SCTP_CLOSED);
(*state)->timeout = PFTM_SCTP_CLOSED;
}
(*state)->expire = pf_get_uptime();
break;
default:
if (src->state < PFOTHERS_SINGLE)
pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
if (dst->state == PFOTHERS_SINGLE)
pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);
(*state)->expire = pf_get_uptime();
if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
(*state)->timeout = PFTM_OTHER_MULTIPLE;
else
(*state)->timeout = PFTM_OTHER_SINGLE;
break;
}
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
int afto, sidx, didx;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd->didx;
didx = pd->sidx;
} else {
sidx = pd->sidx;
didx = pd->didx;
}
if (afto) {
pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
pd->naf = nk->af;
action = PF_AFRT;
}
if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) ||
nk->port[sidx] != pd->osport)
pf_change_ap(pd, pd->src, pd->sport,
&nk->addr[sidx], nk->port[sidx]);
if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
nk->port[didx] != pd->odport)
pf_change_ap(pd, pd->dst, pd->dport,
&nk->addr[didx], nk->port[didx]);
copyback = 1;
}
if (copyback && pd->hdrlen > 0)
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
return (action);
}
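/*
 * Validate the sending peer's SCTP verification tag, learning it
 * from the first packet seen in that direction.
 */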
static int
pf_sctp_track(struct pf_kstate *state, struct pf_pdesc *pd,
u_short *reason)
{
struct pf_state_peer *src;
if (pd->dir == state->direction) {
if (PF_REVERSED_KEY(state, pd->af))
src = &state->dst;
else
src = &state->src;
} else {
if (PF_REVERSED_KEY(state, pd->af))
src = &state->src;
else
src = &state->dst;
}
if (src->scrub != NULL) {
if (src->scrub->pfss_v_tag == 0)
src->scrub->pfss_v_tag = pd->hdr.sctp.v_tag;
else if (src->scrub->pfss_v_tag != pd->hdr.sctp.v_tag)
return (PF_DROP);
}
return (PF_PASS);
}
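/*
 * On state teardown, remove the state's addresses from the SCTP
 * endpoint database.
 */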
static void
pf_sctp_multihome_detach_addr(const struct pf_kstate *s)
{
struct pf_sctp_endpoint key;
struct pf_sctp_endpoint *ep;
struct pf_state_key *sks = s->key[PF_SK_STACK];
struct pf_sctp_source *i, *tmp;
if (sks == NULL || sks->proto != IPPROTO_SCTP || s->dst.scrub == NULL)
return;
PF_SCTP_ENDPOINTS_LOCK();
key.v_tag = s->dst.scrub->pfss_v_tag;
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
if (ep != NULL) {
TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
if (pf_addr_cmp(&i->addr,
&s->key[PF_SK_WIRE]->addr[s->direction == PF_OUT],
s->key[PF_SK_WIRE]->af) == 0) {
SDT_PROBE3(pf, sctp, multihome, remove,
key.v_tag, s, i);
TAILQ_REMOVE(&ep->sources, i, entry);
free(i, M_PFTEMP);
break;
}
}
if (TAILQ_EMPTY(&ep->sources)) {
RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
free(ep, M_PFTEMP);
}
}
key.v_tag = s->src.scrub->pfss_v_tag;
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
if (ep != NULL) {
TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
if (pf_addr_cmp(&i->addr,
&s->key[PF_SK_WIRE]->addr[s->direction == PF_IN],
s->key[PF_SK_WIRE]->af) == 0) {
SDT_PROBE3(pf, sctp, multihome, remove,
key.v_tag, s, i);
TAILQ_REMOVE(&ep->sources, i, entry);
free(i, M_PFTEMP);
break;
}
}
if (TAILQ_EMPTY(&ep->sources)) {
RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
free(ep, M_PFTEMP);
}
}
PF_SCTP_ENDPOINTS_UNLOCK();
}
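/*
 * Record an additional source address for the SCTP endpoint
 * identified by v_tag, bounded by PF_SCTP_MAX_ENDPOINTS.
 */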
static void
pf_sctp_multihome_add_addr(struct pf_pdesc *pd, struct pf_addr *a, uint32_t v_tag)
{
struct pf_sctp_endpoint key = {
.v_tag = v_tag,
};
struct pf_sctp_source *i;
struct pf_sctp_endpoint *ep;
int count;
PF_SCTP_ENDPOINTS_LOCK();
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
if (ep == NULL) {
ep = malloc(sizeof(struct pf_sctp_endpoint),
M_PFTEMP, M_NOWAIT);
if (ep == NULL) {
PF_SCTP_ENDPOINTS_UNLOCK();
return;
}
ep->v_tag = v_tag;
TAILQ_INIT(&ep->sources);
RB_INSERT(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
}
count = 0;
TAILQ_FOREACH(i, &ep->sources, entry) {
count++;
if (pf_addr_cmp(&i->addr, a, pd->af) == 0) {
PF_SCTP_ENDPOINTS_UNLOCK();
return;
}
}
if (count >= PF_SCTP_MAX_ENDPOINTS) {
PF_SCTP_ENDPOINTS_UNLOCK();
return;
}
i = malloc(sizeof(*i), M_PFTEMP, M_NOWAIT);
if (i == NULL) {
PF_SCTP_ENDPOINTS_UNLOCK();
return;
}
i->af = pd->af;
memcpy(&i->addr, a, sizeof(*a));
TAILQ_INSERT_TAIL(&ep->sources, i, entry);
SDT_PROBE2(pf, sctp, multihome, add, v_tag, i);
PF_SCTP_ENDPOINTS_UNLOCK();
}
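/*
 * Process the multihome jobs queued while parsing SCTP chunks:
 * create states for addresses announced via INIT/ASCONF add
 * parameters and shut down states for deleted addresses.
 */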
static void
pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif,
struct pf_kstate *s, int action)
{
struct pf_krule_slist match_rules;
struct pf_sctp_multihome_job *j, *tmp;
struct pf_sctp_source *i;
int ret;
struct pf_kstate *sm = NULL;
struct pf_krule *ra = NULL;
struct pf_krule *r = &V_pf_default_rule;
struct pf_kruleset *rs = NULL;
u_short reason;
bool do_extra = true;
PF_RULES_RLOCK_TRACKER;
again:
TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) {
if (s == NULL || action != PF_PASS)
goto free;
MPASS(! (pd->sctp_flags & PFDESC_SCTP_ADD_IP));
switch (j->op) {
case SCTP_ADD_IP_ADDRESS: {
uint32_t v_tag = pd->sctp_initiate_tag;
if (v_tag == 0) {
if (s->direction == pd->dir)
v_tag = s->src.scrub->pfss_v_tag;
else
v_tag = s->dst.scrub->pfss_v_tag;
}
if (pf_addr_cmp(&j->src, pd->src, pd->af) == 0) {
break;
}
j->pd.sctp_flags |= PFDESC_SCTP_ADD_IP;
PF_RULES_RLOCK();
sm = NULL;
if (s->rule->rule_flag & PFRULE_ALLOW_RELATED) {
j->pd.related_rule = s->rule;
}
SLIST_INIT(&match_rules);
ret = pf_test_rule(&r, &sm,
&j->pd, &ra, &rs, &reason, NULL, &match_rules);
pf_free_match_rules(&match_rules);
PF_RULES_RUNLOCK();
SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->pd.m, ret);
if (ret != PF_DROP && sm != NULL) {
if (sm->direction == s->direction) {
sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
} else {
sm->src.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
sm->dst.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
}
PF_STATE_UNLOCK(sm);
} else {
break;
}
pf_sctp_multihome_add_addr(pd, &j->src, v_tag);
if (! do_extra) {
break;
}
struct pf_sctp_endpoint key = {
.v_tag = pd->hdr.sctp.v_tag,
};
struct pf_sctp_endpoint *ep;
PF_SCTP_ENDPOINTS_LOCK();
ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
if (ep == NULL) {
PF_SCTP_ENDPOINTS_UNLOCK();
break;
}
MPASS(ep != NULL);
TAILQ_FOREACH(i, &ep->sources, entry) {
struct pf_sctp_multihome_job *nj;
if (i->af != pd->af)
continue;
nj = malloc(sizeof(*nj), M_PFTEMP, M_NOWAIT | M_ZERO);
if (! nj) {
continue;
}
memcpy(&nj->pd, &j->pd, sizeof(j->pd));
memcpy(&nj->src, &j->src, sizeof(nj->src));
nj->pd.src = &nj->src;
memcpy(&nj->dst, &i->addr, sizeof(nj->dst));
nj->pd.dst = &nj->dst;
nj->pd.m = j->pd.m;
nj->op = j->op;
MPASS(nj->pd.pcksum);
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next);
}
PF_SCTP_ENDPOINTS_UNLOCK();
break;
}
case SCTP_DEL_IP_ADDRESS: {
struct pf_state_key_cmp key;
uint8_t psrc;
int action;
bzero(&key, sizeof(key));
key.af = j->pd.af;
key.proto = IPPROTO_SCTP;
if (j->pd.dir == PF_IN) {
pf_addrcpy(&key.addr[0], j->pd.src, key.af);
pf_addrcpy(&key.addr[1], j->pd.dst, key.af);
key.port[0] = j->pd.hdr.sctp.src_port;
key.port[1] = j->pd.hdr.sctp.dest_port;
} else {
pf_addrcpy(&key.addr[1], j->pd.src, key.af);
pf_addrcpy(&key.addr[0], j->pd.dst, key.af);
key.port[1] = j->pd.hdr.sctp.src_port;
key.port[0] = j->pd.hdr.sctp.dest_port;
}
action = pf_find_state(&j->pd, &key, &sm);
if (action == PF_MATCH) {
PF_STATE_LOCK_ASSERT(sm);
if (j->pd.dir == sm->direction) {
psrc = PF_PEER_SRC;
} else {
psrc = PF_PEER_DST;
}
pf_set_protostate(sm, psrc, SCTP_SHUTDOWN_PENDING);
sm->timeout = PFTM_SCTP_CLOSING;
PF_STATE_UNLOCK(sm);
}
break;
}
default:
panic("Unknown op %#x", j->op);
}
free:
TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next);
free(j, M_PFTEMP);
}
if (! TAILQ_EMPTY(&pd->sctp_multihome_jobs)) {
do_extra = false;
goto again;
}
}
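/*
 * Walk the parameter list of an SCTP INIT or ASCONF chunk and queue
 * a multihome job for every address parameter found, recursing into
 * ASCONF add/delete wrappers.
 */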
static int
pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
{
int off = 0;
struct pf_sctp_multihome_job *job;
SDT_PROBE4(pf, sctp, multihome_scan, entry, start, len, pd, op);
while (off < len) {
struct sctp_paramhdr h;
if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL,
pd->af))
return (PF_DROP);
if (ntohs(h.param_length) < 4)
return (PF_DROP);
SDT_PROBE2(pf, sctp, multihome_scan, param, ntohs(h.param_type),
ntohs(h.param_length));
switch (ntohs(h.param_type)) {
case SCTP_IPV4_ADDRESS: {
struct in_addr t;
if (ntohs(h.param_length) !=
(sizeof(struct sctp_paramhdr) + sizeof(t)))
return (PF_DROP);
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
NULL, pd->af))
return (PF_DROP);
if (in_nullhost(t))
t.s_addr = pd->src->v4.s_addr;
job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
if (! job)
return (PF_DROP);
SDT_PROBE2(pf, sctp, multihome_scan, ipv4, &t, op);
memcpy(&job->pd, pd, sizeof(*pd));
memcpy(&job->src, &t, sizeof(t));
job->pd.src = &job->src;
memcpy(&job->dst, pd->dst, sizeof(job->dst));
job->pd.dst = &job->dst;
job->pd.m = pd->m;
job->op = op;
MPASS(job->pd.pcksum);
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
break;
}
#ifdef INET6
case SCTP_IPV6_ADDRESS: {
struct in6_addr t;
if (ntohs(h.param_length) !=
(sizeof(struct sctp_paramhdr) + sizeof(t)))
return (PF_DROP);
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
NULL, pd->af))
return (PF_DROP);
if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
break;
if (memcmp(&t, &in6addr_any, sizeof(t)) == 0)
memcpy(&t, &pd->src->v6, sizeof(t));
job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
if (! job)
return (PF_DROP);
SDT_PROBE2(pf, sctp, multihome_scan, ipv6, &t, op);
memcpy(&job->pd, pd, sizeof(*pd));
memcpy(&job->src, &t, sizeof(t));
job->pd.src = &job->src;
memcpy(&job->dst, pd->dst, sizeof(job->dst));
job->pd.dst = &job->dst;
job->pd.m = pd->m;
job->op = op;
MPASS(job->pd.pcksum);
TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
break;
}
#endif
case SCTP_ADD_IP_ADDRESS: {
int ret;
struct sctp_asconf_paramhdr ah;
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
NULL, pd->af))
return (PF_DROP);
ret = pf_multihome_scan(start + off + sizeof(ah),
ntohs(ah.ph.param_length) - sizeof(ah), pd,
SCTP_ADD_IP_ADDRESS);
if (ret != PF_PASS)
return (ret);
break;
}
case SCTP_DEL_IP_ADDRESS: {
int ret;
struct sctp_asconf_paramhdr ah;
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
NULL, pd->af))
return (PF_DROP);
ret = pf_multihome_scan(start + off + sizeof(ah),
ntohs(ah.ph.param_length) - sizeof(ah), pd,
SCTP_DEL_IP_ADDRESS);
if (ret != PF_PASS)
return (ret);
break;
}
default:
break;
}
off += roundup(ntohs(h.param_length), 4);
}
return (PF_PASS);
}
int
pf_multihome_scan_init(int start, int len, struct pf_pdesc *pd)
{
start += sizeof(struct sctp_init_chunk);
len -= sizeof(struct sctp_init_chunk);
return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
}
int
pf_multihome_scan_asconf(int start, int len, struct pf_pdesc *pd)
{
start += sizeof(struct sctp_asconf_chunk);
len -= sizeof(struct sctp_asconf_chunk);
return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
}
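/*
 * Find the state for an ICMP query/reply.  Returns -1 with the
 * locked state on success, or a verdict (e.g. PF_DROP) when no
 * usable state exists or the direction is implausible.
 */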
int
pf_icmp_state_lookup(struct pf_state_key_cmp *key, struct pf_pdesc *pd,
struct pf_kstate **state, u_int16_t icmpid, u_int16_t type, int icmp_dir,
int *iidx, int multi, int inner)
{
int action, direction = pd->dir;
key->af = pd->af;
key->proto = pd->proto;
if (icmp_dir == PF_IN) {
*iidx = pd->sidx;
key->port[pd->sidx] = icmpid;
key->port[pd->didx] = type;
} else {
*iidx = pd->didx;
key->port[pd->sidx] = type;
key->port[pd->didx] = icmpid;
}
if (pf_state_key_addr_setup(pd, key, multi))
return (PF_DROP);
action = pf_find_state(pd, key, state);
if (action != PF_MATCH)
return (action);
if ((*state)->state_flags & PFSTATE_SLOPPY)
return (-1);
if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af)
direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ?
PF_IN : PF_OUT;
else
direction = (*state)->direction;
if ((*state)->rule->type &&
(((!inner && direction == pd->dir) ||
(inner && direction != pd->dir)) ?
PF_IN : PF_OUT) != icmp_dir) {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: icmp type %d in wrong direction (%d): ",
ntohs(type), icmp_dir);
pf_print_state(*state);
printf("\n");
}
PF_STATE_UNLOCK(*state);
*state = NULL;
return (PF_DROP);
}
return (-1);
}
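/*
 * State tracking for ICMP: queries and replies are matched directly;
 * error messages are matched via the state of the embedded packet
 * that triggered them.
 */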
static int
pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
u_short *reason)
{
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
u_int16_t *icmpsum, virtual_id, virtual_type;
u_int8_t icmptype, icmpcode;
int icmp_dir, iidx, ret;
struct pf_state_key_cmp key;
#ifdef INET
u_int16_t icmpid;
#endif
MPASS(*state == NULL);
bzero(&key, sizeof(key));
switch (pd->proto) {
#ifdef INET
case IPPROTO_ICMP:
icmptype = pd->hdr.icmp.icmp_type;
icmpcode = pd->hdr.icmp.icmp_code;
icmpid = pd->hdr.icmp.icmp_id;
icmpsum = &pd->hdr.icmp.icmp_cksum;
break;
#endif
#ifdef INET6
case IPPROTO_ICMPV6:
icmptype = pd->hdr.icmp6.icmp6_type;
icmpcode = pd->hdr.icmp6.icmp6_code;
#ifdef INET
icmpid = pd->hdr.icmp6.icmp6_id;
#endif
icmpsum = &pd->hdr.icmp6.icmp6_cksum;
break;
#endif
default:
panic("unhandled proto %d", pd->proto);
}
if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
&virtual_type) == 0) {
ret = pf_icmp_state_lookup(&key, pd, state, virtual_id,
virtual_type, icmp_dir, &iidx, 0, 0);
if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) {
MPASS(*state == NULL);
ret = pf_icmp_state_lookup(&key, pd, state,
virtual_id, virtual_type,
icmp_dir, &iidx, 1, 0);
}
if (ret >= 0) {
MPASS(*state == NULL);
return (ret);
}
(*state)->expire = pf_get_uptime();
(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
int afto, sidx, didx;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd->didx;
didx = pd->sidx;
iidx = !iidx;
} else {
sidx = pd->sidx;
didx = pd->didx;
}
switch (pd->af) {
#ifdef INET
case AF_INET:
#ifdef INET6
if (afto) {
if (pf_translate_icmp_af(AF_INET6,
&pd->hdr.icmp))
return (PF_DROP);
pd->proto = IPPROTO_ICMPV6;
}
#endif
if (!afto &&
PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET))
pf_change_a(&saddr->v4.s_addr,
pd->ip_sum,
nk->addr[sidx].v4.s_addr,
0);
if (!afto && PF_ANEQ(pd->dst,
&nk->addr[didx], AF_INET))
pf_change_a(&daddr->v4.s_addr,
pd->ip_sum,
nk->addr[didx].v4.s_addr, 0);
if (nk->port[iidx] !=
pd->hdr.icmp.icmp_id) {
pd->hdr.icmp.icmp_cksum =
pf_cksum_fixup(
pd->hdr.icmp.icmp_cksum, icmpid,
nk->port[iidx], 0);
pd->hdr.icmp.icmp_id =
nk->port[iidx];
}
m_copyback(pd->m, pd->off, ICMP_MINLEN,
(caddr_t )&pd->hdr.icmp);
break;
#endif
#ifdef INET6
case AF_INET6:
#ifdef INET
if (afto) {
if (pf_translate_icmp_af(AF_INET,
&pd->hdr.icmp6))
return (PF_DROP);
pd->proto = IPPROTO_ICMP;
}
#endif
if (!afto &&
PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET6))
pf_change_a6(saddr,
&pd->hdr.icmp6.icmp6_cksum,
&nk->addr[sidx], 0);
if (!afto && PF_ANEQ(pd->dst,
&nk->addr[didx], AF_INET6))
pf_change_a6(daddr,
&pd->hdr.icmp6.icmp6_cksum,
&nk->addr[didx], 0);
if (nk->port[iidx] != pd->hdr.icmp6.icmp6_id)
pd->hdr.icmp6.icmp6_id =
nk->port[iidx];
m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
(caddr_t )&pd->hdr.icmp6);
break;
#endif
}
if (afto) {
pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
nk->af);
pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
nk->af);
pd->naf = nk->af;
return (PF_AFRT);
}
}
return (PF_PASS);
} else {
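/*
 * ICMP error message: extract the header of the quoted packet
 * and search for the state it belongs to.
 */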
struct pf_pdesc pd2;
bzero(&pd2, sizeof pd2);
#ifdef INET
struct ip h2;
#endif
#ifdef INET6
struct ip6_hdr h2_6;
#endif
int ipoff2 = 0;
pd2.af = pd->af;
pd2.dir = pd->dir;
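/*
 * The packet quoted in the ICMP error travelled in the opposite
 * direction, so its source/destination indexes are flipped.
 */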
pd2.sidx = (pd->dir == PF_IN) ? 1 : 0;
pd2.didx = (pd->dir == PF_IN) ? 0 : 1;
pd2.m = pd->m;
pd2.pf_mtag = pd->pf_mtag;
pd2.kif = pd->kif;
switch (pd->af) {
#ifdef INET
case AF_INET:
ipoff2 = pd->off + ICMP_MINLEN;
if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2),
reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(ip)");
return (PF_DROP);
}
if (h2.ip_off & htons(IP_OFFMASK)) {
REASON_SET(reason, PFRES_FRAG);
return (PF_DROP);
}
pd2.off = ipoff2;
if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
return (PF_DROP);
pd2.tot_len = ntohs(h2.ip_len);
pd2.ttl = h2.ip_ttl;
pd2.src = (struct pf_addr *)&h2.ip_src;
pd2.dst = (struct pf_addr *)&h2.ip_dst;
pd2.ip_sum = &h2.ip_sum;
break;
#endif
#ifdef INET6
case AF_INET6:
ipoff2 = pd->off + sizeof(struct icmp6_hdr);
if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6),
reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(ip6)");
return (PF_DROP);
}
pd2.off = ipoff2;
if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
return (PF_DROP);
pd2.tot_len = ntohs(h2_6.ip6_plen) +
sizeof(struct ip6_hdr);
pd2.ttl = h2_6.ip6_hlim;
pd2.src = (struct pf_addr *)&h2_6.ip6_src;
pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
pd2.ip_sum = NULL;
break;
#endif
default:
unhandled_af(pd->af);
}
if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: BAD ICMP %d:%d outer dst: ",
icmptype, icmpcode);
pf_print_host(pd->src, 0, pd->af);
printf(" -> ");
pf_print_host(pd->dst, 0, pd->af);
printf(" inner src: ");
pf_print_host(pd2.src, 0, pd2.af);
printf(" -> ");
pf_print_host(pd2.dst, 0, pd2.af);
printf("\n");
}
REASON_SET(reason, PFRES_BADSTATE);
return (PF_DROP);
}
switch (pd2.proto) {
case IPPROTO_TCP: {
struct tcphdr *th = &pd2.hdr.tcp;
u_int32_t seq;
struct pf_state_peer *src, *dst;
u_int8_t dws;
int copyback = 0;
int action;
if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason,
pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(tcp)");
return (PF_DROP);
}
pd2.pcksum = &pd2.hdr.tcp.th_sum;
key.af = pd2.af;
key.proto = IPPROTO_TCP;
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[pd2.sidx] = th->th_sport;
key.port[pd2.didx] = th->th_dport;
action = pf_find_state(&pd2, &key, state);
if (action != PF_MATCH)
return (action);
if (pd->dir == (*state)->direction) {
if (PF_REVERSED_KEY(*state, pd->af)) {
src = &(*state)->src;
dst = &(*state)->dst;
} else {
src = &(*state)->dst;
dst = &(*state)->src;
}
} else {
if (PF_REVERSED_KEY(*state, pd->af)) {
src = &(*state)->dst;
dst = &(*state)->src;
} else {
src = &(*state)->src;
dst = &(*state)->dst;
}
}
if (src->wscale && dst->wscale)
dws = dst->wscale & PF_WSCALE_MASK;
else
dws = 0;
seq = ntohl(th->th_seq) - src->seqdiff;
if (src->seqdiff) {
pf_change_a(&th->th_seq, icmpsum,
htonl(seq), 0);
copyback = 1;
}
if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
(!SEQ_GEQ(src->seqhi, seq) ||
!SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: BAD ICMP %d:%d ",
icmptype, icmpcode);
pf_print_host(pd->src, 0, pd->af);
printf(" -> ");
pf_print_host(pd->dst, 0, pd->af);
printf(" state: ");
pf_print_state(*state);
printf(" seq=%u\n", seq);
}
REASON_SET(reason, PFRES_BADSTATE);
return (PF_DROP);
} else {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: OK ICMP %d:%d ",
icmptype, icmpcode);
pf_print_host(pd->src, 0, pd->af);
printf(" -> ");
pf_print_host(pd->dst, 0, pd->af);
printf(" state: ");
pf_print_state(*state);
printf(" seq=%u\n", seq);
}
}
if ((*state)->key[PF_SK_WIRE] !=
(*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
#if defined(INET) && defined(INET6)
int afto, sidx, didx;
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd2.didx;
didx = pd2.sidx;
} else {
sidx = pd2.sidx;
didx = pd2.didx;
}
if (afto) {
if (pf_translate_icmp_af(nk->af,
&pd->hdr.icmp))
return (PF_DROP);
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(c_caddr_t)&pd->hdr.icmp6);
if (pf_change_icmp_af(pd->m, ipoff2, pd,
&pd2, &nk->addr[sidx],
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
if (nk->af == AF_INET) {
pd->proto = IPPROTO_ICMP;
} else {
pd->proto = IPPROTO_ICMPV6;
pd->nsaddr.addr32[3] =
pd->src->addr32[0];
}
pd->naf = pd2.naf = nk->af;
pf_change_ap(&pd2, pd2.src, &th->th_sport,
&nk->addr[pd2.sidx], nk->port[sidx]);
pf_change_ap(&pd2, pd2.dst, &th->th_dport,
&nk->addr[pd2.didx], nk->port[didx]);
m_copyback(pd2.m, pd2.off, 8, (c_caddr_t)th);
return (PF_AFRT);
}
#endif
if (PF_ANEQ(pd2.src,
&nk->addr[pd2.sidx], pd2.af) ||
nk->port[pd2.sidx] != th->th_sport)
pf_change_icmp(pd2.src, &th->th_sport,
daddr, &nk->addr[pd2.sidx],
nk->port[pd2.sidx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
if (PF_ANEQ(pd2.dst,
&nk->addr[pd2.didx], pd2.af) ||
nk->port[pd2.didx] != th->th_dport)
pf_change_icmp(pd2.dst, &th->th_dport,
saddr, &nk->addr[pd2.didx],
nk->port[pd2.didx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
copyback = 1;
}
if (copyback) {
switch (pd2.af) {
#ifdef INET
case AF_INET:
m_copyback(pd->m, pd->off, ICMP_MINLEN,
(caddr_t )&pd->hdr.icmp);
m_copyback(pd->m, ipoff2, sizeof(h2),
(caddr_t )&h2);
break;
#endif
#ifdef INET6
case AF_INET6:
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(caddr_t )&pd->hdr.icmp6);
m_copyback(pd->m, ipoff2, sizeof(h2_6),
(caddr_t )&h2_6);
break;
#endif
default:
unhandled_af(pd->af);
}
m_copyback(pd->m, pd2.off, 8, (caddr_t)th);
}
return (PF_PASS);
break;
}
case IPPROTO_UDP: {
struct udphdr *uh = &pd2.hdr.udp;
int action;
if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh),
reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(udp)");
return (PF_DROP);
}
pd2.pcksum = &pd2.hdr.udp.uh_sum;
key.af = pd2.af;
key.proto = IPPROTO_UDP;
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[pd2.sidx] = uh->uh_sport;
key.port[pd2.didx] = uh->uh_dport;
action = pf_find_state(&pd2, &key, state);
if (action != PF_MATCH)
return (action);
if ((*state)->key[PF_SK_WIRE] !=
(*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
#if defined(INET) && defined(INET6)
int afto, sidx, didx;
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd2.didx;
didx = pd2.sidx;
} else {
sidx = pd2.sidx;
didx = pd2.didx;
}
if (afto) {
if (pf_translate_icmp_af(nk->af,
&pd->hdr.icmp))
return (PF_DROP);
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(c_caddr_t)&pd->hdr.icmp6);
if (pf_change_icmp_af(pd->m, ipoff2, pd,
&pd2, &nk->addr[sidx],
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
if (nk->af == AF_INET) {
pd->proto = IPPROTO_ICMP;
} else {
pd->proto = IPPROTO_ICMPV6;
pd->nsaddr.addr32[3] =
pd->src->addr32[0];
}
pd->naf = pd2.naf = nk->af;
pf_change_ap(&pd2, pd2.src, &uh->uh_sport,
&nk->addr[pd2.sidx], nk->port[sidx]);
pf_change_ap(&pd2, pd2.dst, &uh->uh_dport,
&nk->addr[pd2.didx], nk->port[didx]);
m_copyback(pd2.m, pd2.off, sizeof(*uh),
(c_caddr_t)uh);
return (PF_AFRT);
}
#endif
if (PF_ANEQ(pd2.src,
&nk->addr[pd2.sidx], pd2.af) ||
nk->port[pd2.sidx] != uh->uh_sport)
pf_change_icmp(pd2.src, &uh->uh_sport,
daddr, &nk->addr[pd2.sidx],
nk->port[pd2.sidx], &uh->uh_sum,
pd2.ip_sum, icmpsum,
pd->ip_sum, 1, pd2.af);
if (PF_ANEQ(pd2.dst,
&nk->addr[pd2.didx], pd2.af) ||
nk->port[pd2.didx] != uh->uh_dport)
pf_change_icmp(pd2.dst, &uh->uh_dport,
saddr, &nk->addr[pd2.didx],
nk->port[pd2.didx], &uh->uh_sum,
pd2.ip_sum, icmpsum,
pd->ip_sum, 1, pd2.af);
switch (pd2.af) {
#ifdef INET
case AF_INET:
m_copyback(pd->m, pd->off, ICMP_MINLEN,
(caddr_t )&pd->hdr.icmp);
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
break;
#endif
#ifdef INET6
case AF_INET6:
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(caddr_t )&pd->hdr.icmp6);
m_copyback(pd->m, ipoff2, sizeof(h2_6),
(caddr_t )&h2_6);
break;
#endif
}
m_copyback(pd->m, pd2.off, sizeof(*uh), (caddr_t)uh);
}
return (PF_PASS);
break;
}
case IPPROTO_SCTP: {
struct sctphdr *sh = &pd2.hdr.sctp;
struct pf_state_peer *src;
int copyback = 0;
int action;
if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason,
pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(sctp)");
return (PF_DROP);
}
pd2.pcksum = &pd2.sctp_dummy_sum;
key.af = pd2.af;
key.proto = IPPROTO_SCTP;
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[pd2.sidx] = sh->src_port;
key.port[pd2.didx] = sh->dest_port;
action = pf_find_state(&pd2, &key, state);
if (action != PF_MATCH)
return (action);
if (pd->dir == (*state)->direction) {
if (PF_REVERSED_KEY(*state, pd->af))
src = &(*state)->src;
else
src = &(*state)->dst;
} else {
if (PF_REVERSED_KEY(*state, pd->af))
src = &(*state)->dst;
else
src = &(*state)->src;
}
if (src->scrub->pfss_v_tag != sh->v_tag) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message has incorrect "
"SCTP v_tag");
return (PF_DROP);
}
if ((*state)->key[PF_SK_WIRE] !=
(*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
#if defined(INET) && defined(INET6)
int afto, sidx, didx;
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd2.didx;
didx = pd2.sidx;
} else {
sidx = pd2.sidx;
didx = pd2.didx;
}
if (afto) {
if (pf_translate_icmp_af(nk->af,
&pd->hdr.icmp))
return (PF_DROP);
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(c_caddr_t)&pd->hdr.icmp6);
if (pf_change_icmp_af(pd->m, ipoff2, pd,
&pd2, &nk->addr[sidx],
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
sh->src_port = nk->port[sidx];
sh->dest_port = nk->port[didx];
m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh);
pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
if (nk->af == AF_INET) {
pd->proto = IPPROTO_ICMP;
} else {
pd->proto = IPPROTO_ICMPV6;
pd->nsaddr.addr32[3] =
pd->src->addr32[0];
}
pd->naf = nk->af;
return (PF_AFRT);
}
#endif
if (PF_ANEQ(pd2.src,
&nk->addr[pd2.sidx], pd2.af) ||
nk->port[pd2.sidx] != sh->src_port)
pf_change_icmp(pd2.src, &sh->src_port,
daddr, &nk->addr[pd2.sidx],
nk->port[pd2.sidx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
if (PF_ANEQ(pd2.dst,
&nk->addr[pd2.didx], pd2.af) ||
nk->port[pd2.didx] != sh->dest_port)
pf_change_icmp(pd2.dst, &sh->dest_port,
saddr, &nk->addr[pd2.didx],
nk->port[pd2.didx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
copyback = 1;
}
if (copyback) {
switch (pd2.af) {
#ifdef INET
case AF_INET:
m_copyback(pd->m, pd->off, ICMP_MINLEN,
(caddr_t )&pd->hdr.icmp);
m_copyback(pd->m, ipoff2, sizeof(h2),
(caddr_t )&h2);
break;
#endif
#ifdef INET6
case AF_INET6:
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(caddr_t )&pd->hdr.icmp6);
m_copyback(pd->m, ipoff2, sizeof(h2_6),
(caddr_t )&h2_6);
break;
#endif
}
m_copyback(pd->m, pd2.off, sizeof(*sh), (caddr_t)sh);
}
return (PF_PASS);
break;
}
#ifdef INET
case IPPROTO_ICMP: {
struct icmp *iih = &pd2.hdr.icmp;
if (pd2.af != AF_INET) {
REASON_SET(reason, PFRES_NORM);
return (PF_DROP);
}
if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN,
reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short i"
"(icmp)");
return (PF_DROP);
}
pd2.pcksum = &pd2.hdr.icmp.icmp_cksum;
icmpid = iih->icmp_id;
pf_icmp_mapping(&pd2, iih->icmp_type,
&icmp_dir, &virtual_id, &virtual_type);
ret = pf_icmp_state_lookup(&key, &pd2, state,
virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
if (ret >= 0) {
MPASS(*state == NULL);
return (ret);
}
if ((*state)->key[PF_SK_WIRE] !=
(*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
#if defined(INET) && defined(INET6)
int afto, sidx, didx;
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd2.didx;
didx = pd2.sidx;
iidx = !iidx;
} else {
sidx = pd2.sidx;
didx = pd2.didx;
}
if (afto) {
if (nk->af != AF_INET6)
return (PF_DROP);
if (pf_translate_icmp_af(nk->af,
&pd->hdr.icmp))
return (PF_DROP);
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(c_caddr_t)&pd->hdr.icmp6);
if (pf_change_icmp_af(pd->m, ipoff2, pd,
&pd2, &nk->addr[sidx],
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
pd->proto = IPPROTO_ICMPV6;
if (pf_translate_icmp_af(nk->af, iih))
return (PF_DROP);
if (virtual_type == htons(ICMP_ECHO) &&
nk->port[iidx] != iih->icmp_id)
iih->icmp_id = nk->port[iidx];
m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
(c_caddr_t)iih);
pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
pd->nsaddr.addr32[3] =
pd->src->addr32[0];
pd->naf = nk->af;
return (PF_AFRT);
}
#endif
if (PF_ANEQ(pd2.src,
&nk->addr[pd2.sidx], pd2.af) ||
(virtual_type == htons(ICMP_ECHO) &&
nk->port[iidx] != iih->icmp_id))
pf_change_icmp(pd2.src,
(virtual_type == htons(ICMP_ECHO)) ?
&iih->icmp_id : NULL,
daddr, &nk->addr[pd2.sidx],
(virtual_type == htons(ICMP_ECHO)) ?
nk->port[iidx] : 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET);
if (PF_ANEQ(pd2.dst,
&nk->addr[pd2.didx], pd2.af))
pf_change_icmp(pd2.dst, NULL, NULL,
&nk->addr[pd2.didx], 0, NULL,
pd2.ip_sum, icmpsum, pd->ip_sum, 0,
AF_INET);
m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
m_copyback(pd->m, pd2.off, ICMP_MINLEN, (caddr_t)iih);
}
return (PF_PASS);
break;
}
#endif
#ifdef INET6
case IPPROTO_ICMPV6: {
struct icmp6_hdr *iih = &pd2.hdr.icmp6;
if (pd2.af != AF_INET6) {
REASON_SET(reason, PFRES_NORM);
return (PF_DROP);
}
if (!pf_pull_hdr(pd->m, pd2.off, iih,
sizeof(struct icmp6_hdr), reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(icmp6)");
return (PF_DROP);
}
pd2.pcksum = &pd2.hdr.icmp6.icmp6_cksum;
pf_icmp_mapping(&pd2, iih->icmp6_type,
&icmp_dir, &virtual_id, &virtual_type);
ret = pf_icmp_state_lookup(&key, &pd2, state,
virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
if (ret == PF_DROP && pd2.af == AF_INET6 &&
icmp_dir == PF_OUT) {
MPASS(*state == NULL);
ret = pf_icmp_state_lookup(&key, &pd2,
state, virtual_id, virtual_type,
icmp_dir, &iidx, 1, 1);
}
if (ret >= 0) {
MPASS(*state == NULL);
return (ret);
}
if ((*state)->key[PF_SK_WIRE] !=
(*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk;
if (PF_REVERSED_KEY(*state, pd->af))
nk = (*state)->key[pd->sidx];
else
nk = (*state)->key[pd->didx];
#if defined(INET) && defined(INET6)
int afto, sidx, didx;
afto = pd->af != nk->af;
if (afto && (*state)->direction == PF_IN) {
sidx = pd2.didx;
didx = pd2.sidx;
iidx = !iidx;
} else {
sidx = pd2.sidx;
didx = pd2.didx;
}
if (afto) {
if (nk->af != AF_INET)
return (PF_DROP);
if (pf_translate_icmp_af(nk->af,
&pd->hdr.icmp))
return (PF_DROP);
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(c_caddr_t)&pd->hdr.icmp6);
if (pf_change_icmp_af(pd->m, ipoff2, pd,
&pd2, &nk->addr[sidx],
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
pd->proto = IPPROTO_ICMP;
if (pf_translate_icmp_af(nk->af, iih))
return (PF_DROP);
if (virtual_type ==
htons(ICMP6_ECHO_REQUEST) &&
nk->port[iidx] != iih->icmp6_id)
iih->icmp6_id = nk->port[iidx];
m_copyback(pd2.m, pd2.off,
sizeof(struct icmp6_hdr), (c_caddr_t)iih);
pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
pd->naf = nk->af;
return (PF_AFRT);
}
#endif
if (PF_ANEQ(pd2.src,
&nk->addr[pd2.sidx], pd2.af) ||
((virtual_type == htons(ICMP6_ECHO_REQUEST)) &&
nk->port[pd2.sidx] != iih->icmp6_id))
pf_change_icmp(pd2.src,
(virtual_type == htons(ICMP6_ECHO_REQUEST))
? &iih->icmp6_id : NULL,
daddr, &nk->addr[pd2.sidx],
(virtual_type == htons(ICMP6_ECHO_REQUEST))
? nk->port[iidx] : 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET6);
if (PF_ANEQ(pd2.dst,
&nk->addr[pd2.didx], pd2.af))
pf_change_icmp(pd2.dst, NULL, NULL,
&nk->addr[pd2.didx], 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET6);
m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
(caddr_t)&pd->hdr.icmp6);
m_copyback(pd->m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
m_copyback(pd->m, pd2.off, sizeof(struct icmp6_hdr),
(caddr_t)iih);
}
return (PF_PASS);
break;
}
#endif
default: {
int action;
pd->pcksum = &pd->sctp_dummy_sum;
key.af = pd2.af;
key.proto = pd2.proto;
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[0] = key.port[1] = 0;
action = pf_find_state(&pd2, &key, state);
if (action != PF_MATCH)
return (action);
if ((*state)->key[PF_SK_WIRE] !=
(*state)->key[PF_SK_STACK]) {
struct pf_state_key *nk =
(*state)->key[pd->didx];
if (PF_ANEQ(pd2.src,
&nk->addr[pd2.sidx], pd2.af))
pf_change_icmp(pd2.src, NULL, daddr,
&nk->addr[pd2.sidx], 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
if (PF_ANEQ(pd2.dst,
&nk->addr[pd2.didx], pd2.af))
pf_change_icmp(pd2.dst, NULL, saddr,
&nk->addr[pd2.didx], 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
switch (pd2.af) {
#ifdef INET
case AF_INET:
m_copyback(pd->m, pd->off, ICMP_MINLEN,
(caddr_t)&pd->hdr.icmp);
m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
break;
#endif
#ifdef INET6
case AF_INET6:
m_copyback(pd->m, pd->off,
sizeof(struct icmp6_hdr),
(caddr_t )&pd->hdr.icmp6);
m_copyback(pd->m, ipoff2, sizeof(h2_6),
(caddr_t )&h2_6);
break;
#endif
}
}
return (PF_PASS);
break;
}
}
}
}
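/*
 * Copy len bytes at offset off from the mbuf chain into p, verifying
 * that the request stays within both the packet and the IP payload.
 * Non-first IPv4 fragments are refused (PFRES_FRAG).  Returns NULL
 * and sets *reasonp on failure.
 */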
void *
pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
u_short *reasonp, sa_family_t af)
{
int iplen = 0;
switch (af) {
#ifdef INET
case AF_INET: {
const struct ip *h = mtod(m, struct ip *);
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
if (fragoff) {
REASON_SET(reasonp, PFRES_FRAG);
return (NULL);
}
iplen = ntohs(h->ip_len);
break;
}
#endif
#ifdef INET6
case AF_INET6: {
const struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
break;
}
#endif
}
if (m->m_pkthdr.len < off + len || iplen < off + len) {
REASON_SET(reasonp, PFRES_SHORT);
return (NULL);
}
m_copydata(m, off, len, p);
return (p);
}
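/*
 * Loose unicast reverse path forwarding check: is addr reachable in
 * rtableid, possibly via kif?  Scope-embedded IPv6 addresses, the
 * "any" interface group and enc(4) interfaces always pass.
 */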
int
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif,
int rtableid)
{
struct ifnet *ifp;
if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
return (1);
if (af != AF_INET && af != AF_INET6)
return (0);
if (kif == V_pfi_all)
return (1);
if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
return (1);
ifp = (kif != NULL) ? kif->pfik_ifp : NULL;
switch (af) {
#ifdef INET6
case AF_INET6:
return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE,
ifp));
#endif
#ifdef INET
case AF_INET:
return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE,
ifp));
#endif
}
return (0);
}
#ifdef INET
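/*
 * Output path for route-to/reply-to/dup-to on IPv4: pick the target
 * interface and gateway, optionally re-run the ruleset for forwarded
 * packets, then fragment as needed and transmit.
 */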
static int
pf_route(struct pf_krule *r, struct ifnet *oifp,
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0, *m1, *md;
struct route_in6 ro;
union sockaddr_union rt_gw;
const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst;
union sockaddr_union *dst;
struct ip *ip;
struct ifnet *ifp = NULL;
int error = 0;
uint16_t ip_len, ip_off;
uint16_t tmp;
int r_dir;
bool skip_test = false;
int action = PF_PASS;
KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
SDT_PROBE4(pf, ip, route_to, entry, pd->m, pd, s, oifp);
if (s) {
r_dir = s->direction;
} else {
r_dir = r->direction;
}
KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
__func__));
if ((pd->pf_mtag == NULL &&
((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
pd->pf_mtag->routed++ > 3) {
m0 = pd->m;
pd->m = NULL;
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
action = PF_DROP;
goto bad_locked;
}
if (pd->act.rt_kif != NULL)
ifp = pd->act.rt_kif->pfik_ifp;
if (pd->act.rt == PF_DUPTO) {
if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
if (s != NULL) {
PF_STATE_UNLOCK(s);
}
if (ifp == oifp) {
return (action);
} else {
m0 = pd->m;
pd->m = NULL;
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
action = PF_DROP;
goto bad;
}
} else {
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
if (s)
PF_STATE_UNLOCK(s);
return (action);
}
}
} else {
if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
if (pd->af == pd->naf) {
pf_dummynet(pd, s, r, &pd->m);
if (s)
PF_STATE_UNLOCK(s);
return (action);
} else {
if (r_dir == PF_IN) {
skip_test = true;
}
}
}
if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
pd->af != pd->naf) {
if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET) {
ifp = NULL;
}
if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET6) {
ifp = NULL;
}
}
m0 = pd->m;
}
ip = mtod(m0, struct ip *);
bzero(&ro, sizeof(ro));
dst = (union sockaddr_union *)&ro.ro_dst;
dst->sin.sin_family = AF_INET;
dst->sin.sin_len = sizeof(struct sockaddr_in);
dst->sin.sin_addr = ip->ip_dst;
if (ifp) {
bzero(&rt_gw, sizeof(rt_gw));
ro.ro_flags |= RT_HAS_GW;
gw = &rt_gw;
switch (pd->act.rt_af) {
#ifdef INET
case AF_INET:
rt_gw.sin.sin_family = AF_INET;
rt_gw.sin.sin_len = sizeof(struct sockaddr_in);
rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
break;
#endif
#ifdef INET6
case AF_INET6:
rt_gw.sin6.sin6_family = AF_INET6;
rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6);
pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr,
&pd->act.rt_addr, AF_INET6);
break;
#endif
default:
break;
}
}
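/*
 * For routed inbound packets pf acts as the router: check and
 * decrement the TTL, answering with an ICMP time exceeded message
 * when it runs out.
 */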
if (pd->dir == PF_IN) {
if (ip->ip_ttl <= IPTTLDEC) {
if (r->rt != PF_DUPTO)
pf_send_icmp(m0, ICMP_TIMXCEED,
ICMP_TIMXCEED_INTRANS, 0, pd->af, r,
pd->act.rtableid);
action = PF_DROP;
goto bad_locked;
}
ip->ip_ttl -= IPTTLDEC;
}
if (s != NULL) {
if (ifp == NULL && (pd->af != pd->naf)) {
const struct nhop_object *nh;
nh = fib4_lookup(M_GETFIB(m0), ip->ip_dst, 0, NHR_NONE, 0);
if (nh) {
ifp = nh->nh_ifp;
if (nh->nh_flags & NHF_GATEWAY) {
gw = (const union sockaddr_union *)&nh->gw_sa;
ro.ro_flags |= RT_HAS_GW;
} else {
dst->sin.sin_addr = ip->ip_dst;
}
}
}
PF_STATE_UNLOCK(s);
}
KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__));
if (ifp == NULL) {
m0 = pd->m;
pd->m = NULL;
action = PF_DROP;
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
goto bad;
}
if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN));
s->kif = ifp->if_pf_kif;
if (pd->act.rt == PF_REPLYTO) {
s->orig_kif = oifp->if_pf_kif;
}
}
if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN))
skip_test = true;
if (pd->dir == PF_IN) {
if (skip_test) {
struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif;
MPASS(s != NULL);
pf_counter_u64_critical_enter();
pf_counter_u64_add_protected(
&out_kif->pfik_bytes[pd->naf == AF_INET6][1]
[action != PF_PASS && action != PF_AFRT], pd->tot_len);
pf_counter_u64_add_protected(
&out_kif->pfik_packets[pd->naf == AF_INET6][1]
[action != PF_PASS && action != PF_AFRT], 1);
pf_counter_u64_critical_exit();
} else {
if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp,
&pd->act) != PF_PASS) {
action = PF_DROP;
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
goto bad;
} else if (m0 == NULL) {
action = PF_DROP;
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
goto done;
}
if (m0->m_len < sizeof(struct ip)) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: m0->m_len < sizeof(struct ip)", __func__);
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
action = PF_DROP;
goto bad;
}
ip = mtod(m0, struct ip *);
}
}
if (ifp->if_flags & IFF_LOOPBACK)
m0->m_flags |= M_SKIP_FIREWALL;
ip_len = ntohs(ip->ip_len);
ip_off = ntohs(ip->ip_off);
m0->m_pkthdr.csum_flags |= CSUM_IP;
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
in_delayed_cksum(m0);
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2));
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
if (pd->dir == PF_IN) {
pd->dir = PF_OUT;
tmp = pd->act.dnrpipe;
pd->act.dnrpipe = pd->act.dnpipe;
pd->act.dnpipe = tmp;
}
if (ip_len <= ifp->if_mtu ||
(m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
ip->ip_sum = 0;
if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
m0->m_pkthdr.csum_flags &= ~CSUM_IP;
}
m_clrprotoflags(m0);
md = m0;
error = pf_dummynet_route(pd, s, r, ifp,
(const struct sockaddr *)gw, &md);
if (md != NULL) {
error = (*ifp->if_output)(ifp, md,
(const struct sockaddr *)gw, (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
goto done;
}
if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
KMOD_IPSTAT_INC(ips_cantfrag);
if (pd->act.rt != PF_DUPTO) {
if (s && s->nat_rule != NULL) {
MPASS(m0 == pd->m);
PACKET_UNDO_NAT(pd,
(ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
s);
}
pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
ifp->if_mtu, pd->af, r, pd->act.rtableid);
}
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
action = PF_DROP;
goto bad;
}
error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
if (error) {
SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
action = PF_DROP;
goto bad;
}
for (; m0; m0 = m1) {
m1 = m0->m_nextpkt;
m0->m_nextpkt = NULL;
if (error == 0) {
m_clrprotoflags(m0);
md = m0;
pd->pf_mtag = pf_find_mtag(md);
error = pf_dummynet_route(pd, s, r, ifp,
(const struct sockaddr *)gw, &md);
if (md != NULL) {
error = (*ifp->if_output)(ifp, md,
(const struct sockaddr *)gw,
(struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
} else
m_freem(m0);
}
if (error == 0)
KMOD_IPSTAT_INC(ips_fragmented);
done:
if (pd->act.rt != PF_DUPTO)
pd->m = NULL;
else
action = PF_PASS;
return (action);
bad_locked:
if (s)
PF_STATE_UNLOCK(s);
bad:
m_freem(m0);
goto done;
}
#endif
#ifdef INET6
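/*
 * IPv6 counterpart of pf_route(): handle route-to/reply-to/dup-to,
 * decrement the hop limit on routed packets, refragment previously
 * reassembled packets and hand the result to nd6_output_ifp().
 */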
static int
pf_route6(struct pf_krule *r, struct ifnet *oifp,
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0, *md;
struct m_tag *mtag;
struct sockaddr_in6 dst;
struct ip6_hdr *ip6;
struct ifnet *ifp = NULL;
int r_dir;
bool skip_test = false;
int action = PF_PASS;
KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
SDT_PROBE4(pf, ip6, route_to, entry, pd->m, pd, s, oifp);
if (s) {
r_dir = s->direction;
} else {
r_dir = r->direction;
}
KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
__func__));
if ((pd->pf_mtag == NULL &&
((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
pd->pf_mtag->routed++ > 3) {
m0 = pd->m;
pd->m = NULL;
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto bad_locked;
}
if (pd->act.rt_kif != NULL)
ifp = pd->act.rt_kif->pfik_ifp;
if (pd->act.rt == PF_DUPTO) {
if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
if (s != NULL) {
PF_STATE_UNLOCK(s);
}
if (ifp == oifp) {
return (action);
} else {
m0 = pd->m;
pd->m = NULL;
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto bad;
}
} else {
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
if (s)
PF_STATE_UNLOCK(s);
return (action);
}
}
} else {
if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
if (pd->af == pd->naf) {
pf_dummynet(pd, s, r, &pd->m);
if (s)
PF_STATE_UNLOCK(s);
return (action);
} else {
if (r_dir == PF_IN) {
skip_test = true;
}
}
}
if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
pd->af != pd->naf) {
if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET6) {
ifp = NULL;
}
if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET) {
ifp = NULL;
}
}
m0 = pd->m;
}
ip6 = mtod(m0, struct ip6_hdr *);
bzero(&dst, sizeof(dst));
dst.sin6_family = AF_INET6;
dst.sin6_len = sizeof(dst);
pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr,
AF_INET6);
if (pd->dir == PF_IN) {
if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
if (r->rt != PF_DUPTO)
pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r,
pd->act.rtableid);
action = PF_DROP;
goto bad_locked;
}
ip6->ip6_hlim -= IPV6_HLIMDEC;
}
if (s != NULL) {
if (ifp == NULL && (pd->af != pd->naf)) {
const struct nhop_object *nh;
nh = fib6_lookup(M_GETFIB(m0), &ip6->ip6_dst, 0, NHR_NONE, 0);
if (nh) {
ifp = nh->nh_ifp;
if (nh->nh_flags & NHF_GATEWAY)
bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr,
sizeof(dst.sin6_addr));
else
dst.sin6_addr = ip6->ip6_dst;
}
}
PF_STATE_UNLOCK(s);
}
if (pd->af != pd->naf) {
struct udphdr *uh = &pd->hdr.udp;
if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) {
uh->uh_sum = in6_cksum_pseudo(ip6,
ntohs(uh->uh_ulen), IPPROTO_UDP, 0);
m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any);
}
}
if (ifp == NULL) {
m0 = pd->m;
pd->m = NULL;
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto bad;
}
if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN));
s->kif = ifp->if_pf_kif;
if (pd->act.rt == PF_REPLYTO) {
s->orig_kif = oifp->if_pf_kif;
}
}
if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN))
skip_test = true;
if (pd->dir == PF_IN) {
if (skip_test) {
struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif;
MPASS(s != NULL);
pf_counter_u64_critical_enter();
pf_counter_u64_add_protected(
&out_kif->pfik_bytes[pd->naf == AF_INET6][1]
[action != PF_PASS && action != PF_AFRT], pd->tot_len);
pf_counter_u64_add_protected(
&out_kif->pfik_packets[pd->naf == AF_INET6][1]
[action != PF_PASS && action != PF_AFRT], 1);
pf_counter_u64_critical_exit();
} else {
if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT,
ifp, &m0, inp, &pd->act) != PF_PASS) {
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto bad;
} else if (m0 == NULL) {
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto done;
}
if (m0->m_len < sizeof(struct ip6_hdr)) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: m0->m_len < sizeof(struct ip6_hdr)",
__func__);
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto bad;
}
ip6 = mtod(m0, struct ip6_hdr *);
}
}
if (ifp->if_flags & IFF_LOOPBACK)
m0->m_flags |= M_SKIP_FIREWALL;
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
~ifp->if_hwassist) {
uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
}
if (pd->dir == PF_IN) {
uint16_t tmp;
pd->dir = PF_OUT;
tmp = pd->act.dnrpipe;
pd->act.dnrpipe = pd->act.dnpipe;
pd->act.dnpipe = tmp;
}
if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL);
if (mtag != NULL) {
int ret __sdt_used;
ret = pf_refragment6(ifp, &m0, mtag, ifp, true);
SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
goto done;
}
if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
md = m0;
pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
if (md != NULL) {
int ret __sdt_used;
ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL);
SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
}
}
else {
in6_ifstat_inc(ifp, ifs6_in_toobig);
if (pd->act.rt != PF_DUPTO) {
if (s && s->nat_rule != NULL) {
MPASS(m0 == pd->m);
PACKET_UNDO_NAT(pd,
((caddr_t)ip6 - m0->m_data) +
sizeof(struct ip6_hdr), s);
}
if (r->rt != PF_DUPTO)
pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
ifp->if_mtu, pd->af, r, pd->act.rtableid);
}
action = PF_DROP;
SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
goto bad;
}
done:
if (pd->act.rt != PF_DUPTO)
pd->m = NULL;
else
action = PF_PASS;
return (action);
bad_locked:
if (s)
PF_STATE_UNLOCK(s);
bad:
m_freem(m0);
goto done;
}
#endif
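/*
 * Verify a TCP/UDP/ICMP/ICMPv6 checksum, trusting hardware offload
 * results where available.  Returns 0 if the checksum is valid and
 * marks the mbuf so the stack does not verify it again.
 */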
static int
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
{
u_int16_t sum = 0;
int hw_assist = 0;
struct ip *ip;
if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
return (1);
if (m->m_pkthdr.len < off + len)
return (1);
switch (p) {
case IPPROTO_TCP:
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
sum = m->m_pkthdr.csum_data;
} else {
ip = mtod(m, struct ip *);
sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htonl((u_short)len +
m->m_pkthdr.csum_data + IPPROTO_TCP));
}
sum ^= 0xffff;
++hw_assist;
}
break;
case IPPROTO_UDP:
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
sum = m->m_pkthdr.csum_data;
} else {
ip = mtod(m, struct ip *);
sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htonl((u_short)len +
m->m_pkthdr.csum_data + IPPROTO_UDP));
}
sum ^= 0xffff;
++hw_assist;
}
break;
case IPPROTO_ICMP:
#ifdef INET6
case IPPROTO_ICMPV6:
#endif
break;
default:
return (1);
}
if (!hw_assist) {
switch (af) {
case AF_INET:
if (m->m_len < sizeof(struct ip))
return (1);
sum = in4_cksum(m, (p == IPPROTO_ICMP ? 0 : p), off, len);
break;
#ifdef INET6
case AF_INET6:
if (m->m_len < sizeof(struct ip6_hdr))
return (1);
sum = in6_cksum(m, p, off, len);
break;
#endif
}
}
if (sum) {
switch (p) {
case IPPROTO_TCP:
{
KMOD_TCPSTAT_INC(tcps_rcvbadsum);
break;
}
case IPPROTO_UDP:
{
KMOD_UDPSTAT_INC(udps_badsum);
break;
}
#ifdef INET
case IPPROTO_ICMP:
{
KMOD_ICMPSTAT_INC(icps_checksum);
break;
}
#endif
#ifdef INET6
case IPPROTO_ICMPV6:
{
KMOD_ICMP6STAT_INC(icp6s_checksum);
break;
}
#endif
}
return (1);
} else {
if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
m->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m->m_pkthdr.csum_data = 0xffff;
}
}
return (0);
}
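/*
 * Translate a pf packet description into the ip_fw_args flow that
 * dummynet expects.  Returns false when the packet has already been
 * through dummynet or no pipe applies in this direction.
 */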
static bool
pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r,
const struct pf_kstate *s, struct ip_fw_args *dnflow)
{
int dndir = r->direction;
sa_family_t af = pd->naf;
if (s && dndir == PF_INOUT) {
dndir = s->direction;
} else if (dndir == PF_INOUT) {
dndir = pd->dir;
}
if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED)
return (false);
memset(dnflow, 0, sizeof(*dnflow));
if (pd->dport != NULL)
dnflow->f_id.dst_port = ntohs(*pd->dport);
if (pd->sport != NULL)
dnflow->f_id.src_port = ntohs(*pd->sport);
if (pd->dir == PF_IN)
dnflow->flags |= IPFW_ARGS_IN;
else
dnflow->flags |= IPFW_ARGS_OUT;
if (pd->dir != dndir && pd->act.dnrpipe) {
dnflow->rule.info = pd->act.dnrpipe;
}
else if (pd->dir == dndir && pd->act.dnpipe) {
dnflow->rule.info = pd->act.dnpipe;
}
else {
return (false);
}
dnflow->rule.info |= IPFW_IS_DUMMYNET;
if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFSTATE_DN_IS_PIPE)
dnflow->rule.info |= IPFW_IS_PIPE;
dnflow->f_id.proto = pd->proto;
dnflow->f_id.extra = dnflow->rule.info;
if (s)
af = s->key[PF_SK_STACK]->af;
switch (af) {
case AF_INET:
dnflow->f_id.addr_type = 4;
if (s) {
dnflow->f_id.src_ip = htonl(
s->key[PF_SK_STACK]->addr[pd->sidx].v4.s_addr);
dnflow->f_id.dst_ip = htonl(
s->key[PF_SK_STACK]->addr[pd->didx].v4.s_addr);
} else {
dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr);
dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr);
}
break;
case AF_INET6:
dnflow->f_id.addr_type = 6;
if (s) {
dnflow->f_id.src_ip6 =
s->key[PF_SK_STACK]->addr[pd->sidx].v6;
dnflow->f_id.dst_ip6 =
s->key[PF_SK_STACK]->addr[pd->didx].v6;
} else {
dnflow->f_id.src_ip6 = pd->src->v6;
dnflow->f_id.dst_ip6 = pd->dst->v6;
}
break;
}
if (pd->naf == AF_INET6)
dnflow->flags |= IPFW_ARGS_IP6;
return (true);
}
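/*
 * Entry point for the Ethernet-layer ruleset.  Cheap early exits
 * (pf disabled, interface skipped, M_SKIP_FIREWALL) come before the
 * mbuf is made writable and handed to pf_test_eth_rule().
 */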
int
pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
struct inpcb *inp)
{
struct pfi_kkif *kif;
struct mbuf *m = *m0;
M_ASSERTPKTHDR(m);
MPASS(ifp->if_vnet == curvnet);
NET_EPOCH_ASSERT();
if (!V_pf_status.running)
return (PF_PASS);
kif = (struct pfi_kkif *)ifp->if_pf_kif;
if (kif == NULL) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
return (PF_DROP);
}
if (kif->pfik_flags & PFI_IFLAG_SKIP)
return (PF_PASS);
if (m->m_flags & M_SKIP_FIREWALL)
return (PF_PASS);
if (__predict_false(! M_WRITABLE(*m0))) {
m = *m0 = m_unshare(*m0, M_NOWAIT);
if (*m0 == NULL)
return (PF_DROP);
}
return (pf_test_eth_rule(dir, kif, m0));
}
static __inline void
pf_dummynet_flag_remove(struct mbuf *m, struct pf_mtag *pf_mtag)
{
struct m_tag *mtag;
pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET;
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
if (mtag != NULL)
m_tag_delete(m, mtag);
}
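/* Convenience wrapper around pf_dummynet_route() without a route. */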
static int
pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s,
struct pf_krule *r, struct mbuf **m0)
{
return (pf_dummynet_route(pd, s, r, NULL, NULL, m0));
}
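/*
 * Queue the packet into dummynet if a pipe is configured.  When ifp
 * is set, the chosen route-to destination is stashed in the pf mtag
 * so the packet returns through pf_test() for final transmission.
 */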
static int
pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s,
struct pf_krule *r, struct ifnet *ifp, const struct sockaddr *sa,
struct mbuf **m0)
{
struct ip_fw_args dnflow;
NET_EPOCH_ASSERT();
if (pd->act.dnpipe == 0 && pd->act.dnrpipe == 0)
return (0);
if (ip_dn_io_ptr == NULL) {
m_freem(*m0);
*m0 = NULL;
return (ENOMEM);
}
if (pd->pf_mtag == NULL &&
((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) {
m_freem(*m0);
*m0 = NULL;
return (ENOMEM);
}
if (ifp != NULL) {
pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
pd->pf_mtag->if_index = ifp->if_index;
pd->pf_mtag->if_idxgen = ifp->if_idxgen;
MPASS(sa != NULL);
switch (sa->sa_family) {
case AF_INET:
memcpy(&pd->pf_mtag->dst, sa,
sizeof(struct sockaddr_in));
break;
case AF_INET6:
memcpy(&pd->pf_mtag->dst, sa,
sizeof(struct sockaddr_in6));
break;
}
}
if (s != NULL && s->nat_rule != NULL &&
s->nat_rule->action == PF_RDR &&
(
#ifdef INET
(pd->af == AF_INET && IN_LOOPBACK(ntohl(pd->dst->v4.s_addr))) ||
#endif
(pd->af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd->dst->v6)))) {
(*m0)->m_pkthdr.rcvif = V_loif;
}
if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) {
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED;
ip_dn_io_ptr(m0, &dnflow);
if (*m0 != NULL) {
pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
pf_dummynet_flag_remove(*m0, pd->pf_mtag);
}
}
return (0);
}
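/*
 * Walk the IPv4 options, classifying them into pd->badopts so that
 * rules with allow-opts can decide; malformed options drop the
 * packet.
 */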
static int
pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end,
u_short *reason)
{
uint8_t type, length, opts[15 * 4 - sizeof(struct ip)];
if (pd->m->m_pkthdr.len < end) {
DPFPRINTF(PF_DEBUG_MISC, "IP option too short");
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
MPASS(end - off <= sizeof(opts));
m_copydata(pd->m, off, end - off, opts);
end -= off;
off = 0;
while (off < end) {
type = opts[off];
if (type == IPOPT_EOL)
break;
if (type == IPOPT_NOP) {
off++;
continue;
}
if (off + 2 > end) {
DPFPRINTF(PF_DEBUG_MISC, "IP length opt");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
length = opts[off + 1];
if (length < 2) {
DPFPRINTF(PF_DEBUG_MISC, "IP short opt");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
if (off + length > end) {
DPFPRINTF(PF_DEBUG_MISC, "IP long opt");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
switch (type) {
case IPOPT_RA:
pd->badopts |= PF_OPT_ROUTER_ALERT;
break;
default:
pd->badopts |= PF_OPT_OTHER;
break;
}
off += length;
}
return (PF_PASS);
}
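/*
 * Sanity-check the IPv4 header, parse its options and follow any
 * chain of AH headers to find the transport protocol and offset.
 */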
static int
pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
{
struct ah ext;
u_int32_t hlen, end;
int hdr_cnt;
hlen = h->ip_hl << 2;
if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
if (hlen != sizeof(struct ip)) {
if (pf_walk_option(pd, h, pd->off + sizeof(struct ip),
pd->off + hlen, reason) != PF_PASS)
return (PF_DROP);
if (pd->badopts == 0)
pd->badopts |= PF_OPT_OTHER;
}
end = pd->off + ntohs(h->ip_len);
pd->off += hlen;
pd->proto = h->ip_p;
if (pd->proto == IPPROTO_IGMP) {
if ((h->ip_ttl != 1) &&
(h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) {
DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
pd->badopts &= ~PF_OPT_ROUTER_ALERT;
}
if ((h->ip_off & htons(IP_OFFMASK)) != 0)
return (PF_PASS);
for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
switch (pd->proto) {
case IPPROTO_AH:
if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
end < pd->off + sizeof(ext))
return (PF_PASS);
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
reason, AF_INET)) {
DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr");
return (PF_DROP);
}
pd->off += (ext.ah_len + 2) * 4;
pd->proto = ext.ah_nxt;
break;
default:
return (PF_PASS);
}
}
DPFPRINTF(PF_DEBUG_MISC, "IPv4 nested authentication header limit");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
#ifdef INET6
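/*
 * Walk an IPv6 hop-by-hop options header: validate padding, jumbo
 * payload and router alert options and record them in pd->badopts.
 */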
static int
pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
u_short *reason)
{
struct ip6_opt opt;
struct ip6_opt_jumbo jumbo;
while (off < end) {
if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
sizeof(opt.ip6o_type), reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type");
return (PF_DROP);
}
if (opt.ip6o_type == IP6OPT_PAD1) {
off++;
continue;
}
if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt");
return (PF_DROP);
}
if (off + sizeof(opt) + opt.ip6o_len > end) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 long opt");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
switch (opt.ip6o_type) {
case IP6OPT_PADN:
break;
case IP6OPT_JUMBO:
pd->badopts |= PF_OPT_JUMBO;
if (pd->jumbolen != 0) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple jumbo");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
if (ntohs(h->ip6_plen) != 0) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 bad jumbo plen");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo");
return (PF_DROP);
}
memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
sizeof(pd->jumbolen));
pd->jumbolen = ntohl(pd->jumbolen);
if (pd->jumbolen < IPV6_MAXPACKET) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbolen");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
break;
case IP6OPT_ROUTER_ALERT:
pd->badopts |= PF_OPT_ROUTER_ALERT;
break;
default:
pd->badopts |= PF_OPT_OTHER;
break;
}
off += sizeof(opt) + opt.ip6o_len;
}
return (PF_PASS);
}
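/*
 * Walk the IPv6 extension header chain up to the transport header,
 * recording fragment/extension offsets and rejecting malformed or
 * suspicious combinations (rthdr0, multiple fragment headers, ...).
 */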
int
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
{
struct ip6_frag frag;
struct ip6_ext ext;
struct icmp6_hdr icmp6;
struct ip6_rthdr rthdr;
uint32_t end;
int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
pd->off += sizeof(struct ip6_hdr);
end = pd->off + ntohs(h->ip6_plen);
pd->fragoff = pd->extoff = pd->jumbolen = 0;
pd->proto = h->ip6_nxt;
for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
switch (pd->proto) {
case IPPROTO_ROUTING:
case IPPROTO_DSTOPTS:
pd->badopts |= PF_OPT_OTHER;
break;
case IPPROTO_HOPOPTS:
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
return (PF_DROP);
}
if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
pd->off + (ext.ip6e_len + 1) * 8,
reason) != PF_PASS)
return (PF_DROP);
if (pd->badopts == 0)
pd->badopts |= PF_OPT_OTHER;
break;
}
switch (pd->proto) {
case IPPROTO_FRAGMENT:
if (fraghdr_cnt++) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple fragment");
REASON_SET(reason, PFRES_FRAG);
return (PF_DROP);
}
if (pd->jumbolen != 0) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 fragmented jumbo");
REASON_SET(reason, PFRES_FRAG);
return (PF_DROP);
}
if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment");
return (PF_DROP);
}
if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
pd->fragoff = pd->off;
return (PF_PASS);
}
if (frag.ip6f_offlg & IP6F_MORE_FRAG)
pd->fragoff = pd->off;
pd->off += sizeof(frag);
pd->proto = frag.ip6f_nxt;
break;
case IPPROTO_ROUTING:
if (rthdr_cnt++) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple rthdr");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
pd->off = pd->fragoff;
pd->proto = IPPROTO_FRAGMENT;
return (PF_PASS);
}
if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr");
return (PF_DROP);
}
if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 rthdr0");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
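/* FALLTHROUGH */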
case IPPROTO_HOPOPTS:
if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 hopopts not first");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
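/* FALLTHROUGH */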
case IPPROTO_AH:
case IPPROTO_DSTOPTS:
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
return (PF_DROP);
}
if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
pd->off = pd->fragoff;
pd->proto = IPPROTO_FRAGMENT;
return (PF_PASS);
}
if (pd->fragoff == 0)
pd->extoff = pd->off;
if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 missing jumbo");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
if (pd->proto == IPPROTO_AH)
pd->off += (ext.ip6e_len + 2) * 4;
else
pd->off += (ext.ip6e_len + 1) * 8;
pd->proto = ext.ip6e_nxt;
break;
case IPPROTO_ICMPV6:
if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
pd->off = pd->fragoff;
pd->proto = IPPROTO_FRAGMENT;
return (PF_PASS);
}
if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC,
"IPv6 short icmp6hdr");
return (PF_DROP);
}
switch (icmp6.icmp6_type) {
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT:
case MLD_LISTENER_DONE:
case MLDV2_LISTENER_REPORT:
if ((h->ip6_hlim != 1) ||
!IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
DPFPRINTF(PF_DEBUG_MISC, "Invalid MLD");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
pd->badopts &= ~PF_OPT_ROUTER_ALERT;
break;
}
return (PF_PASS);
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
if (pd->fragoff != 0 && end < pd->off +
(pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
pd->proto == IPPROTO_SCTP ? sizeof(struct sctphdr) :
sizeof(struct icmp6_hdr))) {
pd->off = pd->fragoff;
pd->proto = IPPROTO_FRAGMENT;
}
default:
return (PF_PASS);
}
}
DPFPRINTF(PF_DEBUG_MISC, "IPv6 nested extension header limit");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
#endif
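/* Zero the descriptor and attach the mbuf and its pf mtag, if any. */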
static void
pf_init_pdesc(struct pf_pdesc *pd, struct mbuf *m)
{
memset(pd, 0, sizeof(*pd));
pd->pf_mtag = pf_find_mtag(m);
pd->m = m;
}
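/*
 * Fill in the packet descriptor for this mbuf: normalize and walk
 * the network header, then pull the transport header and set up the
 * port and checksum pointers used by the rest of the ruleset.
 */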
static int
pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
u_short *action, u_short *reason, struct pfi_kkif *kif,
struct pf_rule_actions *default_actions)
{
pd->dir = dir;
pd->kif = kif;
pd->m = *m0;
pd->sidx = (dir == PF_IN) ? 0 : 1;
pd->didx = (dir == PF_IN) ? 1 : 0;
pd->af = pd->naf = af;
PF_RULES_ASSERT();
TAILQ_INIT(&pd->sctp_multihome_jobs);
if (default_actions != NULL)
memcpy(&pd->act, default_actions, sizeof(pd->act));
if (pd->pf_mtag && pd->pf_mtag->dnpipe) {
pd->act.dnpipe = pd->pf_mtag->dnpipe;
pd->act.flags = pd->pf_mtag->dnflags;
}
switch (af) {
#ifdef INET
case AF_INET: {
struct ip *h;
if (__predict_false((*m0)->m_len < sizeof(struct ip)) &&
(pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: m_len < sizeof(struct ip), pullup failed",
__func__);
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
h = mtod(pd->m, struct ip *);
if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
if (pf_normalize_ip(reason, pd) != PF_PASS) {
*m0 = pd->m;
*action = PF_DROP;
return (PF_DROP);
}
*m0 = pd->m;
h = mtod(pd->m, struct ip *);
if (pf_walk_header(pd, h, reason) != PF_PASS) {
*action = PF_DROP;
return (PF_DROP);
}
pd->src = (struct pf_addr *)&h->ip_src;
pd->dst = (struct pf_addr *)&h->ip_dst;
pf_addrcpy(&pd->osrc, pd->src, af);
pf_addrcpy(&pd->odst, pd->dst, af);
pd->ip_sum = &h->ip_sum;
pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
pd->ttl = h->ip_ttl;
pd->tot_len = ntohs(h->ip_len);
pd->act.rtableid = -1;
pd->df = h->ip_off & htons(IP_DF);
pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
PF_VPROTO_FRAGMENT : pd->proto;
break;
}
#endif
#ifdef INET6
case AF_INET6: {
struct ip6_hdr *h;
if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) &&
(pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: m_len < sizeof(struct ip6_hdr)"
", pullup failed", __func__);
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
h = mtod(pd->m, struct ip6_hdr *);
if (pd->m->m_pkthdr.len <
sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
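/*
 * Jumbograms are not supported; if we kept going, a zero ip6_plen
 * would misbehave below, so drop the packet.
 */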
if (htons(h->ip6_plen) == 0) {
*action = PF_DROP;
return (PF_DROP);
}
if (pf_walk_header6(pd, h, reason) != PF_PASS) {
*action = PF_DROP;
return (PF_DROP);
}
h = mtod(pd->m, struct ip6_hdr *);
pd->src = (struct pf_addr *)&h->ip6_src;
pd->dst = (struct pf_addr *)&h->ip6_dst;
pf_addrcpy(&pd->osrc, pd->src, af);
pf_addrcpy(&pd->odst, pd->dst, af);
pd->ip_sum = NULL;
pd->tos = IPV6_DSCP(h);
pd->ttl = h->ip6_hlim;
pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
pd->act.rtableid = -1;
pd->virtual_proto = (pd->fragoff != 0) ?
PF_VPROTO_FRAGMENT : pd->proto;
if (pf_normalize_ip6(pd->fragoff, reason, pd) !=
PF_PASS) {
*m0 = pd->m;
*action = PF_DROP;
return (PF_DROP);
}
*m0 = pd->m;
if (pd->m == NULL) {
*action = PF_PASS;
return (PF_DROP);
}
h = mtod(pd->m, struct ip6_hdr *);
pd->src = (struct pf_addr *)&h->ip6_src;
pd->dst = (struct pf_addr *)&h->ip6_dst;
pd->off = 0;
if (pf_walk_header6(pd, h, reason) != PF_PASS) {
*action = PF_DROP;
return (PF_DROP);
}
if (m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL) != NULL) {
pd->virtual_proto = pd->proto;
MPASS(pd->fragoff == 0);
}
if (pd->fragoff != 0)
pd->virtual_proto = PF_VPROTO_FRAGMENT;
break;
}
#endif
default:
panic("pf_setup_pdesc called with illegal af %u", af);
}
switch (pd->virtual_proto) {
case IPPROTO_TCP: {
struct tcphdr *th = &pd->hdr.tcp;
if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->hdrlen = sizeof(*th);
pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
pd->sport = &th->th_sport;
pd->dport = &th->th_dport;
pd->pcksum = &th->th_sum;
break;
}
case IPPROTO_UDP: {
struct udphdr *uh = &pd->hdr.udp;
if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->hdrlen = sizeof(*uh);
if (uh->uh_dport == 0 ||
ntohs(uh->uh_ulen) > pd->m->m_pkthdr.len - pd->off ||
ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->sport = &uh->uh_sport;
pd->dport = &uh->uh_dport;
pd->pcksum = &uh->uh_sum;
break;
}
case IPPROTO_SCTP: {
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->hdrlen = sizeof(pd->hdr.sctp);
pd->p_len = pd->tot_len - pd->off;
pd->sport = &pd->hdr.sctp.src_port;
pd->dport = &pd->hdr.sctp.dest_port;
if (pd->hdr.sctp.src_port == 0 || pd->hdr.sctp.dest_port == 0) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->pcksum = &pd->sctp_dummy_sum;
if (pf_scan_sctp(pd) != PF_PASS) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
break;
}
case IPPROTO_ICMP: {
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->pcksum = &pd->hdr.icmp.icmp_cksum;
pd->hdrlen = ICMP_MINLEN;
break;
}
#ifdef INET6
case IPPROTO_ICMPV6: {
size_t icmp_hlen = sizeof(struct icmp6_hdr);
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
switch (pd->hdr.icmp6.icmp6_type) {
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT:
icmp_hlen = sizeof(struct mld_hdr);
break;
case ND_NEIGHBOR_SOLICIT:
case ND_NEIGHBOR_ADVERT:
icmp_hlen = sizeof(struct nd_neighbor_solicit);
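/* FALLTHROUGH */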
case ND_ROUTER_SOLICIT:
case ND_ROUTER_ADVERT:
case ND_REDIRECT:
if (pd->ttl != 255) {
REASON_SET(reason, PFRES_NORM);
return (PF_DROP);
}
break;
}
if (icmp_hlen > sizeof(struct icmp6_hdr) &&
!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
pd->hdrlen = icmp_hlen;
pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
break;
}
#endif
default:
pd->pcksum = &pd->sctp_dummy_sum;
break;
}
if (pd->sport)
pd->osport = pd->nsport = *pd->sport;
if (pd->dport)
pd->odport = pd->ndport = *pd->dport;
MPASS(pd->pcksum != NULL);
return (PF_PASS);
}
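/* Bump packet/byte counters and table stats for a single rule. */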
static __inline void
pf_rule_counters_inc(struct pf_pdesc *pd, struct pf_krule *r, int dir_out,
int op_pass, sa_family_t af, struct pf_addr *src_host,
struct pf_addr *dst_host)
{
pf_counter_u64_add_protected(&(r->packets[dir_out]), 1);
pf_counter_u64_add_protected(&(r->bytes[dir_out]), pd->tot_len);
pf_update_timestamp(r);
if (r->src.addr.type == PF_ADDR_TABLE)
pfr_update_stats(r->src.addr.p.tbl, src_host, af,
pd->tot_len, dir_out, op_pass, r->src.neg);
if (r->dst.addr.type == PF_ADDR_TABLE)
pfr_update_stats(r->dst.addr.p.tbl, dst_host, af,
pd->tot_len, dir_out, op_pass, r->dst.neg);
}
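/*
 * Update interface, state, source node and rule counters after a
 * verdict.  For af-to states the post-translation address family
 * decides which interface counter bucket is charged.
 */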
static void
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_kstate *s,
struct pf_krule *r, struct pf_krule *a, struct pf_krule_slist *match_rules)
{
struct pf_krule_slist *mr = match_rules;
struct pf_krule_item *ri;
struct pf_krule *nr = NULL;
struct pf_addr *src_host = pd->src;
struct pf_addr *dst_host = pd->dst;
struct pf_state_key *key;
int dir_out = (pd->dir == PF_OUT);
int op_r_pass = (r->action == PF_PASS);
int op_pass = (action == PF_PASS || action == PF_AFRT);
int s_dir_in, s_dir_out, s_dir_rev;
sa_family_t af = pd->af;
pf_counter_u64_critical_enter();
if (action == PF_AFRT) {
MPASS(s != NULL);
if (s->direction == PF_OUT && dir_out)
af = pd->naf;
}
pf_counter_u64_add_protected(
&pd->kif->pfik_bytes[af == AF_INET6][dir_out][!op_pass],
pd->tot_len);
pf_counter_u64_add_protected(
&pd->kif->pfik_packets[af == AF_INET6][dir_out][!op_pass],
1);
if (!(op_pass || r->action == PF_DROP)) {
pf_counter_u64_critical_exit();
return;
}
if (s != NULL) {
PF_STATE_LOCK_ASSERT(s);
mr = &(s->match_rules);
if (action == PF_AFRT && s->direction == PF_IN) {
dir_out = (pd->naf == s->rule->naf);
s_dir_in = 1;
s_dir_out = 0;
s_dir_rev = (pd->naf == s->rule->af);
} else {
dir_out = (pd->dir == PF_OUT);
s_dir_in = (s->direction == PF_IN);
s_dir_out = (s->direction == PF_OUT);
s_dir_rev = (pd->dir != s->direction);
}
s->packets[s_dir_rev]++;
s->bytes[s_dir_rev] += pd->tot_len;
for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
if (s->sns[sn_type] != NULL) {
counter_u64_add(
s->sns[sn_type]->packets[dir_out],
1);
counter_u64_add(
s->sns[sn_type]->bytes[dir_out],
pd->tot_len);
}
}
key = s->key[(s->direction == PF_OUT)];
src_host = &(key->addr[s_dir_out]);
dst_host = &(key->addr[s_dir_in]);
af = key->af;
if (s->nat_rule) {
if (s->nat_rule->action == PF_NAT ||
s->nat_rule->action == PF_RDR ||
s->nat_rule->action == PF_BINAT) {
nr = s->nat_rule;
pf_rule_counters_inc(pd, s->nat_rule, dir_out,
op_r_pass, af, src_host, dst_host);
key = s->key[s_dir_in];
src_host = &(key->addr[s_dir_out]);
dst_host = &(key->addr[s_dir_in]);
af = key->af;
}
}
}
SLIST_FOREACH(ri, mr, entry) {
pf_rule_counters_inc(pd, ri->r, dir_out, op_r_pass, af,
src_host, dst_host);
if (s && s->nat_rule == ri->r) {
key = s->key[s_dir_in];
src_host = &(key->addr[s_dir_out]);
dst_host = &(key->addr[s_dir_in]);
af = key->af;
}
}
if (s == NULL) {
pf_free_match_rules(mr);
}
if (a != NULL) {
pf_rule_counters_inc(pd, a, dir_out, op_r_pass, af,
src_host, dst_host);
}
if (r != nr) {
pf_rule_counters_inc(pd, r, dir_out, op_r_pass, af,
src_host, dst_host);
}
pf_counter_u64_critical_exit();
}
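/*
 * If the final matching rule logs matches, emit a pflog record for
 * every intermediate "match" rule that carries PF_LOG_MATCHES.
 */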
static void
pf_log_matches(struct pf_pdesc *pd, struct pf_krule *rm,
struct pf_krule *am, struct pf_kruleset *ruleset,
struct pf_krule_slist *match_rules)
{
struct pf_krule_item *ri;
/* If the final matching rule does not log matches, nothing to do. */
if (!(rm->log & PF_LOG_MATCHES))
return;
SLIST_FOREACH(ri, match_rules, entry)
if (ri->r->log & PF_LOG_MATCHES)
PFLOG_PACKET(rm->action, PFRES_MATCH, rm, am,
ruleset, pd, 1, ri->r);
}
#if defined(INET) || defined(INET6)
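/*
 * Main packet test entry point, invoked from the pfil(9) hooks for
 * inbound and outbound packets.  Returns a PF_* verdict; whenever the
 * packet is consumed or dropped, *m0 is set to NULL.
 */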
int
pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
struct inpcb *inp, struct pf_rule_actions *default_actions)
{
struct pfi_kkif *kif;
u_short action, reason = 0;
struct m_tag *mtag;
struct pf_krule *a = NULL, *r = &V_pf_default_rule;
struct pf_kstate *s = NULL;
struct pf_kruleset *ruleset = NULL;
struct pf_krule_item *ri;
struct pf_krule_slist match_rules;
struct pf_pdesc pd;
int use_2nd_queue = 0;
uint16_t tag;
PF_RULES_RLOCK_TRACKER;
KASSERT(dir == PF_IN || dir == PF_OUT,
("%s: bad direction %d\n", __func__, dir));
M_ASSERTPKTHDR(*m0);
NET_EPOCH_ASSERT();
if (!V_pf_status.running)
return (PF_PASS);
kif = (struct pfi_kkif *)ifp->if_pf_kif;
if (__predict_false(kif == NULL)) {
DPFPRINTF(PF_DEBUG_URGENT,
"%s: kif == NULL, if_xname %s",
__func__, ifp->if_xname);
return (PF_DROP);
}
if (kif->pfik_flags & PFI_IFLAG_SKIP) {
return (PF_PASS);
}
if ((*m0)->m_flags & M_SKIP_FIREWALL) {
return (PF_PASS);
}
if (__predict_false(!M_WRITABLE(*m0))) {
*m0 = m_unshare(*m0, M_NOWAIT);
if (*m0 == NULL) {
return (PF_DROP);
}
}
pf_init_pdesc(&pd, *m0);
SLIST_INIT(&match_rules);
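/*
 * A packet tagged with PF_MTAG_FLAG_ROUTE_TO was already filtered;
 * it only needs to be sent out the interface recorded in the tag.
 */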
if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) {
pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
ifp = ifnet_byindexgen(pd.pf_mtag->if_index,
pd.pf_mtag->if_idxgen);
if (ifp == NULL || ifp->if_flags & IFF_DYING) {
m_freem(*m0);
*m0 = NULL;
return (PF_PASS);
}
(ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL);
*m0 = NULL;
return (PF_PASS);
}
if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
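/*
 * Dummynet re-injects packets after they've completed their
 * delay.  They have already been filtered, so pass them
 * unconditionally.
 */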
pf_dummynet_flag_remove(pd.m, pd.pf_mtag);
return (PF_PASS);
}
PF_RULES_RLOCK();
if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason,
kif, default_actions) != PF_PASS) {
if (action != PF_PASS)
pd.act.log |= PF_LOG_FORCE;
goto done;
}
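/*
 * When forwarding, answer packets that do not fit the outgoing
 * interface's MTU with an ICMP "fragmentation needed" (IPv4 with DF
 * set) or ICMPv6 "packet too big" error.
 */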
#ifdef INET
if (af == AF_INET && dir == PF_OUT && pflags & PFIL_FWD &&
pd.df && (*m0)->m_pkthdr.len > ifp->if_mtu) {
PF_RULES_RUNLOCK();
icmp_error(*m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
0, ifp->if_mtu);
*m0 = NULL;
return (PF_DROP);
}
#endif
#ifdef INET6
if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD &&
IN6_LINKMTU(ifp) < pf_max_frag_size(*m0)) {
PF_RULES_RUNLOCK();
icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, IN6_LINKMTU(ifp));
*m0 = NULL;
return (PF_DROP);
}
#endif
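/*
 * Packets re-injected by divert(4) carry an MTAG_PF_DIVERT tag.
 * Mark them as looped so they are not diverted a second time below.
 */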
if (__predict_false(ip_divert_ptr != NULL) &&
((mtag = m_tag_locate(pd.m, MTAG_PF_DIVERT, 0, NULL)) != NULL)) {
struct pf_divert_mtag *dt = (struct pf_divert_mtag *)(mtag+1);
if ((dt->idir == PF_DIVERT_MTAG_DIR_IN && dir == PF_IN) ||
(dt->idir == PF_DIVERT_MTAG_DIR_OUT && dir == PF_OUT)) {
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
action = PF_DROP;
goto done;
}
pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED;
}
if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) {
pd.m->m_flags |= M_FASTFWD_OURS;
pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
}
m_tag_delete(pd.m, mtag);
mtag = m_tag_locate(pd.m, MTAG_IPFW_RULE, 0, NULL);
if (mtag != NULL)
m_tag_delete(pd.m, mtag);
}
switch (pd.virtual_proto) {
case PF_VPROTO_FRAGMENT:
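/*
 * Handle fragments that were not reassembled by normalization.
 */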
if (kif == NULL || r == NULL)
action = PF_DROP;
else
action = pf_test_rule(&r, &s, &pd, &a,
&ruleset, &reason, inp, &match_rules);
if (action != PF_PASS)
REASON_SET(&reason, PFRES_FRAG);
break;
case IPPROTO_TCP: {
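/* Respond to SYN with a syncookie under synflood conditions. */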
if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN &&
pd.dir == PF_IN && pf_synflood_check(&pd)) {
pf_syncookie_send(&pd, &reason);
action = PF_DROP;
break;
}
if ((tcp_get_flags(&pd.hdr.tcp) & TH_ACK) && pd.p_len == 0)
use_2nd_queue = 1;
action = pf_normalize_tcp(&pd);
if (action == PF_DROP)
break;
action = pf_test_state(&s, &pd, &reason);
if (action == PF_PASS || action == PF_AFRT) {
if (V_pfsync_update_state_ptr != NULL)
V_pfsync_update_state_ptr(s);
r = s->rule;
a = s->anchor;
} else if (s == NULL) {
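/*
 * An ACK matching a previously issued syncookie completes the
 * handshake: re-create the original SYN, run it through the
 * ruleset to set up state, then hand the packet to the synproxy
 * code.
 */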
if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) ==
TH_ACK && pf_syncookie_validate(&pd) &&
pd.dir == PF_IN) {
struct mbuf *msyn;
msyn = pf_syncookie_recreate_syn(&pd, &reason);
if (msyn == NULL) {
action = PF_DROP;
break;
}
action = pf_test(af, dir, pflags, ifp, &msyn, inp,
&pd.act);
m_freem(msyn);
if (action != PF_PASS)
break;
action = pf_test_state(&s, &pd, &reason);
if (action != PF_PASS || s == NULL) {
action = PF_DROP;
break;
}
s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1;
s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1;
pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST);
action = pf_synproxy(&pd, s, &reason);
break;
} else {
action = pf_test_rule(&r, &s, &pd,
&a, &ruleset, &reason, inp, &match_rules);
}
}
break;
}
case IPPROTO_SCTP:
action = pf_normalize_sctp(&pd);
if (action == PF_DROP)
break;
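/* FALLTHROUGH */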
case IPPROTO_UDP:
default:
action = pf_test_state(&s, &pd, &reason);
if (action == PF_PASS || action == PF_AFRT) {
if (V_pfsync_update_state_ptr != NULL)
V_pfsync_update_state_ptr(s);
r = s->rule;
a = s->anchor;
} else if (s == NULL) {
action = pf_test_rule(&r, &s,
&pd, &a, &ruleset, &reason, inp, &match_rules);
}
break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6: {
if (pd.virtual_proto == IPPROTO_ICMP && af != AF_INET) {
action = PF_DROP;
REASON_SET(&reason, PFRES_NORM);
DPFPRINTF(PF_DEBUG_MISC,
"dropping IPv6 packet with ICMPv4 payload");
break;
}
if (pd.virtual_proto == IPPROTO_ICMPV6 && af != AF_INET6) {
action = PF_DROP;
REASON_SET(&reason, PFRES_NORM);
DPFPRINTF(PF_DEBUG_MISC,
"pf: dropping IPv4 packet with ICMPv6 payload");
break;
}
action = pf_test_state_icmp(&s, &pd, &reason);
if (action == PF_PASS || action == PF_AFRT) {
if (V_pfsync_update_state_ptr != NULL)
V_pfsync_update_state_ptr(s);
r = s->rule;
a = s->anchor;
} else if (s == NULL)
action = pf_test_rule(&r, &s, &pd,
&a, &ruleset, &reason, inp, &match_rules);
break;
}
}
done:
PF_RULES_RUNLOCK();
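/* If the packet sits in the reassembly queue, return without error. */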
if (pd.m == NULL) {
pf_free_match_rules(&match_rules);
goto eat_pkt;
}
if (s)
memcpy(&pd.act, &s->act, sizeof(s->act));
if (action == PF_PASS && pd.badopts != 0 && !pd.act.allow_opts) {
action = PF_DROP;
REASON_SET(&reason, PFRES_IPOPTIONS);
pd.act.log = PF_LOG_FORCE;
DPFPRINTF(PF_DEBUG_MISC,
"pf: dropping packet with dangerous headers");
}
if (pd.act.max_pkt_size && pd.tot_len > pd.act.max_pkt_size) {
action = PF_DROP;
REASON_SET(&reason, PFRES_NORM);
pd.act.log = PF_LOG_FORCE;
DPFPRINTF(PF_DEBUG_MISC,
"pf: dropping overly long packet");
}
if (s) {
uint8_t log = pd.act.log;
memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions));
pd.act.log |= log;
tag = s->tag;
} else {
tag = r->tag;
}
if (tag > 0 && pf_tag_packet(&pd, tag)) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
}
pf_scrub(&pd);
if (pd.proto == IPPROTO_TCP && pd.act.max_mss)
pf_normalize_mss(&pd);
if (pd.act.rtableid >= 0)
M_SETFIB(pd.m, pd.act.rtableid);
if (pd.act.flags & PFSTATE_SETPRIO) {
if (pd.tos & IPTOS_LOWDELAY)
use_2nd_queue = 1;
if (vlan_set_pcp(pd.m, pd.act.set_prio[use_2nd_queue])) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
pd.act.log = PF_LOG_FORCE;
DPFPRINTF(PF_DEBUG_MISC,
"pf: failed to allocate 802.1q mtag");
}
}
#ifdef ALTQ
if (action == PF_PASS && pd.act.qid) {
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
} else {
if (s != NULL)
pd.pf_mtag->qid_hash = pf_state_hash(s);
if (use_2nd_queue || (pd.tos & IPTOS_LOWDELAY))
pd.pf_mtag->qid = pd.act.pqid;
else
pd.pf_mtag->qid = pd.act.qid;
pd.pf_mtag->hdr = mtod(pd.m, void *);
}
}
#endif
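/*
 * Connections redirected to loopback should not match sockets
 * bound specifically to loopback due to security implications,
 * see tcp_input() and in_pcblookup_listen().
 */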
if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule != NULL &&
(s->nat_rule->action == PF_RDR ||
s->nat_rule->action == PF_BINAT) &&
pf_is_loopback(af, pd.dst))
pd.m->m_flags |= M_SKIP_FIREWALL;
if (af == AF_INET && __predict_false(ip_divert_ptr != NULL) &&
action == PF_PASS && r->divert.port && !PACKET_LOOPED(&pd)) {
mtag = m_tag_alloc(MTAG_PF_DIVERT, 0,
sizeof(struct pf_divert_mtag), M_NOWAIT | M_ZERO);
if (mtag != NULL) {
((struct pf_divert_mtag *)(mtag+1))->port =
ntohs(r->divert.port);
((struct pf_divert_mtag *)(mtag+1))->idir =
(dir == PF_IN) ? PF_DIVERT_MTAG_DIR_IN :
PF_DIVERT_MTAG_DIR_OUT;
pf_counters_inc(action, &pd, s, r, a, &match_rules);
if (s)
PF_STATE_UNLOCK(s);
m_tag_prepend(pd.m, mtag);
if (pd.m->m_flags & M_FASTFWD_OURS) {
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
pd.act.log = PF_LOG_FORCE;
DPFPRINTF(PF_DEBUG_MISC,
"pf: failed to allocate tag");
} else {
pd.pf_mtag->flags |=
PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
pd.m->m_flags &= ~M_FASTFWD_OURS;
}
}
ip_divert_ptr(*m0, dir == PF_IN);
*m0 = NULL;
return (action);
} else {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
pd.act.log = PF_LOG_FORCE;
DPFPRINTF(PF_DEBUG_MISC,
"pf: failed to allocate divert tag");
}
}
if (af == AF_INET6 && r->divert.port)
printf("pf: divert(9) is not supported for IPv6\n");
if (pd.pf_mtag)
pd.pf_mtag->flags &= ~PF_MTAG_FLAG_PACKET_LOOPED;
if (pd.act.log) {
struct pf_krule *lr;
if (s != NULL && s->nat_rule != NULL &&
s->nat_rule->log & PF_LOG_ALL)
lr = s->nat_rule;
else
lr = r;
if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL)
PFLOG_PACKET(action, reason, lr, a,
ruleset, &pd, (s == NULL), NULL);
if (s) {
SLIST_FOREACH(ri, &s->match_rules, entry)
if (ri->r->log & PF_LOG_ALL)
PFLOG_PACKET(action,
reason, ri->r, a, ruleset, &pd, 0, NULL);
}
}
pf_counters_inc(action, &pd, s, r, a, &match_rules);
switch (action) {
case PF_SYNPROXY_DROP:
m_freem(*m0);
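/* FALLTHROUGH */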
case PF_DEFER:
*m0 = NULL;
action = PF_PASS;
break;
case PF_DROP:
m_freem(*m0);
*m0 = NULL;
break;
case PF_AFRT:
if (pf_translate_af(&pd)) {
*m0 = pd.m;
action = PF_DROP;
break;
}
#ifdef INET
if (pd.naf == AF_INET) {
action = pf_route(r, kif->pfik_ifp, s, &pd,
inp);
}
#endif
#ifdef INET6
if (pd.naf == AF_INET6) {
action = pf_route6(r, kif->pfik_ifp, s, &pd,
inp);
}
#endif
*m0 = pd.m;
goto out;
break;
default:
if (pd.act.rt) {
switch (af) {
#ifdef INET
case AF_INET:
action = pf_route(r, kif->pfik_ifp, s, &pd,
inp);
break;
#endif
#ifdef INET6
case AF_INET6:
action = pf_route6(r, kif->pfik_ifp, s, &pd,
inp);
break;
#endif
}
*m0 = pd.m;
goto out;
}
if (pf_dummynet(&pd, s, r, m0) != 0) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
}
break;
}
eat_pkt:
SDT_PROBE4(pf, ip, test, done, action, reason, r, s);
if (s && action != PF_DROP) {
if (!s->if_index_in && dir == PF_IN)
s->if_index_in = ifp->if_index;
else if (!s->if_index_out && dir == PF_OUT)
s->if_index_out = ifp->if_index;
}
if (s)
PF_STATE_UNLOCK(s);
out:
#ifdef INET6
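/* If a reassembled packet passed, re-create the original fragments. */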
if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT &&
(!(pflags & PF_PFIL_NOREFRAGMENT)) &&
(mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD);
#endif
pf_sctp_multihome_delayed(&pd, kif, s, action);
return (action);
}
#endif