#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <vm/uma.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/pf/pf.h>
#include "nat64lsn.h"
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
#define NAT64LSN_EPOCH_ENTER(et) NET_EPOCH_ENTER(et)
#define NAT64LSN_EPOCH_EXIT(et) NET_EPOCH_EXIT(et)
#define NAT64LSN_EPOCH_ASSERT() NET_EPOCH_ASSERT()
#define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;
static void nat64lsn_periodic(void *data);
#define PERIODIC_DELAY 4
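/*
 * Resolve the named-object instance referenced by the external
 * instance opcode: the kernel index (kidx) selects the per-chain
 * SRV_OBJECT slot where the nat64lsn instance pointer is stored.
 */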
#define NAT64_LOOKUP(chain, cmd) \
(struct nat64lsn_instance *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
enum nat64lsn_jtype {
JTYPE_NEWHOST = 1,
JTYPE_NEWPORTGROUP,
JTYPE_DESTROY,
};
struct nat64lsn_job_item {
STAILQ_ENTRY(nat64lsn_job_item) entries;
enum nat64lsn_jtype jtype;
union {
struct {
struct mbuf *m;
struct nat64lsn_host *host;
struct nat64lsn_state *state;
uint32_t src6_hval;
uint32_t state_hval;
struct ipfw_flow_id f_id;
in_addr_t faddr;
uint16_t port;
uint8_t proto;
uint8_t done;
};
struct {
struct nat64lsn_hosts_slist hosts;
struct nat64lsn_pg_slist portgroups;
struct nat64lsn_pgchunk *pgchunk;
struct epoch_context epoch_ctx;
};
};
};
static struct mtx jmtx;
#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
#define JQUEUE_LOCK() mtx_lock(&jmtx)
#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
#define NAT64_BIT_TCP_FIN 0
#define NAT64_BIT_TCP_SYN 1
#define NAT64_BIT_TCP_ESTAB 2
#define NAT64_BIT_READY_IPV4 6
#define NAT64_BIT_STALE 7
#define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
#define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
#define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
#define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
#define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
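/*
 * Map TCP header flags onto the state flags used for TTL selection:
 * TH_FIN and TH_SYN keep their bit positions, TH_RST (0x04) is shifted
 * into NAT64_FLAG_FIN and TH_ACK (0x10) into NAT64_FLAG_ESTAB, i.e.
 * RST is treated as FIN and ACK as an established connection.
 */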
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
uint8_t result;
result = flags & (TH_FIN|TH_SYN);
result |= (flags & TH_RST) >> 2;
result |= (flags & TH_ACK) >> 2;
return (result);
}
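/*
 * Emit a pflog record for a translated packet.  State details are
 * packed into the pflog header: rulenr carries the alias IPv4 address,
 * subrulenr carries the allocated port, the protocol and the low byte
 * of the destination address.
 */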
static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
struct nat64lsn_state *state)
{
memset(plog, 0, sizeof(*plog));
plog->length = PFLOG_REAL_HDRLEN;
plog->af = family;
plog->action = PF_NAT;
plog->dir = PF_IN;
plog->rulenr = htonl(state->ip_src);
plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
(state->proto << 8) | (state->ip_dst & 0xff));
plog->ruleset[0] = '\0';
strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}
#define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
#define HOST_HVAL(c, a) HVAL((a),\
sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
#define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
#define ALIAS_BYHASH(c, v) \
((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
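/*
 * Alias selection is intentionally simple: a host always uses the
 * first aliaslink in its list, while new hosts are spread over the
 * alias addresses in round-robin order.
 */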
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{
return (CK_SLIST_FIRST(&host->aliases));
}
static struct nat64lsn_alias*
nat64lsn_get_alias(struct nat64lsn_cfg *cfg,
const struct ipfw_flow_id *f_id __unused)
{
static uint32_t idx = 0;
return (&ALIAS_BYHASH(cfg, idx++));
}
#define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
#define STATE_HASH(h, v) \
((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
#define STATES_CHUNK(p, v) \
((p)->chunks_count == 1 ? (p)->states : \
((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
#ifdef __LP64__
#define FREEMASK_FFSLL(pg, faddr) \
ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define FREEMASK_BTR(pg, faddr, bit) \
ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_BTS(pg, faddr, bit) \
ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_ISSET(pg, faddr, bit) \
ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_COPY(pg, n, out) \
(out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
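/*
 * Without 64-bit atomics the freemask is kept as two 32-bit words:
 * ffsll, bit test-and-set/clear and load are emulated with the
 * corresponding 32-bit ck_pr operations on the proper half.
 */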
static inline int
freemask_ffsll(uint32_t *freemask)
{
int i;
if ((i = ffsl(freemask[0])) != 0)
return (i);
if ((i = ffsl(freemask[1])) != 0)
return (i + 32);
return (0);
}
#define FREEMASK_FFSLL(pg, faddr) \
freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define FREEMASK_BTR(pg, faddr, bit) \
ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define FREEMASK_BTS(pg, faddr, bit) \
ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define FREEMASK_ISSET(pg, faddr, bit) \
ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define FREEMASK_COPY(pg, n, out) \
(out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif
#define NAT64LSN_TRY_PGCNT 36
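/*
 * Search for a port group with free states.  The scan starts from the
 * last used PG index and tries up to NAT64LSN_TRY_PGCNT entries,
 * wrapping to index 0 once; a successful hit updates the cached index
 * with a CAS so that concurrent lookups are not disturbed.
 */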
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, in_addr_t faddr)
{
struct nat64lsn_pg *pg;
uint32_t idx, oldidx;
int cnt;
idx = oldidx = ck_pr_load_32(pgidx);
MPASS(idx < 1024);
cnt = 0;
do {
ck_pr_fence_load();
if (idx > 1023 || !ISSET32(*chunkmask, idx / 32)) {
idx = 0;
if (cnt > 0)
break;
}
if (ISSET32(pgmask[idx / 32], idx % 32)) {
pg = ck_pr_load_ptr(
&chunks[idx / 32]->pgptr[idx % 32]);
ck_pr_fence_load();
if ((pg->flags & NAT64LSN_DEADPG) == 0 &&
FREEMASK_BITCOUNT(pg, faddr) > 0) {
if (cnt > 0)
ck_pr_cas_32(pgidx, oldidx, idx);
return (pg);
}
}
idx++;
} while (++cnt < NAT64LSN_TRY_PGCNT);
if (oldidx != idx)
ck_pr_cas_32(pgidx, oldidx, idx);
return (NULL);
}
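/*
 * Find or create an IPv6->IPv4 state for the given flow.  An existing
 * state is looked up in the host's state hash; otherwise a free slot
 * is claimed by atomically clearing a freemask bit in a suitable port
 * group, filled in, and only then marked READY for the 4->6 path.
 */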
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
uint16_t port, uint8_t proto)
{
struct nat64lsn_aliaslink *link;
struct nat64lsn_state *state;
struct nat64lsn_pg *pg;
int i, offset;
NAT64LSN_EPOCH_ASSERT();
CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
if (state->proto == proto && state->ip_dst == faddr &&
state->sport == port && state->dport == f_id->dst_port)
return (state);
}
link = nat64lsn_get_aliaslink(cfg, host, f_id);
if (link == NULL)
return (NULL);
switch (proto) {
case IPPROTO_TCP:
pg = nat64lsn_get_pg(&link->alias->tcp_chunkmask,
link->alias->tcp_pgmask, link->alias->tcp,
&link->alias->tcp_pgidx, faddr);
break;
case IPPROTO_UDP:
pg = nat64lsn_get_pg(&link->alias->udp_chunkmask,
link->alias->udp_pgmask, link->alias->udp,
&link->alias->udp_pgidx, faddr);
break;
case IPPROTO_ICMP:
pg = nat64lsn_get_pg(&link->alias->icmp_chunkmask,
link->alias->icmp_pgmask, link->alias->icmp,
&link->alias->icmp_pgidx, faddr);
break;
default:
panic("%s: wrong proto %d", __func__, proto);
}
if (pg == NULL || (pg->flags & NAT64LSN_DEADPG) != 0)
return (NULL);
state = NULL;
i = FREEMASK_BITCOUNT(pg, faddr);
while (i-- > 0) {
offset = FREEMASK_FFSLL(pg, faddr);
if (offset == 0) {
break;
}
if (FREEMASK_BTR(pg, faddr, offset - 1)) {
state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
state->flags = proto != IPPROTO_TCP ? 0 :
convert_tcp_flags(f_id->_flags);
state->proto = proto;
state->aport = pg->base_port + offset - 1;
state->dport = f_id->dst_port;
state->sport = port;
state->ip6_dst = f_id->dst_ip6;
state->ip_dst = faddr;
state->ip_src = link->alias->addr;
state->hval = hval;
state->host = host;
SET_AGE(state->timestamp);
HOST_LOCK(host);
SET_AGE(host->timestamp);
CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
state, entries);
host->states_count++;
HOST_UNLOCK(host);
NAT64STAT_INC(&cfg->base.stats, screated);
ck_pr_fence_store();
ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
break;
}
}
return (state);
}
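/*
 * Inspect an inbound ICMP message.  Echo request/reply use the ICMP id
 * as the port; for ICMP_UNREACH/ICMP_TIMXCEED errors the embedded IP
 * header is parsed to recover the original protocol, source address
 * and port.  Other ICMP types are not translated.
 */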
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
uint16_t *port)
{
struct icmp *icmp;
struct ip *ip;
int off;
uint8_t inner_proto;
ip = mtod(*mp, struct ip *);
off = (ip->ip_hl << 2) + ICMP_MINLEN;
if ((*mp)->m_len < off)
*mp = m_pullup(*mp, off);
if (*mp == NULL)
return (ENOMEM);
ip = mtod(*mp, struct ip *);
icmp = L3HDR(ip, struct icmp *);
switch (icmp->icmp_type) {
case ICMP_ECHO:
case ICMP_ECHOREPLY:
*port = ntohs(icmp->icmp_id);
return (0);
case ICMP_UNREACH:
case ICMP_TIMXCEED:
break;
default:
return (EOPNOTSUPP);
}
if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
return (EINVAL);
if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
if (*mp == NULL)
return (ENOMEM);
ip = mtodo(*mp, off);
inner_proto = ip->ip_p;
off += ip->ip_hl << 2;
*addr = ntohl(ip->ip_src.s_addr);
if ((*mp)->m_len < off + ICMP_MINLEN)
*mp = m_pullup(*mp, off + ICMP_MINLEN);
if (*mp == NULL)
return (ENOMEM);
switch (inner_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
*proto = inner_proto;
return (0);
case IPPROTO_ICMP:
icmp = mtodo(*mp, off);
if (icmp->icmp_type != ICMP_ECHO)
return (EOPNOTSUPP);
*port = ntohs(icmp->icmp_id);
return (0);
	}
return (EOPNOTSUPP);
}
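/*
 * Locate the state for an inbound IPv4 packet by its destination port.
 * Ports are counted from NAT64_MIN_PORT; each pgchunk covers 32 PGs of
 * 64 states, i.e. 2048 ports, which yields the chunk, PG and state
 * indices directly.  A set freemask bit means the slot is free, so
 * only allocated states carrying the READY flag are returned.
 */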
static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_state *state;
struct nat64lsn_pg *pg;
int chunk_idx, pg_idx, state_idx;
NAT64LSN_EPOCH_ASSERT();
if (port < NAT64_MIN_PORT)
return (NULL);
port -= NAT64_MIN_PORT;
chunk_idx = port / 2048;
port -= chunk_idx * 2048;
pg_idx = port / 64;
state_idx = port % 64;
pg = NULL;
switch (proto) {
case IPPROTO_TCP:
if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
break;
}
return (NULL);
case IPPROTO_UDP:
if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
pg = alias->udp[chunk_idx]->pgptr[pg_idx];
break;
}
return (NULL);
case IPPROTO_ICMP:
if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
break;
}
return (NULL);
default:
panic("%s: wrong proto %d", __func__, proto);
}
if (pg == NULL)
return (NULL);
if (FREEMASK_ISSET(pg, faddr, state_idx))
return (NULL);
state = &STATES_CHUNK(pg, faddr)->state[state_idx];
ck_pr_fence_load();
if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
return (state);
return (NULL);
}
static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
uint16_t *port)
{
struct ip *ip;
int len;
m = ip_reass(m);
if (m == NULL)
return (NULL);
ip = mtod(m, struct ip *);
len = ip->ip_hl << 2;
switch (ip->ip_p) {
case IPPROTO_ICMP:
len += ICMP_MINLEN;
break;
case IPPROTO_TCP:
len += sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
len += sizeof(struct udphdr);
break;
default:
m_freem(m);
NAT64STAT_INC(&cfg->base.stats, noproto);
return (NULL);
}
if (m->m_len < len) {
m = m_pullup(m, len);
if (m == NULL) {
NAT64STAT_INC(&cfg->base.stats, nomem);
return (NULL);
}
ip = mtod(m, struct ip *);
}
switch (ip->ip_p) {
case IPPROTO_TCP:
*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
break;
case IPPROTO_UDP:
*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
break;
}
return (m);
}
static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
struct pfloghdr loghdr, *logdata;
struct in6_addr src6;
struct nat64lsn_state *state;
struct nat64lsn_alias *alias;
uint32_t addr, flags;
uint16_t port, ts;
int ret;
uint8_t proto;
addr = f_id->dst_ip;
port = f_id->dst_port;
proto = f_id->proto;
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
}
ret = ntohs(mtod(*mp, struct ip *)->ip_off);
if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
*mp = nat64lsn_reassemble4(cfg, *mp, &port);
if (*mp == NULL)
return (IP_FW_DENY);
}
switch (proto) {
case IPPROTO_ICMP:
ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
if (ret != 0) {
if (ret == ENOMEM) {
NAT64STAT_INC(&cfg->base.stats, nomem);
return (IP_FW_DENY);
}
NAT64STAT_INC(&cfg->base.stats, noproto);
return (cfg->nomatch_verdict);
}
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
		}
		break;
case IPPROTO_TCP:
case IPPROTO_UDP:
break;
default:
NAT64STAT_INC(&cfg->base.stats, noproto);
return (cfg->nomatch_verdict);
}
alias = &ALIAS_BYHASH(cfg, addr);
MPASS(addr == alias->addr);
state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
port, proto);
if (state == NULL) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
}
SET_AGE(ts);
if (f_id->proto == IPPROTO_TCP)
flags = convert_tcp_flags(f_id->_flags);
else
flags = 0;
if (state->timestamp != ts)
state->timestamp = ts;
if ((state->flags & flags) != flags)
state->flags |= flags;
port = htons(state->sport);
src6 = state->ip6_dst;
if (cfg->base.flags & NAT64_LOG) {
logdata = &loghdr;
nat64lsn_log(logdata, *mp, AF_INET, state);
} else
logdata = NULL;
nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
&cfg->base, logdata);
if (ret == NAT64SKIP)
return (cfg->nomatch_verdict);
if (ret == NAT64RETURN)
*mp = NULL;
return (IP_FW_DENY);
}
static int
nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
int age, ttl;
if (ISSET32(state->flags, NAT64_BIT_STALE))
return (1);
if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
return (0);
age = GET_AGE(state->timestamp);
switch (state->proto) {
case IPPROTO_TCP:
if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
ttl = cfg->st_close_ttl;
else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
ttl = cfg->st_estab_ttl;
else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
ttl = cfg->st_syn_ttl;
else
ttl = cfg->st_syn_ttl;
if (age > ttl)
return (1);
break;
case IPPROTO_UDP:
if (age > cfg->st_udp_ttl)
return (1);
break;
case IPPROTO_ICMP:
if (age > cfg->st_icmp_ttl)
return (1);
break;
}
return (0);
}
#define PGCOUNT_ADD(alias, proto, value) \
switch (proto) { \
case IPPROTO_TCP: (alias)->tcp_pgcount += (value); break; \
case IPPROTO_UDP: (alias)->udp_pgcount += (value); break; \
case IPPROTO_ICMP: (alias)->icmp_pgcount += (value); break; \
}
#define PGCOUNT_INC(alias, proto) PGCOUNT_ADD(alias, proto, 1)
#define PGCOUNT_DEC(alias, proto) PGCOUNT_ADD(alias, proto, -1)
static inline void
nat64lsn_state_cleanup(struct nat64lsn_state *state)
{
ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
ck_pr_fence_store();
}
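/*
 * Scan all allocated states in a PG and expire them in two passes:
 * a timed-out state is first unlinked from its host hash and marked
 * STALE; on a later pass, when no new lookup can find it, its freemask
 * bit is set again and the slot becomes reusable.  Returns non-zero
 * once the whole PG has been idle longer than pg_delete_delay.
 */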
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
struct nat64lsn_state *state;
struct nat64lsn_host *host;
uint64_t freemask;
int c, i, update_age;
update_age = 0;
for (c = 0; c < pg->chunks_count; c++) {
FREEMASK_COPY(pg, c, freemask);
for (i = 0; i < 64; i++) {
if (ISSET64(freemask, i))
continue;
state = &STATES_CHUNK(pg, c)->state[i];
if (nat64lsn_check_state(cfg, state) == 0) {
update_age = 1;
continue;
}
if (ISSET32(state->flags, NAT64_BIT_STALE)) {
state->flags = 0;
ck_pr_fence_store();
FREEMASK_BTS(pg, c, i);
NAT64STAT_INC(&cfg->base.stats, sdeleted);
continue;
}
MPASS(state->flags & NAT64_FLAG_READY);
host = state->host;
HOST_LOCK(host);
CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
state, nat64lsn_state, entries);
host->states_count--;
HOST_UNLOCK(host);
nat64lsn_state_cleanup(state);
}
}
if (update_age)
SET_AGE(pg->timestamp);
if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
return (0);
return (1);
}
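/*
 * PG expiration is also two-step: first the PG's bit is cleared in the
 * pgmask and the PG is marked DEADPG, then on a later pass it is
 * unlinked from the alias and queued for destruction.  The first PG of
 * each protocol (base_port == NAT64_MIN_PORT) is always kept.
 */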
static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
struct nat64lsn_pg_slist *portgroups)
{
struct nat64lsn_alias *alias;
struct nat64lsn_pg *pg, *tpg;
uint32_t *pgmask, *pgidx;
int i, idx;
for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
alias = &cfg->aliases[i];
CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
if (nat64lsn_maintain_pg(cfg, pg) == 0)
continue;
if (pg->base_port == NAT64_MIN_PORT)
continue;
idx = (pg->base_port - NAT64_MIN_PORT) / 64;
switch (pg->proto) {
case IPPROTO_TCP:
pgmask = alias->tcp_pgmask;
pgidx = &alias->tcp_pgidx;
break;
case IPPROTO_UDP:
pgmask = alias->udp_pgmask;
pgidx = &alias->udp_pgidx;
break;
case IPPROTO_ICMP:
pgmask = alias->icmp_pgmask;
pgidx = &alias->icmp_pgidx;
break;
}
if (pg->flags & NAT64LSN_DEADPG) {
ALIAS_LOCK(alias);
CK_SLIST_REMOVE(&alias->portgroups, pg,
nat64lsn_pg, entries);
PGCOUNT_DEC(alias, pg->proto);
ALIAS_UNLOCK(alias);
NAT64STAT_INC(&cfg->base.stats, spgdeleted);
CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
continue;
}
ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
pg->flags |= NAT64LSN_DEADPG;
ck_pr_fence_store();
ck_pr_cas_32(pgidx, idx, 0);
}
}
}
static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
struct nat64lsn_hosts_slist *hosts)
{
struct nat64lsn_host *host, *tmp;
int i;
for (i = 0; i < cfg->hosts_hashsize; i++) {
CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
entries, tmp) {
if (host->flags & NAT64LSN_DEADHOST) {
if (host->states_count > 0 ||
GET_AGE(host->timestamp) <
cfg->host_delete_delay) {
host->flags &= ~NAT64LSN_DEADHOST;
continue;
}
CFG_LOCK(cfg);
CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
nat64lsn_host, entries);
cfg->hosts_count--;
CFG_UNLOCK(cfg);
CK_SLIST_INSERT_HEAD(hosts, host, entries);
continue;
}
if (host->states_count > 0 ||
GET_AGE(host->timestamp) < cfg->host_delete_delay)
continue;
host->flags |= NAT64LSN_DEADHOST;
ck_pr_fence_store();
}
}
}
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
struct nat64lsn_alias *alias;
struct nat64lsn_pgchunk *chunk;
uint32_t pgmask;
int i, c;
for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
alias = &cfg->aliases[i];
if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
continue;
for (c = 1; c < 32; c++) {
if ((alias->tcp_chunkmask & (1 << c)) == 0)
break;
chunk = ck_pr_load_ptr(&alias->tcp[c]);
if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue;
ck_pr_btr_32(&alias->tcp_chunkmask, c);
ck_pr_fence_load();
if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue;
}
}
#endif
return (NULL);
}
#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
struct nat64lsn_host *h;
struct nat64lsn_states_slist *hash;
int i, j, hsize;
for (i = 0; i < cfg->hosts_hashsize; i++) {
CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
if (h->states_count / 2 < h->states_hashsize ||
h->states_hashsize >= NAT64LSN_MAX_HSIZE)
continue;
hsize = h->states_hashsize * 2;
			hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			    M_NOWAIT);
if (hash == NULL)
continue;
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);
ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
}
}
}
#endif
static void
nat64lsn_periodic(void *data)
{
struct nat64lsn_job_item *ji;
struct nat64lsn_cfg *cfg;
cfg = (struct nat64lsn_cfg *) data;
CURVNET_SET(cfg->vp);
if (cfg->hosts_count > 0) {
ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
if (ji != NULL) {
ji->jtype = JTYPE_DESTROY;
CK_SLIST_INIT(&ji->hosts);
CK_SLIST_INIT(&ji->portgroups);
nat64lsn_expire_hosts(cfg, &ji->hosts);
nat64lsn_expire_portgroups(cfg, &ji->portgroups);
ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
nat64lsn_job_destroy);
} else
NAT64STAT_INC(&cfg->base.stats, jnomem);
}
callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
CURVNET_RESTORE();
}
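/*
 * Allocation failures are encoded as 10 * type + stage, where type 1
 * is host allocation and type 2 is PG allocation; stage 0 means
 * success for either type.
 */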
#define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage) : 0)
#define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
#define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
char a[INET6_ADDRSTRLEN];
struct nat64lsn_aliaslink *link;
struct nat64lsn_host *host;
struct nat64lsn_state *state;
uint32_t hval, data[2];
int i;
NAT64LSN_EPOCH_ASSERT();
CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
ji->host = host;
goto get_state;
}
}
host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
if (ji->host == NULL)
return (HOST_ERROR(1));
host->states_hashsize = NAT64LSN_HSIZE;
host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
host->states_hashsize, M_NAT64LSN, M_NOWAIT);
if (host->states_hash == NULL) {
uma_zfree(nat64lsn_host_zone, host);
return (HOST_ERROR(2));
}
link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
if (link == NULL) {
free(host->states_hash, M_NAT64LSN);
uma_zfree(nat64lsn_host_zone, host);
return (HOST_ERROR(3));
}
HOST_LOCK_INIT(host);
SET_AGE(host->timestamp);
host->addr = ji->f_id.src_ip6;
host->hval = ji->src6_hval;
host->flags = 0;
host->states_count = 0;
CK_SLIST_INIT(&host->aliases);
for (i = 0; i < host->states_hashsize; i++)
CK_SLIST_INIT(&host->states_hash[i]);
link->alias = nat64lsn_get_alias(cfg, &ji->f_id);
CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
ALIAS_LOCK(link->alias);
CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
link->alias->hosts_count++;
ALIAS_UNLOCK(link->alias);
CFG_LOCK(cfg);
CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
cfg->hosts_count++;
CFG_UNLOCK(cfg);
get_state:
data[0] = ji->faddr;
data[1] = (ji->f_id.dst_port << 16) | ji->port;
ji->state_hval = hval = STATE_HVAL(cfg, data);
state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
ji->faddr, ji->port, ji->proto);
if (state == NULL) {
if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
return (HOST_ERROR(4));
} else
ji->state = state;
ji->done = 1;
DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
return (HOST_ERROR(0));
}
static int
nat64lsn_find_pg_place(uint32_t *data)
{
int i;
for (i = 0; i < 32; i++) {
if (~data[i] == 0)
continue;
return (i * 32 + ffs(~data[i]) - 1);
}
return (-1);
}
static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
struct nat64lsn_alias *alias, uint32_t *chunkmask, uint32_t *pgmask,
struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, uint8_t proto)
{
struct nat64lsn_pg *pg;
int i, pg_idx, chunk_idx;
pg_idx = nat64lsn_find_pg_place(pgmask);
if (pg_idx < 0)
return (PG_ERROR(1));
chunk_idx = pg_idx / 32;
if (!ISSET32(*chunkmask, chunk_idx)) {
chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
M_NOWAIT);
if (chunks[chunk_idx] == NULL)
return (PG_ERROR(2));
ck_pr_bts_32(chunkmask, chunk_idx);
ck_pr_fence_store();
}
pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
if (pg == NULL)
return (PG_ERROR(3));
pg->chunks_count = cfg->states_chunks;
if (pg->chunks_count > 1) {
pg->freemask_chunk = malloc(pg->chunks_count *
sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
if (pg->freemask_chunk == NULL) {
uma_zfree(nat64lsn_pg_zone, pg);
return (PG_ERROR(4));
}
pg->states_chunk = malloc(pg->chunks_count *
sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
M_NOWAIT | M_ZERO);
if (pg->states_chunk == NULL) {
free(pg->freemask_chunk, M_NAT64LSN);
uma_zfree(nat64lsn_pg_zone, pg);
return (PG_ERROR(5));
}
for (i = 0; i < pg->chunks_count; i++) {
pg->states_chunk[i] = uma_zalloc(
nat64lsn_state_zone, M_NOWAIT);
if (pg->states_chunk[i] == NULL)
goto states_failed;
}
memset(pg->freemask_chunk, 0xff,
sizeof(uint64_t) * pg->chunks_count);
} else {
pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
if (pg->states == NULL) {
uma_zfree(nat64lsn_pg_zone, pg);
return (PG_ERROR(6));
}
memset(&pg->freemask64, 0xff, sizeof(uint64_t));
}
SET_AGE(pg->timestamp);
pg->flags = 0;
pg->proto = proto;
pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
ck_pr_fence_store();
ck_pr_bts_32(&pgmask[chunk_idx], pg_idx % 32);
ck_pr_store_32(pgidx, pg_idx);
ALIAS_LOCK(alias);
CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
SET_AGE(alias->timestamp);
PGCOUNT_INC(alias, proto);
ALIAS_UNLOCK(alias);
NAT64STAT_INC(&cfg->base.stats, spgcreated);
return (PG_ERROR(0));
states_failed:
for (i = 0; i < pg->chunks_count; i++)
uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
free(pg->freemask_chunk, M_NAT64LSN);
free(pg->states_chunk, M_NAT64LSN);
uma_zfree(nat64lsn_pg_zone, pg);
return (PG_ERROR(7));
}
static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
struct nat64lsn_aliaslink *link;
struct nat64lsn_alias *alias;
int ret;
link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
if (link == NULL)
return (PG_ERROR(1));
ret = 0;
alias = link->alias;
switch (ji->proto) {
case IPPROTO_TCP:
ret = nat64lsn_alloc_proto_pg(cfg, alias,
&alias->tcp_chunkmask, alias->tcp_pgmask,
alias->tcp, &alias->tcp_pgidx, ji->proto);
break;
case IPPROTO_UDP:
ret = nat64lsn_alloc_proto_pg(cfg, alias,
&alias->udp_chunkmask, alias->udp_pgmask,
alias->udp, &alias->udp_pgidx, ji->proto);
break;
case IPPROTO_ICMP:
ret = nat64lsn_alloc_proto_pg(cfg, alias,
&alias->icmp_chunkmask, alias->icmp_pgmask,
alias->icmp, &alias->icmp_pgidx, ji->proto);
break;
default:
panic("%s: wrong proto %d", __func__, ji->proto);
}
if (ret == PG_ERROR(1)) {
printf("NAT64LSN: %s: failed to obtain PG\n",
__func__);
return (ret);
}
if (ret == PG_ERROR(0)) {
ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
ji->state_hval, ji->faddr, ji->port, ji->proto);
if (ji->state == NULL)
ret = PG_ERROR(8);
else
ji->done = 1;
}
return (ret);
}
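/*
 * Job queue handler.  The pending queue is detached under the queue
 * mutex, then each host/PG allocation request is serviced within the
 * network epoch and, when a state was successfully set up, the held
 * packet is reinjected through the IPv6 translation path.
 */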
static void
nat64lsn_do_request(void *data)
{
struct epoch_tracker et;
struct nat64lsn_job_head jhead;
struct nat64lsn_job_item *ji, *ji2;
struct nat64lsn_cfg *cfg;
int jcount;
uint8_t flags;
cfg = (struct nat64lsn_cfg *)data;
if (cfg->jlen == 0)
return;
CURVNET_SET(cfg->vp);
STAILQ_INIT(&jhead);
JQUEUE_LOCK();
STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
jcount = cfg->jlen;
cfg->jlen = 0;
JQUEUE_UNLOCK();
NAT64STAT_INC(&cfg->base.stats, jcalls);
DPRINTF(DP_JQUEUE, "count=%d", jcount);
NAT64LSN_EPOCH_ENTER(et);
STAILQ_FOREACH(ji, &jhead, entries) {
switch (ji->jtype) {
case JTYPE_NEWHOST:
if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
NAT64STAT_INC(&cfg->base.stats, jhostfails);
break;
case JTYPE_NEWPORTGROUP:
if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
NAT64STAT_INC(&cfg->base.stats, jportfails);
break;
default:
continue;
}
if (ji->done != 0) {
flags = ji->proto != IPPROTO_TCP ? 0 :
convert_tcp_flags(ji->f_id._flags);
nat64lsn_translate6_internal(cfg, &ji->m,
ji->state, flags);
NAT64STAT_INC(&cfg->base.stats, jreinjected);
}
}
NAT64LSN_EPOCH_EXIT(et);
ji = STAILQ_FIRST(&jhead);
while (ji != NULL) {
ji2 = STAILQ_NEXT(ji, entries);
m_freem(ji->m);
uma_zfree(nat64lsn_job_zone, ji);
ji = ji2;
}
CURVNET_RESTORE();
}
static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
struct nat64lsn_job_item *ji;
ji = NULL;
if (cfg->jlen >= cfg->jmaxlen)
NAT64STAT_INC(&cfg->base.stats, jmaxlen);
else {
ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
if (ji == NULL)
NAT64STAT_INC(&cfg->base.stats, jnomem);
}
if (ji == NULL) {
NAT64STAT_INC(&cfg->base.stats, dropped);
DPRINTF(DP_DROPS, "failed to create job");
} else {
ji->jtype = jtype;
ji->done = 0;
}
return (ji);
}
static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
JQUEUE_LOCK();
STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
NAT64STAT_INC(&cfg->base.stats, jrequests);
cfg->jlen++;
if (callout_pending(&cfg->jcallout) == 0)
callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
JQUEUE_UNLOCK();
}
static void
nat64lsn_host_cleanup(struct nat64lsn_host *host)
{
struct nat64lsn_state *state, *ts;
int i;
printf("NAT64LSN: %s: race condition has been detected for host %p\n",
__func__, host);
for (i = 0; i < host->states_hashsize; i++) {
CK_SLIST_FOREACH_SAFE(state, &host->states_hash[i],
entries, ts) {
CK_SLIST_REMOVE(&host->states_hash[i], state,
nat64lsn_state, entries);
host->states_count--;
nat64lsn_state_cleanup(state);
}
}
MPASS(host->states_count == 0);
}
static void
nat64lsn_pg_cleanup(struct nat64lsn_pg *pg)
{
struct nat64lsn_state *state;
uint64_t usedmask;
int c, i;
printf("NAT64LSN: %s: race condition has been detected for pg %p\n",
__func__, pg);
for (c = 0; c < pg->chunks_count; c++) {
usedmask = ~(*FREEMASK_CHUNK(pg, c));
if (usedmask == 0)
continue;
for (i = 0; i < 64; i++) {
if (!ISSET64(usedmask, i))
continue;
state = &STATES_CHUNK(pg, c)->state[i];
if (ISSET32(state->flags, NAT64_BIT_STALE)) {
FREEMASK_BTS(pg, c, i);
continue;
}
if (ISSET32(state->flags, NAT64_BIT_READY_IPV4)) {
struct nat64lsn_host *host;
host = state->host;
HOST_LOCK(host);
CK_SLIST_REMOVE(&STATE_HASH(host,
state->hval), state, nat64lsn_state,
entries);
host->states_count--;
HOST_UNLOCK(host);
nat64lsn_state_cleanup(state);
}
}
}
}
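/*
 * Deferred destruction runs from an epoch callback, so no reader can
 * still hold a reference.  If a host or PG turns out to be non-empty
 * (a state was created while it was dying), it is cleaned up and the
 * job is re-queued for one more epoch round before the memory is
 * finally freed.
 */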
static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
struct nat64lsn_hosts_slist hosts;
struct nat64lsn_pg_slist portgroups;
struct nat64lsn_job_item *ji;
struct nat64lsn_host *host;
struct nat64lsn_pg *pg;
int i;
CK_SLIST_INIT(&hosts);
CK_SLIST_INIT(&portgroups);
ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
MPASS(ji->jtype == JTYPE_DESTROY);
while (!CK_SLIST_EMPTY(&ji->hosts)) {
host = CK_SLIST_FIRST(&ji->hosts);
CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
if (host->states_count > 0) {
printf("NAT64LSN: %s: destroying host with %d "
"states\n", __func__, host->states_count);
nat64lsn_host_cleanup(host);
CK_SLIST_INSERT_HEAD(&hosts, host, entries);
continue;
}
nat64lsn_destroy_host(host);
}
while (!CK_SLIST_EMPTY(&ji->portgroups)) {
pg = CK_SLIST_FIRST(&ji->portgroups);
CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
for (i = 0; i < pg->chunks_count; i++) {
if (FREEMASK_BITCOUNT(pg, i) != 64) {
printf("NAT64LSN: %s: destroying PG %p "
"with non-empty chunk %d\n", __func__,
pg, i);
nat64lsn_pg_cleanup(pg);
CK_SLIST_INSERT_HEAD(&portgroups,
pg, entries);
i = -1;
break;
}
}
if (i != -1)
nat64lsn_destroy_pg(pg);
}
if (CK_SLIST_EMPTY(&hosts) &&
CK_SLIST_EMPTY(&portgroups)) {
uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
uma_zfree(nat64lsn_job_zone, ji);
return;
}
CK_SLIST_MOVE(&ji->hosts, &hosts, entries);
CK_SLIST_MOVE(&ji->portgroups, &portgroups, entries);
NAT64LSN_EPOCH_CALL(&ji->epoch_ctx, nat64lsn_job_destroy);
}
static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_job_item *ji;
ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
if (ji != NULL) {
ji->m = *mp;
ji->f_id = *f_id;
ji->faddr = faddr;
ji->port = port;
ji->proto = proto;
ji->src6_hval = hval;
nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jhostsreq);
*mp = NULL;
}
return (IP_FW_DENY);
}
static int
nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_job_item *ji;
ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
if (ji != NULL) {
ji->m = *mp;
ji->f_id = *f_id;
ji->faddr = faddr;
ji->port = port;
ji->proto = proto;
ji->state_hval = hval;
ji->host = host;
nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jportreq);
*mp = NULL;
}
return (IP_FW_DENY);
}
static int
nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
struct nat64lsn_state *state, uint8_t flags)
{
struct pfloghdr loghdr, *logdata;
int ret;
uint16_t ts;
SET_AGE(ts);
if (state->timestamp != ts)
state->timestamp = ts;
	if ((state->flags & flags) != flags)
state->flags |= flags;
if (cfg->base.flags & NAT64_LOG) {
logdata = &loghdr;
nat64lsn_log(logdata, *mp, AF_INET6, state);
} else
logdata = NULL;
ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
htons(state->aport), &cfg->base, logdata);
if (ret == NAT64SKIP)
return (cfg->nomatch_verdict);
if (ret == NAT64RETURN)
*mp = NULL;
return (IP_FW_DENY);
}
static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
struct mbuf **mp)
{
struct nat64lsn_state *state;
struct nat64lsn_host *host;
struct icmp6_hdr *icmp6;
uint32_t addr, hval, data[2];
int offset, proto;
uint16_t port;
uint8_t flags;
port = f_id->src_port;
proto = f_id->proto;
switch (f_id->proto) {
case IPPROTO_ICMPV6:
offset = 0;
proto = nat64_getlasthdr(*mp, &offset);
if (proto < 0) {
NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contiguous");
return (IP_FW_DENY);
}
if (proto == IPPROTO_ICMPV6) {
icmp6 = mtodo(*mp, offset);
if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
icmp6->icmp6_type == ICMP6_ECHO_REPLY)
port = ntohs(icmp6->icmp6_id);
}
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
case IPPROTO_TCP:
case IPPROTO_UDP:
break;
default:
NAT64STAT_INC(&cfg->base.stats, noproto);
return (cfg->nomatch_verdict);
}
addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
char a[INET_ADDRSTRLEN];
NAT64STAT_INC(&cfg->base.stats, dropped);
DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
inet_ntop(AF_INET, &addr, a, sizeof(a)));
return (IP_FW_DENY);
}
hval = HOST_HVAL(cfg, &f_id->src_ip6);
CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
break;
}
addr = ntohl(addr);
if (host == NULL)
return (nat64lsn_request_host(cfg, f_id, mp,
hval, addr, port, proto));
flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
data[0] = addr;
data[1] = (f_id->dst_port << 16) | port;
hval = STATE_HVAL(cfg, data);
state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
port, proto);
if (state == NULL)
return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
port, proto));
return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done)
{
struct nat64lsn_instance *i;
ipfw_insn *icmd;
int ret;
IPFW_RLOCK_ASSERT(ch);
*done = 0;
icmd = cmd + F_LEN(cmd);
if (cmd->opcode != O_EXTERNAL_ACTION ||
insntod(cmd, kidx)->kidx != V_nat64lsn_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(i = NAT64_LOOKUP(ch, icmd)) == NULL)
return (IP_FW_DENY);
*done = 1;
switch (args->f_id.addr_type) {
case 4:
ret = nat64lsn_translate4(i->cfg, &args->f_id, &args->m);
break;
case 6:
if ((i->cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
memcmp(&args->f_id.dst_ip6, &i->cfg->base.plat_prefix,
i->cfg->base.plat_plen / 8) != 0) {
ret = i->cfg->nomatch_verdict;
break;
}
ret = nat64lsn_translate6(i->cfg, &args->f_id, &args->m);
break;
default:
ret = i->cfg->nomatch_verdict;
}
if (ret != IP_FW_PASS && args->m != NULL) {
m_freem(args->m);
args->m = NULL;
}
return (ret);
}
static int
nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
struct nat64lsn_states_chunk *chunk;
int i;
chunk = (struct nat64lsn_states_chunk *)mem;
for (i = 0; i < 64; i++)
chunk->state[i].flags = 0;
return (0);
}
void
nat64lsn_init_internal(void)
{
nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
JQUEUE_LOCK_INIT();
}
void
nat64lsn_uninit_internal(void)
{
JQUEUE_LOCK_DESTROY();
uma_zdestroy(nat64lsn_host_zone);
uma_zdestroy(nat64lsn_pgchunk_zone);
uma_zdestroy(nat64lsn_pg_zone);
uma_zdestroy(nat64lsn_aliaslink_zone);
uma_zdestroy(nat64lsn_state_zone);
uma_zdestroy(nat64lsn_job_zone);
}
void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{
CALLOUT_LOCK(cfg);
callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
nat64lsn_periodic, cfg);
CALLOUT_UNLOCK(cfg);
}
struct nat64lsn_cfg *
nat64lsn_init_config(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
struct nat64lsn_cfg *cfg;
struct nat64lsn_alias *alias;
int i, naddr;
cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
M_WAITOK | M_ZERO);
CFG_LOCK_INIT(cfg);
CALLOUT_LOCK_INIT(cfg);
STAILQ_INIT(&cfg->jhead);
cfg->vp = curvnet;
COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
cfg->hash_seed = arc4random();
cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
for (i = 0; i < cfg->hosts_hashsize; i++)
CK_SLIST_INIT(&cfg->hosts_hash[i]);
naddr = 1 << (32 - plen);
cfg->prefix4 = prefix;
cfg->pmask4 = prefix | (naddr - 1);
cfg->plen4 = plen;
cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
M_NAT64LSN, M_WAITOK | M_ZERO);
for (i = 0; i < naddr; i++) {
alias = &cfg->aliases[i];
alias->addr = prefix + i;
CK_SLIST_INIT(&alias->hosts);
ALIAS_LOCK_INIT(alias);
}
callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
return (cfg);
}
static void
nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
int i;
if (pg->chunks_count == 1) {
uma_zfree(nat64lsn_state_zone, pg->states);
} else {
for (i = 0; i < pg->chunks_count; i++)
uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
free(pg->states_chunk, M_NAT64LSN);
free(pg->freemask_chunk, M_NAT64LSN);
}
uma_zfree(nat64lsn_pg_zone, pg);
}
static void
nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
struct nat64lsn_alias *alias)
{
struct nat64lsn_pg *pg;
int i;
while (!CK_SLIST_EMPTY(&alias->portgroups)) {
pg = CK_SLIST_FIRST(&alias->portgroups);
CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
nat64lsn_destroy_pg(pg);
}
for (i = 0; i < 32; i++) {
if (ISSET32(alias->tcp_chunkmask, i))
uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
if (ISSET32(alias->udp_chunkmask, i))
uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
if (ISSET32(alias->icmp_chunkmask, i))
uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
}
ALIAS_LOCK_DESTROY(alias);
}
static void
nat64lsn_destroy_host(struct nat64lsn_host *host)
{
struct nat64lsn_aliaslink *link;
while (!CK_SLIST_EMPTY(&host->aliases)) {
link = CK_SLIST_FIRST(&host->aliases);
CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
ALIAS_LOCK(link->alias);
CK_SLIST_REMOVE(&link->alias->hosts, link,
nat64lsn_aliaslink, alias_entries);
link->alias->hosts_count--;
ALIAS_UNLOCK(link->alias);
uma_zfree(nat64lsn_aliaslink_zone, link);
}
HOST_LOCK_DESTROY(host);
free(host->states_hash, M_NAT64LSN);
uma_zfree(nat64lsn_host_zone, host);
}
void
nat64lsn_destroy_config(struct nat64lsn_cfg *cfg)
{
struct nat64lsn_host *host;
int i;
CALLOUT_LOCK(cfg);
callout_drain(&cfg->periodic);
CALLOUT_UNLOCK(cfg);
callout_drain(&cfg->jcallout);
for (i = 0; i < cfg->hosts_hashsize; i++) {
while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
nat64lsn_destroy_host(host);
}
}
for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
CALLOUT_LOCK_DESTROY(cfg);
CFG_LOCK_DESTROY(cfg);
COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
free(cfg->hosts_hash, M_NAT64LSN);
free(cfg->aliases, M_NAT64LSN);
free(cfg, M_NAT64LSN);
}