/* Path: blob/main/sys/netpfil/ipfw/nat64/nat64lsn_control.c (39536 views) */
/*-1* SPDX-License-Identifier: BSD-2-Clause2*3* Copyright (c) 2015-2019 Yandex LLC4* Copyright (c) 2015 Alexander V. Chernikov <[email protected]>5* Copyright (c) 2015-2019 Andrey V. Elsukov <[email protected]>6*7* Redistribution and use in source and binary forms, with or without8* modification, are permitted provided that the following conditions9* are met:10*11* 1. Redistributions of source code must retain the above copyright12* notice, this list of conditions and the following disclaimer.13* 2. Redistributions in binary form must reproduce the above copyright14* notice, this list of conditions and the following disclaimer in the15* documentation and/or other materials provided with the distribution.16*17* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR18* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES19* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.20* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,21* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT22* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,23* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY24* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT25* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF26* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.27*/2829#include <sys/param.h>30#include <sys/systm.h>31#include <sys/counter.h>32#include <sys/ck.h>33#include <sys/epoch.h>34#include <sys/errno.h>35#include <sys/kernel.h>36#include <sys/lock.h>37#include <sys/malloc.h>38#include <sys/mbuf.h>39#include <sys/module.h>40#include <sys/rmlock.h>41#include <sys/rwlock.h>42#include <sys/socket.h>43#include <sys/sockopt.h>4445#include <net/if.h>4647#include <netinet/in.h>48#include <netinet/ip.h>49#include <netinet/ip_var.h>50#include <netinet/ip_fw.h>51#include 
<netinet6/ip_fw_nat64.h>5253#include <netpfil/ipfw/ip_fw_private.h>5455#include "nat64lsn.h"5657VNET_DEFINE(uint32_t, nat64lsn_eid) = 0;5859static struct nat64lsn_instance *60nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)61{62struct named_object *no;6364no = ipfw_objhash_lookup_name_type(ni, set,65IPFW_TLV_NAT64LSN_NAME, name);66if (no == NULL)67return (NULL);68return (__containerof(no, struct nat64lsn_instance, no));69}7071static void72nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)73{7475if (uc->jmaxlen == 0)76uc->jmaxlen = NAT64LSN_JMAXLEN;77if (uc->jmaxlen > 65536)78uc->jmaxlen = 65536;79if (uc->nh_delete_delay == 0)80uc->nh_delete_delay = NAT64LSN_HOST_AGE;81if (uc->pg_delete_delay == 0)82uc->pg_delete_delay = NAT64LSN_PG_AGE;83if (uc->st_syn_ttl == 0)84uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;85if (uc->st_close_ttl == 0)86uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;87if (uc->st_estab_ttl == 0)88uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;89if (uc->st_udp_ttl == 0)90uc->st_udp_ttl = NAT64LSN_UDP_AGE;91if (uc->st_icmp_ttl == 0)92uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;9394if (uc->states_chunks == 0)95uc->states_chunks = 1;96else if (uc->states_chunks >= 128)97uc->states_chunks = 128;98else if (!powerof2(uc->states_chunks))99uc->states_chunks = 1 << fls(uc->states_chunks);100}101102/*103* Creates new nat64lsn instance.104* Data layout (v0)(current):105* Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]106*107* Returns 0 on success108*/109static int110nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,111struct sockopt_data *sd)112{113ipfw_obj_lheader *olh;114ipfw_nat64lsn_cfg *uc;115struct nat64lsn_instance *i;116struct nat64lsn_cfg *cfg;117struct namedobj_instance *ni;118uint32_t addr4, mask4;119120if (sd->valsize != sizeof(*olh) + sizeof(*uc))121return (EINVAL);122123olh = (ipfw_obj_lheader *)sd->kbuf;124uc = (ipfw_nat64lsn_cfg *)(olh + 1);125126if (ipfw_check_object_name_generic(uc->name) != 0)127return (EINVAL);128129if (uc->set >= 
IPFW_MAX_SETS)130return (EINVAL);131132if (uc->plen4 > 32)133return (EINVAL);134135/*136* Unspecified address has special meaning. But it must137* have valid prefix length. This length will be used to138* correctly extract and embedd IPv4 address into IPv6.139*/140if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&141IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&142nat64_check_prefixlen(uc->plen6) != 0)143return (EINVAL);144145/* XXX: Check prefix4 to be global */146addr4 = ntohl(uc->prefix4.s_addr);147mask4 = ~((1 << (32 - uc->plen4)) - 1);148if ((addr4 & mask4) != addr4)149return (EINVAL);150151nat64lsn_default_config(uc);152153ni = CHAIN_TO_SRV(ch);154IPFW_UH_RLOCK(ch);155if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {156IPFW_UH_RUNLOCK(ch);157return (EEXIST);158}159IPFW_UH_RUNLOCK(ch);160161i = malloc(sizeof(struct nat64lsn_instance), M_NAT64LSN,162M_WAITOK | M_ZERO);163strlcpy(i->name, uc->name, sizeof(i->name));164i->no.name = i->name;165i->no.etlv = IPFW_TLV_NAT64LSN_NAME;166i->no.set = uc->set;167168cfg = nat64lsn_init_config(ch, addr4, uc->plen4);169cfg->base.plat_prefix = uc->prefix6;170cfg->base.plat_plen = uc->plen6;171cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;172if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))173cfg->base.flags |= NAT64_WKPFX;174else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))175cfg->base.flags |= NAT64LSN_ANYPREFIX;176177cfg->states_chunks = uc->states_chunks;178cfg->jmaxlen = uc->jmaxlen;179cfg->host_delete_delay = uc->nh_delete_delay;180cfg->pg_delete_delay = uc->pg_delete_delay;181cfg->st_syn_ttl = uc->st_syn_ttl;182cfg->st_close_ttl = uc->st_close_ttl;183cfg->st_estab_ttl = uc->st_estab_ttl;184cfg->st_udp_ttl = uc->st_udp_ttl;185cfg->st_icmp_ttl = uc->st_icmp_ttl;186cfg->nomatch_verdict = IP_FW_DENY;187188IPFW_UH_WLOCK(ch);189190if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {191IPFW_UH_WUNLOCK(ch);192nat64lsn_destroy_config(cfg);193free(i, M_NAT64LSN);194return (EEXIST);195}196197if 
(ipfw_objhash_alloc_idx(ni, &i->no.kidx) != 0) {198IPFW_UH_WUNLOCK(ch);199nat64lsn_destroy_config(cfg);200free(i, M_NAT64LSN);201return (ENOSPC);202}203ipfw_objhash_add(ni, &i->no);204205/* Okay, let's link data */206i->cfg = cfg;207SRV_OBJECT(ch, i->no.kidx) = i;208nat64lsn_start_instance(cfg);209210IPFW_UH_WUNLOCK(ch);211return (0);212}213214static void215nat64lsn_detach_instance(struct ip_fw_chain *ch,216struct nat64lsn_instance *i)217{218219IPFW_UH_WLOCK_ASSERT(ch);220SRV_OBJECT(ch, i->no.kidx) = NULL;221ipfw_objhash_del(CHAIN_TO_SRV(ch), &i->no);222ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), i->no.kidx);223}224225/*226* Destroys nat64 instance.227* Data layout (v0)(current):228* Request: [ ipfw_obj_header ]229*230* Returns 0 on success231*/232static int233nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,234struct sockopt_data *sd)235{236struct nat64lsn_instance *i;237ipfw_obj_header *oh;238239if (sd->valsize != sizeof(*oh))240return (EINVAL);241242oh = (ipfw_obj_header *)op3;243244IPFW_UH_WLOCK(ch);245i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);246if (i == NULL) {247IPFW_UH_WUNLOCK(ch);248return (ENOENT);249}250251if (i->no.refcnt > 0) {252IPFW_UH_WUNLOCK(ch);253return (EBUSY);254}255256ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, i->no.kidx);257nat64lsn_detach_instance(ch, i);258IPFW_UH_WUNLOCK(ch);259260nat64lsn_destroy_config(i->cfg);261free(i, M_NAT64LSN);262return (0);263}264265#define __COPY_STAT_FIELD(_cfg, _stats, _field) \266(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)267static void268export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,269struct ipfw_nat64lsn_stats *stats)270{271struct nat64lsn_alias *alias;272int i;273274__COPY_STAT_FIELD(cfg, stats, opcnt64);275__COPY_STAT_FIELD(cfg, stats, opcnt46);276__COPY_STAT_FIELD(cfg, stats, ofrags);277__COPY_STAT_FIELD(cfg, stats, ifrags);278__COPY_STAT_FIELD(cfg, stats, oerrors);279__COPY_STAT_FIELD(cfg, stats, noroute4);280__COPY_STAT_FIELD(cfg, 
stats, noroute6);281__COPY_STAT_FIELD(cfg, stats, nomatch4);282__COPY_STAT_FIELD(cfg, stats, noproto);283__COPY_STAT_FIELD(cfg, stats, nomem);284__COPY_STAT_FIELD(cfg, stats, dropped);285286__COPY_STAT_FIELD(cfg, stats, jcalls);287__COPY_STAT_FIELD(cfg, stats, jrequests);288__COPY_STAT_FIELD(cfg, stats, jhostsreq);289__COPY_STAT_FIELD(cfg, stats, jportreq);290__COPY_STAT_FIELD(cfg, stats, jhostfails);291__COPY_STAT_FIELD(cfg, stats, jportfails);292__COPY_STAT_FIELD(cfg, stats, jmaxlen);293__COPY_STAT_FIELD(cfg, stats, jnomem);294__COPY_STAT_FIELD(cfg, stats, jreinjected);295__COPY_STAT_FIELD(cfg, stats, screated);296__COPY_STAT_FIELD(cfg, stats, sdeleted);297__COPY_STAT_FIELD(cfg, stats, spgcreated);298__COPY_STAT_FIELD(cfg, stats, spgdeleted);299300stats->hostcount = cfg->hosts_count;301for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {302alias = &cfg->aliases[i];303stats->tcpchunks += alias->tcp_pgcount;304stats->udpchunks += alias->udp_pgcount;305stats->icmpchunks += alias->icmp_pgcount;306}307}308#undef __COPY_STAT_FIELD309310static void311nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_instance *i,312ipfw_nat64lsn_cfg *uc)313{314struct nat64lsn_cfg *cfg;315316strlcpy(uc->name, i->no.name, sizeof(uc->name));317uc->set = i->no.set;318cfg = i->cfg;319320uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;321uc->states_chunks = cfg->states_chunks;322uc->jmaxlen = cfg->jmaxlen;323uc->nh_delete_delay = cfg->host_delete_delay;324uc->pg_delete_delay = cfg->pg_delete_delay;325uc->st_syn_ttl = cfg->st_syn_ttl;326uc->st_close_ttl = cfg->st_close_ttl;327uc->st_estab_ttl = cfg->st_estab_ttl;328uc->st_udp_ttl = cfg->st_udp_ttl;329uc->st_icmp_ttl = cfg->st_icmp_ttl;330uc->prefix4.s_addr = htonl(cfg->prefix4);331uc->prefix6 = cfg->base.plat_prefix;332uc->plen4 = cfg->plen4;333uc->plen6 = cfg->base.plat_plen;334}335336struct nat64_dump_arg {337struct ip_fw_chain *ch;338struct sockopt_data *sd;339};340341static int342export_config_cb(struct namedobj_instance *ni, 
struct named_object *no,343void *arg)344{345struct nat64_dump_arg *da;346ipfw_nat64lsn_cfg *uc;347348da = (struct nat64_dump_arg *)arg;349uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,350sizeof(*uc));351nat64lsn_export_config(da->ch,352__containerof(no, struct nat64lsn_instance, no), uc);353return (0);354}355356/*357* Lists all nat64 lsn instances currently available in kernel.358* Data layout (v0)(current):359* Request: [ ipfw_obj_lheader ]360* Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]361*362* Returns 0 on success363*/364static int365nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,366struct sockopt_data *sd)367{368ipfw_obj_lheader *olh;369struct nat64_dump_arg da;370371/* Check minimum header size */372if (sd->valsize < sizeof(ipfw_obj_lheader))373return (EINVAL);374375olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));376377IPFW_UH_RLOCK(ch);378olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),379IPFW_TLV_NAT64LSN_NAME);380olh->objsize = sizeof(ipfw_nat64lsn_cfg);381olh->size = sizeof(*olh) + olh->count * olh->objsize;382383if (sd->valsize < olh->size) {384IPFW_UH_RUNLOCK(ch);385return (ENOMEM);386}387memset(&da, 0, sizeof(da));388da.ch = ch;389da.sd = sd;390ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,391IPFW_TLV_NAT64LSN_NAME);392IPFW_UH_RUNLOCK(ch);393394return (0);395}396397/*398* Change existing nat64lsn instance configuration.399* Data layout (v0)(current):400* Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]401* Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]402*403* Returns 0 on success404*/405static int406nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,407struct sockopt_data *sd)408{409ipfw_obj_header *oh;410ipfw_nat64lsn_cfg *uc;411struct nat64lsn_instance *i;412struct nat64lsn_cfg *cfg;413struct namedobj_instance *ni;414415if (sd->valsize != sizeof(*oh) + sizeof(*uc))416return (EINVAL);417418oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,419sizeof(*oh) + sizeof(*uc));420uc 
= (ipfw_nat64lsn_cfg *)(oh + 1);421422if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||423oh->ntlv.set >= IPFW_MAX_SETS)424return (EINVAL);425426ni = CHAIN_TO_SRV(ch);427if (sd->sopt->sopt_dir == SOPT_GET) {428IPFW_UH_RLOCK(ch);429i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);430if (i == NULL) {431IPFW_UH_RUNLOCK(ch);432return (ENOENT);433}434nat64lsn_export_config(ch, i, uc);435IPFW_UH_RUNLOCK(ch);436return (0);437}438439nat64lsn_default_config(uc);440441IPFW_UH_WLOCK(ch);442i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);443if (i == NULL) {444IPFW_UH_WUNLOCK(ch);445return (ENOENT);446}447448/*449* For now allow to change only following values:450* jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,451* tcp_est_age, udp_age, icmp_age, flags, states_chunks.452*/453cfg = i->cfg;454cfg->states_chunks = uc->states_chunks;455cfg->jmaxlen = uc->jmaxlen;456cfg->host_delete_delay = uc->nh_delete_delay;457cfg->pg_delete_delay = uc->pg_delete_delay;458cfg->st_syn_ttl = uc->st_syn_ttl;459cfg->st_close_ttl = uc->st_close_ttl;460cfg->st_estab_ttl = uc->st_estab_ttl;461cfg->st_udp_ttl = uc->st_udp_ttl;462cfg->st_icmp_ttl = uc->st_icmp_ttl;463cfg->base.flags &= ~NAT64LSN_FLAGSMASK;464cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;465466IPFW_UH_WUNLOCK(ch);467468return (0);469}470471/*472* Get nat64lsn statistics.473* Data layout (v0)(current):474* Request: [ ipfw_obj_header ]475* Reply: [ ipfw_obj_header ipfw_counter_tlv ]476*477* Returns 0 on success478*/479static int480nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,481struct sockopt_data *sd)482{483struct ipfw_nat64lsn_stats stats;484struct nat64lsn_instance *i;485ipfw_obj_header *oh;486ipfw_obj_ctlv *ctlv;487size_t sz;488489sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);490if (sd->valsize % sizeof(uint64_t))491return (EINVAL);492if (sd->valsize < sz)493return (ENOMEM);494oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);495if (oh == NULL)496return 
(EINVAL);497memset(&stats, 0, sizeof(stats));498499IPFW_UH_RLOCK(ch);500i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);501if (i == NULL) {502IPFW_UH_RUNLOCK(ch);503return (ENOENT);504}505506export_stats(ch, i->cfg, &stats);507IPFW_UH_RUNLOCK(ch);508509ctlv = (ipfw_obj_ctlv *)(oh + 1);510memset(ctlv, 0, sizeof(*ctlv));511ctlv->head.type = IPFW_TLV_COUNTERS;512ctlv->head.length = sz - sizeof(ipfw_obj_header);513ctlv->count = sizeof(stats) / sizeof(uint64_t);514ctlv->objsize = sizeof(uint64_t);515ctlv->version = IPFW_NAT64_VERSION;516memcpy(ctlv + 1, &stats, sizeof(stats));517return (0);518}519520/*521* Reset nat64lsn statistics.522* Data layout (v0)(current):523* Request: [ ipfw_obj_header ]524*525* Returns 0 on success526*/527static int528nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,529struct sockopt_data *sd)530{531struct nat64lsn_instance *i;532ipfw_obj_header *oh;533534if (sd->valsize != sizeof(*oh))535return (EINVAL);536oh = (ipfw_obj_header *)sd->kbuf;537if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||538oh->ntlv.set >= IPFW_MAX_SETS)539return (EINVAL);540541IPFW_UH_WLOCK(ch);542i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);543if (i == NULL) {544IPFW_UH_WUNLOCK(ch);545return (ENOENT);546}547COUNTER_ARRAY_ZERO(i->cfg->base.stats.cnt, NAT64STATS);548IPFW_UH_WUNLOCK(ch);549return (0);550}551552#ifdef __LP64__553#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n))554#else555#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \556((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)557#endif558/*559* Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg560* ipfw_nat64lsn_state x count, ... 
] ]561*/562static int563nat64lsn_export_states(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,564struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)565{566ipfw_nat64lsn_state_v1 *s;567struct nat64lsn_state *state;568uint64_t freemask;569uint32_t i, count;570571/* validate user input */572if (idx->chunk > pg->chunks_count - 1)573return (EINVAL);574575FREEMASK_COPY(pg, idx->chunk, freemask);576count = 64 - bitcount64(freemask);577if (count == 0)578return (0); /* Try next PG/chunk */579580DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",581(uintmax_t)idx->index, count);582583s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,584count * sizeof(ipfw_nat64lsn_state_v1));585if (s == NULL)586return (ENOMEM);587588for (i = 0; i < 64; i++) {589if (ISSET64(freemask, i))590continue;591state = pg->chunks_count == 1 ? &pg->states->state[i] :592&pg->states_chunk[idx->chunk]->state[i];593594s->host6 = state->host->addr;595s->daddr.s_addr = htonl(state->ip_dst);596s->dport = state->dport;597s->sport = state->sport;598s->aport = state->aport;599s->flags = (uint8_t)(state->flags & 7);600s->proto = state->proto;601s->idle = GET_AGE(state->timestamp);602s++;603}604*ret_count = count;605return (0);606}607608#define LAST_IDX 0xFF609static int610nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,611union nat64lsn_pgidx *idx)612{613614/* First iterate over chunks */615if (pg != NULL) {616if (idx->chunk < pg->chunks_count - 1) {617idx->chunk++;618return (0);619}620}621idx->chunk = 0;622/* Then over PGs */623if (idx->port < UINT16_MAX - 64) {624idx->port += 64;625return (0);626}627idx->port = NAT64_MIN_PORT;628/* Then over supported protocols */629switch (idx->proto) {630case IPPROTO_ICMP:631idx->proto = IPPROTO_TCP;632return (0);633case IPPROTO_TCP:634idx->proto = IPPROTO_UDP;635return (0);636default:637idx->proto = IPPROTO_ICMP;638}639/* And then over IPv4 alias addresses */640if (idx->addr < cfg->pmask4) {641idx->addr++;642return (1); /* New 
states group is needed */643}644idx->index = LAST_IDX;645return (-1); /* No more states */646}647648static struct nat64lsn_pg*649nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)650{651struct nat64lsn_alias *alias;652int pg_idx;653654alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];655MPASS(alias->addr == idx->addr);656657pg_idx = (idx->port - NAT64_MIN_PORT) / 64;658switch (idx->proto) {659case IPPROTO_ICMP:660if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))661return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);662break;663case IPPROTO_TCP:664if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))665return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);666break;667case IPPROTO_UDP:668if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))669return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);670break;671}672return (NULL);673}674675/*676* Lists nat64lsn states.677* Data layout (v1)(current):678* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]679* Reply: [ ipfw_obj_header ipfw_obj_data [680* ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]681*682* Returns 0 on success683*/684static int685nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,686struct sockopt_data *sd)687{688ipfw_obj_header *oh;689ipfw_obj_data *od;690ipfw_nat64lsn_stg_v1 *stg;691struct nat64lsn_instance *i;692struct nat64lsn_cfg *cfg;693struct nat64lsn_pg *pg;694union nat64lsn_pgidx idx;695size_t sz;696uint32_t count, total;697int ret;698699sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +700sizeof(uint64_t);701/* Check minimum header size */702if (sd->valsize < sz)703return (EINVAL);704705oh = (ipfw_obj_header *)sd->kbuf;706od = (ipfw_obj_data *)(oh + 1);707if (od->head.type != IPFW_TLV_OBJDATA ||708od->head.length != sz - sizeof(ipfw_obj_header))709return (EINVAL);710711idx.index = *(uint64_t *)(od + 1);712if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&713idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)714return 
(EINVAL);715if (idx.index == LAST_IDX)716return (EINVAL);717718IPFW_UH_RLOCK(ch);719i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);720if (i == NULL) {721IPFW_UH_RUNLOCK(ch);722return (ENOENT);723}724cfg = i->cfg;725if (idx.index == 0) { /* Fill in starting point */726idx.addr = cfg->prefix4;727idx.proto = IPPROTO_ICMP;728idx.port = NAT64_MIN_PORT;729}730if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||731idx.port < NAT64_MIN_PORT) {732IPFW_UH_RUNLOCK(ch);733return (EINVAL);734}735sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +736sizeof(ipfw_nat64lsn_stg_v1);737if (sd->valsize < sz) {738IPFW_UH_RUNLOCK(ch);739return (ENOMEM);740}741oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);742od = (ipfw_obj_data *)(oh + 1);743od->head.type = IPFW_TLV_OBJDATA;744od->head.length = sz - sizeof(ipfw_obj_header);745stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);746stg->count = total = 0;747stg->next.index = idx.index;748/*749* Acquire CALLOUT_LOCK to avoid races with expiration code.750* Thus states, hosts and PGs will not expire while we hold it.751*/752CALLOUT_LOCK(cfg);753ret = 0;754do {755pg = nat64lsn_get_pg_byidx(cfg, &idx);756if (pg != NULL) {757count = 0;758ret = nat64lsn_export_states(cfg, &idx, pg,759sd, &count);760if (ret != 0)761break;762if (count > 0) {763stg->count += count;764total += count;765/* Update total size of reply */766od->head.length +=767count * sizeof(ipfw_nat64lsn_state_v1);768sz += count * sizeof(ipfw_nat64lsn_state_v1);769}770stg->alias4.s_addr = htonl(idx.addr);771}772/* Determine new index */773switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {774case -1:775ret = ENOENT; /* End of search */776break;777case 1: /*778* Next alias address, new group may be needed.779* If states count is zero, use this group.780*/781if (stg->count == 0)782continue;783/* Otherwise try to create new group */784sz += sizeof(ipfw_nat64lsn_stg_v1);785if (sd->valsize < sz) {786ret = ENOMEM;787break;788}789/* Save next index in current group 
*/790stg->next.index = idx.index;791stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,792sizeof(ipfw_nat64lsn_stg_v1));793od->head.length += sizeof(ipfw_nat64lsn_stg_v1);794stg->count = 0;795break;796}797stg->next.index = idx.index;798} while (ret == 0);799CALLOUT_UNLOCK(cfg);800IPFW_UH_RUNLOCK(ch);801return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);802}803804static struct ipfw_sopt_handler scodes[] = {805{ IP_FW_NAT64LSN_CREATE, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_create },806{ IP_FW_NAT64LSN_DESTROY, IP_FW3_OPVER, HDIR_SET, nat64lsn_destroy },807{ IP_FW_NAT64LSN_CONFIG, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_config },808{ IP_FW_NAT64LSN_LIST, IP_FW3_OPVER, HDIR_GET, nat64lsn_list },809{ IP_FW_NAT64LSN_STATS, IP_FW3_OPVER, HDIR_GET, nat64lsn_stats },810{ IP_FW_NAT64LSN_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nat64lsn_reset_stats },811{ IP_FW_NAT64LSN_LIST_STATES, IP_FW3_OPVER, HDIR_GET, nat64lsn_states },812};813814#define NAT64LSN_ARE_EQUAL(v) (cfg0->v == cfg1->v)815static int816nat64lsn_cmp_configs(struct nat64lsn_cfg *cfg0, struct nat64lsn_cfg *cfg1)817{818819if ((cfg0->base.flags & cfg1->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&820NAT64LSN_ARE_EQUAL(prefix4) &&821NAT64LSN_ARE_EQUAL(pmask4) &&822NAT64LSN_ARE_EQUAL(plen4) &&823NAT64LSN_ARE_EQUAL(base.plat_plen) &&824IN6_ARE_ADDR_EQUAL(&cfg0->base.plat_prefix,825&cfg1->base.plat_prefix))826return (0);827return (1);828}829#undef NAT64LSN_ARE_EQUAL830831static void832nat64lsn_swap_configs(struct nat64lsn_instance *i0,833struct nat64lsn_instance *i1)834{835struct nat64lsn_cfg *cfg;836837cfg = i0->cfg;838i0->cfg = i1->cfg;839i1->cfg = cfg;840}841842/*843* NAT64LSN sets swap handler.844*845* When two sets have NAT64LSN instance with the same name, we check846* most important configuration parameters, and if there are no difference,847* and both instances have NAT64LSN_ALLOW_SWAPCONF flag, we will exchange848* configs between instances. 
This allows to keep NAT64 states when ipfw's849* rules are reloaded using new set.850*851* XXX: since manage_sets caller doesn't hold IPFW_WLOCK(), it is possible852* that some states will be created during switching, because set of rules853* is changed a bit earley than named objects.854*/855static int856nat64lsn_swap_sets_cb(struct namedobj_instance *ni, struct named_object *no,857void *arg)858{859struct nat64lsn_instance *i0, *i1;860uint8_t *sets;861862sets = arg;863if (no->set == sets[0]) {864/*865* Check if we have instance in new set with the same866* config that is sets aware and ready to swap configs.867*/868i0 = __containerof(no, struct nat64lsn_instance, no);869if ((i0->cfg->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&870(i1 = nat64lsn_find(ni, no->name, sets[1])) != NULL) {871/* Compare configs */872if (nat64lsn_cmp_configs(i0->cfg, i1->cfg) == 0) {873IPFW_UH_WLOCK_ASSERT(&V_layer3_chain);874IPFW_WLOCK(&V_layer3_chain);875nat64lsn_swap_configs(i0, i1);876IPFW_WUNLOCK(&V_layer3_chain);877}878}879}880return (0);881}882883static int884nat64lsn_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,885enum ipfw_sets_cmd cmd)886{887uint8_t sets[2];888889if (cmd == SWAP_ALL) {890sets[0] = (uint8_t)set;891sets[1] = new_set;892ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch),893nat64lsn_swap_sets_cb, &sets, IPFW_TLV_NAT64LSN_NAME);894}895return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,896set, new_set, cmd));897}898NAT64_DEFINE_OPCODE_REWRITER(nat64lsn, NAT64LSN, opcodes);899900static int901destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,902void *arg)903{904struct nat64lsn_instance *i;905struct ip_fw_chain *ch;906907ch = (struct ip_fw_chain *)arg;908i = (struct nat64lsn_instance *)SRV_OBJECT(ch, no->kidx);909nat64lsn_detach_instance(ch, i);910nat64lsn_destroy_config(i->cfg);911free(i, M_NAT64LSN);912return (0);913}914915int916nat64lsn_init(struct ip_fw_chain *ch, int first)917{918919if (first != 
0)920nat64lsn_init_internal();921V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");922if (V_nat64lsn_eid == 0)923return (ENXIO);924IPFW_ADD_SOPT_HANDLER(first, scodes);925IPFW_ADD_OBJ_REWRITER(first, opcodes);926return (0);927}928929void930nat64lsn_uninit(struct ip_fw_chain *ch, int last)931{932933IPFW_DEL_OBJ_REWRITER(last, opcodes);934IPFW_DEL_SOPT_HANDLER(last, scodes);935ipfw_del_eaction(ch, V_nat64lsn_eid);936/*937* Since we already have deregistered external action,938* our named objects become unaccessible via rules, because939* all rules were truncated by ipfw_del_eaction().940* So, we can unlink and destroy our named objects without holding941* IPFW_WLOCK().942*/943IPFW_UH_WLOCK(ch);944ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,945IPFW_TLV_NAT64LSN_NAME);946V_nat64lsn_eid = 0;947IPFW_UH_WUNLOCK(ch);948if (last != 0)949nat64lsn_uninit_internal();950}951952953