Path: blob/main/sys/netpfil/ipfw/ip_fw_table_value.c
105688 views
/*-1* SPDX-License-Identifier: BSD-2-Clause2*3* Copyright (c) 2014-2025 Yandex LLC4* Copyright (c) 2014 Alexander V. Chernikov5*6* Redistribution and use in source and binary forms, with or without7* modification, are permitted provided that the following conditions8* are met:9* 1. Redistributions of source code must retain the above copyright10* notice, this list of conditions and the following disclaimer.11* 2. Redistributions in binary form must reproduce the above copyright12* notice, this list of conditions and the following disclaimer in the13* documentation and/or other materials provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND16* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE17* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE18* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE19* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL20* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS21* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)22* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT23* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY24* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF25* SUCH DAMAGE.26*/2728#include <sys/cdefs.h>29/*30* Multi-field value support for ipfw tables.31*32* This file contains necessary functions to convert33* large multi-field values into u32 indices suitable to be fed34* to various table algorithms. Other machinery like proper refcounting,35* internal structures resizing are also kept here.36*/3738#include "opt_ipfw.h"3940#include <sys/param.h>41#include <sys/systm.h>42#include <sys/malloc.h>43#include <sys/kernel.h>44#include <sys/hash.h>45#include <sys/lock.h>46#include <sys/rwlock.h>47#include <sys/rmlock.h>48#include <sys/socket.h>49#include <sys/socketvar.h>50#include <sys/queue.h>51#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */5253#include <netinet/in.h>54#include <netinet/ip_var.h> /* struct ipfw_rule_ref */55#include <netinet/ip_fw.h>5657#include <netpfil/ipfw/ip_fw_private.h>58#include <netpfil/ipfw/ip_fw_table.h>5960static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key,61uint32_t kopt);62static int cmp_table_value(struct named_object *no, const void *key,63uint32_t kopt);6465static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,66struct sockopt_data *sd);6768static struct ipfw_sopt_handler scodes[] = {69{ IP_FW_TABLE_VLIST, IP_FW3_OPVER, HDIR_GET, list_table_values },70};7172#define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash)7374struct table_val_link75{76struct named_object no;77struct table_value *pval; /* Pointer to real table value */78};79#define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */80#define VALDATA_HASH_SIZE 655368182struct vdump_args {83struct ip_fw_chain *ch;84struct sockopt_data *sd;85struct table_value *pval;86int error;87};8889static uint32_t90hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt)91{9293return (hash32_buf(key, 56, 0));94}9596static int97cmp_table_value(struct named_object *no, const void *key, uint32_t kopt)98{99100return (memcmp(((struct table_val_link *)no)->pval, key, 56));101}102103static void104mask_table_value(struct table_value *src, struct table_value *dst,105uint32_t mask)106{107#define _MCPY(f, b) if ((mask & (b)) != 0) { dst->f = src->f; }108109memset(dst, 0, sizeof(*dst));110_MCPY(tag, IPFW_VTYPE_TAG);111_MCPY(pipe, IPFW_VTYPE_PIPE);112_MCPY(divert, IPFW_VTYPE_DIVERT);113_MCPY(skipto, IPFW_VTYPE_SKIPTO);114_MCPY(netgraph, IPFW_VTYPE_NETGRAPH);115_MCPY(fib, IPFW_VTYPE_FIB);116_MCPY(nat, IPFW_VTYPE_NAT);117_MCPY(limit, IPFW_VTYPE_LIMIT);118_MCPY(mark, IPFW_VTYPE_MARK);119_MCPY(dscp, IPFW_VTYPE_DSCP);120_MCPY(nh4, IPFW_VTYPE_NH4);121_MCPY(nh6, IPFW_VTYPE_NH6);122_MCPY(zoneid, IPFW_VTYPE_NH6);123#undef _MCPY124}125126static void127get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc,128struct table_value **ptv, struct namedobj_instance **pvi)129{130struct table_value *pval;131struct namedobj_instance *vi;132133if (tc->vshared != 0) {134pval = (struct table_value *)ch->valuestate;135vi = CHAIN_TO_VI(ch);136} else {137pval = NULL;138vi = NULL;139//pval = (struct table_value *)&tc->ti.data;140}141142if (ptv != NULL)143*ptv = pval;144if (pvi != NULL)145*pvi = vi;146}147148/*149* Update pointers to real values after @pval change.150*/151static int152update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)153{154struct vdump_args *da;155struct table_val_link *ptv;156struct table_value *pval;157158da = (struct vdump_args *)arg;159ptv = (struct table_val_link *)no;160161pval = da->pval;162ptv->pval = &pval[ptv->no.kidx];163ptv->no.name = (char *)&pval[ptv->no.kidx];164return (0);165}166167/*168* Grows value storage shared among all tables.169* Notifies other running adds on @ch shared storage resize.170* Note function does not guarantee that free space171* will be available after invocation, so one caller needs172* to roll cycle himself.173*174* Returns 0 if case of no errors.175*/176static int177resize_shared_value_storage(struct ip_fw_chain *ch)178{179struct tables_config *tcfg;180struct namedobj_instance *vi;181struct table_value *pval, *valuestate, *old_valuestate;182void *new_idx;183struct vdump_args da;184int new_blocks;185int val_size, val_size_old;186187IPFW_UH_WLOCK_ASSERT(ch);188189valuestate = NULL;190new_idx = NULL;191192pval = (struct table_value *)ch->valuestate;193vi = CHAIN_TO_VI(ch);194tcfg = CHAIN_TO_TCFG(ch);195196val_size = tcfg->val_size * 2;197198if (val_size == (1 << 30))199return (ENOSPC);200201valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW,202M_WAITOK | M_ZERO);203ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx,204&new_blocks);205206/*207* Check if we still need to resize208*/209if (tcfg->val_size >= val_size)210goto done;211212/* Update pointers and notify everyone we're changing @ch */213pval = (struct table_value *)ch->valuestate;214215/* Good. Let's merge */216memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size);217ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks);218219IPFW_WLOCK(ch);220/* Change pointers */221old_valuestate = ch->valuestate;222ch->valuestate = valuestate;223valuestate = old_valuestate;224ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks);225226val_size_old = tcfg->val_size;227tcfg->val_size = val_size;228val_size = val_size_old;229IPFW_WUNLOCK(ch);230/* Update pointers to reflect resize */231memset(&da, 0, sizeof(da));232da.pval = (struct table_value *)ch->valuestate;233ipfw_objhash_foreach(vi, update_tvalue, &da);234235done:236free(valuestate, M_IPFW);237ipfw_objhash_bitmap_free(new_idx, new_blocks);238239return (0);240}241242/*243* Drops reference for table value with index @kidx, stored in @pval and244* @vi. Frees value if it has no references.245*/246static void247unref_table_value(struct namedobj_instance *vi, struct table_value *pval,248uint32_t kidx)249{250struct table_val_link *ptvl;251252KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx));253if (--pval[kidx].refcnt > 0)254return;255256/* Last reference, delete item */257ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx);258KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx));259ipfw_objhash_del(vi, &ptvl->no);260ipfw_objhash_free_idx(vi, kidx);261free(ptvl, M_IPFW);262}263264struct flush_args {265struct ip_fw_chain *ch;266struct table_algo *ta;267struct table_info *ti;268void *astate;269ipfw_obj_tentry tent;270};271272static int273unref_table_value_cb(void *e, void *arg)274{275struct flush_args *fa;276struct ip_fw_chain *ch;277struct table_algo *ta;278ipfw_obj_tentry *tent;279int error;280281fa = (struct flush_args *)arg;282283ta = fa->ta;284memset(&fa->tent, 0, sizeof(fa->tent));285tent = &fa->tent;286error = ta->dump_tentry(fa->astate, fa->ti, e, tent);287if (error != 0)288return (error);289290ch = fa->ch;291292unref_table_value(CHAIN_TO_VI(ch),293(struct table_value *)ch->valuestate, tent->v.kidx);294295return (0);296}297298/*299* Drop references for each value used in @tc.300*/301void302ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc,303struct table_algo *ta, void *astate, struct table_info *ti)304{305struct flush_args fa;306307IPFW_UH_WLOCK_ASSERT(ch);308309memset(&fa, 0, sizeof(fa));310fa.ch = ch;311fa.ta = ta;312fa.astate = astate;313fa.ti = ti;314315ta->foreach(astate, ti, unref_table_value_cb, &fa);316}317318/*319* Allocate new value index in either shared or per-table array.320*321* Returns 0 on success.322*/323static int324alloc_table_vidx(struct ip_fw_chain *ch, struct table_config *tc,325struct namedobj_instance *vi, uint32_t *pvidx, uint8_t flags)326{327int error, vlimit;328uint32_t vidx;329330IPFW_UH_WLOCK_ASSERT(ch);331332if ((error = ipfw_objhash_alloc_idx(vi, &vidx)) != 0 &&333(error = resize_shared_value_storage(ch)) != 0)334return (error);335336vlimit = tc->ta->vlimit;337if (vlimit != 0 && vidx >= vlimit && !(flags & IPFW_CTF_ATOMIC)) {338/*339* Algorithm is not able to store given index.340* We have to rollback state, start using341* per-table value array or return error342* if we're already using it.343*/344if (tc->vshared != 0) {345/* shared -> per-table */346return (ENOSPC); /* TODO: proper error */347}348349/* per-table. Fail for now. */350return (ENOSPC); /* TODO: proper error */351}352353*pvidx = vidx;354return (0);355}356357/*358* Drops value reference for unused values (updates, deletes, partially359* successful adds or rollbacks).360*/361void362ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc,363struct tentry_info *tei, uint32_t count, int rollback)364{365int i;366struct tentry_info *ptei;367struct table_value *pval;368struct namedobj_instance *vi;369370/*371* We have two slightly different ADD cases here:372* either (1) we are successful / partially successful,373* in that case we need374* * to ignore ADDED entries values375* * rollback every other values if atomicity is not376* * required (either UPDATED since old value has been377* stored there, or some failure like EXISTS or LIMIT378* or simply "ignored" case.379*380* (2): atomic rollback of partially successful operation381* in that case we simply need to unref all entries.382*383* DELETE case is simpler: no atomic support there, so384* we simply unref all non-zero values.385*/386387/*388* Get current table value pointers.389*/390get_value_ptrs(ch, tc, &pval, &vi);391392for (i = 0; i < count; i++) {393ptei = &tei[i];394395if (ptei->value == 0) {396/*397* We may be deleting non-existing record.398* Skip.399*/400continue;401}402403if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) {404ptei->value = 0;405continue;406}407408unref_table_value(vi, pval, ptei->value);409ptei->value = 0;410}411}412413/*414* Main function used to link values of entries going to be added,415* to the index. Since we may perform many UH locks drops/acquires,416* handle changes by checking tablestate "modified" field.417*418* Success: return 0.419*/420int421ipfw_link_table_values(struct ip_fw_chain *ch, struct table_config *tc,422struct tentry_info *tei, uint32_t count, uint8_t flags)423{424int error, i, found;425struct namedobj_instance *vi;426struct tentry_info *ptei;427uint32_t vidx, vlimit;428struct table_val_link *ptv;429struct table_value tval, *pval;430431/*432* Stage 1: reference all existing values and433* save their indices.434*/435IPFW_UH_WLOCK_ASSERT(ch);436get_value_ptrs(ch, tc, &pval, &vi);437438error = 0;439found = 0;440vlimit = tc->ta->vlimit;441vidx = 0;442for (i = 0; i < count; i++) {443ptei = &tei[i];444ptei->value = 0; /* Ensure value is always 0 in the beginning */445mask_table_value(ptei->pvalue, &tval, tc->vmask);446ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,447(char *)&tval);448if (ptv == NULL)449continue;450/* Deal with vlimit later */451if (vlimit > 0 && vlimit <= ptv->no.kidx)452continue;453454/* Value found. Bump refcount */455ptv->pval->refcnt++;456ptei->value = ptv->no.kidx;457found++;458}459460if (count == found) {461/* We've found all values, no need to create new ones. */462return (0);463}464465/*466* Stage 2: allocate objects for non-existing values.467*/468for (i = 0; i < count; i++) {469ptei = &tei[i];470if (ptei->value != 0)471continue;472if (ptei->ptv != NULL)473continue;474ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW,475M_WAITOK | M_ZERO);476}477478/*479* Stage 3: allocate index numbers for new values480* and link them to index.481*/482KASSERT(pval == ch->valuestate, ("resize_storage() notify failure"));483484/* Let's try to link values */485for (i = 0; i < count; i++) {486ptei = &tei[i];487488/* Check if record has appeared */489mask_table_value(ptei->pvalue, &tval, tc->vmask);490ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,491(char *)&tval);492if (ptv != NULL) {493ptv->pval->refcnt++;494ptei->value = ptv->no.kidx;495continue;496}497498if ((error = alloc_table_vidx(ch, tc, vi, &vidx, flags)) != 0)499return (error);500501/* Finally, we have allocated valid index, let's add entry */502ptei->value = vidx;503ptv = (struct table_val_link *)ptei->ptv;504ptei->ptv = NULL;505506ptv->no.kidx = vidx;507ptv->no.name = (char *)&pval[vidx];508ptv->pval = &pval[vidx];509memcpy(ptv->pval, &tval, sizeof(struct table_value));510pval[vidx].refcnt = 1;511ipfw_objhash_add(vi, &ptv->no);512}513514return (0);515}516517/*518* Imports table value from current userland format.519* Saves value in kernel format to the same place.520*/521void522ipfw_import_table_value_v1(ipfw_table_value *iv)523{524struct table_value v;525526memset(&v, 0, sizeof(v));527v.tag = iv->tag;528v.pipe = iv->pipe;529v.divert = iv->divert;530v.skipto = iv->skipto;531v.netgraph = iv->netgraph;532v.fib = iv->fib;533v.nat = iv->nat;534v.dscp = iv->dscp;535v.nh4 = iv->nh4;536v.nh6 = iv->nh6;537v.limit = iv->limit;538v.zoneid = iv->zoneid;539v.mark = iv->mark;540541memcpy(iv, &v, sizeof(ipfw_table_value));542}543544/*545* Export real table value @v to current userland format.546* Note that @v and @piv may point to the same memory.547*/548void549ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv)550{551ipfw_table_value iv;552553memset(&iv, 0, sizeof(iv));554iv.tag = v->tag;555iv.pipe = v->pipe;556iv.divert = v->divert;557iv.skipto = v->skipto;558iv.netgraph = v->netgraph;559iv.fib = v->fib;560iv.nat = v->nat;561iv.dscp = v->dscp;562iv.limit = v->limit;563iv.nh4 = v->nh4;564iv.nh6 = v->nh6;565iv.zoneid = v->zoneid;566iv.mark = v->mark;567568memcpy(piv, &iv, sizeof(iv));569}570571/*572* Exports real value data into ipfw_table_value structure including refcnt.573*/574static int575dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)576{577struct vdump_args *da;578struct table_val_link *ptv;579ipfw_table_value *v;580581da = (struct vdump_args *)arg;582ptv = (struct table_val_link *)no;583584v = (ipfw_table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v));585/* Out of memory, returning */586if (v == NULL) {587da->error = ENOMEM;588return (ENOMEM);589}590591ipfw_export_table_value_v1(ptv->pval, v);592v->refcnt = ptv->pval->refcnt;593v->kidx = ptv->no.kidx;594return (0);595}596597/*598* Dumps all shared/table value data599* Data layout (v1)(current):600* Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size601* Reply: [ ipfw_obj_lheader ipfw_table_value x N ]602*603* Returns 0 on success604*/605static int606list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,607struct sockopt_data *sd)608{609struct _ipfw_obj_lheader *olh;610struct namedobj_instance *vi;611struct vdump_args da;612uint32_t count, size;613614olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));615if (olh == NULL)616return (EINVAL);617if (sd->valsize < olh->size)618return (EINVAL);619620IPFW_UH_RLOCK(ch);621vi = CHAIN_TO_VI(ch);622623count = ipfw_objhash_count(vi);624size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader);625626/* Fill in header regadless of buffer size */627olh->count = count;628olh->objsize = sizeof(ipfw_table_value);629630if (size > olh->size) {631olh->size = size;632IPFW_UH_RUNLOCK(ch);633return (ENOMEM);634}635olh->size = size;636637/*638* Do the actual value dump639*/640memset(&da, 0, sizeof(da));641da.ch = ch;642da.sd = sd;643ipfw_objhash_foreach(vi, dump_tvalue, &da);644645IPFW_UH_RUNLOCK(ch);646647return (0);648}649650void651ipfw_table_value_init(struct ip_fw_chain *ch, int first)652{653struct tables_config *tcfg;654655ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value),656M_IPFW, M_WAITOK | M_ZERO);657658tcfg = ch->tblcfg;659660tcfg->val_size = VALDATA_START_SIZE;661tcfg->valhash = ipfw_objhash_create(tcfg->val_size, VALDATA_HASH_SIZE);662ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value,663cmp_table_value);664665IPFW_ADD_SOPT_HANDLER(first, scodes);666}667668static int669destroy_value(struct namedobj_instance *ni, struct named_object *no,670void *arg)671{672673free(no, M_IPFW);674return (0);675}676677void678ipfw_table_value_destroy(struct ip_fw_chain *ch, int last)679{680681IPFW_DEL_SOPT_HANDLER(last, scodes);682683free(ch->valuestate, M_IPFW);684ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch);685ipfw_objhash_destroy(CHAIN_TO_VI(ch));686}687688689