Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/ipfw/ip_fw_table_value.c
105688 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2014-2025 Yandex LLC
5
* Copyright (c) 2014 Alexander V. Chernikov
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/cdefs.h>
30
/*
31
* Multi-field value support for ipfw tables.
32
*
33
* This file contains necessary functions to convert
34
* large multi-field values into u32 indices suitable to be fed
35
* to various table algorithms. Other machinery like proper refcounting,
36
* internal structures resizing are also kept here.
37
*/
38
39
#include "opt_ipfw.h"
40
41
#include <sys/param.h>
42
#include <sys/systm.h>
43
#include <sys/malloc.h>
44
#include <sys/kernel.h>
45
#include <sys/hash.h>
46
#include <sys/lock.h>
47
#include <sys/rwlock.h>
48
#include <sys/rmlock.h>
49
#include <sys/socket.h>
50
#include <sys/socketvar.h>
51
#include <sys/queue.h>
52
#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
53
54
#include <netinet/in.h>
55
#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
56
#include <netinet/ip_fw.h>
57
58
#include <netpfil/ipfw/ip_fw_private.h>
59
#include <netpfil/ipfw/ip_fw_table.h>
60
61
/*
 * Forward declarations of the hash and compare callbacks that
 * ipfw_table_value_init() installs on the value hash.
 */
static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key,
    uint32_t kopt);
static int cmp_table_value(struct named_object *no, const void *key,
    uint32_t kopt);
65
66
/* Forward declaration: the handler is referenced by scodes[] below. */
static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd);

/*
 * Socket option handlers provided by this file.  The table is registered
 * in ipfw_table_value_init() and unregistered in ipfw_table_value_destroy().
 */
static struct ipfw_sopt_handler scodes[] = {
	{ IP_FW_TABLE_VLIST, IP_FW3_OPVER, HDIR_GET, list_table_values },
};
72
73
#define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash)
74
75
/*
 * Hash entry linking a named object to its slot in the value array.
 * The named_object is the first member: callbacks in this file cast a
 * struct named_object pointer directly to struct table_val_link.
 */
struct table_val_link
{
	struct named_object no;
	struct table_value *pval; /* Pointer to real table value */
};
80
#define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */
/*
 * Second argument passed to ipfw_objhash_create() for the value hash.
 * NOTE(review): presumably the number of hash buckets -- confirm.
 */
#define VALDATA_HASH_SIZE 65536
82
83
/*
 * Argument bundle passed to the ipfw_objhash_foreach() callbacks in this
 * file (update_tvalue() and dump_tvalue()).
 */
struct vdump_args {
	struct ip_fw_chain *ch;		/* chain being processed */
	struct sockopt_data *sd;	/* output buffer used by dump_tvalue() */
	struct table_value *pval;	/* value array base used by update_tvalue() */
	int error;			/* set to ENOMEM by dump_tvalue() on failure */
};
89
90
static uint32_t
91
hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt)
92
{
93
94
return (hash32_buf(key, 56, 0));
95
}
96
97
static int
98
cmp_table_value(struct named_object *no, const void *key, uint32_t kopt)
99
{
100
101
return (memcmp(((struct table_val_link *)no)->pval, key, 56));
102
}
103
104
static void
105
mask_table_value(struct table_value *src, struct table_value *dst,
106
uint32_t mask)
107
{
108
#define _MCPY(f, b) if ((mask & (b)) != 0) { dst->f = src->f; }
109
110
memset(dst, 0, sizeof(*dst));
111
_MCPY(tag, IPFW_VTYPE_TAG);
112
_MCPY(pipe, IPFW_VTYPE_PIPE);
113
_MCPY(divert, IPFW_VTYPE_DIVERT);
114
_MCPY(skipto, IPFW_VTYPE_SKIPTO);
115
_MCPY(netgraph, IPFW_VTYPE_NETGRAPH);
116
_MCPY(fib, IPFW_VTYPE_FIB);
117
_MCPY(nat, IPFW_VTYPE_NAT);
118
_MCPY(limit, IPFW_VTYPE_LIMIT);
119
_MCPY(mark, IPFW_VTYPE_MARK);
120
_MCPY(dscp, IPFW_VTYPE_DSCP);
121
_MCPY(nh4, IPFW_VTYPE_NH4);
122
_MCPY(nh6, IPFW_VTYPE_NH6);
123
_MCPY(zoneid, IPFW_VTYPE_NH6);
124
#undef _MCPY
125
}
126
127
static void
128
get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc,
129
struct table_value **ptv, struct namedobj_instance **pvi)
130
{
131
struct table_value *pval;
132
struct namedobj_instance *vi;
133
134
if (tc->vshared != 0) {
135
pval = (struct table_value *)ch->valuestate;
136
vi = CHAIN_TO_VI(ch);
137
} else {
138
pval = NULL;
139
vi = NULL;
140
//pval = (struct table_value *)&tc->ti.data;
141
}
142
143
if (ptv != NULL)
144
*ptv = pval;
145
if (pvi != NULL)
146
*pvi = vi;
147
}
148
149
/*
150
* Update pointers to real values after @pval change.
151
*/
152
static int
153
update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
154
{
155
struct vdump_args *da;
156
struct table_val_link *ptv;
157
struct table_value *pval;
158
159
da = (struct vdump_args *)arg;
160
ptv = (struct table_val_link *)no;
161
162
pval = da->pval;
163
ptv->pval = &pval[ptv->no.kidx];
164
ptv->no.name = (char *)&pval[ptv->no.kidx];
165
return (0);
166
}
167
168
/*
169
* Grows value storage shared among all tables.
170
* Notifies other running adds on @ch shared storage resize.
171
* Note function does not guarantee that free space
172
* will be available after invocation, so one caller needs
173
* to roll cycle himself.
174
*
175
* Returns 0 if case of no errors.
176
*/
177
static int
178
resize_shared_value_storage(struct ip_fw_chain *ch)
179
{
180
struct tables_config *tcfg;
181
struct namedobj_instance *vi;
182
struct table_value *pval, *valuestate, *old_valuestate;
183
void *new_idx;
184
struct vdump_args da;
185
int new_blocks;
186
int val_size, val_size_old;
187
188
IPFW_UH_WLOCK_ASSERT(ch);
189
190
valuestate = NULL;
191
new_idx = NULL;
192
193
pval = (struct table_value *)ch->valuestate;
194
vi = CHAIN_TO_VI(ch);
195
tcfg = CHAIN_TO_TCFG(ch);
196
197
val_size = tcfg->val_size * 2;
198
199
if (val_size == (1 << 30))
200
return (ENOSPC);
201
202
valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW,
203
M_WAITOK | M_ZERO);
204
ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx,
205
&new_blocks);
206
207
/*
208
* Check if we still need to resize
209
*/
210
if (tcfg->val_size >= val_size)
211
goto done;
212
213
/* Update pointers and notify everyone we're changing @ch */
214
pval = (struct table_value *)ch->valuestate;
215
216
/* Good. Let's merge */
217
memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size);
218
ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
219
220
IPFW_WLOCK(ch);
221
/* Change pointers */
222
old_valuestate = ch->valuestate;
223
ch->valuestate = valuestate;
224
valuestate = old_valuestate;
225
ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
226
227
val_size_old = tcfg->val_size;
228
tcfg->val_size = val_size;
229
val_size = val_size_old;
230
IPFW_WUNLOCK(ch);
231
/* Update pointers to reflect resize */
232
memset(&da, 0, sizeof(da));
233
da.pval = (struct table_value *)ch->valuestate;
234
ipfw_objhash_foreach(vi, update_tvalue, &da);
235
236
done:
237
free(valuestate, M_IPFW);
238
ipfw_objhash_bitmap_free(new_idx, new_blocks);
239
240
return (0);
241
}
242
243
/*
244
* Drops reference for table value with index @kidx, stored in @pval and
245
* @vi. Frees value if it has no references.
246
*/
247
static void
248
unref_table_value(struct namedobj_instance *vi, struct table_value *pval,
249
uint32_t kidx)
250
{
251
struct table_val_link *ptvl;
252
253
KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx));
254
if (--pval[kidx].refcnt > 0)
255
return;
256
257
/* Last reference, delete item */
258
ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx);
259
KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx));
260
ipfw_objhash_del(vi, &ptvl->no);
261
ipfw_objhash_free_idx(vi, kidx);
262
free(ptvl, M_IPFW);
263
}
264
265
/*
 * Argument bundle for unref_table_value_cb(); filled in by
 * ipfw_unref_table_values().
 */
struct flush_args {
	struct ip_fw_chain *ch;		/* chain owning the value storage */
	struct table_algo *ta;		/* algorithm providing dump_tentry() */
	struct table_info *ti;		/* passed through to dump_tentry() */
	void *astate;			/* algorithm state, passed through */
	ipfw_obj_tentry tent;		/* scratch entry, rewritten per callback */
};
272
273
static int
274
unref_table_value_cb(void *e, void *arg)
275
{
276
struct flush_args *fa;
277
struct ip_fw_chain *ch;
278
struct table_algo *ta;
279
ipfw_obj_tentry *tent;
280
int error;
281
282
fa = (struct flush_args *)arg;
283
284
ta = fa->ta;
285
memset(&fa->tent, 0, sizeof(fa->tent));
286
tent = &fa->tent;
287
error = ta->dump_tentry(fa->astate, fa->ti, e, tent);
288
if (error != 0)
289
return (error);
290
291
ch = fa->ch;
292
293
unref_table_value(CHAIN_TO_VI(ch),
294
(struct table_value *)ch->valuestate, tent->v.kidx);
295
296
return (0);
297
}
298
299
/*
300
* Drop references for each value used in @tc.
301
*/
302
void
303
ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc,
304
struct table_algo *ta, void *astate, struct table_info *ti)
305
{
306
struct flush_args fa;
307
308
IPFW_UH_WLOCK_ASSERT(ch);
309
310
memset(&fa, 0, sizeof(fa));
311
fa.ch = ch;
312
fa.ta = ta;
313
fa.astate = astate;
314
fa.ti = ti;
315
316
ta->foreach(astate, ti, unref_table_value_cb, &fa);
317
}
318
319
/*
320
* Allocate new value index in either shared or per-table array.
321
*
322
* Returns 0 on success.
323
*/
324
static int
325
alloc_table_vidx(struct ip_fw_chain *ch, struct table_config *tc,
326
struct namedobj_instance *vi, uint32_t *pvidx, uint8_t flags)
327
{
328
int error, vlimit;
329
uint32_t vidx;
330
331
IPFW_UH_WLOCK_ASSERT(ch);
332
333
if ((error = ipfw_objhash_alloc_idx(vi, &vidx)) != 0 &&
334
(error = resize_shared_value_storage(ch)) != 0)
335
return (error);
336
337
vlimit = tc->ta->vlimit;
338
if (vlimit != 0 && vidx >= vlimit && !(flags & IPFW_CTF_ATOMIC)) {
339
/*
340
* Algorithm is not able to store given index.
341
* We have to rollback state, start using
342
* per-table value array or return error
343
* if we're already using it.
344
*/
345
if (tc->vshared != 0) {
346
/* shared -> per-table */
347
return (ENOSPC); /* TODO: proper error */
348
}
349
350
/* per-table. Fail for now. */
351
return (ENOSPC); /* TODO: proper error */
352
}
353
354
*pvidx = vidx;
355
return (0);
356
}
357
358
/*
359
* Drops value reference for unused values (updates, deletes, partially
360
* successful adds or rollbacks).
361
*/
362
void
363
ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc,
364
struct tentry_info *tei, uint32_t count, int rollback)
365
{
366
int i;
367
struct tentry_info *ptei;
368
struct table_value *pval;
369
struct namedobj_instance *vi;
370
371
/*
372
* We have two slightly different ADD cases here:
373
* either (1) we are successful / partially successful,
374
* in that case we need
375
* * to ignore ADDED entries values
376
* * rollback every other values if atomicity is not
377
* * required (either UPDATED since old value has been
378
* stored there, or some failure like EXISTS or LIMIT
379
* or simply "ignored" case.
380
*
381
* (2): atomic rollback of partially successful operation
382
* in that case we simply need to unref all entries.
383
*
384
* DELETE case is simpler: no atomic support there, so
385
* we simply unref all non-zero values.
386
*/
387
388
/*
389
* Get current table value pointers.
390
*/
391
get_value_ptrs(ch, tc, &pval, &vi);
392
393
for (i = 0; i < count; i++) {
394
ptei = &tei[i];
395
396
if (ptei->value == 0) {
397
/*
398
* We may be deleting non-existing record.
399
* Skip.
400
*/
401
continue;
402
}
403
404
if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) {
405
ptei->value = 0;
406
continue;
407
}
408
409
unref_table_value(vi, pval, ptei->value);
410
ptei->value = 0;
411
}
412
}
413
414
/*
415
* Main function used to link values of entries going to be added,
416
* to the index. Since we may perform many UH locks drops/acquires,
417
* handle changes by checking tablestate "modified" field.
418
*
419
* Success: return 0.
420
*/
421
int
422
ipfw_link_table_values(struct ip_fw_chain *ch, struct table_config *tc,
423
struct tentry_info *tei, uint32_t count, uint8_t flags)
424
{
425
int error, i, found;
426
struct namedobj_instance *vi;
427
struct tentry_info *ptei;
428
uint32_t vidx, vlimit;
429
struct table_val_link *ptv;
430
struct table_value tval, *pval;
431
432
/*
433
* Stage 1: reference all existing values and
434
* save their indices.
435
*/
436
IPFW_UH_WLOCK_ASSERT(ch);
437
get_value_ptrs(ch, tc, &pval, &vi);
438
439
error = 0;
440
found = 0;
441
vlimit = tc->ta->vlimit;
442
vidx = 0;
443
for (i = 0; i < count; i++) {
444
ptei = &tei[i];
445
ptei->value = 0; /* Ensure value is always 0 in the beginning */
446
mask_table_value(ptei->pvalue, &tval, tc->vmask);
447
ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
448
(char *)&tval);
449
if (ptv == NULL)
450
continue;
451
/* Deal with vlimit later */
452
if (vlimit > 0 && vlimit <= ptv->no.kidx)
453
continue;
454
455
/* Value found. Bump refcount */
456
ptv->pval->refcnt++;
457
ptei->value = ptv->no.kidx;
458
found++;
459
}
460
461
if (count == found) {
462
/* We've found all values, no need to create new ones. */
463
return (0);
464
}
465
466
/*
467
* Stage 2: allocate objects for non-existing values.
468
*/
469
for (i = 0; i < count; i++) {
470
ptei = &tei[i];
471
if (ptei->value != 0)
472
continue;
473
if (ptei->ptv != NULL)
474
continue;
475
ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW,
476
M_WAITOK | M_ZERO);
477
}
478
479
/*
480
* Stage 3: allocate index numbers for new values
481
* and link them to index.
482
*/
483
KASSERT(pval == ch->valuestate, ("resize_storage() notify failure"));
484
485
/* Let's try to link values */
486
for (i = 0; i < count; i++) {
487
ptei = &tei[i];
488
489
/* Check if record has appeared */
490
mask_table_value(ptei->pvalue, &tval, tc->vmask);
491
ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
492
(char *)&tval);
493
if (ptv != NULL) {
494
ptv->pval->refcnt++;
495
ptei->value = ptv->no.kidx;
496
continue;
497
}
498
499
if ((error = alloc_table_vidx(ch, tc, vi, &vidx, flags)) != 0)
500
return (error);
501
502
/* Finally, we have allocated valid index, let's add entry */
503
ptei->value = vidx;
504
ptv = (struct table_val_link *)ptei->ptv;
505
ptei->ptv = NULL;
506
507
ptv->no.kidx = vidx;
508
ptv->no.name = (char *)&pval[vidx];
509
ptv->pval = &pval[vidx];
510
memcpy(ptv->pval, &tval, sizeof(struct table_value));
511
pval[vidx].refcnt = 1;
512
ipfw_objhash_add(vi, &ptv->no);
513
}
514
515
return (0);
516
}
517
518
/*
519
* Imports table value from current userland format.
520
* Saves value in kernel format to the same place.
521
*/
522
void
523
ipfw_import_table_value_v1(ipfw_table_value *iv)
524
{
525
struct table_value v;
526
527
memset(&v, 0, sizeof(v));
528
v.tag = iv->tag;
529
v.pipe = iv->pipe;
530
v.divert = iv->divert;
531
v.skipto = iv->skipto;
532
v.netgraph = iv->netgraph;
533
v.fib = iv->fib;
534
v.nat = iv->nat;
535
v.dscp = iv->dscp;
536
v.nh4 = iv->nh4;
537
v.nh6 = iv->nh6;
538
v.limit = iv->limit;
539
v.zoneid = iv->zoneid;
540
v.mark = iv->mark;
541
542
memcpy(iv, &v, sizeof(ipfw_table_value));
543
}
544
545
/*
546
* Export real table value @v to current userland format.
547
* Note that @v and @piv may point to the same memory.
548
*/
549
void
550
ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv)
551
{
552
ipfw_table_value iv;
553
554
memset(&iv, 0, sizeof(iv));
555
iv.tag = v->tag;
556
iv.pipe = v->pipe;
557
iv.divert = v->divert;
558
iv.skipto = v->skipto;
559
iv.netgraph = v->netgraph;
560
iv.fib = v->fib;
561
iv.nat = v->nat;
562
iv.dscp = v->dscp;
563
iv.limit = v->limit;
564
iv.nh4 = v->nh4;
565
iv.nh6 = v->nh6;
566
iv.zoneid = v->zoneid;
567
iv.mark = v->mark;
568
569
memcpy(piv, &iv, sizeof(iv));
570
}
571
572
/*
573
* Exports real value data into ipfw_table_value structure including refcnt.
574
*/
575
static int
576
dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
577
{
578
struct vdump_args *da;
579
struct table_val_link *ptv;
580
ipfw_table_value *v;
581
582
da = (struct vdump_args *)arg;
583
ptv = (struct table_val_link *)no;
584
585
v = (ipfw_table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v));
586
/* Out of memory, returning */
587
if (v == NULL) {
588
da->error = ENOMEM;
589
return (ENOMEM);
590
}
591
592
ipfw_export_table_value_v1(ptv->pval, v);
593
v->refcnt = ptv->pval->refcnt;
594
v->kidx = ptv->no.kidx;
595
return (0);
596
}
597
598
/*
599
* Dumps all shared/table value data
600
* Data layout (v1)(current):
601
* Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
602
* Reply: [ ipfw_obj_lheader ipfw_table_value x N ]
603
*
604
* Returns 0 on success
605
*/
606
static int
607
list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
608
struct sockopt_data *sd)
609
{
610
struct _ipfw_obj_lheader *olh;
611
struct namedobj_instance *vi;
612
struct vdump_args da;
613
uint32_t count, size;
614
615
olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
616
if (olh == NULL)
617
return (EINVAL);
618
if (sd->valsize < olh->size)
619
return (EINVAL);
620
621
IPFW_UH_RLOCK(ch);
622
vi = CHAIN_TO_VI(ch);
623
624
count = ipfw_objhash_count(vi);
625
size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader);
626
627
/* Fill in header regadless of buffer size */
628
olh->count = count;
629
olh->objsize = sizeof(ipfw_table_value);
630
631
if (size > olh->size) {
632
olh->size = size;
633
IPFW_UH_RUNLOCK(ch);
634
return (ENOMEM);
635
}
636
olh->size = size;
637
638
/*
639
* Do the actual value dump
640
*/
641
memset(&da, 0, sizeof(da));
642
da.ch = ch;
643
da.sd = sd;
644
ipfw_objhash_foreach(vi, dump_tvalue, &da);
645
646
IPFW_UH_RUNLOCK(ch);
647
648
return (0);
649
}
650
651
void
652
ipfw_table_value_init(struct ip_fw_chain *ch, int first)
653
{
654
struct tables_config *tcfg;
655
656
ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value),
657
M_IPFW, M_WAITOK | M_ZERO);
658
659
tcfg = ch->tblcfg;
660
661
tcfg->val_size = VALDATA_START_SIZE;
662
tcfg->valhash = ipfw_objhash_create(tcfg->val_size, VALDATA_HASH_SIZE);
663
ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value,
664
cmp_table_value);
665
666
IPFW_ADD_SOPT_HANDLER(first, scodes);
667
}
668
669
static int
670
destroy_value(struct namedobj_instance *ni, struct named_object *no,
671
void *arg)
672
{
673
674
free(no, M_IPFW);
675
return (0);
676
}
677
678
void
679
ipfw_table_value_destroy(struct ip_fw_chain *ch, int last)
680
{
681
682
IPFW_DEL_SOPT_HANDLER(last, scodes);
683
684
free(ch->valuestate, M_IPFW);
685
ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch);
686
ipfw_objhash_destroy(CHAIN_TO_VI(ch));
687
}
688
689