Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/ipfw/ip_fw_table.c
39478 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
5
* Copyright (c) 2014-2024 Yandex LLC
6
* Copyright (c) 2014 Alexander V. Chernikov
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
10
* are met:
11
* 1. Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
*
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
* SUCH DAMAGE.
28
*/
29
30
#include <sys/cdefs.h>
31
/*
32
* Lookup table support for ipfw.
33
*
34
* This file contains handlers for all generic tables' operations:
35
* add/del/flush entries, list/dump tables etc..
36
*
37
* Table data modification is protected by both UH and runtime lock
38
* while reading configuration/data is protected by UH lock.
39
*
40
* Lookup algorithms for all table types are located in ip_fw_table_algo.c
41
*/
42
43
#include "opt_ipfw.h"
44
45
#include <sys/param.h>
46
#include <sys/systm.h>
47
#include <sys/malloc.h>
48
#include <sys/kernel.h>
49
#include <sys/lock.h>
50
#include <sys/rwlock.h>
51
#include <sys/rmlock.h>
52
#include <sys/socket.h>
53
#include <sys/socketvar.h>
54
#include <sys/queue.h>
55
#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
56
57
#include <netinet/in.h>
58
#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
59
#include <netinet/ip_fw.h>
60
61
#include <netpfil/ipfw/ip_fw_private.h>
62
#include <netpfil/ipfw/ip_fw_table.h>
63
64
/*
65
* Table has the following `type` concepts:
66
*
67
* `no.type` represents lookup key type (addr, ifp, uid, etc..)
68
* vmask represents bitmask of table values which are present at the moment.
69
* Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
70
* single-value-for-all approach.
71
*/
72
struct table_config {
73
struct named_object no;
74
uint8_t tflags; /* type flags */
75
uint8_t locked; /* 1 if locked from changes */
76
uint8_t linked; /* 1 if already linked */
77
uint8_t ochanged; /* used by set swapping */
78
uint8_t vshared; /* 1 if using shared value array */
79
uint8_t spare[3];
80
uint32_t count; /* Number of records */
81
uint32_t limit; /* Max number of records */
82
uint32_t vmask; /* bitmask with supported values */
83
uint32_t ocount; /* used by set swapping */
84
uint64_t gencnt; /* generation count */
85
char tablename[64]; /* table name */
86
struct table_algo *ta; /* Callbacks for given algo */
87
void *astate; /* algorithm state */
88
struct table_info ti_copy; /* data to put to table_info */
89
struct namedobj_instance *vi;
90
};
91
92
static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
93
struct table_config **tc);
94
static struct table_config *find_table(struct namedobj_instance *ni,
95
struct tid_info *ti);
96
static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
97
struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
98
static void free_table_config(struct namedobj_instance *ni,
99
struct table_config *tc);
100
static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
101
char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int ref);
102
static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
103
static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
104
static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
105
struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
106
#define OP_ADD 1
107
#define OP_DEL 0
108
static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
109
struct sockopt_data *sd);
110
static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
111
ipfw_xtable_info *i);
112
static int dump_table_tentry(void *e, void *arg);
113
114
static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
115
struct tid_info *b);
116
117
static int check_table_name(const char *name);
118
static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
119
struct table_config *tc, struct table_info *ti, uint32_t count);
120
static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
121
122
static struct table_algo *find_table_algo(struct tables_config *tableconf,
123
struct tid_info *ti, char *name);
124
125
static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
126
static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
127
128
#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash)
129
#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k]))
130
131
#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */
132
133
void
134
rollback_toperation_state(struct ip_fw_chain *ch, void *object)
135
{
136
struct tables_config *tcfg;
137
struct op_state *os;
138
139
tcfg = CHAIN_TO_TCFG(ch);
140
TAILQ_FOREACH(os, &tcfg->state_list, next)
141
os->func(object, os);
142
}
143
144
void
145
add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
146
{
147
struct tables_config *tcfg;
148
149
tcfg = CHAIN_TO_TCFG(ch);
150
TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
151
}
152
153
void
154
del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
155
{
156
struct tables_config *tcfg;
157
158
tcfg = CHAIN_TO_TCFG(ch);
159
TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
160
}
161
162
void
163
tc_ref(struct table_config *tc)
164
{
165
166
tc->no.refcnt++;
167
}
168
169
void
170
tc_unref(struct table_config *tc)
171
{
172
173
tc->no.refcnt--;
174
}
175
176
static struct table_value *
177
get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
178
{
179
struct table_value *pval;
180
181
pval = (struct table_value *)ch->valuestate;
182
183
return (&pval[kidx]);
184
}
185
186
/*
187
* Checks if we're able to insert/update entry @tei into table
188
* w.r.t @tc limits.
189
* May alter @tei to indicate insertion error / insert
190
* options.
191
*
192
* Returns 0 if operation can be performed/
193
*/
194
static int
195
check_table_limit(struct table_config *tc, struct tentry_info *tei)
196
{
197
198
if (tc->limit == 0 || tc->count < tc->limit)
199
return (0);
200
201
if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
202
/* Notify userland on error cause */
203
tei->flags |= TEI_FLAGS_LIMIT;
204
return (EFBIG);
205
}
206
207
/*
208
* We have UPDATE flag set.
209
* Permit updating record (if found),
210
* but restrict adding new one since we've
211
* already hit the limit.
212
*/
213
tei->flags |= TEI_FLAGS_DONTADD;
214
215
return (0);
216
}
217
218
/*
219
* Convert algorithm callback return code into
220
* one of pre-defined states known by userland.
221
*/
222
static void
223
store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
224
{
225
int flag;
226
227
flag = 0;
228
229
switch (error) {
230
case 0:
231
if (op == OP_ADD && num != 0)
232
flag = TEI_FLAGS_ADDED;
233
if (op == OP_DEL)
234
flag = TEI_FLAGS_DELETED;
235
break;
236
case ENOENT:
237
flag = TEI_FLAGS_NOTFOUND;
238
break;
239
case EEXIST:
240
flag = TEI_FLAGS_EXISTS;
241
break;
242
default:
243
flag = TEI_FLAGS_ERROR;
244
}
245
246
tei->flags |= flag;
247
}
248
249
/*
250
* Creates and references table with default parameters.
251
* Saves table config, algo and allocated kidx info @ptc, @pta and
252
* @pkidx if non-zero.
253
* Used for table auto-creation to support old binaries.
254
*
255
* Returns 0 on success.
256
*/
257
static int
258
create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
259
uint32_t *pkidx)
260
{
261
ipfw_xtable_info xi;
262
int error;
263
264
memset(&xi, 0, sizeof(xi));
265
/* Set default value mask for legacy clients */
266
xi.vmask = IPFW_VTYPE_LEGACY;
267
268
error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
269
if (error != 0)
270
return (error);
271
272
return (0);
273
}
274
275
/*
276
* Find and reference existing table optionally
277
* creating new one.
278
*
279
* Saves found table config into @ptc.
280
* Note function may drop/acquire UH_WLOCK.
281
* Returns 0 if table was found/created and referenced
282
* or non-zero return code.
283
*/
284
static int
285
find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
286
struct tentry_info *tei, uint32_t count, int op,
287
struct table_config **ptc)
288
{
289
struct namedobj_instance *ni;
290
struct table_config *tc;
291
uint32_t kidx;
292
int error;
293
294
IPFW_UH_WLOCK_ASSERT(ch);
295
296
ni = CHAIN_TO_NI(ch);
297
tc = NULL;
298
if ((tc = find_table(ni, ti)) != NULL) {
299
/* check table type */
300
if (tc->no.subtype != ti->type)
301
return (EINVAL);
302
303
if (tc->locked != 0)
304
return (EACCES);
305
306
/* Try to exit early on limit hit */
307
if (op == OP_ADD && count == 1 &&
308
check_table_limit(tc, tei) != 0)
309
return (EFBIG);
310
311
/* Reference and return */
312
tc->no.refcnt++;
313
*ptc = tc;
314
return (0);
315
}
316
317
if (op == OP_DEL)
318
return (ESRCH);
319
320
/* Compatibility mode: create new table for old clients */
321
if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
322
return (ESRCH);
323
324
IPFW_UH_WUNLOCK(ch);
325
error = create_table_compat(ch, ti, &kidx);
326
IPFW_UH_WLOCK(ch);
327
328
if (error != 0)
329
return (error);
330
331
tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
332
KASSERT(tc != NULL, ("create_table_compat returned bad idx %u", kidx));
333
334
/* OK, now we've got referenced table. */
335
*ptc = tc;
336
return (0);
337
}
338
339
/*
340
* Rolls back already @added to @tc entries using state array @ta_buf_m.
341
* Assume the following layout:
342
* 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
343
* 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
344
* for storing deleted state
345
*/
346
static void
347
rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
348
struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
349
uint32_t count, uint32_t added)
350
{
351
struct table_algo *ta;
352
struct tentry_info *ptei;
353
caddr_t v, vv;
354
size_t ta_buf_sz;
355
int error __diagused, i;
356
uint32_t num;
357
358
IPFW_UH_WLOCK_ASSERT(ch);
359
360
ta = tc->ta;
361
ta_buf_sz = ta->ta_buf_size;
362
v = ta_buf_m;
363
vv = v + count * ta_buf_sz;
364
for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
365
ptei = &tei[i];
366
if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
367
/*
368
* We have old value stored by previous
369
* call in @ptei->value. Do add once again
370
* to restore it.
371
*/
372
error = ta->add(tc->astate, tinfo, ptei, v, &num);
373
KASSERT(error == 0, ("rollback UPDATE fail"));
374
KASSERT(num == 0, ("rollback UPDATE fail2"));
375
continue;
376
}
377
378
error = ta->prepare_del(ch, ptei, vv);
379
KASSERT(error == 0, ("pre-rollback INSERT failed"));
380
error = ta->del(tc->astate, tinfo, ptei, vv, &num);
381
KASSERT(error == 0, ("rollback INSERT failed"));
382
tc->count -= num;
383
}
384
}
385
386
/*
387
* Prepares add/del state for all @count entries in @tei.
388
* Uses either stack buffer (@ta_buf) or allocates a new one.
389
* Stores pointer to allocated buffer back to @ta_buf.
390
*
391
* Returns 0 on success.
392
*/
393
static int
394
prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
395
struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
396
{
397
caddr_t ta_buf_m, v;
398
size_t ta_buf_sz, sz;
399
struct tentry_info *ptei;
400
int error, i;
401
402
error = 0;
403
ta_buf_sz = ta->ta_buf_size;
404
if (count == 1) {
405
/* Single add/delete, use on-stack buffer */
406
memset(*ta_buf, 0, TA_BUF_SZ);
407
ta_buf_m = *ta_buf;
408
} else {
409
/*
410
* Multiple adds/deletes, allocate larger buffer
411
*
412
* Note we need 2xcount buffer for add case:
413
* we have hold both ADD state
414
* and DELETE state (this may be needed
415
* if we need to rollback all changes)
416
*/
417
sz = count * ta_buf_sz;
418
ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
419
M_WAITOK | M_ZERO);
420
}
421
422
v = ta_buf_m;
423
for (i = 0; i < count; i++, v += ta_buf_sz) {
424
ptei = &tei[i];
425
error = (op == OP_ADD) ?
426
ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
427
428
/*
429
* Some syntax error (incorrect mask, or address, or
430
* anything). Return error regardless of atomicity
431
* settings.
432
*/
433
if (error != 0)
434
break;
435
}
436
437
*ta_buf = ta_buf_m;
438
return (error);
439
}
440
441
/*
442
* Flushes allocated state for each @count entries in @tei.
443
* Frees @ta_buf_m if differs from stack buffer @ta_buf.
444
*/
445
static void
446
flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
447
struct tentry_info *tei, uint32_t count, int rollback,
448
caddr_t ta_buf_m, caddr_t ta_buf)
449
{
450
caddr_t v;
451
struct tentry_info *ptei;
452
size_t ta_buf_sz;
453
int i;
454
455
ta_buf_sz = ta->ta_buf_size;
456
457
/* Run cleaning callback anyway */
458
v = ta_buf_m;
459
for (i = 0; i < count; i++, v += ta_buf_sz) {
460
ptei = &tei[i];
461
ta->flush_entry(ch, ptei, v);
462
if (ptei->ptv != NULL) {
463
free(ptei->ptv, M_IPFW);
464
ptei->ptv = NULL;
465
}
466
}
467
468
/* Clean up "deleted" state in case of rollback */
469
if (rollback != 0) {
470
v = ta_buf_m + count * ta_buf_sz;
471
for (i = 0; i < count; i++, v += ta_buf_sz)
472
ta->flush_entry(ch, &tei[i], v);
473
}
474
475
if (ta_buf_m != ta_buf)
476
free(ta_buf_m, M_TEMP);
477
}
478
479
static void
480
rollback_add_entry(void *object, struct op_state *_state)
481
{
482
struct ip_fw_chain *ch __diagused;
483
struct tableop_state *ts;
484
485
ts = (struct tableop_state *)_state;
486
487
if (ts->tc != object && ts->ch != object)
488
return;
489
490
ch = ts->ch;
491
492
IPFW_UH_WLOCK_ASSERT(ch);
493
494
/* Call specifid unlockers */
495
rollback_table_values(ts);
496
497
/* Indicate we've called */
498
ts->modified = 1;
499
}
500
501
/*
502
* Adds/updates one or more entries in table @ti.
503
*
504
* Function may drop/reacquire UH wlock multiple times due to
505
* items alloc, algorithm callbacks (check_space), value linkage
506
* (new values, value storage realloc), etc..
507
* Other processes like other adds (which may involve storage resize),
508
* table swaps (which changes table data and may change algo type),
509
* table modify (which may change value mask) may be executed
510
* simultaneously so we need to deal with it.
511
*
512
* The following approach was implemented:
513
* we have per-chain linked list, protected with UH lock.
514
* add_table_entry prepares special on-stack structure wthich is passed
515
* to its descendants. Users add this structure to this list before unlock.
516
* After performing needed operations and acquiring UH lock back, each user
517
* checks if structure has changed. If true, it rolls local state back and
518
* returns without error to the caller.
519
* add_table_entry() on its own checks if structure has changed and restarts
520
* its operation from the beginning (goto restart).
521
*
522
* Functions which are modifying fields of interest (currently
523
* resize_shared_value_storage() and swap_tables() )
524
* traverses given list while holding UH lock immediately before
525
* performing their operations calling function provided be list entry
526
* ( currently rollback_add_entry ) which performs rollback for all necessary
527
* state and sets appropriate values in structure indicating rollback
528
* has happened.
529
*
530
* Algo interaction:
531
* Function references @ti first to ensure table won't
532
* disappear or change its type.
533
* After that, prepare_add callback is called for each @tei entry.
534
* Next, we try to add each entry under UH+WHLOCK
535
* using add() callback.
536
* Finally, we free all state by calling flush_entry callback
537
* for each @tei.
538
*
539
* Returns 0 on success.
540
*/
541
int
542
add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
543
struct tentry_info *tei, uint8_t flags, uint32_t count)
544
{
545
struct table_config *tc;
546
struct table_algo *ta;
547
struct tentry_info *ptei;
548
struct tableop_state ts;
549
char ta_buf[TA_BUF_SZ];
550
caddr_t ta_buf_m, v;
551
uint32_t kidx, num, numadd;
552
int error, first_error, i, rollback;
553
554
memset(&ts, 0, sizeof(ts));
555
ta = NULL;
556
IPFW_UH_WLOCK(ch);
557
558
/*
559
* Find and reference existing table.
560
*/
561
restart:
562
if (ts.modified != 0) {
563
IPFW_UH_WUNLOCK(ch);
564
flush_batch_buffer(ch, ta, tei, count, rollback,
565
ta_buf_m, ta_buf);
566
memset(&ts, 0, sizeof(ts));
567
ta = NULL;
568
IPFW_UH_WLOCK(ch);
569
}
570
571
error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
572
if (error != 0) {
573
IPFW_UH_WUNLOCK(ch);
574
return (error);
575
}
576
ta = tc->ta;
577
578
/* Fill in tablestate */
579
ts.ch = ch;
580
ts.opstate.func = rollback_add_entry;
581
ts.tc = tc;
582
ts.vshared = tc->vshared;
583
ts.vmask = tc->vmask;
584
ts.ta = ta;
585
ts.tei = tei;
586
ts.count = count;
587
rollback = 0;
588
add_toperation_state(ch, &ts);
589
IPFW_UH_WUNLOCK(ch);
590
591
/* Allocate memory and prepare record(s) */
592
/* Pass stack buffer by default */
593
ta_buf_m = ta_buf;
594
error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
595
596
IPFW_UH_WLOCK(ch);
597
del_toperation_state(ch, &ts);
598
/* Drop reference we've used in first search */
599
tc->no.refcnt--;
600
601
/* Check prepare_batch_buffer() error */
602
if (error != 0)
603
goto cleanup;
604
605
/*
606
* Check if table swap has happened.
607
* (so table algo might be changed).
608
* Restart operation to achieve consistent behavior.
609
*/
610
if (ts.modified != 0)
611
goto restart;
612
613
/*
614
* Link all values values to shared/per-table value array.
615
*
616
* May release/reacquire UH_WLOCK.
617
*/
618
error = ipfw_link_table_values(ch, &ts, flags);
619
if (error != 0)
620
goto cleanup;
621
if (ts.modified != 0)
622
goto restart;
623
624
/*
625
* Ensure we are able to add all entries without additional
626
* memory allocations. May release/reacquire UH_WLOCK.
627
*/
628
kidx = tc->no.kidx;
629
error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
630
if (error != 0)
631
goto cleanup;
632
if (ts.modified != 0)
633
goto restart;
634
635
/* We've got valid table in @tc. Let's try to add data */
636
kidx = tc->no.kidx;
637
ta = tc->ta;
638
numadd = 0;
639
first_error = 0;
640
641
IPFW_WLOCK(ch);
642
643
v = ta_buf_m;
644
for (i = 0; i < count; i++, v += ta->ta_buf_size) {
645
ptei = &tei[i];
646
num = 0;
647
/* check limit before adding */
648
if ((error = check_table_limit(tc, ptei)) == 0) {
649
/*
650
* It should be safe to insert a record w/o
651
* a properly-linked value if atomicity is
652
* not required.
653
*
654
* If the added item does not have a valid value
655
* index, it would get rejected by ta->add().
656
* */
657
error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
658
ptei, v, &num);
659
/* Set status flag to inform userland */
660
store_tei_result(ptei, OP_ADD, error, num);
661
}
662
if (error == 0) {
663
/* Update number of records to ease limit checking */
664
tc->count += num;
665
numadd += num;
666
continue;
667
}
668
669
if (first_error == 0)
670
first_error = error;
671
672
/*
673
* Some error have happened. Check our atomicity
674
* settings: continue if atomicity is not required,
675
* rollback changes otherwise.
676
*/
677
if ((flags & IPFW_CTF_ATOMIC) == 0)
678
continue;
679
680
rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
681
tei, ta_buf_m, count, i);
682
683
rollback = 1;
684
break;
685
}
686
687
IPFW_WUNLOCK(ch);
688
689
ipfw_garbage_table_values(ch, tc, tei, count, rollback);
690
691
/* Permit post-add algorithm grow/rehash. */
692
if (numadd != 0)
693
check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
694
695
/* Return first error to user, if any */
696
error = first_error;
697
698
cleanup:
699
IPFW_UH_WUNLOCK(ch);
700
701
flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
702
703
return (error);
704
}
705
706
/*
707
* Deletes one or more entries in table @ti.
708
*
709
* Returns 0 on success.
710
*/
711
int
712
del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
713
struct tentry_info *tei, uint8_t flags, uint32_t count)
714
{
715
struct table_config *tc;
716
struct table_algo *ta;
717
struct tentry_info *ptei;
718
char ta_buf[TA_BUF_SZ];
719
caddr_t ta_buf_m, v;
720
uint32_t kidx, num, numdel;
721
int error, first_error, i;
722
723
/*
724
* Find and reference existing table.
725
*/
726
IPFW_UH_WLOCK(ch);
727
error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
728
if (error != 0) {
729
IPFW_UH_WUNLOCK(ch);
730
return (error);
731
}
732
ta = tc->ta;
733
IPFW_UH_WUNLOCK(ch);
734
735
/* Allocate memory and prepare record(s) */
736
/* Pass stack buffer by default */
737
ta_buf_m = ta_buf;
738
error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
739
if (error != 0)
740
goto cleanup;
741
742
IPFW_UH_WLOCK(ch);
743
744
/* Drop reference we've used in first search */
745
tc->no.refcnt--;
746
747
/*
748
* Check if table algo is still the same.
749
* (changed ta may be the result of table swap).
750
*/
751
if (ta != tc->ta) {
752
IPFW_UH_WUNLOCK(ch);
753
error = EINVAL;
754
goto cleanup;
755
}
756
757
kidx = tc->no.kidx;
758
numdel = 0;
759
first_error = 0;
760
761
IPFW_WLOCK(ch);
762
v = ta_buf_m;
763
for (i = 0; i < count; i++, v += ta->ta_buf_size) {
764
ptei = &tei[i];
765
num = 0;
766
error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
767
&num);
768
/* Save state for userland */
769
store_tei_result(ptei, OP_DEL, error, num);
770
if (error != 0 && first_error == 0)
771
first_error = error;
772
tc->count -= num;
773
numdel += num;
774
}
775
IPFW_WUNLOCK(ch);
776
777
/* Unlink non-used values */
778
ipfw_garbage_table_values(ch, tc, tei, count, 0);
779
780
if (numdel != 0) {
781
/* Run post-del hook to permit shrinking */
782
check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
783
}
784
785
IPFW_UH_WUNLOCK(ch);
786
787
/* Return first error to user, if any */
788
error = first_error;
789
790
cleanup:
791
flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
792
793
return (error);
794
}
795
796
/*
797
* Ensure that table @tc has enough space to add @count entries without
798
* need for reallocation.
799
*
800
* Callbacks order:
801
* 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
802
*
803
* 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
804
* 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage
805
* 3) modify (UH_WLOCK + WLOCK) - switch pointers
806
* 4) flush_modify (UH_WLOCK) - free state, if needed
807
*
808
* Returns 0 on success.
809
*/
810
static int
811
check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
812
struct table_config *tc, struct table_info *ti, uint32_t count)
813
{
814
struct table_algo *ta;
815
uint64_t pflags;
816
char ta_buf[TA_BUF_SZ];
817
int error;
818
819
IPFW_UH_WLOCK_ASSERT(ch);
820
821
error = 0;
822
ta = tc->ta;
823
if (ta->need_modify == NULL)
824
return (0);
825
826
/* Acquire reference not to loose @tc between locks/unlocks */
827
tc->no.refcnt++;
828
829
/*
830
* TODO: think about avoiding race between large add/large delete
831
* operation on algorithm which implements shrinking along with
832
* growing.
833
*/
834
while (true) {
835
pflags = 0;
836
if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
837
error = 0;
838
break;
839
}
840
841
/* We have to shrink/grow table */
842
if (ts != NULL)
843
add_toperation_state(ch, ts);
844
IPFW_UH_WUNLOCK(ch);
845
846
memset(&ta_buf, 0, sizeof(ta_buf));
847
error = ta->prepare_mod(ta_buf, &pflags);
848
849
IPFW_UH_WLOCK(ch);
850
if (ts != NULL)
851
del_toperation_state(ch, ts);
852
853
if (error != 0)
854
break;
855
856
if (ts != NULL && ts->modified != 0) {
857
/*
858
* Swap operation has happened
859
* so we're currently operating on other
860
* table data. Stop doing this.
861
*/
862
ta->flush_mod(ta_buf);
863
break;
864
}
865
866
/* Check if we still need to alter table */
867
ti = KIDX_TO_TI(ch, tc->no.kidx);
868
if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
869
IPFW_UH_WUNLOCK(ch);
870
871
/*
872
* Other thread has already performed resize.
873
* Flush our state and return.
874
*/
875
ta->flush_mod(ta_buf);
876
break;
877
}
878
879
error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
880
if (error == 0) {
881
/* Do actual modification */
882
IPFW_WLOCK(ch);
883
ta->modify(tc->astate, ti, ta_buf, pflags);
884
IPFW_WUNLOCK(ch);
885
}
886
887
/* Anyway, flush data and retry */
888
ta->flush_mod(ta_buf);
889
}
890
891
tc->no.refcnt--;
892
return (error);
893
}
894
895
/*
896
* Adds or deletes record in table.
897
* Data layout (v1)(current):
898
* Request: [ ipfw_obj_header
899
* ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
900
* ]
901
*
902
* Returns 0 on success
903
*/
904
static int
905
manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
906
struct sockopt_data *sd)
907
{
908
ipfw_obj_tentry *tent, *ptent;
909
ipfw_obj_ctlv *ctlv;
910
ipfw_obj_header *oh;
911
struct tentry_info *ptei, tei, *tei_buf;
912
struct tid_info ti;
913
uint32_t kidx;
914
int error, i, read;
915
916
/* Check minimum header size */
917
if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
918
return (EINVAL);
919
920
/* Check if passed data is too long */
921
if (sd->valsize != sd->kavail)
922
return (EINVAL);
923
924
oh = (ipfw_obj_header *)sd->kbuf;
925
926
/* Basic length checks for TLVs */
927
if (oh->ntlv.head.length != sizeof(oh->ntlv))
928
return (EINVAL);
929
930
read = sizeof(*oh);
931
932
ctlv = (ipfw_obj_ctlv *)(oh + 1);
933
if (ctlv->head.length + read != sd->valsize)
934
return (EINVAL);
935
936
read += sizeof(*ctlv);
937
tent = (ipfw_obj_tentry *)(ctlv + 1);
938
if (ctlv->count * sizeof(*tent) + read != sd->valsize)
939
return (EINVAL);
940
941
if (ctlv->count == 0)
942
return (0);
943
944
/*
945
* Mark entire buffer as "read".
946
* This instructs sopt api write it back
947
* after function return.
948
*/
949
ipfw_get_sopt_header(sd, sd->valsize);
950
951
/* Perform basic checks for each entry */
952
ptent = tent;
953
kidx = tent->idx;
954
for (i = 0; i < ctlv->count; i++, ptent++) {
955
if (ptent->head.length != sizeof(*ptent))
956
return (EINVAL);
957
if (ptent->idx != kidx)
958
return (ENOTSUP);
959
}
960
961
/* Convert data into kernel request objects */
962
objheader_to_ti(oh, &ti);
963
ti.type = oh->ntlv.type;
964
ti.uidx = kidx;
965
966
/* Use on-stack buffer for single add/del */
967
if (ctlv->count == 1) {
968
memset(&tei, 0, sizeof(tei));
969
tei_buf = &tei;
970
} else
971
tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
972
M_WAITOK | M_ZERO);
973
974
ptei = tei_buf;
975
ptent = tent;
976
for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
977
ptei->paddr = &ptent->k;
978
ptei->subtype = ptent->subtype;
979
ptei->masklen = ptent->masklen;
980
if (ptent->head.flags & IPFW_TF_UPDATE)
981
ptei->flags |= TEI_FLAGS_UPDATE;
982
983
ipfw_import_table_value_v1(&ptent->v.value);
984
ptei->pvalue = (struct table_value *)&ptent->v.value;
985
}
986
987
error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
988
add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
989
del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
990
991
/* Translate result back to userland */
992
ptei = tei_buf;
993
ptent = tent;
994
for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
995
if (ptei->flags & TEI_FLAGS_ADDED)
996
ptent->result = IPFW_TR_ADDED;
997
else if (ptei->flags & TEI_FLAGS_DELETED)
998
ptent->result = IPFW_TR_DELETED;
999
else if (ptei->flags & TEI_FLAGS_UPDATED)
1000
ptent->result = IPFW_TR_UPDATED;
1001
else if (ptei->flags & TEI_FLAGS_LIMIT)
1002
ptent->result = IPFW_TR_LIMIT;
1003
else if (ptei->flags & TEI_FLAGS_ERROR)
1004
ptent->result = IPFW_TR_ERROR;
1005
else if (ptei->flags & TEI_FLAGS_NOTFOUND)
1006
ptent->result = IPFW_TR_NOTFOUND;
1007
else if (ptei->flags & TEI_FLAGS_EXISTS)
1008
ptent->result = IPFW_TR_EXISTS;
1009
ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
1010
}
1011
1012
if (tei_buf != &tei)
1013
free(tei_buf, M_TEMP);
1014
1015
return (error);
1016
}
1017
1018
/*
1019
* Looks up an entry in given table.
1020
* Data layout (v0)(current):
1021
* Request: [ ipfw_obj_header ipfw_obj_tentry ]
1022
* Reply: [ ipfw_obj_header ipfw_obj_tentry ]
1023
*
1024
* Returns 0 on success
1025
*/
1026
static int
1027
find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1028
struct sockopt_data *sd)
1029
{
1030
ipfw_obj_tentry *tent;
1031
ipfw_obj_header *oh;
1032
struct tid_info ti;
1033
struct table_config *tc;
1034
struct table_algo *ta;
1035
struct table_info *kti;
1036
struct table_value *pval;
1037
struct namedobj_instance *ni;
1038
int error;
1039
size_t sz;
1040
1041
/* Check minimum header size */
1042
sz = sizeof(*oh) + sizeof(*tent);
1043
if (sd->valsize != sz)
1044
return (EINVAL);
1045
1046
oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1047
tent = (ipfw_obj_tentry *)(oh + 1);
1048
1049
/* Basic length checks for TLVs */
1050
if (oh->ntlv.head.length != sizeof(oh->ntlv))
1051
return (EINVAL);
1052
1053
objheader_to_ti(oh, &ti);
1054
ti.type = oh->ntlv.type;
1055
ti.uidx = tent->idx;
1056
1057
IPFW_UH_RLOCK(ch);
1058
ni = CHAIN_TO_NI(ch);
1059
1060
/*
1061
* Find existing table and check its type .
1062
*/
1063
ta = NULL;
1064
if ((tc = find_table(ni, &ti)) == NULL) {
1065
IPFW_UH_RUNLOCK(ch);
1066
return (ESRCH);
1067
}
1068
1069
/* check table type */
1070
if (tc->no.subtype != ti.type) {
1071
IPFW_UH_RUNLOCK(ch);
1072
return (EINVAL);
1073
}
1074
1075
kti = KIDX_TO_TI(ch, tc->no.kidx);
1076
ta = tc->ta;
1077
1078
if (ta->find_tentry == NULL)
1079
return (ENOTSUP);
1080
1081
error = ta->find_tentry(tc->astate, kti, tent);
1082
if (error == 0) {
1083
pval = get_table_value(ch, tc, tent->v.kidx);
1084
ipfw_export_table_value_v1(pval, &tent->v.value);
1085
}
1086
IPFW_UH_RUNLOCK(ch);
1087
1088
return (error);
1089
}
1090
1091
/*
1092
* Flushes all entries or destroys given table.
1093
* Data layout (v0)(current):
1094
* Request: [ ipfw_obj_header ]
1095
*
1096
* Returns 0 on success
1097
*/
1098
static int
1099
flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1100
struct sockopt_data *sd)
1101
{
1102
int error;
1103
struct _ipfw_obj_header *oh;
1104
struct tid_info ti;
1105
1106
if (sd->valsize != sizeof(*oh))
1107
return (EINVAL);
1108
1109
oh = (struct _ipfw_obj_header *)op3;
1110
objheader_to_ti(oh, &ti);
1111
1112
if (op3->opcode == IP_FW_TABLE_XDESTROY)
1113
error = destroy_table(ch, &ti);
1114
else if (op3->opcode == IP_FW_TABLE_XFLUSH)
1115
error = flush_table(ch, &ti);
1116
else
1117
return (ENOTSUP);
1118
1119
return (error);
1120
}
1121
1122
static void
1123
restart_flush(void *object, struct op_state *_state)
1124
{
1125
struct tableop_state *ts;
1126
1127
ts = (struct tableop_state *)_state;
1128
1129
if (ts->tc != object)
1130
return;
1131
1132
/* Indicate we've called */
1133
ts->modified = 1;
1134
}
1135
1136
/*
1137
* Flushes given table.
1138
*
1139
* Function create new table instance with the same
1140
* parameters, swaps it with old one and
1141
* flushes state without holding runtime WLOCK.
1142
*
1143
* Returns 0 on success.
1144
*/
1145
int
1146
flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
1147
{
1148
struct namedobj_instance *ni;
1149
struct table_config *tc;
1150
struct table_algo *ta;
1151
struct table_info ti_old, ti_new, *tablestate;
1152
void *astate_old, *astate_new;
1153
char algostate[64], *pstate;
1154
struct tableop_state ts;
1155
int error, need_gc;
1156
uint32_t kidx;
1157
uint8_t tflags;
1158
1159
/*
1160
* Stage 1: save table algorithm.
1161
* Reference found table to ensure it won't disappear.
1162
*/
1163
IPFW_UH_WLOCK(ch);
1164
ni = CHAIN_TO_NI(ch);
1165
if ((tc = find_table(ni, ti)) == NULL) {
1166
IPFW_UH_WUNLOCK(ch);
1167
return (ESRCH);
1168
}
1169
need_gc = 0;
1170
astate_new = NULL;
1171
memset(&ti_new, 0, sizeof(ti_new));
1172
restart:
1173
/* Set up swap handler */
1174
memset(&ts, 0, sizeof(ts));
1175
ts.opstate.func = restart_flush;
1176
ts.tc = tc;
1177
1178
ta = tc->ta;
1179
/* Do not flush readonly tables */
1180
if ((ta->flags & TA_FLAG_READONLY) != 0) {
1181
IPFW_UH_WUNLOCK(ch);
1182
return (EACCES);
1183
}
1184
/* Save startup algo parameters */
1185
if (ta->print_config != NULL) {
1186
ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
1187
algostate, sizeof(algostate));
1188
pstate = algostate;
1189
} else
1190
pstate = NULL;
1191
tflags = tc->tflags;
1192
tc->no.refcnt++;
1193
add_toperation_state(ch, &ts);
1194
IPFW_UH_WUNLOCK(ch);
1195
1196
/*
1197
* Stage 1.5: if this is not the first attempt, destroy previous state
1198
*/
1199
if (need_gc != 0) {
1200
ta->destroy(astate_new, &ti_new);
1201
need_gc = 0;
1202
}
1203
1204
/*
1205
* Stage 2: allocate new table instance using same algo.
1206
*/
1207
memset(&ti_new, 0, sizeof(struct table_info));
1208
error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
1209
1210
/*
1211
* Stage 3: swap old state pointers with newly-allocated ones.
1212
* Decrease refcount.
1213
*/
1214
IPFW_UH_WLOCK(ch);
1215
tc->no.refcnt--;
1216
del_toperation_state(ch, &ts);
1217
1218
if (error != 0) {
1219
IPFW_UH_WUNLOCK(ch);
1220
return (error);
1221
}
1222
1223
/*
1224
* Restart operation if table swap has happened:
1225
* even if algo may be the same, algo init parameters
1226
* may change. Restart operation instead of doing
1227
* complex checks.
1228
*/
1229
if (ts.modified != 0) {
1230
/* Delay destroying data since we're holding UH lock */
1231
need_gc = 1;
1232
goto restart;
1233
}
1234
1235
ni = CHAIN_TO_NI(ch);
1236
kidx = tc->no.kidx;
1237
tablestate = (struct table_info *)ch->tablestate;
1238
1239
IPFW_WLOCK(ch);
1240
ti_old = tablestate[kidx];
1241
tablestate[kidx] = ti_new;
1242
IPFW_WUNLOCK(ch);
1243
1244
astate_old = tc->astate;
1245
tc->astate = astate_new;
1246
tc->ti_copy = ti_new;
1247
tc->count = 0;
1248
1249
/* Notify algo on real @ti address */
1250
if (ta->change_ti != NULL)
1251
ta->change_ti(tc->astate, &tablestate[kidx]);
1252
1253
/*
1254
* Stage 4: unref values.
1255
*/
1256
ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
1257
IPFW_UH_WUNLOCK(ch);
1258
1259
/*
1260
* Stage 5: perform real flush/destroy.
1261
*/
1262
ta->destroy(astate_old, &ti_old);
1263
1264
return (0);
1265
}
1266
1267
/*
1268
* Swaps two tables.
1269
* Data layout (v0)(current):
1270
* Request: [ ipfw_obj_header ipfw_obj_ntlv ]
1271
*
1272
* Returns 0 on success
1273
*/
1274
static int
1275
swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1276
struct sockopt_data *sd)
1277
{
1278
int error;
1279
struct _ipfw_obj_header *oh;
1280
struct tid_info ti_a, ti_b;
1281
1282
if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
1283
return (EINVAL);
1284
1285
oh = (struct _ipfw_obj_header *)op3;
1286
ntlv_to_ti(&oh->ntlv, &ti_a);
1287
ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
1288
1289
error = swap_tables(ch, &ti_a, &ti_b);
1290
1291
return (error);
1292
}
1293
1294
/*
1295
* Swaps two tables of the same type/valtype.
1296
*
1297
* Checks if tables are compatible and limits
1298
* permits swap, than actually perform swap.
1299
*
1300
* Each table consists of 2 different parts:
1301
* config:
1302
* @tc (with name, set, kidx) and rule bindings, which is "stable".
1303
* number of items
1304
* table algo
1305
* runtime:
1306
* runtime data @ti (ch->tablestate)
1307
* runtime cache in @tc
1308
* algo-specific data (@tc->astate)
1309
*
1310
* So we switch:
1311
* all runtime data
1312
* number of items
1313
* table algo
1314
*
1315
* After that we call @ti change handler for each table.
1316
*
1317
* Note that referencing @tc won't protect tc->ta from change.
1318
* XXX: Do we need to restrict swap between locked tables?
1319
* XXX: Do we need to exchange ftype?
1320
*
1321
* Returns 0 on success.
1322
*/
1323
static int
1324
swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
1325
struct tid_info *b)
1326
{
1327
struct namedobj_instance *ni;
1328
struct table_config *tc_a, *tc_b;
1329
struct table_algo *ta;
1330
struct table_info ti, *tablestate;
1331
void *astate;
1332
uint32_t count;
1333
1334
/*
1335
* Stage 1: find both tables and ensure they are of
1336
* the same type.
1337
*/
1338
IPFW_UH_WLOCK(ch);
1339
ni = CHAIN_TO_NI(ch);
1340
if ((tc_a = find_table(ni, a)) == NULL) {
1341
IPFW_UH_WUNLOCK(ch);
1342
return (ESRCH);
1343
}
1344
if ((tc_b = find_table(ni, b)) == NULL) {
1345
IPFW_UH_WUNLOCK(ch);
1346
return (ESRCH);
1347
}
1348
1349
/* It is very easy to swap between the same table */
1350
if (tc_a == tc_b) {
1351
IPFW_UH_WUNLOCK(ch);
1352
return (0);
1353
}
1354
1355
/* Check type and value are the same */
1356
if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
1357
IPFW_UH_WUNLOCK(ch);
1358
return (EINVAL);
1359
}
1360
1361
/* Check limits before swap */
1362
if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
1363
(tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
1364
IPFW_UH_WUNLOCK(ch);
1365
return (EFBIG);
1366
}
1367
1368
/* Check if one of the tables is readonly */
1369
if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
1370
IPFW_UH_WUNLOCK(ch);
1371
return (EACCES);
1372
}
1373
1374
/* Notify we're going to swap */
1375
rollback_toperation_state(ch, tc_a);
1376
rollback_toperation_state(ch, tc_b);
1377
1378
/* Everything is fine, prepare to swap */
1379
tablestate = (struct table_info *)ch->tablestate;
1380
ti = tablestate[tc_a->no.kidx];
1381
ta = tc_a->ta;
1382
astate = tc_a->astate;
1383
count = tc_a->count;
1384
1385
IPFW_WLOCK(ch);
1386
/* a <- b */
1387
tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
1388
tc_a->ta = tc_b->ta;
1389
tc_a->astate = tc_b->astate;
1390
tc_a->count = tc_b->count;
1391
/* b <- a */
1392
tablestate[tc_b->no.kidx] = ti;
1393
tc_b->ta = ta;
1394
tc_b->astate = astate;
1395
tc_b->count = count;
1396
IPFW_WUNLOCK(ch);
1397
1398
/* Ensure tc.ti copies are in sync */
1399
tc_a->ti_copy = tablestate[tc_a->no.kidx];
1400
tc_b->ti_copy = tablestate[tc_b->no.kidx];
1401
1402
/* Notify both tables on @ti change */
1403
if (tc_a->ta->change_ti != NULL)
1404
tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
1405
if (tc_b->ta->change_ti != NULL)
1406
tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
1407
1408
IPFW_UH_WUNLOCK(ch);
1409
1410
return (0);
1411
}
1412
1413
/*
1414
* Destroys table specified by @ti.
1415
* Data layout (v0)(current):
1416
* Request: [ ip_fw3_opheader ]
1417
*
1418
* Returns 0 on success
1419
*/
1420
static int
1421
destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
1422
{
1423
struct namedobj_instance *ni;
1424
struct table_config *tc;
1425
1426
IPFW_UH_WLOCK(ch);
1427
1428
ni = CHAIN_TO_NI(ch);
1429
if ((tc = find_table(ni, ti)) == NULL) {
1430
IPFW_UH_WUNLOCK(ch);
1431
return (ESRCH);
1432
}
1433
1434
/* Do not permit destroying referenced tables */
1435
if (tc->no.refcnt > 0) {
1436
IPFW_UH_WUNLOCK(ch);
1437
return (EBUSY);
1438
}
1439
1440
IPFW_WLOCK(ch);
1441
unlink_table(ch, tc);
1442
IPFW_WUNLOCK(ch);
1443
1444
/* Free obj index */
1445
if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
1446
printf("Error unlinking kidx %u from table %s\n",
1447
tc->no.kidx, tc->tablename);
1448
1449
/* Unref values used in tables while holding UH lock */
1450
ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
1451
IPFW_UH_WUNLOCK(ch);
1452
1453
free_table_config(ni, tc);
1454
1455
return (0);
1456
}
1457
1458
/*
1459
* Grow tables index.
1460
*
1461
* Returns 0 on success.
1462
*/
1463
int
1464
ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
1465
{
1466
unsigned int tbl;
1467
struct namedobj_instance *ni;
1468
void *new_idx, *old_tablestate, *tablestate;
1469
struct table_info *ti;
1470
struct table_config *tc;
1471
int i, new_blocks;
1472
1473
/* Check new value for validity */
1474
if (ntables == 0)
1475
return (EINVAL);
1476
if (ntables > IPFW_TABLES_MAX)
1477
ntables = IPFW_TABLES_MAX;
1478
/* Alight to nearest power of 2 */
1479
ntables = roundup_pow_of_two(ntables);
1480
1481
/* Allocate new pointers */
1482
tablestate = malloc(ntables * sizeof(struct table_info),
1483
M_IPFW, M_WAITOK | M_ZERO);
1484
1485
ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
1486
1487
IPFW_UH_WLOCK(ch);
1488
1489
tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
1490
ni = CHAIN_TO_NI(ch);
1491
1492
/* Temporary restrict decreasing max_tables */
1493
if (ntables < V_fw_tables_max) {
1494
/*
1495
* FIXME: Check if we really can shrink
1496
*/
1497
IPFW_UH_WUNLOCK(ch);
1498
return (EINVAL);
1499
}
1500
1501
/* Copy table info/indices */
1502
memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
1503
ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
1504
1505
IPFW_WLOCK(ch);
1506
1507
/* Change pointers */
1508
old_tablestate = ch->tablestate;
1509
ch->tablestate = tablestate;
1510
ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
1511
1512
V_fw_tables_max = ntables;
1513
1514
IPFW_WUNLOCK(ch);
1515
1516
/* Notify all consumers that their @ti pointer has changed */
1517
ti = (struct table_info *)ch->tablestate;
1518
for (i = 0; i < tbl; i++, ti++) {
1519
if (ti->lookup == NULL)
1520
continue;
1521
tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
1522
if (tc == NULL || tc->ta->change_ti == NULL)
1523
continue;
1524
1525
tc->ta->change_ti(tc->astate, ti);
1526
}
1527
1528
IPFW_UH_WUNLOCK(ch);
1529
1530
/* Free old pointers */
1531
free(old_tablestate, M_IPFW);
1532
ipfw_objhash_bitmap_free(new_idx, new_blocks);
1533
1534
return (0);
1535
}
1536
1537
/*
 * Returns the named object stored under @kidx in the tables'
 * object hash of chain @ch (the result of ipfw_objhash_lookup_kidx()).
 */
struct named_object *
ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint32_t kidx)
{
	struct namedobj_instance *ni;

	ni = CHAIN_TO_NI(ch);
	return (ipfw_objhash_lookup_kidx(ni, kidx));
}
1546
1547
/*
1548
* Take reference to table specified in @ntlv.
1549
* On success return its @kidx.
1550
*/
1551
int
1552
ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint32_t *kidx)
1553
{
1554
struct tid_info ti;
1555
struct table_config *tc;
1556
int error;
1557
1558
IPFW_UH_WLOCK_ASSERT(ch);
1559
1560
ntlv_to_ti(ntlv, &ti);
1561
error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
1562
if (error != 0)
1563
return (error);
1564
1565
if (tc == NULL)
1566
return (ESRCH);
1567
1568
tc_ref(tc);
1569
*kidx = tc->no.kidx;
1570
1571
return (0);
1572
}
1573
1574
void
1575
ipfw_unref_table(struct ip_fw_chain *ch, uint32_t kidx)
1576
{
1577
1578
struct namedobj_instance *ni;
1579
struct named_object *no;
1580
1581
IPFW_UH_WLOCK_ASSERT(ch);
1582
ni = CHAIN_TO_NI(ch);
1583
no = ipfw_objhash_lookup_kidx(ni, kidx);
1584
KASSERT(no != NULL, ("Table with index %u not found", kidx));
1585
no->refcnt--;
1586
}
1587
1588
/*
1589
* Lookup an arbitrary key @paddr of length @plen in table @tbl.
1590
* Stores found value in @val.
1591
*
1592
* Returns 1 if key was found.
1593
*/
1594
int
1595
ipfw_lookup_table(struct ip_fw_chain *ch, uint32_t tbl, uint16_t plen,
1596
void *paddr, uint32_t *val)
1597
{
1598
struct table_info *ti;
1599
1600
ti = KIDX_TO_TI(ch, tbl);
1601
1602
return (ti->lookup(ti, paddr, plen, val));
1603
}
1604
1605
/*
 * Info/List/dump support for tables.
 *
 */

/*
 * High-level 'get' cmds sysctl handlers
 */
1613
1614
/*
1615
* Lists all tables currently available in kernel.
1616
* Data layout (v0)(current):
1617
* Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
1618
* Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
1619
*
1620
* Returns 0 on success
1621
*/
1622
static int
1623
list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1624
struct sockopt_data *sd)
1625
{
1626
struct _ipfw_obj_lheader *olh;
1627
int error;
1628
1629
olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
1630
if (olh == NULL)
1631
return (EINVAL);
1632
if (sd->valsize < olh->size)
1633
return (EINVAL);
1634
1635
IPFW_UH_RLOCK(ch);
1636
error = export_tables(ch, olh, sd);
1637
IPFW_UH_RUNLOCK(ch);
1638
1639
return (error);
1640
}
1641
1642
/*
1643
* Store table info to buffer provided by @sd.
1644
* Data layout (v0)(current):
1645
* Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
1646
* Reply: [ ipfw_obj_header ipfw_xtable_info ]
1647
*
1648
* Returns 0 on success.
1649
*/
1650
static int
1651
describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1652
struct sockopt_data *sd)
1653
{
1654
struct _ipfw_obj_header *oh;
1655
struct table_config *tc;
1656
struct tid_info ti;
1657
size_t sz;
1658
1659
sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
1660
oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1661
if (oh == NULL)
1662
return (EINVAL);
1663
1664
objheader_to_ti(oh, &ti);
1665
1666
IPFW_UH_RLOCK(ch);
1667
if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1668
IPFW_UH_RUNLOCK(ch);
1669
return (ESRCH);
1670
}
1671
1672
export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
1673
IPFW_UH_RUNLOCK(ch);
1674
1675
return (0);
1676
}
1677
1678
/*
1679
* Modifies existing table.
1680
* Data layout (v0)(current):
1681
* Request: [ ipfw_obj_header ipfw_xtable_info ]
1682
*
1683
* Returns 0 on success
1684
*/
1685
static int
1686
modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1687
struct sockopt_data *sd)
1688
{
1689
struct _ipfw_obj_header *oh;
1690
ipfw_xtable_info *i;
1691
char *tname;
1692
struct tid_info ti;
1693
struct namedobj_instance *ni;
1694
struct table_config *tc;
1695
1696
if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1697
return (EINVAL);
1698
1699
oh = (struct _ipfw_obj_header *)sd->kbuf;
1700
i = (ipfw_xtable_info *)(oh + 1);
1701
1702
/*
1703
* Verify user-supplied strings.
1704
* Check for null-terminated/zero-length strings/
1705
*/
1706
tname = oh->ntlv.name;
1707
if (check_table_name(tname) != 0)
1708
return (EINVAL);
1709
1710
objheader_to_ti(oh, &ti);
1711
ti.type = i->type;
1712
1713
IPFW_UH_WLOCK(ch);
1714
ni = CHAIN_TO_NI(ch);
1715
if ((tc = find_table(ni, &ti)) == NULL) {
1716
IPFW_UH_WUNLOCK(ch);
1717
return (ESRCH);
1718
}
1719
1720
/* Do not support any modifications for readonly tables */
1721
if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
1722
IPFW_UH_WUNLOCK(ch);
1723
return (EACCES);
1724
}
1725
1726
if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
1727
tc->limit = i->limit;
1728
if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
1729
tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
1730
IPFW_UH_WUNLOCK(ch);
1731
1732
return (0);
1733
}
1734
1735
/*
1736
* Creates new table.
1737
* Data layout (v0)(current):
1738
* Request: [ ipfw_obj_header ipfw_xtable_info ]
1739
*
1740
* Returns 0 on success
1741
*/
1742
static int
1743
create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1744
struct sockopt_data *sd)
1745
{
1746
struct _ipfw_obj_header *oh;
1747
ipfw_xtable_info *i;
1748
char *tname, *aname;
1749
struct tid_info ti;
1750
struct namedobj_instance *ni;
1751
1752
if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1753
return (EINVAL);
1754
1755
oh = (struct _ipfw_obj_header *)sd->kbuf;
1756
i = (ipfw_xtable_info *)(oh + 1);
1757
1758
/*
1759
* Verify user-supplied strings.
1760
* Check for null-terminated/zero-length strings/
1761
*/
1762
tname = oh->ntlv.name;
1763
aname = i->algoname;
1764
if (check_table_name(tname) != 0 ||
1765
strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
1766
return (EINVAL);
1767
1768
if (aname[0] == '\0') {
1769
/* Use default algorithm */
1770
aname = NULL;
1771
}
1772
1773
objheader_to_ti(oh, &ti);
1774
ti.type = i->type;
1775
1776
ni = CHAIN_TO_NI(ch);
1777
1778
IPFW_UH_RLOCK(ch);
1779
if (find_table(ni, &ti) != NULL) {
1780
IPFW_UH_RUNLOCK(ch);
1781
return (EEXIST);
1782
}
1783
IPFW_UH_RUNLOCK(ch);
1784
1785
return (create_table_internal(ch, &ti, aname, i, NULL, 0));
1786
}
1787
1788
/*
1789
* Creates new table based on @ti and @aname.
1790
*
1791
* Assume @aname to be checked and valid.
1792
* Stores allocated table kidx inside @pkidx (if non-NULL).
1793
* Reference created table if @compat is non-zero.
1794
*
1795
* Returns 0 on success.
1796
*/
1797
static int
1798
create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
1799
char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int compat)
1800
{
1801
struct namedobj_instance *ni;
1802
struct table_config *tc, *tc_new, *tmp;
1803
struct table_algo *ta;
1804
uint32_t kidx;
1805
1806
ni = CHAIN_TO_NI(ch);
1807
1808
ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
1809
if (ta == NULL)
1810
return (ENOTSUP);
1811
1812
tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
1813
if (tc == NULL)
1814
return (ENOMEM);
1815
1816
tc->vmask = i->vmask;
1817
tc->limit = i->limit;
1818
if (ta->flags & TA_FLAG_READONLY)
1819
tc->locked = 1;
1820
else
1821
tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
1822
1823
IPFW_UH_WLOCK(ch);
1824
1825
/* Check if table has been already created */
1826
tc_new = find_table(ni, ti);
1827
if (tc_new != NULL) {
1828
/*
1829
* Compat: do not fail if we're
1830
* requesting to create existing table
1831
* which has the same type
1832
*/
1833
if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
1834
IPFW_UH_WUNLOCK(ch);
1835
free_table_config(ni, tc);
1836
return (EEXIST);
1837
}
1838
1839
/* Exchange tc and tc_new for proper refcounting & freeing */
1840
tmp = tc;
1841
tc = tc_new;
1842
tc_new = tmp;
1843
} else {
1844
/* New table */
1845
if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
1846
IPFW_UH_WUNLOCK(ch);
1847
printf("Unable to allocate table index."
1848
" Consider increasing net.inet.ip.fw.tables_max");
1849
free_table_config(ni, tc);
1850
return (EBUSY);
1851
}
1852
tc->no.kidx = kidx;
1853
tc->no.etlv = IPFW_TLV_TBL_NAME;
1854
1855
link_table(ch, tc);
1856
}
1857
1858
if (compat != 0)
1859
tc->no.refcnt++;
1860
if (pkidx != NULL)
1861
*pkidx = tc->no.kidx;
1862
1863
IPFW_UH_WUNLOCK(ch);
1864
1865
if (tc_new != NULL)
1866
free_table_config(ni, tc_new);
1867
1868
return (0);
1869
}
1870
1871
static void
1872
ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
1873
{
1874
1875
memset(ti, 0, sizeof(struct tid_info));
1876
ti->set = ntlv->set;
1877
ti->uidx = ntlv->idx;
1878
ti->tlvs = ntlv;
1879
ti->tlen = ntlv->head.length;
1880
}
1881
1882
static void
1883
objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
1884
{
1885
1886
ntlv_to_ti(&oh->ntlv, ti);
1887
}
1888
1889
/*
 * Returns the named object instance CHAIN_TO_NI() yields for @ch.
 */
struct namedobj_instance *
ipfw_get_table_objhash(struct ip_fw_chain *ch)
{
	struct namedobj_instance *ni;

	ni = CHAIN_TO_NI(ch);
	return (ni);
}
1895
1896
/*
1897
* Exports basic table info as name TLV.
1898
* Used inside dump_static_rules() to provide info
1899
* about all tables referenced by current ruleset.
1900
*
1901
* Returns 0 on success.
1902
*/
1903
int
1904
ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint32_t kidx,
1905
struct sockopt_data *sd)
1906
{
1907
struct namedobj_instance *ni;
1908
struct named_object *no;
1909
ipfw_obj_ntlv *ntlv;
1910
1911
ni = CHAIN_TO_NI(ch);
1912
1913
no = ipfw_objhash_lookup_kidx(ni, kidx);
1914
KASSERT(no != NULL, ("invalid table kidx passed"));
1915
1916
ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
1917
if (ntlv == NULL)
1918
return (ENOMEM);
1919
1920
ntlv->head.type = IPFW_TLV_TBL_NAME;
1921
ntlv->head.length = sizeof(*ntlv);
1922
ntlv->idx = no->kidx;
1923
strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
1924
1925
return (0);
1926
}
1927
1928
struct dump_args {
1929
struct ip_fw_chain *ch;
1930
struct table_info *ti;
1931
struct table_config *tc;
1932
struct sockopt_data *sd;
1933
uint32_t cnt;
1934
uint16_t uidx;
1935
int error;
1936
uint32_t size;
1937
ta_foreach_f *f;
1938
void *farg;
1939
ipfw_obj_tentry tent;
1940
};
1941
1942
static int
1943
count_ext_entries(void *e, void *arg)
1944
{
1945
struct dump_args *da;
1946
1947
da = (struct dump_args *)arg;
1948
da->cnt++;
1949
1950
return (0);
1951
}
1952
1953
/*
1954
* Gets number of items from table either using
1955
* internal counter or calling algo callback for
1956
* externally-managed tables.
1957
*
1958
* Returns number of records.
1959
*/
1960
static uint32_t
1961
table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
1962
{
1963
struct table_info *ti;
1964
struct table_algo *ta;
1965
struct dump_args da;
1966
1967
ti = KIDX_TO_TI(ch, tc->no.kidx);
1968
ta = tc->ta;
1969
1970
/* Use internal counter for self-managed tables */
1971
if ((ta->flags & TA_FLAG_READONLY) == 0)
1972
return (tc->count);
1973
1974
/* Use callback to quickly get number of items */
1975
if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
1976
return (ta->get_count(tc->astate, ti));
1977
1978
/* Count number of iterms ourselves */
1979
memset(&da, 0, sizeof(da));
1980
ta->foreach(tc->astate, ti, count_ext_entries, &da);
1981
1982
return (da.cnt);
1983
}
1984
1985
/*
1986
* Exports table @tc info into standard ipfw_xtable_info format.
1987
*/
1988
static void
1989
export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
1990
ipfw_xtable_info *i)
1991
{
1992
struct table_info *ti;
1993
struct table_algo *ta;
1994
1995
i->type = tc->no.subtype;
1996
i->tflags = tc->tflags;
1997
i->vmask = tc->vmask;
1998
i->set = tc->no.set;
1999
i->kidx = tc->no.kidx;
2000
i->refcnt = tc->no.refcnt;
2001
i->count = table_get_count(ch, tc);
2002
i->limit = tc->limit;
2003
i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
2004
i->size = i->count * sizeof(ipfw_obj_tentry);
2005
i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2006
strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
2007
ti = KIDX_TO_TI(ch, tc->no.kidx);
2008
ta = tc->ta;
2009
if (ta->print_config != NULL) {
2010
/* Use algo function to print table config to string */
2011
ta->print_config(tc->astate, ti, i->algoname,
2012
sizeof(i->algoname));
2013
} else
2014
strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2015
/* Dump algo-specific data, if possible */
2016
if (ta->dump_tinfo != NULL) {
2017
ta->dump_tinfo(tc->astate, ti, &i->ta_info);
2018
i->ta_info.flags |= IPFW_TATFLAGS_DATA;
2019
}
2020
}
2021
2022
/*
 * Argument bundle handed to export_table_internal().
 */
struct dump_table_args {
	struct ip_fw_chain	*ch;	/* chain being exported */
	struct sockopt_data	*sd;	/* destination buffer */
};
2026
2027
static int
2028
export_table_internal(struct namedobj_instance *ni, struct named_object *no,
2029
void *arg)
2030
{
2031
ipfw_xtable_info *i;
2032
struct dump_table_args *dta;
2033
2034
dta = (struct dump_table_args *)arg;
2035
2036
i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
2037
KASSERT(i != NULL, ("previously checked buffer is not enough"));
2038
2039
export_table_info(dta->ch, (struct table_config *)no, i);
2040
return (0);
2041
}
2042
2043
/*
2044
* Export all tables as ipfw_xtable_info structures to
2045
* storage provided by @sd.
2046
*
2047
* If supplied buffer is too small, fills in required size
2048
* and returns ENOMEM.
2049
* Returns 0 on success.
2050
*/
2051
static int
2052
export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
2053
struct sockopt_data *sd)
2054
{
2055
uint32_t size;
2056
uint32_t count;
2057
struct dump_table_args dta;
2058
2059
count = ipfw_objhash_count(CHAIN_TO_NI(ch));
2060
size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
2061
2062
/* Fill in header regadless of buffer size */
2063
olh->count = count;
2064
olh->objsize = sizeof(ipfw_xtable_info);
2065
2066
if (size > olh->size) {
2067
olh->size = size;
2068
return (ENOMEM);
2069
}
2070
2071
olh->size = size;
2072
2073
dta.ch = ch;
2074
dta.sd = sd;
2075
2076
ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
2077
2078
return (0);
2079
}
2080
2081
/*
2082
* Dumps all table data
2083
* Data layout (v1)(current):
2084
* Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
2085
* Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
2086
*
2087
* Returns 0 on success
2088
*/
2089
static int
2090
dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2091
struct sockopt_data *sd)
2092
{
2093
struct _ipfw_obj_header *oh;
2094
ipfw_xtable_info *i;
2095
struct tid_info ti;
2096
struct table_config *tc;
2097
struct table_algo *ta;
2098
struct dump_args da;
2099
uint32_t sz;
2100
2101
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2102
oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
2103
if (oh == NULL)
2104
return (EINVAL);
2105
2106
i = (ipfw_xtable_info *)(oh + 1);
2107
objheader_to_ti(oh, &ti);
2108
2109
IPFW_UH_RLOCK(ch);
2110
if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2111
IPFW_UH_RUNLOCK(ch);
2112
return (ESRCH);
2113
}
2114
export_table_info(ch, tc, i);
2115
2116
if (sd->valsize < i->size) {
2117
/*
2118
* Submitted buffer size is not enough.
2119
* WE've already filled in @i structure with
2120
* relevant table info including size, so we
2121
* can return. Buffer will be flushed automatically.
2122
*/
2123
IPFW_UH_RUNLOCK(ch);
2124
return (ENOMEM);
2125
}
2126
2127
/*
2128
* Do the actual dump in eXtended format
2129
*/
2130
memset(&da, 0, sizeof(da));
2131
da.ch = ch;
2132
da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2133
da.tc = tc;
2134
da.sd = sd;
2135
2136
ta = tc->ta;
2137
2138
ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
2139
IPFW_UH_RUNLOCK(ch);
2140
2141
return (da.error);
2142
}
2143
2144
/*
2145
* Dumps table entry in eXtended format (v1)(current).
2146
*/
2147
static int
2148
dump_table_tentry(void *e, void *arg)
2149
{
2150
struct dump_args *da;
2151
struct table_config *tc;
2152
struct table_algo *ta;
2153
struct table_value *pval;
2154
ipfw_obj_tentry *tent;
2155
int error;
2156
2157
da = (struct dump_args *)arg;
2158
2159
tc = da->tc;
2160
ta = tc->ta;
2161
2162
tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
2163
/* Out of memory, returning */
2164
if (tent == NULL) {
2165
da->error = ENOMEM;
2166
return (1);
2167
}
2168
tent->head.length = sizeof(ipfw_obj_tentry);
2169
tent->idx = da->uidx;
2170
2171
error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2172
if (error != 0)
2173
return (error);
2174
2175
pval = get_table_value(da->ch, da->tc, tent->v.kidx);
2176
ipfw_export_table_value_v1(pval, &tent->v.value);
2177
2178
return (0);
2179
}
2180
2181
/*
2182
* Helper function to export table algo data
2183
* to tentry format before calling user function.
2184
*
2185
* Returns 0 on success.
2186
*/
2187
static int
2188
prepare_table_tentry(void *e, void *arg)
2189
{
2190
struct dump_args *da;
2191
struct table_config *tc;
2192
struct table_algo *ta;
2193
int error;
2194
2195
da = (struct dump_args *)arg;
2196
2197
tc = da->tc;
2198
ta = tc->ta;
2199
2200
error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2201
if (error != 0)
2202
return (error);
2203
2204
da->f(&da->tent, da->farg);
2205
2206
return (0);
2207
}
2208
2209
/*
2210
* Allow external consumers to read table entries in standard format.
2211
*/
2212
int
2213
ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint32_t kidx,
2214
ta_foreach_f *f, void *arg)
2215
{
2216
struct namedobj_instance *ni;
2217
struct table_config *tc;
2218
struct table_algo *ta;
2219
struct dump_args da;
2220
2221
ni = CHAIN_TO_NI(ch);
2222
2223
tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
2224
if (tc == NULL)
2225
return (ESRCH);
2226
2227
ta = tc->ta;
2228
2229
memset(&da, 0, sizeof(da));
2230
da.ch = ch;
2231
da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2232
da.tc = tc;
2233
da.f = f;
2234
da.farg = arg;
2235
2236
ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
2237
2238
return (0);
2239
}
2240
2241
/*
 * Table algorithms
 */
2244
2245
/*
2246
* Finds algorithm by index, table type or supplied name.
2247
*
2248
* Returns pointer to algo or NULL.
2249
*/
2250
static struct table_algo *
2251
find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
2252
{
2253
int i, l;
2254
struct table_algo *ta;
2255
2256
if (ti->type > IPFW_TABLE_MAXTYPE)
2257
return (NULL);
2258
2259
/* Search by index */
2260
if (ti->atype != 0) {
2261
if (ti->atype > tcfg->algo_count)
2262
return (NULL);
2263
return (tcfg->algo[ti->atype]);
2264
}
2265
2266
if (name == NULL) {
2267
/* Return default algorithm for given type if set */
2268
return (tcfg->def_algo[ti->type]);
2269
}
2270
2271
/* Search by name */
2272
/* TODO: better search */
2273
for (i = 1; i <= tcfg->algo_count; i++) {
2274
ta = tcfg->algo[i];
2275
2276
/*
2277
* One can supply additional algorithm
2278
* parameters so we compare only the first word
2279
* of supplied name:
2280
* 'addr:chash hsize=32'
2281
* '^^^^^^^^^'
2282
*
2283
*/
2284
l = strlen(ta->name);
2285
if (strncmp(name, ta->name, l) != 0)
2286
continue;
2287
if (name[l] != '\0' && name[l] != ' ')
2288
continue;
2289
/* Check if we're requesting proper table type */
2290
if (ti->type != 0 && ti->type != ta->type)
2291
return (NULL);
2292
return (ta);
2293
}
2294
2295
return (NULL);
2296
}
2297
2298
/*
2299
* Register new table algo @ta.
2300
* Stores algo id inside @idx.
2301
*
2302
* Returns 0 on success.
2303
*/
2304
int
2305
ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
2306
int *idx)
2307
{
2308
struct tables_config *tcfg;
2309
struct table_algo *ta_new;
2310
size_t sz;
2311
2312
if (size > sizeof(struct table_algo))
2313
return (EINVAL);
2314
2315
/* Check for the required on-stack size for add/del */
2316
sz = roundup2(ta->ta_buf_size, sizeof(void *));
2317
if (sz > TA_BUF_SZ)
2318
return (EINVAL);
2319
2320
KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
2321
2322
/* Copy algorithm data to stable storage. */
2323
ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
2324
memcpy(ta_new, ta, size);
2325
2326
tcfg = CHAIN_TO_TCFG(ch);
2327
2328
KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
2329
2330
tcfg->algo[++tcfg->algo_count] = ta_new;
2331
ta_new->idx = tcfg->algo_count;
2332
2333
/* Set algorithm as default one for given type */
2334
if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
2335
tcfg->def_algo[ta_new->type] == NULL)
2336
tcfg->def_algo[ta_new->type] = ta_new;
2337
2338
*idx = ta_new->idx;
2339
2340
return (0);
2341
}
2342
2343
/*
2344
* Unregisters table algo using @idx as id.
2345
* XXX: It is NOT safe to call this function in any place
2346
* other than ipfw instance destroy handler.
2347
*/
2348
void
2349
ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
2350
{
2351
struct tables_config *tcfg;
2352
struct table_algo *ta;
2353
2354
tcfg = CHAIN_TO_TCFG(ch);
2355
2356
KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
2357
idx, tcfg->algo_count));
2358
2359
ta = tcfg->algo[idx];
2360
KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
2361
2362
if (tcfg->def_algo[ta->type] == ta)
2363
tcfg->def_algo[ta->type] = NULL;
2364
2365
free(ta, M_IPFW);
2366
}
2367
2368
/*
2369
* Lists all table algorithms currently available.
2370
* Data layout (v0)(current):
2371
* Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
2372
* Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
2373
*
2374
* Returns 0 on success
2375
*/
2376
static int
2377
list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2378
struct sockopt_data *sd)
2379
{
2380
struct _ipfw_obj_lheader *olh;
2381
struct tables_config *tcfg;
2382
ipfw_ta_info *i;
2383
struct table_algo *ta;
2384
uint32_t count, n, size;
2385
2386
olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
2387
if (olh == NULL)
2388
return (EINVAL);
2389
if (sd->valsize < olh->size)
2390
return (EINVAL);
2391
2392
IPFW_UH_RLOCK(ch);
2393
tcfg = CHAIN_TO_TCFG(ch);
2394
count = tcfg->algo_count;
2395
size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
2396
2397
/* Fill in header regadless of buffer size */
2398
olh->count = count;
2399
olh->objsize = sizeof(ipfw_ta_info);
2400
2401
if (size > olh->size) {
2402
olh->size = size;
2403
IPFW_UH_RUNLOCK(ch);
2404
return (ENOMEM);
2405
}
2406
olh->size = size;
2407
2408
for (n = 1; n <= count; n++) {
2409
i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
2410
KASSERT(i != NULL, ("previously checked buffer is not enough"));
2411
ta = tcfg->algo[n];
2412
strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2413
i->type = ta->type;
2414
i->refcnt = ta->refcnt;
2415
}
2416
2417
IPFW_UH_RUNLOCK(ch);
2418
2419
return (0);
2420
}
2421
2422
static int
2423
classify_srcdst(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2424
{
2425
ipfw_insn_table *cmd;
2426
2427
/* Basic IPv4/IPv6 or u32 lookups */
2428
cmd = insntod(cmd0, table);
2429
*puidx = cmd->kidx;
2430
switch(cmd0->arg1) {
2431
case LOOKUP_DST_IP:
2432
case LOOKUP_SRC_IP:
2433
default:
2434
/* IPv4 src/dst */
2435
*ptype = IPFW_TABLE_ADDR;
2436
break;
2437
case LOOKUP_DST_PORT:
2438
case LOOKUP_SRC_PORT:
2439
case LOOKUP_UID:
2440
case LOOKUP_JAIL:
2441
case LOOKUP_DSCP:
2442
case LOOKUP_MARK:
2443
case LOOKUP_RULENUM:
2444
*ptype = IPFW_TABLE_NUMBER;
2445
break;
2446
case LOOKUP_DST_MAC:
2447
case LOOKUP_SRC_MAC:
2448
*ptype = IPFW_TABLE_MAC;
2449
break;
2450
}
2451
return (0);
2452
}
2453
2454
static int
2455
classify_via(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2456
{
2457
ipfw_insn_if *cmdif;
2458
2459
/* Interface table, possibly */
2460
cmdif = insntod(cmd0, if);
2461
if (cmdif->name[0] != '\1')
2462
return (1);
2463
2464
*ptype = IPFW_TABLE_INTERFACE;
2465
*puidx = cmdif->p.kidx; /* XXXAE */
2466
return (0);
2467
}
2468
2469
static int
2470
classify_flow(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2471
{
2472
*puidx = insntod(cmd0, table)->kidx;
2473
*ptype = IPFW_TABLE_FLOW;
2474
return (0);
2475
}
2476
2477
static int
2478
classify_mac_lookup(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2479
{
2480
*puidx = insntod(cmd0, table)->kidx;
2481
*ptype = IPFW_TABLE_MAC;
2482
return (0);
2483
}
2484
2485
static void
2486
update_kidx(ipfw_insn *cmd0, uint32_t idx)
2487
{
2488
insntod(cmd0, table)->kidx = idx;
2489
}
2490
2491
static void
2492
update_via(ipfw_insn *cmd0, uint32_t idx)
2493
{
2494
insntod(cmd0, if)->p.kidx = idx;
2495
}
2496
2497
static int
2498
table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
2499
struct named_object **pno)
2500
{
2501
struct table_config *tc;
2502
int error;
2503
2504
IPFW_UH_WLOCK_ASSERT(ch);
2505
2506
error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
2507
if (error != 0)
2508
return (error);
2509
2510
*pno = &tc->no;
2511
return (0);
2512
}
2513
2514
/* XXX: sets-sets! */
2515
static struct named_object *
2516
table_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
2517
{
2518
struct namedobj_instance *ni;
2519
struct table_config *tc;
2520
2521
IPFW_UH_WLOCK_ASSERT(ch);
2522
ni = CHAIN_TO_NI(ch);
2523
tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
2524
KASSERT(tc != NULL, ("Table with index %u not found", idx));
2525
2526
return (&tc->no);
2527
}
2528
2529
static int
2530
table_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
2531
enum ipfw_sets_cmd cmd)
2532
{
2533
2534
switch (cmd) {
2535
case SWAP_ALL:
2536
case TEST_ALL:
2537
case MOVE_ALL:
2538
/*
2539
* Always return success, the real action and decision
2540
* should make table_manage_sets_all().
2541
*/
2542
return (0);
2543
case TEST_ONE:
2544
case MOVE_ONE:
2545
/*
2546
* NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
2547
* if set number will be used in hash function. Currently
2548
* we can just use generic handler that replaces set value.
2549
*/
2550
if (V_fw_tables_sets == 0)
2551
return (0);
2552
break;
2553
case COUNT_ONE:
2554
/*
2555
* Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
2556
* disabled. This allow skip table's opcodes from additional
2557
* checks when specific rules moved to another set.
2558
*/
2559
if (V_fw_tables_sets == 0)
2560
return (EOPNOTSUPP);
2561
}
2562
/* Use generic sets handler when per-set sysctl is enabled. */
2563
return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2564
set, new_set, cmd));
2565
}
2566
2567
/*
2568
* We register several opcode rewriters for lookup tables.
2569
* All tables opcodes have the same ETLV type, but different subtype.
2570
* To avoid invoking sets handler several times for XXX_ALL commands,
2571
* we use separate manage_sets handler. O_RECV has the lowest value,
2572
* so it should be called first.
2573
*/
2574
static int
2575
table_manage_sets_all(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
2576
enum ipfw_sets_cmd cmd)
2577
{
2578
2579
switch (cmd) {
2580
case SWAP_ALL:
2581
case TEST_ALL:
2582
/*
2583
* Return success for TEST_ALL, since nothing prevents
2584
* move rules from one set to another. All tables are
2585
* accessible from all sets when per-set tables sysctl
2586
* is disabled.
2587
*/
2588
case MOVE_ALL:
2589
if (V_fw_tables_sets == 0)
2590
return (0);
2591
break;
2592
default:
2593
return (table_manage_sets(ch, set, new_set, cmd));
2594
}
2595
/* Use generic sets handler when per-set sysctl is enabled. */
2596
return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2597
set, new_set, cmd));
2598
}
2599
2600
static struct opcode_obj_rewrite opcodes[] = {
2601
{
2602
.opcode = O_IP_SRC_LOOKUP,
2603
.etlv = IPFW_TLV_TBL_NAME,
2604
.classifier = classify_srcdst,
2605
.update = update_kidx,
2606
.find_byname = table_findbyname,
2607
.find_bykidx = table_findbykidx,
2608
.create_object = create_table_compat,
2609
.manage_sets = table_manage_sets,
2610
},
2611
{
2612
.opcode = O_IP_DST_LOOKUP,
2613
.etlv = IPFW_TLV_TBL_NAME,
2614
.classifier = classify_srcdst,
2615
.update = update_kidx,
2616
.find_byname = table_findbyname,
2617
.find_bykidx = table_findbykidx,
2618
.create_object = create_table_compat,
2619
.manage_sets = table_manage_sets,
2620
},
2621
{
2622
.opcode = O_IP_FLOW_LOOKUP,
2623
.etlv = IPFW_TLV_TBL_NAME,
2624
.classifier = classify_flow,
2625
.update = update_kidx,
2626
.find_byname = table_findbyname,
2627
.find_bykidx = table_findbykidx,
2628
.create_object = create_table_compat,
2629
.manage_sets = table_manage_sets,
2630
},
2631
{
2632
.opcode = O_MAC_SRC_LOOKUP,
2633
.etlv = IPFW_TLV_TBL_NAME,
2634
.classifier = classify_mac_lookup,
2635
.update = update_kidx,
2636
.find_byname = table_findbyname,
2637
.find_bykidx = table_findbykidx,
2638
.create_object = create_table_compat,
2639
.manage_sets = table_manage_sets,
2640
},
2641
{
2642
.opcode = O_MAC_DST_LOOKUP,
2643
.etlv = IPFW_TLV_TBL_NAME,
2644
.classifier = classify_mac_lookup,
2645
.update = update_kidx,
2646
.find_byname = table_findbyname,
2647
.find_bykidx = table_findbykidx,
2648
.create_object = create_table_compat,
2649
.manage_sets = table_manage_sets,
2650
},
2651
{
2652
.opcode = O_XMIT,
2653
.etlv = IPFW_TLV_TBL_NAME,
2654
.classifier = classify_via,
2655
.update = update_via,
2656
.find_byname = table_findbyname,
2657
.find_bykidx = table_findbykidx,
2658
.create_object = create_table_compat,
2659
.manage_sets = table_manage_sets,
2660
},
2661
{
2662
.opcode = O_RECV,
2663
.etlv = IPFW_TLV_TBL_NAME,
2664
.classifier = classify_via,
2665
.update = update_via,
2666
.find_byname = table_findbyname,
2667
.find_bykidx = table_findbykidx,
2668
.create_object = create_table_compat,
2669
.manage_sets = table_manage_sets_all,
2670
},
2671
{
2672
.opcode = O_VIA,
2673
.etlv = IPFW_TLV_TBL_NAME,
2674
.classifier = classify_via,
2675
.update = update_via,
2676
.find_byname = table_findbyname,
2677
.find_bykidx = table_findbykidx,
2678
.create_object = create_table_compat,
2679
.manage_sets = table_manage_sets,
2680
},
2681
};
2682
2683
static int
2684
test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
2685
void *arg __unused)
2686
{
2687
2688
/* Check that there aren't any tables in not default set */
2689
if (no->set != 0)
2690
return (EBUSY);
2691
return (0);
2692
}
2693
2694
/*
2695
* Switch between "set 0" and "rule's set" table binding,
2696
* Check all ruleset bindings and permits changing
2697
* IFF each binding has both rule AND table in default set (set 0).
2698
*
2699
* Returns 0 on success.
2700
*/
2701
int
2702
ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
2703
{
2704
struct opcode_obj_rewrite *rw;
2705
struct namedobj_instance *ni;
2706
struct named_object *no;
2707
struct ip_fw *rule;
2708
ipfw_insn *cmd;
2709
int cmdlen, i, l;
2710
uint32_t kidx;
2711
uint8_t subtype;
2712
2713
IPFW_UH_WLOCK(ch);
2714
2715
if (V_fw_tables_sets == sets) {
2716
IPFW_UH_WUNLOCK(ch);
2717
return (0);
2718
}
2719
ni = CHAIN_TO_NI(ch);
2720
if (sets == 0) {
2721
/*
2722
* Prevent disabling sets support if we have some tables
2723
* in not default sets.
2724
*/
2725
if (ipfw_objhash_foreach_type(ni, test_sets_cb,
2726
NULL, IPFW_TLV_TBL_NAME) != 0) {
2727
IPFW_UH_WUNLOCK(ch);
2728
return (EBUSY);
2729
}
2730
}
2731
/*
2732
* Scan all rules and examine tables opcodes.
2733
*/
2734
for (i = 0; i < ch->n_rules; i++) {
2735
rule = ch->map[i];
2736
2737
l = rule->cmd_len;
2738
cmd = rule->cmd;
2739
cmdlen = 0;
2740
for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
2741
cmdlen = F_LEN(cmd);
2742
/* Check only tables opcodes */
2743
for (kidx = 0, rw = opcodes;
2744
rw < opcodes + nitems(opcodes); rw++) {
2745
if (rw->opcode != cmd->opcode)
2746
continue;
2747
if (rw->classifier(cmd, &kidx, &subtype) == 0)
2748
break;
2749
}
2750
if (kidx == 0)
2751
continue;
2752
no = ipfw_objhash_lookup_kidx(ni, kidx);
2753
/* Check if both table object and rule has the set 0 */
2754
if (no->set != 0 || rule->set != 0) {
2755
IPFW_UH_WUNLOCK(ch);
2756
return (EBUSY);
2757
}
2758
}
2759
}
2760
V_fw_tables_sets = sets;
2761
IPFW_UH_WUNLOCK(ch);
2762
return (0);
2763
}
2764
2765
/*
 * Checks table name for validity.
 * Only the generic object name check is applied here; the rest
 * should be done in userland.
 *
 * Returns 0 if name is considered valid.
 */
static int
check_table_name(const char *name)
{
	int error;

	/*
	 * TODO: do some more complicated checks
	 */
	error = ipfw_check_object_name_generic(name);

	return (error);
}
2781
2782
/*
2783
* Finds table config based on either legacy index
2784
* or name in ntlv.
2785
* Note @ti structure contains unchecked data from userland.
2786
*
2787
* Returns 0 in success and fills in @tc with found config
2788
*/
2789
static int
2790
find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
2791
struct table_config **tc)
2792
{
2793
char *name, bname[16];
2794
struct named_object *no;
2795
ipfw_obj_ntlv *ntlv;
2796
uint32_t set;
2797
2798
if (ti->tlvs != NULL) {
2799
ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
2800
IPFW_TLV_TBL_NAME);
2801
if (ntlv == NULL)
2802
return (EINVAL);
2803
name = ntlv->name;
2804
2805
/*
2806
* Use set provided by @ti instead of @ntlv one.
2807
* This is needed due to different sets behavior
2808
* controlled by V_fw_tables_sets.
2809
*/
2810
set = (V_fw_tables_sets != 0) ? ti->set : 0;
2811
} else {
2812
snprintf(bname, sizeof(bname), "%d", ti->uidx);
2813
name = bname;
2814
set = 0;
2815
}
2816
2817
no = ipfw_objhash_lookup_name(ni, set, name);
2818
*tc = (struct table_config *)no;
2819
2820
return (0);
2821
}
2822
2823
/*
2824
* Finds table config based on either legacy index
2825
* or name in ntlv.
2826
* Note @ti structure contains unchecked data from userland.
2827
*
2828
* Returns pointer to table_config or NULL.
2829
*/
2830
static struct table_config *
2831
find_table(struct namedobj_instance *ni, struct tid_info *ti)
2832
{
2833
struct table_config *tc;
2834
2835
if (find_table_err(ni, ti, &tc) != 0)
2836
return (NULL);
2837
2838
return (tc);
2839
}
2840
2841
/*
2842
* Allocate new table config structure using
2843
* specified @algo and @aname.
2844
*
2845
* Returns pointer to config or NULL.
2846
*/
2847
static struct table_config *
2848
alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
2849
struct table_algo *ta, char *aname, uint8_t tflags)
2850
{
2851
char *name, bname[16];
2852
struct table_config *tc;
2853
int error;
2854
ipfw_obj_ntlv *ntlv;
2855
uint32_t set;
2856
2857
if (ti->tlvs != NULL) {
2858
ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
2859
IPFW_TLV_TBL_NAME);
2860
if (ntlv == NULL)
2861
return (NULL);
2862
name = ntlv->name;
2863
set = (V_fw_tables_sets == 0) ? 0 : ntlv->set;
2864
} else {
2865
/* Compat part: convert number to string representation */
2866
snprintf(bname, sizeof(bname), "%d", ti->uidx);
2867
name = bname;
2868
set = 0;
2869
}
2870
2871
tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
2872
tc->no.name = tc->tablename;
2873
tc->no.subtype = ta->type;
2874
tc->no.set = set;
2875
tc->tflags = tflags;
2876
tc->ta = ta;
2877
strlcpy(tc->tablename, name, sizeof(tc->tablename));
2878
/* Set "shared" value type by default */
2879
tc->vshared = 1;
2880
2881
/* Preallocate data structures for new tables */
2882
error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
2883
if (error != 0) {
2884
free(tc, M_IPFW);
2885
return (NULL);
2886
}
2887
2888
return (tc);
2889
}
2890
2891
/*
2892
* Destroys table state and config.
2893
*/
2894
static void
2895
free_table_config(struct namedobj_instance *ni, struct table_config *tc)
2896
{
2897
2898
KASSERT(tc->linked == 0, ("free() on linked config"));
2899
/* UH lock MUST NOT be held */
2900
2901
/*
2902
* We're using ta without any locking/referencing.
2903
* TODO: fix this if we're going to use unloadable algos.
2904
*/
2905
tc->ta->destroy(tc->astate, &tc->ti_copy);
2906
free(tc, M_IPFW);
2907
}
2908
2909
/*
2910
* Links @tc to @chain table named instance.
2911
* Sets appropriate type/states in @chain table info.
2912
*/
2913
static void
2914
link_table(struct ip_fw_chain *ch, struct table_config *tc)
2915
{
2916
struct namedobj_instance *ni;
2917
struct table_info *ti;
2918
uint16_t kidx;
2919
2920
IPFW_UH_WLOCK_ASSERT(ch);
2921
2922
ni = CHAIN_TO_NI(ch);
2923
kidx = tc->no.kidx;
2924
2925
ipfw_objhash_add(ni, &tc->no);
2926
2927
ti = KIDX_TO_TI(ch, kidx);
2928
*ti = tc->ti_copy;
2929
2930
/* Notify algo on real @ti address */
2931
if (tc->ta->change_ti != NULL)
2932
tc->ta->change_ti(tc->astate, ti);
2933
2934
tc->linked = 1;
2935
tc->ta->refcnt++;
2936
}
2937
2938
/*
2939
* Unlinks @tc from @chain table named instance.
2940
* Zeroes states in @chain and stores them in @tc.
2941
*/
2942
static void
2943
unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
2944
{
2945
struct namedobj_instance *ni;
2946
struct table_info *ti;
2947
uint16_t kidx;
2948
2949
IPFW_UH_WLOCK_ASSERT(ch);
2950
IPFW_WLOCK_ASSERT(ch);
2951
2952
ni = CHAIN_TO_NI(ch);
2953
kidx = tc->no.kidx;
2954
2955
/* Clear state. @ti copy is already saved inside @tc */
2956
ipfw_objhash_del(ni, &tc->no);
2957
ti = KIDX_TO_TI(ch, kidx);
2958
memset(ti, 0, sizeof(struct table_info));
2959
tc->linked = 0;
2960
tc->ta->refcnt--;
2961
2962
/* Notify algo on real @ti address */
2963
if (tc->ta->change_ti != NULL)
2964
tc->ta->change_ti(tc->astate, NULL);
2965
}
2966
2967
static struct ipfw_sopt_handler scodes[] = {
2968
{ IP_FW_TABLE_XCREATE, IP_FW3_OPVER, HDIR_SET, create_table },
2969
{ IP_FW_TABLE_XDESTROY, IP_FW3_OPVER, HDIR_SET, flush_table_v0 },
2970
{ IP_FW_TABLE_XFLUSH, IP_FW3_OPVER, HDIR_SET, flush_table_v0 },
2971
{ IP_FW_TABLE_XMODIFY, IP_FW3_OPVER, HDIR_BOTH, modify_table },
2972
{ IP_FW_TABLE_XINFO, IP_FW3_OPVER, HDIR_GET, describe_table },
2973
{ IP_FW_TABLES_XLIST, IP_FW3_OPVER, HDIR_GET, list_tables },
2974
{ IP_FW_TABLE_XLIST, IP_FW3_OPVER, HDIR_GET, dump_table_v1 },
2975
{ IP_FW_TABLE_XADD, IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
2976
{ IP_FW_TABLE_XDEL, IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
2977
{ IP_FW_TABLE_XFIND, IP_FW3_OPVER, HDIR_GET, find_table_entry },
2978
{ IP_FW_TABLE_XSWAP, IP_FW3_OPVER, HDIR_SET, swap_table },
2979
{ IP_FW_TABLES_ALIST, IP_FW3_OPVER, HDIR_GET, list_table_algo },
2980
};
2981
2982
static int
2983
destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
2984
void *arg)
2985
{
2986
2987
unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
2988
if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
2989
printf("Error unlinking kidx %d from table %s\n",
2990
no->kidx, no->name);
2991
free_table_config(ni, (struct table_config *)no);
2992
return (0);
2993
}
2994
2995
/*
2996
* Shuts tables module down.
2997
*/
2998
void
2999
ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
3000
{
3001
3002
IPFW_DEL_SOPT_HANDLER(last, scodes);
3003
IPFW_DEL_OBJ_REWRITER(last, opcodes);
3004
3005
/* Remove all tables from working set */
3006
IPFW_UH_WLOCK(ch);
3007
IPFW_WLOCK(ch);
3008
ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
3009
IPFW_WUNLOCK(ch);
3010
IPFW_UH_WUNLOCK(ch);
3011
3012
/* Free pointers itself */
3013
free(ch->tablestate, M_IPFW);
3014
3015
ipfw_table_value_destroy(ch, last);
3016
ipfw_table_algo_destroy(ch);
3017
3018
ipfw_objhash_destroy(CHAIN_TO_NI(ch));
3019
free(CHAIN_TO_TCFG(ch), M_IPFW);
3020
}
3021
3022
/*
3023
* Starts tables module.
3024
*/
3025
int
3026
ipfw_init_tables(struct ip_fw_chain *ch, int first)
3027
{
3028
struct tables_config *tcfg;
3029
3030
/* Allocate pointers */
3031
ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
3032
M_IPFW, M_WAITOK | M_ZERO);
3033
3034
tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
3035
tcfg->namehash = ipfw_objhash_create(V_fw_tables_max,
3036
DEFAULT_OBJHASH_SIZE);
3037
ch->tblcfg = tcfg;
3038
3039
ipfw_table_value_init(ch, first);
3040
ipfw_table_algo_init(ch);
3041
3042
IPFW_ADD_OBJ_REWRITER(first, opcodes);
3043
IPFW_ADD_SOPT_HANDLER(first, scodes);
3044
return (0);
3045
}
3046
3047