GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/ipfw/nat64/nat64lsn.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015-2020 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <[email protected]>
 * Copyright (c) 2016-2020 Andrey V. Elsukov <[email protected]>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/pf/pf.h>

#include "nat64lsn.h"

MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

#define NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
#define NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
#define NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
#define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))

static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
#define PERIODIC_DELAY 4
#define NAT64_LOOKUP(chain, cmd) \
    (struct nat64lsn_instance *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,
	JTYPE_NEWPORTGROUP,
	JTYPE_DESTROY,
};

struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item) entries;
	enum nat64lsn_jtype jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			struct mbuf *m;
			struct nat64lsn_host *host;
			struct nat64lsn_state *state;
			uint32_t src6_hval;
			uint32_t state_hval;
			struct ipfw_flow_id f_id;
			in_addr_t faddr;
			uint16_t port;
			uint8_t proto;
			uint8_t done;
		};
		struct { /* used by JTYPE_DESTROY */
			struct nat64lsn_hosts_slist hosts;
			struct nat64lsn_pg_slist portgroups;
			struct nat64lsn_pgchunk *pgchunk;
			struct epoch_context epoch_ctx;
		};
	};
};

static struct mtx jmtx;
#define JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define JQUEUE_LOCK()		mtx_lock(&jmtx)
#define JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);

#define NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define NAT64_BIT_STALE		7	/* state is going to be expired */

#define NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)

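/*
 * Map the TCP header flags onto the NAT64_FLAG_* bits above.  Worked
 * example using the standard <netinet/tcp.h> values (TH_FIN 0x01,
 * TH_SYN 0x02, TH_RST 0x04, TH_ACK 0x10): a SYN|ACK packet (0x12)
 * yields 0x02 | (0x10 >> 2) = NAT64_FLAG_SYN | NAT64_FLAG_ESTAB.
 */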
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t result;

	result = flags & (TH_FIN|TH_SYN);
	result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
	result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */

	return (result);
}

static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
    struct nat64lsn_state *state)
{

	memset(plog, 0, sizeof(*plog));
	plog->length = PFLOG_REAL_HDRLEN;
	plog->af = family;
	plog->action = PF_NAT;
	plog->dir = PF_IN;
	plog->rulenr = htonl(state->ip_src);
	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
	    (state->proto << 8) | (state->ip_dst & 0xff));
	plog->ruleset[0] = '\0';
	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}

#define HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
#define HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

#define ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
#define ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
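/*
 * Note that ALIAS_BYHASH() is a direct index rather than a hash: the low
 * (32 - plen4) bits of a host byte order IPv4 address select the alias
 * entry for that address, since cfg->aliases holds one entry per address
 * of the IPv4 prefix.
 */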
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
    struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{

	/*
	 * We could implement different algorithms for selecting
	 * an alias address.
	 * XXX: for now we use the first available.
	 */
	return (CK_SLIST_FIRST(&host->aliases));
}

static struct nat64lsn_alias*
nat64lsn_get_alias(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id __unused)
{
	static uint32_t idx = 0;

	/*
	 * We could choose an alias by the number of allocated PGs,
	 * one not yet used by other hosts, or one statically
	 * configured by the user.
	 * XXX: for now we choose it using round robin.
	 */
	return (&ALIAS_BYHASH(cfg, idx++));
}

#define STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
#define STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
#define STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))

#ifdef __LP64__
#define FREEMASK_FFSLL(pg, faddr)	\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
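/*
 * On 32-bit platforms the 64-bit freemask is kept as two 32-bit words so
 * that the ck_pr_*_32() atomics can operate on it; ffsl() returns the
 * 1-based index of the first set bit (or 0), hence the +32 adjustment
 * when the low word is empty.
 */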
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int i;

	if ((i = ffsl(freemask[0])) != 0)
		return (i);
	if ((i = ffsl(freemask[1])) != 0)
		return (i + 32);
	return (0);
}
#define FREEMASK_FFSLL(pg, faddr)	\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */


#define NAT64LSN_TRY_PGCNT	36
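/*
 * A PG index addresses one of 32 pgchunks * 32 PG pointers = 1024 slots
 * per protocol, which is what the MPASS(idx < 1024) below asserts.  The
 * lookup starts from the last used index and probes at most
 * NAT64LSN_TRY_PGCNT slots, wrapping to the first PG once.
 */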
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, in_addr_t faddr)
{
	struct nat64lsn_pg *pg;
	uint32_t idx, oldidx;
	int cnt;

	/* First try last used PG. */
	idx = oldidx = ck_pr_load_32(pgidx);
	MPASS(idx < 1024);
	cnt = 0;
	do {
		ck_pr_fence_load();
		if (idx > 1023 || !ISSET32(*chunkmask, idx / 32)) {
			/* If it is first try, reset idx to first PG */
			idx = 0;
			/* Stop if idx is out of range */
			if (cnt > 0)
				break;
		}
		if (ISSET32(pgmask[idx / 32], idx % 32)) {
			pg = ck_pr_load_ptr(
			    &chunks[idx / 32]->pgptr[idx % 32]);
			ck_pr_fence_load();
			/*
			 * Make sure that pg did not become DEAD.
			 */
			if ((pg->flags & NAT64LSN_DEADPG) == 0 &&
			    FREEMASK_BITCOUNT(pg, faddr) > 0) {
				if (cnt > 0)
					ck_pr_cas_32(pgidx, oldidx, idx);
				return (pg);
			}
		}
		idx++;
	} while (++cnt < NAT64LSN_TRY_PGCNT);
	if (oldidx != idx)
		ck_pr_cas_32(pgidx, oldidx, idx);
	return (NULL);
}

static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(&link->alias->tcp_chunkmask,
		    link->alias->tcp_pgmask, link->alias->tcp,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(&link->alias->udp_chunkmask,
		    link->alias->udp_pgmask, link->alias->udp,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(&link->alias->icmp_chunkmask,
		    link->alias->icmp_pgmask, link->alias->icmp,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL || (pg->flags & NAT64LSN_DEADPG) != 0)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Let's try to atomically grab the state */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			SET_AGE(host->timestamp);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}

/*
 * Inspect ICMP packets to see if the message contains an inner packet
 * header, in which case we need to alter @addr and @port.
 */
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
    uint16_t *port)
{
	struct icmp *icmp;
	struct ip *ip;
	int off;
	uint8_t inner_proto;

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	off = (ip->ip_hl << 2) + ICMP_MINLEN;
	if ((*mp)->m_len < off)
		*mp = m_pullup(*mp, off);
	if (*mp == NULL)
		return (ENOMEM);

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	icmp = L3HDR(ip, struct icmp *);
	switch (icmp->icmp_type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		/* Use icmp ID as distinguisher */
		*port = ntohs(icmp->icmp_id);
		return (0);
	case ICMP_UNREACH:
	case ICMP_TIMXCEED:
		break;
	default:
		return (EOPNOTSUPP);
	}
	/*
	 * ICMP_UNREACH and ICMP_TIMXCEED contain an IP header + 64 bits
	 * of ULP header.
	 */
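	/*
	 * ICMP_MINLEN (8 bytes) doubles here as the size of that 64-bit
	 * ULP fragment: it is enough to read the inner TCP/UDP port pair
	 * or the inner ICMP id below.
	 */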
	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
		return (EINVAL);
	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	ip = mtodo(*mp, off); /* Inner IP header */
	inner_proto = ip->ip_p;
	off += ip->ip_hl << 2; /* Skip inner IP header */
	*addr = ntohl(ip->ip_src.s_addr);
	if ((*mp)->m_len < off + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	switch (inner_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* Copy source port from the header */
		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
		*proto = inner_proto;
		return (0);
	case IPPROTO_ICMP:
		/*
		 * We will translate only ICMP errors for our ICMP
		 * echo requests.
		 */
		icmp = mtodo(*mp, off);
		if (icmp->icmp_type != ICMP_ECHO)
			return (EOPNOTSUPP);
		*port = ntohs(icmp->icmp_id);
		return (0);
	}
	return (EOPNOTSUPP);
}

static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int chunk_idx, pg_idx, state_idx;

	NAT64LSN_EPOCH_ASSERT();

	if (port < NAT64_MIN_PORT)
		return (NULL);
	/*
	 * Alias keeps 32 pgchunks for each protocol.
	 * Each pgchunk has 32 pointers to portgroups.
	 * Each portgroup has 64 states for ports.
	 */
	port -= NAT64_MIN_PORT;
	chunk_idx = port / 2048;

	port -= chunk_idx * 2048;
	pg_idx = port / 64;
	state_idx = port % 64;

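	/*
	 * Worked example, assuming NAT64_MIN_PORT is 1024: port 12345
	 * gives 12345 - 1024 = 11321, so chunk_idx = 11321 / 2048 = 5;
	 * the remainder 11321 - 5 * 2048 = 1081 gives pg_idx = 16 and
	 * state_idx = 57.
	 */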
	/*
	 * First check in proto_chunkmask that we have allocated PG chunk.
	 * Then check in proto_pgmask that we have valid PG pointer.
	 */
	pg = NULL;
	switch (proto) {
	case IPPROTO_TCP:
		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_UDP:
		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_ICMP:
		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	if (FREEMASK_ISSET(pg, faddr, state_idx))
		return (NULL);

	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
	ck_pr_fence_load();
	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
		return (state);
	return (NULL);
}

/*
 * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
 * that might be unknown until reassembling is completed.
 */
static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
    uint16_t *port)
{
	struct ip *ip;
	int len;

	m = ip_reass(m);
	if (m == NULL)
		return (NULL);
	/* IP header must be contiguous after ip_reass() */
	ip = mtod(m, struct ip *);
	len = ip->ip_hl << 2;
	switch (ip->ip_p) {
	case IPPROTO_ICMP:
		len += ICMP_MINLEN;
		break;
	case IPPROTO_TCP:
		len += sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		len += sizeof(struct udphdr);
		break;
	default:
		m_freem(m);
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (NULL);
	}
	if (m->m_len < len) {
		m = m_pullup(m, len);
		if (m == NULL) {
			NAT64STAT_INC(&cfg->base.stats, nomem);
			return (NULL);
		}
		ip = mtod(m, struct ip *);
	}
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
		break;
	case IPPROTO_UDP:
		*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
		break;
	}
	return (m);
}

static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_state *state;
	struct nat64lsn_alias *alias;
	uint32_t addr, flags;
	uint16_t port, ts;
	int ret;
	uint8_t proto;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	proto = f_id->proto;
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Reassemble fragments if needed */
	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
		if (*mp == NULL)
			return (IP_FW_DENY);
	}

	/* Check if protocol is supported */
	switch (proto) {
	case IPPROTO_ICMP:
		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM) {
				NAT64STAT_INC(&cfg->base.stats, nomem);
				return (IP_FW_DENY);
			}
			NAT64STAT_INC(&cfg->base.stats, noproto);
			return (cfg->nomatch_verdict);
		}
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->base.stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	alias = &ALIAS_BYHASH(cfg, addr);
	MPASS(addr == alias->addr);

	/* Check that we have state for this port */
	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
	    port, proto);
	if (state == NULL) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */

	/* Update some state fields if needed */
	SET_AGE(ts);
	if (f_id->proto == IPPROTO_TCP)
		flags = convert_tcp_flags(f_id->_flags);
	else
		flags = 0;
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	port = htons(state->sport);
	src6 = state->ip6_dst;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET, state);
	} else
		logdata = NULL;

	/*
	 * We already have src6 with an embedded address, but it is
	 * possible that src_ip differs from state->ip_dst; this is why
	 * we do the embedding again.
	 */
	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
	    &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

/*
 * Check if a particular state is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static int
nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
	int age, ttl;

	/* State was marked as stale in previous pass. */
	if (ISSET32(state->flags, NAT64_BIT_STALE))
		return (1);

	/* State is not yet initialized, it is going to be READY */
	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
		return (0);

	age = GET_AGE(state->timestamp);
	switch (state->proto) {
	case IPPROTO_TCP:
		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
			ttl = cfg->st_close_ttl;
		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
			ttl = cfg->st_estab_ttl;
		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
			ttl = cfg->st_syn_ttl;
		else
			ttl = cfg->st_syn_ttl;
		if (age > ttl)
			return (1);
		break;
	case IPPROTO_UDP:
		if (age > cfg->st_udp_ttl)
			return (1);
		break;
	case IPPROTO_ICMP:
		if (age > cfg->st_icmp_ttl)
			return (1);
		break;
	}
	return (0);
}

#define PGCOUNT_ADD(alias, proto, value)				\
    switch (proto) {							\
    case IPPROTO_TCP: (alias)->tcp_pgcount += (value); break;		\
    case IPPROTO_UDP: (alias)->udp_pgcount += (value); break;		\
    case IPPROTO_ICMP: (alias)->icmp_pgcount += (value); break;		\
    }
#define PGCOUNT_INC(alias, proto)	PGCOUNT_ADD(alias, proto, 1)
#define PGCOUNT_DEC(alias, proto)	PGCOUNT_ADD(alias, proto, -1)

static inline void
nat64lsn_state_cleanup(struct nat64lsn_state *state)
{

	/*
	 * Reset the READY flag and wait until it becomes
	 * safe for translate4.
	 */
	ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
	/*
	 * And set the STALE flag for deferred deletion in the
	 * next pass of nat64lsn_maintain_pg().
	 */
	ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
	ck_pr_fence_store();
}

static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			/*
			 * Now translate6 will not use this state.
			 */
			host->states_count--;
			HOST_UNLOCK(host);
			nat64lsn_state_cleanup(state);
		}
	}

	/*
	 * We have some live states; update the timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}

static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
    struct nat64lsn_pg_slist *portgroups)
{
	struct nat64lsn_alias *alias;
	struct nat64lsn_pg *pg, *tpg;
	uint32_t *pgmask, *pgidx;
	int i, idx;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
			if (nat64lsn_maintain_pg(cfg, pg) == 0)
				continue;
			/* Always keep first PG */
			if (pg->base_port == NAT64_MIN_PORT)
				continue;
			/*
			 * PG expires in two passes:
			 * 1. Reset bit in pgmask, mark it as DEAD.
			 * 2. Unlink it and schedule for deferred destroying.
			 */
			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
			switch (pg->proto) {
			case IPPROTO_TCP:
				pgmask = alias->tcp_pgmask;
				pgidx = &alias->tcp_pgidx;
				break;
			case IPPROTO_UDP:
				pgmask = alias->udp_pgmask;
				pgidx = &alias->udp_pgidx;
				break;
			case IPPROTO_ICMP:
				pgmask = alias->icmp_pgmask;
				pgidx = &alias->icmp_pgidx;
				break;
			}
			if (pg->flags & NAT64LSN_DEADPG) {
				/* Unlink PG from alias's chain */
				ALIAS_LOCK(alias);
				CK_SLIST_REMOVE(&alias->portgroups, pg,
				    nat64lsn_pg, entries);
				PGCOUNT_DEC(alias, pg->proto);
				ALIAS_UNLOCK(alias);
				/*
				 * Link it to job's chain for deferred
				 * destroying.
				 */
				NAT64STAT_INC(&cfg->base.stats, spgdeleted);
				CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
				continue;
			}

			/* Reset the corresponding bit in pgmask array. */
			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
			pg->flags |= NAT64LSN_DEADPG;
			ck_pr_fence_store();
			/* If last used PG points to this PG, reset it. */
			ck_pr_cas_32(pgidx, idx, 0);
		}
	}
}

static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Was the host marked in a previous call? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0 ||
				    GET_AGE(host->timestamp) <
				    cfg->host_delete_delay) {
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			if (host->states_count > 0 ||
			    GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}

static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}

#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			if (h->states_count / 2 < h->states_hashsize ||
			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				continue;
			hsize = h->states_hashsize * 2;
			hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			    M_NOWAIT);
			if (hash == NULL)
				continue;
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);

			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif

/*
 * This procedure is used to perform various maintenance
 * on the dynamic hash list. Currently it is called every 4 seconds.
 */
static void
nat64lsn_periodic(void *data)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_cfg *cfg;

	cfg = (struct nat64lsn_cfg *) data;
	CURVNET_SET(cfg->vp);
	if (cfg->hosts_count > 0) {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji != NULL) {
			ji->jtype = JTYPE_DESTROY;
			CK_SLIST_INIT(&ji->hosts);
			CK_SLIST_INIT(&ji->portgroups);
			nat64lsn_expire_hosts(cfg, &ji->hosts);
			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
			    nat64lsn_job_destroy);
		} else
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
	CURVNET_RESTORE();
}

#define ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage) : 0)
#define HOST_ERROR(stage)	ALLOC_ERROR(stage, 1)
#define PG_ERROR(stage)		ALLOC_ERROR(stage, 2)
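/*
 * The error encoding gives each failure a unique small integer: stage 0
 * always maps to 0 (success), otherwise HOST_ERROR(n) is 10 + n and
 * PG_ERROR(n) is 20 + n, e.g. HOST_ERROR(2) == 12 and PG_ERROR(3) == 23.
 */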
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_host *host;
	struct nat64lsn_state *state;
	uint32_t hval, data[2];
	int i;

	/* Check that host was not yet added. */
	NAT64LSN_EPOCH_ASSERT();
	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
			/* The host was allocated in a previous call. */
			ji->host = host;
			goto get_state;
		}
	}

	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
	if (ji->host == NULL)
		return (HOST_ERROR(1));

	host->states_hashsize = NAT64LSN_HSIZE;
	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
	if (host->states_hash == NULL) {
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(2));
	}

	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
	if (link == NULL) {
		free(host->states_hash, M_NAT64LSN);
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(3));
	}

	/* Initialize */
	HOST_LOCK_INIT(host);
	SET_AGE(host->timestamp);
	host->addr = ji->f_id.src_ip6;
	host->hval = ji->src6_hval;
	host->flags = 0;
	host->states_count = 0;
	CK_SLIST_INIT(&host->aliases);
	for (i = 0; i < host->states_hashsize; i++)
		CK_SLIST_INIT(&host->states_hash[i]);

	link->alias = nat64lsn_get_alias(cfg, &ji->f_id);
	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);

	ALIAS_LOCK(link->alias);
	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
	link->alias->hosts_count++;
	ALIAS_UNLOCK(link->alias);

	CFG_LOCK(cfg);
	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
	cfg->hosts_count++;
	CFG_UNLOCK(cfg);

get_state:
	data[0] = ji->faddr;
	data[1] = (ji->f_id.dst_port << 16) | ji->port;
	ji->state_hval = hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
	    ji->faddr, ji->port, ji->proto);
	/*
	 * We failed to obtain a new state; the alias we used needs
	 * a new PG.
	 * XXX: or another alias should be used.
	 */
	if (state == NULL) {
		/* Try to allocate new PG */
		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
			return (HOST_ERROR(4));
		/* We assume that nat64lsn_alloc_pg() got the state */
	} else
		ji->state = state;

	ji->done = 1;
	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
	return (HOST_ERROR(0));
}
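/*
 * Scan the 32-word pgmask for the first clear bit and return its global
 * index.  For example, if data[0] == 0xffffffff and data[1] == 0x00000007,
 * the first free PG slot is 1 * 32 + ffs(0xfffffff8) - 1 == 35.
 */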
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	int i;

	for (i = 0; i < 32; i++) {
		if (~data[i] == 0)
			continue;
		return (i * 32 + ffs(~data[i]) - 1);
	}
	return (-1);
}

static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, uint8_t proto)
{
	struct nat64lsn_pg *pg;
	int i, pg_idx, chunk_idx;

	/* Find place in pgchunk where PG can be added */
	pg_idx = nat64lsn_find_pg_place(pgmask);
	if (pg_idx < 0)	/* no more PGs */
		return (PG_ERROR(1));
	/* Check that we have allocated pgchunk for given PG index */
	chunk_idx = pg_idx / 32;
	if (!ISSET32(*chunkmask, chunk_idx)) {
		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
		    M_NOWAIT);
		if (chunks[chunk_idx] == NULL)
			return (PG_ERROR(2));
		ck_pr_bts_32(chunkmask, chunk_idx);
		ck_pr_fence_store();
	}
	/* Allocate PG and states chunks */
	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (PG_ERROR(3));
	pg->chunks_count = cfg->states_chunks;
	if (pg->chunks_count > 1) {
		pg->freemask_chunk = malloc(pg->chunks_count *
		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
		if (pg->freemask_chunk == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(4));
		}
		pg->states_chunk = malloc(pg->chunks_count *
		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
		    M_NOWAIT | M_ZERO);
		if (pg->states_chunk == NULL) {
			free(pg->freemask_chunk, M_NAT64LSN);
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(5));
		}
		for (i = 0; i < pg->chunks_count; i++) {
			pg->states_chunk[i] = uma_zalloc(
			    nat64lsn_state_zone, M_NOWAIT);
			if (pg->states_chunk[i] == NULL)
				goto states_failed;
		}
		memset(pg->freemask_chunk, 0xff,
		    sizeof(uint64_t) * pg->chunks_count);
	} else {
		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
		if (pg->states == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(6));
		}
		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
	}

	/* Initialize PG and hook it to pgchunk */
	SET_AGE(pg->timestamp);
	pg->flags = 0;
	pg->proto = proto;
	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
	ck_pr_fence_store();

	/* Set bit in pgmask and set index of last used PG */
	ck_pr_bts_32(&pgmask[chunk_idx], pg_idx % 32);
	ck_pr_store_32(pgidx, pg_idx);

	ALIAS_LOCK(alias);
	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
	SET_AGE(alias->timestamp);
	PGCOUNT_INC(alias, proto);
	ALIAS_UNLOCK(alias);
	NAT64STAT_INC(&cfg->base.stats, spgcreated);
	return (PG_ERROR(0));

states_failed:
	for (i = 0; i < pg->chunks_count; i++)
		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
	free(pg->freemask_chunk, M_NAT64LSN);
	free(pg->states_chunk, M_NAT64LSN);
	uma_zfree(nat64lsn_pg_zone, pg);
	return (PG_ERROR(7));
}

static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_alias *alias;
	int ret;

	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
	if (link == NULL)
		return (PG_ERROR(1));

	/*
	 * TODO: check that we did not already allocate a PG in
	 * a previous call.
	 */

	ret = 0;
	alias = link->alias;
	/* Find place in pgchunk where PG can be added */
	switch (ji->proto) {
	case IPPROTO_TCP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->tcp_chunkmask, alias->tcp_pgmask,
		    alias->tcp, &alias->tcp_pgidx, ji->proto);
		break;
	case IPPROTO_UDP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->udp_chunkmask, alias->udp_pgmask,
		    alias->udp, &alias->udp_pgidx, ji->proto);
		break;
	case IPPROTO_ICMP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->icmp_chunkmask, alias->icmp_pgmask,
		    alias->icmp, &alias->icmp_pgidx, ji->proto);
		break;
	default:
		panic("%s: wrong proto %d", __func__, ji->proto);
	}
	if (ret == PG_ERROR(1)) {
		/*
		 * PG_ERROR(1) means that the alias lacks free PGs.
		 * XXX: try next alias.
		 */
		printf("NAT64LSN: %s: failed to obtain PG\n",
		    __func__);
		return (ret);
	}
	if (ret == PG_ERROR(0)) {
		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
		    ji->state_hval, ji->faddr, ji->port, ji->proto);
		if (ji->state == NULL)
			ret = PG_ERROR(8);
		else
			ji->done = 1;
	}
	return (ret);
}

static void
nat64lsn_do_request(void *data)
{
	struct epoch_tracker et;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji, *ji2;
	struct nat64lsn_cfg *cfg;
	int jcount;
	uint8_t flags;

	cfg = (struct nat64lsn_cfg *)data;
	if (cfg->jlen == 0)
		return;

	CURVNET_SET(cfg->vp);
	STAILQ_INIT(&jhead);

	/* Grab queue */
	JQUEUE_LOCK();
	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* TODO: check if we need to resize hash */

	NAT64STAT_INC(&cfg->base.stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	NAT64LSN_EPOCH_ENTER(et);
	STAILQ_FOREACH(ji, &jhead, entries) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jportfails);
			break;
		default:
			continue;
		}
		if (ji->done != 0) {
			flags = ji->proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(ji->f_id._flags);
			nat64lsn_translate6_internal(cfg, &ji->m,
			    ji->state, flags);
			NAT64STAT_INC(&cfg->base.stats, jreinjected);
		}
	}
	NAT64LSN_EPOCH_EXIT(et);

	ji = STAILQ_FIRST(&jhead);
	while (ji != NULL) {
		ji2 = STAILQ_NEXT(ji, entries);
		/*
		 * In any case we must free the mbuf if the
		 * translator did not consume it.
		 */
		m_freem(ji->m);
		uma_zfree(nat64lsn_job_zone, ji);
		ji = ji2;
	}
	CURVNET_RESTORE();
}

static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
	struct nat64lsn_job_item *ji;

	/*
	 * Do not try to lock a possibly contested mutex if we're near
	 * the limit. Drop the packet instead.
	 */
	ji = NULL;
	if (cfg->jlen >= cfg->jmaxlen)
		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
	else {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji == NULL)
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	if (ji == NULL) {
		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "failed to create job");
	} else {
		ji->jtype = jtype;
		ji->done = 0;
	}
	return (ji);
}

static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	JQUEUE_LOCK();
	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
	NAT64STAT_INC(&cfg->base.stats, jrequests);
	cfg->jlen++;

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}

/*
 * This function is used to clean up the result of a less likely
 * race condition, when a host object was deleted but some translation
 * state was created before it was destroyed.
 *
 * Since state expiration removes a state from the host's hash table,
 * we need to be sure that no states remain linked with this host
 * entry.
 */
static void
nat64lsn_host_cleanup(struct nat64lsn_host *host)
{
	struct nat64lsn_state *state, *ts;
	int i;

	printf("NAT64LSN: %s: race condition has been detected for host %p\n",
	    __func__, host);
	for (i = 0; i < host->states_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(state, &host->states_hash[i],
		    entries, ts) {
			/*
			 * We can remove the state without lock,
			 * because this host entry is unlinked and will
			 * be destroyed.
			 */
			CK_SLIST_REMOVE(&host->states_hash[i], state,
			    nat64lsn_state, entries);
			host->states_count--;
			nat64lsn_state_cleanup(state);
		}
	}
	MPASS(host->states_count == 0);
}

/*
 * This function is used to clean up the result of a less likely
 * race condition, when a portgroup was deleted but some translation
 * state was created before it was destroyed.
 *
 * Since state entries are accessible via the host's hash table, we
 * need to be sure that no states from this PG remain linked with any
 * host entries.
 */
static void
nat64lsn_pg_cleanup(struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	uint64_t usedmask;
	int c, i;

	printf("NAT64LSN: %s: race condition has been detected for pg %p\n",
	    __func__, pg);
	for (c = 0; c < pg->chunks_count; c++) {
		/*
		 * Use the inverted freemask to find what states were
		 * created.
		 */
		usedmask = ~(*FREEMASK_CHUNK(pg, c));
		if (usedmask == 0)
			continue;
		for (i = 0; i < 64; i++) {
			if (!ISSET64(usedmask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			/*
			 * If we have the STALE bit, this means that the
			 * state is already unlinked from the host's hash
			 * table. Thus we can just reset the bit in the
			 * mask and schedule destroying in the next epoch
			 * call.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				FREEMASK_BTS(pg, c, i);
				continue;
			}
			/*
			 * There is a small window when the bit has been
			 * grabbed from the freemask but the state is not
			 * yet linked into the host's hash table.
			 * Check for the READY flag, it is set just after
			 * linking. If it is not set, defer cleanup for
			 * the next call.
			 */
			if (ISSET32(state->flags, NAT64_BIT_READY_IPV4)) {
				struct nat64lsn_host *host;

				host = state->host;
				HOST_LOCK(host);
				CK_SLIST_REMOVE(&STATE_HASH(host,
				    state->hval), state, nat64lsn_state,
				    entries);
				host->states_count--;
				HOST_UNLOCK(host);
				nat64lsn_state_cleanup(state);
			}
		}
	}
}

static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
	struct nat64lsn_hosts_slist hosts;
	struct nat64lsn_pg_slist portgroups;
	struct nat64lsn_job_item *ji;
	struct nat64lsn_host *host;
	struct nat64lsn_pg *pg;
	int i;

	CK_SLIST_INIT(&hosts);
	CK_SLIST_INIT(&portgroups);
	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
	MPASS(ji->jtype == JTYPE_DESTROY);
	while (!CK_SLIST_EMPTY(&ji->hosts)) {
		host = CK_SLIST_FIRST(&ji->hosts);
		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
		if (host->states_count > 0) {
			/*
			 * A state has been created during host deletion.
			 */
			printf("NAT64LSN: %s: destroying host with %d "
			    "states\n", __func__, host->states_count);
			/*
			 * We need to clean up these states to avoid
			 * possible access to an already deleted host in
			 * the state expiration code.
			 */
			nat64lsn_host_cleanup(host);
			CK_SLIST_INSERT_HEAD(&hosts, host, entries);
			/*
			 * Keep the host entry for the next deferred
			 * destroying. In the next epoch its states
			 * will not be accessible.
			 */
			continue;
		}
		nat64lsn_destroy_host(host);
	}
	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
		pg = CK_SLIST_FIRST(&ji->portgroups);
		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
		for (i = 0; i < pg->chunks_count; i++) {
			if (FREEMASK_BITCOUNT(pg, i) != 64) {
				/*
				 * A state has been created during
				 * PG deletion.
				 */
				printf("NAT64LSN: %s: destroying PG %p "
				    "with non-empty chunk %d\n", __func__,
				    pg, i);
				nat64lsn_pg_cleanup(pg);
				CK_SLIST_INSERT_HEAD(&portgroups,
				    pg, entries);
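				/*
				 * Flag deferred destroying: i == -1
				 * makes the check after the loop skip
				 * nat64lsn_destroy_pg().
				 */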
				i = -1;
				break;
			}
		}
		if (i != -1)
			nat64lsn_destroy_pg(pg);
	}
	if (CK_SLIST_EMPTY(&hosts) &&
	    CK_SLIST_EMPTY(&portgroups)) {
		uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
		uma_zfree(nat64lsn_job_zone, ji);
		return;
	}

	/* Schedule job item again */
	CK_SLIST_MOVE(&ji->hosts, &hosts, entries);
	CK_SLIST_MOVE(&ji->portgroups, &portgroups, entries);
	NAT64LSN_EPOCH_CALL(&ji->epoch_ctx, nat64lsn_job_destroy);
}

static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->src6_hval = hval;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->state_hval = hval;
		ji->host = host;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jportreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
    struct nat64lsn_state *state, uint8_t flags)
{
	struct pfloghdr loghdr, *logdata;
	int ret;
	uint16_t ts;

	/* Update timestamp and flags if needed */
	SET_AGE(ts);
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET6, state);
	} else
		logdata = NULL;

	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
	    htons(state->aport), &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	struct icmp6_hdr *icmp6;
	uint32_t addr, hval, data[2];
	int offset, proto;
	uint16_t port;
	uint8_t flags;

	/* Check if protocol is supported */
	port = f_id->src_port;
	proto = f_id->proto;
	switch (f_id->proto) {
	case IPPROTO_ICMPV6:
		/*
		 * For ICMPv6 echo reply/request we use icmp6_id as
		 * local port.
		 */
		offset = 0;
		proto = nat64_getlasthdr(*mp, &offset);
		if (proto < 0) {
			NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contiguous");
			return (IP_FW_DENY);
		}
		if (proto == IPPROTO_ICMPV6) {
			icmp6 = mtodo(*mp, offset);
			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
				port = ntohs(icmp6->icmp6_id);
		}
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* Extract IPv4 from destination IPv6 address */
	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
		char a[INET_ADDRSTRLEN];

		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
		return (IP_FW_DENY); /* XXX: add extra stats? */
	}

	/* Try to find host */
	hval = HOST_HVAL(cfg, &f_id->src_ip6);
	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
			break;
	}
	/* We use IPv4 address in host byte order */
	addr = ntohl(addr);
	if (host == NULL)
		return (nat64lsn_request_host(cfg, f_id, mp,
		    hval, addr, port, proto));

	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

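	/*
	 * The state hash input matches STATE_HVAL()'s two-word length:
	 * the destination IPv4 address and the packed 16-bit port pair.
	 */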
	data[0] = addr;
	data[1] = (f_id->dst_port << 16) | port;
	hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
	    port, proto);
	if (state == NULL)
		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
		    port, proto));
	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}

/*
 * Main dataplane entry point.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
	struct nat64lsn_instance *i;
	ipfw_insn *icmd;
	int ret;

	IPFW_RLOCK_ASSERT(ch);

	*done = 0;	/* continue the search in case of failure */
	icmd = cmd + F_LEN(cmd);
	if (cmd->opcode != O_EXTERNAL_ACTION ||
	    insntod(cmd, kidx)->kidx != V_nat64lsn_eid ||
	    icmd->opcode != O_EXTERNAL_INSTANCE ||
	    (i = NAT64_LOOKUP(ch, icmd)) == NULL)
		return (IP_FW_DENY);

	*done = 1;	/* terminate the search */

	switch (args->f_id.addr_type) {
	case 4:
		ret = nat64lsn_translate4(i->cfg, &args->f_id, &args->m);
		break;
	case 6:
		/*
		 * Check that destination IPv6 address matches our prefix6.
		 */
		if ((i->cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
		    memcmp(&args->f_id.dst_ip6, &i->cfg->base.plat_prefix,
		    i->cfg->base.plat_plen / 8) != 0) {
			ret = i->cfg->nomatch_verdict;
			break;
		}
		ret = nat64lsn_translate6(i->cfg, &args->f_id, &args->m);
		break;
	default:
		ret = i->cfg->nomatch_verdict;
	}

	if (ret != IP_FW_PASS && args->m != NULL) {
		m_freem(args->m);
		args->m = NULL;
	}
	return (ret);
}

static int
nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
	struct nat64lsn_states_chunk *chunk;
	int i;

	chunk = (struct nat64lsn_states_chunk *)mem;
	for (i = 0; i < 64; i++)
		chunk->state[i].flags = 0;
	return (0);
}

void
nat64lsn_init_internal(void)
{

	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	JQUEUE_LOCK_INIT();
}

void
nat64lsn_uninit_internal(void)
{

	/* XXX: epoch_task drain */
	JQUEUE_LOCK_DESTROY();
	uma_zdestroy(nat64lsn_host_zone);
	uma_zdestroy(nat64lsn_pgchunk_zone);
	uma_zdestroy(nat64lsn_pg_zone);
	uma_zdestroy(nat64lsn_aliaslink_zone);
	uma_zdestroy(nat64lsn_state_zone);
	uma_zdestroy(nat64lsn_job_zone);
}

void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	CALLOUT_LOCK(cfg);
	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
	CALLOUT_UNLOCK(cfg);
}

struct nat64lsn_cfg *
nat64lsn_init_config(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_alias *alias;
	int i, naddr;

	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
	    M_WAITOK | M_ZERO);

	CFG_LOCK_INIT(cfg);
	CALLOUT_LOCK_INIT(cfg);
	STAILQ_INIT(&cfg->jhead);
	cfg->vp = curvnet;
	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);

	cfg->hash_seed = arc4random();
	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < cfg->hosts_hashsize; i++)
		CK_SLIST_INIT(&cfg->hosts_hash[i]);

	naddr = 1 << (32 - plen);
	cfg->prefix4 = prefix;
	cfg->pmask4 = prefix | (naddr - 1);
	cfg->plen4 = plen;
	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
	    M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < naddr; i++) {
		alias = &cfg->aliases[i];
		alias->addr = prefix + i; /* host byte order */
		CK_SLIST_INIT(&alias->hosts);
		ALIAS_LOCK_INIT(alias);
	}

	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

	return (cfg);
}

static void
nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
	int i;

	if (pg->chunks_count == 1) {
		uma_zfree(nat64lsn_state_zone, pg->states);
	} else {
		for (i = 0; i < pg->chunks_count; i++)
			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
		free(pg->states_chunk, M_NAT64LSN);
		free(pg->freemask_chunk, M_NAT64LSN);
	}
	uma_zfree(nat64lsn_pg_zone, pg);
}

static void
nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias)
{
	struct nat64lsn_pg *pg;
	int i;

	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
		pg = CK_SLIST_FIRST(&alias->portgroups);
		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
		nat64lsn_destroy_pg(pg);
	}
	for (i = 0; i < 32; i++) {
		if (ISSET32(alias->tcp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
		if (ISSET32(alias->udp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
		if (ISSET32(alias->icmp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
	}
	ALIAS_LOCK_DESTROY(alias);
}

static void
nat64lsn_destroy_host(struct nat64lsn_host *host)
{
	struct nat64lsn_aliaslink *link;

	while (!CK_SLIST_EMPTY(&host->aliases)) {
		link = CK_SLIST_FIRST(&host->aliases);
		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);

		ALIAS_LOCK(link->alias);
		CK_SLIST_REMOVE(&link->alias->hosts, link,
		    nat64lsn_aliaslink, alias_entries);
		link->alias->hosts_count--;
		ALIAS_UNLOCK(link->alias);

		uma_zfree(nat64lsn_aliaslink_zone, link);
	}
	HOST_LOCK_DESTROY(host);
	free(host->states_hash, M_NAT64LSN);
	uma_zfree(nat64lsn_host_zone, host);
}

void
nat64lsn_destroy_config(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *host;
	int i;

	CALLOUT_LOCK(cfg);
	callout_drain(&cfg->periodic);
	CALLOUT_UNLOCK(cfg);
	callout_drain(&cfg->jcallout);

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
			nat64lsn_destroy_host(host);
		}
	}

	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);

	CALLOUT_LOCK_DESTROY(cfg);
	CFG_LOCK_DESTROY(cfg);
	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
	free(cfg->hosts_hash, M_NAT64LSN);
	free(cfg->aliases, M_NAT64LSN);
	free(cfg, M_NAT64LSN);
}