GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netgraph/netflow/netflow.c
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2010-2011 Alexander V. Chernikov <[email protected]>
5
* Copyright (c) 2004-2005 Gleb Smirnoff <[email protected]>
6
* Copyright (c) 2001-2003 Roman V. Palagin <[email protected]>
7
* All rights reserved.
8
*
9
* Redistribution and use in source and binary forms, with or without
10
* modification, are permitted provided that the following conditions
11
* are met:
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
14
* 2. Redistributions in binary form must reproduce the above copyright
15
* notice, this list of conditions and the following disclaimer in the
16
* documentation and/or other materials provided with the distribution.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
* SUCH DAMAGE.
29
*
30
* $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
31
*/
32
33
#include <sys/cdefs.h>
34
#include "opt_inet.h"
35
#include "opt_inet6.h"
36
#include "opt_route.h"
37
#include <sys/param.h>
38
#include <sys/bitstring.h>
39
#include <sys/systm.h>
40
#include <sys/counter.h>
41
#include <sys/kernel.h>
42
#include <sys/ktr.h>
43
#include <sys/limits.h>
44
#include <sys/mbuf.h>
45
#include <sys/syslog.h>
46
#include <sys/socket.h>
47
#include <vm/uma.h>
48
49
#include <net/if.h>
50
#include <net/if_dl.h>
51
#include <net/if_var.h>
52
#include <net/if_private.h>
53
#include <net/route.h>
54
#include <net/route/nhop.h>
55
#include <net/route/route_ctl.h>
56
#include <net/ethernet.h>
57
#include <netinet/in.h>
58
#include <netinet/in_fib.h>
59
#include <netinet/in_systm.h>
60
#include <netinet/ip.h>
61
#include <netinet/ip6.h>
62
#include <netinet/tcp.h>
63
#include <netinet/udp.h>
64
65
#include <netinet6/in6_fib.h>
66
67
#include <netgraph/ng_message.h>
68
#include <netgraph/netgraph.h>
69
70
#include <netgraph/netflow/netflow.h>
71
#include <netgraph/netflow/netflow_v9.h>
72
#include <netgraph/netflow/ng_netflow.h>
73
74
#define NBUCKETS (65536) /* must be power of 2 */
75
76
/* This hash is for TCP or UDP packets. */
77
#define FULL_HASH(addr1, addr2, port1, port2) \
78
(((addr1 ^ (addr1 >> 16) ^ \
79
htons(addr2 ^ (addr2 >> 16))) ^ \
80
port1 ^ htons(port2)) & \
81
(NBUCKETS - 1))
82
83
/* This hash is for all other IP packets. */
84
#define ADDR_HASH(addr1, addr2) \
85
((addr1 ^ (addr1 >> 16) ^ \
86
htons(addr2 ^ (addr2 >> 16))) & \
87
(NBUCKETS - 1))
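/*
 * Illustrative sketch (hypothetical addresses, not part of the build):
 * deriving a bucket index for a TCP flow.  Arguments are in network byte
 * order, exactly as ip_hash() below passes them.
 *
 *	in_addr_t src = htonl(0xc0000201);	(192.0.2.1)
 *	in_addr_t dst = htonl(0xc6336402);	(198.51.100.2)
 *	uint32_t slot = FULL_HASH(src, dst, htons(12345), htons(80));
 *
 * The final "& (NBUCKETS - 1)" works as a cheap modulo only because
 * NBUCKETS is a power of 2, hence the requirement above.
 */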
88
89
/* Macros to shorten logical constructions */
90
/* XXX: priv must exist in namespace */
91
#define INACTIVE(fle) (time_uptime - fle->f.last > priv->nfinfo_inact_t)
92
#define AGED(fle) (time_uptime - fle->f.first > priv->nfinfo_act_t)
93
#define ISFREE(fle) (fle->f.packets == 0)
94
95
/*
96
* 4 is a magical number: statistically, the number of 4-packet flows is
* bigger than that of 5-, 6-, 7-...-packet flows by an order of magnitude.
* Most UDP/ICMP scans are 1 packet (~90% of the flow cache). TCP scans are
* 2 packets for a reachable host and 4 packets otherwise.
100
*/
101
#define SMALL(fle) (fle->f.packets <= 4)
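/*
 * Sketch of how the predicates above combine on the packet path
 * (see ng_netflow_flow_add() below; the periodic expiry run additionally
 * relaxes SMALL() once the cache gets crowded):
 *
 *	if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
 *		TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 *		expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 *		    fle, NG_QUEUE);
 *	}
 */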
102
103
MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
104
105
static int export_add(item_p, struct flow_entry *);
106
static int export_send(priv_p, fib_export_p, item_p, int);
107
108
#ifdef INET
109
static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
110
int, uint8_t, uint16_t);
111
#endif
112
#ifdef INET6
113
static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
114
int, uint8_t, uint16_t);
115
#endif
116
117
static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);
118
119
#ifdef INET
120
/*
121
* Generate hash for a given flow record.
122
*
123
* The FIB is not used here, because most VRFs carry public IPv4
* addresses, which are unique even without the FIB. Private addresses
* can overlap, but that is handled by the flow_rec bcmp(), which
* includes the fib id. In the IPv6 world addresses are all globally
* unique (not entirely true, there is FC00::/7 for example, but the
* chances of address overlap are MUCH smaller).
129
*/
130
static inline uint32_t
131
ip_hash(struct flow_rec *r)
132
{
133
134
switch (r->r_ip_p) {
135
case IPPROTO_TCP:
136
case IPPROTO_UDP:
137
return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
138
r->r_sport, r->r_dport);
139
default:
140
return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr);
141
}
142
}
143
#endif
144
145
#ifdef INET6
146
/* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */
147
static inline uint32_t
148
ip6_hash(struct flow6_rec *r)
149
{
150
151
switch (r->r_ip_p) {
152
case IPPROTO_TCP:
153
case IPPROTO_UDP:
154
return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
155
r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport,
156
r->r_dport);
157
default:
158
return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
159
r->dst.r_dst6.__u6_addr.__u6_addr32[3]);
160
}
161
}
162
163
#endif
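/*
 * Worked example (hypothetical addresses): for 2001:db8::1 -> 2001:db8::2
 * only the low 32 bits of each address (the "::1" and "::2" words) feed
 * the hash above.  Distinct v6 flows that share those low words still end
 * up in the same bucket but are told apart by the bcmp() over the whole
 * flow6_rec in ng_netflow_flow6_add().
 */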
164
165
/*
166
* Detach the export datagram from priv, if there is one.
* If there is none, allocate a new one.
168
*/
169
static item_p
170
get_export_dgram(priv_p priv, fib_export_p fe)
171
{
172
item_p item = NULL;
173
174
mtx_lock(&fe->export_mtx);
175
if (fe->exp.item != NULL) {
176
item = fe->exp.item;
177
fe->exp.item = NULL;
178
}
179
mtx_unlock(&fe->export_mtx);
180
181
if (item == NULL) {
182
struct netflow_v5_export_dgram *dgram;
183
struct mbuf *m;
184
185
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
186
if (m == NULL)
187
return (NULL);
188
item = ng_package_data(m, NG_NOFLAGS);
189
if (item == NULL)
190
return (NULL);
191
dgram = mtod(m, struct netflow_v5_export_dgram *);
192
dgram->header.count = 0;
193
dgram->header.version = htons(NETFLOW_V5);
194
dgram->header.pad = 0;
195
}
196
197
return (item);
198
}
199
200
/*
201
* Re-attach an incomplete datagram back to priv.  If there is
* already another one there, send the incomplete one instead. */
203
static void
204
return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags)
205
{
206
207
/*
208
* It may happen on SMP, that some thread has already
209
* put its item there, in this case we bail out and
210
* send what we have to collector.
211
*/
212
mtx_lock(&fe->export_mtx);
213
if (fe->exp.item == NULL) {
214
fe->exp.item = item;
215
mtx_unlock(&fe->export_mtx);
216
} else {
217
mtx_unlock(&fe->export_mtx);
218
export_send(priv, fe, item, flags);
219
}
220
}
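/*
 * Usage sketch for the pair above; this mirrors what expire_flow() below
 * actually does and is shown only for illustration:
 *
 *	item_p it = get_export_dgram(priv, fe);
 *	if (it != NULL) {
 *		if (export_add(it, fle) > 0)
 *			export_send(priv, fe, it, NG_QUEUE);
 *		else
 *			return_export_dgram(priv, fe, it, NG_QUEUE);
 *	}
 */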
221
222
/*
223
* The flow is over. Call export_add() and free it. If the datagram
* is full, call export_send().
225
*/
226
static void
227
expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags)
228
{
229
struct netflow_export_item exp;
230
uint16_t version = fle->f.version;
231
232
if ((priv->export != NULL) && (version == IPVERSION)) {
233
exp.item = get_export_dgram(priv, fe);
234
if (exp.item == NULL) {
235
priv->nfinfo_export_failed++;
236
if (priv->export9 != NULL)
237
priv->nfinfo_export9_failed++;
238
/* fle definitely contains IPv4 flow. */
239
uma_zfree_arg(priv->zone, fle, priv);
240
return;
241
}
242
243
if (export_add(exp.item, fle) > 0)
244
export_send(priv, fe, exp.item, flags);
245
else
246
return_export_dgram(priv, fe, exp.item, NG_QUEUE);
247
}
248
249
if (priv->export9 != NULL) {
250
exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt);
251
if (exp.item9 == NULL) {
252
priv->nfinfo_export9_failed++;
253
if (version == IPVERSION)
254
uma_zfree_arg(priv->zone, fle, priv);
255
#ifdef INET6
256
else if (version == IP6VERSION)
257
uma_zfree_arg(priv->zone6, fle, priv);
258
#endif
259
else
260
panic("ng_netflow: Unknown IP proto: %d",
261
version);
262
return;
263
}
264
265
if (export9_add(exp.item9, exp.item9_opt, fle) > 0)
266
export9_send(priv, fe, exp.item9, exp.item9_opt, flags);
267
else
268
return_export9_dgram(priv, fe, exp.item9,
269
exp.item9_opt, NG_QUEUE);
270
}
271
272
if (version == IPVERSION)
273
uma_zfree_arg(priv->zone, fle, priv);
274
#ifdef INET6
275
else if (version == IP6VERSION)
276
uma_zfree_arg(priv->zone6, fle, priv);
277
#endif
278
}
279
280
/* Get a snapshot of node statistics */
281
void
282
ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
283
{
284
285
i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes);
286
i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets);
287
i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6);
288
i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6);
289
i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes);
290
i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets);
291
i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6);
292
i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6);
293
i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp);
294
i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp);
295
296
i->nfinfo_used = uma_zone_get_cur(priv->zone);
297
#ifdef INET6
298
i->nfinfo_used6 = uma_zone_get_cur(priv->zone6);
299
#endif
300
301
i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed;
302
i->nfinfo_export_failed = priv->nfinfo_export_failed;
303
i->nfinfo_export9_failed = priv->nfinfo_export9_failed;
304
i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf;
305
i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs;
306
i->nfinfo_inact_t = priv->nfinfo_inact_t;
307
i->nfinfo_act_t = priv->nfinfo_act_t;
308
}
309
310
/*
311
* Insert a record into the given slot.
*
* First we grab a free flow entry, then fill in all
* the fields we can.
*
* TODO: consider dropping the hash mutex while filling in the datagram,
* as was done in a previous version. Needs testing and profiling
* to be sure.
319
*/
320
#ifdef INET
321
static int
322
hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
323
int plen, uint8_t flags, uint16_t tcp_flags)
324
{
325
struct flow_entry *fle;
326
327
mtx_assert(&hsh->mtx, MA_OWNED);
328
329
fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
330
if (fle == NULL) {
331
priv->nfinfo_alloc_failed++;
332
return (ENOMEM);
333
}
334
335
/*
336
* Now fle is totally ours. It is detached from all lists,
* so we can safely edit it.
338
*/
339
fle->f.version = IPVERSION;
340
bcopy(r, &fle->f.r, sizeof(struct flow_rec));
341
fle->f.bytes = plen;
342
fle->f.packets = 1;
343
fle->f.tcp_flags = tcp_flags;
344
345
fle->f.first = fle->f.last = time_uptime;
346
347
/*
348
* First we do a route table lookup on the destination address, so we
* can fill in out_ifx, dst_mask, nexthop, and (in future releases) dst_as.
350
*/
351
if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
352
struct rtentry *rt;
353
struct route_nhop_data rnd;
354
355
rt = fib4_lookup_rt(r->fib, fle->f.r.r_dst, 0, NHR_NONE, &rnd);
356
if (rt != NULL) {
357
struct in_addr addr;
358
uint32_t scopeid;
359
struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0);
360
int plen;
361
362
rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
363
fle->f.fle_o_ifx = nh->nh_ifp->if_index;
364
if (nh->gw_sa.sa_family == AF_INET)
365
fle->f.next_hop = nh->gw4_sa.sin_addr;
366
/*
367
* XXX we're leaving an empty gateway here for
368
* IPv6 nexthops.
369
*/
370
fle->f.dst_mask = plen;
371
}
372
}
373
374
/* Do route lookup on source address, to fill in src_mask. */
375
if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
376
struct rtentry *rt;
377
struct route_nhop_data rnd;
378
379
rt = fib4_lookup_rt(r->fib, fle->f.r.r_src, 0, NHR_NONE, &rnd);
380
if (rt != NULL) {
381
struct in_addr addr;
382
uint32_t scopeid;
383
int plen;
384
385
rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
386
fle->f.src_mask = plen;
387
}
388
}
389
390
/* Push the new flow onto the end of the hash chain. */
391
TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
392
393
return (0);
394
}
395
#endif
396
397
#ifdef INET6
398
static int
399
hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
400
int plen, uint8_t flags, uint16_t tcp_flags)
401
{
402
struct flow6_entry *fle6;
403
404
mtx_assert(&hsh6->mtx, MA_OWNED);
405
406
fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT);
407
if (fle6 == NULL) {
408
priv->nfinfo_alloc_failed++;
409
return (ENOMEM);
410
}
411
412
/*
413
* Now fle6 is totally ours. It is detached from all lists,
* so we can safely edit it.
415
*/
416
417
fle6->f.version = IP6VERSION;
418
bcopy(r, &fle6->f.r, sizeof(struct flow6_rec));
419
fle6->f.bytes = plen;
420
fle6->f.packets = 1;
421
fle6->f.tcp_flags = tcp_flags;
422
423
fle6->f.first = fle6->f.last = time_uptime;
424
425
/*
426
* First we do a route table lookup on the destination address, so we
* can fill in out_ifx, dst_mask, nexthop, and (in future releases) dst_as.
428
*/
429
if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
430
struct rtentry *rt;
431
struct route_nhop_data rnd;
432
433
rt = fib6_lookup_rt(r->fib, &fle6->f.r.dst.r_dst6, 0, NHR_NONE, &rnd);
434
if (rt != NULL) {
435
struct in6_addr addr;
436
uint32_t scopeid;
437
struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0);
438
int plen;
439
440
rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
441
fle6->f.fle_o_ifx = nh->nh_ifp->if_index;
442
if (nh->gw_sa.sa_family == AF_INET6)
443
fle6->f.n.next_hop6 = nh->gw6_sa.sin6_addr;
444
fle6->f.dst_mask = plen;
445
}
446
}
447
448
if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
449
/* Do route lookup on source address, to fill in src_mask. */
450
struct rtentry *rt;
451
struct route_nhop_data rnd;
452
453
rt = fib6_lookup_rt(r->fib, &fle6->f.r.src.r_src6, 0, NHR_NONE, &rnd);
454
if (rt != NULL) {
455
struct in6_addr addr;
456
uint32_t scopeid;
457
int plen;
458
459
rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
460
fle6->f.src_mask = plen;
461
}
462
}
463
464
/* Push the new flow onto the end of the hash chain. */
465
TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash);
466
467
return (0);
468
}
469
#endif
470
471
/*
472
* Non-static functions called from ng_netflow.c
473
*/
474
475
/* Allocate memory and set up flow cache */
476
void
477
ng_netflow_cache_init(priv_p priv)
478
{
479
struct flow_hash_entry *hsh;
480
int i;
481
482
/* Initialize cache UMA zone. */
483
priv->zone = uma_zcreate("NetFlow IPv4 cache",
484
sizeof(struct flow_entry), NULL, NULL, NULL, NULL,
485
UMA_ALIGN_CACHE, 0);
486
uma_zone_set_max(priv->zone, CACHESIZE);
487
#ifdef INET6
488
priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
489
sizeof(struct flow6_entry), NULL, NULL, NULL, NULL,
490
UMA_ALIGN_CACHE, 0);
491
uma_zone_set_max(priv->zone6, CACHESIZE);
492
#endif
493
494
/* Allocate hash. */
495
priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
496
M_NETFLOW_HASH, M_WAITOK | M_ZERO);
497
498
/* Initialize hash. */
499
for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) {
500
mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
501
TAILQ_INIT(&hsh->head);
502
}
503
504
#ifdef INET6
505
/* Allocate hash. */
506
priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
507
M_NETFLOW_HASH, M_WAITOK | M_ZERO);
508
509
/* Initialize hash. */
510
for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) {
511
mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
512
TAILQ_INIT(&hsh->head);
513
}
514
#endif
515
516
priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK);
517
priv->nfinfo_packets = counter_u64_alloc(M_WAITOK);
518
priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK);
519
priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK);
520
priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK);
521
priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK);
522
priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK);
523
priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK);
524
priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK);
525
priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK);
526
527
ng_netflow_v9_cache_init(priv);
528
CTR0(KTR_NET, "ng_netflow startup()");
529
}
530
531
/* Initialize new FIB table for v5 and v9 */
532
int
533
ng_netflow_fib_init(priv_p priv, int fib)
534
{
535
fib_export_p fe = priv_to_fib(priv, fib);
536
537
CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);
538
539
if (fe != NULL)
540
return (0);
541
542
if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH,
543
M_NOWAIT | M_ZERO)) == NULL)
544
return (ENOMEM);
545
546
mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
547
mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
548
fe->fib = fib;
549
fe->domain_id = fib;
550
551
if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
552
(uintptr_t)NULL, (uintptr_t)fe) == 0) {
553
/* FIB already set up by other ISR */
554
CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
555
fib, fe, priv_to_fib(priv, fib));
556
mtx_destroy(&fe->export_mtx);
557
mtx_destroy(&fe->export9_mtx);
558
free(fe, M_NETGRAPH);
559
} else {
560
/* Increase counter for statistics */
561
CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
562
fib, fe, priv_to_fib(priv, fib));
563
priv->nfinfo_alloc_fibs++;
564
}
565
566
return (0);
567
}
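/*
 * Caller sketch (an assumption about the data path in ng_netflow.c, shown
 * for illustration only): the per-packet code initializes a FIB lazily on
 * the first packet seen for it, roughly:
 *
 *	fe = priv_to_fib(priv, fib);
 *	if (fe == NULL) {
 *		if (ng_netflow_fib_init(priv, fib) != 0)
 *			return (ENOMEM);	(hypothetical error handling)
 *		fe = priv_to_fib(priv, fib);
 *	}
 */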
568
569
/* Free all flow cache memory. Called from node close method. */
570
void
571
ng_netflow_cache_flush(priv_p priv)
572
{
573
struct flow_entry *fle, *fle1;
574
struct flow_hash_entry *hsh;
575
struct netflow_export_item exp;
576
fib_export_p fe;
577
int i;
578
579
bzero(&exp, sizeof(exp));
580
581
/*
582
* We are going to free probably billable data.
583
* Expire everything before freeing it.
584
* No locking is required since callout is already drained.
585
*/
586
for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++)
587
TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
588
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
589
fe = priv_to_fib(priv, fle->f.r.fib);
590
expire_flow(priv, fe, fle, NG_QUEUE);
591
}
592
#ifdef INET6
593
for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++)
594
TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
595
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
596
fe = priv_to_fib(priv, fle->f.r.fib);
597
expire_flow(priv, fe, fle, NG_QUEUE);
598
}
599
#endif
600
601
uma_zdestroy(priv->zone);
602
/* Destroy hash mutexes. */
603
for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
604
mtx_destroy(&hsh->mtx);
605
606
/* Free hash memory. */
607
if (priv->hash != NULL)
608
free(priv->hash, M_NETFLOW_HASH);
609
#ifdef INET6
610
uma_zdestroy(priv->zone6);
611
/* Destroy hash mutexes. */
612
for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
613
mtx_destroy(&hsh->mtx);
614
615
/* Free hash memory. */
616
if (priv->hash6 != NULL)
617
free(priv->hash6, M_NETFLOW_HASH);
618
#endif
619
620
for (i = 0; i < priv->maxfibs; i++) {
621
if ((fe = priv_to_fib(priv, i)) == NULL)
622
continue;
623
624
if (fe->exp.item != NULL)
625
export_send(priv, fe, fe->exp.item, NG_QUEUE);
626
627
if (fe->exp.item9 != NULL)
628
export9_send(priv, fe, fe->exp.item9,
629
fe->exp.item9_opt, NG_QUEUE);
630
631
mtx_destroy(&fe->export_mtx);
632
mtx_destroy(&fe->export9_mtx);
633
free(fe, M_NETGRAPH);
634
}
635
636
counter_u64_free(priv->nfinfo_bytes);
637
counter_u64_free(priv->nfinfo_packets);
638
counter_u64_free(priv->nfinfo_bytes6);
639
counter_u64_free(priv->nfinfo_packets6);
640
counter_u64_free(priv->nfinfo_sbytes);
641
counter_u64_free(priv->nfinfo_spackets);
642
counter_u64_free(priv->nfinfo_sbytes6);
643
counter_u64_free(priv->nfinfo_spackets6);
644
counter_u64_free(priv->nfinfo_act_exp);
645
counter_u64_free(priv->nfinfo_inact_exp);
646
647
ng_netflow_v9_cache_flush(priv);
648
}
649
650
#ifdef INET
651
/* Insert a packet into the flow cache. */
652
int
653
ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
654
caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
655
unsigned int src_if_index)
656
{
657
struct flow_entry *fle, *fle1;
658
struct flow_hash_entry *hsh;
659
struct flow_rec r;
660
int hlen, plen;
661
int error = 0;
662
uint16_t tcp_flags = 0;
663
664
bzero(&r, sizeof(r));
665
666
if (ip->ip_v != IPVERSION)
667
return (EINVAL);
668
669
hlen = ip->ip_hl << 2;
670
if (hlen < sizeof(struct ip))
671
return (EINVAL);
672
673
/* Assume L4 template by default */
674
r.flow_type = NETFLOW_V9_FLOW_V4_L4;
675
676
r.r_src = ip->ip_src;
677
r.r_dst = ip->ip_dst;
678
r.fib = fe->fib;
679
680
plen = ntohs(ip->ip_len);
681
682
r.r_ip_p = ip->ip_p;
683
r.r_tos = ip->ip_tos;
684
685
r.r_i_ifx = src_if_index;
686
687
/*
688
* XXX NOTE: only the first fragment of a fragmented TCP, UDP or
* ICMP packet will be recorded with proper s_port and d_port.
* Subsequent fragments will be recorded simply as IP packets with
* ip_proto = ip->ip_p and s_port, d_port set to zero.
* I know it looks like a bug, but I don't want to re-implement
* IP packet reassembly here. Anyway, the (in)famous trafd works this
* way and nobody has complained yet :)
695
*/
696
if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
697
switch(r.r_ip_p) {
698
case IPPROTO_TCP:
699
{
700
struct tcphdr *tcp;
701
702
tcp = (struct tcphdr *)((caddr_t )ip + hlen);
703
r.r_sport = tcp->th_sport;
704
r.r_dport = tcp->th_dport;
705
tcp_flags = tcp_get_flags(tcp);
706
break;
707
}
708
case IPPROTO_UDP:
709
r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
710
break;
711
}
712
713
counter_u64_add(priv->nfinfo_packets, 1);
714
counter_u64_add(priv->nfinfo_bytes, plen);
715
716
/* Find hash slot. */
717
hsh = &priv->hash[ip_hash(&r)];
718
719
mtx_lock(&hsh->mtx);
720
721
/*
722
* Go through the hash and find our entry. If we encounter an
* entry that should be expired, purge it. We do a reverse
724
* search since most active entries are first, and most
725
* searches are done on most active entries.
726
*/
727
TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
728
if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
729
break;
730
if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
731
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
732
expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
733
fle, NG_QUEUE);
734
counter_u64_add(priv->nfinfo_act_exp, 1);
735
}
736
}
737
738
if (fle) { /* An existing entry. */
739
740
fle->f.bytes += plen;
741
fle->f.packets ++;
742
fle->f.tcp_flags |= tcp_flags;
743
fle->f.last = time_uptime;
744
745
/*
746
* We have the following reasons to expire a flow actively:
747
* - it hit active timeout
748
* - a TCP connection closed
749
* - it is going to overflow counter
750
*/
751
if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
752
(fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
753
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
754
expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
755
fle, NG_QUEUE);
756
counter_u64_add(priv->nfinfo_act_exp, 1);
757
} else {
758
/*
759
* It is the newest, move it to the tail,
760
* if it isn't there already. Next search will
761
* locate it quicker.
762
*/
763
if (fle != TAILQ_LAST(&hsh->head, fhead)) {
764
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
765
TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
766
}
767
}
768
} else /* A new flow entry. */
769
error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);
770
771
mtx_unlock(&hsh->mtx);
772
773
return (error);
774
}
775
#endif
776
777
#ifdef INET6
778
/* Insert an IPv6 packet into the flow cache. */
779
int
780
ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
781
caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
782
unsigned int src_if_index)
783
{
784
struct flow_entry *fle = NULL, *fle1;
785
struct flow6_entry *fle6;
786
struct flow_hash_entry *hsh;
787
struct flow6_rec r;
788
int plen;
789
int error = 0;
790
uint16_t tcp_flags = 0;
791
792
/* check version */
793
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
794
return (EINVAL);
795
796
bzero(&r, sizeof(r));
797
798
r.src.r_src6 = ip6->ip6_src;
799
r.dst.r_dst6 = ip6->ip6_dst;
800
r.fib = fe->fib;
801
802
/* Assume L4 template by default */
803
r.flow_type = NETFLOW_V9_FLOW_V6_L4;
804
805
plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
806
807
#if 0
808
/* XXX: set DSCP/CoS value */
809
r.r_tos = ip->ip_tos;
810
#endif
811
if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
812
switch(upper_proto) {
813
case IPPROTO_TCP:
814
{
815
struct tcphdr *tcp;
816
817
tcp = (struct tcphdr *)upper_ptr;
818
r.r_ports = *(uint32_t *)upper_ptr;
819
tcp_flags = tcp_get_flags(tcp);
820
break;
821
}
822
case IPPROTO_UDP:
823
case IPPROTO_SCTP:
824
r.r_ports = *(uint32_t *)upper_ptr;
825
break;
826
}
827
}
828
829
r.r_ip_p = upper_proto;
830
r.r_i_ifx = src_if_index;
831
832
counter_u64_add(priv->nfinfo_packets6, 1);
833
counter_u64_add(priv->nfinfo_bytes6, plen);
834
835
/* Find hash slot. */
836
hsh = &priv->hash6[ip6_hash(&r)];
837
838
mtx_lock(&hsh->mtx);
839
840
/*
841
* Go through the hash and find our entry. If we encounter an
* entry that should be expired, purge it. We do a reverse
843
* search since most active entries are first, and most
844
* searches are done on most active entries.
845
*/
846
TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
847
if (fle->f.version != IP6VERSION)
848
continue;
849
fle6 = (struct flow6_entry *)fle;
850
if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0)
851
break;
852
if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) {
853
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
854
expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
855
NG_QUEUE);
856
counter_u64_add(priv->nfinfo_act_exp, 1);
857
}
858
}
859
860
if (fle != NULL) { /* An existing entry. */
861
fle6 = (struct flow6_entry *)fle;
862
863
fle6->f.bytes += plen;
864
fle6->f.packets ++;
865
fle6->f.tcp_flags |= tcp_flags;
866
fle6->f.last = time_uptime;
867
868
/*
869
* We have the following reasons to expire a flow actively:
870
* - it hit active timeout
871
* - a TCP connection closed
872
* - it is going to overflow counter
873
*/
874
if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) ||
875
(fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
876
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
877
expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
878
NG_QUEUE);
879
counter_u64_add(priv->nfinfo_act_exp, 1);
880
} else {
881
/*
882
* It is the newest, move it to the tail,
883
* if it isn't there already. Next search will
884
* locate it quicker.
885
*/
886
if (fle != TAILQ_LAST(&hsh->head, fhead)) {
887
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
888
TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
889
}
890
}
891
} else /* A new flow entry. */
892
error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags);
893
894
mtx_unlock(&hsh->mtx);
895
896
return (error);
897
}
898
#endif
899
900
/*
901
* Return records from cache to userland.
902
*
903
* TODO: matching particular IP should be done in kernel, here.
904
*/
905
int
906
ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
907
struct ngnf_show_header *resp)
908
{
909
struct flow_hash_entry *hsh;
910
struct flow_entry *fle;
911
struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1);
912
#ifdef INET6
913
struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1);
914
#endif
915
int i, max;
916
917
i = req->hash_id;
918
if (i > NBUCKETS-1)
919
return (EINVAL);
920
921
#ifdef INET6
922
if (req->version == 6) {
923
resp->version = 6;
924
hsh = priv->hash6 + i;
925
max = NREC6_AT_ONCE;
926
} else
927
#endif
928
if (req->version == 4) {
929
resp->version = 4;
930
hsh = priv->hash + i;
931
max = NREC_AT_ONCE;
932
} else
933
return (EINVAL);
934
935
/*
936
* We will transfer no more than NREC_AT_ONCE records. More data
* will come in the next message.
938
* We send current hash index and current record number in list
939
* to userland, and userland should return it back to us.
940
* Then, we will restart with new entry.
941
*
942
* The resulting cache snapshot can be inaccurate if flow expiration
943
* is taking place on hash item between userland data requests for
944
* this hash item id.
945
*/
946
resp->nentries = 0;
947
for (; i < NBUCKETS; hsh++, i++) {
948
int list_id;
949
950
if (mtx_trylock(&hsh->mtx) == 0) {
951
/*
952
* Requested hash index is not available,
953
* relay decision to skip or re-request data
954
* to userland.
955
*/
956
resp->hash_id = i;
957
resp->list_id = 0;
958
return (0);
959
}
960
961
list_id = 0;
962
TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
963
if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
964
resp->hash_id = i;
965
resp->list_id = list_id;
966
mtx_unlock(&hsh->mtx);
967
return (0);
968
}
969
970
list_id++;
971
/* Search for particular record in list. */
972
if (req->list_id > 0) {
973
if (list_id < req->list_id)
974
continue;
975
976
/* Requested list position found. */
977
req->list_id = 0;
978
}
979
#ifdef INET6
980
if (req->version == 6) {
981
struct flow6_entry *fle6;
982
983
fle6 = (struct flow6_entry *)fle;
984
bcopy(&fle6->f, data6 + resp->nentries,
985
sizeof(fle6->f));
986
} else
987
#endif
988
bcopy(&fle->f, data + resp->nentries,
989
sizeof(fle->f));
990
resp->nentries++;
991
if (resp->nentries == max) {
992
resp->hash_id = i;
993
/*
994
* If it was the last item in list
995
* we simply skip to next hash_id.
996
*/
997
resp->list_id = list_id + 1;
998
mtx_unlock(&hsh->mtx);
999
return (0);
1000
}
1001
}
1002
mtx_unlock(&hsh->mtx);
1003
}
1004
1005
resp->hash_id = resp->list_id = 0;
1006
1007
return (0);
1008
}
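/*
 * Userland iteration sketch (hypothetical caller; the netgraph control
 * message plumbing is omitted): the caller keeps re-sending the cursors
 * it got back until both return to zero, per the protocol described above.
 *
 *	struct ngnf_show_header req = { .version = 4 };
 *	do {
 *		(send req to the node, receive resp followed by the records)
 *		req.hash_id = resp->hash_id;
 *		req.list_id = resp->list_id;
 *	} while (resp->hash_id != 0 || resp->list_id != 0);
 */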
1009
1010
/* We have a full datagram in privdata. Send it to the export hook. */
1011
static int
1012
export_send(priv_p priv, fib_export_p fe, item_p item, int flags)
1013
{
1014
struct mbuf *m = NGI_M(item);
1015
struct netflow_v5_export_dgram *dgram = mtod(m,
1016
struct netflow_v5_export_dgram *);
1017
struct netflow_v5_header *header = &dgram->header;
1018
struct timespec ts;
1019
int error = 0;
1020
1021
/* Fill mbuf header. */
1022
m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) *
1023
header->count + sizeof(struct netflow_v5_header);
1024
1025
/* Fill export header. */
1026
header->sys_uptime = htonl(MILLIUPTIME(time_uptime));
1027
getnanotime(&ts);
1028
header->unix_secs = htonl(ts.tv_sec);
1029
header->unix_nsecs = htonl(ts.tv_nsec);
1030
header->engine_type = 0;
1031
header->engine_id = fe->domain_id;
1032
header->pad = 0;
1033
header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq,
1034
header->count));
1035
header->count = htons(header->count);
1036
1037
if (priv->export != NULL)
1038
NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
1039
else
1040
NG_FREE_ITEM(item);
1041
1042
return (error);
1043
}
1044
1045
/* Add export record to dgram. */
1046
static int
1047
export_add(item_p item, struct flow_entry *fle)
1048
{
1049
struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item),
1050
struct netflow_v5_export_dgram *);
1051
struct netflow_v5_header *header = &dgram->header;
1052
struct netflow_v5_record *rec;
1053
1054
rec = &dgram->r[header->count];
1055
header->count ++;
1056
1057
KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS,
1058
("ng_netflow: export too big"));
1059
1060
/* Fill in export record. */
1061
rec->src_addr = fle->f.r.r_src.s_addr;
1062
rec->dst_addr = fle->f.r.r_dst.s_addr;
1063
rec->next_hop = fle->f.next_hop.s_addr;
1064
rec->i_ifx = htons(fle->f.fle_i_ifx);
1065
rec->o_ifx = htons(fle->f.fle_o_ifx);
1066
rec->packets = htonl(fle->f.packets);
1067
rec->octets = htonl(fle->f.bytes);
1068
rec->first = htonl(MILLIUPTIME(fle->f.first));
1069
rec->last = htonl(MILLIUPTIME(fle->f.last));
1070
rec->s_port = fle->f.r.r_sport;
1071
rec->d_port = fle->f.r.r_dport;
1072
rec->flags = fle->f.tcp_flags;
1073
rec->prot = fle->f.r.r_ip_p;
1074
rec->tos = fle->f.r.r_tos;
1075
rec->dst_mask = fle->f.dst_mask;
1076
rec->src_mask = fle->f.src_mask;
1077
rec->pad1 = 0;
1078
rec->pad2 = 0;
1079
1080
/* Not supported fields. */
1081
rec->src_as = rec->dst_as = 0;
1082
1083
if (header->count == NETFLOW_V5_MAX_RECORDS)
1084
return (1); /* end of datagram */
1085
else
1086
return (0);
1087
}
1088
1089
/* Periodic flow expiry run. */
1090
void
1091
ng_netflow_expire(void *arg)
1092
{
1093
struct flow_entry *fle, *fle1;
1094
struct flow_hash_entry *hsh;
1095
priv_p priv = (priv_p )arg;
1096
int used, i;
1097
1098
/*
1099
* Go through the whole cache.
1100
*/
1101
used = uma_zone_get_cur(priv->zone);
1102
for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
1103
/*
1104
* Skip entries that are already being worked on.
1105
*/
1106
if (mtx_trylock(&hsh->mtx) == 0)
1107
continue;
1108
1109
TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
1110
/*
1111
* Interrupt thread wants this entry!
1112
* Quick! Quick! Bail out!
1113
*/
1114
if (hsh->mtx.mtx_lock & MTX_CONTESTED)
1115
break;
1116
1117
/*
1118
* Don't expire aggressively while hash collision
1119
* ratio is predicted small.
1120
*/
1121
if (used <= (NBUCKETS*2) && !INACTIVE(fle))
1122
break;
1123
1124
if ((INACTIVE(fle) && (SMALL(fle) ||
1125
(used > (NBUCKETS*2)))) || AGED(fle)) {
1126
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
1127
expire_flow(priv, priv_to_fib(priv,
1128
fle->f.r.fib), fle, NG_NOFLAGS);
1129
used--;
1130
counter_u64_add(priv->nfinfo_inact_exp, 1);
1131
}
1132
}
1133
mtx_unlock(&hsh->mtx);
1134
}
1135
1136
#ifdef INET6
1137
used = uma_zone_get_cur(priv->zone6);
1138
for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
1139
struct flow6_entry *fle6;
1140
1141
/*
1142
* Skip entries that are already being worked on.
1143
*/
1144
if (mtx_trylock(&hsh->mtx) == 0)
1145
continue;
1146
1147
TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
1148
fle6 = (struct flow6_entry *)fle;
1149
/*
1150
* Interrupt thread wants this entry!
1151
* Quick! Quick! Bail out!
1152
*/
1153
if (hsh->mtx.mtx_lock & MTX_CONTESTED)
1154
break;
1155
1156
/*
1157
* Don't expire aggressively while hash collision
1158
* ratio is predicted small.
1159
*/
1160
if (used <= (NBUCKETS*2) && !INACTIVE(fle6))
1161
break;
1162
1163
if ((INACTIVE(fle6) && (SMALL(fle6) ||
1164
(used > (NBUCKETS*2)))) || AGED(fle6)) {
1165
TAILQ_REMOVE(&hsh->head, fle, fle_hash);
1166
expire_flow(priv, priv_to_fib(priv,
1167
fle->f.r.fib), fle, NG_NOFLAGS);
1168
used--;
1169
counter_u64_add(priv->nfinfo_inact_exp, 1);
1170
}
1171
}
1172
mtx_unlock(&hsh->mtx);
1173
}
1174
#endif
1175
1176
/* Schedule next expire. */
1177
callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
1178
(void *)priv);
1179
}
1180
1181