GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/net/bpf.c
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1990, 1991, 1993
5
* The Regents of the University of California. All rights reserved.
6
* Copyright (c) 2019 Andrey V. Elsukov <[email protected]>
7
*
8
* This code is derived from the Stanford/CMU enet packet filter,
9
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
10
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
11
* Berkeley Laboratory.
12
*
13
* Redistribution and use in source and binary forms, with or without
14
* modification, are permitted provided that the following conditions
15
* are met:
16
* 1. Redistributions of source code must retain the above copyright
17
* notice, this list of conditions and the following disclaimer.
18
* 2. Redistributions in binary form must reproduce the above copyright
19
* notice, this list of conditions and the following disclaimer in the
20
* documentation and/or other materials provided with the distribution.
21
* 3. Neither the name of the University nor the names of its contributors
22
* may be used to endorse or promote products derived from this software
23
* without specific prior written permission.
24
*
25
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35
* SUCH DAMAGE.
36
*/
37
38
#include <sys/cdefs.h>
39
#include "opt_bpf.h"
40
#include "opt_netgraph.h"
41
42
#include <sys/param.h>
43
#include <sys/conf.h>
44
#include <sys/fcntl.h>
45
#include <sys/jail.h>
46
#include <sys/ktr.h>
47
#include <sys/lock.h>
48
#include <sys/malloc.h>
49
#include <sys/mbuf.h>
50
#include <sys/mutex.h>
51
#include <sys/time.h>
52
#include <sys/priv.h>
53
#include <sys/proc.h>
54
#include <sys/signalvar.h>
55
#include <sys/filio.h>
56
#include <sys/sockio.h>
57
#include <sys/ttycom.h>
58
#include <sys/uio.h>
59
#include <sys/sysent.h>
60
#include <sys/systm.h>
61
62
#include <sys/file.h>
63
#include <sys/poll.h>
64
#include <sys/proc.h>
65
66
#include <sys/socket.h>
67
68
#include <net/if.h>
69
#include <net/if_var.h>
70
#include <net/if_private.h>
71
#include <net/if_vlan_var.h>
72
#include <net/bpf.h>
73
#include <net/bpf_buffer.h>
74
#ifdef BPF_JITTER
75
#include <net/bpf_jitter.h>
76
#endif
77
#include <net/bpf_zerocopy.h>
78
#include <net/bpfdesc.h>
79
#include <net/vnet.h>
80
81
#include <sys/kernel.h>
82
#include <sys/sysctl.h>
83
84
#include <security/mac/mac_framework.h>
85
86
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
87
88
struct bpf_if {
89
struct bpfd_list bif_dlist; /* descriptor list */
90
LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
91
u_int bif_dlt; /* link layer type */
92
u_int bif_hdrlen; /* length of link header */
93
volatile u_int bif_refcnt;
94
struct bpfd_list bif_wlist; /* writer-only list */
95
const struct bif_methods *bif_methods;
96
void *bif_softc;
97
const char *bif_name;
98
struct epoch_context epoch_ctx;
99
};
100
101
/* See bpf_peers_present() in bpf.h. */
102
_Static_assert(offsetof(struct bpf_if, bif_dlist) == 0,
103
"bpf_if shall start with bif_dlist");
104
105
static inline void
106
bif_attachd(struct bpf_if *bp)
107
{
108
if (bp->bif_methods->bif_attachd != NULL)
109
bp->bif_methods->bif_attachd(bp->bif_softc);
110
}
111
112
static inline void
113
bif_detachd(struct bpf_if *bp)
114
{
115
if (bp->bif_methods->bif_detachd != NULL)
116
bp->bif_methods->bif_detachd(bp->bif_softc);
117
}
118
119
static inline uint32_t
120
bif_wrsize(struct bpf_if *bp)
121
{
122
if (bp->bif_methods->bif_wrsize != NULL)
123
return (bp->bif_methods->bif_wrsize(bp->bif_softc));
124
else
125
return (0);
126
}
127
128
static inline int
129
bif_promisc(struct bpf_if *bp, bool on)
130
{
131
if (bp->bif_methods->bif_promisc != NULL)
132
return (bp->bif_methods->bif_promisc(bp->bif_softc, on));
133
else
134
return (0);
135
}
136
137
#ifdef MAC
138
static inline int
139
bif_mac_check_receive(struct bpf_if *bp, struct bpf_d *d)
140
{
141
if (bp->bif_methods->bif_mac_check_receive != NULL)
142
return (bp->bif_methods->bif_mac_check_receive(bp->bif_softc,
143
d));
144
else
145
return (0);
146
}
147
#endif
148
149
/*
150
* XXXGL: Once we migrate to tapping KPI that would specify packet direction
151
* we no longer need bif_chkdir method.
152
*/
153
static inline bool
154
bpf_chkdir(struct bpf_d *d, struct mbuf *m)
155
{
156
return (d->bd_bif->bif_methods->bif_chkdir(d->bd_bif->bif_softc, m,
157
d->bd_direction));
158
}
159
160
struct bpf_program_buffer {
161
struct epoch_context epoch_ctx;
162
#ifdef BPF_JITTER
163
bpf_jit_filter *func;
164
#endif
165
void *buffer[0];
166
};
167
168
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
169
170
#define PRINET 26 /* interruptible */
171
#define BPF_PRIO_MAX 7
172
173
#define SIZEOF_BPF_HDR(type) \
174
(offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
175
176
#ifdef COMPAT_FREEBSD32
177
#include <sys/mount.h>
178
#include <compat/freebsd32/freebsd32.h>
179
#define BPF_ALIGNMENT32 sizeof(int32_t)
180
#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
181
182
#ifndef BURN_BRIDGES
183
/*
184
* 32-bit version of structure prepended to each packet. We use this header
185
* instead of the standard one for 32-bit streams. We mark a stream as
186
* 32-bit the first time we see a 32-bit compat ioctl request.
187
*/
188
struct bpf_hdr32 {
189
struct timeval32 bh_tstamp; /* time stamp */
190
uint32_t bh_caplen; /* length of captured portion */
191
uint32_t bh_datalen; /* original length of packet */
192
uint16_t bh_hdrlen; /* length of bpf header (this struct
193
plus alignment padding) */
194
};
195
#endif
196
197
struct bpf_program32 {
198
u_int bf_len;
199
uint32_t bf_insns;
200
};
201
202
struct bpf_dltlist32 {
203
u_int bfl_len;
204
u_int bfl_list;
205
};
206
207
#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
208
#define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
209
#define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
210
#define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32)
211
#define BIOCSETWF32 _IOW('B', 123, struct bpf_program32)
212
#define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32)
213
#endif
214
215
#define BPF_LOCK() sx_xlock(&bpf_sx)
216
#define BPF_UNLOCK() sx_xunlock(&bpf_sx)
217
#define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
218
/*
219
* bpf_iflist is a list of BPF interface structures, each corresponding to a
220
* specific DLT. The same network interface might have several BPF interface
221
* structures registered by different layers in the stack (e.g., 802.11
222
* frames, Ethernet frames, etc.).
223
*/
224
VNET_DEFINE_STATIC(LIST_HEAD(, bpf_if), bpf_iflist) = LIST_HEAD_INITIALIZER();
225
#define V_bpf_iflist VNET(bpf_iflist)
226
static struct sx bpf_sx; /* bpf global lock */
227
228
static void bpfif_ref(struct bpf_if *);
229
static void bpfif_rele(struct bpf_if *);
230
231
static void bpfd_ref(struct bpf_d *);
232
static void bpfd_rele(struct bpf_d *);
233
static int bpf_attachd(struct bpf_d *d, struct bpf_if *);
234
static void bpf_detachd(struct bpf_d *, bool);
235
static void bpfd_free(epoch_context_t);
236
static void bpf_timed_out(void *);
237
static __inline void
238
bpf_wakeup(struct bpf_d *);
239
static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
240
void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
241
struct bintime *);
242
static void reset_d(struct bpf_d *);
243
static int bpf_getiflist(struct bpf_iflist *);
244
static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
245
static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
246
static int bpf_setdlt(struct bpf_d *, u_int);
247
static void filt_bpfdetach(struct knote *);
248
static int filt_bpfread(struct knote *, long);
249
static int filt_bpfwrite(struct knote *, long);
250
static void bpf_drvinit(void *);
251
static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
252
253
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
254
"bpf sysctl");
255
int bpf_maxinsns = BPF_MAXINSNS;
256
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
257
&bpf_maxinsns, 0, "Maximum bpf program instructions");
258
static int bpf_zerocopy_enable = 0;
259
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
260
&bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
261
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats,
262
CTLFLAG_VNET | CTLFLAG_MPSAFE | CTLFLAG_RW,
263
bpf_stats_sysctl, "bpf statistics portal");
264
265
VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
266
#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
267
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN,
268
&VNET_NAME(bpf_optimize_writers), 0,
269
"Do not send packets until BPF program is set");
270
271
static d_open_t bpfopen;
272
static d_read_t bpfread;
273
static d_write_t bpfwrite;
274
static d_ioctl_t bpfioctl;
275
static d_poll_t bpfpoll;
276
static d_kqfilter_t bpfkqfilter;
277
278
static struct cdevsw bpf_cdevsw = {
279
.d_version = D_VERSION,
280
.d_open = bpfopen,
281
.d_read = bpfread,
282
.d_write = bpfwrite,
283
.d_ioctl = bpfioctl,
284
.d_poll = bpfpoll,
285
.d_name = "bpf",
286
.d_kqfilter = bpfkqfilter,
287
};
288
289
static const struct filterops bpfread_filtops = {
290
.f_isfd = 1,
291
.f_detach = filt_bpfdetach,
292
.f_event = filt_bpfread,
293
.f_copy = knote_triv_copy,
294
};
295
296
static const struct filterops bpfwrite_filtops = {
297
.f_isfd = 1,
298
.f_detach = filt_bpfdetach,
299
.f_event = filt_bpfwrite,
300
.f_copy = knote_triv_copy,
301
};
302
303
/*
304
* LOCKING MODEL USED BY BPF
305
*
306
* Locks:
307
* 1) Global lock (BPF_LOCK). An sx lock used to protect some global counters,
308
* all bpf_iflist changes, and to serialize ioctl access to bpf descriptors.
309
* 2) Descriptor lock. Mutex, used to protect BPF buffers and various
310
* structure fields used by bpf_*tap* code.
311
*
312
* Lock order: global lock, then descriptor lock.
313
*
314
* There are several possible consumers:
315
*
316
* 1. The kernel registers interface pointer with bpfattach().
317
* Each call allocates new bpf_if structure, references ifnet pointer
318
* and links bpf_if into bpf_iflist chain. This is protected with global
319
* lock.
320
*
321
* 2. A userland application issues ioctl() calls on a bpf_d descriptor.
322
* All such calls are serialized with the global lock. BPF filters can be
323
* changed, but the old filter pointer will be freed using NET_EPOCH_CALL().
324
* Thus it should be safe for bpf_tap/bpf_mtap* code to access the
325
* filter pointers, even if a change happens during bpf_tap execution.
326
* Destruction of a bpf_d descriptor is also done using NET_EPOCH_CALL().
327
*
328
* 3. A userland application can write packets into a bpf_d descriptor.
329
* There we need to be sure that the ifnet won't disappear during bpfwrite().
330
*
331
* 4. The kernel invokes bpf_tap/bpf_mtap* functions. The access to
332
* bif_dlist is protected with a net_epoch_preempt section, so it should
333
* be safe to access the bpf_d descriptor inside the section.
334
*
335
* 5. The kernel invokes bpfdetach() on interface destruction. All lists
336
* are modified with global lock held and actual free() is done using
337
* NET_EPOCH_CALL().
338
*/
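/*
 * Illustrative sketch only; it mirrors what bpf_tap() and the other tap
 * routines later in this file actually do. A tap-side consumer runs inside
 * an epoch section, so the descriptors and filter programs it dereferences
 * cannot be freed from under it; writers defer the actual free with
 * NET_EPOCH_CALL():
 *
 *	NET_EPOCH_ENTER(et);
 *	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 *		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 *		...
 *	}
 *	NET_EPOCH_EXIT(et);
 */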
339
340
static void
341
bpfif_free(epoch_context_t ctx)
342
{
343
struct bpf_if *bp;
344
345
bp = __containerof(ctx, struct bpf_if, epoch_ctx);
346
free(bp, M_BPF);
347
}
348
349
static void
350
bpfif_ref(struct bpf_if *bp)
351
{
352
353
refcount_acquire(&bp->bif_refcnt);
354
}
355
356
static void
357
bpfif_rele(struct bpf_if *bp)
358
{
359
360
if (!refcount_release(&bp->bif_refcnt))
361
return;
362
NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
363
}
364
365
static void
366
bpfd_ref(struct bpf_d *d)
367
{
368
369
refcount_acquire(&d->bd_refcnt);
370
}
371
372
static void
373
bpfd_rele(struct bpf_d *d)
374
{
375
376
if (!refcount_release(&d->bd_refcnt))
377
return;
378
NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
379
}
380
381
static struct bpf_program_buffer*
382
bpf_program_buffer_alloc(size_t size, int flags)
383
{
384
385
return (malloc(sizeof(struct bpf_program_buffer) + size,
386
M_BPF, flags));
387
}
388
389
static void
390
bpf_program_buffer_free(epoch_context_t ctx)
391
{
392
struct bpf_program_buffer *ptr;
393
394
ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
395
#ifdef BPF_JITTER
396
if (ptr->func != NULL)
397
bpf_destroy_jit_filter(ptr->func);
398
#endif
399
free(ptr, M_BPF);
400
}
401
402
/*
403
* Wrapper functions for various buffering methods. If the set of buffer
404
* modes expands, we will probably want to introduce a switch data structure
405
* similar to protosw, etc.
406
*/
407
static void
408
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
409
u_int len)
410
{
411
412
BPFD_LOCK_ASSERT(d);
413
414
switch (d->bd_bufmode) {
415
case BPF_BUFMODE_BUFFER:
416
return (bpf_buffer_append_bytes(d, buf, offset, src, len));
417
418
case BPF_BUFMODE_ZBUF:
419
counter_u64_add(d->bd_zcopy, 1);
420
return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
421
422
default:
423
panic("bpf_buf_append_bytes");
424
}
425
}
426
427
static void
428
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
429
u_int len)
430
{
431
432
BPFD_LOCK_ASSERT(d);
433
434
switch (d->bd_bufmode) {
435
case BPF_BUFMODE_BUFFER:
436
return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
437
438
case BPF_BUFMODE_ZBUF:
439
counter_u64_add(d->bd_zcopy, 1);
440
return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
441
442
default:
443
panic("bpf_buf_append_mbuf");
444
}
445
}
446
447
/*
448
* This function gets called when the free buffer is re-assigned.
449
*/
450
static void
451
bpf_buf_reclaimed(struct bpf_d *d)
452
{
453
454
BPFD_LOCK_ASSERT(d);
455
456
switch (d->bd_bufmode) {
457
case BPF_BUFMODE_BUFFER:
458
return;
459
460
case BPF_BUFMODE_ZBUF:
461
bpf_zerocopy_buf_reclaimed(d);
462
return;
463
464
default:
465
panic("bpf_buf_reclaimed");
466
}
467
}
468
469
/*
470
* If the buffer mechanism has a way to decide that a held buffer can be made
471
* free, then it is exposed via the bpf_canfreebuf() interface. (1) is
472
* returned if the buffer can be discarded, (0) is returned if it cannot.
473
*/
474
static int
475
bpf_canfreebuf(struct bpf_d *d)
476
{
477
478
BPFD_LOCK_ASSERT(d);
479
480
switch (d->bd_bufmode) {
481
case BPF_BUFMODE_ZBUF:
482
return (bpf_zerocopy_canfreebuf(d));
483
}
484
return (0);
485
}
486
487
/*
488
* Allow the buffer model to indicate that the current store buffer is
489
* immutable, regardless of the appearance of space. Return (1) if the
490
* buffer is writable, and (0) if not.
491
*/
492
static int
493
bpf_canwritebuf(struct bpf_d *d)
494
{
495
BPFD_LOCK_ASSERT(d);
496
497
switch (d->bd_bufmode) {
498
case BPF_BUFMODE_ZBUF:
499
return (bpf_zerocopy_canwritebuf(d));
500
}
501
return (1);
502
}
503
504
/*
505
* Notify buffer model that an attempt to write to the store buffer has
506
* resulted in a dropped packet, in which case the buffer may be considered
507
* full.
508
*/
509
static void
510
bpf_buffull(struct bpf_d *d)
511
{
512
513
BPFD_LOCK_ASSERT(d);
514
515
switch (d->bd_bufmode) {
516
case BPF_BUFMODE_ZBUF:
517
bpf_zerocopy_buffull(d);
518
break;
519
}
520
}
521
522
/*
523
* Notify the buffer model that a buffer has moved into the hold position.
524
*/
525
void
526
bpf_bufheld(struct bpf_d *d)
527
{
528
529
BPFD_LOCK_ASSERT(d);
530
531
switch (d->bd_bufmode) {
532
case BPF_BUFMODE_ZBUF:
533
bpf_zerocopy_bufheld(d);
534
break;
535
}
536
}
537
538
static void
539
bpf_free(struct bpf_d *d)
540
{
541
542
switch (d->bd_bufmode) {
543
case BPF_BUFMODE_BUFFER:
544
return (bpf_buffer_free(d));
545
546
case BPF_BUFMODE_ZBUF:
547
return (bpf_zerocopy_free(d));
548
549
default:
550
panic("bpf_buf_free");
551
}
552
}
553
554
static int
555
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
556
{
557
558
if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
559
return (EOPNOTSUPP);
560
return (bpf_buffer_uiomove(d, buf, len, uio));
561
}
562
563
static int
564
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
565
{
566
567
if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
568
return (EOPNOTSUPP);
569
return (bpf_buffer_ioctl_sblen(d, i));
570
}
571
572
static int
573
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
574
{
575
576
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
577
return (EOPNOTSUPP);
578
return (bpf_zerocopy_ioctl_getzmax(td, d, i));
579
}
580
581
static int
582
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
583
{
584
585
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
586
return (EOPNOTSUPP);
587
return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
588
}
589
590
static int
591
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
592
{
593
594
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
595
return (EOPNOTSUPP);
596
return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
597
}
598
599
/*
600
* Check if we need to upgrade our descriptor @d from write-only mode.
601
*/
602
static int
603
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
604
int flen)
605
{
606
int is_snap, need_upgrade;
607
608
/*
609
* Check if we've already upgraded or the new filter is empty.
610
*/
611
if (d->bd_writer == 0 || fcode == NULL)
612
return (0);
613
614
need_upgrade = 0;
615
616
/*
617
* Check if cmd looks like snaplen setting from
618
* pcap_bpf.c:pcap_open_live().
619
* Note we're not checking .k value here:
620
* while pcap_open_live() definitely sets it to a non-zero value,
621
* we'd prefer to treat the k=0 (deny ALL) case the same way, i.e.,
622
* do not consider upgrading immediately
623
*/
624
if (cmd == BIOCSETF && flen == 1 &&
625
fcode[0].code == (BPF_RET | BPF_K))
626
is_snap = 1;
627
else
628
is_snap = 0;
629
630
if (is_snap == 0) {
631
/*
632
* We're setting first filter and it doesn't look like
633
* setting snaplen. We're probably using bpf directly.
634
* Upgrade immediately.
635
*/
636
need_upgrade = 1;
637
} else {
638
/*
639
* Do not require upgrade by first BIOCSETF
640
* (used to set snaplen) by pcap_open_live().
641
*/
642
643
if (--d->bd_writer == 0) {
644
/*
645
* First snaplen filter has already
646
* been set. This is probably catch-all
647
* filter
648
*/
649
need_upgrade = 1;
650
}
651
}
652
653
CTR5(KTR_NET,
654
"%s: filter function set by pid %d, "
655
"bd_writer counter %d, snap %d upgrade %d",
656
__func__, d->bd_pid, d->bd_writer,
657
is_snap, need_upgrade);
658
659
return (need_upgrade);
660
}
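/*
 * For reference, the single-instruction program that the snaplen heuristic
 * above matches is what libpcap has historically installed first from
 * pcap_open_live(). A userland sketch (fd is an already-open bpf descriptor
 * and 65535 is just an example snaplen):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 65535),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);	flen == 1, code == BPF_RET | BPF_K
 */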
661
662
/*
663
* Detach a file from its interface.
664
*/
665
static void
666
bpf_detachd(struct bpf_d *d, bool detached_ifp)
667
{
668
struct bpf_if *bp;
669
bool writer;
670
671
BPF_LOCK_ASSERT();
672
CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
673
674
/* Check if descriptor is attached */
675
if ((bp = d->bd_bif) == NULL)
676
return;
677
678
BPFD_LOCK(d);
679
CK_LIST_REMOVE(d, bd_next);
680
writer = (d->bd_writer > 0);
681
if (detached_ifp) {
682
d->bd_bif = NULL;
683
/*
684
* Notify descriptor as it's detached, so that any
685
* sleepers wake up and get ENXIO.
686
*/
687
bpf_wakeup(d);
688
}
689
BPFD_UNLOCK(d);
690
691
if (!writer)
692
bif_detachd(bp);
693
694
if (d->bd_promisc && !detached_ifp) {
695
d->bd_promisc = 0;
696
(void)bif_promisc(bp, false);
697
}
698
699
bpfif_rele(bp);
700
}
701
702
/*
703
* Close the descriptor by detaching it from its interface,
704
* deallocating its buffers, and marking it free.
705
*/
706
static void
707
bpf_dtor(void *data)
708
{
709
struct bpf_d *d = data;
710
711
BPFD_LOCK(d);
712
if (d->bd_state == BPF_WAITING)
713
callout_stop(&d->bd_callout);
714
d->bd_state = BPF_IDLE;
715
BPFD_UNLOCK(d);
716
funsetown(&d->bd_sigio);
717
BPF_LOCK();
718
bpf_detachd(d, false);
719
BPF_UNLOCK();
720
#ifdef MAC
721
mac_bpfdesc_destroy(d);
722
#endif /* MAC */
723
seldrain(&d->bd_sel);
724
knlist_destroy(&d->bd_sel.si_note);
725
callout_drain(&d->bd_callout);
726
bpfd_rele(d);
727
}
728
729
/*
730
* Open ethernet device. Returns ENXIO for illegal minor device number,
731
* EBUSY if file is open by another process.
732
*/
733
/* ARGSUSED */
734
static int
735
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
736
{
737
struct bpf_d *d;
738
int error;
739
740
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
741
error = devfs_set_cdevpriv(d, bpf_dtor);
742
if (error != 0) {
743
free(d, M_BPF);
744
return (error);
745
}
746
747
/* Setup counters */
748
d->bd_rcount = counter_u64_alloc(M_WAITOK);
749
d->bd_dcount = counter_u64_alloc(M_WAITOK);
750
d->bd_fcount = counter_u64_alloc(M_WAITOK);
751
d->bd_wcount = counter_u64_alloc(M_WAITOK);
752
d->bd_wfcount = counter_u64_alloc(M_WAITOK);
753
d->bd_wdcount = counter_u64_alloc(M_WAITOK);
754
d->bd_zcopy = counter_u64_alloc(M_WAITOK);
755
756
/*
757
* For historical reasons, perform a one-time initialization call to
758
* the buffer routines, even though we're not yet committed to a
759
* particular buffer method.
760
*/
761
bpf_buffer_init(d);
762
if ((flags & FREAD) == 0)
763
d->bd_writer = 2;
764
d->bd_bufmode = BPF_BUFMODE_BUFFER;
765
d->bd_sig = SIGIO;
766
d->bd_direction = BPF_D_INOUT;
767
refcount_init(&d->bd_refcnt, 1);
768
BPF_PID_REFRESH(d, td);
769
#ifdef MAC
770
mac_bpfdesc_init(d);
771
mac_bpfdesc_create(td->td_ucred, d);
772
#endif
773
mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
774
callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
775
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
776
777
/* Disable VLAN pcp tagging. */
778
d->bd_pcp = 0;
779
780
return (0);
781
}
782
783
/*
784
* bpfread - read next chunk of packets from buffers
785
*/
786
static int
787
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
788
{
789
struct bpf_d *d;
790
int error;
791
int non_block;
792
int timed_out;
793
794
error = devfs_get_cdevpriv((void **)&d);
795
if (error != 0)
796
return (error);
797
798
/*
799
* Restrict application to use a buffer the same size as
800
* the kernel buffers.
801
*/
802
if (uio->uio_resid != d->bd_bufsize)
803
return (EINVAL);
804
805
non_block = ((ioflag & O_NONBLOCK) != 0);
806
807
BPFD_LOCK(d);
808
BPF_PID_REFRESH_CUR(d);
809
if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
810
BPFD_UNLOCK(d);
811
return (EOPNOTSUPP);
812
}
813
if (d->bd_state == BPF_WAITING)
814
callout_stop(&d->bd_callout);
815
timed_out = (d->bd_state == BPF_TIMED_OUT);
816
d->bd_state = BPF_IDLE;
817
while (d->bd_flags & BPFD_HBUF_INUSE) {
818
error = mtx_sleep(&d->bd_hbuf, &d->bd_lock, PRINET | PCATCH,
819
"bd_hbuf", 0);
820
if (error != 0) {
821
BPFD_UNLOCK(d);
822
return (error);
823
}
824
}
825
/*
826
* If the hold buffer is empty, then do a timed sleep, which
827
* ends when the timeout expires or when enough packets
828
* have arrived to fill the store buffer.
829
*/
830
while (d->bd_hbuf == NULL) {
831
if (d->bd_slen != 0) {
832
/*
833
* One or more packets either arrived since the previous
834
* read or arrived while we were asleep.
835
*/
836
if ((d->bd_flags & BPFD_IMMEDIATE) || non_block ||
837
timed_out) {
838
/*
839
* Rotate the buffers and return what's here
840
* if we are in immediate mode, non-blocking
841
* flag is set, or this descriptor timed out.
842
*/
843
ROTATE_BUFFERS(d);
844
break;
845
}
846
}
847
848
/*
849
* No data is available, check to see if the bpf device
850
* is still pointed at a real interface. If not, return
851
* ENXIO so that the userland process knows to rebind
852
* it before using it again.
853
*/
854
if (d->bd_bif == NULL) {
855
BPFD_UNLOCK(d);
856
return (ENXIO);
857
}
858
859
if (non_block) {
860
BPFD_UNLOCK(d);
861
return (EWOULDBLOCK);
862
}
863
error = msleep(d, &d->bd_lock, PRINET | PCATCH,
864
"bpf", d->bd_rtout);
865
if (error == EINTR || error == ERESTART) {
866
BPFD_UNLOCK(d);
867
return (error);
868
}
869
if (error == EWOULDBLOCK) {
870
/*
871
* On a timeout, return what's in the buffer,
872
* which may be nothing. If there is something
873
* in the store buffer, we can rotate the buffers.
874
*/
875
if (d->bd_hbuf)
876
/*
877
* We filled up the buffer in between
878
* getting the timeout and arriving
879
* here, so we don't need to rotate.
880
*/
881
break;
882
883
if (d->bd_slen == 0) {
884
BPFD_UNLOCK(d);
885
return (0);
886
}
887
ROTATE_BUFFERS(d);
888
break;
889
}
890
}
891
/*
892
* At this point, we know we have something in the hold slot.
893
*/
894
d->bd_flags |= BPFD_HBUF_INUSE;
895
BPFD_UNLOCK(d);
896
897
/*
898
* Move data from hold buffer into user space.
899
* We know the entire buffer is transferred since
900
* we checked above that the read buffer is bpf_bufsize bytes.
901
*
902
* We do not have to worry about simultaneous reads because
903
* we waited for sole access to the hold buffer above.
904
*/
905
error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
906
907
BPFD_LOCK(d);
908
if (d->bd_flags & BPFD_HBUF_INUSE) {
909
KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
910
d->bd_fbuf = d->bd_hbuf;
911
d->bd_hbuf = NULL;
912
d->bd_hlen = 0;
913
bpf_buf_reclaimed(d);
914
d->bd_flags &= ~BPFD_HBUF_INUSE;
915
wakeup(&d->bd_hbuf);
916
}
917
BPFD_UNLOCK(d);
918
919
return (error);
920
}
921
922
/*
923
* If there are processes sleeping on this descriptor, wake them up.
924
*/
925
static __inline void
926
bpf_wakeup(struct bpf_d *d)
927
{
928
929
BPFD_LOCK_ASSERT(d);
930
if (d->bd_state == BPF_WAITING) {
931
callout_stop(&d->bd_callout);
932
d->bd_state = BPF_IDLE;
933
}
934
wakeup(d);
935
if ((d->bd_flags & BPFD_ASYNC) && d->bd_sig && d->bd_sigio)
936
pgsigio(&d->bd_sigio, d->bd_sig, 0);
937
938
selwakeuppri(&d->bd_sel, PRINET);
939
KNOTE_LOCKED(&d->bd_sel.si_note, 0);
940
}
941
942
static void
943
bpf_timed_out(void *arg)
944
{
945
struct bpf_d *d = (struct bpf_d *)arg;
946
947
BPFD_LOCK_ASSERT(d);
948
949
if (callout_pending(&d->bd_callout) ||
950
!callout_active(&d->bd_callout))
951
return;
952
if (d->bd_state == BPF_WAITING) {
953
d->bd_state = BPF_TIMED_OUT;
954
if (d->bd_slen != 0)
955
bpf_wakeup(d);
956
}
957
}
958
959
static int
960
bpf_ready(struct bpf_d *d)
961
{
962
963
BPFD_LOCK_ASSERT(d);
964
965
if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
966
return (1);
967
if (((d->bd_flags & BPFD_IMMEDIATE) || d->bd_state == BPF_TIMED_OUT) &&
968
d->bd_slen != 0)
969
return (1);
970
return (0);
971
}
972
973
static int
974
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
975
{
976
struct epoch_tracker et;
977
struct bpf_if *bp;
978
struct bpf_d *d;
979
struct mbuf *m, *mc;
980
ssize_t len;
981
int error;
982
983
error = devfs_get_cdevpriv((void **)&d);
984
if (error != 0)
985
return (error);
986
987
if (uio->uio_resid == 0)
988
return (0);
989
990
BPFD_LOCK(d);
991
if ((bp = d->bd_bif) == NULL)
992
error = ENXIO;
993
else if (bp->bif_methods->bif_write == NULL)
994
error = EOPNOTSUPP;
995
if (error) {
996
BPFD_UNLOCK(d);
997
counter_u64_add(d->bd_wdcount, 1);
998
return (error);
999
}
1000
bpfd_ref(d);
1001
BPFD_UNLOCK(d);
1002
1003
len = uio->uio_resid;
1004
/* Allocate an mbuf, up to MJUM16BYTES bytes, for our write. */
1005
m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
1006
if (m == NULL) {
1007
error = ENOMEM;
1008
goto fail_wref;
1009
}
1010
m->m_pkthdr.len = m->m_len = len;
1011
1012
error = uiomove(mtod(m, u_char *), len, uio);
1013
if (error)
1014
goto fail_wref;
1015
1016
if (bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len) == 0) {
1017
error = EPERM;
1018
goto fail_wref;
1019
}
1020
1021
if (d->bd_flags & BPFD_FEEDBACK) {
1022
mc = m_dup(m, M_WAITOK);
1023
/* Set M_PROMISC for outgoing packets to be discarded. */
1024
if (d->bd_direction == BPF_D_INOUT)
1025
m->m_flags |= M_PROMISC;
1026
} else
1027
mc = NULL;
1028
1029
/* XXXGL: should belong to bpf_ifnet.c */
1030
if (d->bd_pcp != 0)
1031
(void)vlan_set_pcp(m, d->bd_pcp);
1032
1033
BPFD_LOCK(d);
1034
#ifdef MAC
1035
mac_bpfdesc_create_mbuf(d, m);
1036
if (mc != NULL)
1037
mac_bpfdesc_create_mbuf(d, mc);
1038
#endif
1039
/*
1040
* Check that descriptor is still attached to the interface.
1041
* This can happen on bpfdetach() or if other thread did BIOCSDLT.
1042
*/
1043
if (__predict_false(d->bd_bif != bp)) {
1044
BPFD_UNLOCK(d);
1045
m_freem(mc);
1046
error = ENXIO;
1047
goto fail_wref;
1048
}
1049
BPFD_UNLOCK(d);
1050
1051
NET_EPOCH_ENTER(et);
1052
error = bp->bif_methods->bif_write(bp->bif_softc, m, mc, d->bd_flags);
1053
NET_EPOCH_EXIT(et);
1054
if (error)
1055
counter_u64_add(d->bd_wdcount, 1);
1056
else
1057
counter_u64_add(d->bd_wfcount, 1);
1058
bpfd_rele(d);
1059
1060
return (error);
1061
1062
fail_wref:
1063
counter_u64_add(d->bd_wdcount, 1);
1064
bpfd_rele(d);
1065
m_freem(m);
1066
return (error);
1067
}
1068
1069
/*
1070
* Reset a descriptor by flushing its packet buffer and clearing the receive
1071
* and drop counts. This is doable for kernel-only buffers, but with
1072
* zero-copy buffers, we can't write to (or rotate) buffers that are
1073
* currently owned by userspace. It would be nice if we could encapsulate
1074
* this logic in the buffer code rather than here.
1075
*/
1076
static void
1077
reset_d(struct bpf_d *d)
1078
{
1079
1080
BPFD_LOCK_ASSERT(d);
1081
1082
while (d->bd_flags & BPFD_HBUF_INUSE)
1083
mtx_sleep(&d->bd_hbuf, &d->bd_lock, PRINET, "bd_hbuf", 0);
1084
if ((d->bd_hbuf != NULL) &&
1085
(d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
1086
/* Free the hold buffer. */
1087
d->bd_fbuf = d->bd_hbuf;
1088
d->bd_hbuf = NULL;
1089
d->bd_hlen = 0;
1090
bpf_buf_reclaimed(d);
1091
}
1092
if (bpf_canwritebuf(d))
1093
d->bd_slen = 0;
1094
counter_u64_zero(d->bd_rcount);
1095
counter_u64_zero(d->bd_dcount);
1096
counter_u64_zero(d->bd_fcount);
1097
counter_u64_zero(d->bd_wcount);
1098
counter_u64_zero(d->bd_wfcount);
1099
counter_u64_zero(d->bd_wdcount);
1100
counter_u64_zero(d->bd_zcopy);
1101
}
1102
1103
/*
1104
* FIONREAD Check for read packet available.
1105
* BIOCGETIFLIST Get list of all tap points.
1106
* BIOCGBLEN Get buffer len [for read()].
1107
* BIOCSETF Set read filter.
1108
* BIOCSETFNR Set read filter without resetting descriptor.
1109
* BIOCSETWF Set write filter.
1110
* BIOCFLUSH Flush read packet buffer.
1111
* BIOCPROMISC Put interface into promiscuous mode.
1112
* BIOCGDLT Get link layer type.
1113
* BIOCGETIF Get interface name.
1114
* BIOCSETIF Set interface.
1115
* BIOCSRTIMEOUT Set read timeout.
1116
* BIOCGRTIMEOUT Get read timeout.
1117
* BIOCGSTATS Get packet stats.
1118
* BIOCIMMEDIATE Set immediate mode.
1119
* BIOCVERSION Get filter language version.
1120
* BIOCGHDRCMPLT Get "header already complete" flag
1121
* BIOCSHDRCMPLT Set "header already complete" flag
1122
* BIOCGDIRECTION Get packet direction flag
1123
* BIOCSDIRECTION Set packet direction flag
1124
* BIOCGTSTAMP Get time stamp format and resolution.
1125
* BIOCSTSTAMP Set time stamp format and resolution.
1126
* BIOCLOCK Set "locked" flag
1127
* BIOCFEEDBACK Set packet feedback mode.
1128
* BIOCSETZBUF Set current zero-copy buffer locations.
1129
* BIOCGETZMAX Get maximum zero-copy buffer size.
1130
* BIOCROTZBUF Force rotation of zero-copy buffer
1131
* BIOCSETBUFMODE Set buffer mode.
1132
* BIOCGETBUFMODE Get current buffer mode.
1133
* BIOCSETVLANPCP Set VLAN PCP tag.
1134
*/
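/*
 * Illustrative userland sketch (not kernel code) of the common capture
 * setup using the ioctls listed above; "em0" is just an example interface
 * name, fd checks and error handling are omitted:
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <net/bpf.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr = { 0 };
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		attach to the tap point
 *	u_int one = 1, blen;
 *	ioctl(fd, BIOCIMMEDIATE, &one);		deliver packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &blen);		read() must use exactly this size
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);	packets are prefixed by struct bpf_hdr
 */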
1135
/* ARGSUSED */
1136
static int
1137
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1138
struct thread *td)
1139
{
1140
struct bpf_d *d;
1141
int error;
1142
1143
error = devfs_get_cdevpriv((void **)&d);
1144
if (error != 0)
1145
return (error);
1146
1147
/*
1148
* Refresh PID associated with this descriptor.
1149
*/
1150
BPFD_LOCK(d);
1151
BPF_PID_REFRESH(d, td);
1152
if (d->bd_state == BPF_WAITING)
1153
callout_stop(&d->bd_callout);
1154
d->bd_state = BPF_IDLE;
1155
BPFD_UNLOCK(d);
1156
1157
if (d->bd_flags & BPFD_LOCKED) {
1158
switch (cmd) {
1159
case BIOCGETIFLIST:
1160
case BIOCGBLEN:
1161
case BIOCFLUSH:
1162
case BIOCGDLT:
1163
case BIOCGDLTLIST:
1164
#ifdef COMPAT_FREEBSD32
1165
case BIOCGDLTLIST32:
1166
#endif
1167
case BIOCGETIF:
1168
case BIOCGRTIMEOUT:
1169
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1170
case BIOCGRTIMEOUT32:
1171
#endif
1172
case BIOCGSTATS:
1173
case BIOCVERSION:
1174
case BIOCGRSIG:
1175
case BIOCGHDRCMPLT:
1176
case BIOCSTSTAMP:
1177
case BIOCFEEDBACK:
1178
case FIONREAD:
1179
case BIOCLOCK:
1180
case BIOCSRTIMEOUT:
1181
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1182
case BIOCSRTIMEOUT32:
1183
#endif
1184
case BIOCIMMEDIATE:
1185
case TIOCGPGRP:
1186
case BIOCROTZBUF:
1187
break;
1188
default:
1189
return (EPERM);
1190
}
1191
}
1192
#ifdef COMPAT_FREEBSD32
1193
/*
1194
* If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1195
* that it will get 32-bit packet headers.
1196
*/
1197
switch (cmd) {
1198
case BIOCSETF32:
1199
case BIOCSETFNR32:
1200
case BIOCSETWF32:
1201
case BIOCGDLTLIST32:
1202
case BIOCGRTIMEOUT32:
1203
case BIOCSRTIMEOUT32:
1204
if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
1205
BPFD_LOCK(d);
1206
d->bd_compat32 = 1;
1207
BPFD_UNLOCK(d);
1208
}
1209
}
1210
#endif
1211
1212
CURVNET_SET(TD_TO_VNET(td));
1213
switch (cmd) {
1214
default:
1215
error = EINVAL;
1216
break;
1217
1218
/*
1219
* Check for read packet available.
1220
*/
1221
case FIONREAD:
1222
{
1223
int n;
1224
1225
BPFD_LOCK(d);
1226
n = d->bd_slen;
1227
while (d->bd_flags & BPFD_HBUF_INUSE)
1228
mtx_sleep(&d->bd_hbuf, &d->bd_lock,
1229
PRINET, "bd_hbuf", 0);
1230
if (d->bd_hbuf)
1231
n += d->bd_hlen;
1232
BPFD_UNLOCK(d);
1233
1234
*(int *)addr = n;
1235
break;
1236
}
1237
/*
1238
* Get list of all tap points.
1239
*/
1240
case BIOCGETIFLIST:
1241
error = bpf_getiflist((struct bpf_iflist *)addr);
1242
break;
1243
1244
/*
1245
* Get buffer len [for read()].
1246
*/
1247
case BIOCGBLEN:
1248
BPFD_LOCK(d);
1249
*(u_int *)addr = d->bd_bufsize;
1250
BPFD_UNLOCK(d);
1251
break;
1252
1253
/*
1254
* Set buffer length.
1255
*/
1256
case BIOCSBLEN:
1257
error = bpf_ioctl_sblen(d, (u_int *)addr);
1258
break;
1259
1260
/*
1261
* Set link layer read filter.
1262
*/
1263
case BIOCSETF:
1264
case BIOCSETFNR:
1265
case BIOCSETWF:
1266
#ifdef COMPAT_FREEBSD32
1267
case BIOCSETF32:
1268
case BIOCSETFNR32:
1269
case BIOCSETWF32:
1270
#endif
1271
error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1272
break;
1273
1274
/*
1275
* Flush read packet buffer.
1276
*/
1277
case BIOCFLUSH:
1278
BPFD_LOCK(d);
1279
reset_d(d);
1280
BPFD_UNLOCK(d);
1281
break;
1282
1283
/*
1284
* Put interface into promiscuous mode.
1285
*/
1286
case BIOCPROMISC:
1287
BPF_LOCK();
1288
if (d->bd_bif == NULL) {
1289
/*
1290
* No interface attached yet.
1291
*/
1292
error = EINVAL;
1293
} else if (d->bd_promisc == 0) {
1294
struct bpf_if *bp = d->bd_bif;
1295
1296
if ((error = bif_promisc(bp, true)) == 0)
1297
d->bd_promisc = 1;
1298
}
1299
BPF_UNLOCK();
1300
break;
1301
1302
/*
1303
* Get current data link type.
1304
*/
1305
case BIOCGDLT:
1306
BPF_LOCK();
1307
if (d->bd_bif == NULL)
1308
error = EINVAL;
1309
else
1310
*(u_int *)addr = d->bd_bif->bif_dlt;
1311
BPF_UNLOCK();
1312
break;
1313
1314
/*
1315
* Get a list of supported data link types.
1316
*/
1317
#ifdef COMPAT_FREEBSD32
1318
case BIOCGDLTLIST32:
1319
{
1320
struct bpf_dltlist32 *list32;
1321
struct bpf_dltlist dltlist;
1322
1323
list32 = (struct bpf_dltlist32 *)addr;
1324
dltlist.bfl_len = list32->bfl_len;
1325
dltlist.bfl_list = PTRIN(list32->bfl_list);
1326
BPF_LOCK();
1327
if (d->bd_bif == NULL)
1328
error = EINVAL;
1329
else {
1330
error = bpf_getdltlist(d, &dltlist);
1331
if (error == 0)
1332
list32->bfl_len = dltlist.bfl_len;
1333
}
1334
BPF_UNLOCK();
1335
break;
1336
}
1337
#endif
1338
1339
case BIOCGDLTLIST:
1340
BPF_LOCK();
1341
if (d->bd_bif == NULL)
1342
error = EINVAL;
1343
else
1344
error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1345
BPF_UNLOCK();
1346
break;
1347
1348
/*
1349
* Set data link type.
1350
*/
1351
case BIOCSDLT:
1352
BPF_LOCK();
1353
if (d->bd_bif == NULL)
1354
error = EINVAL;
1355
else
1356
error = bpf_setdlt(d, *(u_int *)addr);
1357
BPF_UNLOCK();
1358
break;
1359
1360
/*
1361
* Get interface name.
1362
*/
1363
case BIOCGETIF:
1364
BPF_LOCK();
1365
if (d->bd_bif == NULL)
1366
error = EINVAL;
1367
else {
1368
struct bpf_if *const bp = d->bd_bif;
1369
struct ifreq *const ifr = (struct ifreq *)addr;
1370
1371
strlcpy(ifr->ifr_name, bp->bif_name,
1372
sizeof(ifr->ifr_name));
1373
}
1374
BPF_UNLOCK();
1375
break;
1376
1377
/*
1378
* Set interface.
1379
*/
1380
case BIOCSETIF: {
1381
struct ifreq *const ifr = (struct ifreq *)addr;
1382
struct bpf_if *bp;
1383
1384
/*
1385
* Behavior here depends on the buffering model. If we're
1386
* using kernel memory buffers, then we can allocate them here.
1387
* If we're using zero-copy, then the user process must have
1388
* registered buffers by the time we get here.
1389
*/
1390
BPFD_LOCK(d);
1391
if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
1392
d->bd_sbuf == NULL) {
1393
u_int size;
1394
1395
size = d->bd_bufsize;
1396
BPFD_UNLOCK(d);
1397
error = bpf_buffer_ioctl_sblen(d, &size);
1398
if (error != 0)
1399
break;
1400
} else
1401
BPFD_UNLOCK(d);
1402
BPF_LOCK();
1403
/*
1404
* Look through attached interfaces for the named one.
1405
*/
1406
LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
1407
if (strncmp(ifr->ifr_name, bp->bif_name,
1408
sizeof(ifr->ifr_name)) == 0)
1409
break;
1410
}
1411
if (bp != NULL)
1412
error = bpf_attachd(d, bp);
1413
else
1414
error = ENXIO;
1415
BPF_UNLOCK();
1416
break;
1417
}
1418
/*
1419
* Set read timeout.
1420
*/
1421
case BIOCSRTIMEOUT:
1422
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1423
case BIOCSRTIMEOUT32:
1424
#endif
1425
{
1426
struct timeval *tv = (struct timeval *)addr;
1427
#if defined(COMPAT_FREEBSD32)
1428
struct timeval32 *tv32;
1429
struct timeval tv64;
1430
1431
if (cmd == BIOCSRTIMEOUT32) {
1432
tv32 = (struct timeval32 *)addr;
1433
tv = &tv64;
1434
tv->tv_sec = tv32->tv_sec;
1435
tv->tv_usec = tv32->tv_usec;
1436
} else
1437
#endif
1438
tv = (struct timeval *)addr;
1439
1440
/*
1441
* Subtract 1 tick from tvtohz() since this isn't
1442
* a one-shot timer.
1443
*/
1444
if ((error = itimerfix(tv)) == 0)
1445
d->bd_rtout = tvtohz(tv) - 1;
1446
break;
1447
}
1448
1449
/*
1450
* Get read timeout.
1451
*/
1452
case BIOCGRTIMEOUT:
1453
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1454
case BIOCGRTIMEOUT32:
1455
#endif
1456
{
1457
struct timeval *tv;
1458
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1459
struct timeval32 *tv32;
1460
struct timeval tv64;
1461
1462
if (cmd == BIOCGRTIMEOUT32)
1463
tv = &tv64;
1464
else
1465
#endif
1466
tv = (struct timeval *)addr;
1467
1468
tv->tv_sec = d->bd_rtout / hz;
1469
tv->tv_usec = (d->bd_rtout % hz) * tick;
1470
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1471
if (cmd == BIOCGRTIMEOUT32) {
1472
tv32 = (struct timeval32 *)addr;
1473
tv32->tv_sec = tv->tv_sec;
1474
tv32->tv_usec = tv->tv_usec;
1475
}
1476
#endif
1477
1478
break;
1479
}
1480
1481
/*
1482
* Get packet stats.
1483
*/
1484
case BIOCGSTATS:
1485
{
1486
struct bpf_stat *bs = (struct bpf_stat *)addr;
1487
1488
/* XXXCSJP overflow */
1489
bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
1490
bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
1491
break;
1492
}
1493
1494
/*
1495
* Set immediate mode.
1496
*/
1497
case BIOCIMMEDIATE:
1498
BPFD_LOCK(d);
1499
d->bd_flags |= *(u_int *)addr ? BPFD_IMMEDIATE : 0;
1500
BPFD_UNLOCK(d);
1501
break;
1502
1503
case BIOCVERSION:
1504
{
1505
struct bpf_version *bv = (struct bpf_version *)addr;
1506
1507
bv->bv_major = BPF_MAJOR_VERSION;
1508
bv->bv_minor = BPF_MINOR_VERSION;
1509
break;
1510
}
1511
1512
/*
1513
* Get "header already complete" flag
1514
*/
1515
case BIOCGHDRCMPLT:
1516
BPFD_LOCK(d);
1517
*(u_int *)addr = d->bd_flags & BPFD_HDRCMPLT ? 1 : 0;
1518
BPFD_UNLOCK(d);
1519
break;
1520
1521
/*
1522
* Set "header already complete" flag
1523
*/
1524
case BIOCSHDRCMPLT:
1525
BPFD_LOCK(d);
1526
d->bd_flags |= *(u_int *)addr ? BPFD_HDRCMPLT : 0;
1527
BPFD_UNLOCK(d);
1528
break;
1529
1530
/*
1531
* Get packet direction flag
1532
*/
1533
case BIOCGDIRECTION:
1534
BPFD_LOCK(d);
1535
*(u_int *)addr = d->bd_direction;
1536
BPFD_UNLOCK(d);
1537
break;
1538
1539
/*
1540
* Set packet direction flag
1541
*/
1542
case BIOCSDIRECTION:
1543
{
1544
u_int direction;
1545
1546
direction = *(u_int *)addr;
1547
switch (direction) {
1548
case BPF_D_IN:
1549
case BPF_D_INOUT:
1550
case BPF_D_OUT:
1551
BPFD_LOCK(d);
1552
d->bd_direction = direction;
1553
BPFD_UNLOCK(d);
1554
break;
1555
default:
1556
error = EINVAL;
1557
}
1558
}
1559
break;
1560
1561
/*
1562
* Get packet timestamp format and resolution.
1563
*/
1564
case BIOCGTSTAMP:
1565
BPFD_LOCK(d);
1566
*(u_int *)addr = d->bd_tstamp;
1567
BPFD_UNLOCK(d);
1568
break;
1569
1570
/*
1571
* Set packet timestamp format and resolution.
1572
*/
1573
case BIOCSTSTAMP:
1574
{
1575
u_int func;
1576
1577
func = *(u_int *)addr;
1578
if (BPF_T_VALID(func))
1579
d->bd_tstamp = func;
1580
else
1581
error = EINVAL;
1582
}
1583
break;
1584
1585
case BIOCFEEDBACK:
1586
BPFD_LOCK(d);
1587
d->bd_flags |= *(u_int *)addr ? BPFD_FEEDBACK : 0;
1588
BPFD_UNLOCK(d);
1589
break;
1590
1591
case BIOCLOCK:
1592
BPFD_LOCK(d);
1593
d->bd_flags |= BPFD_LOCKED;
1594
BPFD_UNLOCK(d);
1595
break;
1596
1597
case FIONBIO: /* Non-blocking I/O */
1598
break;
1599
1600
case FIOASYNC: /* Send signal on receive packets */
1601
BPFD_LOCK(d);
1602
d->bd_flags |= *(u_int *)addr ? BPFD_ASYNC : 0;
1603
BPFD_UNLOCK(d);
1604
break;
1605
1606
case FIOSETOWN:
1607
/*
1608
* XXX: Add some sort of locking here?
1609
* fsetown() can sleep.
1610
*/
1611
error = fsetown(*(int *)addr, &d->bd_sigio);
1612
break;
1613
1614
case FIOGETOWN:
1615
BPFD_LOCK(d);
1616
*(int *)addr = fgetown(&d->bd_sigio);
1617
BPFD_UNLOCK(d);
1618
break;
1619
1620
/* This is deprecated, FIOSETOWN should be used instead. */
1621
case TIOCSPGRP:
1622
error = fsetown(-(*(int *)addr), &d->bd_sigio);
1623
break;
1624
1625
/* This is deprecated, FIOGETOWN should be used instead. */
1626
case TIOCGPGRP:
1627
*(int *)addr = -fgetown(&d->bd_sigio);
1628
break;
1629
1630
case BIOCSRSIG: /* Set receive signal */
1631
{
1632
u_int sig;
1633
1634
sig = *(u_int *)addr;
1635
1636
if (sig >= NSIG)
1637
error = EINVAL;
1638
else {
1639
BPFD_LOCK(d);
1640
d->bd_sig = sig;
1641
BPFD_UNLOCK(d);
1642
}
1643
break;
1644
}
1645
case BIOCGRSIG:
1646
BPFD_LOCK(d);
1647
*(u_int *)addr = d->bd_sig;
1648
BPFD_UNLOCK(d);
1649
break;
1650
1651
case BIOCGETBUFMODE:
1652
BPFD_LOCK(d);
1653
*(u_int *)addr = d->bd_bufmode;
1654
BPFD_UNLOCK(d);
1655
break;
1656
1657
case BIOCSETBUFMODE:
1658
/*
1659
* Allow the buffering mode to be changed as long as we
1660
* haven't yet committed to a particular mode. Our
1661
* definition of commitment, for now, is whether or not a
1662
* buffer has been allocated or an interface attached, since
1663
* that's the point where things get tricky.
1664
*/
1665
switch (*(u_int *)addr) {
1666
case BPF_BUFMODE_BUFFER:
1667
break;
1668
1669
case BPF_BUFMODE_ZBUF:
1670
if (bpf_zerocopy_enable)
1671
break;
1672
/* FALLSTHROUGH */
1673
1674
default:
1675
CURVNET_RESTORE();
1676
return (EINVAL);
1677
}
1678
1679
BPFD_LOCK(d);
1680
if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1681
d->bd_fbuf != NULL || d->bd_bif != NULL) {
1682
BPFD_UNLOCK(d);
1683
CURVNET_RESTORE();
1684
return (EBUSY);
1685
}
1686
d->bd_bufmode = *(u_int *)addr;
1687
BPFD_UNLOCK(d);
1688
break;
1689
1690
case BIOCGETZMAX:
1691
error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1692
break;
1693
1694
case BIOCSETZBUF:
1695
error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1696
break;
1697
1698
case BIOCROTZBUF:
1699
error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1700
break;
1701
1702
case BIOCSETVLANPCP:
1703
{
1704
u_int pcp;
1705
1706
pcp = *(u_int *)addr;
1707
if (pcp > BPF_PRIO_MAX || pcp < 0) {
1708
error = EINVAL;
1709
break;
1710
}
1711
d->bd_pcp = pcp;
1712
break;
1713
}
1714
}
1715
CURVNET_RESTORE();
1716
return (error);
1717
}
1718
1719
/*
1720
* Return list of available tapping points, or report how much space is
1721
* required for a successful return.
1722
*/
1723
static int
1724
bpf_getiflist(struct bpf_iflist *bi)
1725
{
1726
struct bpf_if *bp;
1727
u_int allsize, size, cnt;
1728
char *uaddr;
1729
1730
BPF_LOCK();
1731
1732
cnt = allsize = size = 0;
1733
LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
1734
allsize += strlen(bp->bif_name) + 1;
1735
if (++cnt == bi->bi_count)
1736
size = allsize;
1737
}
1738
if (size == 0)
1739
size = allsize;
1740
1741
if (bi->bi_size == 0) {
1742
BPF_UNLOCK();
1743
bi->bi_size = size;
1744
bi->bi_count = cnt;
1745
return (0);
1746
} else if (bi->bi_size < size) {
1747
BPF_UNLOCK();
1748
return (ENOSPC);
1749
}
1750
1751
uaddr = bi->bi_ubuf;
1752
cnt = 0;
1753
LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
1754
u_int len;
1755
int error;
1756
1757
len = strlen(bp->bif_name) + 1;
1758
if ((error = copyout(bp->bif_name, uaddr, len)) != 0) {
1759
BPF_UNLOCK();
1760
return (error);
1761
}
1762
if (++cnt == bi->bi_count)
1763
break;
1764
uaddr += len;
1765
}
1766
BPF_UNLOCK();
1767
bi->bi_count = cnt;
1768
1769
return (0);
1770
}
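/*
 * Illustrative userland sketch of the two-pass protocol implemented above
 * (field names taken from struct bpf_iflist as used here, fd is an
 * already-open bpf descriptor): call once with bi_size == 0 to learn the
 * space and count needed, then call again with a buffer attached:
 *
 *	struct bpf_iflist bi = { 0 };
 *	ioctl(fd, BIOCGETIFLIST, &bi);		returns bi_size and bi_count
 *	bi.bi_ubuf = malloc(bi.bi_size);
 *	ioctl(fd, BIOCGETIFLIST, &bi);		fills bi_ubuf with NUL-separated names
 */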
1771
1772
/*
1773
* Set d's packet filter program to fp. If this file already has a filter,
1774
* free it and replace it. Returns EINVAL for bogus requests.
1775
*
1776
* Note we use global lock here to serialize bpf_setf() and bpf_setif()
1777
* calls.
1778
*/
1779
static int
1780
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1781
{
1782
#ifdef COMPAT_FREEBSD32
1783
struct bpf_program fp_swab;
1784
struct bpf_program32 *fp32;
1785
#endif
1786
struct bpf_program_buffer *fcode;
1787
struct bpf_insn *filter;
1788
#ifdef BPF_JITTER
1789
bpf_jit_filter *jfunc;
1790
#endif
1791
size_t size;
1792
u_int flen;
1793
bool track_event;
1794
1795
#ifdef COMPAT_FREEBSD32
1796
switch (cmd) {
1797
case BIOCSETF32:
1798
case BIOCSETWF32:
1799
case BIOCSETFNR32:
1800
fp32 = (struct bpf_program32 *)fp;
1801
fp_swab.bf_len = fp32->bf_len;
1802
fp_swab.bf_insns =
1803
(struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1804
fp = &fp_swab;
1805
switch (cmd) {
1806
case BIOCSETF32:
1807
cmd = BIOCSETF;
1808
break;
1809
case BIOCSETWF32:
1810
cmd = BIOCSETWF;
1811
break;
1812
}
1813
break;
1814
}
1815
#endif
1816
1817
filter = NULL;
1818
#ifdef BPF_JITTER
1819
jfunc = NULL;
1820
#endif
1821
/*
1822
* Check the new filter's validity before acquiring any locks.
1823
* Allocate memory for new filter, if needed.
1824
*/
1825
flen = fp->bf_len;
1826
if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
1827
return (EINVAL);
1828
size = flen * sizeof(*fp->bf_insns);
1829
if (size > 0) {
1830
/* We're setting up new filter. Copy and check actual data. */
1831
fcode = bpf_program_buffer_alloc(size, M_WAITOK);
1832
filter = (struct bpf_insn *)fcode->buffer;
1833
if (copyin(fp->bf_insns, filter, size) != 0 ||
1834
!bpf_validate(filter, flen)) {
1835
free(fcode, M_BPF);
1836
return (EINVAL);
1837
}
1838
#ifdef BPF_JITTER
1839
if (cmd != BIOCSETWF) {
1840
/*
1841
* Filter is copied inside fcode and is
1842
* perfectly valid.
1843
*/
1844
jfunc = bpf_jitter(filter, flen);
1845
}
1846
#endif
1847
}
1848
1849
track_event = false;
1850
fcode = NULL;
1851
1852
BPF_LOCK();
1853
BPFD_LOCK(d);
1854
/* Set up new filter. */
1855
if (cmd == BIOCSETWF) {
1856
if (d->bd_wfilter != NULL) {
1857
fcode = __containerof((void *)d->bd_wfilter,
1858
struct bpf_program_buffer, buffer);
1859
#ifdef BPF_JITTER
1860
fcode->func = NULL;
1861
#endif
1862
}
1863
d->bd_wfilter = filter;
1864
} else {
1865
if (d->bd_rfilter != NULL) {
1866
fcode = __containerof((void *)d->bd_rfilter,
1867
struct bpf_program_buffer, buffer);
1868
#ifdef BPF_JITTER
1869
fcode->func = d->bd_bfilter;
1870
#endif
1871
}
1872
d->bd_rfilter = filter;
1873
#ifdef BPF_JITTER
1874
d->bd_bfilter = jfunc;
1875
#endif
1876
if (cmd == BIOCSETF)
1877
reset_d(d);
1878
1879
if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
1880
/*
1881
* Filter can be set several times without
1882
* specifying interface. In this case just mark d
1883
* as reader.
1884
*/
1885
d->bd_writer = 0;
1886
if (d->bd_bif != NULL) {
1887
/*
1888
* Remove descriptor from writers-only list
1889
* and add it to active readers list.
1890
*/
1891
CK_LIST_REMOVE(d, bd_next);
1892
CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
1893
d, bd_next);
1894
CTR2(KTR_NET,
1895
"%s: upgrade required by pid %d",
1896
__func__, d->bd_pid);
1897
track_event = true;
1898
}
1899
}
1900
}
1901
BPFD_UNLOCK(d);
1902
1903
if (fcode != NULL)
1904
NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);
1905
1906
if (track_event)
1907
bif_attachd(d->bd_bif);
1908
1909
BPF_UNLOCK();
1910
return (0);
1911
}
1912
1913
/*
1914
* Attach descriptor to a tap point, possibly detaching from the old one,
1915
* and reset the counters.
1916
* XXXGL: this KPI is subject to change
1917
*/
1918
static int
1919
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
1920
{
1921
bool writer;
1922
1923
BPF_LOCK_ASSERT();
1924
1925
/*
1926
* At this point, we expect the buffer is already allocated. If not,
1927
* return an error.
1928
*/
1929
switch (d->bd_bufmode) {
1930
case BPF_BUFMODE_BUFFER:
1931
case BPF_BUFMODE_ZBUF:
1932
if (d->bd_sbuf == NULL)
1933
return (EINVAL);
1934
break;
1935
1936
default:
1937
panic("%s: bufmode %d", __func__, d->bd_bufmode);
1938
}
1939
1940
if (bp == d->bd_bif) {
1941
BPFD_LOCK(d);
1942
reset_d(d);
1943
BPFD_UNLOCK(d);
1944
return (0);
1945
} else if (d->bd_bif != NULL)
1946
bpf_detachd(d, false);
1947
1948
/*
1949
* Save sysctl value to protect from sysctl change between reads.
1950
*/
1951
writer = V_bpf_optimize_writers || (d->bd_writer > 0);
1952
1953
/*
1954
* Point d at bp, and add d to the interface's list.
1955
* Since there are many applications using BPF for
1956
* sending raw packets only (dhcpd, cdpd are good examples)
1957
* we can delay adding d to the list of active listeners until
1958
* some filter is configured.
1959
*/
1960
BPFD_LOCK(d);
1961
/*
1962
* Hold reference to bpif while descriptor uses this interface.
1963
*/
1964
bpfif_ref(bp);
1965
d->bd_bif = bp;
1966
if (writer) {
1967
/* Add to writers-only list */
1968
CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
1969
/*
1970
* We decrement bd_writer on every filter set operation.
1971
* First BIOCSETF is done by pcap_open_live() to set up
1972
* snap length. After that application usually sets its own
1973
* filter.
1974
*/
1975
d->bd_writer = 2;
1976
} else
1977
CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
1978
1979
reset_d(d);
1980
1981
/* Trigger EVFILT_WRITE events. */
1982
bpf_wakeup(d);
1983
1984
BPFD_UNLOCK(d);
1985
1986
CTR3(KTR_NET, "%s: called by pid %d, adding to %s list",
1987
__func__, d->bd_pid, d->bd_writer ? "writer" : "active");
1988
1989
if (!writer)
1990
bif_attachd(bp);
1991
1992
return (0);
1993
}
1994
1995
/*
1996
* Support for select() and poll() system calls
1997
*
1998
* Return true iff the specific operation will not block indefinitely.
1999
* Otherwise, return false but make a note that a selwakeup() must be done.
2000
*/
2001
static int
2002
bpfpoll(struct cdev *dev, int events, struct thread *td)
2003
{
2004
struct bpf_d *d;
2005
int revents;
2006
2007
if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
2008
return (events &
2009
(POLLHUP | POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM));
2010
2011
/*
2012
* Refresh PID associated with this descriptor.
2013
*/
2014
revents = events & (POLLOUT | POLLWRNORM);
2015
BPFD_LOCK(d);
2016
BPF_PID_REFRESH(d, td);
2017
if (events & (POLLIN | POLLRDNORM)) {
2018
if (bpf_ready(d))
2019
revents |= events & (POLLIN | POLLRDNORM);
2020
else {
2021
selrecord(td, &d->bd_sel);
2022
/* Start the read timeout if necessary. */
2023
if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2024
callout_reset(&d->bd_callout, d->bd_rtout,
2025
bpf_timed_out, d);
2026
d->bd_state = BPF_WAITING;
2027
}
2028
}
2029
}
2030
BPFD_UNLOCK(d);
2031
return (revents);
2032
}
2033
2034
/*
2035
* Support for kevent() system call. Register EVFILT_READ filters and
2036
* reject all others.
2037
*/
2038
int
2039
bpfkqfilter(struct cdev *dev, struct knote *kn)
2040
{
2041
struct bpf_d *d;
2042
2043
if (devfs_get_cdevpriv((void **)&d) != 0)
2044
return (1);
2045
2046
switch (kn->kn_filter) {
2047
case EVFILT_READ:
2048
kn->kn_fop = &bpfread_filtops;
2049
break;
2050
2051
case EVFILT_WRITE:
2052
kn->kn_fop = &bpfwrite_filtops;
2053
break;
2054
2055
default:
2056
return (1);
2057
}
2058
2059
/*
2060
* Refresh PID associated with this descriptor.
2061
*/
2062
BPFD_LOCK(d);
2063
BPF_PID_REFRESH_CUR(d);
2064
kn->kn_hook = d;
2065
knlist_add(&d->bd_sel.si_note, kn, 1);
2066
BPFD_UNLOCK(d);
2067
2068
return (0);
2069
}
2070
2071
static void
2072
filt_bpfdetach(struct knote *kn)
2073
{
2074
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2075
2076
knlist_remove(&d->bd_sel.si_note, kn, 0);
2077
}
2078
2079
static int
2080
filt_bpfread(struct knote *kn, long hint)
2081
{
2082
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2083
int ready;
2084
2085
BPFD_LOCK_ASSERT(d);
2086
ready = bpf_ready(d);
2087
if (ready) {
2088
kn->kn_data = d->bd_slen;
2089
/*
2090
* Ignore the hold buffer if it is being copied to user space.
2091
*/
2092
if (!(d->bd_flags & BPFD_HBUF_INUSE) && d->bd_hbuf)
2093
kn->kn_data += d->bd_hlen;
2094
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2095
callout_reset(&d->bd_callout, d->bd_rtout,
2096
bpf_timed_out, d);
2097
d->bd_state = BPF_WAITING;
2098
}
2099
2100
return (ready);
2101
}
2102
2103
static int
2104
filt_bpfwrite(struct knote *kn, long hint)
2105
{
2106
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2107
2108
BPFD_LOCK_ASSERT(d);
2109
2110
if (d->bd_bif == NULL) {
2111
kn->kn_data = 0;
2112
return (0);
2113
} else {
2114
kn->kn_data = bif_wrsize(d->bd_bif);
2115
return (1);
2116
}
2117
}
2118
2119
#define BPF_TSTAMP_NONE 0
2120
#define BPF_TSTAMP_FAST 1
2121
#define BPF_TSTAMP_NORMAL 2
2122
#define BPF_TSTAMP_EXTERN 3
2123
2124
static int
2125
bpf_ts_quality(int tstype)
2126
{
2127
2128
if (tstype == BPF_T_NONE)
2129
return (BPF_TSTAMP_NONE);
2130
if ((tstype & BPF_T_FAST) != 0)
2131
return (BPF_TSTAMP_FAST);
2132
2133
return (BPF_TSTAMP_NORMAL);
2134
}
2135
2136
static int
2137
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
2138
{
2139
struct timespec ts;
2140
struct m_tag *tag;
2141
int quality;
2142
2143
quality = bpf_ts_quality(tstype);
2144
if (quality == BPF_TSTAMP_NONE)
2145
return (quality);
2146
2147
if (m != NULL) {
2148
if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) {
2149
mbuf_tstmp2timespec(m, &ts);
2150
timespec2bintime(&ts, bt);
2151
return (BPF_TSTAMP_EXTERN);
2152
}
2153
tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2154
if (tag != NULL) {
2155
*bt = *(struct bintime *)(tag + 1);
2156
return (BPF_TSTAMP_EXTERN);
2157
}
2158
}
2159
if (quality == BPF_TSTAMP_NORMAL)
2160
binuptime(bt);
2161
else
2162
getbinuptime(bt);
2163
2164
return (quality);
2165
}
2166
2167
/*
2168
* Incoming linkage from device drivers. Process the packet pkt, of length
2169
* pktlen, which is stored in a contiguous buffer. The packet is parsed
2170
* by each process' filter, and if accepted, stashed into the corresponding
2171
* buffer.
2172
*/
2173
void
2174
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2175
{
2176
struct epoch_tracker et;
2177
struct bintime bt;
2178
struct bpf_d *d;
2179
#ifdef BPF_JITTER
2180
bpf_jit_filter *bf;
2181
#endif
2182
u_int slen;
2183
int gottime;
2184
2185
gottime = BPF_TSTAMP_NONE;
2186
NET_EPOCH_ENTER(et);
2187
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2188
counter_u64_add(d->bd_rcount, 1);
2189
/*
2190
* NB: We don't check the direction here since there
2191
* is no way for the caller to indicate to us whether this
2192
* packet is inbound or outbound. In the bpf_mtap() routines,
2193
* we use the interface pointers on the mbuf to figure it out.
2194
*/
2195
#ifdef BPF_JITTER
2196
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2197
if (bf != NULL)
2198
slen = (*(bf->func))(pkt, pktlen, pktlen);
2199
else
2200
#endif
2201
slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
2202
if (slen != 0) {
2203
/*
2204
* Filter matches. Let's acquire the write lock.
2205
*/
2206
BPFD_LOCK(d);
2207
counter_u64_add(d->bd_fcount, 1);
2208
if (gottime < bpf_ts_quality(d->bd_tstamp))
2209
gottime = bpf_gettime(&bt, d->bd_tstamp,
2210
NULL);
2211
#ifdef MAC
2212
if (bif_mac_check_receive(bp, d) == 0)
2213
#endif
2214
catchpacket(d, pkt, pktlen, slen,
2215
bpf_append_bytes, &bt);
2216
BPFD_UNLOCK(d);
2217
}
2218
}
2219
NET_EPOCH_EXIT(et);
2220
}
2221
2222
void
2223
bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
2224
{
2225
if (bpf_peers_present(ifp->if_bpf))
2226
bpf_tap(ifp->if_bpf, pkt, pktlen);
2227
}
2228
2229
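/*
 * Illustrative sketch: bpf_tap()/bpf_tap_if() are for drivers that hold the
 * whole frame in one contiguous buffer rather than an mbuf chain.  A
 * hypothetical driver (the names "buf" and "len" are placeholders) would do
 *
 *	if (bpf_peers_present(ifp->if_bpf))
 *		bpf_tap(ifp->if_bpf, buf, len);
 *
 * or simply call the wrapper bpf_tap_if(ifp, buf, len), which performs the
 * same bpf_peers_present() check before descending into bpf_tap().
 */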
/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 * Locking model is explained in bpf_tap().
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	gottime = BPF_TSTAMP_NONE;

	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (bpf_chkdir(d, m))
			continue;
		counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen,
			    pktlen);
		else
#endif
			slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (bif_mac_check_receive(bp, d) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

void
bpf_mtap_if(if_t ifp, struct mbuf *m)
{
	if (bpf_peers_present(ifp->if_bpf)) {
		M_ASSERTVALID(m);
		bpf_mtap(ifp->if_bpf, m);
	}
}
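/*
 * Illustrative sketch: for mbuf-based drivers the usual pattern in the RX
 * and TX paths is
 *
 *	if (bpf_peers_present(ifp->if_bpf))
 *		bpf_mtap(ifp->if_bpf, m);
 *
 * or the equivalent wrapper bpf_mtap_if(ifp, m).  Ethernet drivers commonly
 * reach this through convenience macros (e.g. ETHER_BPF_MTAP) rather than
 * calling bpf_mtap() directly.
 */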
/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_flags = 0;
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	gottime = BPF_TSTAMP_NONE;

	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (bpf_chkdir(d, m))
			continue;
		counter_u64_add(d->bd_rcount, 1);
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (bif_mac_check_receive(bp, d) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

void
bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
{
	if (bpf_peers_present(ifp->if_bpf)) {
		M_ASSERTVALID(m);
		bpf_mtap2(ifp->if_bpf, data, dlen, m);
	}
}
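/*
 * Illustrative sketch: bpf_mtap2() lets a caller prepend a small
 * pseudo-header that is not part of the mbuf chain.  For example, a
 * loopback-style interface whose link header is just the address family
 * could do
 *
 *	uint32_t af = AF_INET;
 *
 *	bpf_mtap2_if(ifp, &af, sizeof(af), m);
 *
 * so that listeners see the 4-byte family word followed by the packet.
 */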
#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen;

	hdrlen = d->bd_bif->bif_hdrlen;
#ifndef BURN_BRIDGES
	if (d->bd_tstamp == BPF_T_NONE ||
	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32)
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
		else
#endif
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
	else
#endif
		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		hdrlen = BPF_WORDALIGN32(hdrlen);
	else
#endif
		hdrlen = BPF_WORDALIGN(hdrlen);

	return (hdrlen - d->bd_bif->bif_hdrlen);
}
static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
	struct bintime bt2, boottimebin;
	struct timeval tsm;
	struct timespec tsn;

	if ((tstype & BPF_T_MONOTONIC) == 0) {
		bt2 = *bt;
		getboottimebin(&boottimebin);
		bintime_add(&bt2, &boottimebin);
		bt = &bt2;
	}
	switch (BPF_T_FORMAT(tstype)) {
	case BPF_T_MICROTIME:
		bintime2timeval(bt, &tsm);
		ts->bt_sec = tsm.tv_sec;
		ts->bt_frac = tsm.tv_usec;
		break;
	case BPF_T_NANOTIME:
		bintime2timespec(bt, &tsn);
		ts->bt_sec = tsn.tv_sec;
		ts->bt_frac = tsn.tv_nsec;
		break;
	case BPF_T_BINTIME:
		ts->bt_sec = bt->sec;
		ts->bt_frac = bt->frac;
		break;
	}
}
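/*
 * Note (informational): for BPF_T_BINTIME the raw struct bintime is copied
 * out, i.e. bt_sec is a second count and bt_frac is a 64-bit binary fraction
 * of a second.  A consumer can derive microseconds the same way
 * bintime2timeval() does:
 *
 *	usec = ((uint64_t)1000000 * (uint32_t)(bt_frac >> 32)) >> 32;
 */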
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct bintime *bt)
{
	static char zeroes[BPF_ALIGNMENT];
	struct bpf_xhdr hdr;
#ifndef BURN_BRIDGES
	struct bpf_hdr hdr_old;
#ifdef COMPAT_FREEBSD32
	struct bpf_hdr32 hdr32_old;
#endif
#endif
	int caplen, curlen, hdrlen, pad, totlen;
	int do_wakeup = 0;
	int do_timestamp;
	int tstype;

	BPFD_LOCK_ASSERT(d);
	if (d->bd_bif == NULL) {
		/* Descriptor was detached in concurrent thread */
		counter_u64_add(d->bd_dcount, 1);
		return;
	}

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	hdrlen = bpf_hdrlen(d);
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room.
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wake up pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			counter_u64_add(d->bd_dcount, 1);
			return;
		}
		KASSERT(!(d->bd_flags & BPFD_HBUF_INUSE),
		    ("hold buffer is in use"));
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else {
		if ((d->bd_flags & BPFD_IMMEDIATE) ||
		    d->bd_state == BPF_TIMED_OUT) {
			/*
			 * Immediate mode is set, or the read timeout has
			 * already expired during a select call.  A packet
			 * arrived, so the reader should be woken up.
			 */
			do_wakeup = 1;
		}
		pad = curlen - d->bd_slen;
		KASSERT(pad >= 0 && pad <= sizeof(zeroes),
		    ("%s: invalid pad byte count %d", __func__, pad));
		if (pad > 0) {
			/* Zero pad bytes. */
			bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes,
			    pad);
		}
	}

	caplen = totlen - hdrlen;
	tstype = d->bd_tstamp;
	do_timestamp = tstype != BPF_T_NONE;
#ifndef BURN_BRIDGES
	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
		struct bpf_ts ts;
		if (do_timestamp)
			bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32) {
			bzero(&hdr32_old, sizeof(hdr32_old));
			if (do_timestamp) {
				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
			}
			hdr32_old.bh_datalen = pktlen;
			hdr32_old.bh_hdrlen = hdrlen;
			hdr32_old.bh_caplen = caplen;
			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
			    sizeof(hdr32_old));
			goto copy;
		}
#endif
		bzero(&hdr_old, sizeof(hdr_old));
		if (do_timestamp) {
			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
		}
		hdr_old.bh_datalen = pktlen;
		hdr_old.bh_hdrlen = hdrlen;
		hdr_old.bh_caplen = caplen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
		    sizeof(hdr_old));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	if (do_timestamp)
		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = caplen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifndef BURN_BRIDGES
copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
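/*
 * Informational sketch of the buffer scheme used above: each descriptor
 * cycles a packet through up to three buffers.
 *
 *	bd_sbuf		store buffer; catchpacket() appends here
 *	bd_hbuf		hold buffer; read(2)/zero-copy hands this to user space
 *	bd_fbuf		free buffer; spare that ROTATE_BUFFERS() promotes
 *
 * When the store buffer cannot take the next packet, ROTATE_BUFFERS() makes
 * it the hold buffer, the free buffer becomes the new store buffer, and the
 * reader is woken; with no free buffer available the packet is dropped and
 * bd_dcount is bumped.
 */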
/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpfd_free(epoch_context_t ctx)
{
	struct bpf_d *d;
	struct bpf_program_buffer *p;

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and has not yet been marked
	 * free.
	 */
	d = __containerof(ctx, struct bpf_d, epoch_ctx);
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		p = __containerof((void *)d->bd_rfilter,
		    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
		p->func = d->bd_bfilter;
#endif
		bpf_program_buffer_free(&p->epoch_ctx);
	}
	if (d->bd_wfilter != NULL) {
		p = __containerof((void *)d->bd_wfilter,
		    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
		p->func = NULL;
#endif
		bpf_program_buffer_free(&p->epoch_ctx);
	}

	mtx_destroy(&d->bd_lock);
	counter_u64_free(d->bd_rcount);
	counter_u64_free(d->bd_dcount);
	counter_u64_free(d->bd_fcount);
	counter_u64_free(d->bd_wcount);
	counter_u64_free(d->bd_wfcount);
	counter_u64_free(d->bd_wdcount);
	counter_u64_free(d->bd_zcopy);
	free(d, M_BPF);
}
/*
 * Attach a tap point to bpf.
 * XXX: with the current KPI it is the consumer's responsibility to avoid
 * duplicates.
 */
struct bpf_if *
bpf_attach(const char *name, u_int dlt, u_int hdrlen,
    const struct bif_methods *methods, void *sc)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);

	CK_LIST_INIT(&bp->bif_dlist);
	CK_LIST_INIT(&bp->bif_wlist);
	bp->bif_dlt = dlt;
	bp->bif_hdrlen = hdrlen;
	bp->bif_softc = sc;
	bp->bif_name = name;
	bp->bif_methods = methods;
	refcount_init(&bp->bif_refcnt, 1);
	BPF_LOCK();
	LIST_INSERT_HEAD(&V_bpf_iflist, bp, bif_next);
	BPF_UNLOCK();

	return (bp);
}
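/*
 * Illustrative sketch (hypothetical consumer; "foo_bpf_methods" is a
 * placeholder name): an Ethernet-like driver would typically attach its tap
 * point at interface creation time, e.g.
 *
 *	ifp->if_bpf = bpf_attach(if_name(ifp), DLT_EN10MB, ETHER_HDR_LEN,
 *	    &foo_bpf_methods, ifp);
 *
 * where foo_bpf_methods supplies the driver callbacks referenced elsewhere
 * in this file (e.g. bif_promisc, bif_wrsize) and the softc pointer passed
 * back to those callbacks is, in this sketch, the ifnet itself.
 */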
#ifdef VIMAGE
/*
 * Detach descriptors on interface's vmove event.
 * XXXGL: shouldn't be a special case, but a full detach.
 */
void
bpf_ifdetach(struct ifnet *ifp)
{
	struct bpf_if *bp;
	struct bpf_d *d;

	BPF_LOCK();
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		/* XXXGL: assuming softc is ifnet here */
		if (bp->bif_softc != ifp)
			continue;

		/* Detach common descriptors */
		while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
			bpf_detachd(d, true);
		}

		/* Detach writer-only descriptors */
		while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
			bpf_detachd(d, true);
		}
	}
	BPF_UNLOCK();
}
#endif
/*
 * Detach bpf tap point.  This involves detaching each descriptor associated
 * with the interface.  Notify each descriptor as it's detached so that any
 * sleepers wake up and get ENXIO.
 */
void
bpf_detach(struct bpf_if *bp)
{
	struct bpf_d *d;

	BPF_LOCK();
	LIST_REMOVE(bp, bif_next);

	CTR3(KTR_NET, "%s: scheduling free for encap %d for bp %p",
	    __func__, bp->bif_dlt, bp);

	/* Detach common descriptors */
	while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d, true);
	}

	/* Detach writer-only descriptors */
	while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
		bpf_detachd(d, true);
	}
	bpfif_rele(bp);
	BPF_UNLOCK();
}
#ifdef VIMAGE
/*
 * Move bpf to a different VNET.  This KPI is a crutch to support if_vmove
 * and is not supposed to be used anywhere else.
 */
void
bpf_vmove(struct bpf_if *bp)
{

	BPF_LOCK();
	LIST_REMOVE(bp, bif_next);
	LIST_INSERT_HEAD(&V_bpf_iflist, bp, bif_next);
	BPF_UNLOCK();
}
#endif

bool
bpf_peers_present_if(struct ifnet *ifp)
{
	return (bpf_peers_present(ifp->if_bpf));
}
/*
 * Get a list of available data link types for the tap point.  If a tap
 * point attaches more than once, it is expected to attach with different
 * DLTs but with the same name pointer.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	const char *name;
	struct bpf_if *bp;
	u_int *lst;
	int error, n, n1;

	BPF_LOCK_ASSERT();

	name = d->bd_bif->bif_name;
	n1 = 0;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		if (bp->bif_name == name)
			n1++;
	}
	if (bfl->bfl_list == NULL) {
		bfl->bfl_len = n1;
		return (0);
	}
	if (n1 > bfl->bfl_len)
		return (ENOMEM);

	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
	n = 0;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		if (bp->bif_name != name)
			continue;
		lst[n++] = bp->bif_dlt;
	}
	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
	free(lst, M_TEMP);
	bfl->bfl_len = n;
	return (error);
}
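/*
 * Illustrative sketch of the matching user-space side: BIOCGDLTLIST is
 * normally issued twice, first with bfl_list == NULL to learn the count,
 * then with a buffer of that size; BIOCSDLT then selects one of the
 * returned DLTs.
 *
 *	struct bpf_dltlist bfl = { 0, NULL };
 *	u_int dlt;
 *
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		(learns bfl.bfl_len)
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		(fills the list)
 *	dlt = bfl.bfl_list[0];
 *	ioctl(fd, BIOCSDLT, &dlt);
 */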
/*
 * Set the data link type of a BPF descriptor.  The convention is that the
 * application first does BIOCSETIF and then BIOCSETDLT, so the descriptor
 * is expected to be already attached.  Only one kernel facility provides
 * tap points with the same name but different DLTs: ieee80211_radiotap.
 *
 * XXXGL: this function definitely looks suspicious, e.g. it clearly doesn't
 * clear promisc on the old bpf_if.  The convention about reference counting
 * is also unclear.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	const char *name;
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();
	MPASS(d->bd_bif != NULL);

	/*
	 * It is safe to check bd_bif without BPFD_LOCK; it cannot be
	 * changed while we hold the global lock.
	 */
	if (d->bd_bif->bif_dlt == dlt)
		return (0);

	name = d->bd_bif->bif_name;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		if (bp->bif_name == name && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);

	opromisc = d->bd_promisc;
	bpf_detachd(d, false);
	bpf_attachd(d, bp);
	if (opromisc) {
		error = bp->bif_methods->bif_promisc(bp->bif_softc, true);
		if (error)
			printf("%s: bif_promisc on %s failed (%d)\n",
			    __func__, bp->bif_name, error);
		else
			d->bd_promisc = 1;
	}
	return (0);
}
static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	sx_init(&bpf_sx, "bpf global lock");
	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");
}
/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.  At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	BPF_LOCK();
	/*
	 * We are protected by the global lock here; interfaces and
	 * descriptors cannot be deleted while we hold it.
	 */
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			counter_u64_zero(bd->bd_rcount);
			counter_u64_zero(bd->bd_dcount);
			counter_u64_zero(bd->bd_fcount);
			counter_u64_zero(bd->bd_wcount);
			counter_u64_zero(bd->bd_wfcount);
			counter_u64_zero(bd->bd_zcopy);
		}
	}
	BPF_UNLOCK();
}
/*
 * Fill filter statistics
 */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	BPF_LOCK_ASSERT();
	bzero(d, sizeof(*d));
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_flags & BPFD_IMMEDIATE ? 1 : 0;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_flags & BPFD_HDRCMPLT ? 1 : 0;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_flags & BPFD_FEEDBACK ? 1 : 0;
	d->bd_async = bd->bd_flags & BPFD_ASYNC ? 1 : 0;
	d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
	d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
	d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname, bd->bd_bif->bif_name, sizeof(d->bd_ifname));
	d->bd_locked = bd->bd_flags & BPFD_LOCKED ? 1 : 0;
	d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
	d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
	d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
	d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
	d->bd_bufmode = bd->bd_bufmode;
}
/*
 * Handle `netstat -B' stats request
 */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	static const struct xbpf_d zerostats;
	struct xbpf_d *xbdbuf, *xbd, tempstats;
	u_int bpfd_cnt, index;
	int error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(tempstats))
			return (EINVAL);
		memset(&tempstats, 0, sizeof(tempstats));
		error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
		if (error)
			return (error);
		if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	bpfd_cnt = 0;
	BPF_LOCK();
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next)
			bpfd_cnt++;
		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next)
			bpfd_cnt++;
	}
	if (bpfd_cnt == 0 || req->oldptr == NULL) {
		BPF_UNLOCK();
		return (SYSCTL_OUT(req, 0, bpfd_cnt * sizeof(*xbd)));
	}
	if (req->oldlen < bpfd_cnt * sizeof(*xbd)) {
		BPF_UNLOCK();
		return (ENOMEM);
	}
	xbdbuf = malloc(bpfd_cnt * sizeof(*xbd), M_BPF, M_WAITOK);
	index = 0;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		/* Send writers-only first */
		CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
			MPASS(index <= bpfd_cnt);
			xbd = &xbdbuf[index++];
			bpfstats_fill_xbpf(xbd, bd);
		}
		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			MPASS(index <= bpfd_cnt);
			xbd = &xbdbuf[index++];
			bpfstats_fill_xbpf(xbd, bd);
		}
	}
	BPF_UNLOCK();
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}
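/*
 * Informational note: this handler is what backs "netstat -B".  A consumer
 * could fetch the same per-descriptor statistics directly (sketch, assuming
 * the conventional sysctl name for this node):
 *
 *	struct xbpf_d *buf;
 *	size_t len = 0;
 *
 *	sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("net.bpf.stats", buf, &len, NULL, 0);
 *
 * and then walk the returned array of struct xbpf_d records.
 */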
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */

/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap_if(if_t ifp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER();

	ifp->if_bpf = __DECONST(struct bpf_if *, &dead_bpf_if);
}

void
bpfdetach(struct ifnet *ifp)
{
}

bool
bpf_peers_present_if(struct ifnet *ifp)
{
	return (false);
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return (-1);	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return (0);	/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */