GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/net/bpf.c
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California.  All rights reserved.
* Copyright (c) 2019 Andrey V. Elsukov <[email protected]>
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
* Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
39
#include "opt_bpf.h"
40
#include "opt_ddb.h"
41
#include "opt_netgraph.h"
42
43
#include <sys/param.h>
44
#include <sys/conf.h>
45
#include <sys/eventhandler.h>
46
#include <sys/fcntl.h>
47
#include <sys/jail.h>
48
#include <sys/ktr.h>
49
#include <sys/lock.h>
50
#include <sys/malloc.h>
51
#include <sys/mbuf.h>
52
#include <sys/mutex.h>
53
#include <sys/time.h>
54
#include <sys/priv.h>
55
#include <sys/proc.h>
56
#include <sys/signalvar.h>
57
#include <sys/filio.h>
58
#include <sys/sockio.h>
59
#include <sys/ttycom.h>
60
#include <sys/uio.h>
61
#include <sys/sysent.h>
62
#include <sys/systm.h>
63
64
#include <sys/event.h>
65
#include <sys/file.h>
66
#include <sys/poll.h>
67
#include <sys/proc.h>
68
69
#include <sys/socket.h>
70
71
#ifdef DDB
72
#include <ddb/ddb.h>
73
#endif
74
75
#include <net/if.h>
76
#include <net/if_var.h>
77
#include <net/if_private.h>
78
#include <net/if_vlan_var.h>
79
#include <net/if_dl.h>
80
#include <net/bpf.h>
81
#include <net/bpf_buffer.h>
82
#ifdef BPF_JITTER
83
#include <net/bpf_jitter.h>
84
#endif
85
#include <net/bpf_zerocopy.h>
86
#include <net/bpfdesc.h>
87
#include <net/route.h>
88
#include <net/vnet.h>
89
90
#include <netinet/in.h>
91
#include <netinet/if_ether.h>
92
#include <sys/kernel.h>
93
#include <sys/sysctl.h>
94
95
#include <net80211/ieee80211_freebsd.h>
96
97
#include <security/mac/mac_framework.h>
98
99
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
100
101
static const struct bpf_if_ext dead_bpf_if = {
102
.bif_dlist = CK_LIST_HEAD_INITIALIZER()
103
};
104
105
struct bpf_if {
106
#define bif_next bif_ext.bif_next
107
#define bif_dlist bif_ext.bif_dlist
108
struct bpf_if_ext bif_ext; /* public members */
109
u_int bif_dlt; /* link layer type */
110
u_int bif_hdrlen; /* length of link header */
111
struct bpfd_list bif_wlist; /* writer-only list */
112
struct ifnet *bif_ifp; /* corresponding interface */
113
struct bpf_if **bif_bpf; /* Pointer to pointer to us */
114
volatile u_int bif_refcnt;
115
struct epoch_context epoch_ctx;
116
};
117
118
CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
119
120
struct bpf_program_buffer {
121
struct epoch_context epoch_ctx;
122
#ifdef BPF_JITTER
123
bpf_jit_filter *func;
124
#endif
125
void *buffer[0];
126
};
127
128
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
129
130
#define PRINET 26 /* interruptible */
131
#define BPF_PRIO_MAX 7
132
133
#define SIZEOF_BPF_HDR(type) \
134
(offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
135
136
#ifdef COMPAT_FREEBSD32
137
#include <sys/mount.h>
138
#include <compat/freebsd32/freebsd32.h>
139
#define BPF_ALIGNMENT32 sizeof(int32_t)
140
#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
141
142
#ifndef BURN_BRIDGES
143
/*
144
* 32-bit version of structure prepended to each packet. We use this header
145
* instead of the standard one for 32-bit streams. We mark the a stream as
146
* 32-bit the first time we see a 32-bit compat ioctl request.
147
*/
148
struct bpf_hdr32 {
149
struct timeval32 bh_tstamp; /* time stamp */
150
uint32_t bh_caplen; /* length of captured portion */
151
uint32_t bh_datalen; /* original length of packet */
152
uint16_t bh_hdrlen; /* length of bpf header (this struct
153
plus alignment padding) */
154
};
155
#endif
156
157
struct bpf_program32 {
158
u_int bf_len;
159
uint32_t bf_insns;
160
};
161
162
struct bpf_dltlist32 {
163
u_int bfl_len;
164
u_int bfl_list;
165
};
166
167
#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
168
#define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
169
#define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
170
#define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32)
171
#define BIOCSETWF32 _IOW('B', 123, struct bpf_program32)
172
#define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32)
173
#endif
174
175
#define BPF_LOCK() sx_xlock(&bpf_sx)
176
#define BPF_UNLOCK() sx_xunlock(&bpf_sx)
177
#define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
/*
* bpf_iflist is a list of BPF interface structures, each corresponding to a
* specific DLT.  The same network interface might have several BPF interface
* structures registered by different layers in the stack (e.g., 802.11
* frames, Ethernet frames, etc.).
*/
CK_LIST_HEAD(bpf_iflist, bpf_if);
185
static struct bpf_iflist bpf_iflist = CK_LIST_HEAD_INITIALIZER();
186
static struct sx bpf_sx; /* bpf global lock */
187
static int bpf_bpfd_cnt;
188
189
static void bpfif_ref(struct bpf_if *);
190
static void bpfif_rele(struct bpf_if *);
191
192
static void bpfd_ref(struct bpf_d *);
193
static void bpfd_rele(struct bpf_d *);
194
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
195
static void bpf_detachd(struct bpf_d *);
196
static void bpf_detachd_locked(struct bpf_d *, bool);
197
static void bpfd_free(epoch_context_t);
198
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
199
struct sockaddr *, int *, struct bpf_d *);
200
static int bpf_setif(struct bpf_d *, struct ifreq *);
201
static void bpf_timed_out(void *);
202
static __inline void
203
bpf_wakeup(struct bpf_d *);
204
static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
205
void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
206
struct bintime *);
207
static void reset_d(struct bpf_d *);
208
static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
209
static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
210
static int bpf_setdlt(struct bpf_d *, u_int);
211
static void filt_bpfdetach(struct knote *);
212
static int filt_bpfread(struct knote *, long);
213
static int filt_bpfwrite(struct knote *, long);
214
static void bpf_drvinit(void *);
215
static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
216
217
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
218
"bpf sysctl");
219
int bpf_maxinsns = BPF_MAXINSNS;
220
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
221
&bpf_maxinsns, 0, "Maximum bpf program instructions");
222
static int bpf_zerocopy_enable = 0;
223
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
224
&bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
225
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
226
bpf_stats_sysctl, "bpf statistics portal");
227
228
VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
229
#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
230
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN,
231
&VNET_NAME(bpf_optimize_writers), 0,
232
"Do not send packets until BPF program is set");
233
234
static d_open_t bpfopen;
235
static d_read_t bpfread;
236
static d_write_t bpfwrite;
237
static d_ioctl_t bpfioctl;
238
static d_poll_t bpfpoll;
239
static d_kqfilter_t bpfkqfilter;
240
241
static struct cdevsw bpf_cdevsw = {
242
.d_version = D_VERSION,
243
.d_open = bpfopen,
244
.d_read = bpfread,
245
.d_write = bpfwrite,
246
.d_ioctl = bpfioctl,
247
.d_poll = bpfpoll,
248
.d_name = "bpf",
249
.d_kqfilter = bpfkqfilter,
250
};
251
252
static const struct filterops bpfread_filtops = {
253
.f_isfd = 1,
254
.f_detach = filt_bpfdetach,
255
.f_event = filt_bpfread,
256
};
257
258
static const struct filterops bpfwrite_filtops = {
259
.f_isfd = 1,
260
.f_detach = filt_bpfdetach,
261
.f_event = filt_bpfwrite,
262
};
263
264
/*
* LOCKING MODEL USED BY BPF
*
* Locks:
* 1) Global lock (BPF_LOCK).  An sx lock, used to protect some global
* counters and all bpf_iflist changes, and to serialize ioctl access to
* bpf descriptors.
* 2) Descriptor lock (BPFD_LOCK).  A mutex, used to protect the BPF buffers
* and the various structure fields used by the bpf_*tap* code.
*
* Lock order: global lock, then descriptor lock.
*
* There are several possible consumers:
*
* 1. The kernel registers an interface pointer with bpfattach().
* Each call allocates a new bpf_if structure, references the ifnet pointer
* and links the bpf_if into the bpf_iflist chain.  This is protected by the
* global lock.
*
* 2. A userland application issues ioctl() calls on a bpf_d descriptor.
* All such calls are serialized with the global lock.  BPF filters can be
* changed, but the pointer to the old filter is freed using NET_EPOCH_CALL().
* Thus it is safe for the bpf_tap/bpf_mtap* code to access the filter
* pointers, even if the change happens during bpf_tap execution.
* Destruction of a bpf_d descriptor is likewise done using NET_EPOCH_CALL().
*
* 3. A userland application can write packets into a bpf_d descriptor.
* There we need to be sure that the ifnet won't disappear during bpfwrite().
*
* 4. The kernel invokes the bpf_tap/bpf_mtap* functions.  Access to
* bif_dlist is protected by a net_epoch_preempt section, so it is safe to
* access the bpf_d descriptors inside the section.
*
* 5. The kernel invokes bpfdetach() when an interface is destroyed.  All
* lists are modified with the global lock held and the actual free() is
* done using NET_EPOCH_CALL().
*/
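/*
* Illustrative sketch (not part of the original source): the lock order
* described above, as followed by ioctl paths such as bpf_setf():
*
*	BPF_LOCK();		(global sx lock first)
*	BPFD_LOCK(d);		(then the per-descriptor mutex)
*	... update filter or interface state ...
*	BPFD_UNLOCK(d);
*	BPF_UNLOCK();
*
* The tap paths (bpf_tap()/bpf_mtap*()) instead run inside a network epoch
* section and only take the descriptor mutex once a filter has matched.
*/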
static void
302
bpfif_free(epoch_context_t ctx)
303
{
304
struct bpf_if *bp;
305
306
bp = __containerof(ctx, struct bpf_if, epoch_ctx);
307
if_rele(bp->bif_ifp);
308
free(bp, M_BPF);
309
}
310
311
static void
312
bpfif_ref(struct bpf_if *bp)
313
{
314
315
refcount_acquire(&bp->bif_refcnt);
316
}
317
318
static void
319
bpfif_rele(struct bpf_if *bp)
320
{
321
322
if (!refcount_release(&bp->bif_refcnt))
323
return;
324
NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
325
}
326
327
static void
328
bpfd_ref(struct bpf_d *d)
329
{
330
331
refcount_acquire(&d->bd_refcnt);
332
}
333
334
static void
335
bpfd_rele(struct bpf_d *d)
336
{
337
338
if (!refcount_release(&d->bd_refcnt))
339
return;
340
NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
341
}
342
343
static struct bpf_program_buffer*
344
bpf_program_buffer_alloc(size_t size, int flags)
345
{
346
347
return (malloc(sizeof(struct bpf_program_buffer) + size,
348
M_BPF, flags));
349
}
350
351
static void
352
bpf_program_buffer_free(epoch_context_t ctx)
353
{
354
struct bpf_program_buffer *ptr;
355
356
ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
357
#ifdef BPF_JITTER
358
if (ptr->func != NULL)
359
bpf_destroy_jit_filter(ptr->func);
360
#endif
361
free(ptr, M_BPF);
362
}
363
364
/*
* Wrapper functions for the various buffering methods.  If the set of buffer
* modes expands, we will probably want to introduce a switch data structure
* similar to protosw, etc.
*/
369
static void
370
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
371
u_int len)
372
{
373
374
BPFD_LOCK_ASSERT(d);
375
376
switch (d->bd_bufmode) {
377
case BPF_BUFMODE_BUFFER:
378
return (bpf_buffer_append_bytes(d, buf, offset, src, len));
379
380
case BPF_BUFMODE_ZBUF:
381
counter_u64_add(d->bd_zcopy, 1);
382
return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
383
384
default:
385
panic("bpf_buf_append_bytes");
386
}
387
}
388
389
static void
390
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
391
u_int len)
392
{
393
394
BPFD_LOCK_ASSERT(d);
395
396
switch (d->bd_bufmode) {
397
case BPF_BUFMODE_BUFFER:
398
return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
399
400
case BPF_BUFMODE_ZBUF:
401
counter_u64_add(d->bd_zcopy, 1);
402
return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
403
404
default:
405
panic("bpf_buf_append_mbuf");
406
}
407
}
408
409
/*
410
* This function gets called when the free buffer is re-assigned.
411
*/
412
static void
413
bpf_buf_reclaimed(struct bpf_d *d)
414
{
415
416
BPFD_LOCK_ASSERT(d);
417
418
switch (d->bd_bufmode) {
419
case BPF_BUFMODE_BUFFER:
420
return;
421
422
case BPF_BUFMODE_ZBUF:
423
bpf_zerocopy_buf_reclaimed(d);
424
return;
425
426
default:
427
panic("bpf_buf_reclaimed");
428
}
429
}
430
431
/*
432
* If the buffer mechanism has a way to decide that a held buffer can be made
433
* free, then it is exposed via the bpf_canfreebuf() interface. (1) is
434
* returned if the buffer can be discarded, (0) is returned if it cannot.
435
*/
436
static int
437
bpf_canfreebuf(struct bpf_d *d)
438
{
439
440
BPFD_LOCK_ASSERT(d);
441
442
switch (d->bd_bufmode) {
443
case BPF_BUFMODE_ZBUF:
444
return (bpf_zerocopy_canfreebuf(d));
445
}
446
return (0);
447
}
448
449
/*
450
* Allow the buffer model to indicate that the current store buffer is
451
* immutable, regardless of the appearance of space. Return (1) if the
452
* buffer is writable, and (0) if not.
453
*/
454
static int
455
bpf_canwritebuf(struct bpf_d *d)
456
{
457
BPFD_LOCK_ASSERT(d);
458
459
switch (d->bd_bufmode) {
460
case BPF_BUFMODE_ZBUF:
461
return (bpf_zerocopy_canwritebuf(d));
462
}
463
return (1);
464
}
465
466
/*
467
* Notify buffer model that an attempt to write to the store buffer has
468
* resulted in a dropped packet, in which case the buffer may be considered
469
* full.
470
*/
471
static void
472
bpf_buffull(struct bpf_d *d)
473
{
474
475
BPFD_LOCK_ASSERT(d);
476
477
switch (d->bd_bufmode) {
478
case BPF_BUFMODE_ZBUF:
479
bpf_zerocopy_buffull(d);
480
break;
481
}
482
}
483
484
/*
485
* Notify the buffer model that a buffer has moved into the hold position.
486
*/
487
void
488
bpf_bufheld(struct bpf_d *d)
489
{
490
491
BPFD_LOCK_ASSERT(d);
492
493
switch (d->bd_bufmode) {
494
case BPF_BUFMODE_ZBUF:
495
bpf_zerocopy_bufheld(d);
496
break;
497
}
498
}
499
500
static void
501
bpf_free(struct bpf_d *d)
502
{
503
504
switch (d->bd_bufmode) {
505
case BPF_BUFMODE_BUFFER:
506
return (bpf_buffer_free(d));
507
508
case BPF_BUFMODE_ZBUF:
509
return (bpf_zerocopy_free(d));
510
511
default:
512
panic("bpf_buf_free");
513
}
514
}
515
516
static int
517
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
518
{
519
520
if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
521
return (EOPNOTSUPP);
522
return (bpf_buffer_uiomove(d, buf, len, uio));
523
}
524
525
static int
526
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
527
{
528
529
if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
530
return (EOPNOTSUPP);
531
return (bpf_buffer_ioctl_sblen(d, i));
532
}
533
534
static int
535
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
536
{
537
538
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
539
return (EOPNOTSUPP);
540
return (bpf_zerocopy_ioctl_getzmax(td, d, i));
541
}
542
543
static int
544
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
545
{
546
547
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
548
return (EOPNOTSUPP);
549
return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
550
}
551
552
static int
553
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
554
{
555
556
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
557
return (EOPNOTSUPP);
558
return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
559
}
560
561
/*
562
* General BPF functions.
563
*/
564
static int
565
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
566
struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
567
{
568
const struct ieee80211_bpf_params *p;
569
struct ether_header *eh;
570
struct mbuf *m;
571
int error;
572
int len;
573
int hlen;
574
int slen;
575
576
/*
577
* Build a sockaddr based on the data link layer type.
578
* We do this at this level because the ethernet header
579
* is copied directly into the data field of the sockaddr.
580
* In the case of SLIP, there is no header and the packet
581
* is forwarded as is.
582
* Also, we are careful to leave room at the front of the mbuf
583
* for the link level header.
584
*/
585
switch (linktype) {
586
case DLT_SLIP:
587
sockp->sa_family = AF_INET;
588
hlen = 0;
589
break;
590
591
case DLT_EN10MB:
592
sockp->sa_family = AF_UNSPEC;
593
/* XXX Would MAXLINKHDR be better? */
594
hlen = ETHER_HDR_LEN;
595
break;
596
597
case DLT_FDDI:
598
sockp->sa_family = AF_IMPLINK;
599
hlen = 0;
600
break;
601
602
case DLT_RAW:
603
sockp->sa_family = AF_UNSPEC;
604
hlen = 0;
605
break;
606
607
case DLT_NULL:
608
/*
609
* null interface types require a 4 byte pseudo header which
610
* corresponds to the address family of the packet.
611
*/
612
sockp->sa_family = AF_UNSPEC;
613
hlen = 4;
614
break;
615
616
case DLT_ATM_RFC1483:
617
/*
618
* en atm driver requires 4-byte atm pseudo header.
619
* though it isn't standard, vpi:vci needs to be
620
* specified anyway.
621
*/
622
sockp->sa_family = AF_UNSPEC;
623
hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
624
break;
625
626
case DLT_PPP:
627
sockp->sa_family = AF_UNSPEC;
628
hlen = 4; /* This should match PPP_HDRLEN */
629
break;
630
631
case DLT_IEEE802_11: /* IEEE 802.11 wireless */
632
sockp->sa_family = AF_IEEE80211;
633
hlen = 0;
634
break;
635
636
case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */
637
sockp->sa_family = AF_IEEE80211;
638
sockp->sa_len = 12; /* XXX != 0 */
639
hlen = sizeof(struct ieee80211_bpf_params);
640
break;
641
642
default:
643
return (EIO);
644
}
645
646
len = uio->uio_resid;
647
if (len < hlen || len - hlen > ifp->if_mtu)
648
return (EMSGSIZE);
649
650
/* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. */
651
m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
652
if (m == NULL)
653
return (EIO);
654
m->m_pkthdr.len = m->m_len = len;
655
*mp = m;
656
657
error = uiomove(mtod(m, u_char *), len, uio);
658
if (error)
659
goto bad;
660
661
slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
662
if (slen == 0) {
663
error = EPERM;
664
goto bad;
665
}
666
667
/* Check for multicast destination */
668
switch (linktype) {
669
case DLT_EN10MB:
670
eh = mtod(m, struct ether_header *);
671
if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
672
if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
673
ETHER_ADDR_LEN) == 0)
674
m->m_flags |= M_BCAST;
675
else
676
m->m_flags |= M_MCAST;
677
}
678
if (d->bd_hdrcmplt == 0) {
679
memcpy(eh->ether_shost, IF_LLADDR(ifp),
680
sizeof(eh->ether_shost));
681
}
682
break;
683
}
684
685
/*
* 32-bit version of the structure prepended to each packet.  We use this
* header instead of the standard one for 32-bit streams.  We mark a stream
* as 32-bit the first time we see a 32-bit compat ioctl request.
*/
690
/*
691
* Collect true length from the parameter header
692
* NB: sockp is known to be zero'd so if we do a
693
* short copy unspecified parameters will be
694
* zero.
695
* NB: packet may not be aligned after stripping
696
* bpf params
697
* XXX check ibp_vers
698
*/
699
p = mtod(m, const struct ieee80211_bpf_params *);
700
hlen = p->ibp_len;
701
if (hlen > sizeof(sockp->sa_data)) {
702
error = EINVAL;
703
goto bad;
704
}
705
}
706
bcopy(mtod(m, const void *), sockp->sa_data, hlen);
707
}
708
*hdrlen = hlen;
709
710
return (0);
711
bad:
712
m_freem(m);
713
return (error);
714
}
715
716
/*
717
* Attach descriptor to the bpf interface, i.e. make d listen on bp,
718
* then reset its buffers and counters with reset_d().
719
*/
720
static void
721
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
722
{
723
int op_w;
724
725
BPF_LOCK_ASSERT();
726
727
/*
728
* Save sysctl value to protect from sysctl change
729
* between reads
730
*/
731
op_w = V_bpf_optimize_writers || d->bd_writer;
732
733
if (d->bd_bif != NULL)
734
bpf_detachd_locked(d, false);
735
/*
736
* Point d at bp, and add d to the interface's list.
737
* Since there are many applications using BPF for
738
* sending raw packets only (dhcpd, cdpd are good examples)
739
* we can delay adding d to the list of active listeners until
740
* some filter is configured.
741
*/
742
743
BPFD_LOCK(d);
744
/*
745
* Hold reference to bpif while descriptor uses this interface.
746
*/
747
bpfif_ref(bp);
748
d->bd_bif = bp;
749
if (op_w != 0) {
750
/* Add to writers-only list */
751
CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
752
/*
* We decrement bd_writer on every filter set operation.
* The first BIOCSETF is done by pcap_open_live() to set up
* the snap length.  After that the application usually sets
* its own filter.
*/
758
d->bd_writer = 2;
759
} else
760
CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
761
762
reset_d(d);
763
764
/* Trigger EVFILT_WRITE events. */
765
bpf_wakeup(d);
766
767
BPFD_UNLOCK(d);
768
bpf_bpfd_cnt++;
769
770
CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
771
__func__, d->bd_pid, d->bd_writer ? "writer" : "active");
772
773
if (op_w == 0)
774
EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
775
}
776
777
/*
* Check if we need to upgrade our descriptor @d from write-only mode.
*/
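/*
* Sketch of the libpcap-style sequence this heuristic is tuned for when
* net.bpf.optimize_writers is enabled (an assumption drawn from the
* comments below, not taken from libpcap itself):
*
*	fd = open("/dev/bpf", O_RDWR);
*	ioctl(fd, BIOCSETIF, &ifr);	-> attached as writer-only, bd_writer = 2
*	ioctl(fd, BIOCSETF, &snap);	-> single BPF_RET|BPF_K insn: stay writer
*	ioctl(fd, BIOCSETF, &prog);	-> real filter: upgrade to active reader
*/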
static int
781
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
782
int flen)
783
{
784
int is_snap, need_upgrade;
785
786
/*
787
* Check if we've already upgraded or new filter is empty.
788
*/
789
if (d->bd_writer == 0 || fcode == NULL)
790
return (0);
791
792
need_upgrade = 0;
793
794
/*
* Check if cmd looks like a snaplen setting from
* pcap_bpf.c:pcap_open_live().
* Note we're not checking the .k value here:
* while pcap_open_live() definitely sets it to a non-zero value,
* we'd prefer to treat the k=0 (deny ALL) case the same way, i.e.
* not consider upgrading immediately.
*/
802
if (cmd == BIOCSETF && flen == 1 &&
803
fcode[0].code == (BPF_RET | BPF_K))
804
is_snap = 1;
805
else
806
is_snap = 0;
807
808
if (is_snap == 0) {
809
/*
* We're setting the first filter and it doesn't look like
* a snaplen setting.  We're probably using bpf directly.
* Upgrade immediately.
*/
814
need_upgrade = 1;
815
} else {
816
/*
817
* Do not require upgrade by first BIOCSETF
818
* (used to set snaplen) by pcap_open_live().
819
*/
820
821
if (--d->bd_writer == 0) {
822
/*
* The first snaplen filter has already
* been set.  This is probably a catch-all
* filter.
*/
827
need_upgrade = 1;
828
}
829
}
830
831
CTR5(KTR_NET,
832
"%s: filter function set by pid %d, "
833
"bd_writer counter %d, snap %d upgrade %d",
834
__func__, d->bd_pid, d->bd_writer,
835
is_snap, need_upgrade);
836
837
return (need_upgrade);
838
}
839
840
/*
841
* Detach a file from its interface.
842
*/
843
static void
844
bpf_detachd(struct bpf_d *d)
845
{
846
BPF_LOCK();
847
bpf_detachd_locked(d, false);
848
BPF_UNLOCK();
849
}
850
851
static void
852
bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
853
{
854
struct bpf_if *bp;
855
struct ifnet *ifp;
856
int error;
857
858
BPF_LOCK_ASSERT();
859
CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
860
861
/* Check if descriptor is attached */
862
if ((bp = d->bd_bif) == NULL)
863
return;
864
865
BPFD_LOCK(d);
866
/* Remove d from the interface's descriptor list. */
867
CK_LIST_REMOVE(d, bd_next);
868
/* Save bd_writer value */
869
error = d->bd_writer;
870
ifp = bp->bif_ifp;
871
d->bd_bif = NULL;
872
if (detached_ifp) {
873
/*
874
* Notify descriptor as it's detached, so that any
875
* sleepers wake up and get ENXIO.
876
*/
877
bpf_wakeup(d);
878
}
879
BPFD_UNLOCK(d);
880
bpf_bpfd_cnt--;
881
882
/* Call event handler iff d is attached */
883
if (error == 0)
884
EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
885
886
/*
887
* Check if this descriptor had requested promiscuous mode.
888
* If so and ifnet is not detached, turn it off.
889
*/
890
if (d->bd_promisc && !detached_ifp) {
891
d->bd_promisc = 0;
892
CURVNET_SET(ifp->if_vnet);
893
error = ifpromisc(ifp, 0);
894
CURVNET_RESTORE();
895
if (error != 0 && error != ENXIO) {
896
/*
897
* ENXIO can happen if a pccard is unplugged
898
* Something is really wrong if we were able to put
899
* the driver into promiscuous mode, but can't
900
* take it out.
901
*/
902
if_printf(bp->bif_ifp,
903
"bpf_detach: ifpromisc failed (%d)\n", error);
904
}
905
}
906
bpfif_rele(bp);
907
}
908
909
/*
910
* Close the descriptor by detaching it from its interface,
911
* deallocating its buffers, and marking it free.
912
*/
913
static void
914
bpf_dtor(void *data)
915
{
916
struct bpf_d *d = data;
917
918
BPFD_LOCK(d);
919
if (d->bd_state == BPF_WAITING)
920
callout_stop(&d->bd_callout);
921
d->bd_state = BPF_IDLE;
922
BPFD_UNLOCK(d);
923
funsetown(&d->bd_sigio);
924
bpf_detachd(d);
925
#ifdef MAC
926
mac_bpfdesc_destroy(d);
927
#endif /* MAC */
928
seldrain(&d->bd_sel);
929
knlist_destroy(&d->bd_sel.si_note);
930
callout_drain(&d->bd_callout);
931
bpfd_rele(d);
932
}
933
934
/*
935
* Open ethernet device. Returns ENXIO for illegal minor device number,
936
* EBUSY if file is open by another process.
937
*/
938
/* ARGSUSED */
939
static int
940
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
941
{
942
struct bpf_d *d;
943
int error;
944
945
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
946
error = devfs_set_cdevpriv(d, bpf_dtor);
947
if (error != 0) {
948
free(d, M_BPF);
949
return (error);
950
}
951
952
/* Setup counters */
953
d->bd_rcount = counter_u64_alloc(M_WAITOK);
954
d->bd_dcount = counter_u64_alloc(M_WAITOK);
955
d->bd_fcount = counter_u64_alloc(M_WAITOK);
956
d->bd_wcount = counter_u64_alloc(M_WAITOK);
957
d->bd_wfcount = counter_u64_alloc(M_WAITOK);
958
d->bd_wdcount = counter_u64_alloc(M_WAITOK);
959
d->bd_zcopy = counter_u64_alloc(M_WAITOK);
960
961
/*
962
* For historical reasons, perform a one-time initialization call to
963
* the buffer routines, even though we're not yet committed to a
964
* particular buffer method.
965
*/
966
bpf_buffer_init(d);
967
if ((flags & FREAD) == 0)
968
d->bd_writer = 2;
969
d->bd_hbuf_in_use = 0;
970
d->bd_bufmode = BPF_BUFMODE_BUFFER;
971
d->bd_sig = SIGIO;
972
d->bd_direction = BPF_D_INOUT;
973
refcount_init(&d->bd_refcnt, 1);
974
BPF_PID_REFRESH(d, td);
975
#ifdef MAC
976
mac_bpfdesc_init(d);
977
mac_bpfdesc_create(td->td_ucred, d);
978
#endif
979
mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
980
callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
981
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
982
983
/* Disable VLAN pcp tagging. */
984
d->bd_pcp = 0;
985
986
return (0);
987
}
988
989
/*
990
* bpfread - read next chunk of packets from buffers
991
*/
992
static int
993
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
994
{
995
struct bpf_d *d;
996
int error;
997
int non_block;
998
int timed_out;
999
1000
error = devfs_get_cdevpriv((void **)&d);
1001
if (error != 0)
1002
return (error);
1003
1004
/*
* Restrict the application to use a buffer the same size as
* the kernel buffers.
*/
1008
if (uio->uio_resid != d->bd_bufsize)
1009
return (EINVAL);
1010
1011
non_block = ((ioflag & O_NONBLOCK) != 0);
1012
1013
BPFD_LOCK(d);
1014
BPF_PID_REFRESH_CUR(d);
1015
if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
1016
BPFD_UNLOCK(d);
1017
return (EOPNOTSUPP);
1018
}
1019
if (d->bd_state == BPF_WAITING)
1020
callout_stop(&d->bd_callout);
1021
timed_out = (d->bd_state == BPF_TIMED_OUT);
1022
d->bd_state = BPF_IDLE;
1023
while (d->bd_hbuf_in_use) {
1024
error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1025
PRINET | PCATCH, "bd_hbuf", 0);
1026
if (error != 0) {
1027
BPFD_UNLOCK(d);
1028
return (error);
1029
}
1030
}
1031
/*
1032
* If the hold buffer is empty, then do a timed sleep, which
1033
* ends when the timeout expires or when enough packets
1034
* have arrived to fill the store buffer.
1035
*/
1036
while (d->bd_hbuf == NULL) {
1037
if (d->bd_slen != 0) {
1038
/*
1039
* A packet(s) either arrived since the previous
1040
* read or arrived while we were asleep.
1041
*/
1042
if (d->bd_immediate || non_block || timed_out) {
1043
/*
1044
* Rotate the buffers and return what's here
1045
* if we are in immediate mode, non-blocking
1046
* flag is set, or this descriptor timed out.
1047
*/
1048
ROTATE_BUFFERS(d);
1049
break;
1050
}
1051
}
1052
1053
/*
1054
* No data is available, check to see if the bpf device
1055
* is still pointed at a real interface. If not, return
1056
* ENXIO so that the userland process knows to rebind
1057
* it before using it again.
1058
*/
1059
if (d->bd_bif == NULL) {
1060
BPFD_UNLOCK(d);
1061
return (ENXIO);
1062
}
1063
1064
if (non_block) {
1065
BPFD_UNLOCK(d);
1066
return (EWOULDBLOCK);
1067
}
1068
error = msleep(d, &d->bd_lock, PRINET | PCATCH,
1069
"bpf", d->bd_rtout);
1070
if (error == EINTR || error == ERESTART) {
1071
BPFD_UNLOCK(d);
1072
return (error);
1073
}
1074
if (error == EWOULDBLOCK) {
1075
/*
1076
* On a timeout, return what's in the buffer,
1077
* which may be nothing. If there is something
1078
* in the store buffer, we can rotate the buffers.
1079
*/
1080
if (d->bd_hbuf)
1081
/*
1082
* We filled up the buffer in between
1083
* getting the timeout and arriving
1084
* here, so we don't need to rotate.
1085
*/
1086
break;
1087
1088
if (d->bd_slen == 0) {
1089
BPFD_UNLOCK(d);
1090
return (0);
1091
}
1092
ROTATE_BUFFERS(d);
1093
break;
1094
}
1095
}
1096
/*
1097
* At this point, we know we have something in the hold slot.
1098
*/
1099
d->bd_hbuf_in_use = 1;
1100
BPFD_UNLOCK(d);
1101
1102
/*
1103
* Move data from hold buffer into user space.
1104
* We know the entire buffer is transferred since
1105
* we checked above that the read buffer is bpf_bufsize bytes.
1106
*
1107
* We do not have to worry about simultaneous reads because
1108
* we waited for sole access to the hold buffer above.
1109
*/
1110
error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
1111
1112
BPFD_LOCK(d);
1113
if (d->bd_hbuf_in_use) {
1114
KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
1115
d->bd_fbuf = d->bd_hbuf;
1116
d->bd_hbuf = NULL;
1117
d->bd_hlen = 0;
1118
bpf_buf_reclaimed(d);
1119
d->bd_hbuf_in_use = 0;
1120
wakeup(&d->bd_hbuf_in_use);
1121
}
1122
BPFD_UNLOCK(d);
1123
1124
return (error);
1125
}
1126
1127
/*
1128
* If there are processes sleeping on this descriptor, wake them up.
1129
*/
1130
static __inline void
1131
bpf_wakeup(struct bpf_d *d)
1132
{
1133
1134
BPFD_LOCK_ASSERT(d);
1135
if (d->bd_state == BPF_WAITING) {
1136
callout_stop(&d->bd_callout);
1137
d->bd_state = BPF_IDLE;
1138
}
1139
wakeup(d);
1140
if (d->bd_async && d->bd_sig && d->bd_sigio)
1141
pgsigio(&d->bd_sigio, d->bd_sig, 0);
1142
1143
selwakeuppri(&d->bd_sel, PRINET);
1144
KNOTE_LOCKED(&d->bd_sel.si_note, 0);
1145
}
1146
1147
static void
1148
bpf_timed_out(void *arg)
1149
{
1150
struct bpf_d *d = (struct bpf_d *)arg;
1151
1152
BPFD_LOCK_ASSERT(d);
1153
1154
if (callout_pending(&d->bd_callout) ||
1155
!callout_active(&d->bd_callout))
1156
return;
1157
if (d->bd_state == BPF_WAITING) {
1158
d->bd_state = BPF_TIMED_OUT;
1159
if (d->bd_slen != 0)
1160
bpf_wakeup(d);
1161
}
1162
}
1163
1164
static int
1165
bpf_ready(struct bpf_d *d)
1166
{
1167
1168
BPFD_LOCK_ASSERT(d);
1169
1170
if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
1171
return (1);
1172
if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1173
d->bd_slen != 0)
1174
return (1);
1175
return (0);
1176
}
1177
1178
static int
1179
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
1180
{
1181
struct route ro;
1182
struct sockaddr dst;
1183
struct epoch_tracker et;
1184
struct bpf_if *bp;
1185
struct bpf_d *d;
1186
struct ifnet *ifp;
1187
struct mbuf *m, *mc;
1188
int error, hlen;
1189
1190
error = devfs_get_cdevpriv((void **)&d);
1191
if (error != 0)
1192
return (error);
1193
1194
NET_EPOCH_ENTER(et);
1195
BPFD_LOCK(d);
1196
BPF_PID_REFRESH_CUR(d);
1197
counter_u64_add(d->bd_wcount, 1);
1198
if ((bp = d->bd_bif) == NULL) {
1199
error = ENXIO;
1200
goto out_locked;
1201
}
1202
1203
ifp = bp->bif_ifp;
1204
if ((ifp->if_flags & IFF_UP) == 0) {
1205
error = ENETDOWN;
1206
goto out_locked;
1207
}
1208
1209
if (uio->uio_resid == 0)
1210
goto out_locked;
1211
1212
bzero(&dst, sizeof(dst));
1213
m = NULL;
1214
hlen = 0;
1215
1216
/*
1217
* Take extra reference, unlock d and exit from epoch section,
1218
* since bpf_movein() can sleep.
1219
*/
1220
bpfd_ref(d);
1221
NET_EPOCH_EXIT(et);
1222
BPFD_UNLOCK(d);
1223
1224
error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
1225
&m, &dst, &hlen, d);
1226
1227
if (error != 0) {
1228
counter_u64_add(d->bd_wdcount, 1);
1229
bpfd_rele(d);
1230
return (error);
1231
}
1232
1233
BPFD_LOCK(d);
1234
/*
1235
* Check that descriptor is still attached to the interface.
1236
* This can happen on bpfdetach(). To avoid access to detached
1237
* ifnet, free mbuf and return ENXIO.
1238
*/
1239
if (d->bd_bif == NULL) {
1240
counter_u64_add(d->bd_wdcount, 1);
1241
BPFD_UNLOCK(d);
1242
bpfd_rele(d);
1243
m_freem(m);
1244
return (ENXIO);
1245
}
1246
counter_u64_add(d->bd_wfcount, 1);
1247
if (d->bd_hdrcmplt)
1248
dst.sa_family = pseudo_AF_HDRCMPLT;
1249
1250
if (d->bd_feedback) {
1251
mc = m_dup(m, M_NOWAIT);
1252
if (mc != NULL)
1253
mc->m_pkthdr.rcvif = ifp;
1254
/* Set M_PROMISC for outgoing packets to be discarded. */
1255
if (d->bd_direction == BPF_D_INOUT)
1256
m->m_flags |= M_PROMISC;
1257
} else
1258
mc = NULL;
1259
1260
m->m_pkthdr.len -= hlen;
1261
m->m_len -= hlen;
1262
m->m_data += hlen; /* XXX */
1263
1264
CURVNET_SET(ifp->if_vnet);
1265
#ifdef MAC
1266
mac_bpfdesc_create_mbuf(d, m);
1267
if (mc != NULL)
1268
mac_bpfdesc_create_mbuf(d, mc);
1269
#endif
1270
1271
bzero(&ro, sizeof(ro));
1272
if (hlen != 0) {
1273
ro.ro_prepend = (u_char *)&dst.sa_data;
1274
ro.ro_plen = hlen;
1275
ro.ro_flags = RT_HAS_HEADER;
1276
}
1277
1278
if (d->bd_pcp != 0)
1279
vlan_set_pcp(m, d->bd_pcp);
1280
1281
/* Avoid possible recursion on BPFD_LOCK(). */
1282
NET_EPOCH_ENTER(et);
1283
BPFD_UNLOCK(d);
1284
error = (*ifp->if_output)(ifp, m, &dst, &ro);
1285
if (error)
1286
counter_u64_add(d->bd_wdcount, 1);
1287
1288
if (mc != NULL) {
1289
if (error == 0)
1290
(*ifp->if_input)(ifp, mc);
1291
else
1292
m_freem(mc);
1293
}
1294
NET_EPOCH_EXIT(et);
1295
CURVNET_RESTORE();
1296
bpfd_rele(d);
1297
return (error);
1298
1299
out_locked:
1300
counter_u64_add(d->bd_wdcount, 1);
1301
NET_EPOCH_EXIT(et);
1302
BPFD_UNLOCK(d);
1303
return (error);
1304
}
1305
1306
/*
1307
* Reset a descriptor by flushing its packet buffer and clearing the receive
1308
* and drop counts. This is doable for kernel-only buffers, but with
1309
* zero-copy buffers, we can't write to (or rotate) buffers that are
1310
* currently owned by userspace. It would be nice if we could encapsulate
1311
* this logic in the buffer code rather than here.
1312
*/
1313
static void
1314
reset_d(struct bpf_d *d)
1315
{
1316
1317
BPFD_LOCK_ASSERT(d);
1318
1319
while (d->bd_hbuf_in_use)
1320
mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
1321
"bd_hbuf", 0);
1322
if ((d->bd_hbuf != NULL) &&
1323
(d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
1324
/* Free the hold buffer. */
1325
d->bd_fbuf = d->bd_hbuf;
1326
d->bd_hbuf = NULL;
1327
d->bd_hlen = 0;
1328
bpf_buf_reclaimed(d);
1329
}
1330
if (bpf_canwritebuf(d))
1331
d->bd_slen = 0;
1332
counter_u64_zero(d->bd_rcount);
1333
counter_u64_zero(d->bd_dcount);
1334
counter_u64_zero(d->bd_fcount);
1335
counter_u64_zero(d->bd_wcount);
1336
counter_u64_zero(d->bd_wfcount);
1337
counter_u64_zero(d->bd_wdcount);
1338
counter_u64_zero(d->bd_zcopy);
1339
}
1340
1341
/*
* FIONREAD          Check for read packet available.
* BIOCGBLEN         Get buffer len [for read()].
* BIOCSETF          Set read filter.
* BIOCSETFNR        Set read filter without resetting descriptor.
* BIOCSETWF         Set write filter.
* BIOCFLUSH         Flush read packet buffer.
* BIOCPROMISC       Put interface into promiscuous mode.
* BIOCGDLT          Get link layer type.
* BIOCGETIF         Get interface name.
* BIOCSETIF         Set interface.
* BIOCSRTIMEOUT     Set read timeout.
* BIOCGRTIMEOUT     Get read timeout.
* BIOCGSTATS        Get packet stats.
* BIOCIMMEDIATE     Set immediate mode.
* BIOCVERSION       Get filter language version.
* BIOCGHDRCMPLT     Get "header already complete" flag.
* BIOCSHDRCMPLT     Set "header already complete" flag.
* BIOCGDIRECTION    Get packet direction flag.
* BIOCSDIRECTION    Set packet direction flag.
* BIOCGTSTAMP       Get time stamp format and resolution.
* BIOCSTSTAMP       Set time stamp format and resolution.
* BIOCLOCK          Set "locked" flag.
* BIOCFEEDBACK      Set packet feedback mode.
* BIOCSETZBUF       Set current zero-copy buffer locations.
* BIOCGETZMAX       Get maximum zero-copy buffer size.
* BIOCROTZBUF       Force rotation of zero-copy buffer.
* BIOCSETBUFMODE    Set buffer mode.
* BIOCGETBUFMODE    Get current buffer mode.
* BIOCSETVLANPCP    Set VLAN PCP tag.
*/
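/*
* Typical userland usage of these ioctls (an illustrative sketch, not part
* of this file; the interface name and buffer handling are just examples):
*
*	int fd = open("/dev/bpf", O_RDWR);
*	struct ifreq ifr = { .ifr_name = "em0" };
*	ioctl(fd, BIOCSETIF, &ifr);		-> bind to an interface
*	ioctl(fd, BIOCIMMEDIATE, &(u_int){ 1 });	-> deliver packets as they arrive
*	ioctl(fd, BIOCSETF, &prog);		-> install a struct bpf_program
*	ioctl(fd, BIOCGBLEN, &blen);		-> read() must use exactly blen bytes
*	n = read(fd, buf, blen);
*/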
/* ARGSUSED */
1373
static int
1374
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1375
struct thread *td)
1376
{
1377
struct bpf_d *d;
1378
int error;
1379
1380
error = devfs_get_cdevpriv((void **)&d);
1381
if (error != 0)
1382
return (error);
1383
1384
/*
1385
* Refresh PID associated with this descriptor.
1386
*/
1387
BPFD_LOCK(d);
1388
BPF_PID_REFRESH(d, td);
1389
if (d->bd_state == BPF_WAITING)
1390
callout_stop(&d->bd_callout);
1391
d->bd_state = BPF_IDLE;
1392
BPFD_UNLOCK(d);
1393
1394
if (d->bd_locked == 1) {
1395
switch (cmd) {
1396
case BIOCGBLEN:
1397
case BIOCFLUSH:
1398
case BIOCGDLT:
1399
case BIOCGDLTLIST:
1400
#ifdef COMPAT_FREEBSD32
1401
case BIOCGDLTLIST32:
1402
#endif
1403
case BIOCGETIF:
1404
case BIOCGRTIMEOUT:
1405
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1406
case BIOCGRTIMEOUT32:
1407
#endif
1408
case BIOCGSTATS:
1409
case BIOCVERSION:
1410
case BIOCGRSIG:
1411
case BIOCGHDRCMPLT:
1412
case BIOCSTSTAMP:
1413
case BIOCFEEDBACK:
1414
case FIONREAD:
1415
case BIOCLOCK:
1416
case BIOCSRTIMEOUT:
1417
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1418
case BIOCSRTIMEOUT32:
1419
#endif
1420
case BIOCIMMEDIATE:
1421
case TIOCGPGRP:
1422
case BIOCROTZBUF:
1423
break;
1424
default:
1425
return (EPERM);
1426
}
1427
}
1428
#ifdef COMPAT_FREEBSD32
1429
/*
1430
* If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1431
* that it will get 32-bit packet headers.
1432
*/
1433
switch (cmd) {
1434
case BIOCSETF32:
1435
case BIOCSETFNR32:
1436
case BIOCSETWF32:
1437
case BIOCGDLTLIST32:
1438
case BIOCGRTIMEOUT32:
1439
case BIOCSRTIMEOUT32:
1440
if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
1441
BPFD_LOCK(d);
1442
d->bd_compat32 = 1;
1443
BPFD_UNLOCK(d);
1444
}
1445
}
1446
#endif
1447
1448
CURVNET_SET(TD_TO_VNET(td));
1449
switch (cmd) {
1450
default:
1451
error = EINVAL;
1452
break;
1453
1454
/*
1455
* Check for read packet available.
1456
*/
1457
case FIONREAD:
1458
{
1459
int n;
1460
1461
BPFD_LOCK(d);
1462
n = d->bd_slen;
1463
while (d->bd_hbuf_in_use)
1464
mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1465
PRINET, "bd_hbuf", 0);
1466
if (d->bd_hbuf)
1467
n += d->bd_hlen;
1468
BPFD_UNLOCK(d);
1469
1470
*(int *)addr = n;
1471
break;
1472
}
1473
1474
/*
1475
* Get buffer len [for read()].
1476
*/
1477
case BIOCGBLEN:
1478
BPFD_LOCK(d);
1479
*(u_int *)addr = d->bd_bufsize;
1480
BPFD_UNLOCK(d);
1481
break;
1482
1483
/*
1484
* Set buffer length.
1485
*/
1486
case BIOCSBLEN:
1487
error = bpf_ioctl_sblen(d, (u_int *)addr);
1488
break;
1489
1490
/*
1491
* Set link layer read filter.
1492
*/
1493
case BIOCSETF:
1494
case BIOCSETFNR:
1495
case BIOCSETWF:
1496
#ifdef COMPAT_FREEBSD32
1497
case BIOCSETF32:
1498
case BIOCSETFNR32:
1499
case BIOCSETWF32:
1500
#endif
1501
error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1502
break;
1503
1504
/*
1505
* Flush read packet buffer.
1506
*/
1507
case BIOCFLUSH:
1508
BPFD_LOCK(d);
1509
reset_d(d);
1510
BPFD_UNLOCK(d);
1511
break;
1512
1513
/*
1514
* Put interface into promiscuous mode.
1515
*/
1516
case BIOCPROMISC:
1517
BPF_LOCK();
1518
if (d->bd_bif == NULL) {
1519
/*
1520
* No interface attached yet.
1521
*/
1522
error = EINVAL;
1523
} else if (d->bd_promisc == 0) {
1524
error = ifpromisc(d->bd_bif->bif_ifp, 1);
1525
if (error == 0)
1526
d->bd_promisc = 1;
1527
}
1528
BPF_UNLOCK();
1529
break;
1530
1531
/*
1532
* Get current data link type.
1533
*/
1534
case BIOCGDLT:
1535
BPF_LOCK();
1536
if (d->bd_bif == NULL)
1537
error = EINVAL;
1538
else
1539
*(u_int *)addr = d->bd_bif->bif_dlt;
1540
BPF_UNLOCK();
1541
break;
1542
1543
/*
1544
* Get a list of supported data link types.
1545
*/
1546
#ifdef COMPAT_FREEBSD32
1547
case BIOCGDLTLIST32:
1548
{
1549
struct bpf_dltlist32 *list32;
1550
struct bpf_dltlist dltlist;
1551
1552
list32 = (struct bpf_dltlist32 *)addr;
1553
dltlist.bfl_len = list32->bfl_len;
1554
dltlist.bfl_list = PTRIN(list32->bfl_list);
1555
BPF_LOCK();
1556
if (d->bd_bif == NULL)
1557
error = EINVAL;
1558
else {
1559
error = bpf_getdltlist(d, &dltlist);
1560
if (error == 0)
1561
list32->bfl_len = dltlist.bfl_len;
1562
}
1563
BPF_UNLOCK();
1564
break;
1565
}
1566
#endif
1567
1568
case BIOCGDLTLIST:
1569
BPF_LOCK();
1570
if (d->bd_bif == NULL)
1571
error = EINVAL;
1572
else
1573
error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1574
BPF_UNLOCK();
1575
break;
1576
1577
/*
1578
* Set data link type.
1579
*/
1580
case BIOCSDLT:
1581
BPF_LOCK();
1582
if (d->bd_bif == NULL)
1583
error = EINVAL;
1584
else
1585
error = bpf_setdlt(d, *(u_int *)addr);
1586
BPF_UNLOCK();
1587
break;
1588
1589
/*
1590
* Get interface name.
1591
*/
1592
case BIOCGETIF:
1593
BPF_LOCK();
1594
if (d->bd_bif == NULL)
1595
error = EINVAL;
1596
else {
1597
struct ifnet *const ifp = d->bd_bif->bif_ifp;
1598
struct ifreq *const ifr = (struct ifreq *)addr;
1599
1600
strlcpy(ifr->ifr_name, ifp->if_xname,
1601
sizeof(ifr->ifr_name));
1602
}
1603
BPF_UNLOCK();
1604
break;
1605
1606
/*
1607
* Set interface.
1608
*/
1609
case BIOCSETIF:
1610
{
1611
int alloc_buf, size;
1612
1613
/*
1614
* Behavior here depends on the buffering model. If
1615
* we're using kernel memory buffers, then we can
1616
* allocate them here. If we're using zero-copy,
1617
* then the user process must have registered buffers
1618
* by the time we get here.
1619
*/
1620
alloc_buf = 0;
1621
BPFD_LOCK(d);
1622
if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
1623
d->bd_sbuf == NULL)
1624
alloc_buf = 1;
1625
BPFD_UNLOCK(d);
1626
if (alloc_buf) {
1627
size = d->bd_bufsize;
1628
error = bpf_buffer_ioctl_sblen(d, &size);
1629
if (error != 0)
1630
break;
1631
}
1632
BPF_LOCK();
1633
error = bpf_setif(d, (struct ifreq *)addr);
1634
BPF_UNLOCK();
1635
break;
1636
}
1637
1638
/*
1639
* Set read timeout.
1640
*/
1641
case BIOCSRTIMEOUT:
1642
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1643
case BIOCSRTIMEOUT32:
1644
#endif
1645
{
1646
struct timeval *tv = (struct timeval *)addr;
1647
#if defined(COMPAT_FREEBSD32)
1648
struct timeval32 *tv32;
1649
struct timeval tv64;
1650
1651
if (cmd == BIOCSRTIMEOUT32) {
1652
tv32 = (struct timeval32 *)addr;
1653
tv = &tv64;
1654
tv->tv_sec = tv32->tv_sec;
1655
tv->tv_usec = tv32->tv_usec;
1656
} else
1657
#endif
1658
tv = (struct timeval *)addr;
1659
1660
/*
1661
* Subtract 1 tick from tvtohz() since this isn't
1662
* a one-shot timer.
1663
*/
1664
if ((error = itimerfix(tv)) == 0)
1665
d->bd_rtout = tvtohz(tv) - 1;
1666
break;
1667
}
1668
1669
/*
1670
* Get read timeout.
1671
*/
1672
case BIOCGRTIMEOUT:
1673
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1674
case BIOCGRTIMEOUT32:
1675
#endif
1676
{
1677
struct timeval *tv;
1678
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1679
struct timeval32 *tv32;
1680
struct timeval tv64;
1681
1682
if (cmd == BIOCGRTIMEOUT32)
1683
tv = &tv64;
1684
else
1685
#endif
1686
tv = (struct timeval *)addr;
1687
1688
tv->tv_sec = d->bd_rtout / hz;
1689
tv->tv_usec = (d->bd_rtout % hz) * tick;
1690
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1691
if (cmd == BIOCGRTIMEOUT32) {
1692
tv32 = (struct timeval32 *)addr;
1693
tv32->tv_sec = tv->tv_sec;
1694
tv32->tv_usec = tv->tv_usec;
1695
}
1696
#endif
1697
1698
break;
1699
}
1700
1701
/*
1702
* Get packet stats.
1703
*/
1704
case BIOCGSTATS:
1705
{
1706
struct bpf_stat *bs = (struct bpf_stat *)addr;
1707
1708
/* XXXCSJP overflow */
1709
bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
1710
bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
1711
break;
1712
}
1713
1714
/*
1715
* Set immediate mode.
1716
*/
1717
case BIOCIMMEDIATE:
1718
BPFD_LOCK(d);
1719
d->bd_immediate = *(u_int *)addr;
1720
BPFD_UNLOCK(d);
1721
break;
1722
1723
case BIOCVERSION:
1724
{
1725
struct bpf_version *bv = (struct bpf_version *)addr;
1726
1727
bv->bv_major = BPF_MAJOR_VERSION;
1728
bv->bv_minor = BPF_MINOR_VERSION;
1729
break;
1730
}
1731
1732
/*
1733
* Get "header already complete" flag
1734
*/
1735
case BIOCGHDRCMPLT:
1736
BPFD_LOCK(d);
1737
*(u_int *)addr = d->bd_hdrcmplt;
1738
BPFD_UNLOCK(d);
1739
break;
1740
1741
/*
1742
* Set "header already complete" flag
1743
*/
1744
case BIOCSHDRCMPLT:
1745
BPFD_LOCK(d);
1746
d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1747
BPFD_UNLOCK(d);
1748
break;
1749
1750
/*
1751
* Get packet direction flag
1752
*/
1753
case BIOCGDIRECTION:
1754
BPFD_LOCK(d);
1755
*(u_int *)addr = d->bd_direction;
1756
BPFD_UNLOCK(d);
1757
break;
1758
1759
/*
1760
* Set packet direction flag
1761
*/
1762
case BIOCSDIRECTION:
1763
{
1764
u_int direction;
1765
1766
direction = *(u_int *)addr;
1767
switch (direction) {
1768
case BPF_D_IN:
1769
case BPF_D_INOUT:
1770
case BPF_D_OUT:
1771
BPFD_LOCK(d);
1772
d->bd_direction = direction;
1773
BPFD_UNLOCK(d);
1774
break;
1775
default:
1776
error = EINVAL;
1777
}
1778
}
1779
break;
1780
1781
/*
1782
* Get packet timestamp format and resolution.
1783
*/
1784
case BIOCGTSTAMP:
1785
BPFD_LOCK(d);
1786
*(u_int *)addr = d->bd_tstamp;
1787
BPFD_UNLOCK(d);
1788
break;
1789
1790
/*
1791
* Set packet timestamp format and resolution.
1792
*/
1793
case BIOCSTSTAMP:
1794
{
1795
u_int func;
1796
1797
func = *(u_int *)addr;
1798
if (BPF_T_VALID(func))
1799
d->bd_tstamp = func;
1800
else
1801
error = EINVAL;
1802
}
1803
break;
1804
1805
case BIOCFEEDBACK:
1806
BPFD_LOCK(d);
1807
d->bd_feedback = *(u_int *)addr;
1808
BPFD_UNLOCK(d);
1809
break;
1810
1811
case BIOCLOCK:
1812
BPFD_LOCK(d);
1813
d->bd_locked = 1;
1814
BPFD_UNLOCK(d);
1815
break;
1816
1817
case FIONBIO: /* Non-blocking I/O */
1818
break;
1819
1820
case FIOASYNC: /* Send signal on receive packets */
1821
BPFD_LOCK(d);
1822
d->bd_async = *(int *)addr;
1823
BPFD_UNLOCK(d);
1824
break;
1825
1826
case FIOSETOWN:
1827
/*
1828
* XXX: Add some sort of locking here?
1829
* fsetown() can sleep.
1830
*/
1831
error = fsetown(*(int *)addr, &d->bd_sigio);
1832
break;
1833
1834
case FIOGETOWN:
1835
BPFD_LOCK(d);
1836
*(int *)addr = fgetown(&d->bd_sigio);
1837
BPFD_UNLOCK(d);
1838
break;
1839
1840
/* This is deprecated, FIOSETOWN should be used instead. */
1841
case TIOCSPGRP:
1842
error = fsetown(-(*(int *)addr), &d->bd_sigio);
1843
break;
1844
1845
/* This is deprecated, FIOGETOWN should be used instead. */
1846
case TIOCGPGRP:
1847
*(int *)addr = -fgetown(&d->bd_sigio);
1848
break;
1849
1850
case BIOCSRSIG: /* Set receive signal */
1851
{
1852
u_int sig;
1853
1854
sig = *(u_int *)addr;
1855
1856
if (sig >= NSIG)
1857
error = EINVAL;
1858
else {
1859
BPFD_LOCK(d);
1860
d->bd_sig = sig;
1861
BPFD_UNLOCK(d);
1862
}
1863
break;
1864
}
1865
case BIOCGRSIG:
1866
BPFD_LOCK(d);
1867
*(u_int *)addr = d->bd_sig;
1868
BPFD_UNLOCK(d);
1869
break;
1870
1871
case BIOCGETBUFMODE:
1872
BPFD_LOCK(d);
1873
*(u_int *)addr = d->bd_bufmode;
1874
BPFD_UNLOCK(d);
1875
break;
1876
1877
case BIOCSETBUFMODE:
1878
/*
1879
* Allow the buffering mode to be changed as long as we
1880
* haven't yet committed to a particular mode. Our
1881
* definition of commitment, for now, is whether or not a
1882
* buffer has been allocated or an interface attached, since
1883
* that's the point where things get tricky.
1884
*/
1885
switch (*(u_int *)addr) {
1886
case BPF_BUFMODE_BUFFER:
1887
break;
1888
1889
case BPF_BUFMODE_ZBUF:
1890
if (bpf_zerocopy_enable)
1891
break;
1892
/* FALLSTHROUGH */
1893
1894
default:
1895
CURVNET_RESTORE();
1896
return (EINVAL);
1897
}
1898
1899
BPFD_LOCK(d);
1900
if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1901
d->bd_fbuf != NULL || d->bd_bif != NULL) {
1902
BPFD_UNLOCK(d);
1903
CURVNET_RESTORE();
1904
return (EBUSY);
1905
}
1906
d->bd_bufmode = *(u_int *)addr;
1907
BPFD_UNLOCK(d);
1908
break;
1909
1910
case BIOCGETZMAX:
1911
error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1912
break;
1913
1914
case BIOCSETZBUF:
1915
error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1916
break;
1917
1918
case BIOCROTZBUF:
1919
error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1920
break;
1921
1922
case BIOCSETVLANPCP:
1923
{
1924
u_int pcp;
1925
1926
pcp = *(u_int *)addr;
1927
if (pcp > BPF_PRIO_MAX || pcp < 0) {
1928
error = EINVAL;
1929
break;
1930
}
1931
d->bd_pcp = pcp;
1932
break;
1933
}
1934
}
1935
CURVNET_RESTORE();
1936
return (error);
1937
}
1938
1939
/*
1940
* Set d's packet filter program to fp. If this file already has a filter,
1941
* free it and replace it. Returns EINVAL for bogus requests.
1942
*
1943
* Note we use global lock here to serialize bpf_setf() and bpf_setif()
1944
* calls.
1945
*/
1946
static int
1947
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1948
{
1949
#ifdef COMPAT_FREEBSD32
1950
struct bpf_program fp_swab;
1951
struct bpf_program32 *fp32;
1952
#endif
1953
struct bpf_program_buffer *fcode;
1954
struct bpf_insn *filter;
1955
#ifdef BPF_JITTER
1956
bpf_jit_filter *jfunc;
1957
#endif
1958
size_t size;
1959
u_int flen;
1960
bool track_event;
1961
1962
#ifdef COMPAT_FREEBSD32
1963
switch (cmd) {
1964
case BIOCSETF32:
1965
case BIOCSETWF32:
1966
case BIOCSETFNR32:
1967
fp32 = (struct bpf_program32 *)fp;
1968
fp_swab.bf_len = fp32->bf_len;
1969
fp_swab.bf_insns =
1970
(struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1971
fp = &fp_swab;
1972
switch (cmd) {
1973
case BIOCSETF32:
1974
cmd = BIOCSETF;
1975
break;
1976
case BIOCSETWF32:
1977
cmd = BIOCSETWF;
1978
break;
1979
}
1980
break;
1981
}
1982
#endif
1983
1984
filter = NULL;
1985
#ifdef BPF_JITTER
1986
jfunc = NULL;
1987
#endif
1988
/*
* Check the new filter's validity before acquiring any locks.
* Allocate memory for the new filter, if needed.
*/
1992
flen = fp->bf_len;
1993
if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
1994
return (EINVAL);
1995
size = flen * sizeof(*fp->bf_insns);
1996
if (size > 0) {
1997
/* We're setting up new filter. Copy and check actual data. */
1998
fcode = bpf_program_buffer_alloc(size, M_WAITOK);
1999
filter = (struct bpf_insn *)fcode->buffer;
2000
if (copyin(fp->bf_insns, filter, size) != 0 ||
2001
!bpf_validate(filter, flen)) {
2002
free(fcode, M_BPF);
2003
return (EINVAL);
2004
}
2005
#ifdef BPF_JITTER
2006
if (cmd != BIOCSETWF) {
2007
/*
2008
* Filter is copied inside fcode and is
2009
* perfectly valid.
2010
*/
2011
jfunc = bpf_jitter(filter, flen);
2012
}
2013
#endif
2014
}
2015
2016
track_event = false;
2017
fcode = NULL;
2018
2019
BPF_LOCK();
2020
BPFD_LOCK(d);
2021
/* Set up new filter. */
2022
if (cmd == BIOCSETWF) {
2023
if (d->bd_wfilter != NULL) {
2024
fcode = __containerof((void *)d->bd_wfilter,
2025
struct bpf_program_buffer, buffer);
2026
#ifdef BPF_JITTER
2027
fcode->func = NULL;
2028
#endif
2029
}
2030
d->bd_wfilter = filter;
2031
} else {
2032
if (d->bd_rfilter != NULL) {
2033
fcode = __containerof((void *)d->bd_rfilter,
2034
struct bpf_program_buffer, buffer);
2035
#ifdef BPF_JITTER
2036
fcode->func = d->bd_bfilter;
2037
#endif
2038
}
2039
d->bd_rfilter = filter;
2040
#ifdef BPF_JITTER
2041
d->bd_bfilter = jfunc;
2042
#endif
2043
if (cmd == BIOCSETF)
2044
reset_d(d);
2045
2046
if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
2047
/*
2048
* Filter can be set several times without
2049
* specifying interface. In this case just mark d
2050
* as reader.
2051
*/
2052
d->bd_writer = 0;
2053
if (d->bd_bif != NULL) {
2054
/*
2055
* Remove descriptor from writers-only list
2056
* and add it to active readers list.
2057
*/
2058
CK_LIST_REMOVE(d, bd_next);
2059
CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
2060
d, bd_next);
2061
CTR2(KTR_NET,
2062
"%s: upgrade required by pid %d",
2063
__func__, d->bd_pid);
2064
track_event = true;
2065
}
2066
}
2067
}
2068
BPFD_UNLOCK(d);
2069
2070
if (fcode != NULL)
2071
NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);
2072
2073
if (track_event)
2074
EVENTHANDLER_INVOKE(bpf_track,
2075
d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
2076
2077
BPF_UNLOCK();
2078
return (0);
2079
}
2080
2081
/*
2082
* Detach a file from its current interface (if attached at all) and attach
2083
* to the interface indicated by the name stored in ifr.
2084
* Return an errno or 0.
2085
*/
2086
static int
2087
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
2088
{
2089
struct bpf_if *bp;
2090
struct ifnet *theywant;
2091
2092
BPF_LOCK_ASSERT();
2093
2094
theywant = ifunit(ifr->ifr_name);
2095
if (theywant == NULL)
2096
return (ENXIO);
2097
/*
2098
* Look through attached interfaces for the named one.
2099
*/
2100
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2101
if (bp->bif_ifp == theywant &&
2102
bp->bif_bpf == &theywant->if_bpf)
2103
break;
2104
}
2105
if (bp == NULL)
2106
return (ENXIO);
2107
2108
MPASS(bp == theywant->if_bpf);
2109
/*
2110
* At this point, we expect the buffer is already allocated. If not,
2111
* return an error.
2112
*/
2113
switch (d->bd_bufmode) {
2114
case BPF_BUFMODE_BUFFER:
2115
case BPF_BUFMODE_ZBUF:
2116
if (d->bd_sbuf == NULL)
2117
return (EINVAL);
2118
break;
2119
2120
default:
2121
panic("bpf_setif: bufmode %d", d->bd_bufmode);
2122
}
2123
if (bp != d->bd_bif)
2124
bpf_attachd(d, bp);
2125
else {
2126
BPFD_LOCK(d);
2127
reset_d(d);
2128
BPFD_UNLOCK(d);
2129
}
2130
return (0);
2131
}
2132
2133
/*
2134
* Support for select() and poll() system calls
2135
*
2136
* Return true iff the specific operation will not block indefinitely.
2137
* Otherwise, return false but make a note that a selwakeup() must be done.
2138
*/
2139
static int
2140
bpfpoll(struct cdev *dev, int events, struct thread *td)
2141
{
2142
struct bpf_d *d;
2143
int revents;
2144
2145
if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
2146
return (events &
2147
(POLLHUP | POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM));
2148
2149
/*
2150
* Refresh PID associated with this descriptor.
2151
*/
2152
revents = events & (POLLOUT | POLLWRNORM);
2153
BPFD_LOCK(d);
2154
BPF_PID_REFRESH(d, td);
2155
if (events & (POLLIN | POLLRDNORM)) {
2156
if (bpf_ready(d))
2157
revents |= events & (POLLIN | POLLRDNORM);
2158
else {
2159
selrecord(td, &d->bd_sel);
2160
/* Start the read timeout if necessary. */
2161
if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2162
callout_reset(&d->bd_callout, d->bd_rtout,
2163
bpf_timed_out, d);
2164
d->bd_state = BPF_WAITING;
2165
}
2166
}
2167
}
2168
BPFD_UNLOCK(d);
2169
return (revents);
2170
}
2171
2172
/*
2173
* Support for kevent() system call. Register EVFILT_READ filters and
2174
* reject all others.
2175
*/
2176
int
2177
bpfkqfilter(struct cdev *dev, struct knote *kn)
2178
{
2179
struct bpf_d *d;
2180
2181
if (devfs_get_cdevpriv((void **)&d) != 0)
2182
return (1);
2183
2184
switch (kn->kn_filter) {
2185
case EVFILT_READ:
2186
kn->kn_fop = &bpfread_filtops;
2187
break;
2188
2189
case EVFILT_WRITE:
2190
kn->kn_fop = &bpfwrite_filtops;
2191
break;
2192
2193
default:
2194
return (1);
2195
}
2196
2197
/*
2198
* Refresh PID associated with this descriptor.
2199
*/
2200
BPFD_LOCK(d);
2201
BPF_PID_REFRESH_CUR(d);
2202
kn->kn_hook = d;
2203
knlist_add(&d->bd_sel.si_note, kn, 1);
2204
BPFD_UNLOCK(d);
2205
2206
return (0);
2207
}
2208
2209
static void
2210
filt_bpfdetach(struct knote *kn)
2211
{
2212
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2213
2214
knlist_remove(&d->bd_sel.si_note, kn, 0);
2215
}
2216
2217
static int
2218
filt_bpfread(struct knote *kn, long hint)
2219
{
2220
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2221
int ready;
2222
2223
BPFD_LOCK_ASSERT(d);
2224
ready = bpf_ready(d);
2225
if (ready) {
2226
kn->kn_data = d->bd_slen;
2227
/*
2228
* Ignore the hold buffer if it is being copied to user space.
2229
*/
2230
if (!d->bd_hbuf_in_use && d->bd_hbuf)
2231
kn->kn_data += d->bd_hlen;
2232
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2233
callout_reset(&d->bd_callout, d->bd_rtout,
2234
bpf_timed_out, d);
2235
d->bd_state = BPF_WAITING;
2236
}
2237
2238
return (ready);
2239
}
2240
2241
static int
2242
filt_bpfwrite(struct knote *kn, long hint)
2243
{
2244
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2245
2246
BPFD_LOCK_ASSERT(d);
2247
2248
if (d->bd_bif == NULL) {
2249
kn->kn_data = 0;
2250
return (0);
2251
} else {
2252
kn->kn_data = d->bd_bif->bif_ifp->if_mtu;
2253
return (1);
2254
}
2255
}
2256
2257
#define BPF_TSTAMP_NONE 0
2258
#define BPF_TSTAMP_FAST 1
2259
#define BPF_TSTAMP_NORMAL 2
2260
#define BPF_TSTAMP_EXTERN 3
2261
2262
static int
2263
bpf_ts_quality(int tstype)
2264
{
2265
2266
if (tstype == BPF_T_NONE)
2267
return (BPF_TSTAMP_NONE);
2268
if ((tstype & BPF_T_FAST) != 0)
2269
return (BPF_TSTAMP_FAST);
2270
2271
return (BPF_TSTAMP_NORMAL);
2272
}
2273
2274
static int
2275
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
2276
{
2277
struct timespec ts;
2278
struct m_tag *tag;
2279
int quality;
2280
2281
quality = bpf_ts_quality(tstype);
2282
if (quality == BPF_TSTAMP_NONE)
2283
return (quality);
2284
2285
if (m != NULL) {
2286
if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) {
2287
mbuf_tstmp2timespec(m, &ts);
2288
timespec2bintime(&ts, bt);
2289
return (BPF_TSTAMP_EXTERN);
2290
}
2291
tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2292
if (tag != NULL) {
2293
*bt = *(struct bintime *)(tag + 1);
2294
return (BPF_TSTAMP_EXTERN);
2295
}
2296
}
2297
if (quality == BPF_TSTAMP_NORMAL)
2298
binuptime(bt);
2299
else
2300
getbinuptime(bt);
2301
2302
return (quality);
2303
}
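/*
 * Illustrative userland sketch (an assumption for exposition, kept under
 * #if 0): selecting the timestamp format consumed by bpf_ts_quality()
 * and bpf_gettime() above via the BIOCSTSTAMP ioctl.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
set_monotonic_nanosecond_timestamps(int bpf_fd)
{
	/* Nanosecond resolution, uptime-based; see bpf_bintime2ts(). */
	u_int fmt = BPF_T_NANOTIME | BPF_T_MONOTONIC;

	return (ioctl(bpf_fd, BIOCSTSTAMP, &fmt));
}
#endif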
2304
2305
/*
2306
* Incoming linkage from device drivers. Process the packet pkt, of length
2307
* pktlen, which is stored in a contiguous buffer. The packet is parsed
2308
* by each process' filter, and if accepted, stashed into the corresponding
2309
* buffer.
2310
*/
2311
void
2312
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2313
{
2314
struct epoch_tracker et;
2315
struct bintime bt;
2316
struct bpf_d *d;
2317
#ifdef BPF_JITTER
2318
bpf_jit_filter *bf;
2319
#endif
2320
u_int slen;
2321
int gottime;
2322
2323
gottime = BPF_TSTAMP_NONE;
2324
NET_EPOCH_ENTER(et);
2325
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2326
counter_u64_add(d->bd_rcount, 1);
2327
/*
2328
* NB: We don't call BPF_CHECK_DIRECTION() here since there
2329
* is no way for the caller to indicate to us whether this
2330
* packet is inbound or outbound. In the bpf_mtap() routines,
2331
* we use the interface pointers on the mbuf to figure it out.
2332
*/
2333
#ifdef BPF_JITTER
2334
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2335
if (bf != NULL)
2336
slen = (*(bf->func))(pkt, pktlen, pktlen);
2337
else
2338
#endif
2339
slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
2340
if (slen != 0) {
2341
/*
2342
* Filter matches. Let's acquire the write lock.
2343
*/
2344
BPFD_LOCK(d);
2345
counter_u64_add(d->bd_fcount, 1);
2346
if (gottime < bpf_ts_quality(d->bd_tstamp))
2347
gottime = bpf_gettime(&bt, d->bd_tstamp,
2348
NULL);
2349
#ifdef MAC
2350
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2351
#endif
2352
catchpacket(d, pkt, pktlen, slen,
2353
bpf_append_bytes, &bt);
2354
BPFD_UNLOCK(d);
2355
}
2356
}
2357
NET_EPOCH_EXIT(et);
2358
}
2359
2360
void
2361
bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
2362
{
2363
if (bpf_peers_present(ifp->if_bpf))
2364
bpf_tap(ifp->if_bpf, pkt, pktlen);
2365
}
2366
2367
#define BPF_CHECK_DIRECTION(d, r, i) \
2368
(((d)->bd_direction == BPF_D_IN && (r) != (i)) || \
2369
((d)->bd_direction == BPF_D_OUT && (r) == (i)))
2370
2371
/*
2372
* Incoming linkage from device drivers, when packet is in an mbuf chain.
2373
* Locking model is explained in bpf_tap().
2374
*/
2375
void
2376
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
2377
{
2378
struct epoch_tracker et;
2379
struct bintime bt;
2380
struct bpf_d *d;
2381
#ifdef BPF_JITTER
2382
bpf_jit_filter *bf;
2383
#endif
2384
u_int pktlen, slen;
2385
int gottime;
2386
2387
/* Skip outgoing duplicate packets. */
2388
if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
2389
m->m_flags &= ~M_PROMISC;
2390
return;
2391
}
2392
2393
pktlen = m_length(m, NULL);
2394
gottime = BPF_TSTAMP_NONE;
2395
2396
NET_EPOCH_ENTER(et);
2397
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2398
if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
2399
continue;
2400
counter_u64_add(d->bd_rcount, 1);
2401
#ifdef BPF_JITTER
2402
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2403
/* XXX We cannot handle multiple mbufs. */
2404
if (bf != NULL && m->m_next == NULL)
2405
slen = (*(bf->func))(mtod(m, u_char *), pktlen,
2406
pktlen);
2407
else
2408
#endif
2409
slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
2410
if (slen != 0) {
2411
BPFD_LOCK(d);
2412
2413
counter_u64_add(d->bd_fcount, 1);
2414
if (gottime < bpf_ts_quality(d->bd_tstamp))
2415
gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2416
#ifdef MAC
2417
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2418
#endif
2419
catchpacket(d, (u_char *)m, pktlen, slen,
2420
bpf_append_mbuf, &bt);
2421
BPFD_UNLOCK(d);
2422
}
2423
}
2424
NET_EPOCH_EXIT(et);
2425
}
2426
2427
void
2428
bpf_mtap_if(if_t ifp, struct mbuf *m)
2429
{
2430
if (bpf_peers_present(ifp->if_bpf)) {
2431
M_ASSERTVALID(m);
2432
bpf_mtap(ifp->if_bpf, m);
2433
}
2434
}
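/*
 * Illustrative userland sketch (an assumption for exposition, kept under
 * #if 0): restricting capture to received packets with BIOCSDIRECTION,
 * which is what the BPF_CHECK_DIRECTION() test above enforces for each
 * tapped packet.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
capture_inbound_only(int bpf_fd)
{
	/* BPF_D_OUT and BPF_D_INOUT are the other accepted values. */
	u_int dir = BPF_D_IN;

	return (ioctl(bpf_fd, BIOCSDIRECTION, &dir));
}
#endif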
2435
2436
/*
2437
* Incoming linkage from device drivers, when packet is in
2438
* an mbuf chain and to be prepended by a contiguous header.
2439
*/
2440
void
2441
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
2442
{
2443
struct epoch_tracker et;
2444
struct bintime bt;
2445
struct mbuf mb;
2446
struct bpf_d *d;
2447
u_int pktlen, slen;
2448
int gottime;
2449
2450
/* Skip outgoing duplicate packets. */
2451
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2452
m->m_flags &= ~M_PROMISC;
2453
return;
2454
}
2455
2456
pktlen = m_length(m, NULL);
2457
/*
2458
* Craft an on-stack mbuf suitable for passing to bpf_filter.
2459
* Note that we cut corners here; we only set up what's
2460
* absolutely needed--this mbuf should never go anywhere else.
2461
*/
2462
mb.m_flags = 0;
2463
mb.m_next = m;
2464
mb.m_data = data;
2465
mb.m_len = dlen;
2466
pktlen += dlen;
2467
2468
gottime = BPF_TSTAMP_NONE;
2469
2470
NET_EPOCH_ENTER(et);
2471
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2472
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2473
continue;
2474
counter_u64_add(d->bd_rcount, 1);
2475
slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
2476
if (slen != 0) {
2477
BPFD_LOCK(d);
2478
2479
counter_u64_add(d->bd_fcount, 1);
2480
if (gottime < bpf_ts_quality(d->bd_tstamp))
2481
gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2482
#ifdef MAC
2483
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2484
#endif
2485
catchpacket(d, (u_char *)&mb, pktlen, slen,
2486
bpf_append_mbuf, &bt);
2487
BPFD_UNLOCK(d);
2488
}
2489
}
2490
NET_EPOCH_EXIT(et);
2491
}
2492
2493
void
2494
bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
2495
{
2496
if (bpf_peers_present(ifp->if_bpf)) {
2497
M_ASSERTVALID(m);
2498
bpf_mtap2(ifp->if_bpf, data, dlen, m);
2499
}
2500
}
2501
2502
#undef BPF_CHECK_DIRECTION
2503
#undef BPF_TSTAMP_NONE
2504
#undef BPF_TSTAMP_FAST
2505
#undef BPF_TSTAMP_NORMAL
2506
#undef BPF_TSTAMP_EXTERN
2507
2508
static int
2509
bpf_hdrlen(struct bpf_d *d)
2510
{
2511
int hdrlen;
2512
2513
hdrlen = d->bd_bif->bif_hdrlen;
2514
#ifndef BURN_BRIDGES
2515
if (d->bd_tstamp == BPF_T_NONE ||
2516
BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
2517
#ifdef COMPAT_FREEBSD32
2518
if (d->bd_compat32)
2519
hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
2520
else
2521
#endif
2522
hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
2523
else
2524
#endif
2525
hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
2526
#ifdef COMPAT_FREEBSD32
2527
if (d->bd_compat32)
2528
hdrlen = BPF_WORDALIGN32(hdrlen);
2529
else
2530
#endif
2531
hdrlen = BPF_WORDALIGN(hdrlen);
2532
2533
return (hdrlen - d->bd_bif->bif_hdrlen);
2534
}
2535
2536
static void
2537
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
2538
{
2539
struct bintime bt2, boottimebin;
2540
struct timeval tsm;
2541
struct timespec tsn;
2542
2543
if ((tstype & BPF_T_MONOTONIC) == 0) {
2544
bt2 = *bt;
2545
getboottimebin(&boottimebin);
2546
bintime_add(&bt2, &boottimebin);
2547
bt = &bt2;
2548
}
2549
switch (BPF_T_FORMAT(tstype)) {
2550
case BPF_T_MICROTIME:
2551
bintime2timeval(bt, &tsm);
2552
ts->bt_sec = tsm.tv_sec;
2553
ts->bt_frac = tsm.tv_usec;
2554
break;
2555
case BPF_T_NANOTIME:
2556
bintime2timespec(bt, &tsn);
2557
ts->bt_sec = tsn.tv_sec;
2558
ts->bt_frac = tsn.tv_nsec;
2559
break;
2560
case BPF_T_BINTIME:
2561
ts->bt_sec = bt->sec;
2562
ts->bt_frac = bt->frac;
2563
break;
2564
}
2565
}
2566
2567
/*
2568
* Move the packet data from interface memory (pkt) into the
2569
* store buffer. "cpfn" is the routine called to do the actual data
2570
* transfer. bpf_append_bytes is passed in to copy contiguous chunks, while
2571
* bpf_append_mbuf is passed in to copy mbuf chains. In the latter case,
2572
* pkt is really an mbuf.
2573
*/
2574
static void
2575
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
2576
void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
2577
struct bintime *bt)
2578
{
2579
static char zeroes[BPF_ALIGNMENT];
2580
struct bpf_xhdr hdr;
2581
#ifndef BURN_BRIDGES
2582
struct bpf_hdr hdr_old;
2583
#ifdef COMPAT_FREEBSD32
2584
struct bpf_hdr32 hdr32_old;
2585
#endif
2586
#endif
2587
int caplen, curlen, hdrlen, pad, totlen;
2588
int do_wakeup = 0;
2589
int do_timestamp;
2590
int tstype;
2591
2592
BPFD_LOCK_ASSERT(d);
2593
if (d->bd_bif == NULL) {
2594
/* Descriptor was detached in concurrent thread */
2595
counter_u64_add(d->bd_dcount, 1);
2596
return;
2597
}
2598
2599
/*
2600
* Detect whether user space has released a buffer back to us, and if
2601
* so, move it from being a hold buffer to a free buffer. This may
2602
* not be the best place to do it (for example, we might only want to
2603
* run this check if we need the space), but for now it's a reliable
2604
* spot to do it.
2605
*/
2606
if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
2607
d->bd_fbuf = d->bd_hbuf;
2608
d->bd_hbuf = NULL;
2609
d->bd_hlen = 0;
2610
bpf_buf_reclaimed(d);
2611
}
2612
2613
/*
2614
* Figure out how many bytes to move. If the packet is
2615
* greater than or equal to the snapshot length, transfer that
2616
* much. Otherwise, transfer the whole packet (unless
2617
* we hit the buffer size limit).
2618
*/
2619
hdrlen = bpf_hdrlen(d);
2620
totlen = hdrlen + min(snaplen, pktlen);
2621
if (totlen > d->bd_bufsize)
2622
totlen = d->bd_bufsize;
2623
2624
/*
2625
* Round up the end of the previous packet to the next longword.
2626
*
2627
* Drop the packet if there's no room and no hope of room.
2628
* If the packet would overflow the storage buffer or the storage
2629
* buffer is considered immutable by the buffer model, try to rotate
2630
* the buffer and wake up pending processes.
2631
*/
2632
#ifdef COMPAT_FREEBSD32
2633
if (d->bd_compat32)
2634
curlen = BPF_WORDALIGN32(d->bd_slen);
2635
else
2636
#endif
2637
curlen = BPF_WORDALIGN(d->bd_slen);
2638
if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
2639
if (d->bd_fbuf == NULL) {
2640
/*
2641
* There's no room in the store buffer, and no
2642
* prospect of room, so drop the packet. Notify the
2643
* buffer model.
2644
*/
2645
bpf_buffull(d);
2646
counter_u64_add(d->bd_dcount, 1);
2647
return;
2648
}
2649
KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
2650
ROTATE_BUFFERS(d);
2651
do_wakeup = 1;
2652
curlen = 0;
2653
} else {
2654
if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
2655
/*
2656
* Immediate mode is set, or the read timeout has
2657
* already expired during a select call. A packet
2658
* arrived, so the reader should be woken up.
2659
*/
2660
do_wakeup = 1;
2661
}
2662
pad = curlen - d->bd_slen;
2663
KASSERT(pad >= 0 && pad <= sizeof(zeroes),
2664
("%s: invalid pad byte count %d", __func__, pad));
2665
if (pad > 0) {
2666
/* Zero pad bytes. */
2667
bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes,
2668
pad);
2669
}
2670
}
2671
2672
caplen = totlen - hdrlen;
2673
tstype = d->bd_tstamp;
2674
do_timestamp = tstype != BPF_T_NONE;
2675
#ifndef BURN_BRIDGES
2676
if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
2677
struct bpf_ts ts;
2678
if (do_timestamp)
2679
bpf_bintime2ts(bt, &ts, tstype);
2680
#ifdef COMPAT_FREEBSD32
2681
if (d->bd_compat32) {
2682
bzero(&hdr32_old, sizeof(hdr32_old));
2683
if (do_timestamp) {
2684
hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
2685
hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
2686
}
2687
hdr32_old.bh_datalen = pktlen;
2688
hdr32_old.bh_hdrlen = hdrlen;
2689
hdr32_old.bh_caplen = caplen;
2690
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
2691
sizeof(hdr32_old));
2692
goto copy;
2693
}
2694
#endif
2695
bzero(&hdr_old, sizeof(hdr_old));
2696
if (do_timestamp) {
2697
hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
2698
hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
2699
}
2700
hdr_old.bh_datalen = pktlen;
2701
hdr_old.bh_hdrlen = hdrlen;
2702
hdr_old.bh_caplen = caplen;
2703
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
2704
sizeof(hdr_old));
2705
goto copy;
2706
}
2707
#endif
2708
2709
/*
2710
* Append the bpf header. Note we append the actual header size, but
2711
* move forward the length of the header plus padding.
2712
*/
2713
bzero(&hdr, sizeof(hdr));
2714
if (do_timestamp)
2715
bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
2716
hdr.bh_datalen = pktlen;
2717
hdr.bh_hdrlen = hdrlen;
2718
hdr.bh_caplen = caplen;
2719
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
2720
2721
/*
2722
* Copy the packet data into the store buffer and update its length.
2723
*/
2724
#ifndef BURN_BRIDGES
2725
copy:
2726
#endif
2727
(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
2728
d->bd_slen = curlen + totlen;
2729
2730
if (do_wakeup)
2731
bpf_wakeup(d);
2732
}
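/*
 * Illustrative userland sketch (an assumption for exposition, kept under
 * #if 0): walking the records that catchpacket() packs into a buffer
 * returned by read(2).  With the default BPF_T_MICROTIME format each
 * record starts with a struct bpf_hdr, and records are padded to
 * BPF_WORDALIGN boundaries exactly as above.  The callback signature is
 * hypothetical.
 */
#if 0
#include <sys/types.h>
#include <net/bpf.h>
#include <stddef.h>

static void
walk_records(const char *buf, size_t buflen,
    void (*cb)(const u_char *pkt, u_int caplen, u_int datalen))
{
	const char *p = buf;

	while (p + sizeof(struct bpf_hdr) <= buf + buflen) {
		const struct bpf_hdr *hp = (const struct bpf_hdr *)p;

		/* Packet data starts bh_hdrlen bytes into the record. */
		cb((const u_char *)p + hp->bh_hdrlen, hp->bh_caplen,
		    hp->bh_datalen);
		/* Records are laid down on BPF_WORDALIGN boundaries. */
		p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	}
}
#endif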
2733
2734
/*
2735
* Free buffers currently in use by a descriptor.
2736
* Called on close.
2737
*/
2738
static void
2739
bpfd_free(epoch_context_t ctx)
2740
{
2741
struct bpf_d *d;
2742
struct bpf_program_buffer *p;
2743
2744
/*
2745
* We don't need to lock out interrupts since this descriptor has
2746
* been detached from its interface and it hasn't yet been marked
2747
* free.
2748
*/
2749
d = __containerof(ctx, struct bpf_d, epoch_ctx);
2750
bpf_free(d);
2751
if (d->bd_rfilter != NULL) {
2752
p = __containerof((void *)d->bd_rfilter,
2753
struct bpf_program_buffer, buffer);
2754
#ifdef BPF_JITTER
2755
p->func = d->bd_bfilter;
2756
#endif
2757
bpf_program_buffer_free(&p->epoch_ctx);
2758
}
2759
if (d->bd_wfilter != NULL) {
2760
p = __containerof((void *)d->bd_wfilter,
2761
struct bpf_program_buffer, buffer);
2762
#ifdef BPF_JITTER
2763
p->func = NULL;
2764
#endif
2765
bpf_program_buffer_free(&p->epoch_ctx);
2766
}
2767
2768
mtx_destroy(&d->bd_lock);
2769
counter_u64_free(d->bd_rcount);
2770
counter_u64_free(d->bd_dcount);
2771
counter_u64_free(d->bd_fcount);
2772
counter_u64_free(d->bd_wcount);
2773
counter_u64_free(d->bd_wfcount);
2774
counter_u64_free(d->bd_wdcount);
2775
counter_u64_free(d->bd_zcopy);
2776
free(d, M_BPF);
2777
}
2778
2779
/*
2780
* Attach an interface to bpf. dlt is the link layer type; hdrlen is the
2781
* fixed size of the link header (variable length headers not yet supported).
2782
*/
2783
void
2784
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2785
{
2786
2787
bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2788
}
2789
2790
/*
2791
* Attach an interface to bpf. ifp is a pointer to the structure
2792
* defining the interface to be attached, dlt is the link layer type,
2793
* and hdrlen is the fixed size of the link header (variable length
2794
* headers are not yet supported).
2795
*/
2796
void
2797
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
2798
struct bpf_if **driverp)
2799
{
2800
struct bpf_if *bp;
2801
2802
KASSERT(*driverp == NULL,
2803
("bpfattach2: driverp already initialized"));
2804
2805
bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
2806
2807
CK_LIST_INIT(&bp->bif_dlist);
2808
CK_LIST_INIT(&bp->bif_wlist);
2809
bp->bif_ifp = ifp;
2810
bp->bif_dlt = dlt;
2811
bp->bif_hdrlen = hdrlen;
2812
bp->bif_bpf = driverp;
2813
refcount_init(&bp->bif_refcnt, 1);
2814
*driverp = bp;
2815
/*
2816
* Reference the ifnet pointer so that it won't be freed until
2817
* we release it.
2818
*/
2819
if_ref(ifp);
2820
BPF_LOCK();
2821
CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
2822
BPF_UNLOCK();
2823
2824
if (bootverbose && IS_DEFAULT_VNET(curvnet))
2825
if_printf(ifp, "bpf attached\n");
2826
}
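/*
 * Illustrative driver-side sketch (a hypothetical driver, kept under
 * #if 0): attaching an interface to bpf during driver attach.  Real
 * Ethernet drivers get this via ether_ifattach(), which itself calls
 * bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN).
 */
#if 0
#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/bpf.h>

static void
foo_attach_bpf(struct ifnet *ifp)
{
	/* Publishes the new bpf_if through ifp->if_bpf; see bpfattach2(). */
	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
}
#endif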
2827
2828
#ifdef VIMAGE
2829
/*
2830
* When moving interfaces between vnet instances we need a way to
2831
* query the dlt and hdrlen before detach so we can re-attach the if_bpf
2832
* after the vmove. We unfortunately have no device driver infrastructure
2833
* to query the interface for these values after creation/attach, so we
2834
* add this as a workaround.
2835
*/
2836
int
2837
bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
2838
{
2839
2840
if (bp == NULL)
2841
return (ENXIO);
2842
if (bif_dlt == NULL && bif_hdrlen == NULL)
2843
return (0);
2844
2845
if (bif_dlt != NULL)
2846
*bif_dlt = bp->bif_dlt;
2847
if (bif_hdrlen != NULL)
2848
*bif_hdrlen = bp->bif_hdrlen;
2849
2850
return (0);
2851
}
2852
2853
/*
2854
* Detach descriptors on interface's vmove event.
2855
*/
2856
void
2857
bpf_ifdetach(struct ifnet *ifp)
2858
{
2859
struct bpf_if *bp;
2860
struct bpf_d *d;
2861
2862
BPF_LOCK();
2863
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2864
if (bp->bif_ifp != ifp)
2865
continue;
2866
2867
/* Detach common descriptors */
2868
while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
2869
bpf_detachd_locked(d, true);
2870
}
2871
2872
/* Detach writer-only descriptors */
2873
while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
2874
bpf_detachd_locked(d, true);
2875
}
2876
}
2877
BPF_UNLOCK();
2878
}
2879
#endif
2880
2881
/*
2882
* Detach bpf from an interface. This involves detaching each descriptor
2883
* associated with the interface. Notify each descriptor as it's detached
2884
* so that any sleepers wake up and get ENXIO.
2885
*/
2886
void
2887
bpfdetach(struct ifnet *ifp)
2888
{
2889
struct bpf_if *bp, *bp_temp;
2890
struct bpf_d *d;
2891
2892
BPF_LOCK();
2893
/* Find all bpf_if struct's which reference ifp and detach them. */
2894
CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
2895
if (ifp != bp->bif_ifp)
2896
continue;
2897
2898
CK_LIST_REMOVE(bp, bif_next);
2899
*bp->bif_bpf = __DECONST(struct bpf_if *, &dead_bpf_if);
2900
2901
CTR4(KTR_NET,
2902
"%s: sheduling free for encap %d (%p) for if %p",
2903
__func__, bp->bif_dlt, bp, ifp);
2904
2905
/* Detach common descriptors */
2906
while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
2907
bpf_detachd_locked(d, true);
2908
}
2909
2910
/* Detach writer-only descriptors */
2911
while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
2912
bpf_detachd_locked(d, true);
2913
}
2914
bpfif_rele(bp);
2915
}
2916
BPF_UNLOCK();
2917
}
2918
2919
bool
2920
bpf_peers_present_if(struct ifnet *ifp)
2921
{
2922
return (bpf_peers_present(ifp->if_bpf));
2923
}
2924
2925
/*
2926
* Get a list of the available data link types of the interface.
2927
*/
2928
static int
2929
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
2930
{
2931
struct ifnet *ifp;
2932
struct bpf_if *bp;
2933
u_int *lst;
2934
int error, n, n1;
2935
2936
BPF_LOCK_ASSERT();
2937
2938
ifp = d->bd_bif->bif_ifp;
2939
n1 = 0;
2940
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2941
if (bp->bif_ifp == ifp)
2942
n1++;
2943
}
2944
if (bfl->bfl_list == NULL) {
2945
bfl->bfl_len = n1;
2946
return (0);
2947
}
2948
if (n1 > bfl->bfl_len)
2949
return (ENOMEM);
2950
2951
lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
2952
n = 0;
2953
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2954
if (bp->bif_ifp != ifp)
2955
continue;
2956
lst[n++] = bp->bif_dlt;
2957
}
2958
error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
2959
free(lst, M_TEMP);
2960
bfl->bfl_len = n;
2961
return (error);
2962
}
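/*
 * Illustrative userland sketch (an assumption for exposition, kept under
 * #if 0) of the two-step BIOCGDLTLIST protocol handled by
 * bpf_getdltlist() above: a NULL bfl_list asks only for the count, a
 * sized array then receives the DLTs.  One of them can subsequently be
 * selected with BIOCSDLT (see bpf_setdlt()).
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <string.h>

static u_int *
get_dlt_list(int bpf_fd, u_int *countp)
{
	struct bpf_dltlist bfl;

	memset(&bfl, 0, sizeof(bfl));
	if (ioctl(bpf_fd, BIOCGDLTLIST, &bfl) < 0)	/* count only */
		return (NULL);
	bfl.bfl_list = malloc(bfl.bfl_len * sizeof(u_int));
	if (bfl.bfl_list == NULL)
		return (NULL);
	if (ioctl(bpf_fd, BIOCGDLTLIST, &bfl) < 0) {	/* fetch DLTs */
		free(bfl.bfl_list);
		return (NULL);
	}
	*countp = bfl.bfl_len;
	return (bfl.bfl_list);
}
#endif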
2963
2964
/*
2965
* Set the data link type of a BPF instance.
2966
*/
2967
static int
2968
bpf_setdlt(struct bpf_d *d, u_int dlt)
2969
{
2970
int error, opromisc;
2971
struct ifnet *ifp;
2972
struct bpf_if *bp;
2973
2974
BPF_LOCK_ASSERT();
2975
MPASS(d->bd_bif != NULL);
2976
2977
/*
2978
* It is safe to check bd_bif without BPFD_LOCK; it cannot be
2979
* changed while we hold the global lock.
2980
*/
2981
if (d->bd_bif->bif_dlt == dlt)
2982
return (0);
2983
2984
ifp = d->bd_bif->bif_ifp;
2985
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2986
if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2987
break;
2988
}
2989
if (bp == NULL)
2990
return (EINVAL);
2991
2992
opromisc = d->bd_promisc;
2993
bpf_attachd(d, bp);
2994
if (opromisc) {
2995
error = ifpromisc(bp->bif_ifp, 1);
2996
if (error)
2997
if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
2998
__func__, error);
2999
else
3000
d->bd_promisc = 1;
3001
}
3002
return (0);
3003
}
3004
3005
static void
3006
bpf_drvinit(void *unused)
3007
{
3008
struct cdev *dev;
3009
3010
sx_init(&bpf_sx, "bpf global lock");
3011
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
3012
/* For compatibility */
3013
make_dev_alias(dev, "bpf0");
3014
}
3015
3016
/*
3017
* Zero out the various packet counters associated with all of the bpf
3018
* descriptors. At some point, we will probably want to get a bit more
3019
* granular and allow the user to specify descriptors to be zeroed.
3020
*/
3021
static void
3022
bpf_zero_counters(void)
3023
{
3024
struct bpf_if *bp;
3025
struct bpf_d *bd;
3026
3027
BPF_LOCK();
3028
/*
3029
* We are protected by the global lock here; interfaces and
3030
* descriptors cannot be deleted while we hold it.
3031
*/
3032
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
3033
CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
3034
counter_u64_zero(bd->bd_rcount);
3035
counter_u64_zero(bd->bd_dcount);
3036
counter_u64_zero(bd->bd_fcount);
3037
counter_u64_zero(bd->bd_wcount);
3038
counter_u64_zero(bd->bd_wfcount);
3039
counter_u64_zero(bd->bd_zcopy);
3040
}
3041
}
3042
BPF_UNLOCK();
3043
}
3044
3045
/*
3046
* Fill filter statistics
3047
*/
3048
static void
3049
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
3050
{
3051
3052
BPF_LOCK_ASSERT();
3053
bzero(d, sizeof(*d));
3054
d->bd_structsize = sizeof(*d);
3055
d->bd_immediate = bd->bd_immediate;
3056
d->bd_promisc = bd->bd_promisc;
3057
d->bd_hdrcmplt = bd->bd_hdrcmplt;
3058
d->bd_direction = bd->bd_direction;
3059
d->bd_feedback = bd->bd_feedback;
3060
d->bd_async = bd->bd_async;
3061
d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
3062
d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
3063
d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
3064
d->bd_sig = bd->bd_sig;
3065
d->bd_slen = bd->bd_slen;
3066
d->bd_hlen = bd->bd_hlen;
3067
d->bd_bufsize = bd->bd_bufsize;
3068
d->bd_pid = bd->bd_pid;
3069
strlcpy(d->bd_ifname,
3070
bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
3071
d->bd_locked = bd->bd_locked;
3072
d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
3073
d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
3074
d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
3075
d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
3076
d->bd_bufmode = bd->bd_bufmode;
3077
}
3078
3079
/*
3080
* Handle `netstat -B' stats request
3081
*/
3082
static int
3083
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
3084
{
3085
static const struct xbpf_d zerostats;
3086
struct xbpf_d *xbdbuf, *xbd, tempstats;
3087
int index, error;
3088
struct bpf_if *bp;
3089
struct bpf_d *bd;
3090
3091
/*
3092
* XXX This is not technically correct. It is possible for non
3093
* privileged users to open bpf devices. It would make sense
3094
* if the users who opened the devices were able to retrieve
3095
* the statistics for them, too.
3096
*/
3097
error = priv_check(req->td, PRIV_NET_BPF);
3098
if (error)
3099
return (error);
3100
/*
3101
* Check to see if the user is requesting that the counters be
3102
* zeroed out. Explicitly check that the supplied data is zeroed,
3103
* as we aren't allowing the user to set the counters currently.
3104
*/
3105
if (req->newptr != NULL) {
3106
if (req->newlen != sizeof(tempstats))
3107
return (EINVAL);
3108
memset(&tempstats, 0, sizeof(tempstats));
3109
error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
3110
if (error)
3111
return (error);
3112
if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
3113
return (EINVAL);
3114
bpf_zero_counters();
3115
return (0);
3116
}
3117
if (req->oldptr == NULL)
3118
return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
3119
if (bpf_bpfd_cnt == 0)
3120
return (SYSCTL_OUT(req, 0, 0));
3121
xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
3122
BPF_LOCK();
3123
if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
3124
BPF_UNLOCK();
3125
free(xbdbuf, M_BPF);
3126
return (ENOMEM);
3127
}
3128
index = 0;
3129
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
3130
/* Send writers-only first */
3131
CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
3132
xbd = &xbdbuf[index++];
3133
bpfstats_fill_xbpf(xbd, bd);
3134
}
3135
CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
3136
xbd = &xbdbuf[index++];
3137
bpfstats_fill_xbpf(xbd, bd);
3138
}
3139
}
3140
BPF_UNLOCK();
3141
error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
3142
free(xbdbuf, M_BPF);
3143
return (error);
3144
}
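/*
 * Illustrative userland sketch (an assumption for exposition, kept under
 * #if 0) of consuming these statistics the way `netstat -B' does.  The
 * sysctl name "net.bpf.stats" and the helper name are assumptions here;
 * struct xbpf_d comes from <net/bpf.h>.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <net/bpf.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void
print_bpf_stats(void)
{
	struct xbpf_d *xbd;
	size_t i, len;

	/* First call sizes the buffer, second call fills it. */
	if (sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0) != 0)
		return;
	xbd = malloc(len);
	if (xbd == NULL ||
	    sysctlbyname("net.bpf.stats", xbd, &len, NULL, 0) != 0) {
		free(xbd);
		return;
	}
	for (i = 0; i < len / sizeof(*xbd); i++)
		printf("%s pid %d: recv %ju drop %ju\n", xbd[i].bd_ifname,
		    (int)xbd[i].bd_pid, (uintmax_t)xbd[i].bd_rcount,
		    (uintmax_t)xbd[i].bd_dcount);
	free(xbd);
}
#endif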
3145
3146
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);
3147
3148
#else /* !DEV_BPF && !NETGRAPH_BPF */
3149
3150
/*
3151
* NOP stubs to allow bpf-using drivers to load and function.
3152
*
3153
* A 'better' implementation would allow the core bpf functionality
3154
* to be loaded at runtime.
3155
*/
3156
3157
void
3158
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
3159
{
3160
}
3161
3162
void
3163
bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
3164
{
3165
}
3166
3167
void
3168
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
3169
{
3170
}
3171
3172
void
3173
bpf_mtap_if(if_t ifp, struct mbuf *m)
3174
{
3175
}
3176
3177
void
3178
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
3179
{
3180
}
3181
3182
void
3183
bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
3184
{
3185
}
3186
3187
void
3188
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3189
{
3190
3191
bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
3192
}
3193
3194
void
3195
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
3196
{
3197
3198
*driverp = __DECONST(struct bpf_if *, &dead_bpf_if);
3199
}
3200
3201
void
3202
bpfdetach(struct ifnet *ifp)
3203
{
3204
}
3205
3206
bool
3207
bpf_peers_present_if(struct ifnet *ifp)
3208
{
3209
return (false);
3210
}
3211
3212
u_int
3213
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
3214
{
3215
return (-1); /* "no filter" behaviour */
3216
}
3217
3218
int
3219
bpf_validate(const struct bpf_insn *f, int len)
3220
{
3221
return (0); /* false */
3222
}
3223
3224
#endif /* !DEV_BPF && !NETGRAPH_BPF */
3225
3226
#ifdef DDB
3227
static void
3228
bpf_show_bpf_if(struct bpf_if *bpf_if)
3229
{
3230
3231
if (bpf_if == NULL)
3232
return;
3233
db_printf("%p:\n", bpf_if);
3234
#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e);
3235
#define BPF_DB_PRINTF_RAW(f, e) db_printf(" %s = " f "\n", #e, e);
3236
/* bif_ext.bif_next */
3237
/* bif_ext.bif_dlist */
3238
BPF_DB_PRINTF("%#x", bif_dlt);
3239
BPF_DB_PRINTF("%u", bif_hdrlen);
3240
/* bif_wlist */
3241
BPF_DB_PRINTF("%p", bif_ifp);
3242
BPF_DB_PRINTF("%p", bif_bpf);
3243
BPF_DB_PRINTF_RAW("%u", refcount_load(&bpf_if->bif_refcnt));
3244
}
3245
3246
DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
3247
{
3248
3249
if (!have_addr) {
3250
db_printf("usage: show bpf_if <struct bpf_if *>\n");
3251
return;
3252
}
3253
3254
bpf_show_bpf_if((struct bpf_if *)addr);
3255
}
3256
#endif
3257
3258