Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.sbin/bhyve/net_backends.c
105518 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2019 Vincenzo Maffione <[email protected]>
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
*
15
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
 */
34
35
#include <sys/types.h>
36
#ifndef WITHOUT_CAPSICUM
37
#include <sys/capsicum.h>
38
#endif
39
#include <sys/ioctl.h>
40
#include <sys/mman.h>
41
#include <sys/uio.h>
42
43
#include <net/if.h>
44
#include <net/if_tap.h>
45
46
#include <assert.h>
47
#ifndef WITHOUT_CAPSICUM
48
#include <capsicum_helpers.h>
49
#endif
50
#include <err.h>
51
#include <errno.h>
52
#include <fcntl.h>
53
#include <poll.h>
54
#include <pthread.h>
55
#include <pthread_np.h>
56
#include <stdio.h>
57
#include <stdlib.h>
58
#include <stdint.h>
59
#include <string.h>
60
#include <sysexits.h>
61
#include <unistd.h>
62
63
#include "config.h"
64
#include "debug.h"
65
#include "iov.h"
66
#include "mevent.h"
67
#include "net_backends.h"
68
#include "net_backends_priv.h"
69
#include "pci_emul.h"
70
71
/*
 * Number of bytes needed for a backend instance: the size of the
 * structure that `be` points to plus its priv_size bytes.
 * The argument is fully parenthesized so that any pointer expression,
 * not only a plain identifier, can be passed.
 */
#define NET_BE_SIZE(be) (sizeof(*(be)) + (be)->priv_size)
72
73
void
74
tap_cleanup(struct net_backend *be)
75
{
76
struct tap_priv *priv = NET_BE_PRIV(be);
77
78
if (priv->mevp) {
79
mevent_delete(priv->mevp);
80
}
81
if (be->fd != -1) {
82
close(be->fd);
83
be->fd = -1;
84
}
85
}
86
87
static int
88
tap_init(struct net_backend *be, const char *devname,
89
nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
90
{
91
struct tap_priv *priv = NET_BE_PRIV(be);
92
char tbuf[80];
93
int opt = 1, up = IFF_UP;
94
95
#ifndef WITHOUT_CAPSICUM
96
cap_rights_t rights;
97
#endif
98
99
if (cb == NULL) {
100
EPRINTLN("TAP backend requires non-NULL callback");
101
return (-1);
102
}
103
104
strcpy(tbuf, "/dev/");
105
strlcat(tbuf, devname, sizeof(tbuf));
106
107
be->fd = open(tbuf, O_RDWR);
108
if (be->fd == -1) {
109
EPRINTLN("open of tap device %s failed", tbuf);
110
goto error;
111
}
112
113
/*
114
* Set non-blocking and register for read
115
* notifications with the event loop
116
*/
117
if (ioctl(be->fd, FIONBIO, &opt) < 0) {
118
EPRINTLN("tap device O_NONBLOCK failed");
119
goto error;
120
}
121
122
if (strncmp("ngd", be->prefix, 3) &&
123
ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) {
124
EPRINTLN("tap device link up failed");
125
goto error;
126
}
127
128
#ifndef WITHOUT_CAPSICUM
129
cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
130
if (caph_rights_limit(be->fd, &rights) == -1)
131
errx(EX_OSERR, "Unable to apply rights for sandbox");
132
#endif
133
134
memset(priv->bbuf, 0, sizeof(priv->bbuf));
135
priv->bbuflen = 0;
136
137
priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
138
if (priv->mevp == NULL) {
139
EPRINTLN("Could not register event");
140
goto error;
141
}
142
143
return (0);
144
145
error:
146
tap_cleanup(be);
147
return (-1);
148
}
149
150
/*
151
* Called to send a buffer chain out to the tap device
152
*/
153
ssize_t
154
tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
155
{
156
return (writev(be->fd, iov, iovcnt));
157
}
158
159
ssize_t
160
tap_peek_recvlen(struct net_backend *be)
161
{
162
struct tap_priv *priv = NET_BE_PRIV(be);
163
ssize_t ret;
164
165
if (priv->bbuflen > 0) {
166
/*
167
* We already have a packet in the bounce buffer.
168
* Just return its length.
169
*/
170
return priv->bbuflen;
171
}
172
173
/*
174
* Read the next packet (if any) into the bounce buffer, so
175
* that we get to know its length and we can return that
176
* to the caller.
177
*/
178
ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
179
if (ret < 0 && errno == EWOULDBLOCK) {
180
return (0);
181
}
182
183
if (ret > 0)
184
priv->bbuflen = ret;
185
186
return (ret);
187
}
188
189
ssize_t
190
tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
191
{
192
struct tap_priv *priv = NET_BE_PRIV(be);
193
ssize_t ret;
194
195
if (priv->bbuflen > 0) {
196
/*
197
* A packet is available in the bounce buffer, so
198
* we read it from there.
199
*/
200
ret = buf_to_iov(priv->bbuf, priv->bbuflen,
201
iov, iovcnt, 0);
202
203
/* Mark the bounce buffer as empty. */
204
priv->bbuflen = 0;
205
206
return (ret);
207
}
208
209
ret = readv(be->fd, iov, iovcnt);
210
if (ret < 0 && errno == EWOULDBLOCK) {
211
return (0);
212
}
213
214
return (ret);
215
}
216
217
void
218
tap_recv_enable(struct net_backend *be)
219
{
220
struct tap_priv *priv = NET_BE_PRIV(be);
221
222
mevent_enable(priv->mevp);
223
}
224
225
void
226
tap_recv_disable(struct net_backend *be)
227
{
228
struct tap_priv *priv = NET_BE_PRIV(be);
229
230
mevent_disable(priv->mevp);
231
}
232
233
/* Capability query for tap-style backends: none are offered for now. */
uint64_t
tap_get_cap(struct net_backend *be __unused)
{
	const uint64_t no_caps = 0;

	return (no_caps);
}
239
240
/*
 * Capability negotiation for tap-style backends.
 * Any non-zero feature mask or non-zero vnet header length is rejected
 * with -1; only the all-zero request succeeds (returns 0).
 */
int
tap_set_cap(struct net_backend *be __unused, uint64_t features,
    unsigned vnet_hdr_len)
{
	if (features != 0 || vnet_hdr_len != 0)
		return (-1);

	return (0);
}
247
248
static struct net_backend tap_backend = {
249
.prefix = "tap",
250
.priv_size = sizeof(struct tap_priv),
251
.init = tap_init,
252
.cleanup = tap_cleanup,
253
.send = tap_send,
254
.peek_recvlen = tap_peek_recvlen,
255
.recv = tap_recv,
256
.recv_enable = tap_recv_enable,
257
.recv_disable = tap_recv_disable,
258
.get_cap = tap_get_cap,
259
.set_cap = tap_set_cap,
260
};
261
262
/* A clone of the tap backend, with a different prefix. */
263
static struct net_backend vmnet_backend = {
264
.prefix = "vmnet",
265
.priv_size = sizeof(struct tap_priv),
266
.init = tap_init,
267
.cleanup = tap_cleanup,
268
.send = tap_send,
269
.peek_recvlen = tap_peek_recvlen,
270
.recv = tap_recv,
271
.recv_enable = tap_recv_enable,
272
.recv_disable = tap_recv_disable,
273
.get_cap = tap_get_cap,
274
.set_cap = tap_set_cap,
275
};
276
277
/* A clone of the tap backend, with a different prefix. */
278
static struct net_backend ngd_backend = {
279
.prefix = "ngd",
280
.priv_size = sizeof(struct tap_priv),
281
.init = tap_init,
282
.cleanup = tap_cleanup,
283
.send = tap_send,
284
.peek_recvlen = tap_peek_recvlen,
285
.recv = tap_recv,
286
.recv_enable = tap_recv_enable,
287
.recv_disable = tap_recv_disable,
288
.get_cap = tap_get_cap,
289
.set_cap = tap_set_cap,
290
};
291
292
/* Add the three tap-style templates to the net_backend_set linker set. */
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
DATA_SET(net_backend_set, ngd_backend);
295
296
int
297
netbe_legacy_config(nvlist_t *nvl, const char *opts)
298
{
299
char *backend, *cp;
300
301
if (opts == NULL)
302
return (0);
303
304
cp = strchr(opts, ',');
305
if (cp == NULL) {
306
set_config_value_node(nvl, "backend", opts);
307
return (0);
308
}
309
backend = strndup(opts, cp - opts);
310
set_config_value_node(nvl, "backend", backend);
311
free(backend);
312
return (pci_parse_legacy_config(nvl, cp + 1));
313
}
314
315
/*
316
* Initialize a backend and attach to the frontend.
317
* This is called during frontend initialization.
318
* @ret is a pointer to the backend to be initialized
319
* @devname is the backend-name as supplied on the command line,
320
* e.g. -s 2:0,frontend-name,backend-name[,other-args]
321
* @cb is the receive callback supplied by the frontend,
322
* and it is invoked in the event loop when a receive
323
* event is generated in the hypervisor,
324
* @param is a pointer to the frontend, and normally used as
325
* the argument for the callback.
326
*/
327
int
328
netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
329
void *param)
330
{
331
struct net_backend **pbe, *nbe, *tbe = NULL;
332
const char *value, *type;
333
char *devname;
334
int err;
335
336
value = get_config_value_node(nvl, "backend");
337
if (value == NULL) {
338
return (-1);
339
}
340
devname = strdup(value);
341
342
/*
343
* Use the type given by configuration if exists; otherwise
344
* use the prefix of the backend as the type.
345
*/
346
type = get_config_value_node(nvl, "type");
347
if (type == NULL)
348
type = devname;
349
350
/*
351
* Find the network backend that matches the user-provided
352
* device name. net_backend_set is built using a linker set.
353
*/
354
SET_FOREACH(pbe, net_backend_set) {
355
if (strncmp(type, (*pbe)->prefix,
356
strlen((*pbe)->prefix)) == 0) {
357
tbe = *pbe;
358
assert(tbe->init != NULL);
359
assert(tbe->cleanup != NULL);
360
assert(tbe->send != NULL);
361
assert(tbe->recv != NULL);
362
assert(tbe->get_cap != NULL);
363
assert(tbe->set_cap != NULL);
364
break;
365
}
366
}
367
368
*ret = NULL;
369
if (tbe == NULL) {
370
free(devname);
371
return (EINVAL);
372
}
373
374
nbe = calloc(1, NET_BE_SIZE(tbe));
375
*nbe = *tbe; /* copy the template */
376
nbe->fd = -1;
377
nbe->sc = param;
378
nbe->be_vnet_hdr_len = 0;
379
nbe->fe_vnet_hdr_len = 0;
380
381
/* Initialize the backend. */
382
err = nbe->init(nbe, devname, nvl, cb, param);
383
if (err) {
384
free(devname);
385
free(nbe);
386
return (err);
387
}
388
389
*ret = nbe;
390
free(devname);
391
392
return (0);
393
}
394
395
void
396
netbe_cleanup(struct net_backend *be)
397
{
398
399
if (be != NULL) {
400
be->cleanup(be);
401
free(be);
402
}
403
}
404
405
uint64_t
406
netbe_get_cap(struct net_backend *be)
407
{
408
409
assert(be != NULL);
410
return (be->get_cap(be));
411
}
412
413
int
414
netbe_set_cap(struct net_backend *be, uint64_t features,
415
unsigned vnet_hdr_len)
416
{
417
int ret;
418
419
assert(be != NULL);
420
421
/* There are only three valid lengths, i.e., 0, 10 and 12. */
422
if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
423
&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
424
return (-1);
425
426
be->fe_vnet_hdr_len = vnet_hdr_len;
427
428
ret = be->set_cap(be, features, vnet_hdr_len);
429
assert(be->be_vnet_hdr_len == 0 ||
430
be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
431
432
return (ret);
433
}
434
435
ssize_t
436
netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
437
{
438
439
return (be->send(be, iov, iovcnt));
440
}
441
442
ssize_t
443
netbe_peek_recvlen(struct net_backend *be)
444
{
445
446
return (be->peek_recvlen(be));
447
}
448
449
/*
450
* Try to read a packet from the backend, without blocking.
451
* If no packets are available, return 0. In case of success, return
452
* the length of the packet just read. Return -1 in case of errors.
453
*/
454
ssize_t
455
netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
456
{
457
458
return (be->recv(be, iov, iovcnt));
459
}
460
461
/*
 * Read a packet from the backend and discard it.
 * Returns the size of the discarded packet or zero if no packet was available.
 * A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec sink = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return netbe_recv(be, &sink, 1);
}
482
483
void
484
netbe_rx_disable(struct net_backend *be)
485
{
486
487
return be->recv_disable(be);
488
}
489
490
void
491
netbe_rx_enable(struct net_backend *be)
492
{
493
494
return be->recv_enable(be);
495
}
496
497
size_t
498
netbe_get_vnet_hdr_len(struct net_backend *be)
499
{
500
501
return (be->be_vnet_hdr_len);
502
}
503
504