#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <net/if.h>
#include <net/if_tap.h>
#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include "config.h"
#include "debug.h"
#include "iov.h"
#include "mevent.h"
#include "net_backends.h"
#include "net_backends_priv.h"
#include "pci_emul.h"
#define NET_BE_SIZE(be) (sizeof(*be) + (be)->priv_size)
void
tap_cleanup(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
if (priv->mevp) {
mevent_delete(priv->mevp);
}
if (be->fd != -1) {
close(be->fd);
be->fd = -1;
}
}
static int
tap_init(struct net_backend *be, const char *devname,
nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
{
struct tap_priv *priv = NET_BE_PRIV(be);
char tbuf[80];
int opt = 1, up = IFF_UP;
#ifndef WITHOUT_CAPSICUM
cap_rights_t rights;
#endif
if (cb == NULL) {
EPRINTLN("TAP backend requires non-NULL callback");
return (-1);
}
strcpy(tbuf, "/dev/");
strlcat(tbuf, devname, sizeof(tbuf));
be->fd = open(tbuf, O_RDWR);
if (be->fd == -1) {
EPRINTLN("open of tap device %s failed", tbuf);
goto error;
}
if (ioctl(be->fd, FIONBIO, &opt) < 0) {
EPRINTLN("tap device O_NONBLOCK failed");
goto error;
}
if (strncmp("ngd", be->prefix, 3) &&
ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) {
EPRINTLN("tap device link up failed");
goto error;
}
#ifndef WITHOUT_CAPSICUM
cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
if (caph_rights_limit(be->fd, &rights) == -1)
errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif
memset(priv->bbuf, 0, sizeof(priv->bbuf));
priv->bbuflen = 0;
priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
if (priv->mevp == NULL) {
EPRINTLN("Could not register event");
goto error;
}
return (0);
error:
tap_cleanup(be);
return (-1);
}
ssize_t
tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
return (writev(be->fd, iov, iovcnt));
}
ssize_t
tap_peek_recvlen(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
ssize_t ret;
if (priv->bbuflen > 0) {
return priv->bbuflen;
}
ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
if (ret < 0 && errno == EWOULDBLOCK) {
return (0);
}
if (ret > 0)
priv->bbuflen = ret;
return (ret);
}
ssize_t
tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
struct tap_priv *priv = NET_BE_PRIV(be);
ssize_t ret;
if (priv->bbuflen > 0) {
ret = buf_to_iov(priv->bbuf, priv->bbuflen,
iov, iovcnt, 0);
priv->bbuflen = 0;
return (ret);
}
ret = readv(be->fd, iov, iovcnt);
if (ret < 0 && errno == EWOULDBLOCK) {
return (0);
}
return (ret);
}
void
tap_recv_enable(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
mevent_enable(priv->mevp);
}
void
tap_recv_disable(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
mevent_disable(priv->mevp);
}
uint64_t
tap_get_cap(struct net_backend *be __unused)
{
return (0);
}
int
tap_set_cap(struct net_backend *be __unused, uint64_t features,
unsigned vnet_hdr_len)
{
return ((features || vnet_hdr_len) ? -1 : 0);
}
static struct net_backend tap_backend = {
.prefix = "tap",
.priv_size = sizeof(struct tap_priv),
.init = tap_init,
.cleanup = tap_cleanup,
.send = tap_send,
.peek_recvlen = tap_peek_recvlen,
.recv = tap_recv,
.recv_enable = tap_recv_enable,
.recv_disable = tap_recv_disable,
.get_cap = tap_get_cap,
.set_cap = tap_set_cap,
};
static struct net_backend vmnet_backend = {
.prefix = "vmnet",
.priv_size = sizeof(struct tap_priv),
.init = tap_init,
.cleanup = tap_cleanup,
.send = tap_send,
.peek_recvlen = tap_peek_recvlen,
.recv = tap_recv,
.recv_enable = tap_recv_enable,
.recv_disable = tap_recv_disable,
.get_cap = tap_get_cap,
.set_cap = tap_set_cap,
};
static struct net_backend ngd_backend = {
.prefix = "ngd",
.priv_size = sizeof(struct tap_priv),
.init = tap_init,
.cleanup = tap_cleanup,
.send = tap_send,
.peek_recvlen = tap_peek_recvlen,
.recv = tap_recv,
.recv_enable = tap_recv_enable,
.recv_disable = tap_recv_disable,
.get_cap = tap_get_cap,
.set_cap = tap_set_cap,
};
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
DATA_SET(net_backend_set, ngd_backend);
int
netbe_legacy_config(nvlist_t *nvl, const char *opts)
{
char *backend, *cp;
if (opts == NULL)
return (0);
cp = strchr(opts, ',');
if (cp == NULL) {
set_config_value_node(nvl, "backend", opts);
return (0);
}
backend = strndup(opts, cp - opts);
set_config_value_node(nvl, "backend", backend);
free(backend);
return (pci_parse_legacy_config(nvl, cp + 1));
}
int
netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
void *param)
{
struct net_backend **pbe, *nbe, *tbe = NULL;
const char *value, *type;
char *devname;
int err;
value = get_config_value_node(nvl, "backend");
if (value == NULL) {
return (-1);
}
devname = strdup(value);
type = get_config_value_node(nvl, "type");
if (type == NULL)
type = devname;
SET_FOREACH(pbe, net_backend_set) {
if (strncmp(type, (*pbe)->prefix,
strlen((*pbe)->prefix)) == 0) {
tbe = *pbe;
assert(tbe->init != NULL);
assert(tbe->cleanup != NULL);
assert(tbe->send != NULL);
assert(tbe->recv != NULL);
assert(tbe->get_cap != NULL);
assert(tbe->set_cap != NULL);
break;
}
}
*ret = NULL;
if (tbe == NULL) {
free(devname);
return (EINVAL);
}
nbe = calloc(1, NET_BE_SIZE(tbe));
*nbe = *tbe;
nbe->fd = -1;
nbe->sc = param;
nbe->be_vnet_hdr_len = 0;
nbe->fe_vnet_hdr_len = 0;
err = nbe->init(nbe, devname, nvl, cb, param);
if (err) {
free(devname);
free(nbe);
return (err);
}
*ret = nbe;
free(devname);
return (0);
}
void
netbe_cleanup(struct net_backend *be)
{
if (be != NULL) {
be->cleanup(be);
free(be);
}
}
uint64_t
netbe_get_cap(struct net_backend *be)
{
assert(be != NULL);
return (be->get_cap(be));
}
int
netbe_set_cap(struct net_backend *be, uint64_t features,
unsigned vnet_hdr_len)
{
int ret;
assert(be != NULL);
if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
return (-1);
be->fe_vnet_hdr_len = vnet_hdr_len;
ret = be->set_cap(be, features, vnet_hdr_len);
assert(be->be_vnet_hdr_len == 0 ||
be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
return (ret);
}
ssize_t
netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
return (be->send(be, iov, iovcnt));
}
ssize_t
netbe_peek_recvlen(struct net_backend *be)
{
return (be->peek_recvlen(be));
}
ssize_t
netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
return (be->recv(be, iov, iovcnt));
}
ssize_t
netbe_rx_discard(struct net_backend *be)
{
static uint8_t dummybuf[65536 + 64];
struct iovec iov;
iov.iov_base = dummybuf;
iov.iov_len = sizeof(dummybuf);
return netbe_recv(be, &iov, 1);
}
void
netbe_rx_disable(struct net_backend *be)
{
return be->recv_disable(be);
}
void
netbe_rx_enable(struct net_backend *be)
{
return be->recv_enable(be);
}
size_t
netbe_get_vnet_hdr_len(struct net_backend *be)
{
return (be->be_vnet_hdr_len);
}