#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/types.h>
#include <sys/ck.h>
#include <sys/lock.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/route/route_ctl.h>
#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_linux.h>
#include <netlink/netlink_var.h>
#include <netlink/netlink_route.h>
#include <compat/linux/linux.h>
#include <compat/linux/linux_common.h>
#include <compat/linux/linux_util.h>
#define DEBUG_MOD_NAME nl_linux
#define DEBUG_MAX_LEVEL LOG_DEBUG3
#include <netlink/netlink_debug.h>
_DECLARE_DEBUG(LOG_INFO);
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
return (NL_RTA_DATA_LEN(rta) == sz);
}
static bool
valid_rta_u32(const struct rtattr *rta)
{
return (valid_rta_size(rta, sizeof(uint32_t)));
}
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
return (*((const uint32_t *)NL_RTA_DATA_CONST(rta)));
}
static int
rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
sa_family_t f;
if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
return (EBADMSG);
if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN)
return (EPFNOSUPPORT);
ndm->ndm_family = f;
return (0);
}
static int
rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
sa_family_t f;
if (hdr->nlmsg_len < sizeof(struct nlmsghdr) +
offsetof(struct ifaddrmsg, ifa_family) + sizeof(ifam->ifa_family))
return (EBADMSG);
if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN)
return (EPFNOSUPPORT);
ifam->ifa_family = f;
return (0);
}
static int
rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
struct nlattr *nla, *nla_head;
int attrs_len;
sa_family_t f;
if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg))
return (EBADMSG);
if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN)
return (EPFNOSUPPORT);
rtm->rtm_family = f;
if (rtm->rtm_table == 254)
rtm->rtm_table = 0;
attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
NLA_FOREACH(nla, nla_head, attrs_len) {
RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
nla->nla_type, nla->nla_len, attrs_len);
struct rtattr *rta = (struct rtattr *)nla;
if (rta->rta_len < sizeof(struct rtattr)) {
break;
}
switch (rta->rta_type) {
case NL_RTA_TABLE:
if (!valid_rta_u32(rta))
return (EBADMSG);
rtm->rtm_table = 0;
uint32_t fibnum = _rta_get_uint32(rta);
RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
if (fibnum == 254) {
*((uint32_t *)NL_RTA_DATA(rta)) = 0;
}
break;
}
}
return (0);
}
static int
rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
switch (hdr->nlmsg_type) {
case NL_RTM_GETROUTE:
case NL_RTM_NEWROUTE:
case NL_RTM_DELROUTE:
return (rtnl_route_from_linux(hdr, npt));
case NL_RTM_GETNEIGH:
return (rtnl_neigh_from_linux(hdr, npt));
case NL_RTM_GETADDR:
return (rtnl_ifaddr_from_linux(hdr, npt));
case NL_RTM_NEWLINK:
case NL_RTM_DELLINK:
case NL_RTM_GETLINK:
break;
default:
RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
hdr->nlmsg_type);
}
return (0);
}
static int
nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr,
struct nl_pstate *npt)
{
switch (netlink_family) {
case NETLINK_ROUTE:
return (rtnl_from_linux(*hdr, npt));
}
return (0);
}
static bool
handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
{
char *out_hdr;
out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
if (out_hdr != NULL) {
memcpy(out_hdr, hdr, hdr->nlmsg_len);
nw->num_messages++;
return (true);
}
return (false);
}
static bool
nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
{
return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
hdr->nlmsg_flags, 0));
}
static void *
_nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
{
void *next_hdr = nlmsg_reserve_data(nw, sz, void);
memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
return (next_hdr);
}
#define nlmsg_copy_next_header(_hdr, _ns, _t) \
((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
static bool
nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
{
struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
if (nla != NULL) {
memcpy(nla, nla_orig, nla_orig->nla_len);
return (true);
}
return (false);
}
static bool
nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw)
{
char ifname[LINUX_IFNAMSIZ];
if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
sizeof(ifname)) <= 0)
return (false);
return (nlattr_add_string(nw, IFLA_IFNAME, ifname));
}
#define LINUX_NLA_UNHANDLED -1
static int
nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
struct nl_writer *nw)
{
switch (hdr->nlmsg_type) {
case NL_RTM_NEWLINK:
case NL_RTM_DELLINK:
case NL_RTM_GETLINK:
switch (nla->nla_type) {
case IFLA_IFNAME:
return (nlmsg_translate_ifname_nla(nla, nw));
default:
break;
}
default:
break;
}
return (LINUX_NLA_UNHANDLED);
}
static bool
nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
{
struct nlattr *nla;
int ret;
int hdrlen = NETLINK_ALIGN(raw_hdrlen);
int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
NLA_FOREACH(nla, nla_head, attrs_len) {
RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
if (nla->nla_len < sizeof(struct nlattr)) {
return (false);
}
ret = nlmsg_translate_all_nla(hdr, nla, nw);
if (ret == LINUX_NLA_UNHANDLED)
ret = nlmsg_copy_nla(nla, nw);
if (!ret)
return (false);
}
return (true);
}
#undef LINUX_NLA_UNHANDLED
static unsigned int
rtnl_if_flags_to_linux(unsigned int if_flags)
{
unsigned int result = 0;
for (int i = 0; i < 31; i++) {
unsigned int flag = 1 << i;
if (!(flag & if_flags))
continue;
switch (flag) {
case IFF_UP:
case IFF_BROADCAST:
case IFF_DEBUG:
case IFF_LOOPBACK:
case IFF_POINTOPOINT:
case IFF_DRV_RUNNING:
case IFF_NOARP:
case IFF_PROMISC:
case IFF_ALLMULTI:
result |= flag;
break;
case IFF_NEEDSEPOCH:
case IFF_DRV_OACTIVE:
case IFF_SIMPLEX:
case IFF_LINK0:
case IFF_LINK1:
case IFF_LINK2:
case IFF_CANTCONFIG:
case IFF_PPROMISC:
case IFF_MONITOR:
case IFF_STATICARP:
case IFF_STICKYARP:
case IFF_DYING:
case IFF_RENAMING:
break;
case IFF_MULTICAST:
result |= 1 << 12;
}
}
return (result);
}
static bool
rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
struct nl_writer *nw)
{
if (!nlmsg_copy_header(hdr, nw))
return (false);
struct ifinfomsg *ifinfo;
ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
switch (ifinfo->ifi_type) {
case IFT_ETHER:
ifinfo->ifi_type = LINUX_ARPHRD_ETHER;
break;
}
ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
return (false);
if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
return (false);
if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
return (false);
nlmsg_end(nw);
RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
return (true);
}
static bool
rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
struct nl_writer *nw)
{
if (!nlmsg_copy_header(hdr, nw))
return (false);
struct ifaddrmsg *ifamsg;
ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
return (false);
nlmsg_end(nw);
RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
return (true);
}
static bool
rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
struct nl_writer *nw)
{
if (!nlmsg_copy_header(hdr, nw))
return (false);
struct ndmsg *ndm;
ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
return (false);
nlmsg_end(nw);
RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
return (true);
}
static bool
rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
struct nl_writer *nw)
{
if (!nlmsg_copy_header(hdr, nw))
return (false);
struct rtmsg *rtm;
rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
struct nlattr *nla;
int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
NLA_FOREACH(nla, nla_head, attrs_len) {
struct rtattr *rta = (struct rtattr *)nla;
if (rta->rta_len < sizeof(struct rtattr)) {
break;
}
switch (rta->rta_type) {
case NL_RTA_TABLE:
{
uint32_t fibnum;
fibnum = _rta_get_uint32(rta);
if (fibnum == 0)
fibnum = 254;
RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
return (false);
}
break;
default:
if (!nlmsg_copy_nla(nla, nw))
return (false);
break;
}
}
nlmsg_end(nw);
RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
return (true);
}
static bool
rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
{
RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
switch (hdr->nlmsg_type) {
case NL_RTM_NEWLINK:
case NL_RTM_DELLINK:
case NL_RTM_GETLINK:
return (rtnl_newlink_to_linux(hdr, nlp, nw));
case NL_RTM_NEWADDR:
case NL_RTM_DELADDR:
return (rtnl_newaddr_to_linux(hdr, nlp, nw));
case NL_RTM_NEWROUTE:
case NL_RTM_DELROUTE:
return (rtnl_newroute_to_linux(hdr, nlp, nw));
case NL_RTM_NEWNEIGH:
case NL_RTM_DELNEIGH:
case NL_RTM_GETNEIGH:
return (rtnl_newneigh_to_linux(hdr, nlp, nw));
default:
RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
hdr->nlmsg_type);
return (handle_default_out(hdr, nw));
}
}
static bool
nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
{
if (!nlmsg_copy_header(hdr, nw))
return (false);
struct nlmsgerr *nlerr;
nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
nlerr->error = bsd_to_linux_errno(nlerr->error);
int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
if (hdr->nlmsg_len == copied_len) {
nlmsg_end(nw);
return (true);
}
RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
nlerr->msg.nlmsg_type);
char *dst_payload, *src_payload;
int copy_len = hdr->nlmsg_len - copied_len;
dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
src_payload = (char *)hdr + copied_len;
memcpy(dst_payload, src_payload, copy_len);
nlmsg_end(nw);
return (true);
}
static bool
nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
{
if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
switch (hdr->nlmsg_type) {
case NLMSG_ERROR:
return (nlmsg_error_to_linux(hdr, nlp, nw));
case NLMSG_NOOP:
case NLMSG_DONE:
case NLMSG_OVERRUN:
return (handle_default_out(hdr, nw));
default:
RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
hdr->nlmsg_type);
return (handle_default_out(hdr, nw));
}
}
switch (nlp->nl_proto) {
case NETLINK_ROUTE:
return (rtnl_to_linux(hdr, nlp, nw));
default:
return (handle_default_out(hdr, nw));
}
}
static struct nl_buf *
nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp)
{
struct nl_writer nw;
u_int offset, msglen;
if (__predict_false(!nl_writer_unicast(&nw,
orig->datalen + SCRATCH_BUFFER_SIZE, nlp, false)))
return (NULL);
for (offset = 0;
offset + sizeof(struct nlmsghdr) <= orig->datalen;
offset += msglen) {
struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
msglen = NLMSG_ALIGN(hdr->nlmsg_len);
if (!nlmsg_to_linux(hdr, nlp, &nw)) {
RT_LOG(LOG_DEBUG, "failed to process msg type %d",
hdr->nlmsg_type);
nl_buf_free(nw.buf);
return (NULL);
}
}
RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__,
nw.buf->datalen, nw.num_messages);
return (nw.buf);
}
static struct linux_netlink_provider linux_netlink_v1 = {
.msgs_to_linux = nlmsgs_to_linux,
.msg_from_linux = nlmsg_from_linux,
};
void
linux_netlink_register(void)
{
linux_netlink_p = &linux_netlink_v1;
}
void
linux_netlink_deregister(void)
{
linux_netlink_p = NULL;
}