#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"
#include "opt_route.h"
#include "opt_rss.h"
#include <sys/param.h>
#include <sys/hash.h>
#include <sys/systm.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <sys/domain.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/smr.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#include <vm/uma.h>
#include <vm/vm.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/if_llatbl.h>
#include <net/route.h>
#include <net/rss_config.h>
#include <net/vnet.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_pcb_var.h>
#include <netinet/tcp.h>
#ifdef INET
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#endif
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#endif
#include <net/route/nhop.h>
#endif
#include <netipsec/ipsec_support.h>
#include <security/mac/mac_framework.h>
/*
 * Capacity bounds for the il_inp[] array of a load-balancing group: new
 * groups start at SIZMIN slots and are doubled on demand until SIZMAX.
 */
#define INPCBLBGROUP_SIZMIN 8
#define INPCBLBGROUP_SIZMAX 256
/* inp_flags bit tested by the SMR lock/iterator helpers to skip an entry. */
#define INP_FREED 0x00000200
/* inp_flags bit set while the inpcb is a member of a load-balancing group. */
#define INP_INLBGROUP 0x01000000
/*
 * Per-vnet port ranges consulted when a local port is chosen automatically:
 * low{first,last}auto for INP_LOWPORT sockets, hi{first,last}auto for
 * INP_HIGHPORT sockets and {first,last}auto otherwise.
 */
VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1;
VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART;
VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST;
VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST;
VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO;
VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO;
/*
 * Binding to a port in [ipport_reservedlow, ipport_reservedhigh] requires
 * PRIV_NETINET_RESERVEDPORT (see in_pcbbind_avail()).
 */
VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1;
VNET_DEFINE(int, ipport_reservedlow);
/* Non-zero: start each automatic port search from a random position. */
VNET_DEFINE(int, ipport_randomized) = 1;
#ifdef INET
static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
struct in_addr faddr, u_int fport_arg,
struct in_addr laddr, u_int lport_arg,
int lookupflags, uint8_t numa_domain, int fib);
/*
 * Clamp the integer lvalue 'var' into the closed interval [min, max].
 */
#define	RANGECHK(var, min, max) do {					\
	if ((var) < (min))						\
		(var) = (min);						\
	else if ((var) > (max))						\
		(var) = (max);						\
} while (0)

/*
 * Sysctl handler shared by all automatic port range knobs.  The integer is
 * read/written through sysctl_handle_int(); when that succeeds every range
 * variable of the current vnet is clamped back into its legal interval.
 * Returns whatever sysctl_handle_int() returned.
 */
static int
sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_handle_int(oidp, arg1, arg2, req);
	if (error != 0)
		return (error);

	/* Low (reserved) automatic range. */
	RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
	RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
	/* Default automatic range. */
	RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
	RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
	/* High automatic range. */
	RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
	RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);

	return (0);
}
#undef RANGECHK
/*
 * net.inet.ip.portrange.*: tunables for automatic and reserved port ranges.
 * The six automatic range knobs share sysctl_net_ipport_check(), which
 * clamps the values after every successful write.
 */
static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I",
    "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I",
    "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I",
    "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I",
    "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I",
    "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I",
    "");
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
    &VNET_NAME(ipport_reservedhigh), 0, "");
/*
 * ipport_reservedlow is a VNET_DEFINE()d variable like the others, so the
 * OID must carry CTLFLAG_VNET; otherwise the handler is given the raw
 * VNET_NAME() address instead of the current vnet's instance.
 */
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
    &VNET_NAME(ipport_reservedlow), 0, "");
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
    CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
#ifdef RATELIMIT
/*
 * Rate limiting statistics.  The counters are only declared and exported
 * (read-only) here; they are presumably allocated and updated elsewhere --
 * no allocation or update is visible in this part of the file.
 */
counter_u64_t rate_limit_new;
counter_u64_t rate_limit_chg;
counter_u64_t rate_limit_active;
counter_u64_t rate_limit_alloc_fail;
counter_u64_t rate_limit_set_ok;
/* net.inet.ip.rl.*: read-only view of the counters above. */
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, rl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"IP Rate Limiting");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, active, CTLFLAG_RD,
&rate_limit_active, "Active rate limited connections");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, alloc_fail, CTLFLAG_RD,
&rate_limit_alloc_fail, "Rate limited connection failures");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, set_ok, CTLFLAG_RD,
&rate_limit_set_ok, "Rate limited setting succeeded");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, newrl, CTLFLAG_RD,
&rate_limit_new, "Total Rate limit new attempts");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, chgrl, CTLFLAG_RD,
&rate_limit_chg, "Total Rate limited change attempts");
#endif
#endif
VNET_DEFINE(uint32_t, in_pcbhashseed);
static void
in_pcbhashseed_init(void)
{
V_in_pcbhashseed = arc4random();
}
VNET_SYSINIT(in_pcbhashseed_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
in_pcbhashseed_init, NULL);
#ifdef INET
/*
 * Per-vnet switch (default off) consulted by in_pcbconnect(): when zero,
 * connecting to an address for which in_broadcast() is true fails with
 * ENETUNREACH.
 */
VNET_DEFINE_STATIC(int, connect_inaddr_wild) = 0;
#define V_connect_inaddr_wild VNET(connect_inaddr_wild)
SYSCTL_INT(_net_inet_ip, OID_AUTO, connect_inaddr_wild,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(connect_inaddr_wild), 0,
"Allow connecting to INADDR_ANY or INADDR_BROADCAST for connect(2)");
#endif
static void in_pcbremhash(struct inpcb *);
/*
 * Allocate and initialize an empty load-balancing group with room for
 * 'size' inpcbs.  The allocation is zeroed and does not sleep; the group
 * takes its own reference on 'cred'.  The new group is not linked into any
 * hash chain.  Returns NULL when memory is not available.
 */
static struct inpcblbgroup *
in_pcblbgroup_alloc(struct ucred *cred, u_char vflag, uint16_t port,
    const union in_dependaddr *addr, int size, uint8_t numa_domain, int fib)
{
	struct inpcblbgroup *newgrp;

	/* Header plus 'size' trailing il_inp[] slots. */
	newgrp = malloc(__offsetof(struct inpcblbgroup, il_inp[size]), M_PCB,
	    M_ZERO | M_NOWAIT);
	if (newgrp != NULL) {
		LIST_INIT(&newgrp->il_pending);
		newgrp->il_cred = crhold(cred);
		newgrp->il_vflag = vflag;
		newgrp->il_lport = port;
		newgrp->il_numa_domain = numa_domain;
		newgrp->il_fibnum = fib;
		newgrp->il_dependladdr = *addr;
		newgrp->il_inpsiz = size;
	}
	return (newgrp);
}
/*
 * Epoch callback scheduled by in_pcblbgroup_free(): recover the group from
 * its embedded epoch context, drop the credential reference and release the
 * memory.
 */
static void
in_pcblbgroup_free_deferred(epoch_context_t ctx)
{
	struct inpcblbgroup *dead;

	dead = __containerof(ctx, struct inpcblbgroup, il_epoch_ctx);

	crfree(dead->il_cred);
	free(dead, M_PCB);
}
/*
 * Unlink a load-balancing group from its hash chain and schedule its
 * destruction through NET_EPOCH_CALL().  The group must no longer have any
 * pending inpcbs.
 */
static void
in_pcblbgroup_free(struct inpcblbgroup *grp)
{

	KASSERT(LIST_EMPTY(&grp->il_pending),
	    ("local group %p still has pending inps", grp));

	/* Unlink now; the memory is released from the epoch callback. */
	CK_LIST_REMOVE(grp, il_list);
	NET_EPOCH_CALL(in_pcblbgroup_free_deferred, &grp->il_epoch_ctx);
}
/*
 * Find the load-balancing group that 'inp' belongs to, either as an active
 * member (il_inp[]) or as a pending one (il_pending).  Only the hash chain
 * selected by the inpcb's local port is searched.  The inpcb lock and the
 * pcbinfo hash lock must be held.  Returns NULL when no group contains inp.
 */
static struct inpcblbgroup *
in_pcblbgroup_find(struct inpcb *inp)
{
	struct inpcbinfo *pcbinfo;
	struct inpcblbgrouphead *chain;
	struct inpcblbgroup *grp;
	struct inpcb *pending;

	INP_LOCK_ASSERT(inp);

	pcbinfo = inp->inp_pcbinfo;
	INP_HASH_LOCK_ASSERT(pcbinfo);

	chain = &pcbinfo->ipi_lbgrouphashbase[
	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
	CK_LIST_FOREACH(grp, chain, il_list) {
		/* Active members first... */
		for (unsigned int slot = 0; slot < grp->il_inpcnt; slot++) {
			if (grp->il_inp[slot] == inp)
				return (grp);
		}
		/* ...then the members still on the pending list. */
		LIST_FOREACH(pending, &grp->il_pending, inp_lbgroup_list) {
			if (pending == inp)
				return (grp);
		}
	}
	return (NULL);
}
static void
in_pcblbgroup_insert(struct inpcblbgroup *grp, struct inpcb *inp)
{
KASSERT(grp->il_inpcnt < grp->il_inpsiz,
("invalid local group size %d and count %d", grp->il_inpsiz,
grp->il_inpcnt));
INP_WLOCK_ASSERT(inp);
if (inp->inp_socket->so_proto->pr_listen != pr_listen_notsupp &&
!SOLISTENING(inp->inp_socket)) {
LIST_INSERT_HEAD(&grp->il_pending, inp, inp_lbgroup_list);
grp->il_pendcnt++;
} else {
grp->il_inp[grp->il_inpcnt] = inp;
atomic_store_rel_int(&grp->il_inpcnt, grp->il_inpcnt + 1);
}
inp->inp_flags |= INP_INLBGROUP;
}
/*
 * Replace 'old_grp' with a newly allocated group of capacity 'size' that
 * carries the same identity (cred, vflag, port, address, NUMA domain, FIB).
 * Active members are copied before the new group is linked onto 'hdr'; the
 * pending list is then moved over and the old group is freed.  Returns the
 * new group, or NULL (leaving old_grp untouched) if allocation fails.
 */
static struct inpcblbgroup *
in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
    struct inpcblbgroup *old_grp, int size)
{
	struct inpcblbgroup *new_grp;
	int slot;

	new_grp = in_pcblbgroup_alloc(old_grp->il_cred, old_grp->il_vflag,
	    old_grp->il_lport, &old_grp->il_dependladdr, size,
	    old_grp->il_numa_domain, old_grp->il_fibnum);
	if (new_grp == NULL)
		return (NULL);

	KASSERT(old_grp->il_inpcnt < new_grp->il_inpsiz,
	    ("invalid new local group size %d and old local group count %d",
	    new_grp->il_inpsiz, old_grp->il_inpcnt));

	/* Populate the array completely, then publish the group. */
	slot = 0;
	while (slot < old_grp->il_inpcnt) {
		new_grp->il_inp[slot] = old_grp->il_inp[slot];
		++slot;
	}
	new_grp->il_inpcnt = old_grp->il_inpcnt;
	CK_LIST_INSERT_HEAD(hdr, new_grp, il_list);

	/* Hand the pending members over to the new group. */
	LIST_SWAP(&old_grp->il_pending, &new_grp->il_pending, inpcb,
	    inp_lbgroup_list);
	new_grp->il_pendcnt = old_grp->il_pendcnt;
	old_grp->il_pendcnt = 0;

	in_pcblbgroup_free(old_grp);
	return (new_grp);
}
/*
 * Insert 'inp' into the load-balancing group matching its prison, vflag,
 * local port, local address, FIB and the given NUMA domain, creating the
 * group or doubling its capacity when required.  The inpcb and the pcbinfo
 * hash must be write-locked.
 *
 * Returns 0 on success, and also when nothing was inserted because the inpcb
 * is an IPv4 wildcard on an AF_INET6 socket or because the group already
 * has INPCBLBGROUP_SIZMAX slots (the latter is logged at most once per
 * minute).  Returns ENOMEM if a group could not be allocated.
 */
static int
in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain)
{
	static const struct timeval interval = { 60, 0 };
	static struct timeval lastprint;
	struct inpcbinfo *pcbinfo;
	struct inpcblbgrouphead *hdr;
	struct inpcblbgroup *grp;
	int fib;

	pcbinfo = inp->inp_pcbinfo;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	if ((inp->inp_flags & INP_BOUNDFIB) != 0)
		fib = inp->inp_inc.inc_fibnum;
	else
		fib = RT_ALL_FIBS;

#ifdef INET6
	/* IPv4 wildcard bound through an AF_INET6 socket: nothing to do. */
	if ((inp->inp_vflag & INP_IPV4) &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
		return (0);
	}
#endif

	hdr = &pcbinfo->ipi_lbgrouphashbase[
	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
	CK_LIST_FOREACH(grp, hdr, il_list) {
		if (grp->il_cred->cr_prison != inp->inp_cred->cr_prison ||
		    grp->il_vflag != inp->inp_vflag ||
		    grp->il_lport != inp->inp_lport ||
		    grp->il_numa_domain != numa_domain ||
		    grp->il_fibnum != fib ||
		    memcmp(&grp->il_dependladdr,
		    &inp->inp_inc.inc_ie.ie_dependladdr,
		    sizeof(grp->il_dependladdr)) != 0)
			continue;
		break;
	}

	if (grp == NULL) {
		/* No matching group yet: create one, fill it, publish it. */
		grp = in_pcblbgroup_alloc(inp->inp_cred, inp->inp_vflag,
		    inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
		    INPCBLBGROUP_SIZMIN, numa_domain, fib);
		if (grp == NULL)
			return (ENOMEM);
		in_pcblbgroup_insert(grp, inp);
		CK_LIST_INSERT_HEAD(hdr, grp, il_list);
		return (0);
	}

	if (grp->il_inpcnt + grp->il_pendcnt == grp->il_inpsiz) {
		/* Group is full: give up at the cap, otherwise double it. */
		if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
			if (ratecheck(&lastprint, &interval))
				printf("lb group port %d, limit reached\n",
				    ntohs(grp->il_lport));
			return (0);
		}
		grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
		if (grp == NULL)
			return (ENOMEM);
	}

	in_pcblbgroup_insert(grp, inp);
	return (0);
}
/*
 * Remove 'inp', which must carry INP_INLBGROUP, from its load-balancing
 * group and clear the flag.  An active member is removed by moving the last
 * array entry into its slot; if it was the only active member and nothing
 * is pending, the whole group is freed instead.  A pending member is just
 * unlinked from the pending list.  The inpcb and the pcbinfo hash must be
 * write-locked.
 */
static void
in_pcbremlbgrouphash(struct inpcb *inp)
{
	struct inpcbinfo *pcbinfo;
	struct inpcblbgrouphead *hdr;
	struct inpcblbgroup *grp;
	struct inpcb *pending;
	int slot;

	pcbinfo = inp->inp_pcbinfo;

	INP_WLOCK_ASSERT(inp);
	MPASS(inp->inp_flags & INP_INLBGROUP);
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	hdr = &pcbinfo->ipi_lbgrouphashbase[
	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
	CK_LIST_FOREACH(grp, hdr, il_list) {
		for (slot = 0; slot < grp->il_inpcnt; ++slot) {
			if (grp->il_inp[slot] == inp) {
				if (grp->il_inpcnt != 1 ||
				    !LIST_EMPTY(&grp->il_pending)) {
					/* Fill the hole with the last entry. */
					grp->il_inp[slot] =
					    grp->il_inp[grp->il_inpcnt - 1];
					atomic_store_rel_int(&grp->il_inpcnt,
					    grp->il_inpcnt - 1);
				} else {
					/* Group would be left empty. */
					in_pcblbgroup_free(grp);
				}
				inp->inp_flags &= ~INP_INLBGROUP;
				return;
			}
		}
		LIST_FOREACH(pending, &grp->il_pending, inp_lbgroup_list) {
			if (pending == inp) {
				LIST_REMOVE(inp, inp_lbgroup_list);
				grp->il_pendcnt--;
				inp->inp_flags &= ~INP_INLBGROUP;
				return;
			}
		}
	}
	/* INP_INLBGROUP was set, so the inpcb must have been found above. */
	__assert_unreachable();
}
int
in_pcblbgroup_numa(struct inpcb *inp, int arg)
{
struct inpcbinfo *pcbinfo;
int error;
uint8_t numa_domain;
switch (arg) {
case TCP_REUSPORT_LB_NUMA_NODOM:
numa_domain = M_NODOM;
break;
case TCP_REUSPORT_LB_NUMA_CURDOM:
numa_domain = PCPU_GET(domain);
break;
default:
if (arg < 0 || arg >= vm_ndomains)
return (EINVAL);
numa_domain = arg;
}
pcbinfo = inp->inp_pcbinfo;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK(pcbinfo);
if (in_pcblbgroup_find(inp) != NULL) {
in_pcbremlbgrouphash(inp);
in_pcbinslbgrouphash(inp, numa_domain);
error = 0;
} else {
error = ENOENT;
}
INP_HASH_WUNLOCK(pcbinfo);
return (error);
}
/* struct inpcbhead must have the same size as a plain LIST_HEAD of inpcbs. */
CTASSERT(sizeof(struct inpcbhead) == sizeof(LIST_HEAD(, inpcb)));

/*
 * Initialize a pcbinfo: its two mutexes, the global inpcb list, the exact
 * and wildcard connection hashes (hash_nelements buckets each, sharing
 * ipi_hashmask), and the port and load-balancing group hashes (both sized
 * from porthash_nelements, capped at IPPORT_MAX + 1).  The UMA zone and its
 * SMR state are taken from 'pcbstor'.
 */
void
in_pcbinfo_init(struct inpcbinfo *pcbinfo, struct inpcbstorage *pcbstor,
    u_int hash_nelements, u_int porthash_nelements)
{

	/* Locks. */
	mtx_init(&pcbinfo->ipi_lock, pcbstor->ips_infolock_name, NULL, MTX_DEF);
	mtx_init(&pcbinfo->ipi_hash_lock, pcbstor->ips_hashlock_name,
	    NULL, MTX_DEF);
#ifdef VIMAGE
	pcbinfo->ipi_vnet = curvnet;
#endif

	/* List of all inpcbs. */
	CK_LIST_INIT(&pcbinfo->ipi_listhead);
	pcbinfo->ipi_count = 0;

	/* Connection hashes. */
	pcbinfo->ipi_hash_exact = hashinit(hash_nelements, M_PCB,
	    &pcbinfo->ipi_hashmask);
	pcbinfo->ipi_hash_wild = hashinit(hash_nelements, M_PCB,
	    &pcbinfo->ipi_hashmask);

	/* Port-indexed hashes never need more buckets than ports. */
	porthash_nelements = imin(porthash_nelements, IPPORT_MAX + 1);
	pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
	    &pcbinfo->ipi_porthashmask);
	pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB,
	    &pcbinfo->ipi_lbgrouphashmask);

	/* Backing storage. */
	pcbinfo->ipi_zone = pcbstor->ips_zone;
	pcbinfo->ipi_smr = uma_zone_get_smr(pcbinfo->ipi_zone);
}
/*
 * Tear down a pcbinfo set up by in_pcbinfo_init(): release the four hash
 * tables and destroy both mutexes.  No inpcbs may remain.
 */
void
in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
{

	KASSERT(pcbinfo->ipi_count == 0,
	    ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));

	/* Hash tables, in the order they were created. */
	hashdestroy(pcbinfo->ipi_hash_exact, M_PCB, pcbinfo->ipi_hashmask);
	hashdestroy(pcbinfo->ipi_hash_wild, M_PCB, pcbinfo->ipi_hashmask);
	hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
	    pcbinfo->ipi_porthashmask);
	hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
	    pcbinfo->ipi_lbgrouphashmask);

	/* Locks. */
	mtx_destroy(&pcbinfo->ipi_hash_lock);
	mtx_destroy(&pcbinfo->ipi_lock);
}
static void inpcb_fini(void *, int);
/*
 * Create the SMR-enabled, cache-aligned UMA zone that backs the inpcbs
 * described by the struct inpcbstorage passed in 'arg'.  The storage's own
 * ips_pcbinit is installed as the zone init hook and inpcb_fini as the fini
 * hook.
 */
void
in_pcbstorage_init(void *arg)
{
	struct inpcbstorage *stor;

	stor = arg;
	stor->ips_zone = uma_zcreate(stor->ips_zone_name, stor->ips_size,
	    NULL, NULL, stor->ips_pcbinit, inpcb_fini, UMA_ALIGN_CACHE,
	    UMA_ZONE_SMR);
}
/*
 * Destroy the UMA zone created by in_pcbstorage_init() for the struct
 * inpcbstorage passed in 'arg'.
 */
void
in_pcbstorage_destroy(void *arg)
{
	struct inpcbstorage *stor;

	stor = arg;
	uma_zdestroy(stor->ips_zone);
}
/*
 * Allocate a new inpcb from pcbinfo's zone and attach it to socket 'so'.
 *
 * On success returns 0 with so->so_pcb pointing at the new inpcb, which is
 * returned write-locked, holds one reference, carries a reference on the
 * socket's credential and is linked on pcbinfo's list of all inpcbs.
 * Returns ENOBUFS if the zone allocation fails, or the error from MAC or
 * IPsec initialization (when compiled in), in which case the inpcb has been
 * released again.
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
{
struct inpcb *inp;
#if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC)
int error;
#endif
/* Non-sleeping allocation. */
inp = uma_zalloc_smr(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
return (ENOBUFS);
/* Only the region starting at inp_start_zero is cleared. */
bzero(&inp->inp_start_zero, inp_zero_size);
#ifdef NUMA
inp->inp_numa_domain = M_NODOM;
#endif
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
inp->inp_cred = crhold(so->so_cred);
inp->inp_inc.inc_fibnum = so->so_fibnum;
#ifdef MAC
error = mac_inpcb_init(inp, M_NOWAIT);
if (error != 0)
goto out;
mac_inpcb_create(so, inp);
#endif
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
error = ipsec_init_pcbpolicy(inp);
if (error != 0) {
#ifdef MAC
/* Undo the MAC label set up above before bailing out. */
mac_inpcb_destroy(inp);
#endif
goto out;
}
#endif
#ifdef INET6
if (INP_SOCKAF(so) == AF_INET6) {
inp->inp_vflag |= INP_IPV6PROTO | INP_IPV6;
if (V_ip6_v6only)
inp->inp_flags |= IN6P_IPV6_V6ONLY;
#ifdef INET
else
/* Not v6-only: the socket may also carry IPv4. */
inp->inp_vflag |= INP_IPV4;
#endif
if (V_ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
inp->in6p_hops = -1;
}
#endif
/*
 * With both INET and INET6 compiled in, the 'else' below pairs with the
 * AF_INET6 test above, so INP_IPV4 is set only for non-AF_INET6 sockets.
 * With INET alone it is set unconditionally.
 */
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
inp->inp_vflag |= INP_IPV4;
#endif
inp->inp_smr = SMR_SEQ_INVALID;
inp->inp_route.ro_flags = RT_LLE_CACHE;
/* The initial reference. */
refcount_init(&inp->inp_refcount, 1);
/* The inpcb stays write-locked on return. */
INP_WLOCK(inp);
INP_INFO_WLOCK(pcbinfo);
pcbinfo->ipi_count++;
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
CK_LIST_INSERT_HEAD(&pcbinfo->ipi_listhead, inp, inp_list);
INP_INFO_WUNLOCK(pcbinfo);
so->so_pcb = inp;
return (0);
#if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC)
out:
/* Failure: drop the credential reference and return inp to the zone. */
crfree(inp->inp_cred);
#ifdef INVARIANTS
inp->inp_cred = NULL;
#endif
uma_zfree_smr(pcbinfo->ipi_zone, inp);
return (error);
#endif
}
#ifdef INET
int
in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, int flags,
struct ucred *cred)
{
int anonport, error;
KASSERT(sin == NULL || sin->sin_family == AF_INET,
("%s: invalid address family for %p", __func__, sin));
KASSERT(sin == NULL || sin->sin_len == sizeof(struct sockaddr_in),
("%s: invalid address length for %p", __func__, sin));
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
anonport = sin == NULL || sin->sin_port == 0;
error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr,
&inp->inp_lport, flags, cred);
if (error)
return (error);
if (__predict_false((error = in_pcbinshash(inp)) != 0)) {
MPASS(inp->inp_socket->so_options & SO_REUSEPORT_LB);
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_lport = 0;
inp->inp_flags &= ~INP_BOUNDFIB;
return (error);
}
if (anonport)
inp->inp_flags |= INP_ANONPORT;
return (0);
}
#endif
#if defined(INET) || defined(INET6)
/*
 * Select an unused local port for 'inp'.
 *
 * The search range depends on the inpcb flags: the high range for
 * INP_HIGHPORT, the low range for INP_LOWPORT (which requires
 * PRIV_NETINET_RESERVEDPORT) and the default range otherwise.  The range
 * bounds are normalized so that first <= last.
 *
 * lsa		local address to test against, or NULL for the wildcard
 * lportp	receives the chosen port in network byte order
 * fsa/fport	when fsa is non-NULL, a port is considered free if no
 *		connection with this exact 4-tuple exists; lsa must then be
 *		non-NULL as its family selects the lookup.  When fsa is NULL
 *		the local-binding lookup is used instead.
 * cred		credential for the privilege check and local lookups
 * lookupflags	passed through to the lookup functions
 *
 * The inpcb and the pcbinfo hash must be locked.  Returns 0 on success,
 * the priv_check_cred() error for INP_LOWPORT, or EADDRNOTAVAIL when every
 * port in the range is taken.
 */
int
in_pcb_lport_dest(const struct inpcb *inp, struct sockaddr *lsa,
    u_short *lportp, struct sockaddr *fsa, u_short fport, struct ucred *cred,
    int lookupflags)
{
	struct inpcbinfo *pcbinfo;
	struct inpcb *tmpinp;
	unsigned short *lastport;
	int count, error;
	u_short aux, first, last, lport;
#ifdef INET
	struct in_addr laddr, faddr;
#endif
#ifdef INET6
	struct in6_addr *laddr6, *faddr6;
#endif

	pcbinfo = inp->inp_pcbinfo;

	INP_LOCK_ASSERT(inp);
	INP_HASH_LOCK_ASSERT(pcbinfo);

	if (inp->inp_flags & INP_HIGHPORT) {
		first = V_ipport_hifirstauto;
		last = V_ipport_hilastauto;
		lastport = &pcbinfo->ipi_lasthi;
	} else if (inp->inp_flags & INP_LOWPORT) {
		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT);
		if (error)
			return (error);
		first = V_ipport_lowfirstauto;
		last = V_ipport_lowlastauto;
		lastport = &pcbinfo->ipi_lastlow;
	} else {
		first = V_ipport_firstauto;
		last = V_ipport_lastauto;
		lastport = &pcbinfo->ipi_lastport;
	}

	/* Make sure first <= last so a single upward search suffices. */
	if (first > last) {
		aux = first;
		first = last;
		last = aux;
	}

#ifdef INET
	laddr.s_addr = INADDR_ANY;	/* also used by the INET6 path below */
	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) {
		if (lsa != NULL)
			laddr = ((struct sockaddr_in *)lsa)->sin_addr;
		if (fsa != NULL)
			faddr = ((struct sockaddr_in *)fsa)->sin_addr;
	}
#endif
#ifdef INET6
	laddr6 = NULL;
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		if (lsa != NULL)
			laddr6 = &((struct sockaddr_in6 *)lsa)->sin6_addr;
		if (fsa != NULL)
			faddr6 = &((struct sockaddr_in6 *)fsa)->sin6_addr;
	}
#endif

	tmpinp = NULL;

	/*
	 * Randomize the starting point.  A range holding a single port
	 * (first == last, which the sysctls permit) leaves nothing to
	 * randomize and must not reach the modulo: the divisor would be 0.
	 */
	if (V_ipport_randomized && first != last)
		*lastport = first + (arc4random() % (last - first));

	count = last - first;

	do {
		if (count-- < 0)	/* every port in the range is in use */
			return (EADDRNOTAVAIL);
		++*lastport;
		if (*lastport < first || *lastport > last)
			*lastport = first;
		lport = htons(*lastport);

		if (fsa != NULL) {
#ifdef INET
			if (lsa->sa_family == AF_INET) {
				tmpinp = in_pcblookup_hash_locked(pcbinfo,
				    faddr, fport, laddr, lport, lookupflags,
				    M_NODOM, RT_ALL_FIBS);
			}
#endif
#ifdef INET6
			if (lsa->sa_family == AF_INET6) {
				tmpinp = in6_pcblookup_hash_locked(pcbinfo,
				    faddr6, fport, laddr6, lport, lookupflags,
				    M_NODOM, RT_ALL_FIBS);
			}
#endif
		} else {
#ifdef INET6
			if ((inp->inp_vflag & INP_IPV6) != 0) {
				tmpinp = in6_pcblookup_local(pcbinfo,
				    &inp->in6p_laddr, lport, RT_ALL_FIBS,
				    lookupflags, cred);
#ifdef INET
				if (tmpinp == NULL &&
				    (inp->inp_vflag & INP_IPV4))
					tmpinp = in_pcblookup_local(pcbinfo,
					    laddr, lport, RT_ALL_FIBS,
					    lookupflags, cred);
#endif
			}
#endif
#if defined(INET) && defined(INET6)
			else
#endif
#ifdef INET
				tmpinp = in_pcblookup_local(pcbinfo, laddr,
				    lport, RT_ALL_FIBS, lookupflags, cred);
#endif
		}
	} while (tmpinp != NULL);

	*lportp = lport;

	return (0);
}
/*
 * IPv4 convenience wrapper around in_pcb_lport_dest() for the case without
 * a foreign address.  When 'laddrp' is non-NULL it is wrapped in a
 * zero-filled sockaddr_in (family and address set); otherwise no local
 * address is passed.
 */
int
in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
    struct ucred *cred, int lookupflags)
{
	struct sockaddr_in laddr;
	struct sockaddr *lsa;

	lsa = NULL;
	if (laddrp) {
		bzero(&laddr, sizeof(laddr));
		laddr.sin_family = AF_INET;
		laddr.sin_addr = *laddrp;
		lsa = (struct sockaddr *)&laddr;
	}
	return (in_pcb_lport_dest(inp, lsa, lportp, NULL, 0, cred,
	    lookupflags));
}
#endif
#ifdef INET
/*
 * Check whether 'inp' may bind to laddr/lport (network byte order).
 *
 * Returns 0 if the binding is allowed, otherwise:
 *   EADDRNOTAVAIL  laddr is a specific unicast address that is not
 *                  configured locally and INP_BINDANY is not set;
 *   EACCES         lport lies in the reserved range and 'cred' lacks
 *                  PRIV_NETINET_RESERVEDPORT;
 *   EADDRINUSE     the address/port conflicts with an existing binding.
 *
 * The inpcb and the pcbinfo hash must be locked.
 */
static int
in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
    const u_short lport, const int fib, int sooptions, int lookupflags,
    struct ucred *cred)
{
	struct inpcb *other;
	int reuseport, reuseport_lb;

	INP_LOCK_ASSERT(inp);
	INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);

	reuseport = (sooptions & SO_REUSEPORT);
	reuseport_lb = (sooptions & SO_REUSEPORT_LB);

	if (IN_MULTICAST(ntohl(laddr.s_addr))) {
		/*
		 * For multicast addresses SO_REUSEADDR is treated like
		 * SO_REUSEPORT / SO_REUSEPORT_LB.
		 */
		if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT)) != 0)
			reuseport = SO_REUSEADDR | SO_REUSEPORT;
		if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT_LB)) != 0)
			reuseport_lb = SO_REUSEADDR | SO_REUSEPORT_LB;
	} else if (!in_nullhost(laddr)) {
		struct sockaddr_in sin;

		/* A specific address must be configured on this host. */
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(sin);
		sin.sin_addr = laddr;

		if ((inp->inp_flags & INP_BINDANY) == 0 &&
		    ifa_ifwithaddr_check((const struct sockaddr *)&sin) == 0)
			return (EADDRNOTAVAIL);
	}

	/* Without a specific port there is nothing left to conflict with. */
	if (lport == 0)
		return (0);

	if (ntohs(lport) <= V_ipport_reservedhigh &&
	    ntohs(lport) >= V_ipport_reservedlow &&
	    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT))
		return (EACCES);

	if (!IN_MULTICAST(ntohl(laddr.s_addr)) &&
	    priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) {
		/*
		 * Unprivileged: refuse if a socket owned by another uid is
		 * already bound here (for stream sockets, only when that
		 * socket has no foreign address).
		 */
		other = in_pcblookup_local(inp->inp_pcbinfo, laddr, lport,
		    RT_ALL_FIBS, INPLOOKUP_WILDCARD, cred);
		if (other != NULL &&
		    (inp->inp_socket->so_type != SOCK_STREAM ||
		    in_nullhost(other->inp_faddr)) &&
		    (inp->inp_cred->cr_uid != other->inp_cred->cr_uid))
			return (EADDRINUSE);
	}

	other = in_pcblookup_local(inp->inp_pcbinfo, laddr, lport, fib,
	    lookupflags, cred);
	if (other != NULL && ((reuseport | reuseport_lb) &
	    other->inp_socket->so_options) == 0) {
#ifdef INET6
		/*
		 * With INET6 the conflict is tolerated only when both
		 * addresses are wildcards and both sockets are IPv6 ones.
		 */
		if (!in_nullhost(laddr) ||
		    !in_nullhost(other->inp_laddr) ||
		    (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
		    (other->inp_vflag & INP_IPV6PROTO) == 0)
#endif
			return (EADDRINUSE);
	}

	return (0);
}
/*
 * Work out the local address and port 'inp' should be bound to, without
 * inserting it into any hash.
 *
 * *laddrp and *lportp carry the current values on entry and receive the
 * result on success.  With a non-NULL 'sin' the current address must be
 * INADDR_ANY and the port may not change once set (EINVAL otherwise); the
 * requested address is passed through prison_local_ip4() and checked with
 * in_pcbbind_avail().  If no port results, one is picked with
 * in_pcb_lport().  INPBIND_FIB in 'flags' restricts the availability check
 * to the inpcb's FIB and sets INP_BOUNDFIB on success.
 *
 * The inpcb and the pcbinfo hash must be locked.  Returns 0 or an errno.
 */
int
in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
    u_short *lportp, int flags, struct ucred *cred)
{
	struct socket *so = inp->inp_socket;
	struct in_addr laddr;
	u_short lport = 0;
	int error, fib, lookupflags, sooptions;

	INP_LOCK_ASSERT(inp);
	INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);

	laddr.s_addr = *laddrp;
	if (sin != NULL && laddr.s_addr != INADDR_ANY)
		return (EINVAL);

	/* Without any reuse option, wildcard bindings conflict as well. */
	sooptions = atomic_load_int(&so->so_options);
	if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT | SO_REUSEPORT_LB)) == 0)
		lookupflags = INPLOOKUP_WILDCARD;
	else
		lookupflags = 0;

	if (sin != NULL) {
		KASSERT(sin->sin_family == AF_INET,
		    ("%s: invalid family for address %p", __func__, sin));
		KASSERT(sin->sin_len == sizeof(*sin),
		    ("%s: invalid length for address %p", __func__, sin));

		error = prison_local_ip4(cred, &sin->sin_addr);
		if (error)
			return (error);
		if (sin->sin_port != *lportp) {
			/* An already assigned port may not be changed. */
			if (*lportp != 0)
				return (EINVAL);
			lport = sin->sin_port;
		}
		laddr = sin->sin_addr;

		if ((flags & INPBIND_FIB) != 0)
			fib = inp->inp_inc.inc_fibnum;
		else
			fib = RT_ALL_FIBS;

		error = in_pcbbind_avail(inp, laddr, lport, fib, sooptions,
		    lookupflags, cred);
		if (error != 0)
			return (error);
	} else {
		error = prison_local_ip4(cred, &laddr);
		if (error != 0)
			return (error);
	}

	if (*lportp != 0)
		lport = *lportp;
	if (lport == 0) {
		/* Still no port: pick one automatically. */
		error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
		if (error != 0)
			return (error);
	}

	*laddrp = laddr.s_addr;
	*lportp = lport;
	if ((flags & INPBIND_FIB) != 0)
		inp->inp_flags |= INP_BOUNDFIB;
	return (0);
}
/*
 * Connect 'inp' to the foreign address/port in 'sin'.
 *
 * The inpcb must be write-locked with the pcbinfo hash write-locked, and
 * must not have a foreign address yet.  A local address is selected with
 * in_pcbladdr() when none is set, and a local port is chosen when none is
 * set (the inpcb is then flagged INP_ANONPORT).  On success the 4-tuple is
 * stored in the inpcb and the inpcb is inserted into, or rehashed in, the
 * connection hash.
 *
 * Returns 0 on success, EADDRNOTAVAIL for a zero foreign port, ENETUNREACH
 * for a wildcard/broadcast destination that is not permitted, EADDRINUSE
 * if the resulting 4-tuple already exists, or an error from source address
 * or port selection.
 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred)
{
struct in_addr laddr, faddr;
u_short lport;
int error;
bool anonport;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
KASSERT(in_nullhost(inp->inp_faddr),
("%s: inp is already connected", __func__));
KASSERT(sin->sin_family == AF_INET,
("%s: invalid address family for %p", __func__, sin));
KASSERT(sin->sin_len == sizeof(*sin),
("%s: invalid address length for %p", __func__, sin));
if (sin->sin_port == 0)
return (EADDRNOTAVAIL);
/* No local port yet: one is chosen below. */
anonport = (inp->inp_lport == 0);
/*
 * Destinations for which in_broadcast() is true are accepted only when
 * the connect_inaddr_wild sysctl is set and at least one interface
 * address exists; they are then mapped using the first entry of the
 * interface address list.
 */
if (__predict_false(in_broadcast(sin->sin_addr))) {
if (!V_connect_inaddr_wild || CK_STAILQ_EMPTY(&V_in_ifaddrhead))
return (ENETUNREACH);
if (in_nullhost(sin->sin_addr)) {
/* INADDR_ANY: start from the first address; the jail may replace it. */
faddr =
IA_SIN(CK_STAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
if ((error = prison_get_ip4(cred, &faddr)) != 0)
return (error);
} else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
CK_STAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags
& IFF_BROADCAST) {
/* INADDR_BROADCAST: use that interface's broadcast address. */
faddr = satosin(&CK_STAILQ_FIRST(
&V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
} else
faddr = sin->sin_addr;
} else
faddr = sin->sin_addr;
/* Choose a source address unless the inpcb already has one. */
if (in_nullhost(inp->inp_laddr)) {
error = in_pcbladdr(inp, &faddr, &laddr, cred);
if (error)
return (error);
} else
laddr = inp->inp_laddr;
if (anonport) {
struct sockaddr_in lsin = {
.sin_family = AF_INET,
.sin_addr = laddr,
};
struct sockaddr_in fsin = {
.sin_family = AF_INET,
.sin_addr = faddr,
};
/* Pick a local port that is free for this destination. */
error = in_pcb_lport_dest(inp, (struct sockaddr *)&lsin,
&lport, (struct sockaddr *)&fsin, sin->sin_port, cred,
INPLOOKUP_WILDCARD);
if (error)
return (error);
} else if (in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr,
sin->sin_port, laddr, inp->inp_lport, 0, M_NODOM, RT_ALL_FIBS) !=
NULL)
/* The 4-tuple is already taken by another inpcb. */
return (EADDRINUSE);
else
lport = inp->inp_lport;
MPASS(!in_nullhost(inp->inp_laddr) || inp->inp_lport != 0 ||
!(inp->inp_flags & INP_INHASHLIST));
/* Commit the 4-tuple before (re)hashing. */
inp->inp_faddr = faddr;
inp->inp_fport = sin->sin_port;
inp->inp_laddr = laddr;
inp->inp_lport = lport;
if ((inp->inp_flags & INP_INHASHLIST) == 0) {
error = in_pcbinshash(inp);
MPASS(error == 0);
} else
in_pcbrehash(inp);
#ifdef ROUTE_MPATH
if (CALC_FLOWID_OUTBOUND) {
uint32_t hash_val, hash_type;
/* Record a software flow id computed from the addresses and ports. */
hash_val = fib4_calc_software_hash(inp->inp_laddr,
inp->inp_faddr, 0, sin->sin_port,
inp->inp_socket->so_proto->pr_protocol, &hash_type);
inp->inp_flowid = hash_val;
inp->inp_flowtype = hash_type;
}
#endif
if (anonport)
inp->inp_flags |= INP_ANONPORT;
return (0);
}
/*
 * Select the local (source) address to use when sending to '*faddr' and
 * store it in '*laddr'.  Must be called inside the network epoch.
 *
 * Selection order:
 *  - if prison_saddrsel_ip4() returns 0, return 0 immediately (presumably
 *    it has already supplied the address -- confirm against its contract);
 *  - for a multicast destination with an outgoing multicast interface set,
 *    the first address on that interface that the jail allows;
 *  - otherwise an address derived from the route to the destination (or,
 *    without a usable route, from the interface matching the destination),
 *    restricted to addresses the jail allows, with the jail's own address
 *    from prison_get_ip4() as the last resort.
 *
 * Returns 0 on success, EADDRNOTAVAIL or ENETUNREACH when no candidate
 * interface address exists, EHOSTUNREACH when the selected address is
 * INADDR_ANY, or an error from prison_get_ip4().
 */
int
in_pcbladdr(const struct inpcb *inp, struct in_addr *faddr,
struct in_addr *laddr, struct ucred *cred)
{
struct ifaddr *ifa;
struct sockaddr *sa;
struct sockaddr_in *sin, dst;
struct nhop_object *nh;
int error;
NET_EPOCH_ASSERT();
KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
if (!prison_saddrsel_ip4(cred, laddr))
return (0);
/* Multicast destination with an explicit outgoing interface. */
if (IN_MULTICAST(ntohl(faddr->s_addr)) && inp->inp_moptions != NULL &&
inp->inp_moptions->imo_multicast_ifp != NULL) {
struct ifnet *ifp = inp->inp_moptions->imo_multicast_ifp;
struct in_ifaddr *ia;
CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if (ia->ia_ifp == ifp &&
prison_check_ip4(cred, &ia->ia_addr.sin_addr) == 0)
break;
}
if (ia == NULL)
return (EADDRNOTAVAIL);
*laddr = ia->ia_addr.sin_addr;
return (0);
}
error = 0;
nh = NULL;
/* Destination as a sockaddr_in for the ifa_ifwith*() lookups below. */
bzero(&dst, sizeof(dst));
sin = &dst;
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
sin->sin_addr.s_addr = faddr->s_addr;
/* SO_DONTROUTE skips the route lookup altogether. */
if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
nh = fib4_lookup(inp->inp_inc.inc_fibnum, *faddr,
0, NHR_NONE, 0);
/*
 * Case 1: no usable route.  Fall back to the interface whose destination
 * address or network matches the destination.
 */
if (nh == NULL || nh->nh_ifp == NULL) {
struct in_ifaddr *ia;
struct ifnet *ifp;
ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin,
inp->inp_socket->so_fibnum));
if (ia == NULL) {
ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0,
inp->inp_socket->so_fibnum));
}
if (ia == NULL) {
error = ENETUNREACH;
goto done;
}
/* No jail IPv4 restriction: use the address found. */
if (!prison_flag(cred, PR_IP4)) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
/* Jailed: take the first IPv4 address on that interface the jail allows. */
ifp = ia->ia_ifp;
ia = NULL;
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
continue;
sin = (struct sockaddr_in *)sa;
if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
ia = (struct in_ifaddr *)ifa;
break;
}
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
/* Nothing on the interface qualifies: use the jail's own address. */
error = prison_get_ip4(cred, laddr);
goto done;
}
/* Case 2: route through a non-loopback interface. */
if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) == 0) {
struct in_ifaddr *ia;
struct ifnet *ifp;
/* No jail IPv4 restriction: use the address attached to the route. */
if (!prison_flag(cred, PR_IP4)) {
ia = (struct in_ifaddr *)nh->nh_ifa;
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
/* Jailed: first try the route's own address... */
sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr;
if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
ia = (struct in_ifaddr *)nh->nh_ifa;
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
/* ...then any other allowed IPv4 address on the outgoing interface... */
ia = NULL;
ifp = nh->nh_ifp;
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
continue;
sin = (struct sockaddr_in *)sa;
if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
ia = (struct in_ifaddr *)ifa;
break;
}
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
/* ...and finally the jail's own address. */
error = prison_get_ip4(cred, laddr);
goto done;
}
/*
 * Case 3: route through a loopback interface.  Look the destination up
 * among the interface addresses (by destination address, network, then
 * exact address) rather than using the route's address.
 */
if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) != 0) {
struct in_ifaddr *ia;
ia = ifatoia(ifa_ifwithdstaddr(sintosa(&dst),
inp->inp_socket->so_fibnum));
if (ia == NULL)
ia = ifatoia(ifa_ifwithnet(sintosa(&dst), 0,
inp->inp_socket->so_fibnum));
if (ia == NULL)
ia = ifatoia(ifa_ifwithaddr(sintosa(&dst)));
if (!prison_flag(cred, PR_IP4)) {
if (ia == NULL) {
error = ENETUNREACH;
goto done;
}
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
/* Jailed: take the first allowed IPv4 address on the matching interface. */
if (ia != NULL) {
struct ifnet *ifp;
ifp = ia->ia_ifp;
ia = NULL;
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
continue;
sin = (struct sockaddr_in *)sa;
if (prison_check_ip4(cred,
&sin->sin_addr) == 0) {
ia = (struct in_ifaddr *)ifa;
break;
}
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
}
/* Last resort: the jail's own address. */
error = prison_get_ip4(cred, laddr);
goto done;
}
done:
/* A wildcard source address is never a valid result. */
if (error == 0 && laddr->s_addr == INADDR_ANY)
return (EHOSTUNREACH);
return (error);
}
void
in_pcbdisconnect(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
KASSERT(inp->inp_smr == SMR_SEQ_INVALID,
("%s: inp %p was already disconnected", __func__, inp));
in_pcbremhash_locked(inp);
inp->inp_smr = smr_advance(inp->inp_pcbinfo->ipi_smr);
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_faddr.s_addr = INADDR_ANY;
inp->inp_fport = 0;
}
#endif
/*
 * Called when the socket starts listening.  If 'inp' is a member of a
 * load-balancing group it is taken off the group's pending list and
 * re-inserted through in_pcblbgroup_insert().  The inpcb must be
 * write-locked; the pcbinfo hash lock is taken here.
 */
void
in_pcblisten(struct inpcb *inp)
{
	struct inpcbinfo *pcbinfo;
	struct inpcblbgroup *grp;

	INP_WLOCK_ASSERT(inp);

	if ((inp->inp_flags & INP_INLBGROUP) == 0)
		return;

	pcbinfo = inp->inp_pcbinfo;
	INP_HASH_WLOCK(pcbinfo);
	grp = in_pcblbgroup_find(inp);
	/* Off the pending list first, so the insert sees a free slot. */
	LIST_REMOVE(inp, inp_lbgroup_list);
	grp->il_pendcnt--;
	in_pcblbgroup_insert(grp, inp);
	INP_HASH_WUNLOCK(pcbinfo);
}
/*
 * Acquire the inpcb lock: shared for INPLOOKUP_RLOCKPCB, exclusive for any
 * other value of 'lock'.
 */
void
inp_lock(struct inpcb *inp, const inp_lookup_t lock)
{

	if (lock == INPLOOKUP_RLOCKPCB)
		rw_rlock(&inp->inp_lock);
	else
		rw_wlock(&inp->inp_lock);
}
/*
 * Release the inpcb lock taken by inp_lock()/inp_trylock() with the same
 * 'lock' mode.
 */
void
inp_unlock(struct inpcb *inp, const inp_lookup_t lock)
{

	if (lock == INPLOOKUP_RLOCKPCB)
		rw_runlock(&inp->inp_lock);
	else
		rw_wunlock(&inp->inp_lock);
}
/*
 * Try to acquire the inpcb lock without blocking: shared for
 * INPLOOKUP_RLOCKPCB, exclusive otherwise.  Returns the result of the
 * underlying rw_try_*lock() call.
 */
int
inp_trylock(struct inpcb *inp, const inp_lookup_t lock)
{

	if (lock == INPLOOKUP_RLOCKPCB)
		return (rw_try_rlock(&inp->inp_lock));
	return (rw_try_wlock(&inp->inp_lock));
}
/*
 * Lock an inpcb that was found while inside the pcbinfo's SMR section.
 *
 * The SMR section is always exited before returning.  Returns true with the
 * inpcb locked in mode 'lock', or false with nothing held when the inpcb
 * could not be referenced, went away while the lock was being acquired, or
 * has any of 'ignflags' set in inp_flags.
 */
static inline bool
_inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock, const int ignflags)
{

	MPASS(lock == INPLOOKUP_RLOCKPCB || lock == INPLOOKUP_WLOCKPCB);
	SMR_ASSERT_ENTERED(inp->inp_pcbinfo->ipi_smr);

	/* Fast path: the lock is free. */
	if (__predict_true(inp_trylock(inp, lock))) {
		bool ignored;

		ignored = (inp->inp_flags & ignflags) != 0;
		smr_exit(inp->inp_pcbinfo->ipi_smr);
		if (__predict_false(ignored)) {
			inp_unlock(inp, lock);
			return (false);
		}
		return (true);
	}

	/*
	 * Slow path: pin the inpcb with a reference so that the SMR section
	 * can be left before blocking on the lock.
	 */
	if (__predict_false(!refcount_acquire_if_not_zero(
	    &inp->inp_refcount))) {
		smr_exit(inp->inp_pcbinfo->ipi_smr);
		return (false);
	}
	smr_exit(inp->inp_pcbinfo->ipi_smr);
	inp_lock(inp, lock);

	/* A true return here means our reference was the last one. */
	if (__predict_false(in_pcbrele(inp, lock)))
		return (false);

	/* Still alive, but it may have been flagged while we waited. */
	if (__predict_false(inp->inp_flags & ignflags)) {
		inp_unlock(inp, lock);
		return (false);
	}
	return (true);
}
/*
 * Public wrapper around _inp_smr_lock() that rejects inpcbs with INP_FREED
 * or INP_DROPPED set.
 */
bool
inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
{
	const int ignflags = INP_FREED | INP_DROPPED;

	return (_inp_smr_lock(inp, lock, ignflags));
}
/*
 * Helpers for inp_next().  With hash == INP_ALL_LIST the iteration covers
 * the pcbinfo's list of all inpcbs; otherwise 'hash' is used as a bucket
 * index into the exact-match hash.
 */
/* First inpcb of the selected list. */
#define II_LIST_FIRST(ipi, hash) \
(((hash) == INP_ALL_LIST) ? \
CK_LIST_FIRST(&(ipi)->ipi_listhead) : \
CK_LIST_FIRST(&(ipi)->ipi_hash_exact[(hash)]))
/* Successor of 'inp' on the selected list. */
#define II_LIST_NEXT(inp, hash) \
(((hash) == INP_ALL_LIST) ? \
CK_LIST_NEXT((inp), inp_list) : \
CK_LIST_NEXT((inp), inp_hash_exact))
/* Assert that 'inp' is locked in the mode named by 'lock'. */
#define II_LOCK_ASSERT(inp, lock) \
rw_assert(&(inp)->inp_lock, \
(lock) == INPLOOKUP_RLOCKPCB ? RA_RLOCKED : RA_WLOCKED )
/*
 * Advance the iterator 'ii' and return the next inpcb, locked in mode
 * ii->lock, or NULL at the end of the list.
 *
 * On every call after the first, ii->inp is the inpcb returned previously
 * and must still be locked by the caller; it is unlocked here once its
 * successor has been locked (or the end has been reached).  inpcbs rejected
 * by the optional ii->match callback, and inpcbs flagged INP_FREED, are
 * skipped.  The SMR section is entered and exited internally; it is not
 * held on return.
 */
struct inpcb *
inp_next(struct inpcb_iterator *ii)
{
const struct inpcbinfo *ipi = ii->ipi;
inp_match_t *match = ii->match;
void *ctx = ii->ctx;
inp_lookup_t lock = ii->lock;
int hash = ii->hash;
struct inpcb *inp;
/* First call: start from the head of the list. */
if (ii->inp == NULL) {
smr_enter(ipi->ipi_smr);
for (inp = II_LIST_FIRST(ipi, hash);
inp != NULL;
inp = II_LIST_NEXT(inp, hash)) {
if (match != NULL && (match)(inp, ctx) == false)
continue;
/* _inp_smr_lock() leaves the SMR section whether it succeeds or not. */
if (__predict_true(_inp_smr_lock(inp, lock, INP_FREED)))
break;
else {
/*
 * Locking failed and SMR was exited: re-enter and go back to the
 * list head.
 * NOTE(review): the for-loop increment then steps to the element
 * after that head -- confirm this is intended.
 */
smr_enter(ipi->ipi_smr);
MPASS(inp != II_LIST_FIRST(ipi, hash));
inp = II_LIST_FIRST(ipi, hash);
if (inp == NULL)
break;
}
}
/* inp == NULL means we are still inside SMR and must leave it. */
if (inp == NULL)
smr_exit(ipi->ipi_smr);
else
ii->inp = inp;
return (inp);
}
/* Subsequent calls: continue after the inpcb we still hold locked. */
smr_enter(ipi->ipi_smr);
restart:
inp = ii->inp;
II_LOCK_ASSERT(inp, lock);
next:
inp = II_LIST_NEXT(inp, hash);
if (inp == NULL) {
/* End of list: 'found' unlocks the previous inpcb and returns NULL. */
smr_exit(ipi->ipi_smr);
goto found;
}
if (match != NULL && (match)(inp, ctx) == false)
goto next;
/* Fast path: lock without blocking while still inside SMR. */
if (__predict_true(inp_trylock(inp, lock))) {
if (__predict_false(inp->inp_flags & INP_FREED)) {
/* Freed entry: skip it and keep walking from here. */
inp_unlock(inp, lock);
goto next;
} else {
smr_exit(ipi->ipi_smr);
goto found;
}
}
/*
 * Slow path: take a reference so SMR can be left before blocking on the
 * lock.  Once SMR has been exited the walk can only be resumed from
 * ii->inp, which is still locked, hence the jumps to 'restart'.
 */
if (__predict_true(refcount_acquire_if_not_zero(&inp->inp_refcount))) {
smr_exit(ipi->ipi_smr);
inp_lock(inp, lock);
/* in_pcbrele() returning true means the inpcb is gone. */
if (__predict_false(in_pcbrele(inp, lock))) {
smr_enter(ipi->ipi_smr);
goto restart;
}
if (__predict_false(inp->inp_flags & INP_FREED)) {
inp_unlock(inp, lock);
smr_enter(ipi->ipi_smr);
goto restart;
}
} else
/* Reference count already zero: skip this entry. */
goto next;
found:
/* Hand over: release the previous inpcb and remember the new one. */
inp_unlock(ii->inp, lock);
ii->inp = inp;
return (ii->inp);
}
void
in_pcbref(struct inpcb *inp)
{
u_int old __diagused;
old = refcount_acquire(&inp->inp_refcount);
KASSERT(old > 0, ("%s: refcount 0", __func__));
}
/*
 * Drop one reference on a read-locked pcb.
 *
 * When this was the last reference the credential is released, the read
 * lock is dropped and the pcb is returned to its SMR zone; true is returned
 * and 'inp' must not be touched again.  Otherwise false is returned and the
 * read lock is still held.
 */
bool
in_pcbrele_rlocked(struct inpcb *inp)
{
	bool last;

	INP_RLOCK_ASSERT(inp);

	last = refcount_release(&inp->inp_refcount);
	if (last) {
		/* The final reference may only go away after in_pcbfree(). */
		MPASS(inp->inp_flags & INP_FREED);
		MPASS(inp->inp_socket == NULL);
		crfree(inp->inp_cred);
#ifdef INVARIANTS
		inp->inp_cred = NULL;
#endif
		INP_RUNLOCK(inp);
		uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
	}
	return (last);
}
/*
 * Drop one reference on a write-locked pcb.
 *
 * When this was the last reference the credential is released, the write
 * lock is dropped and the pcb is returned to its SMR zone; true is returned
 * and 'inp' must not be touched again.  Otherwise false is returned and the
 * write lock is still held.
 */
bool
in_pcbrele_wlocked(struct inpcb *inp)
{
	bool last;

	INP_WLOCK_ASSERT(inp);

	last = refcount_release(&inp->inp_refcount);
	if (last) {
		/* The final reference may only go away after in_pcbfree(). */
		MPASS(inp->inp_flags & INP_FREED);
		MPASS(inp->inp_socket == NULL);
		crfree(inp->inp_cred);
#ifdef INVARIANTS
		inp->inp_cred = NULL;
#endif
		INP_WUNLOCK(inp);
		uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
	}
	return (last);
}
/*
 * Drop one reference on a locked pcb, dispatching on the lock mode the
 * caller holds: in_pcbrele_rlocked() for INPLOOKUP_RLOCKPCB and
 * in_pcbrele_wlocked() otherwise.  Returns what the callee returns.
 */
bool
in_pcbrele(struct inpcb *inp, const inp_lookup_t lock)
{

	if (lock == INPLOOKUP_RLOCKPCB)
		return (in_pcbrele_rlocked(inp));
	return (in_pcbrele_wlocked(inp));
}
/*
 * Read-lock 'inp' and drop one reference on it.
 *
 * Returns true, with no lock held, when the pcb was either released by
 * this call or is already marked INP_FREED.  Returns false with the read
 * lock still held otherwise.
 */
bool
in_pcbrele_rlock(struct inpcb *inp)
{
	bool gone;

	INP_RLOCK(inp);
	gone = in_pcbrele_rlocked(inp);
	/* 'inp' is only inspected when the release above did not free it. */
	if (!gone && (inp->inp_flags & INP_FREED) != 0) {
		INP_RUNLOCK(inp);
		gone = true;
	}
	return (gone);
}
/*
 * Detach 'inp' from its socket and from the pcbinfo lists, release the
 * resources hanging off it, and drop one reference.
 *
 * The caller must hold the pcb write lock and the pcb must still have a
 * socket attached.  The write lock is no longer held on return: it is
 * released either by in_pcbrele_wlocked() (last reference) or explicitly
 * below.
 */
void
in_pcbfree(struct inpcb *inp)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
#ifdef INET
struct ip_moptions *imo;
#endif
#ifdef INET6
struct ip6_moptions *im6o;
#endif
INP_WLOCK_ASSERT(inp);
KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
KASSERT((inp->inp_flags & INP_FREED) == 0,
("%s: called twice for pcb %p", __func__, inp));
/* Remove the pcb from the lookup hashes before anything else. */
if (inp->inp_flags & INP_INHASHLIST)
in_pcbremhash(inp);
/* Unlink from the pcbinfo-wide list and bump the generation count. */
INP_INFO_WLOCK(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
pcbinfo->ipi_count--;
CK_LIST_REMOVE(inp, inp_list);
INP_INFO_WUNLOCK(pcbinfo);
#ifdef RATELIMIT
if (inp->inp_snd_tag != NULL)
in_pcbdetach_txrtlmt(inp);
#endif
/* Mark the pcb freed and break the socket <-> pcb linkage. */
inp->inp_flags |= INP_FREED;
inp->inp_socket->so_pcb = NULL;
inp->inp_socket = NULL;
RO_INVALIDATE_CACHE(&inp->inp_route);
#ifdef MAC
mac_inpcb_destroy(inp);
#endif
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
if (inp->inp_sp != NULL)
ipsec_delete_pcbpolicy(inp);
#endif
#ifdef INET
if (inp->inp_options)
(void)m_free(inp->inp_options);
DEBUG_POISON_POINTER(inp->inp_options);
/* Saved here, freed after the pcb lock has been dropped. */
imo = inp->inp_moptions;
DEBUG_POISON_POINTER(inp->inp_moptions);
#endif
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO) {
ip6_freepcbopts(inp->in6p_outputopts);
DEBUG_POISON_POINTER(inp->in6p_outputopts);
/* Saved here, freed after the pcb lock has been dropped. */
im6o = inp->in6p_moptions;
DEBUG_POISON_POINTER(inp->in6p_moptions);
} else
im6o = NULL;
#endif
/*
 * Drop a reference.  If others remain the pcb stays allocated and only
 * the lock is released here; 'inp' is not dereferenced past this point.
 */
if (__predict_false(in_pcbrele_wlocked(inp) == false)) {
INP_WUNLOCK(inp);
}
#ifdef INET6
ip6_freemoptions(im6o);
#endif
#ifdef INET
inp_freemoptions(imo);
#endif
}
/*
 * Destroy the lock embedded in the pcb stored at 'mem'.  'size' is unused.
 * NOTE(review): the (mem, size) signature looks like a UMA zone fini
 * callback; the registration is not visible here -- confirm.
 */
static void
inpcb_fini(void *mem, int size)
{
struct inpcb *inp = mem;
INP_LOCK_DESTROY(inp);
}
/*
 * Mark a write-locked pcb as dropped and, if it is currently hashed, take
 * it out of the lookup hashes.
 */
void
in_pcbdrop(struct inpcb *inp)
{

	INP_WLOCK_ASSERT(inp);

	inp->inp_flags |= INP_DROPPED;
	if ((inp->inp_flags & INP_INHASHLIST) == 0)
		return;
	in_pcbremhash(inp);
}
#ifdef INET
/*
 * Fill 'sa' with the local IPv4 address and port of the pcb attached to
 * 'so'.  'sa' is written as a struct sockaddr_in; fields not listed in the
 * initializer are zeroed.  Always returns 0.
 */
int
in_getsockaddr(struct socket *so, struct sockaddr *sa)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sa;
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));

	*sin = (struct sockaddr_in){
		.sin_family = AF_INET,
		.sin_len = sizeof(struct sockaddr_in),
		.sin_addr = inp->inp_laddr,
		.sin_port = inp->inp_lport,
	};
	return (0);
}
/*
 * Fill 'sa' with the foreign IPv4 address and port of the pcb attached to
 * 'so'.  'sa' is written as a struct sockaddr_in; fields not listed in the
 * initializer are zeroed.  Always returns 0.
 */
int
in_getpeeraddr(struct socket *so, struct sockaddr *sa)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sa;
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));

	*sin = (struct sockaddr_in){
		.sin_family = AF_INET,
		.sin_len = sizeof(struct sockaddr_in),
		.sin_addr = inp->inp_faddr,
		.sin_port = inp->inp_fport,
	};
	return (0);
}
/*
 * Iterator match callback: accept pcbs that have INP_IPV4 set and carry
 * IPv4 multicast options.  The context argument is unused.
 */
static bool
inp_v4_multi_match(const struct inpcb *inp, void *v __unused)
{

	if ((inp->inp_vflag & INP_IPV4) == 0)
		return (false);
	return (inp->inp_moptions != NULL);
}
/*
 * Strip references to interface 'ifp' from the IPv4 multicast options of
 * every pcb in 'pcbinfo' that has such options.
 *
 * For each matching pcb (visited write-locked) the outgoing multicast
 * interface is cleared if it is 'ifp', and every membership whose group is
 * on 'ifp' is left and its filter freed.  The caller must hold the lock
 * checked by IN_MULTI_LOCK_ASSERT().
 */
void
in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_WLOCKPCB,
inp_v4_multi_match, NULL);
struct inpcb *inp;
struct in_multi *inm;
struct in_mfilter *imf;
struct ip_moptions *imo;
IN_MULTI_LOCK_ASSERT();
while ((inp = inp_next(&inpi)) != NULL) {
INP_WLOCK_ASSERT(inp);
/* Non-NULL here: guaranteed by inp_v4_multi_match(). */
imo = inp->inp_moptions;
if (imo->imo_multicast_ifp == ifp)
imo->imo_multicast_ifp = NULL;
/*
 * Removing an entry invalidates the walk, so rescan the filter list
 * from the start after every removal.
 */
restart:
IP_MFILTER_FOREACH(imf, &imo->imo_head) {
if ((inm = imf->imf_inm) == NULL)
continue;
if (inm->inm_ifp != ifp)
continue;
ip_mfilter_remove(&imo->imo_head, imf);
in_leavegroup_locked(inm, NULL);
ip_mfilter_free(imf);
goto restart;
}
}
}
/* Extra wildcard cost charged to a pcb that also has INP_IPV6 set. */
#define INP_LOOKUP_MAPPED_PCB_COST 3
/*
 * Look up a pcb bound to local port 'lport' (and, depending on the mode,
 * local address 'laddr') that belongs to the same prison as 'cred'.
 *
 * 'fib' restricts the search to pcbs in that FIB unless it is RT_ALL_FIBS.
 * 'lookupflags' may only contain INPLOOKUP_WILDCARD:
 *  - without it, only unconnected pcbs whose local address equals 'laddr'
 *    exactly are considered and the first hit is returned;
 *  - with it, every pcb on the port is scored and the one with the fewest
 *    wildcard mismatches is returned.
 *
 * The returned pcb is not locked by this function; the hash lock must be
 * held by the caller.  Returns NULL when nothing matches.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
u_short lport, int fib, int lookupflags, struct ucred *cred)
{
struct inpcb *inp;
/* Initial best score: higher than any score a candidate can reach. */
#ifdef INET6
int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
#else
int matchwild = 3;
#endif
int wildcard;
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT(fib == RT_ALL_FIBS || (fib >= 0 && fib < V_rt_numfibs),
("%s: invalid fib %d", __func__, fib));
INP_HASH_LOCK_ASSERT(pcbinfo);
if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
struct inpcbhead *head;
/* Exact mode: scan the wildcard-hash bucket for this port. */
head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport,
pcbinfo->ipi_hashmask)];
CK_LIST_FOREACH(inp, head, inp_hash_wild) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
continue;
#endif
if (inp->inp_faddr.s_addr == INADDR_ANY &&
inp->inp_laddr.s_addr == laddr.s_addr &&
inp->inp_lport == lport && (fib == RT_ALL_FIBS ||
inp->inp_inc.inc_fibnum == fib)) {
if (prison_equal_ip4(cred->cr_prison,
inp->inp_cred->cr_prison))
return (inp);
}
}
return (NULL);
} else {
struct inpcbhead *porthash;
struct inpcb *match = NULL;
/* Best-fit mode: score every pcb hashed on this port. */
porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->ipi_porthashmask)];
CK_LIST_FOREACH(inp, porthash, inp_portlist) {
if (inp->inp_lport != lport)
continue;
if (!prison_equal_ip4(inp->inp_cred->cr_prison,
cred->cr_prison))
continue;
if (fib != RT_ALL_FIBS &&
inp->inp_inc.inc_fibnum != fib)
continue;
wildcard = 0;
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
continue;
/* A pcb that is also IPv6 costs more than an IPv4-only one. */
if ((inp->inp_vflag & INP_IPV6) != 0)
wildcard += INP_LOOKUP_MAPPED_PCB_COST;
#endif
/* A connected pcb costs one point. */
if (inp->inp_faddr.s_addr != INADDR_ANY)
wildcard++;
/*
 * Local address: one point when exactly one side is the wildcard
 * address; two different specific addresses never match.
 */
if (inp->inp_laddr.s_addr != INADDR_ANY) {
if (laddr.s_addr == INADDR_ANY)
wildcard++;
else if (inp->inp_laddr.s_addr != laddr.s_addr)
continue;
} else {
if (laddr.s_addr != INADDR_ANY)
wildcard++;
}
/* Keep the lowest score; a score of zero cannot be beaten. */
if (wildcard < matchwild) {
match = inp;
matchwild = wildcard;
if (matchwild == 0)
break;
}
}
return (match);
}
}
#undef INP_LOOKUP_MAPPED_PCB_COST
static bool
in_pcblookup_lb_match(const struct inpcblbgroup *grp, int domain, int fib)
{
return ((domain == M_NODOM || domain == grp->il_numa_domain) &&
(fib == RT_ALL_FIBS || fib == grp->il_fibnum));
}
/*
 * Pick a pcb from the load-balancing groups bound to 'lport'.
 *
 * Candidate groups are sorted into four classes and the final choice
 * prefers, in order: jailed with exact local address, jailed with wildcard
 * local address, non-jailed exact, non-jailed wildcard.  Within a class a
 * group that satisfies in_pcblookup_lb_match() for 'domain' and 'fib'
 * replaces an earlier candidate.  The member of the chosen group is
 * selected by hashing (faddr, lport, fport) modulo the member count.
 *
 * Returns NULL when no group matches or the chosen group has no members.
 * The returned pcb is not locked by this function.
 */
static struct inpcb *
in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
const struct in_addr *faddr, uint16_t fport, const struct in_addr *laddr,
uint16_t lport, int domain, int fib)
{
const struct inpcblbgrouphead *hdr;
struct inpcblbgroup *grp;
struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild;
struct inpcb *inp;
u_int count;
INP_HASH_LOCK_ASSERT(pcbinfo);
NET_EPOCH_ASSERT();
hdr = &pcbinfo->ipi_lbgrouphashbase[
INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
jail_exact = jail_wild = local_exact = local_wild = NULL;
CK_LIST_FOREACH(grp, hdr, il_list) {
bool injail;
#ifdef INET6
if (!(grp->il_vflag & INP_IPV4))
continue;
#endif
if (grp->il_lport != lport)
continue;
/* A jailed group is skipped when the prison check on 'laddr' fails. */
injail = prison_flag(grp->il_cred, PR_IP4) != 0;
if (injail && prison_check_ip4_locked(grp->il_cred->cr_prison,
laddr) != 0)
continue;
if (grp->il_laddr.s_addr == laddr->s_addr) {
if (injail) {
jail_exact = grp;
/* Jailed, exact address, domain and FIB match: stop searching. */
if (in_pcblookup_lb_match(grp, domain, fib))
goto out;
} else if (local_exact == NULL ||
in_pcblookup_lb_match(grp, domain, fib)) {
local_exact = grp;
}
} else if (grp->il_laddr.s_addr == INADDR_ANY) {
if (injail) {
if (jail_wild == NULL ||
in_pcblookup_lb_match(grp, domain, fib))
jail_wild = grp;
} else if (local_wild == NULL ||
in_pcblookup_lb_match(grp, domain, fib)) {
local_wild = grp;
}
}
}
if (jail_exact != NULL)
grp = jail_exact;
else if (jail_wild != NULL)
grp = jail_wild;
else if (local_exact != NULL)
grp = local_exact;
else
grp = local_wild;
if (grp == NULL)
return (NULL);
out:
/* Read the member count with acquire semantics before indexing il_inp[]. */
count = atomic_load_acq_int(&grp->il_inpcnt);
if (count == 0)
return (NULL);
inp = grp->il_inp[INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) % count];
KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
return (inp);
}
/*
 * Report whether 'inp' is an IPv4 pcb whose full 4-tuple equals the given
 * foreign/local address and port.
 */
static bool
in_pcblookup_exact_match(const struct inpcb *inp, struct in_addr faddr,
    u_short fport, struct in_addr laddr, u_short lport)
{

#ifdef INET6
	if ((inp->inp_vflag & INP_IPV4) == 0)
		return (false);
#endif
	return (inp->inp_faddr.s_addr == faddr.s_addr &&
	    inp->inp_laddr.s_addr == laddr.s_addr &&
	    inp->inp_fport == fport &&
	    inp->inp_lport == lport);
}
/*
 * Search the exact-match hash bucket for a pcb whose 4-tuple equals the
 * arguments.  Returns the first such pcb, not locked by this function, or
 * NULL when the bucket holds none.
 */
static struct inpcb *
in_pcblookup_hash_exact(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_short fport, struct in_addr laddr, u_short lport)
{
	struct inpcbhead *bucket;
	struct inpcb *cand;

	INP_HASH_LOCK_ASSERT(pcbinfo);

	bucket = &pcbinfo->ipi_hash_exact[INP_PCBHASH(&faddr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	CK_LIST_FOREACH(cand, bucket, inp_hash_exact) {
		if (in_pcblookup_exact_match(cand, faddr, fport, laddr, lport))
			break;
	}
	/* 'cand' is NULL here when the loop ran off the end of the bucket. */
	return (cand);
}
/* Result of matching an unconnected pcb against a local address and port. */
typedef enum {
/* The pcb does not match. */
INPLOOKUP_MATCH_NONE = 0,
/* The pcb matches and is bound to the wildcard local address. */
INPLOOKUP_MATCH_WILD = 1,
/* The pcb matches and is bound to exactly the requested local address. */
INPLOOKUP_MATCH_LADDR = 2,
} inp_lookup_match_t;
/*
 * Classify how the unconnected IPv4 pcb 'inp' matches a packet addressed
 * to laddr:lport in FIB 'fib' (RT_ALL_FIBS disables the FIB check).
 *
 * Returns INPLOOKUP_MATCH_WILD when the pcb is bound to INADDR_ANY,
 * INPLOOKUP_MATCH_LADDR when it is bound to 'laddr', and
 * INPLOOKUP_MATCH_NONE in every other case, including connected pcbs.
 */
static inp_lookup_match_t
in_pcblookup_wild_match(const struct inpcb *inp, struct in_addr laddr,
    u_short lport, int fib)
{
	inp_lookup_match_t res = INPLOOKUP_MATCH_NONE;

#ifdef INET6
	if ((inp->inp_vflag & INP_IPV4) == 0)
		return (res);
#endif
	if (inp->inp_lport != lport || inp->inp_faddr.s_addr != INADDR_ANY)
		return (res);
	if (fib != RT_ALL_FIBS && inp->inp_inc.inc_fibnum != fib)
		return (res);

	/* The wildcard test comes first, as 'laddr' may itself be INADDR_ANY. */
	if (inp->inp_laddr.s_addr == INADDR_ANY)
		res = INPLOOKUP_MATCH_WILD;
	else if (inp->inp_laddr.s_addr == laddr.s_addr)
		res = INPLOOKUP_MATCH_LADDR;
	return (res);
}
/*
 * Sentinel returned by the lockless lookups to ask the caller to repeat the
 * search through the locked path.  It is never dereferenced.
 */
#define INP_LOOKUP_AGAIN ((struct inpcb *)(uintptr_t)-1)
/*
 * Lockless (SMR) search of the wildcard hash for an unconnected pcb
 * matching laddr:lport in 'fib'.
 *
 * Only the first candidate in the bucket is tried.  Returns that pcb locked
 * per 'lockflags' if it still matches after locking and passes the prison
 * check on 'laddr'; INP_LOOKUP_AGAIN if it could not be locked or no longer
 * qualifies; NULL if the bucket has no candidate.  The caller must be
 * inside the pcbinfo's SMR section.  inp_smr_lock() leaves that section
 * whether it succeeds or fails, so only the NULL return is made with the
 * section still entered.
 */
static struct inpcb *
in_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, struct in_addr laddr,
u_short lport, int fib, const inp_lookup_t lockflags)
{
struct inpcbhead *head;
struct inpcb *inp;
KASSERT(SMR_ENTERED(pcbinfo->ipi_smr),
("%s: not in SMR read section", __func__));
head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport,
pcbinfo->ipi_hashmask)];
CK_LIST_FOREACH(inp, head, inp_hash_wild) {
inp_lookup_match_t match;
match = in_pcblookup_wild_match(inp, laddr, lport, fib);
if (match == INPLOOKUP_MATCH_NONE)
continue;
if (__predict_true(inp_smr_lock(inp, lockflags))) {
/* Re-validate under the lock; the first check ran unlocked. */
match = in_pcblookup_wild_match(inp, laddr, lport, fib);
if (match != INPLOOKUP_MATCH_NONE &&
prison_check_ip4_locked(inp->inp_cred->cr_prison,
&laddr) == 0)
return (inp);
inp_unlock(inp, lockflags);
}
/* The candidate was unusable; have the caller retry with the hash lock. */
return (INP_LOOKUP_AGAIN);
}
return (NULL);
}
/*
 * Search the wildcard hash, under the hash lock, for the best unconnected
 * pcb matching laddr:lport in 'fib'.
 *
 * A jailed pcb bound to exactly 'laddr' is returned immediately.  Otherwise
 * the preference order applied after the scan is: jailed wildcard-bound,
 * non-jailed bound to 'laddr', non-jailed wildcard-bound, and finally (with
 * INET6) a wildcard-bound pcb that has INP_IPV6PROTO set.
 *
 * The returned pcb is not locked by this function.  Returns NULL when
 * nothing matches.
 */
static struct inpcb *
in_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, struct in_addr laddr,
u_short lport, int fib)
{
struct inpcbhead *head;
struct inpcb *inp, *local_wild, *local_exact, *jail_wild;
#ifdef INET6
struct inpcb *local_wild_mapped;
#endif
INP_HASH_LOCK_ASSERT(pcbinfo);
head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport,
pcbinfo->ipi_hashmask)];
local_wild = local_exact = jail_wild = NULL;
#ifdef INET6
local_wild_mapped = NULL;
#endif
CK_LIST_FOREACH(inp, head, inp_hash_wild) {
inp_lookup_match_t match;
bool injail;
match = in_pcblookup_wild_match(inp, laddr, lport, fib);
if (match == INPLOOKUP_MATCH_NONE)
continue;
injail = prison_flag(inp->inp_cred, PR_IP4) != 0;
if (injail) {
/* Skip jailed pcbs for which the prison check on 'laddr' fails. */
if (prison_check_ip4_locked(inp->inp_cred->cr_prison,
&laddr) != 0)
continue;
} else {
/* Once a non-jailed exact match is known, ignore other non-jailed pcbs. */
if (local_exact != NULL)
continue;
}
if (match == INPLOOKUP_MATCH_LADDR) {
if (injail)
return (inp);
local_exact = inp;
} else {
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO)
local_wild_mapped = inp;
else
#endif
if (injail)
jail_wild = inp;
else
local_wild = inp;
}
}
if (jail_wild != NULL)
return (jail_wild);
if (local_exact != NULL)
return (local_exact);
if (local_wild != NULL)
return (local_wild);
#ifdef INET6
if (local_wild_mapped != NULL)
return (local_wild_mapped);
#endif
return (NULL);
}
/*
 * Look up a pcb for the given 4-tuple with the hash write lock held.
 *
 * An exact 4-tuple match always wins.  When INPLOOKUP_WILDCARD is set and
 * no exact match exists, the load-balancing groups are consulted first and
 * the wildcard hash second.  Neither address may be INADDR_ANY.
 *
 * The returned pcb is not locked by this function.  Returns NULL when
 * nothing matches.
 */
static struct inpcb *
in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
    uint8_t numa_domain, int fib)
{
	const u_short fport = fport_arg, lport = lport_arg;
	struct inpcb *found;

	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD | INPLOOKUP_FIB)) == 0,
	    ("%s: invalid lookup flags %d", __func__, lookupflags));
	KASSERT(faddr.s_addr != INADDR_ANY,
	    ("%s: invalid foreign address", __func__));
	KASSERT(laddr.s_addr != INADDR_ANY,
	    ("%s: invalid local address", __func__));
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	found = in_pcblookup_hash_exact(pcbinfo, faddr, fport, laddr, lport);
	if (found != NULL || (lookupflags & INPLOOKUP_WILDCARD) == 0)
		return (found);

	found = in_pcblookup_lbgroup(pcbinfo, &faddr, fport, &laddr, lport,
	    numa_domain, fib);
	if (found != NULL)
		return (found);

	return (in_pcblookup_hash_wild_locked(pcbinfo, laddr, lport, fib));
}
/*
 * Serialized lookup: find a pcb for the 4-tuple under the hash write lock
 * and return it locked according to the lock bits in 'lookupflags'.
 *
 * Returns NULL when nothing matches, or when the pcb was freed while this
 * function was waiting for its lock.  The hash lock is released before
 * returning.
 */
static struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
uint8_t numa_domain, int fib)
{
struct inpcb *inp;
const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK;
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
INP_HASH_WLOCK(pcbinfo);
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
lookupflags & ~INPLOOKUP_LOCKMASK, numa_domain, fib);
if (inp != NULL && !inp_trylock(inp, lockflags)) {
/*
 * The pcb lock is contended.  Take a reference, drop the hash lock,
 * then wait for the pcb lock.
 */
in_pcbref(inp);
INP_HASH_WUNLOCK(pcbinfo);
inp_lock(inp, lockflags);
/* True means ours was the last reference and the pcb is gone. */
if (in_pcbrele(inp, lockflags))
inp = NULL;
} else {
/* Either nothing was found or the pcb lock was taken immediately. */
INP_HASH_WUNLOCK(pcbinfo);
}
return (inp);
}
/*
 * Lockless (SMR) lookup of a pcb for the 4-tuple, falling back to the
 * serialized in_pcblookup_hash() whenever a candidate cannot be locked or
 * fails re-validation after locking.
 *
 * Returns the pcb locked according to the lock bits in 'lookupflags', or
 * NULL when nothing matches.  The SMR section entered here is left before
 * returning: inp_smr_lock() leaves it on every outcome, and it is left
 * explicitly when the result is NULL.
 */
static struct inpcb *
in_pcblookup_hash_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
uint8_t numa_domain, int fib)
{
struct inpcb *inp;
const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK;
const u_short fport = fport_arg, lport = lport_arg;
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
smr_enter(pcbinfo->ipi_smr);
inp = in_pcblookup_hash_exact(pcbinfo, faddr, fport, laddr, lport);
if (inp != NULL) {
if (__predict_true(inp_smr_lock(inp, lockflags))) {
/* Re-check the 4-tuple now that the pcb is locked. */
if (__predict_true(in_pcblookup_exact_match(inp,
faddr, fport, laddr, lport)))
return (inp);
inp_unlock(inp, lockflags);
}
/* Locking failed or the pcb changed: redo the search serialized. */
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, numa_domain, fib));
}
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
/* Load-balancing groups take precedence over the wildcard hash. */
inp = in_pcblookup_lbgroup(pcbinfo, &faddr, fport,
&laddr, lport, numa_domain, fib);
if (inp != NULL) {
if (__predict_true(inp_smr_lock(inp, lockflags))) {
if (__predict_true(in_pcblookup_wild_match(inp,
laddr, lport, fib) != INPLOOKUP_MATCH_NONE))
return (inp);
inp_unlock(inp, lockflags);
}
inp = INP_LOOKUP_AGAIN;
} else {
inp = in_pcblookup_hash_wild_smr(pcbinfo, laddr, lport,
fib, lockflags);
}
if (inp == INP_LOOKUP_AGAIN) {
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr,
lport, lookupflags, numa_domain, fib));
}
}
/* A miss is the only outcome that is still inside the SMR section. */
if (inp == NULL)
smr_exit(pcbinfo->ipi_smr);
return (inp);
}
/*
 * Public IPv4 pcb lookup for the given 4-tuple.
 *
 * When INPLOOKUP_FIB is set in 'lookupflags' the search is limited to the
 * FIB of 'ifp'; otherwise all FIBs are searched.  No NUMA domain
 * preference is applied.  Returns what in_pcblookup_hash_smr() returns.
 */
struct inpcb *
in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
    struct in_addr laddr, u_int lport, int lookupflags,
    struct ifnet *ifp)
{
	int fib;

	if ((lookupflags & INPLOOKUP_FIB) != 0)
		fib = if_getfib(ifp);
	else
		fib = RT_ALL_FIBS;

	return (in_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport,
	    lookupflags, M_NODOM, fib));
}
/*
 * Public IPv4 pcb lookup for the given 4-tuple on behalf of packet 'm'.
 *
 * When INPLOOKUP_FIB is set in 'lookupflags' the search is limited to the
 * FIB recorded in the mbuf; otherwise all FIBs are searched.  The mbuf's
 * NUMA domain is passed on as the preferred domain.  'ifp' is unused.
 * Returns what in_pcblookup_hash_smr() returns.
 */
struct inpcb *
in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
    struct ifnet *ifp __unused, struct mbuf *m)
{
	int fib;

	M_ASSERTPKTHDR(m);

	if ((lookupflags & INPLOOKUP_FIB) != 0)
		fib = M_GETFIB(m);
	else
		fib = RT_ALL_FIBS;

	return (in_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport,
	    lookupflags, m->m_pkthdr.numa_domain, fib));
}
#endif
/*
 * Report whether prison_flag() returns non-zero for the pcb's credential
 * and 'flag' (callers in this file pass PR_IP4 or PR_IP6).
 */
static bool
in_pcbjailed(const struct inpcb *inp, unsigned int flag)
{
return (prison_flag(inp->inp_cred, flag) != 0);
}
/*
 * Insert an unconnected IPv4 pcb into wildcard hash bucket 'pcbhash' at a
 * position chosen from its jail status and whether it is bound to a
 * specific local address.
 *
 * Placement rules, in the order applied:
 *  - an unbound pcb with INP_IPV6PROTO set goes to the tail;
 *  - a non-jailed pcb is placed after the leading run of jailed entries;
 *  - a jailed pcb is placed at the head when the head is non-jailed;
 *  - an unbound pcb is additionally moved forward to just before the next
 *    entry with a wildcard local address, or to the tail.
 *
 * The caller must hold the pcb lock and the hash write lock.
 */
static void
_in_pcbinshash_wild(struct inpcbhead *pcbhash, struct inpcb *inp)
{
struct inpcb *last;
bool bound, injail;
INP_LOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
last = NULL;
bound = inp->inp_laddr.s_addr != INADDR_ANY;
if (!bound && (inp->inp_vflag & INP_IPV6PROTO) != 0) {
/* Append to the tail, or insert at the head of an empty bucket. */
CK_LIST_FOREACH(last, pcbhash, inp_hash_wild) {
if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
return;
}
}
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
return;
}
injail = in_pcbjailed(inp, PR_IP4);
if (!injail) {
/* Stop at the first non-jailed entry; append if every entry is jailed. */
CK_LIST_FOREACH(last, pcbhash, inp_hash_wild) {
if (!in_pcbjailed(last, PR_IP4))
break;
if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
return;
}
}
} else if (!CK_LIST_EMPTY(pcbhash) &&
!in_pcbjailed(CK_LIST_FIRST(pcbhash), PR_IP4)) {
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
return;
}
if (!bound) {
/* Continue from 'last' to the first wildcard-bound entry, or the tail. */
CK_LIST_FOREACH_FROM(last, pcbhash, inp_hash_wild) {
if (last->inp_laddr.s_addr == INADDR_ANY)
break;
if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
return;
}
}
}
/* 'last' is the entry to insert before; NULL selects the head. */
if (last == NULL)
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
else
CK_LIST_INSERT_BEFORE(last, inp, inp_hash_wild);
}
#ifdef INET6
/*
 * Insert an unconnected IPv6 pcb into wildcard hash bucket 'pcbhash' at a
 * position chosen from its jail status and whether it is bound to a
 * specific local address.
 *
 * Placement rules, in the order applied:
 *  - a non-jailed pcb is placed after the leading run of jailed entries;
 *  - a jailed pcb is placed at the head when the head is non-jailed;
 *  - an unbound pcb is additionally moved forward to just before the next
 *    entry with an unspecified local address, or to the tail.
 *
 * The caller must hold the pcb lock and the hash write lock.
 */
static void
_in6_pcbinshash_wild(struct inpcbhead *pcbhash, struct inpcb *inp)
{
struct inpcb *last;
bool bound, injail;
INP_LOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
last = NULL;
bound = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr);
injail = in_pcbjailed(inp, PR_IP6);
if (!injail) {
/* Stop at the first non-jailed entry; append if every entry is jailed. */
CK_LIST_FOREACH(last, pcbhash, inp_hash_wild) {
if (!in_pcbjailed(last, PR_IP6))
break;
if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
return;
}
}
} else if (!CK_LIST_EMPTY(pcbhash) &&
!in_pcbjailed(CK_LIST_FIRST(pcbhash), PR_IP6)) {
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
return;
}
if (!bound) {
/* Continue from 'last' to the first unspecified-address entry, or the tail. */
CK_LIST_FOREACH_FROM(last, pcbhash, inp_hash_wild) {
if (IN6_IS_ADDR_UNSPECIFIED(&last->in6p_laddr))
break;
if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
return;
}
}
}
/* 'last' is the entry to insert before; NULL selects the head. */
if (last == NULL)
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
else
CK_LIST_INSERT_BEFORE(last, inp, inp_hash_wild);
}
#endif
/*
 * Insert 'inp' into the lookup hashes of its pcbinfo.
 *
 * A pcb with a specified foreign address goes into the exact-match hash,
 * any other into the wildcard hash; every pcb is also linked into the port
 * hash.  Sockets with SO_REUSEPORT_LB are first added to a load-balancing
 * group.
 *
 * The caller must hold the pcb write lock and the hash write lock, and the
 * pcb must not already be hashed.  Returns 0 on success, or the error from
 * in_pcbinslbgrouphash(), in which case nothing has been inserted.
 */
int
in_pcbinshash(struct inpcb *inp)
{
struct inpcbhead *pcbhash, *pcbporthash;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
uint32_t hash;
bool connected;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
("in_pcbinshash: INP_INHASHLIST"));
#ifdef INET6
if (inp->inp_vflag & INP_IPV6) {
hash = INP6_PCBHASH(&inp->in6p_faddr, inp->inp_lport,
inp->inp_fport, pcbinfo->ipi_hashmask);
connected = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr);
} else
#endif
{
hash = INP_PCBHASH(&inp->inp_faddr, inp->inp_lport,
inp->inp_fport, pcbinfo->ipi_hashmask);
connected = !in_nullhost(inp->inp_faddr);
}
if (connected)
pcbhash = &pcbinfo->ipi_hash_exact[hash];
else
pcbhash = &pcbinfo->ipi_hash_wild[hash];
pcbporthash = &pcbinfo->ipi_porthashbase[
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/* Join a load-balancing group only when the socket asked for it. */
if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
int error = in_pcbinslbgrouphash(inp, M_NODOM);
if (error != 0)
return (error);
}
/*
 * If an SMR sequence number is recorded on the pcb, wait for it before
 * the pcb is linked into a hash list again, then clear it.
 */
if (inp->inp_smr != SMR_SEQ_INVALID) {
smr_wait(pcbinfo->ipi_smr, inp->inp_smr);
inp->inp_smr = SMR_SEQ_INVALID;
}
if (connected)
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_exact);
else {
/* Position within a wildcard bucket is chosen by the helpers. */
#ifdef INET6
if ((inp->inp_vflag & INP_IPV6) != 0)
_in6_pcbinshash_wild(pcbhash, inp);
else
#endif
_in_pcbinshash_wild(pcbhash, inp);
}
CK_LIST_INSERT_HEAD(pcbporthash, inp, inp_portlist);
inp->inp_flags |= INP_INHASHLIST;
return (0);
}
void
in_pcbremhash_locked(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
MPASS(inp->inp_flags & INP_INHASHLIST);
if ((inp->inp_flags & INP_INLBGROUP) != 0)
in_pcbremlbgrouphash(inp);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6) {
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
CK_LIST_REMOVE(inp, inp_hash_wild);
else
CK_LIST_REMOVE(inp, inp_hash_exact);
} else
#endif
{
if (in_nullhost(inp->inp_faddr))
CK_LIST_REMOVE(inp, inp_hash_wild);
else
CK_LIST_REMOVE(inp, inp_hash_exact);
}
CK_LIST_REMOVE(inp, inp_portlist);
inp->inp_flags &= ~INP_INHASHLIST;
}
/*
 * Remove 'inp' from the lookup hashes, taking and releasing the hash write
 * lock around in_pcbremhash_locked().
 */
static void
in_pcbremhash(struct inpcb *inp)
{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;

	INP_HASH_WLOCK(pcbinfo);
	in_pcbremhash_locked(inp);
	INP_HASH_WUNLOCK(pcbinfo);
}
/*
 * Move an already hashed pcb between the wildcard and exact-match hashes
 * after its foreign address changed.
 *
 * The pcb is removed from the list opposite to its new state: a pcb that is
 * now connected is unlinked from the wildcard hash, one that is now
 * unconnected from the exact-match hash.  It is then inserted at the head
 * of the bucket for its new state.  The port hash linkage is left
 * untouched.
 *
 * The caller must hold the pcb write lock and the hash write lock.
 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbhead *head;
	uint32_t hash;
	bool connected;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(pcbinfo);
	KASSERT(inp->inp_flags & INP_INHASHLIST,
	    ("%s: !INP_INHASHLIST", __func__));
	KASSERT(inp->inp_smr == SMR_SEQ_INVALID,
	    ("%s: inp was disconnected", __func__));

#ifdef INET6
	if (inp->inp_vflag & INP_IPV6) {
		connected = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr);
		hash = INP6_PCBHASH(&inp->in6p_faddr, inp->inp_lport,
		    inp->inp_fport, pcbinfo->ipi_hashmask);
	} else
#endif
	{
		connected = !in_nullhost(inp->inp_faddr);
		hash = INP_PCBHASH(&inp->inp_faddr, inp->inp_lport,
		    inp->inp_fport, pcbinfo->ipi_hashmask);
	}

	if (connected) {
		CK_LIST_REMOVE(inp, inp_hash_wild);
		head = &pcbinfo->ipi_hash_exact[hash];
		CK_LIST_INSERT_HEAD(head, inp, inp_hash_exact);
	} else {
		CK_LIST_REMOVE(inp, inp_hash_exact);
		head = &pcbinfo->ipi_hash_wild[hash];
		CK_LIST_INSERT_HEAD(head, inp, inp_hash_wild);
	}
}
/*
 * Invalidate the route cached in the pcb.
 */
void
in_losing(struct inpcb *inp)
{

	RO_INVALIDATE_CACHE(&inp->inp_route);
}
/*
 * With MAC compiled in, call mac_inpcb_sosetlabel() for the socket and its
 * pcb while holding the pcb write lock and the socket lock.  Without MAC
 * this function does nothing.
 */
void
in_pcbsosetlabel(struct socket *so)
{
#ifdef MAC
struct inpcb *inp;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
/* Lock order used here: pcb lock first, then socket lock. */
INP_WLOCK(inp);
SOCK_LOCK(so);
mac_inpcb_sosetlabel(so, inp);
SOCK_UNLOCK(so);
INP_WUNLOCK(inp);
#endif
}
/* Function wrapper around the INP_WLOCK() macro: write-lock the pcb. */
void
inp_wlock(struct inpcb *inp)
{
INP_WLOCK(inp);
}
/* Function wrapper around the INP_WUNLOCK() macro: drop the pcb write lock. */
void
inp_wunlock(struct inpcb *inp)
{
INP_WUNLOCK(inp);
}
/* Function wrapper around the INP_RLOCK() macro: read-lock the pcb. */
void
inp_rlock(struct inpcb *inp)
{
INP_RLOCK(inp);
}
/* Function wrapper around the INP_RUNLOCK() macro: drop the pcb read lock. */
void
inp_runlock(struct inpcb *inp)
{
INP_RUNLOCK(inp);
}
#ifdef INVARIANT_SUPPORT
/*
 * Function wrapper around INP_WLOCK_ASSERT().  Despite the generic name,
 * this asserts the write lock specifically.
 */
void
inp_lock_assert(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
}
/* Function wrapper around the INP_UNLOCK_ASSERT() macro. */
void
inp_unlock_assert(struct inpcb *inp)
{
INP_UNLOCK_ASSERT(inp);
}
#endif
/*
 * Call 'func(inp, arg)' for every pcb on the pcbinfo-wide list.  Each pcb
 * is write-locked by the iterator while 'func' runs; the iterator releases
 * it when advancing to the next entry.
 */
void
inp_apply_all(struct inpcbinfo *pcbinfo,
    void (*func)(struct inpcb *, void *), void *arg)
{
	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
	    INPLOOKUP_WLOCKPCB);
	struct inpcb *inp;

	for (inp = inp_next(&inpi); inp != NULL; inp = inp_next(&inpi))
		func(inp, arg);
}
/*
 * Return the socket attached to 'inp'.  The caller must hold the pcb write
 * lock.
 */
struct socket *
inp_inpcbtosocket(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
return (inp->inp_socket);
}
/*
 * Copy the pcb's IPv4 4-tuple into the caller's variables: local address
 * and port into *laddr and *lp, foreign address and port into *faddr and
 * *fp.  Values are copied as stored in the pcb, without byte-order
 * conversion.  The caller must hold the pcb lock.
 */
void
inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp)
{
INP_LOCK_ASSERT(inp);
*laddr = inp->inp_laddr.s_addr;
*faddr = inp->inp_faddr.s_addr;
*lp = inp->inp_lport;
*fp = inp->inp_fport;
}
/*
 * Fill the export structure 'xi' from 'inp'.
 *
 * 'xi' is zeroed first, so any field not assigned below reads as zero.
 * The socket part is filled in only when the pcb still has a socket.
 */
void
in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi)
{
bzero(xi, sizeof(*xi));
xi->xi_len = sizeof(struct xinpcb);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi->xi_socket);
/* Copy the whole connection info (addresses and ports) in one go. */
bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo));
xi->inp_gencnt = inp->inp_gencnt;
xi->inp_flow = inp->inp_flow;
xi->inp_flowid = inp->inp_flowid;
xi->inp_flowtype = inp->inp_flowtype;
xi->inp_flags = inp->inp_flags;
xi->inp_flags2 = inp->inp_flags2;
xi->in6p_cksum = inp->in6p_cksum;
xi->in6p_hops = inp->in6p_hops;
xi->inp_ip_tos = inp->inp_ip_tos;
xi->inp_vflag = inp->inp_vflag;
xi->inp_ip_ttl = inp->inp_ip_ttl;
xi->inp_ip_p = inp->inp_ip_p;
xi->inp_ip_minttl = inp->inp_ip_minttl;
}
/*
 * Sysctl handler that applies a socket option to the pcb identified by the
 * written request.
 *
 * The new value must be a struct sockopt_parameters, optionally followed by
 * further bytes; its total size may not exceed the 1024-byte local buffer.
 * The pcb is identified by comparing inp_gencnt with sop_id.  When both
 * ports in sop_inc are non-zero the search is narrowed to the matching
 * exact-match hash bucket, otherwise the whole pcb list is walked.
 *
 * SOL_SOCKET options are applied with sosetopt() after the pcb lock has
 * been dropped; every other level is passed to 'ctloutput_set'.
 *
 * Returns EINVAL if an old value was requested or the input is shorter
 * than struct sockopt_parameters, EPERM if no new value was supplied,
 * ENOMEM if the input is larger than the buffer, ECONNRESET if the pcb has
 * been dropped, ESRCH if no pcb matches, and otherwise the result of
 * SYSCTL_IN(), sosetopt() or 'ctloutput_set'.
 */
int
sysctl_setsockopt(SYSCTL_HANDLER_ARGS, struct inpcbinfo *pcbinfo,
    int (*ctloutput_set)(struct inpcb *, struct sockopt *))
{
	struct sockopt sopt;
	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
	    INPLOOKUP_WLOCKPCB);
	struct inpcb *inp;
	struct sockopt_parameters *params;
	struct socket *so;
	int error;
	char buf[1024];

	/* This is a write-only node. */
	if (req->oldptr != NULL || req->oldlen != 0)
		return (EINVAL);
	if (req->newptr == NULL)
		return (EPERM);
	if (req->newlen > sizeof(buf))
		return (ENOMEM);
	error = SYSCTL_IN(req, buf, req->newlen);
	if (error != 0)
		return (error);
	if (req->newlen < sizeof(struct sockopt_parameters))
		return (EINVAL);
	params = (struct sockopt_parameters *)buf;
	sopt.sopt_level = params->sop_level;
	sopt.sopt_name = params->sop_optname;
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = params->sop_optval;
	/* Whatever follows the fixed header is the option value. */
	sopt.sopt_valsize = req->newlen - sizeof(struct sockopt_parameters);
	sopt.sopt_td = NULL;
#ifdef INET6
	if (params->sop_inc.inc_flags & INC_ISIPV6) {
		/* Store the zone id in the second 16-bit word of link-local addresses. */
		if (IN6_IS_SCOPE_LINKLOCAL(&params->sop_inc.inc6_laddr))
			params->sop_inc.inc6_laddr.s6_addr16[1] =
			    htons(params->sop_inc.inc6_zoneid & 0xffff);
		if (IN6_IS_SCOPE_LINKLOCAL(&params->sop_inc.inc6_faddr))
			params->sop_inc.inc6_faddr.s6_addr16[1] =
			    htons(params->sop_inc.inc6_zoneid & 0xffff);
	}
#endif
	if (params->sop_inc.inc_lport != htons(0) &&
	    params->sop_inc.inc_fport != htons(0)) {
		/* Both ports known: restrict the walk to one hash bucket. */
#ifdef INET6
		if (params->sop_inc.inc_flags & INC_ISIPV6)
			inpi.hash = INP6_PCBHASH(
			    &params->sop_inc.inc6_faddr,
			    params->sop_inc.inc_lport,
			    params->sop_inc.inc_fport,
			    pcbinfo->ipi_hashmask);
		else
#endif
			inpi.hash = INP_PCBHASH(
			    &params->sop_inc.inc_faddr,
			    params->sop_inc.inc_lport,
			    params->sop_inc.inc_fport,
			    pcbinfo->ipi_hashmask);
	}
	while ((inp = inp_next(&inpi)) != NULL)
		if (inp->inp_gencnt == params->sop_id) {
			if (inp->inp_flags & INP_DROPPED) {
				INP_WUNLOCK(inp);
				return (ECONNRESET);
			}
			so = inp->inp_socket;
			KASSERT(so != NULL, ("inp_socket == NULL"));
			/* Hold the socket across the unlocked option call. */
			soref(so);
			if (params->sop_level == SOL_SOCKET) {
				INP_WUNLOCK(inp);
				error = sosetopt(so, &sopt);
			} else {
				/*
				 * NOTE(review): the pcb is handed over still
				 * write-locked; presumably 'ctloutput_set'
				 * releases it -- confirm against the callers.
				 */
				error = (*ctloutput_set)(inp, &sopt);
			}
			sorele(so);
			break;
		}
	if (inp == NULL)
		error = ESRCH;
	return (error);
}
#ifdef DDB
/*
 * Emit the indentation string 'indent' times on the debugger console.
 * Nothing is printed for zero or negative values.
 */
static void
db_print_indent(int indent)
{

	while (indent-- > 0)
		db_printf(" ");
}
/*
 * Print connection info 'inc' on the debugger console: a header line with
 * 'name' and the structure's address at 'indent', followed by the local
 * and the foreign address/port pair, each on its own line two columns
 * deeper.  Ports are shown in host byte order.
 */
static void
db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
{
char faddr_str[48], laddr_str[48];
db_print_indent(indent);
db_printf("%s at %p\n", name, inc);
indent += 2;
/* Format the addresses as IPv6 or IPv4 depending on INC_ISIPV6. */
#ifdef INET6
if (inc->inc_flags & INC_ISIPV6) {
ip6_sprintf(laddr_str, &inc->inc6_laddr);
ip6_sprintf(faddr_str, &inc->inc6_faddr);
} else
#endif
{
inet_ntoa_r(inc->inc_laddr, laddr_str);
inet_ntoa_r(inc->inc_faddr, faddr_str);
}
db_print_indent(indent);
db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
ntohs(inc->inc_lport));
db_print_indent(indent);
db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
ntohs(inc->inc_fport));
}
static void
db_print_inpflags(int inp_flags)
{
int comma;
comma = 0;
if (inp_flags & INP_RECVOPTS) {
db_printf("%sINP_RECVOPTS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_RECVRETOPTS) {
db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_RECVDSTADDR) {
db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_ORIGDSTADDR) {
db_printf("%sINP_ORIGDSTADDR", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_HDRINCL) {
db_printf("%sINP_HDRINCL", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_HIGHPORT) {
db_printf("%sINP_HIGHPORT", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_LOWPORT) {
db_printf("%sINP_LOWPORT", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_ANONPORT) {
db_printf("%sINP_ANONPORT", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_RECVIF) {
db_printf("%sINP_RECVIF", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_MTUDISC) {
db_printf("%sINP_MTUDISC", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_RECVTTL) {
db_printf("%sINP_RECVTTL", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_DONTFRAG) {
db_printf("%sINP_DONTFRAG", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_RECVTOS) {
db_printf("%sINP_RECVTOS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_IPV6_V6ONLY) {
db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_PKTINFO) {
db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_HOPLIMIT) {
db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_HOPOPTS) {
db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_DSTOPTS) {
db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_RTHDR) {
db_printf("%sIN6P_RTHDR", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_RTHDRDSTOPTS) {
db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_TCLASS) {
db_printf("%sIN6P_TCLASS", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_AUTOFLOWLABEL) {
db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_ONESBCAST) {
db_printf("%sINP_ONESBCAST", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_DROPPED) {
db_printf("%sINP_DROPPED", comma ? ", " : "");
comma = 1;
}
if (inp_flags & INP_SOCKREF) {
db_printf("%sINP_SOCKREF", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_RFC2292) {
db_printf("%sIN6P_RFC2292", comma ? ", " : "");
comma = 1;
}
if (inp_flags & IN6P_MTU) {
db_printf("IN6P_MTU%s", comma ? ", " : "");
comma = 1;
}
}
/*
 * Print the symbolic names of the bits set in 'inp_vflag' on the debugger
 * console as a ", "-separated list, without a trailing newline.  Only
 * INP_IPV4, INP_IPV6 and INP_IPV6PROTO are decoded.
 */
static void
db_print_inpvflag(u_char inp_vflag)
{
	const char *sep = "";

	if ((inp_vflag & INP_IPV4) != 0) {
		db_printf("%sINP_IPV4", sep);
		sep = ", ";
	}
	if ((inp_vflag & INP_IPV6) != 0) {
		db_printf("%sINP_IPV6", sep);
		sep = ", ";
	}
	if ((inp_vflag & INP_IPV6PROTO) != 0)
		db_printf("%sINP_IPV6PROTO", sep);
}
/*
 * Dump the pcb 'inp' on the debugger console.
 *
 * The first line carries 'name' and the pcb address at 'indent'; all
 * following lines are indented two columns deeper.  Flags and version
 * flags are decoded symbolically.  The option fields printed depend on
 * whether the pcb has INP_IPV6 set.
 */
static void
db_print_inpcb(struct inpcb *inp, const char *name, int indent)
{

	db_print_indent(indent);
	db_printf("%s at %p\n", name, inp);

	indent += 2;

	db_print_indent(indent);
	db_printf("inp_flow: 0x%x\n", inp->inp_flow);

	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);

	db_print_indent(indent);
	db_printf("inp_label: %p inp_flags: 0x%x (",
	    inp->inp_label, inp->inp_flags);
	db_print_inpflags(inp->inp_flags);
	db_printf(")\n");

	db_print_indent(indent);
	db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
	    inp->inp_vflag);
	db_print_inpvflag(inp->inp_vflag);
	db_printf(")\n");

	db_print_indent(indent);
	db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
	    inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);

	db_print_indent(indent);
#ifdef INET6
	if (inp->inp_vflag & INP_IPV6) {
		db_printf("in6p_options: %p in6p_outputopts: %p "
		    "in6p_moptions: %p\n", inp->in6p_options,
		    inp->in6p_outputopts, inp->in6p_moptions);
		/*
		 * The IPv6 branch prints two lines; the second one needs its
		 * own indent to line up with the rest of the dump.
		 */
		db_print_indent(indent);
		db_printf("in6p_icmp6filt: %p in6p_cksum %d "
		    "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
		    inp->in6p_hops);
	} else
#endif
	{
		db_printf("inp_ip_tos: %d inp_ip_options: %p "
		    "inp_ip_moptions: %p\n", inp->inp_ip_tos,
		    inp->inp_options, inp->inp_moptions);
	}

	db_print_indent(indent);
	db_printf("inp_gencnt: %ju\n", (uintmax_t)inp->inp_gencnt);
}
/*
 * DDB "show inpcb <addr>" command: treat the given address as a
 * struct inpcb and dump it.  Without an address only a usage line is
 * printed.
 */
DB_SHOW_COMMAND(inpcb, db_show_inpcb)
{

	if (have_addr) {
		db_print_inpcb((struct inpcb *)addr, "inpcb", 0);
		return;
	}
	db_printf("usage: show inpcb <addr>\n");
}
#endif
#ifdef RATELIMIT
/*
 * Ask the send tag attached to the inpcb to change its rate limit to
 * 'max_pacing_rate'.
 *
 * Returns EINVAL when the inpcb has no send tag, EOPNOTSUPP when the
 * tag provides no snd_tag_modify method, and otherwise whatever the
 * method returns.
 */
int
in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
{
	union if_snd_tag_modify_params params = {
		.rate_limit.max_rate = max_pacing_rate,
		.rate_limit.flags = M_NOWAIT,
	};
	struct m_snd_tag *mst;

	mst = inp->inp_snd_tag;
	if (mst == NULL)
		return (EINVAL);

	if (mst->sw->snd_tag_modify == NULL)
		return (EOPNOTSUPP);

	return (mst->sw->snd_tag_modify(mst, &params));
}
int
in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate)
{
union if_snd_tag_query_params params = { };
struct m_snd_tag *mst;
int error;
mst = inp->inp_snd_tag;
if (mst == NULL)
return (EINVAL);
if (mst->sw->snd_tag_query == NULL) {
error = EOPNOTSUPP;
} else {
error = mst->sw->snd_tag_query(mst, ¶ms);
if (error == 0 && p_max_pacing_rate != NULL)
*p_max_pacing_rate = params.rate_limit.max_rate;
}
return (error);
}
int
in_pcbquery_txrlevel(struct inpcb *inp, uint32_t *p_txqueue_level)
{
union if_snd_tag_query_params params = { };
struct m_snd_tag *mst;
int error;
mst = inp->inp_snd_tag;
if (mst == NULL)
return (EINVAL);
if (mst->sw->snd_tag_query == NULL)
return (EOPNOTSUPP);
error = mst->sw->snd_tag_query(mst, ¶ms);
if (error == 0 && p_txqueue_level != NULL)
*p_txqueue_level = params.rate_limit.queue_level;
return (error);
}
int
in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate, struct m_snd_tag **st)
{
union if_snd_tag_alloc_params params = {
.rate_limit.hdr.type = (max_pacing_rate == -1U) ?
IF_SND_TAG_TYPE_UNLIMITED : IF_SND_TAG_TYPE_RATE_LIMIT,
.rate_limit.hdr.flowid = flowid,
.rate_limit.hdr.flowtype = flowtype,
.rate_limit.hdr.numa_domain = inp->inp_numa_domain,
.rate_limit.max_rate = max_pacing_rate,
.rate_limit.flags = M_NOWAIT,
};
int error;
INP_WLOCK_ASSERT(inp);
if (*st != NULL || (inp->inp_flags & INP_DROPPED) != 0)
return (EINVAL);
error = m_snd_tag_alloc(ifp, ¶ms, st);
#ifdef INET
if (error == 0) {
counter_u64_add(rate_limit_set_ok, 1);
counter_u64_add(rate_limit_active, 1);
} else if (error != EOPNOTSUPP)
counter_u64_add(rate_limit_alloc_fail, 1);
#endif
return (error);
}
/*
 * Release a send tag via m_snd_tag_rele() and, with INET, decrement
 * the rate_limit_active counter.  'mst' is passed on unchecked.
 */
void
in_pcbdetach_tag(struct m_snd_tag *mst)
{
m_snd_tag_rele(mst);
#ifdef INET
counter_u64_add(rate_limit_active, -1);
#endif
}
/*
 * Detach the send tag, if any, from the inpcb.  The inpcb's tag
 * pointer is always cleared; when a tag was present it is released
 * and, with INET, rate_limit_active is decremented.  The inpcb write
 * lock is asserted.
 */
void
in_pcbdetach_txrtlmt(struct inpcb *inp)
{
	struct m_snd_tag *tag;

	INP_WLOCK_ASSERT(inp);

	/* Take the tag out of the inpcb before releasing it. */
	tag = inp->inp_snd_tag;
	inp->inp_snd_tag = NULL;

	if (tag != NULL) {
		m_snd_tag_rele(tag);
#ifdef INET
		counter_u64_add(rate_limit_active, -1);
#endif
	}
}
/*
 * Bring the inpcb's send tag in line with 'ifp' and 'max_pacing_rate'.
 *
 * A tag that belongs to a different interface is detached first and
 * INP_RATE_LIMIT_CHANGED is set.  After that:
 *  - rate 0 and no tag: nothing to do;
 *  - interface without IFCAP_TXRTLMT enabled: detach any tag;
 *  - tag present: modify its rate;
 *  - no tag and the mbuf has no hash type: EAGAIN;
 *  - no tag otherwise: attach a new one using the mbuf's hash type
 *    and flowid.
 * INP_RATE_LIMIT_CHANGED is cleared when the result is 0 or
 * EOPNOTSUPP.  Returns the resulting error code.
 */
int
in_pcboutput_txrtlmt_locked(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb, uint32_t max_pacing_rate)
{
	int error = 0;

	/* Drop a tag that is bound to some other interface. */
	if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
		in_pcbdetach_txrtlmt(inp);
		inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
	}

	if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
		/* No rate requested and no tag held: success. */
	} else if ((ifp->if_capenable & IFCAP_TXRTLMT) == 0) {
		if (inp->inp_snd_tag != NULL)
			in_pcbdetach_txrtlmt(inp);
	} else if (inp->inp_snd_tag != NULL) {
		error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
	} else if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
		/* Cannot attach until the mbuf carries a hash type. */
		error = EAGAIN;
	} else {
		error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
		    mb->m_pkthdr.flowid, max_pacing_rate, &inp->inp_snd_tag);
	}

	switch (error) {
	case 0:
	case EOPNOTSUPP:
		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
		break;
	default:
		break;
	}
	return (error);
}
/*
 * Wrapper around in_pcboutput_txrtlmt_locked() that obtains the inpcb
 * write lock if the caller does not already hold it.
 *
 * Does nothing when 'inp' is NULL, when the inpcb has no socket, or
 * when the lock upgrade attempt fails.  The pacing rate is taken from
 * the socket's so_max_pacing_rate.  A lock upgraded here is downgraded
 * again before returning; the result of the locked call is discarded.
 */
void
in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
{
	struct socket *so;
	bool upgraded = 0;

	if (inp == NULL)
		return;

	so = inp->inp_socket;
	if (so == NULL)
		return;

	if (!INP_WLOCKED(inp)) {
		/* Give up for this packet if the upgrade fails. */
		if (!INP_TRY_UPGRADE(inp))
			return;
		upgraded = 1;
	}

	in_pcboutput_txrtlmt_locked(inp, ifp, mb, so->so_max_pacing_rate);

	if (upgraded)
		INP_DOWNGRADE(inp);
}
/*
 * Detach the inpcb's send tag and set INP_RATE_LIMIT_CHANGED.
 *
 * Does nothing when 'inp' is NULL, when no send tag is attached, or
 * when the write lock is not held and the upgrade attempt fails.  A
 * lock upgraded here is downgraded again before returning.
 */
void
in_pcboutput_eagain(struct inpcb *inp)
{
	bool upgraded = 0;

	if (inp == NULL || inp->inp_snd_tag == NULL)
		return;

	if (!INP_WLOCKED(inp)) {
		if (!INP_TRY_UPGRADE(inp))
			return;
		upgraded = 1;
	}

	/* Drop the current tag and flag the inpcb as changed. */
	in_pcbdetach_txrtlmt(inp);
	inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;

	if (upgraded)
		INP_DOWNGRADE(inp);
}
#ifdef INET
/*
 * Allocate the rate-limit statistics counters.  Registered below via
 * SYSINIT at SI_SUB_PROTO_DOMAININIT / SI_ORDER_ANY; the argument is
 * not used.
 */
static void
rl_init(void *st)
{

	(void)st;

	rate_limit_new = counter_u64_alloc(M_WAITOK);
	rate_limit_chg = counter_u64_alloc(M_WAITOK);
	rate_limit_active = counter_u64_alloc(M_WAITOK);
	rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK);
	rate_limit_set_ok = counter_u64_alloc(M_WAITOK);
}

SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL);
#endif
#endif