Path: blob/main/sys/netpfil/ipfw/nat64/nat64_translate.c
39536 views
/*-1* SPDX-License-Identifier: BSD-2-Clause2*3* Copyright (c) 2015-2019 Yandex LLC4* Copyright (c) 2015-2019 Andrey V. Elsukov <[email protected]>5*6* Redistribution and use in source and binary forms, with or without7* modification, are permitted provided that the following conditions8* are met:9*10* 1. Redistributions of source code must retain the above copyright11* notice, this list of conditions and the following disclaimer.12* 2. Redistributions in binary form must reproduce the above copyright13* notice, this list of conditions and the following disclaimer in the14* documentation and/or other materials provided with the distribution.15*16* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR17* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES18* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.19* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,20* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT21* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,22* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY23* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT24* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF25* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.26*/2728#include <sys/cdefs.h>29#include "opt_ipstealth.h"3031#include <sys/param.h>32#include <sys/systm.h>33#include <sys/counter.h>34#include <sys/errno.h>35#include <sys/kernel.h>36#include <sys/lock.h>37#include <sys/mbuf.h>38#include <sys/module.h>39#include <sys/rmlock.h>40#include <sys/rwlock.h>41#include <sys/socket.h>42#include <sys/queue.h>4344#include <net/if.h>45#include <net/if_var.h>46#include <net/if_private.h>47#include <net/if_pflog.h>48#include <net/pfil.h>49#include <net/netisr.h>50#include <net/route.h>51#include <net/route/nhop.h>5253#include <netinet/in.h>54#include <netinet/in_fib.h>55#include <netinet/in_var.h>56#include <netinet/ip.h>57#include <netinet/ip_var.h>58#include <netinet/ip_fw.h>59#include <netinet/ip6.h>60#include <netinet/icmp6.h>61#include <netinet/ip_icmp.h>62#include <netinet/tcp.h>63#include <netinet/udp.h>64#include <netinet6/in6_var.h>65#include <netinet6/in6_fib.h>66#include <netinet6/ip6_var.h>67#include <netinet6/ip_fw_nat64.h>6869#include <netpfil/pf/pf.h>70#include <netpfil/ipfw/ip_fw_private.h>71#include <machine/in_cksum.h>7273#include "ip_fw_nat64.h"74#include "nat64_translate.h"7576typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *,77struct sockaddr *, struct nat64_counters *, void *);78typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *,79void *);8081static struct nhop_object *nat64_find_route4(struct sockaddr_in *,82struct mbuf *);83static struct nhop_object *nat64_find_route6(struct sockaddr_in6 *,84struct mbuf *);85static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *);86static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *,87struct nat64_counters *, void *);88static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *,89void *);90static int nat64_direct_output(struct ifnet *, struct mbuf *,91struct sockaddr *, struct nat64_counters *, void *);9293struct nat64_methods {94nat64_output_t output;95nat64_output_one_t output_one;96};97static const struct nat64_methods nat64_netisr = {98.output = nat64_output,99.output_one = nat64_output_one100};101static const struct nat64_methods nat64_direct = {102.output = nat64_direct_output,103.output_one = nat64_direct_output_one104};105106/* These variables should be initialized explicitly on module loading */107VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out);108VNET_DEFINE_STATIC(const int *, nat64ipstealth);109VNET_DEFINE_STATIC(const int *, nat64ip6stealth);110#define V_nat64out VNET(nat64out)111#define V_nat64ipstealth VNET(nat64ipstealth)112#define V_nat64ip6stealth VNET(nat64ip6stealth)113114static const int stealth_on = 1;115#ifndef IPSTEALTH116static const int stealth_off = 0;117#endif118119void120nat64_set_output_method(int direct)121{122123if (direct != 0) {124V_nat64out = &nat64_direct;125#ifdef IPSTEALTH126/* Honor corresponding variables, if IPSTEALTH is defined */127V_nat64ipstealth = &V_ipstealth;128V_nat64ip6stealth = &V_ip6stealth;129#else130/* otherwise we need to decrement HLIM/TTL for direct case */131V_nat64ipstealth = V_nat64ip6stealth = &stealth_off;132#endif133} else {134V_nat64out = &nat64_netisr;135/* Leave TTL/HLIM decrementing to forwarding code */136V_nat64ipstealth = V_nat64ip6stealth = &stealth_on;137}138}139140int141nat64_get_output_method(void)142{143144return (V_nat64out == &nat64_direct ? 1: 0);145}146147static void148nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)149{150151logdata->dir = PF_OUT;152logdata->af = family;153ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);154}155156static int157nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,158struct nat64_counters *stats, void *logdata)159{160int error;161162if (logdata != NULL)163nat64_log(logdata, m, dst->sa_family);164error = (*ifp->if_output)(ifp, m, dst, NULL);165if (error != 0)166NAT64STAT_INC(stats, oerrors);167return (error);168}169170static int171nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats,172void *logdata)173{174struct nhop_object *nh4 = NULL;175struct nhop_object *nh6 = NULL;176struct sockaddr_in6 dst6;177struct sockaddr_in dst4;178struct sockaddr *dst;179struct ip6_hdr *ip6;180struct ip *ip4;181struct ifnet *ifp;182int error;183184ip4 = mtod(m, struct ip *);185error = 0;186switch (ip4->ip_v) {187case IPVERSION:188dst4.sin_addr = ip4->ip_dst;189nh4 = nat64_find_route4(&dst4, m);190if (nh4 == NULL) {191NAT64STAT_INC(stats, noroute4);192error = EHOSTUNREACH;193} else {194ifp = nh4->nh_ifp;195dst = (struct sockaddr *)&dst4;196}197break;198case (IPV6_VERSION >> 4):199ip6 = mtod(m, struct ip6_hdr *);200dst6.sin6_addr = ip6->ip6_dst;201nh6 = nat64_find_route6(&dst6, m);202if (nh6 == NULL) {203NAT64STAT_INC(stats, noroute6);204error = EHOSTUNREACH;205} else {206ifp = nh6->nh_ifp;207dst = (struct sockaddr *)&dst6;208}209break;210default:211m_freem(m);212NAT64STAT_INC(stats, dropped);213DPRINTF(DP_DROPS, "dropped due to unknown IP version");214return (EAFNOSUPPORT);215}216if (error != 0) {217m_freem(m);218return (EHOSTUNREACH);219}220if (logdata != NULL)221nat64_log(logdata, m, dst->sa_family);222error = (*ifp->if_output)(ifp, m, dst, NULL);223if (error != 0)224NAT64STAT_INC(stats, oerrors);225return (error);226}227228static int229nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,230struct nat64_counters *stats, void *logdata)231{232struct ip *ip4;233int ret, af;234235ip4 = mtod(m, struct ip *);236switch (ip4->ip_v) {237case IPVERSION:238af = AF_INET;239ret = NETISR_IP;240break;241case (IPV6_VERSION >> 4):242af = AF_INET6;243ret = NETISR_IPV6;244break;245default:246m_freem(m);247NAT64STAT_INC(stats, dropped);248DPRINTF(DP_DROPS, "unknown IP version");249return (EAFNOSUPPORT);250}251if (logdata != NULL)252nat64_log(logdata, m, af);253if (m->m_pkthdr.rcvif == NULL)254m->m_pkthdr.rcvif = V_loif;255ret = netisr_queue(ret, m);256if (ret != 0)257NAT64STAT_INC(stats, oerrors);258return (ret);259}260261static int262nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)263{264265return (nat64_output(NULL, m, NULL, stats, logdata));266}267268/*269* Check the given IPv6 prefix and length according to RFC6052:270* The prefixes can only have one of the following lengths:271* 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long).272* Returns zero on success, otherwise EINVAL.273*/274int275nat64_check_prefixlen(int length)276{277278switch (length) {279case 32:280case 40:281case 48:282case 56:283case 64:284case 96:285return (0);286}287return (EINVAL);288}289290int291nat64_check_prefix6(const struct in6_addr *prefix, int length)292{293294if (nat64_check_prefixlen(length) != 0)295return (EINVAL);296297/* Well-known prefix has 96 prefix length */298if (IN6_IS_ADDR_WKPFX(prefix) && length != 96)299return (EINVAL);300301/* Bits 64 to 71 must be set to zero */302if (prefix->__u6_addr.__u6_addr8[8] != 0)303return (EINVAL);304305/* Some extra checks */306if (IN6_IS_ADDR_MULTICAST(prefix) ||307IN6_IS_ADDR_UNSPECIFIED(prefix) ||308IN6_IS_ADDR_LOOPBACK(prefix))309return (EINVAL);310return (0);311}312313int314nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)315{316317if (cfg->flags & NAT64_ALLOW_PRIVATE)318return (0);319320/* WKPFX must not be used to represent non-global IPv4 addresses */321if (cfg->flags & NAT64_WKPFX) {322/* IN_PRIVATE */323if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||324(ia & htonl(0xfff00000)) == htonl(0xac100000) ||325(ia & htonl(0xffff0000)) == htonl(0xc0a80000))326return (1);327/*328* RFC 5735:329* 192.0.0.0/24 - reserved for IETF protocol assignments330* 192.88.99.0/24 - for use as 6to4 relay anycast addresses331* 198.18.0.0/15 - for use in benchmark tests332* 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use333* in documentation and example code334*/335if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||336(ia & htonl(0xffffff00)) == htonl(0xc0586300) ||337(ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||338(ia & htonl(0xffffff00)) == htonl(0xc0000200) ||339(ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||340(ia & htonl(0xffffff00)) == htonl(0xcb007100))341return (1);342}343return (0);344}345346/*347* Embed @ia IPv4 address into @ip6 IPv6 address.348* Place to embedding determined from prefix length @plen.349*/350void351nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia)352{353354switch (plen) {355case 32:356case 96:357ip6->s6_addr32[plen / 32] = ia;358break;359case 40:360case 48:361case 56:362/*363* Preserve prefix bits.364* Since suffix bits should be zero and reserved for future365* use, we just overwrite the whole word, where they are.366*/367ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32);368#if BYTE_ORDER == BIG_ENDIAN369ip6->s6_addr32[1] |= ia >> (plen % 32);370ip6->s6_addr32[2] = ia << (24 - plen % 32);371#elif BYTE_ORDER == LITTLE_ENDIAN372ip6->s6_addr32[1] |= ia << (plen % 32);373ip6->s6_addr32[2] = ia >> (24 - plen % 32);374#endif375break;376case 64:377#if BYTE_ORDER == BIG_ENDIAN378ip6->s6_addr32[2] = ia >> 8;379ip6->s6_addr32[3] = ia << 24;380#elif BYTE_ORDER == LITTLE_ENDIAN381ip6->s6_addr32[2] = ia << 8;382ip6->s6_addr32[3] = ia >> 24;383#endif384break;385default:386panic("Wrong plen: %d", plen);387};388/*389* Bits 64 to 71 of the address are reserved for compatibility390* with the host identifier format defined in the IPv6 addressing391* architecture [RFC4291]. These bits MUST be set to zero.392*/393ip6->s6_addr8[8] = 0;394}395396in_addr_t397nat64_extract_ip4(const struct in6_addr *ip6, int plen)398{399in_addr_t ia;400401/*402* According to RFC 6052 p2.2:403* IPv4-embedded IPv6 addresses are composed of a variable-length404* prefix, the embedded IPv4 address, and a variable length suffix.405* The suffix bits are reserved for future extensions and SHOULD406* be set to zero.407*/408switch (plen) {409case 32:410if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)411goto badip6;412break;413case 40:414if (ip6->s6_addr32[3] != 0 ||415(ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)416goto badip6;417break;418case 48:419if (ip6->s6_addr32[3] != 0 ||420(ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)421goto badip6;422break;423case 56:424if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)425goto badip6;426break;427case 64:428if (ip6->s6_addr8[8] != 0 ||429(ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)430goto badip6;431};432switch (plen) {433case 32:434case 96:435ia = ip6->s6_addr32[plen / 32];436break;437case 40:438case 48:439case 56:440#if BYTE_ORDER == BIG_ENDIAN441ia = (ip6->s6_addr32[1] << (plen % 32)) |442(ip6->s6_addr32[2] >> (24 - plen % 32));443#elif BYTE_ORDER == LITTLE_ENDIAN444ia = (ip6->s6_addr32[1] >> (plen % 32)) |445(ip6->s6_addr32[2] << (24 - plen % 32));446#endif447break;448case 64:449#if BYTE_ORDER == BIG_ENDIAN450ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);451#elif BYTE_ORDER == LITTLE_ENDIAN452ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);453#endif454break;455default:456return (0);457};458if (nat64_check_ip4(ia) == 0)459return (ia);460461DPRINTF(DP_GENERIC | DP_DROPS,462"invalid destination address: %08x", ia);463return (0);464badip6:465DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");466return (0);467}468469/*470* According to RFC 1624 the equation for incremental checksum update is:471* HC' = ~(~HC + ~m + m') -- [Eqn. 3]472* HC' = HC - ~m - m' -- [Eqn. 4]473* So, when we are replacing IPv4 addresses to IPv6, we474* can assume, that new bytes previously were zeros, and vise versa -475* when we replacing IPv6 addresses to IPv4, now unused bytes become476* zeros. The payload length in pseudo header has bigger size, but one477* half of it should be zero. Using the equation 4 we get:478* HC' = HC - (~m0 + m0') -- m0 is first changed word479* HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word480* HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =481* = HC - sum(~m[i] + m'[i])482*483* The function result should be used as follows:484* IPv6 to IPv4: HC' = cksum_add(HC, result)485* IPv4 to IPv6: HC' = cksum_add(HC, ~result)486*/487static uint16_t488nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)489{490uint32_t sum;491uint16_t *p;492493sum = ~ip->ip_src.s_addr >> 16;494sum += ~ip->ip_src.s_addr & 0xffff;495sum += ~ip->ip_dst.s_addr >> 16;496sum += ~ip->ip_dst.s_addr & 0xffff;497498for (p = (uint16_t *)&ip6->ip6_src;499p < (uint16_t *)(&ip6->ip6_src + 2); p++)500sum += *p;501502while (sum >> 16)503sum = (sum & 0xffff) + (sum >> 16);504return (sum);505}506507static void508nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,509uint16_t plen, uint8_t proto, struct ip *ip)510{511512/* assume addresses are already initialized */513ip->ip_v = IPVERSION;514ip->ip_hl = sizeof(*ip) >> 2;515ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;516ip->ip_len = htons(sizeof(*ip) + plen);517ip->ip_ttl = ip6->ip6_hlim;518if (*V_nat64ip6stealth == 0)519ip->ip_ttl -= IPV6_HLIMDEC;520ip->ip_sum = 0;521ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;522ip_fillid(ip, V_ip_random_id);523if (frag != NULL) {524ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);525if (frag->ip6f_offlg & IP6F_MORE_FRAG)526ip->ip_off |= htons(IP_MF);527} else {528ip->ip_off = htons(IP_DF);529}530ip->ip_sum = in_cksum_hdr(ip);531}532533#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))534static NAT64NOINLINE int535nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,536struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,537uint16_t ip_off)538{539struct ip6_frag ip6f;540struct mbuf *n;541uint16_t hlen, len, offset;542int plen;543544plen = ntohs(ip6->ip6_plen);545hlen = sizeof(struct ip6_hdr);546547/* Fragmentation isn't needed */548if (ip_off == 0 && plen <= mtu - hlen) {549M_PREPEND(m, hlen, M_NOWAIT);550if (m == NULL) {551NAT64STAT_INC(stats, nomem);552return (ENOMEM);553}554bcopy(ip6, mtod(m, void *), hlen);555if (mbufq_enqueue(mq, m) != 0) {556m_freem(m);557NAT64STAT_INC(stats, dropped);558DPRINTF(DP_DROPS, "dropped due to mbufq overflow");559return (ENOBUFS);560}561return (0);562}563564hlen += sizeof(struct ip6_frag);565ip6f.ip6f_reserved = 0;566ip6f.ip6f_nxt = ip6->ip6_nxt;567ip6->ip6_nxt = IPPROTO_FRAGMENT;568if (ip_off != 0) {569/*570* We have got an IPv4 fragment.571* Use offset value and ip_id from original fragment.572*/573ip6f.ip6f_ident = htonl(ntohs(ip_id));574offset = (ntohs(ip_off) & IP_OFFMASK) << 3;575NAT64STAT_INC(stats, ifrags);576} else {577/* The packet size exceeds interface MTU */578ip6f.ip6f_ident = htonl(ip6_randomid());579offset = 0; /* First fragment*/580}581while (plen > 0 && m != NULL) {582n = NULL;583len = FRAGSZ(mtu) & ~7;584if (len > plen)585len = plen;586ip6->ip6_plen = htons(len + sizeof(ip6f));587ip6f.ip6f_offlg = ntohs(offset);588if (len < plen || (ip_off & htons(IP_MF)) != 0)589ip6f.ip6f_offlg |= IP6F_MORE_FRAG;590offset += len;591plen -= len;592if (plen > 0) {593n = m_split(m, len, M_NOWAIT);594if (n == NULL)595goto fail;596}597M_PREPEND(m, hlen, M_NOWAIT);598if (m == NULL)599goto fail;600bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));601bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),602sizeof(struct ip6_frag));603if (mbufq_enqueue(mq, m) != 0)604goto fail;605m = n;606}607NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));608return (0);609fail:610if (m != NULL)611m_freem(m);612if (n != NULL)613m_freem(n);614mbufq_drain(mq);615NAT64STAT_INC(stats, nomem);616return (ENOMEM);617}618619static struct nhop_object *620nat64_find_route6(struct sockaddr_in6 *dst, struct mbuf *m)621{622struct nhop_object *nh;623624NET_EPOCH_ASSERT();625nh = fib6_lookup(M_GETFIB(m), &dst->sin6_addr, 0, NHR_NONE, 0);626if (nh == NULL)627return (NULL);628if (nh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))629return (NULL);630631dst->sin6_family = AF_INET6;632dst->sin6_len = sizeof(*dst);633if (nh->nh_flags & NHF_GATEWAY)634dst->sin6_addr = nh->gw6_sa.sin6_addr;635dst->sin6_port = 0;636dst->sin6_scope_id = 0;637dst->sin6_flowinfo = 0;638return (nh);639}640641#define NAT64_ICMP6_PLEN 64642static NAT64NOINLINE void643nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,644struct nat64_counters *stats, void *logdata)645{646struct icmp6_hdr *icmp6;647struct ip6_hdr *ip6, *oip6;648struct mbuf *n;649int len, plen, proto;650651len = 0;652proto = nat64_getlasthdr(m, &len);653if (proto < 0) {654DPRINTF(DP_DROPS, "mbuf isn't contigious");655goto freeit;656}657/*658* Do not send ICMPv6 in reply to ICMPv6 errors.659*/660if (proto == IPPROTO_ICMPV6) {661if (m->m_len < len + sizeof(*icmp6)) {662DPRINTF(DP_DROPS, "mbuf isn't contigious");663goto freeit;664}665icmp6 = mtodo(m, len);666if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||667icmp6->icmp6_type == ND_REDIRECT) {668DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "669"ICMPv6 errors");670goto freeit;671}672/*673* If there are extra headers between IPv6 and ICMPv6,674* strip off them.675*/676if (len > sizeof(struct ip6_hdr)) {677/*678* NOTE: ipfw_chk already did m_pullup() and it is679* expected that data is contigious from the start680* of IPv6 header up to the end of ICMPv6 header.681*/682bcopy(mtod(m, caddr_t),683mtodo(m, len - sizeof(struct ip6_hdr)),684sizeof(struct ip6_hdr));685m_adj(m, len - sizeof(struct ip6_hdr));686}687}688/*689if (icmp6_ratelimit(&ip6->ip6_src, type, code))690goto freeit;691*/692ip6 = mtod(m, struct ip6_hdr *);693switch (type) {694case ICMP6_DST_UNREACH:695case ICMP6_PACKET_TOO_BIG:696case ICMP6_TIME_EXCEEDED:697case ICMP6_PARAM_PROB:698break;699default:700goto freeit;701}702/* Calculate length of ICMPv6 payload */703len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:704m->m_pkthdr.len;705706/* Create new ICMPv6 datagram */707plen = len + sizeof(struct icmp6_hdr);708n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,709MT_HEADER, M_PKTHDR);710if (n == NULL) {711NAT64STAT_INC(stats, nomem);712m_freem(m);713return;714}715/*716* Move pkthdr from original mbuf. We should have initialized some717* fields, because we can reinject this mbuf to netisr and it will718* go through input path (it requires at least rcvif should be set).719* Also do M_ALIGN() to reduce chances of need to allocate new mbuf720* in the chain, when we will do M_PREPEND() or make some type of721* tunneling.722*/723m_move_pkthdr(n, m);724M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);725726n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;727oip6 = mtod(n, struct ip6_hdr *);728/*729* Make IPv6 source address selection for reflected datagram.730* nat64_check_ip6() doesn't allow scoped addresses, therefore731* we use zero scopeid.732*/733if (in6_selectsrc_addr(M_GETFIB(n), &ip6->ip6_src, 0,734n->m_pkthdr.rcvif, &oip6->ip6_src, NULL) != 0) {735/*736* Failed to find proper source address, drop the packet.737*/738m_freem(n);739goto freeit;740}741oip6->ip6_dst = ip6->ip6_src;742oip6->ip6_nxt = IPPROTO_ICMPV6;743oip6->ip6_flow = 0;744oip6->ip6_vfc |= IPV6_VERSION;745oip6->ip6_hlim = V_ip6_defhlim;746oip6->ip6_plen = htons(plen);747748icmp6 = mtodo(n, sizeof(struct ip6_hdr));749icmp6->icmp6_cksum = 0;750icmp6->icmp6_type = type;751icmp6->icmp6_code = code;752icmp6->icmp6_mtu = htonl(mtu);753754m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +755sizeof(struct icmp6_hdr)));756icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,757sizeof(struct ip6_hdr), plen);758m_freem(m);759V_nat64out->output_one(n, stats, logdata);760return;761freeit:762NAT64STAT_INC(stats, dropped);763m_freem(m);764}765766static struct nhop_object *767nat64_find_route4(struct sockaddr_in *dst, struct mbuf *m)768{769struct nhop_object *nh;770771NET_EPOCH_ASSERT();772nh = fib4_lookup(M_GETFIB(m), dst->sin_addr, 0, NHR_NONE, 0);773if (nh == NULL)774return (NULL);775if (nh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))776return (NULL);777778dst->sin_family = AF_INET;779dst->sin_len = sizeof(*dst);780if (nh->nh_flags & NHF_GATEWAY)781dst->sin_addr = nh->gw4_sa.sin_addr;782dst->sin_port = 0;783return (nh);784}785786#define NAT64_ICMP_PLEN 64787static NAT64NOINLINE void788nat64_icmp_reflect(struct mbuf *m, uint8_t type,789uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)790{791struct icmp *icmp;792struct ip *ip, *oip;793struct mbuf *n;794int len, plen;795796ip = mtod(m, struct ip *);797/* Do not send ICMP error if packet is not the first fragment */798if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {799DPRINTF(DP_DROPS, "not first fragment");800goto freeit;801}802/* Do not send ICMP in reply to ICMP errors */803if (ip->ip_p == IPPROTO_ICMP) {804if (m->m_len < (ip->ip_hl << 2)) {805DPRINTF(DP_DROPS, "mbuf isn't contigious");806goto freeit;807}808icmp = mtodo(m, ip->ip_hl << 2);809if (!ICMP_INFOTYPE(icmp->icmp_type)) {810DPRINTF(DP_DROPS, "do not send ICMP in reply to "811"ICMP errors");812goto freeit;813}814}815switch (type) {816case ICMP_UNREACH:817case ICMP_TIMXCEED:818case ICMP_PARAMPROB:819break;820default:821goto freeit;822}823/* Calculate length of ICMP payload */824len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:825m->m_pkthdr.len;826827/* Create new ICMPv4 datagram */828plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);829n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,830MT_HEADER, M_PKTHDR);831if (n == NULL) {832NAT64STAT_INC(stats, nomem);833m_freem(m);834return;835}836m_move_pkthdr(n, m);837M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);838839n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;840oip = mtod(n, struct ip *);841oip->ip_v = IPVERSION;842oip->ip_hl = sizeof(struct ip) >> 2;843oip->ip_tos = 0;844oip->ip_len = htons(n->m_pkthdr.len);845oip->ip_ttl = V_ip_defttl;846oip->ip_p = IPPROTO_ICMP;847ip_fillid(oip, V_ip_random_id);848oip->ip_off = htons(IP_DF);849oip->ip_src = ip->ip_dst;850oip->ip_dst = ip->ip_src;851oip->ip_sum = 0;852oip->ip_sum = in_cksum_hdr(oip);853854icmp = mtodo(n, sizeof(struct ip));855icmp->icmp_type = type;856icmp->icmp_code = code;857icmp->icmp_cksum = 0;858icmp->icmp_pmvoid = 0;859icmp->icmp_nextmtu = htons(mtu);860m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +861sizeof(struct icmphdr) + sizeof(uint32_t)));862icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,863sizeof(struct ip));864m_freem(m);865V_nat64out->output_one(n, stats, logdata);866return;867freeit:868NAT64STAT_INC(stats, dropped);869m_freem(m);870}871872/* Translate ICMP echo request/reply into ICMPv6 */873static void874nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,875uint16_t id, uint8_t type)876{877uint16_t old;878879old = *(uint16_t *)icmp6; /* save type+code in one word */880icmp6->icmp6_type = type;881/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */882icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,883old, *(uint16_t *)icmp6);884if (id != 0) {885old = icmp6->icmp6_id;886icmp6->icmp6_id = id;887/* Reflect ICMP id translation in the cksum */888icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,889old, id);890}891/* Reflect IPv6 pseudo header in the cksum */892icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),893IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);894}895896static NAT64NOINLINE struct mbuf *897nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,898int offset, struct nat64_config *cfg)899{900struct ip ip;901struct icmp *icmp;902struct tcphdr *tcp;903struct udphdr *udp;904struct ip6_hdr *eip6;905struct mbuf *n;906uint32_t mtu;907int len, hlen, plen;908uint8_t type, code;909910if (m->m_len < offset + ICMP_MINLEN)911m = m_pullup(m, offset + ICMP_MINLEN);912if (m == NULL) {913NAT64STAT_INC(&cfg->stats, nomem);914return (m);915}916mtu = 0;917icmp = mtodo(m, offset);918/* RFC 7915 p4.2 */919switch (icmp->icmp_type) {920case ICMP_ECHOREPLY:921type = ICMP6_ECHO_REPLY;922code = 0;923break;924case ICMP_UNREACH:925type = ICMP6_DST_UNREACH;926switch (icmp->icmp_code) {927case ICMP_UNREACH_NET:928case ICMP_UNREACH_HOST:929case ICMP_UNREACH_SRCFAIL:930case ICMP_UNREACH_NET_UNKNOWN:931case ICMP_UNREACH_HOST_UNKNOWN:932case ICMP_UNREACH_TOSNET:933case ICMP_UNREACH_TOSHOST:934code = ICMP6_DST_UNREACH_NOROUTE;935break;936case ICMP_UNREACH_PROTOCOL:937type = ICMP6_PARAM_PROB;938code = ICMP6_PARAMPROB_NEXTHEADER;939break;940case ICMP_UNREACH_PORT:941code = ICMP6_DST_UNREACH_NOPORT;942break;943case ICMP_UNREACH_NEEDFRAG:944type = ICMP6_PACKET_TOO_BIG;945code = 0;946/* XXX: needs an additional look */947mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);948break;949case ICMP_UNREACH_NET_PROHIB:950case ICMP_UNREACH_HOST_PROHIB:951case ICMP_UNREACH_FILTER_PROHIB:952case ICMP_UNREACH_PRECEDENCE_CUTOFF:953code = ICMP6_DST_UNREACH_ADMIN;954break;955default:956DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",957icmp->icmp_type, icmp->icmp_code);958goto freeit;959}960break;961case ICMP_TIMXCEED:962type = ICMP6_TIME_EXCEEDED;963code = icmp->icmp_code;964break;965case ICMP_ECHO:966type = ICMP6_ECHO_REQUEST;967code = 0;968break;969case ICMP_PARAMPROB:970type = ICMP6_PARAM_PROB;971switch (icmp->icmp_code) {972case ICMP_PARAMPROB_ERRATPTR:973case ICMP_PARAMPROB_LENGTH:974code = ICMP6_PARAMPROB_HEADER;975switch (icmp->icmp_pptr) {976case 0: /* Version/IHL */977case 1: /* Type Of Service */978mtu = icmp->icmp_pptr;979break;980case 2: /* Total Length */981case 3: mtu = 4; /* Payload Length */982break;983case 8: /* Time to Live */984mtu = 7; /* Hop Limit */985break;986case 9: /* Protocol */987mtu = 6; /* Next Header */988break;989case 12: /* Source address */990case 13:991case 14:992case 15:993mtu = 8;994break;995case 16: /* Destination address */996case 17:997case 18:998case 19:999mtu = 24;1000break;1001default: /* Silently drop */1002DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"1003" code %d, pptr %d", icmp->icmp_type,1004icmp->icmp_code, icmp->icmp_pptr);1005goto freeit;1006}1007break;1008default:1009DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"1010" code %d, pptr %d", icmp->icmp_type,1011icmp->icmp_code, icmp->icmp_pptr);1012goto freeit;1013}1014break;1015default:1016DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",1017icmp->icmp_type, icmp->icmp_code);1018goto freeit;1019}1020/*1021* For echo request/reply we can use original payload,1022* but we need adjust icmp_cksum, because ICMPv6 cksum covers1023* IPv6 pseudo header and ICMPv6 types differs from ICMPv4.1024*/1025if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {1026nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);1027return (m);1028}1029/*1030* For other types of ICMP messages we need to translate inner1031* IPv4 header to IPv6 header.1032* Assume ICMP src is the same as payload dst1033* E.g. we have ( GWsrc1 , NATIP1 ) in outer header1034* and ( NATIP1, Hostdst1 ) in ICMP copy header.1035* In that case, we already have map for NATIP1 and GWsrc1.1036* The only thing we need is to copy IPv6 map prefix to1037* Hostdst1.1038*/1039hlen = offset + ICMP_MINLEN;1040if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {1041DPRINTF(DP_DROPS, "Message is too short %d",1042m->m_pkthdr.len);1043goto freeit;1044}1045m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);1046if (ip.ip_v != IPVERSION) {1047DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);1048goto freeit;1049}1050hlen += ip.ip_hl << 2; /* Skip inner IP header */1051if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||1052nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||1053nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||1054nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {1055DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",1056ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));1057goto freeit;1058}1059if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {1060DPRINTF(DP_DROPS, "Message is too short %d",1061m->m_pkthdr.len);1062goto freeit;1063}1064#if 01065/*1066* Check that inner source matches the outer destination.1067* XXX: We need some method to convert IPv4 into IPv6 address here,1068* and compare IPv6 addresses.1069*/1070if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {1071DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",1072"%04x vs %04x", ip.ip_src.s_addr,1073nat64_get_ip4(&ip6->ip6_dst));1074goto freeit;1075}1076#endif1077/*1078* Create new mbuf for ICMPv6 datagram.1079* NOTE: len is data length just after inner IP header.1080*/1081len = m->m_pkthdr.len - hlen;1082if (sizeof(struct ip6_hdr) +1083sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)1084len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -1085sizeof(struct ip6_hdr);1086plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;1087n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);1088if (n == NULL) {1089NAT64STAT_INC(&cfg->stats, nomem);1090m_freem(m);1091return (NULL);1092}1093m_move_pkthdr(n, m);1094M_ALIGN(n, offset + plen + max_hdr);1095n->m_len = n->m_pkthdr.len = offset + plen;1096/* Adjust ip6_plen in outer header */1097ip6->ip6_plen = htons(plen);1098/* Construct new inner IPv6 header */1099eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));1100eip6->ip6_src = ip6->ip6_dst;11011102/* Use the same prefix that we have in outer header */1103eip6->ip6_dst = ip6->ip6_src;1104MPASS(cfg->flags & NAT64_PLATPFX);1105nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr);11061107eip6->ip6_flow = htonl(ip.ip_tos << 20);1108eip6->ip6_vfc |= IPV6_VERSION;1109eip6->ip6_hlim = ip.ip_ttl;1110eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));1111eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;1112m_copydata(m, hlen, len, (char *)(eip6 + 1));1113/*1114* We need to translate source port in the inner ULP header,1115* and adjust ULP checksum.1116*/1117switch (ip.ip_p) {1118case IPPROTO_TCP:1119if (len < offsetof(struct tcphdr, th_sum))1120break;1121tcp = TCP(eip6 + 1);1122if (icmpid != 0) {1123tcp->th_sum = cksum_adjust(tcp->th_sum,1124tcp->th_sport, icmpid);1125tcp->th_sport = icmpid;1126}1127tcp->th_sum = cksum_add(tcp->th_sum,1128~nat64_cksum_convert(eip6, &ip));1129break;1130case IPPROTO_UDP:1131if (len < offsetof(struct udphdr, uh_sum))1132break;1133udp = UDP(eip6 + 1);1134if (icmpid != 0) {1135udp->uh_sum = cksum_adjust(udp->uh_sum,1136udp->uh_sport, icmpid);1137udp->uh_sport = icmpid;1138}1139udp->uh_sum = cksum_add(udp->uh_sum,1140~nat64_cksum_convert(eip6, &ip));1141break;1142case IPPROTO_ICMP:1143/*1144* Check if this is an ICMP error message for echo request1145* that we sent. I.e. ULP in the data containing invoking1146* packet is IPPROTO_ICMP and its type is ICMP_ECHO.1147*/1148icmp = (struct icmp *)(eip6 + 1);1149if (icmp->icmp_type != ICMP_ECHO) {1150m_freem(n);1151goto freeit;1152}1153/*1154* For our client this original datagram should looks1155* like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.1156* Thus we need adjust icmp_cksum and convert type from1157* ICMP_ECHO to ICMP6_ECHO_REQUEST.1158*/1159nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,1160ICMP6_ECHO_REQUEST);1161}1162m_freem(m);1163/* Convert ICMPv4 into ICMPv6 header */1164icmp = mtodo(n, offset);1165ICMP6(icmp)->icmp6_type = type;1166ICMP6(icmp)->icmp6_code = code;1167ICMP6(icmp)->icmp6_mtu = htonl(mtu);1168ICMP6(icmp)->icmp6_cksum = 0;1169ICMP6(icmp)->icmp6_cksum = cksum_add(1170~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),1171in_cksum_skip(n, n->m_pkthdr.len, offset));1172return (n);1173freeit:1174m_freem(m);1175NAT64STAT_INC(&cfg->stats, dropped);1176return (NULL);1177}11781179int1180nat64_getlasthdr(struct mbuf *m, int *offset)1181{1182struct ip6_hdr *ip6;1183struct ip6_hbh *hbh;1184int proto, hlen;11851186if (offset != NULL)1187hlen = *offset;1188else1189hlen = 0;11901191if (m->m_len < hlen + sizeof(*ip6))1192return (-1);11931194ip6 = mtodo(m, hlen);1195hlen += sizeof(*ip6);1196proto = ip6->ip6_nxt;1197/* Skip extension headers */1198while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||1199proto == IPPROTO_DSTOPTS) {1200hbh = mtodo(m, hlen);1201/*1202* We expect mbuf has contigious data up to1203* upper level header.1204*/1205if (m->m_len < hlen)1206return (-1);1207/*1208* We doesn't support Jumbo payload option,1209* so return error.1210*/1211if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)1212return (-1);1213proto = hbh->ip6h_nxt;1214hlen += (hbh->ip6h_len + 1) << 3;1215}1216if (offset != NULL)1217*offset = hlen;1218return (proto);1219}12201221int1222nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,1223struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,1224void *logdata)1225{1226struct nhop_object *nh;1227struct ip6_hdr ip6;1228struct sockaddr_in6 dst;1229struct ip *ip;1230struct mbufq mq;1231uint16_t ip_id, ip_off;1232uint16_t *csum;1233int plen, hlen;1234uint8_t proto;12351236ip = mtod(m, struct ip*);12371238if (*V_nat64ipstealth == 0 && ip->ip_ttl <= IPTTLDEC) {1239nat64_icmp_reflect(m, ICMP_TIMXCEED,1240ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);1241return (NAT64RETURN);1242}12431244ip6.ip6_dst = *daddr;1245ip6.ip6_src = *saddr;12461247hlen = ip->ip_hl << 2;1248plen = ntohs(ip->ip_len) - hlen;1249proto = ip->ip_p;12501251/* Save ip_id and ip_off, both are in network byte order */1252ip_id = ip->ip_id;1253ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);12541255/* Fragment length must be multiple of 8 octets */1256if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {1257nat64_icmp_reflect(m, ICMP_PARAMPROB,1258ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);1259return (NAT64RETURN);1260}1261/* Fragmented ICMP is unsupported */1262if (proto == IPPROTO_ICMP && ip_off != 0) {1263DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");1264NAT64STAT_INC(&cfg->stats, dropped);1265return (NAT64MFREE);1266}12671268dst.sin6_addr = ip6.ip6_dst;1269nh = nat64_find_route6(&dst, m);1270if (nh == NULL) {1271NAT64STAT_INC(&cfg->stats, noroute6);1272nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,1273&cfg->stats, logdata);1274return (NAT64RETURN);1275}1276if (nh->nh_mtu < plen + sizeof(ip6) &&1277(ip->ip_off & htons(IP_DF)) != 0) {1278nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,1279FRAGSZ(nh->nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);1280return (NAT64RETURN);1281}12821283ip6.ip6_flow = htonl(ip->ip_tos << 20);1284ip6.ip6_vfc |= IPV6_VERSION;1285ip6.ip6_hlim = ip->ip_ttl;1286if (*V_nat64ipstealth == 0)1287ip6.ip6_hlim -= IPTTLDEC;1288ip6.ip6_plen = htons(plen);1289ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;12901291/* Handle delayed checksums if needed. */1292if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {1293in_delayed_cksum(m);1294m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;1295}1296/* Convert checksums. */1297switch (proto) {1298case IPPROTO_TCP:1299csum = &TCP(mtodo(m, hlen))->th_sum;1300if (lport != 0) {1301struct tcphdr *tcp = TCP(mtodo(m, hlen));1302*csum = cksum_adjust(*csum, tcp->th_dport, lport);1303tcp->th_dport = lport;1304}1305*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));1306break;1307case IPPROTO_UDP:1308csum = &UDP(mtodo(m, hlen))->uh_sum;1309if (lport != 0) {1310struct udphdr *udp = UDP(mtodo(m, hlen));1311*csum = cksum_adjust(*csum, udp->uh_dport, lport);1312udp->uh_dport = lport;1313}1314*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));1315break;1316case IPPROTO_ICMP:1317m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);1318if (m == NULL) /* stats already accounted */1319return (NAT64RETURN);1320}13211322m_adj(m, hlen);1323mbufq_init(&mq, 255);1324nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh->nh_mtu, ip_id, ip_off);1325while ((m = mbufq_dequeue(&mq)) != NULL) {1326if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,1327&cfg->stats, logdata) != 0)1328break;1329NAT64STAT_INC(&cfg->stats, opcnt46);1330}1331mbufq_drain(&mq);1332return (NAT64RETURN);1333}13341335int1336nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,1337struct nat64_config *cfg, void *logdata)1338{1339struct ip ip;1340struct icmp6_hdr *icmp6;1341struct ip6_frag *ip6f;1342struct ip6_hdr *ip6, *ip6i;1343uint32_t mtu;1344int plen, proto;1345uint8_t type, code;13461347if (hlen == 0) {1348ip6 = mtod(m, struct ip6_hdr *);1349if (nat64_check_ip6(&ip6->ip6_src) != 0 ||1350nat64_check_ip6(&ip6->ip6_dst) != 0)1351return (NAT64SKIP);13521353proto = nat64_getlasthdr(m, &hlen);1354if (proto != IPPROTO_ICMPV6) {1355DPRINTF(DP_DROPS,1356"dropped due to mbuf isn't contigious");1357NAT64STAT_INC(&cfg->stats, dropped);1358return (NAT64MFREE);1359}1360}13611362/*1363* Translate ICMPv6 type and code to ICMPv4 (RFC7915).1364* NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().1365*/1366icmp6 = mtodo(m, hlen);1367mtu = 0;1368switch (icmp6->icmp6_type) {1369case ICMP6_DST_UNREACH:1370type = ICMP_UNREACH;1371switch (icmp6->icmp6_code) {1372case ICMP6_DST_UNREACH_NOROUTE:1373case ICMP6_DST_UNREACH_BEYONDSCOPE:1374case ICMP6_DST_UNREACH_ADDR:1375code = ICMP_UNREACH_HOST;1376break;1377case ICMP6_DST_UNREACH_ADMIN:1378code = ICMP_UNREACH_HOST_PROHIB;1379break;1380case ICMP6_DST_UNREACH_NOPORT:1381code = ICMP_UNREACH_PORT;1382break;1383default:1384DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"1385" code %d", icmp6->icmp6_type,1386icmp6->icmp6_code);1387NAT64STAT_INC(&cfg->stats, dropped);1388return (NAT64MFREE);1389}1390break;1391case ICMP6_PACKET_TOO_BIG:1392type = ICMP_UNREACH;1393code = ICMP_UNREACH_NEEDFRAG;1394mtu = ntohl(icmp6->icmp6_mtu);1395if (mtu < IPV6_MMTU) {1396DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"1397" code %d", mtu, icmp6->icmp6_type,1398icmp6->icmp6_code);1399NAT64STAT_INC(&cfg->stats, dropped);1400return (NAT64MFREE);1401}1402/*1403* Adjust MTU to reflect difference between1404* IPv6 an IPv4 headers.1405*/1406mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);1407break;1408case ICMP6_TIME_EXCEEDED:1409type = ICMP_TIMXCEED;1410code = icmp6->icmp6_code;1411break;1412case ICMP6_PARAM_PROB:1413switch (icmp6->icmp6_code) {1414case ICMP6_PARAMPROB_HEADER:1415type = ICMP_PARAMPROB;1416code = ICMP_PARAMPROB_ERRATPTR;1417mtu = ntohl(icmp6->icmp6_pptr);1418switch (mtu) {1419case 0: /* Version/Traffic Class */1420case 1: /* Traffic Class/Flow Label */1421break;1422case 4: /* Payload Length */1423case 5:1424mtu = 2;1425break;1426case 6: /* Next Header */1427mtu = 9;1428break;1429case 7: /* Hop Limit */1430mtu = 8;1431break;1432default:1433if (mtu >= 8 && mtu <= 23) {1434mtu = 12; /* Source address */1435break;1436}1437if (mtu >= 24 && mtu <= 39) {1438mtu = 16; /* Destination address */1439break;1440}1441DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"1442" code %d, pptr %d", icmp6->icmp6_type,1443icmp6->icmp6_code, mtu);1444NAT64STAT_INC(&cfg->stats, dropped);1445return (NAT64MFREE);1446}1447case ICMP6_PARAMPROB_NEXTHEADER:1448type = ICMP_UNREACH;1449code = ICMP_UNREACH_PROTOCOL;1450break;1451default:1452DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"1453" code %d, pptr %d", icmp6->icmp6_type,1454icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));1455NAT64STAT_INC(&cfg->stats, dropped);1456return (NAT64MFREE);1457}1458break;1459default:1460DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",1461icmp6->icmp6_type, icmp6->icmp6_code);1462NAT64STAT_INC(&cfg->stats, dropped);1463return (NAT64MFREE);1464}14651466hlen += sizeof(struct icmp6_hdr);1467if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {1468NAT64STAT_INC(&cfg->stats, dropped);1469DPRINTF(DP_DROPS, "Message is too short %d",1470m->m_pkthdr.len);1471return (NAT64MFREE);1472}1473/*1474* We need at least ICMP_MINLEN bytes of original datagram payload1475* to generate ICMP message. It is nice that ICMP_MINLEN is equal1476* to sizeof(struct ip6_frag). So, if embedded datagram had a fragment1477* header we will not have to do m_pullup() again.1478*1479* What we have here:1480* Outer header: (IPv6iGW, v4mapPRefix+v4exthost)1481* Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]1482* We need to translate it to:1483*1484* Outer header: (alias_host, v4exthost)1485* Inner header: (v4exthost, alias_host) [sport, alias_port]1486*1487* Assume caller function has checked if v4mapPRefix+v4host1488* matches configured prefix.1489* The only two things we should be provided with are mapping between1490* IPv6iHost <> alias_host and between dport and alias_port.1491*/1492if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)1493m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);1494if (m == NULL) {1495NAT64STAT_INC(&cfg->stats, nomem);1496return (NAT64RETURN);1497}1498ip6 = mtod(m, struct ip6_hdr *);1499ip6i = mtodo(m, hlen);1500ip6f = NULL;1501proto = ip6i->ip6_nxt;1502plen = ntohs(ip6i->ip6_plen);1503hlen += sizeof(struct ip6_hdr);1504if (proto == IPPROTO_FRAGMENT) {1505if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +1506ICMP_MINLEN)1507goto fail;1508ip6f = mtodo(m, hlen);1509proto = ip6f->ip6f_nxt;1510plen -= sizeof(struct ip6_frag);1511hlen += sizeof(struct ip6_frag);1512/* Ajust MTU to reflect frag header size */1513if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)1514mtu -= sizeof(struct ip6_frag);1515}1516if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {1517DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",1518proto);1519goto fail;1520}1521if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||1522nat64_check_ip6(&ip6i->ip6_dst) != 0) {1523DPRINTF(DP_DROPS, "Inner addresses do not passes the check");1524goto fail;1525}1526/* Check if outer dst is the same as inner src */1527if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {1528DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");1529goto fail;1530}15311532/* Now we need to make a fake IPv4 packet to generate ICMP message */1533ip.ip_dst.s_addr = aaddr;1534ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen);1535if (ip.ip_src.s_addr == 0)1536goto fail;1537/* XXX: Make fake ulp header */1538if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */1539ip6i->ip6_hlim += IPV6_HLIMDEC;1540nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);1541m_adj(m, hlen - sizeof(struct ip));1542bcopy(&ip, mtod(m, void *), sizeof(ip));1543nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,1544logdata);1545return (NAT64RETURN);1546fail:1547/*1548* We must call m_freem() because mbuf pointer could be1549* changed with m_pullup().1550*/1551m_freem(m);1552NAT64STAT_INC(&cfg->stats, dropped);1553return (NAT64RETURN);1554}15551556int1557nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,1558struct nat64_config *cfg, void *logdata)1559{1560struct ip ip;1561struct nhop_object *nh;1562struct sockaddr_in dst;1563struct ip6_frag *frag;1564struct ip6_hdr *ip6;1565struct icmp6_hdr *icmp6;1566uint16_t *csum;1567int plen, hlen, proto;15681569/*1570* XXX: we expect ipfw_chk() did m_pullup() up to upper level1571* protocol's headers. Also we skip some checks, that ip6_input(),1572* ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.1573*/1574ip6 = mtod(m, struct ip6_hdr *);1575if (nat64_check_ip6(&ip6->ip6_src) != 0 ||1576nat64_check_ip6(&ip6->ip6_dst) != 0) {1577return (NAT64SKIP);1578}15791580/* Starting from this point we must not return zero */1581ip.ip_src.s_addr = aaddr;1582if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {1583DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",1584ip.ip_src.s_addr);1585NAT64STAT_INC(&cfg->stats, dropped);1586return (NAT64MFREE);1587}15881589ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen);1590if (ip.ip_dst.s_addr == 0) {1591NAT64STAT_INC(&cfg->stats, dropped);1592return (NAT64MFREE);1593}15941595if (*V_nat64ip6stealth == 0 && ip6->ip6_hlim <= IPV6_HLIMDEC) {1596nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,1597ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);1598return (NAT64RETURN);1599}16001601hlen = 0;1602plen = ntohs(ip6->ip6_plen);1603proto = nat64_getlasthdr(m, &hlen);1604if (proto < 0) {1605DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");1606NAT64STAT_INC(&cfg->stats, dropped);1607return (NAT64MFREE);1608}1609frag = NULL;1610if (proto == IPPROTO_FRAGMENT) {1611/* ipfw_chk should m_pullup up to frag header */1612if (m->m_len < hlen + sizeof(*frag)) {1613DPRINTF(DP_DROPS,1614"dropped due to mbuf isn't contigious");1615NAT64STAT_INC(&cfg->stats, dropped);1616return (NAT64MFREE);1617}1618frag = mtodo(m, hlen);1619proto = frag->ip6f_nxt;1620hlen += sizeof(*frag);1621/* Fragmented ICMPv6 is unsupported */1622if (proto == IPPROTO_ICMPV6) {1623DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");1624NAT64STAT_INC(&cfg->stats, dropped);1625return (NAT64MFREE);1626}1627/* Fragment length must be multiple of 8 octets */1628if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&1629((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {1630nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,1631ICMP6_PARAMPROB_HEADER,1632offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,1633logdata);1634return (NAT64RETURN);1635}1636}1637plen -= hlen - sizeof(struct ip6_hdr);1638if (plen < 0 || m->m_pkthdr.len < plen + hlen) {1639DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",1640plen, m->m_pkthdr.len, hlen);1641NAT64STAT_INC(&cfg->stats, dropped);1642return (NAT64MFREE);1643}16441645icmp6 = NULL; /* Make gcc happy */1646if (proto == IPPROTO_ICMPV6) {1647icmp6 = mtodo(m, hlen);1648if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&1649icmp6->icmp6_type != ICMP6_ECHO_REPLY)1650return (nat64_handle_icmp6(m, hlen, aaddr, aport,1651cfg, logdata));1652}1653dst.sin_addr.s_addr = ip.ip_dst.s_addr;1654nh = nat64_find_route4(&dst, m);1655if (nh == NULL) {1656NAT64STAT_INC(&cfg->stats, noroute4);1657nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,1658ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);1659return (NAT64RETURN);1660}1661if (nh->nh_mtu < plen + sizeof(ip)) {1662nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh->nh_mtu,1663&cfg->stats, logdata);1664return (NAT64RETURN);1665}1666nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);16671668/* Handle delayed checksums if needed. */1669if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {1670in6_delayed_cksum(m, plen, hlen);1671m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;1672}1673/* Convert checksums. */1674switch (proto) {1675case IPPROTO_TCP:1676csum = &TCP(mtodo(m, hlen))->th_sum;1677if (aport != 0) {1678struct tcphdr *tcp = TCP(mtodo(m, hlen));1679*csum = cksum_adjust(*csum, tcp->th_sport, aport);1680tcp->th_sport = aport;1681}1682*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));1683break;1684case IPPROTO_UDP:1685csum = &UDP(mtodo(m, hlen))->uh_sum;1686if (aport != 0) {1687struct udphdr *udp = UDP(mtodo(m, hlen));1688*csum = cksum_adjust(*csum, udp->uh_sport, aport);1689udp->uh_sport = aport;1690}1691*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));1692break;1693case IPPROTO_ICMPV6:1694/* Checksum in ICMPv6 covers pseudo header */1695csum = &icmp6->icmp6_cksum;1696*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,1697IPPROTO_ICMPV6, 0));1698/* Convert ICMPv6 types to ICMP */1699proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */1700if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)1701icmp6->icmp6_type = ICMP_ECHO;1702else /* ICMP6_ECHO_REPLY */1703icmp6->icmp6_type = ICMP_ECHOREPLY;1704*csum = cksum_adjust(*csum, (uint16_t)proto,1705*(uint16_t *)icmp6);1706if (aport != 0) {1707uint16_t old_id = icmp6->icmp6_id;1708icmp6->icmp6_id = aport;1709*csum = cksum_adjust(*csum, old_id, aport);1710}1711break;1712};17131714m_adj(m, hlen - sizeof(ip));1715bcopy(&ip, mtod(m, void *), sizeof(ip));1716if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,1717&cfg->stats, logdata) == 0)1718NAT64STAT_INC(&cfg->stats, opcnt64);1719return (NAT64RETURN);1720}172117221723