Path: blob/a-new-beginning/SharedDependencies/Sources/libslirp/tcp_input.c
2 views
/* SPDX-License-Identifier: BSD-3-Clause */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
 * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
 */

/*
 * Changes and additions relating to SLiRP
 * Copyright (c) 1995 Danny Gasparovski.
 */

#include "slirp.h"
#include "ip_icmp.h"

#define TCPREXMTTHRESH 3

#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)

/* for modulo comparisons of timestamps */
#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0)
#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0)

/*
 * Insert segment ti into reassembly queue of tcp with
 * control block tp. 
Return TH_FIN if reassembly now includes52* a segment with FIN.53* Set DELACK for segments received in order, but ack immediately54* when segments are out of order (so fast retransmit can work).55*/5657static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt,58struct tcpiphdr *ti);59static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);6061static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,62struct mbuf *m)63{64register struct tcpiphdr *q;65struct socket *so = tp->t_socket;66int flags;6768/*69* Call with ti==NULL after become established to70* force pre-ESTABLISHED data up to user socket.71*/72if (ti == NULL)73goto present;7475/*76* Find a segment which begins after this one does.77*/78for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);79q = tcpiphdr_next(q))80if (SEQ_GT(q->ti_seq, ti->ti_seq))81break;8283/*84* If there is a preceding segment, it may provide some of85* our data already. If so, drop the data from the incoming86* segment. If it provides all of our data, drop us.87*/88if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {89register int i;90q = tcpiphdr_prev(q);91/* conversion to int (in i) handles seq wraparound */92i = q->ti_seq + q->ti_len - ti->ti_seq;93if (i > 0) {94if (i >= ti->ti_len) {95m_free(m);96/*97* Try to present any queued data98* at the left window edge to the user.99* This is needed after the 3-WHS100* completes.101*/102goto present; /* ??? 
*/103}104m_adj(m, i);105ti->ti_len -= i;106ti->ti_seq += i;107}108q = tcpiphdr_next(q);109}110ti->ti_mbuf = m;111112/*113* While we overlap succeeding segments trim them or,114* if they are completely covered, dequeue them.115*/116while (!tcpfrag_list_end(q, tp)) {117register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;118if (i <= 0)119break;120if (i < q->ti_len) {121q->ti_seq += i;122q->ti_len -= i;123m_adj(q->ti_mbuf, i);124break;125}126q = tcpiphdr_next(q);127m = tcpiphdr_prev(q)->ti_mbuf;128slirp_remque(tcpiphdr2qlink(tcpiphdr_prev(q)));129m_free(m);130}131132/*133* Stick new segment in its place.134*/135slirp_insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));136137present:138/*139* Present data to user, advancing rcv_nxt through140* completed sequence space.141*/142if (!TCPS_HAVEESTABLISHED(tp->t_state))143return (0);144ti = tcpfrag_list_first(tp);145if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)146return (0);147if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)148return (0);149do {150tp->rcv_nxt += ti->ti_len;151flags = ti->ti_flags & TH_FIN;152slirp_remque(tcpiphdr2qlink(ti));153m = ti->ti_mbuf;154ti = tcpiphdr_next(ti);155if (so->so_state & SS_FCANTSENDMORE)156m_free(m);157else {158if (so->so_emu) {159if (tcp_emu(so, m))160sbappend(so, m);161} else162sbappend(so, m);163}164} while (!tcpfrag_list_end(ti, tp) && ti->ti_seq == tp->rcv_nxt);165return (flags);166}167168/*169* TCP input routine, follows pages 65-76 of the170* protocol specification dated September, 1981 very closely.171*/172void tcp_input(struct mbuf *m, int iphlen, struct socket *inso,173unsigned short af)174{175struct ip save_ip, *ip;176struct ip6 save_ip6, *ip6;177register struct tcpiphdr *ti;178char *optp = NULL;179int optlen = 0;180int len, tlen, off;181register struct tcpcb *tp = NULL;182register int tiflags;183struct socket *so = NULL;184int todrop, acked, ourfinisacked, needoutput = 0;185int iss = 0;186uint32_t tiwin;187int ret;188struct sockaddr_storage lhost, 
fhost;189struct sockaddr_in *lhost4, *fhost4;190struct sockaddr_in6 *lhost6, *fhost6;191struct gfwd_list *ex_ptr;192Slirp *slirp;193194DEBUG_CALL("tcp_input");195DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso);196197memset(&lhost, 0, sizeof(struct sockaddr_storage));198memset(&fhost, 0, sizeof(struct sockaddr_storage));199200/*201* If called with m == 0, then we're continuing the connect202*/203if (m == NULL) {204so = inso;205slirp = so->slirp;206207/* Re-set a few variables */208tp = sototcpcb(so);209m = so->so_m;210so->so_m = NULL;211ti = so->so_ti;212tiwin = ti->ti_win;213tiflags = ti->ti_flags;214215goto cont_conn;216}217slirp = m->slirp;218switch (af) {219case AF_INET:220M_DUP_DEBUG(slirp, m, 0,221sizeof(struct qlink) + sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr));222break;223case AF_INET6:224M_DUP_DEBUG(slirp, m, 0,225sizeof(struct qlink) + sizeof(struct tcpiphdr) - sizeof(struct ip6) - sizeof(struct tcphdr));226break;227}228229ip = mtod(m, struct ip *);230ip6 = mtod(m, struct ip6 *);231232switch (af) {233case AF_INET:234if (iphlen > sizeof(struct ip)) {235ip_stripoptions(m);236iphlen = sizeof(struct ip);237}238/* XXX Check if too short */239240241/*242* Save a copy of the IP header in case we want restore it243* for sending an ICMP error message in response.244*/245save_ip = *ip;246save_ip.ip_len += iphlen;247248/*249* Get IP and TCP header together in first mbuf.250* Note: IP leaves IP header in first mbuf.251*/252m->m_data -=253sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr);254m->m_len +=255sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr);256ti = mtod(m, struct tcpiphdr *);257258/*259* Checksum extended TCP header and data.260*/261tlen = ip->ip_len;262tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;263memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr));264memset(&ti->ti, 0, sizeof(ti->ti));265ti->ti_x0 = 0;266ti->ti_src = save_ip.ip_src;267ti->ti_dst = 
save_ip.ip_dst;268ti->ti_pr = save_ip.ip_p;269ti->ti_len = htons((uint16_t)tlen);270break;271272case AF_INET6:273/*274* Save a copy of the IP header in case we want restore it275* for sending an ICMP error message in response.276*/277save_ip6 = *ip6;278/*279* Get IP and TCP header together in first mbuf.280* Note: IP leaves IP header in first mbuf.281*/282m->m_data -= sizeof(struct tcpiphdr) -283(sizeof(struct ip6) + sizeof(struct tcphdr));284m->m_len += sizeof(struct tcpiphdr) -285(sizeof(struct ip6) + sizeof(struct tcphdr));286ti = mtod(m, struct tcpiphdr *);287288tlen = ip6->ip_pl;289tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;290memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr));291memset(&ti->ti, 0, sizeof(ti->ti));292ti->ti_x0 = 0;293ti->ti_src6 = save_ip6.ip_src;294ti->ti_dst6 = save_ip6.ip_dst;295ti->ti_nh6 = save_ip6.ip_nh;296ti->ti_len = htons((uint16_t)tlen);297break;298299default:300g_assert_not_reached();301}302303len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen);304if (cksum(m, len)) {305goto drop;306}307308/*309* Check that TCP offset makes sense,310* pull out TCP options and adjust length. 
XXX311*/312off = ti->ti_off << 2;313if (off < sizeof(struct tcphdr) || off > tlen) {314goto drop;315}316tlen -= off;317ti->ti_len = tlen;318if (off > sizeof(struct tcphdr)) {319optlen = off - sizeof(struct tcphdr);320optp = mtod(m, char *) + sizeof(struct tcpiphdr);321}322tiflags = ti->ti_flags;323324/*325* Convert TCP protocol specific fields to host format.326*/327NTOHL(ti->ti_seq);328NTOHL(ti->ti_ack);329NTOHS(ti->ti_win);330NTOHS(ti->ti_urp);331332/*333* Drop TCP, IP headers and TCP options.334*/335m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);336m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);337338/*339* Locate pcb for segment.340*/341findso:342lhost.ss_family = af;343fhost.ss_family = af;344switch (af) {345case AF_INET:346lhost4 = (struct sockaddr_in *)&lhost;347lhost4->sin_addr = ti->ti_src;348lhost4->sin_port = ti->ti_sport;349fhost4 = (struct sockaddr_in *)&fhost;350fhost4->sin_addr = ti->ti_dst;351fhost4->sin_port = ti->ti_dport;352break;353case AF_INET6:354lhost6 = (struct sockaddr_in6 *)&lhost;355lhost6->sin6_addr = ti->ti_src6;356lhost6->sin6_port = ti->ti_sport;357fhost6 = (struct sockaddr_in6 *)&fhost;358fhost6->sin6_addr = ti->ti_dst6;359fhost6->sin6_port = ti->ti_dport;360break;361default:362g_assert_not_reached();363}364365so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost);366367/*368* If the state is CLOSED (i.e., TCB does not exist) then369* all data in the incoming segment is discarded.370* If the TCB exists but is in CLOSED state, it is embryonic,371* but should either do a listen or a connect soon.372*373* state == CLOSED means we've done socreate() but haven't374* attached it to a protocol yet...375*376* XXX If a TCB does not exist, and the TH_SYN flag is377* the only flag set, then create a session, mark it378* as if it was LISTENING, and continue...379*/380if (so == NULL) {381/* TODO: IPv6 */382if (slirp->restricted) {383/* Any hostfwds will have an existing socket, so we only get 
here384* for non-hostfwd connections. These should be dropped, unless it385* happens to be a guestfwd.386*/387for (ex_ptr = slirp->guestfwd_list; ex_ptr;388ex_ptr = ex_ptr->ex_next) {389if (ex_ptr->ex_fport == ti->ti_dport &&390ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {391break;392}393}394if (!ex_ptr) {395goto dropwithreset;396}397}398399if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN)400goto dropwithreset;401402so = socreate(slirp, IPPROTO_TCP);403tcp_attach(so);404405sbreserve(&so->so_snd, TCP_SNDSPACE);406sbreserve(&so->so_rcv, TCP_RCVSPACE);407408so->lhost.ss = lhost;409so->fhost.ss = fhost;410411so->so_iptos = tcp_tos(so);412if (so->so_iptos == 0) {413switch (af) {414case AF_INET:415so->so_iptos = ((struct ip *)ti)->ip_tos;416break;417case AF_INET6:418break;419default:420g_assert_not_reached();421}422}423424tp = sototcpcb(so);425tp->t_state = TCPS_LISTEN;426}427428/*429* If this is a still-connecting socket, this probably430* a retransmit of the SYN. Whether it's a retransmit SYN431* or something else, we nuke it.432*/433if (so->so_state & SS_ISFCONNECTING)434goto drop;435436tp = sototcpcb(so);437438/* XXX Should never fail */439if (tp == NULL)440goto dropwithreset;441if (tp->t_state == TCPS_CLOSED)442goto drop;443444tiwin = ti->ti_win;445446/*447* Segment received on connection.448* Reset idle time and keep-alive timer.449*/450tp->t_idle = 0;451if (slirp_do_keepalive)452tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;453else454tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;455456/*457* Process options if not in LISTEN state,458* else do it below (after getting remote address).459*/460if (optp && tp->t_state != TCPS_LISTEN)461tcp_dooptions(tp, (uint8_t *)optp, optlen, ti);462463/*464* Header prediction: check for the two common cases465* of a uni-directional data xfer. If the packet has466* no control flags, is in-sequence, the window didn't467* change and we're not retransmitting, it's a468* candidate. 
If the length is zero and the ack moved469* forward, we're the sender side of the xfer. Just470* free the data acked & wake any higher level process471* that was blocked waiting for space. If the length472* is non-zero and the ack didn't move, we're the473* receiver side. If we're getting packets in-order474* (the reassembly queue is empty), add the data to475* the socket buffer and note that we need a delayed ack.476*477* XXX Some of these tests are not needed478* eg: the tiwin == tp->snd_wnd prevents many more479* predictions.. with no *real* advantage..480*/481if (tp->t_state == TCPS_ESTABLISHED &&482(tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK &&483ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd &&484tp->snd_nxt == tp->snd_max) {485if (ti->ti_len == 0) {486if (SEQ_GT(ti->ti_ack, tp->snd_una) &&487SEQ_LEQ(ti->ti_ack, tp->snd_max) &&488tp->snd_cwnd >= tp->snd_wnd) {489/*490* this is a pure ack for outstanding data.491*/492if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))493tcp_xmit_timer(tp, tp->t_rtt);494acked = ti->ti_ack - tp->snd_una;495sodrop(so, acked);496tp->snd_una = ti->ti_ack;497m_free(m);498499/*500* If all outstanding data are acked, stop501* retransmit timer, otherwise restart timer502* using current (possibly backed-off) value.503* If process is waiting for space,504* wakeup/selwakeup/signal. If data505* are ready to send, let tcp_output506* decide between more output or persist.507*/508if (tp->snd_una == tp->snd_max)509tp->t_timer[TCPT_REXMT] = 0;510else if (tp->t_timer[TCPT_PERSIST] == 0)511tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;512513/*514* This is called because sowwakeup might have515* put data into so_snd. Since we don't so sowwakeup,516* we don't need this.. 
XXX???517*/518if (so->so_snd.sb_cc)519tcp_output(tp);520521return;522}523} else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) &&524ti->ti_len <= sbspace(&so->so_rcv)) {525/*526* this is a pure, in-sequence data packet527* with nothing on the reassembly queue and528* we have enough buffer space to take it.529*/530tp->rcv_nxt += ti->ti_len;531/*532* Add data to socket buffer.533*/534if (so->so_emu) {535if (tcp_emu(so, m))536sbappend(so, m);537} else538sbappend(so, m);539540/*541* If this is a short packet, then ACK now - with Nagel542* congestion avoidance sender won't send more until543* he gets an ACK.544*545* It is better to not delay acks at all to maximize546* TCP throughput. See RFC 2581.547*/548tp->t_flags |= TF_ACKNOW;549tcp_output(tp);550return;551}552} /* header prediction */553/*554* Calculate amount of space in receive window,555* and then do TCP input processing.556* Receive window is amount of space in rcv queue,557* but not less than advertised window.558*/559{560int win;561win = sbspace(&so->so_rcv);562if (win < 0)563win = 0;564tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt));565}566567switch (tp->t_state) {568/*569* If the state is LISTEN then ignore segment if it contains an RST.570* If the segment contains an ACK then it is bad and send a RST.571* If it does not contain a SYN then it is not interesting; drop it.572* Don't bother responding if the destination was a broadcast.573* Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial574* tp->iss, and send a segment:575* <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>576* Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.577* Fill in remote peer address fields if not previously specified.578* Enter SYN_RECEIVED state, and process any other fields of this579* segment in this state.580*/581case TCPS_LISTEN: {582if (tiflags & TH_RST)583goto drop;584if (tiflags & TH_ACK)585goto dropwithreset;586if ((tiflags & TH_SYN) == 0)587goto drop;588589/*590* This has way too many 
gotos...591* But a bit of spaghetti code never hurt anybody :)592*/593594/*595* If this is destined for the control address, then flag to596* tcp_ctl once connected, otherwise connect597*/598/* TODO: IPv6 */599if (af == AF_INET &&600(so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==601slirp->vnetwork_addr.s_addr) {602if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr &&603so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) {604/* May be an add exec */605for (ex_ptr = slirp->guestfwd_list; ex_ptr;606ex_ptr = ex_ptr->ex_next) {607if (ex_ptr->ex_fport == so->so_fport &&608so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) {609so->so_state |= SS_CTL;610break;611}612}613if (so->so_state & SS_CTL) {614goto cont_input;615}616}617/* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */618}619620if (so->so_emu & EMU_NOCONNECT) {621so->so_emu &= ~EMU_NOCONNECT;622goto cont_input;623}624625if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) &&626(errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {627uint8_t code;628DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno));629if (errno == ECONNREFUSED) {630/* ACK the SYN, send RST to refuse the connection */631tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0,632TH_RST | TH_ACK, af);633} else {634switch (af) {635case AF_INET:636code = ICMP_UNREACH_NET;637if (errno == EHOSTUNREACH) {638code = ICMP_UNREACH_HOST;639}640break;641case AF_INET6:642code = ICMP6_UNREACH_NO_ROUTE;643if (errno == EHOSTUNREACH) {644code = ICMP6_UNREACH_ADDRESS;645}646break;647default:648g_assert_not_reached();649}650HTONL(ti->ti_seq); /* restore tcp header */651HTONL(ti->ti_ack);652HTONS(ti->ti_win);653HTONS(ti->ti_urp);654m->m_data -=655sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);656m->m_len +=657sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);658switch (af) {659case AF_INET:660m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) -661sizeof(struct tcphdr);662m->m_len -= sizeof(struct tcpiphdr) - 
sizeof(struct ip) -663sizeof(struct tcphdr);664*ip = save_ip;665icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno));666break;667case AF_INET6:668m->m_data += sizeof(struct tcpiphdr) -669(sizeof(struct ip6) + sizeof(struct tcphdr));670m->m_len -= sizeof(struct tcpiphdr) -671(sizeof(struct ip6) + sizeof(struct tcphdr));672*ip6 = save_ip6;673icmp6_send_error(m, ICMP6_UNREACH, code);674break;675default:676g_assert_not_reached();677}678}679tcp_close(tp);680m_free(m);681} else {682/*683* Haven't connected yet, save the current mbuf684* and ti, and return685* XXX Some OS's don't tell us whether the connect()686* succeeded or not. So we must time it out.687*/688so->so_m = m;689so->so_ti = ti;690tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;691tp->t_state = TCPS_SYN_RECEIVED;692/*693* Initialize receive sequence numbers now so that we can send a694* valid RST if the remote end rejects our connection.695*/696tp->irs = ti->ti_seq;697tcp_rcvseqinit(tp);698tcp_template(tp);699}700return;701702cont_conn:703/* m==NULL704* Check if the connect succeeded705*/706if (so->so_state & SS_NOFDREF) {707tp = tcp_close(tp);708goto dropwithreset;709}710cont_input:711tcp_template(tp);712713if (optp)714tcp_dooptions(tp, (uint8_t *)optp, optlen, ti);715716if (iss)717tp->iss = iss;718else719tp->iss = slirp->tcp_iss;720slirp->tcp_iss += TCP_ISSINCR / 2;721tp->irs = ti->ti_seq;722tcp_sendseqinit(tp);723tcp_rcvseqinit(tp);724tp->t_flags |= TF_ACKNOW;725tp->t_state = TCPS_SYN_RECEIVED;726tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;727goto trimthenstep6;728} /* case TCPS_LISTEN */729730/*731* If the state is SYN_SENT:732* if seg contains an ACK, but not for our SYN, drop the input.733* if seg contains a RST, then drop the connection.734* if seg does not contain SYN, then drop it.735* Otherwise this is an acceptable SYN segment736* initialize tp->rcv_nxt and tp->irs737* if seg contains ack then advance tp->snd_una738* if SYN has been acked change to ESTABLISHED else SYN_RCVD state739* arrange for 
segment to be acked (eventually)740* continue processing rest of data/controls, beginning with URG741*/742case TCPS_SYN_SENT:743if (getenv("SLIRP_FUZZING") &&744/* Align seq numbers on what the fuzzing trace says */745tp->iss == 1 && ti->ti_ack != 0) {746tp->iss = ti->ti_ack - 1;747tp->snd_max = tp->iss + 1;748}749750if ((tiflags & TH_ACK) &&751(SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max)))752goto dropwithreset;753754if (tiflags & TH_RST) {755if (tiflags & TH_ACK) {756tcp_drop(tp, 0); /* XXX Check t_softerror! */757}758goto drop;759}760761if ((tiflags & TH_SYN) == 0)762goto drop;763if (tiflags & TH_ACK) {764tp->snd_una = ti->ti_ack;765if (SEQ_LT(tp->snd_nxt, tp->snd_una))766tp->snd_nxt = tp->snd_una;767}768769tp->t_timer[TCPT_REXMT] = 0;770tp->irs = ti->ti_seq;771tcp_rcvseqinit(tp);772tp->t_flags |= TF_ACKNOW;773if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {774soisfconnected(so);775tp->t_state = TCPS_ESTABLISHED;776777tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);778/*779* if we didn't have to retransmit the SYN,780* use its rtt as our initial srtt & rtt var.781*/782if (tp->t_rtt)783tcp_xmit_timer(tp, tp->t_rtt);784} else785tp->t_state = TCPS_SYN_RECEIVED;786787trimthenstep6:788/*789* Advance ti->ti_seq to correspond to first data byte.790* If data, trim to stay within window,791* dropping FIN if necessary.792*/793ti->ti_seq++;794if (ti->ti_len > tp->rcv_wnd) {795todrop = ti->ti_len - tp->rcv_wnd;796m_adj(m, -todrop);797ti->ti_len = tp->rcv_wnd;798tiflags &= ~TH_FIN;799}800tp->snd_wl1 = ti->ti_seq - 1;801tp->rcv_up = ti->ti_seq;802goto step6;803} /* switch tp->t_state */804/*805* States other than LISTEN or SYN_SENT.806* Check that at least some bytes of segment are within807* receive window. 
If segment begins before rcv_nxt,808* drop leading data (and SYN); if nothing left, just ack.809*/810todrop = tp->rcv_nxt - ti->ti_seq;811if (todrop > 0) {812if (tiflags & TH_SYN) {813tiflags &= ~TH_SYN;814ti->ti_seq++;815if (ti->ti_urp > 1)816ti->ti_urp--;817else818tiflags &= ~TH_URG;819todrop--;820}821/*822* Following if statement from Stevens, vol. 2, p. 960.823*/824if (todrop > ti->ti_len ||825(todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {826/*827* Any valid FIN must be to the left of the window.828* At this point the FIN must be a duplicate or out829* of sequence; drop it.830*/831tiflags &= ~TH_FIN;832833/*834* Send an ACK to resynchronize and drop any data.835* But keep on processing for RST or ACK.836*/837tp->t_flags |= TF_ACKNOW;838todrop = ti->ti_len;839}840m_adj(m, todrop);841ti->ti_seq += todrop;842ti->ti_len -= todrop;843if (ti->ti_urp > todrop)844ti->ti_urp -= todrop;845else {846tiflags &= ~TH_URG;847ti->ti_urp = 0;848}849}850/*851* If new data are received on a connection after the852* user processes are gone, then RST the other end.853*/854if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT &&855ti->ti_len) {856tp = tcp_close(tp);857goto dropwithreset;858}859860/*861* If segment ends after window, drop trailing data862* (and PUSH and FIN); if nothing left, just ACK.863*/864todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd);865if (todrop > 0) {866if (todrop >= ti->ti_len) {867/*868* If a new connection request is received869* while in TIME_WAIT, drop the old connection870* and start over if the sequence numbers871* are above the previous ones.872*/873if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT &&874SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {875iss = tp->rcv_nxt + TCP_ISSINCR;876tp = tcp_close(tp);877goto findso;878}879/*880* If window is closed can only take segments at881* window edge, and have to drop data and PUSH from882* incoming segments. Continue processing, but883* remember to ack. 
Otherwise, drop segment884* and ack.885*/886if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {887tp->t_flags |= TF_ACKNOW;888} else {889goto dropafterack;890}891}892m_adj(m, -todrop);893ti->ti_len -= todrop;894tiflags &= ~(TH_PUSH | TH_FIN);895}896897/*898* If the RST bit is set examine the state:899* SYN_RECEIVED STATE:900* If passive open, return to LISTEN state.901* If active open, inform user that connection was refused.902* ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:903* Inform user that connection was reset, and close tcb.904* CLOSING, LAST_ACK, TIME_WAIT STATES905* Close the tcb.906*/907if (tiflags & TH_RST)908switch (tp->t_state) {909case TCPS_SYN_RECEIVED:910case TCPS_ESTABLISHED:911case TCPS_FIN_WAIT_1:912case TCPS_FIN_WAIT_2:913case TCPS_CLOSE_WAIT:914tp->t_state = TCPS_CLOSED;915tcp_close(tp);916goto drop;917918case TCPS_CLOSING:919case TCPS_LAST_ACK:920case TCPS_TIME_WAIT:921tcp_close(tp);922goto drop;923}924925/*926* If a SYN is in the window, then this is an927* error and we send an RST and drop the connection.928*/929if (tiflags & TH_SYN) {930tp = tcp_drop(tp, 0);931goto dropwithreset;932}933934/*935* If the ACK bit is off we drop the segment and return.936*/937if ((tiflags & TH_ACK) == 0)938goto drop;939940/*941* Ack processing.942*/943switch (tp->t_state) {944/*945* In SYN_RECEIVED state if the ack ACKs our SYN then enter946* ESTABLISHED state and continue processing, otherwise947* send an RST. una<=ack<=max948*/949case TCPS_SYN_RECEIVED:950if (getenv("SLIRP_FUZZING") &&951/* Align seq numbers on what the fuzzing trace says */952tp->iss == 1 && ti->ti_ack != 0) {953tp->iss = ti->ti_ack - 1;954tp->snd_max = tp->iss + 1;955tp->snd_una = ti->ti_ack;956}957958if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max))959goto dropwithreset;960tp->t_state = TCPS_ESTABLISHED;961/*962* The sent SYN is ack'ed with our sequence number +1963* The first data byte already in the buffer will get964* lost if no correction is made. 
This is only needed for965* SS_CTL since the buffer is empty otherwise.966* tp->snd_una++; or:967*/968tp->snd_una = ti->ti_ack;969if (so->so_state & SS_CTL) {970/* So tcp_ctl reports the right state */971ret = tcp_ctl(so);972if (ret == 1) {973soisfconnected(so);974so->so_state &= ~SS_CTL; /* success XXX */975} else if (ret == 2) {976so->so_state &= SS_PERSISTENT_MASK;977so->so_state |= SS_NOFDREF; /* CTL_CMD */978} else {979needoutput = 1;980tp->t_state = TCPS_FIN_WAIT_1;981}982} else {983soisfconnected(so);984}985986tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);987tp->snd_wl1 = ti->ti_seq - 1;988/* Avoid ack processing; snd_una==ti_ack => dup ack */989goto synrx_to_est;990/* fall into ... */991992/*993* In ESTABLISHED state: drop duplicate ACKs; ACK out of range994* ACKs. If the ack is in the range995* tp->snd_una < ti->ti_ack <= tp->snd_max996* then advance tp->snd_una to ti->ti_ack and drop997* data from the retransmission queue. If this ACK reflects998* more up to date window information we update our window information.999*/1000case TCPS_ESTABLISHED:1001case TCPS_FIN_WAIT_1:1002case TCPS_FIN_WAIT_2:1003case TCPS_CLOSE_WAIT:1004case TCPS_CLOSING:1005case TCPS_LAST_ACK:1006case TCPS_TIME_WAIT:10071008if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {1009if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {1010DEBUG_MISC(" dup ack m = %p so = %p", m, so);1011/*1012* If we have outstanding data (other than1013* a window probe), this is a completely1014* duplicate ack (ie, window info didn't1015* change), the ack is the biggest we've1016* seen and we've seen exactly our rexmt1017* threshold of them, assume a packet1018* has been dropped and retransmit it.1019* Kludge snd_nxt & the congestion1020* window so we send only this one1021* packet.1022*1023* We know we're losing at the current1024* window size so do congestion avoidance1025* (set ssthresh to half the current window1026* and pull our congestion window back to1027* the new ssthresh).1028*1029* Dup acks mean that 
packets have left the1030* network (they're now cached at the receiver)1031* so bump cwnd by the amount in the receiver1032* to keep a constant cwnd packets in the1033* network.1034*/1035if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una)1036tp->t_dupacks = 0;1037else if (++tp->t_dupacks == TCPREXMTTHRESH) {1038tcp_seq onxt = tp->snd_nxt;1039unsigned win =1040MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;10411042if (win < 2)1043win = 2;1044tp->snd_ssthresh = win * tp->t_maxseg;1045tp->t_timer[TCPT_REXMT] = 0;1046tp->t_rtt = 0;1047tp->snd_nxt = ti->ti_ack;1048tp->snd_cwnd = tp->t_maxseg;1049tcp_output(tp);1050tp->snd_cwnd =1051tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks;1052if (SEQ_GT(onxt, tp->snd_nxt))1053tp->snd_nxt = onxt;1054goto drop;1055} else if (tp->t_dupacks > TCPREXMTTHRESH) {1056tp->snd_cwnd += tp->t_maxseg;1057tcp_output(tp);1058goto drop;1059}1060} else1061tp->t_dupacks = 0;1062break;1063}1064synrx_to_est:1065/*1066* If the congestion window was inflated to account1067* for the other side's cached packets, retract it.1068*/1069if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh)1070tp->snd_cwnd = tp->snd_ssthresh;1071tp->t_dupacks = 0;1072if (SEQ_GT(ti->ti_ack, tp->snd_max)) {1073goto dropafterack;1074}1075acked = ti->ti_ack - tp->snd_una;10761077/*1078* If transmit timer is running and timed sequence1079* number was acked, update smoothed round trip time.1080* Since we now have an rtt measurement, cancel the1081* timer backoff (cf., Phil Karn's retransmit alg.).1082* Recompute the initial retransmit timer.1083*/1084if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))1085tcp_xmit_timer(tp, tp->t_rtt);10861087/*1088* If all outstanding data is acked, stop retransmit1089* timer and remember to restart (more output or persist).1090* If there is more data to be acked, restart retransmit1091* timer, using current (possibly backed-off) value.1092*/1093if (ti->ti_ack == tp->snd_max) {1094tp->t_timer[TCPT_REXMT] = 
0;1095needoutput = 1;1096} else if (tp->t_timer[TCPT_PERSIST] == 0)1097tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;1098/*1099* When new data is acked, open the congestion window.1100* If the window gives us less than ssthresh packets1101* in flight, open exponentially (maxseg per packet).1102* Otherwise open linearly: maxseg per window1103* (maxseg^2 / cwnd per packet).1104*/1105{1106register unsigned cw = tp->snd_cwnd;1107register unsigned incr = tp->t_maxseg;11081109if (cw > tp->snd_ssthresh)1110incr = incr * incr / cw;1111tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale);1112}1113if (acked > so->so_snd.sb_cc) {1114tp->snd_wnd -= so->so_snd.sb_cc;1115sodrop(so, (int)so->so_snd.sb_cc);1116ourfinisacked = 1;1117} else {1118sodrop(so, acked);1119tp->snd_wnd -= acked;1120ourfinisacked = 0;1121}1122tp->snd_una = ti->ti_ack;1123if (SEQ_LT(tp->snd_nxt, tp->snd_una))1124tp->snd_nxt = tp->snd_una;11251126switch (tp->t_state) {1127/*1128* In FIN_WAIT_1 STATE in addition to the processing1129* for the ESTABLISHED state if our FIN is now acknowledged1130* then enter FIN_WAIT_2.1131*/1132case TCPS_FIN_WAIT_1:1133if (ourfinisacked) {1134/*1135* If we can't receive any more1136* data, then closing user can proceed.1137* Starting the timer is contrary to the1138* specification, but if we don't get a FIN1139* we'll hang forever.1140*/1141if (so->so_state & SS_FCANTRCVMORE) {1142tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;1143}1144tp->t_state = TCPS_FIN_WAIT_2;1145}1146break;11471148/*1149* In CLOSING STATE in addition to the processing for1150* the ESTABLISHED state if the ACK acknowledges our FIN1151* then enter the TIME-WAIT state, otherwise ignore1152* the segment.1153*/1154case TCPS_CLOSING:1155if (ourfinisacked) {1156tp->t_state = TCPS_TIME_WAIT;1157tcp_canceltimers(tp);1158tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;1159}1160break;11611162/*1163* In LAST_ACK, we may still be waiting for data to drain1164* and/or to be acked, as well as for the ack of our FIN.1165* If our FIN is now 
acknowledged, delete the TCB,1166* enter the closed state and return.1167*/1168case TCPS_LAST_ACK:1169if (ourfinisacked) {1170tcp_close(tp);1171goto drop;1172}1173break;11741175/*1176* In TIME_WAIT state the only thing that should arrive1177* is a retransmission of the remote FIN. Acknowledge1178* it and restart the finack timer.1179*/1180case TCPS_TIME_WAIT:1181tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;1182goto dropafterack;1183}1184} /* switch(tp->t_state) */11851186step6:1187/*1188* Update window information.1189* Don't look at window if no ACK: TAC's send garbage on first SYN.1190*/1191if ((tiflags & TH_ACK) &&1192(SEQ_LT(tp->snd_wl1, ti->ti_seq) ||1193(tp->snd_wl1 == ti->ti_seq &&1194(SEQ_LT(tp->snd_wl2, ti->ti_ack) ||1195(tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {1196tp->snd_wnd = tiwin;1197tp->snd_wl1 = ti->ti_seq;1198tp->snd_wl2 = ti->ti_ack;1199if (tp->snd_wnd > tp->max_sndwnd)1200tp->max_sndwnd = tp->snd_wnd;1201needoutput = 1;1202}12031204/*1205* Process segments with URG.1206*/1207if ((tiflags & TH_URG) && ti->ti_urp &&1208TCPS_HAVERCVDFIN(tp->t_state) == 0) {1209/*1210* This is a kludge, but if we receive and accept1211* random urgent pointers, we'll crash in1212* soreceive. It's hard to imagine someone1213* actually wanting to send this much urgent data.1214*/1215if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {1216ti->ti_urp = 0;1217tiflags &= ~TH_URG;1218goto dodata;1219}1220/*1221* If this segment advances the known urgent pointer,1222* then mark the data stream. This should not happen1223* in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since1224* a FIN has been received from the remote side.1225* In these states we ignore the URG.1226*1227* According to RFC961 (Assigned Protocols),1228* the urgent pointer points to the last octet1229* of urgent data. 
We continue, however,1230* to consider it to indicate the first octet1231* of data past the urgent section as the original1232* spec states (in one of two places).1233*/1234if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) {1235tp->rcv_up = ti->ti_seq + ti->ti_urp;1236so->so_urgc =1237so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */1238tp->rcv_up = ti->ti_seq + ti->ti_urp;1239}1240} else1241/*1242* If no out of band data is expected,1243* pull receive urgent pointer along1244* with the receive window.1245*/1246if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))1247tp->rcv_up = tp->rcv_nxt;1248dodata:12491250/*1251* If this is a small packet, then ACK now - with Nagel1252* congestion avoidance sender won't send more until1253* he gets an ACK.1254*/1255if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&1256((struct tcpiphdr_2 *)ti)->first_char == (char)27) {1257tp->t_flags |= TF_ACKNOW;1258}12591260/*1261* Process the segment text, merging it into the TCP sequencing queue,1262* and arranging for acknowledgment of receipt if necessary.1263* This process logically involves adjusting tp->rcv_wnd as data1264* is presented to the user (this happens in tcp_usrreq.c,1265* case PRU_RCVD). 
If a FIN has already been received on this1266* connection then we just ignore the text.1267*/1268if ((ti->ti_len || (tiflags & TH_FIN)) &&1269TCPS_HAVERCVDFIN(tp->t_state) == 0) {12701271/*1272* segment is the next to be received on an established1273* connection, and the queue is empty, avoid linkage into and1274* removal from the queue and repetition of various1275* conversions from tcp_reass().1276*/1277if (ti->ti_seq == tp->rcv_nxt && tcpfrag_list_empty(tp) &&1278tp->t_state == TCPS_ESTABLISHED) {1279tp->t_flags |= TF_DELACK;1280tp->rcv_nxt += ti->ti_len;1281tiflags = ti->ti_flags & TH_FIN;1282if (so->so_emu) {1283if (tcp_emu(so, m))1284sbappend(so, m);1285} else1286sbappend(so, m);1287} else {1288tiflags = tcp_reass(tp, ti, m);1289tp->t_flags |= TF_ACKNOW;1290}1291} else {1292m_free(m);1293tiflags &= ~TH_FIN;1294}12951296/*1297* If FIN is received ACK the FIN and let the user know1298* that the connection is closing.1299*/1300if (tiflags & TH_FIN) {1301if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {1302/*1303* If we receive a FIN we can't send more data,1304* set it SS_FDRAIN1305* Shutdown the socket if there is no rx data in the1306* buffer.1307* soread() is called on completion of shutdown() and1308* will got to TCPS_LAST_ACK, and use tcp_output()1309* to send the FIN.1310*/1311sofwdrain(so);13121313tp->t_flags |= TF_ACKNOW;1314tp->rcv_nxt++;1315}1316switch (tp->t_state) {1317/*1318* In SYN_RECEIVED and ESTABLISHED STATES1319* enter the CLOSE_WAIT state.1320*/1321case TCPS_SYN_RECEIVED:1322case TCPS_ESTABLISHED:1323if (so->so_emu == EMU_CTL) /* no shutdown on socket */1324tp->t_state = TCPS_LAST_ACK;1325else1326tp->t_state = TCPS_CLOSE_WAIT;1327break;13281329/*1330* If still in FIN_WAIT_1 STATE FIN has not been acked so1331* enter the CLOSING state.1332*/1333case TCPS_FIN_WAIT_1:1334tp->t_state = TCPS_CLOSING;1335break;13361337/*1338* In FIN_WAIT_2 state enter the TIME_WAIT state,1339* starting the time-wait timer, turning off the other1340* standard 
timers.1341*/1342case TCPS_FIN_WAIT_2:1343tp->t_state = TCPS_TIME_WAIT;1344tcp_canceltimers(tp);1345tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;1346break;13471348/*1349* In TIME_WAIT state restart the 2 MSL time_wait timer.1350*/1351case TCPS_TIME_WAIT:1352tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;1353break;1354}1355}13561357/*1358* Return any desired output.1359*/1360if (needoutput || (tp->t_flags & TF_ACKNOW)) {1361tcp_output(tp);1362}1363return;13641365dropafterack:1366/*1367* Generate an ACK dropping incoming segment if it occupies1368* sequence space, where the ACK reflects our state.1369*/1370if (tiflags & TH_RST)1371goto drop;1372m_free(m);1373tp->t_flags |= TF_ACKNOW;1374tcp_output(tp);1375return;13761377dropwithreset:1378/* reuses m if m!=NULL, m_free() unnecessary */1379if (tiflags & TH_ACK)1380tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af);1381else {1382if (tiflags & TH_SYN)1383ti->ti_len++;1384tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0,1385TH_RST | TH_ACK, af);1386}13871388return;13891390drop:1391/*1392* Drop space held by incoming segment and return.1393*/1394m_free(m);1395}13961397static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt,1398struct tcpiphdr *ti)1399{1400uint16_t mss;1401int opt, optlen;14021403DEBUG_CALL("tcp_dooptions");1404DEBUG_ARG("tp = %p cnt=%i", tp, cnt);14051406for (; cnt > 0; cnt -= optlen, cp += optlen) {1407opt = cp[0];1408if (opt == TCPOPT_EOL)1409break;1410if (opt == TCPOPT_NOP)1411optlen = 1;1412else {1413optlen = cp[1];1414if (optlen <= 0)1415break;1416}1417switch (opt) {1418default:1419continue;14201421case TCPOPT_MAXSEG:1422if (optlen != TCPOLEN_MAXSEG)1423continue;1424if (!(ti->ti_flags & TH_SYN))1425continue;1426memcpy((char *)&mss, (char *)cp + 2, sizeof(mss));1427NTOHS(mss);1428tcp_mss(tp, mss); /* sets t_maxseg */1429break;1430}1431}1432}14331434/*1435* Collect new round-trip time estimate1436* and update averages and current timeout.1437*/14381439static void tcp_xmit_timer(register 
struct tcpcb *tp, int rtt)1440{1441register short delta;14421443DEBUG_CALL("tcp_xmit_timer");1444DEBUG_ARG("tp = %p", tp);1445DEBUG_ARG("rtt = %d", rtt);14461447if (tp->t_srtt != 0) {1448/*1449* srtt is stored as fixed point with 3 bits after the1450* binary point (i.e., scaled by 8). The following magic1451* is equivalent to the smoothing algorithm in rfc793 with1452* an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed1453* point). Adjust rtt to origin 0.1454*/1455delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);1456if ((tp->t_srtt += delta) <= 0)1457tp->t_srtt = 1;1458/*1459* We accumulate a smoothed rtt variance (actually, a1460* smoothed mean difference), then set the retransmit1461* timer to smoothed rtt + 4 times the smoothed variance.1462* rttvar is stored as fixed point with 2 bits after the1463* binary point (scaled by 4). The following is1464* equivalent to rfc793 smoothing with an alpha of .751465* (rttvar = rttvar*3/4 + |delta| / 4). This replaces1466* rfc793's wired-in beta.1467*/1468if (delta < 0)1469delta = -delta;1470delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);1471if ((tp->t_rttvar += delta) <= 0)1472tp->t_rttvar = 1;1473} else {1474/*1475* No rtt measurement yet - use the unsmoothed rtt.1476* Set the variance to half the rtt (so our first1477* retransmit happens at 3*rtt).1478*/1479tp->t_srtt = rtt << TCP_RTT_SHIFT;1480tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);1481}1482tp->t_rtt = 0;1483tp->t_rxtshift = 0;14841485/*1486* the retransmit should happen at rtt + 4 * rttvar.1487* Because of the way we do the smoothing, srtt and rttvar1488* will each average +1/2 tick of bias. When we compute1489* the retransmit timer, we want 1/2 tick of rounding and1490* 1 extra tick because of +-1/2 tick uncertainty in the1491* firing of the timer. The bias will give us exactly the1492* 1.5 tick we need. 
But, because the bias is1493* statistical, we have to test that we don't drop below1494* the minimum feasible timer (which is 2 ticks).1495*/1496TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin,1497TCPTV_REXMTMAX); /* XXX */14981499/*1500* We received an ack for a packet that wasn't retransmitted;1501* it is probably safe to discard any error indications we've1502* received recently. This isn't quite right, but close enough1503* for now (a route might have failed after we sent a segment,1504* and the return path might not be symmetrical).1505*/1506tp->t_softerror = 0;1507}15081509/*1510* Determine a reasonable value for maxseg size.1511* If the route is known, check route for mtu.1512* If none, use an mss that can be handled on the outgoing1513* interface without forcing IP to fragment; if bigger than1514* an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES1515* to utilize large mbufs. If no route is found, route has no mtu,1516* or the destination isn't local, use a default, hopefully conservative1517* size (usually 512 or the default IP max size, but no more than the mtu1518* of the interface), as we can't discover anything about intervening1519* gateways or networks. 
We also initialize the congestion/slow start
 * window to be a single segment if the destination isn't local.
 * While looking at the routing entry, we also initialize other path-dependent
 * parameters from pre-set or cached values in the routing entry.
 */

/*
 * tcp_mss: choose the segment size for a connection and size its buffers.
 *
 * tp    - control block of the connection
 * offer - peer-advertised MSS, or 0 when there is none
 *
 * The starting point is the smaller of the instance's if_mtu and if_mru
 * minus the TCP header and the IPv4 or IPv6 header, selected by the
 * socket's so_ffamily.  A non-zero offer caps that value, and the result
 * is never allowed below 32.
 *
 * Side effects:
 *  - t_maxseg is replaced (capped at TCP_MAXSEG_MAX) when an offer was
 *    given or the new value is smaller than the current one;
 *  - snd_cwnd is set to the computed mss;
 *  - so_snd and so_rcv are reserved at TCP_SNDSPACE / TCP_RCVSPACE rounded
 *    up to a whole multiple of the mss.
 *
 * Returns the computed mss.
 */
int tcp_mss(struct tcpcb *tp, unsigned offer)
{
    struct socket *so = tp->t_socket;
    size_t hdr_len;
    int snd_rem, rcv_rem;
    int mss;

    DEBUG_CALL("tcp_mss");
    DEBUG_ARG("tp = %p", tp);
    DEBUG_ARG("offer = %d", offer);

    /* Combined TCP + network-layer header size for this address family. */
    switch (so->so_ffamily) {
    case AF_INET:
        hdr_len = sizeof(struct tcphdr) + sizeof(struct ip);
        break;
    case AF_INET6:
        hdr_len = sizeof(struct tcphdr) + sizeof(struct ip6);
        break;
    default:
        g_assert_not_reached();
    }

    mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - hdr_len;

    /* Honour the peer's limit, then enforce the floor of 32. */
    if (offer != 0) {
        mss = MIN(mss, offer);
    }
    if (mss < 32) {
        mss = 32;
    }

    if (offer != 0 || mss < tp->t_maxseg) {
        tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX);
    }

    tp->snd_cwnd = mss;

    /* Round each buffer up to the next whole multiple of mss. */
    snd_rem = TCP_SNDSPACE % mss;
    rcv_rem = TCP_RCVSPACE % mss;
    sbreserve(&so->so_snd, TCP_SNDSPACE + (snd_rem ? (mss - snd_rem) : 0));
    sbreserve(&so->so_rcv, TCP_RCVSPACE + (rcv_rem ? (mss - rcv_rem) : 0));

    DEBUG_MISC(" returning mss = %d", mss);

    return mss;
}