Path: blob/a-new-beginning/SharedDependencies/Sources/libslirp/tcp_output.c
2 views
/* SPDX-License-Identifier: BSD-3-Clause */1/*2* Copyright (c) 1982, 1986, 1988, 1990, 19933* The Regents of the University of California. All rights reserved.4*5* Redistribution and use in source and binary forms, with or without6* modification, are permitted provided that the following conditions7* are met:8* 1. Redistributions of source code must retain the above copyright9* notice, this list of conditions and the following disclaimer.10* 2. Redistributions in binary form must reproduce the above copyright11* notice, this list of conditions and the following disclaimer in the12* documentation and/or other materials provided with the distribution.13* 3. Neither the name of the University nor the names of its contributors14* may be used to endorse or promote products derived from this software15* without specific prior written permission.16*17* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND18* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE19* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE20* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE21* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL22* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS23* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)24* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT25* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY26* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF27* SUCH DAMAGE.28*29* @(#)tcp_output.c 8.3 (Berkeley) 12/30/9330* tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp31*/3233/*34* Changes and additions relating to SLiRP35* Copyright (c) 1995 Danny Gasparovski.36*/3738#include "slirp.h"3940static const uint8_t tcp_outflags[TCP_NSTATES] = {41TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK,42TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK,43TH_FIN | TH_ACK, TH_ACK, TH_ACK,44};454647#undef MAX_TCPOPTLEN48#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */4950/*51* Tcp output routine: figure out what should be sent and send it.52*/53int tcp_output(struct tcpcb *tp)54{55register struct socket *so = tp->t_socket;56register long len, win;57int off, flags, error;58register struct mbuf *m;59register struct tcpiphdr *ti, tcpiph_save;60struct ip *ip;61struct ip6 *ip6;62uint8_t opt[MAX_TCPOPTLEN];63unsigned optlen, hdrlen;64int idle, sendalot;6566DEBUG_CALL("tcp_output");67DEBUG_ARG("tp = %p", tp);6869/*70* Determine length of data that should be transmitted,71* and flags that will be used.72* If there is some data or critical controls (SYN, RST)73* to send, then transmit; otherwise, investigate further.74*/75idle = (tp->snd_max == tp->snd_una);76if (idle && tp->t_idle >= tp->t_rxtcur)77/*78* We have been idle for "a while" and no acks are79* expected to clock out any data we send --80* slow start to get ack "clock" running again.81*/82tp->snd_cwnd = tp->t_maxseg;83again:84sendalot = 0;85off = tp->snd_nxt - tp->snd_una;86win = MIN(tp->snd_wnd, tp->snd_cwnd);8788flags = tcp_outflags[tp->t_state];8990DEBUG_MISC(" --- tcp_output flags = 0x%x", flags);9192/*93* If in persist timeout with window of 0, send 1 byte.94* Otherwise, if window is small but nonzero95* and timer expired, we will send what we can96* and go to transmit state.97*/98if (tp->t_force) {99if (win == 0) {100/*101* If we still have some data to send, then102* clear the FIN bit. Usually this would103* happen below when it realizes that we104* aren't sending all the data. However,105* if we have exactly 1 byte of unset data,106* then it won't clear the FIN bit below,107* and if we are in persist state, we wind108* up sending the packet without recording109* that we sent the FIN bit.110*111* We can't just blindly clear the FIN bit,112* because if we don't have any more data113* to send then the probe will be the FIN114* itself.115*/116if (off < so->so_snd.sb_cc)117flags &= ~TH_FIN;118win = 1;119} else {120tp->t_timer[TCPT_PERSIST] = 0;121tp->t_rxtshift = 0;122}123}124125len = MIN(so->so_snd.sb_cc, win) - off;126127if (len < 0) {128/*129* If FIN has been sent but not acked,130* but we haven't been called to retransmit,131* len will be -1. Otherwise, window shrank132* after we sent into it. If window shrank to 0,133* cancel pending retransmit and pull snd_nxt134* back to (closed) window. We will enter persist135* state below. If the window didn't close completely,136* just wait for an ACK.137*/138len = 0;139if (win == 0) {140tp->t_timer[TCPT_REXMT] = 0;141tp->snd_nxt = tp->snd_una;142}143}144145if (len > tp->t_maxseg) {146len = tp->t_maxseg;147sendalot = 1;148}149if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))150flags &= ~TH_FIN;151152win = sbspace(&so->so_rcv);153154/*155* Sender silly window avoidance. If connection is idle156* and can send all data, a maximum segment,157* at least a maximum default-size segment do it,158* or are forced, do it; otherwise don't bother.159* If peer's buffer is tiny, then send160* when window is at least half open.161* If retransmitting (possibly after persist timer forced us162* to send into a small window), then must resend.163*/164if (len) {165if (len == tp->t_maxseg)166goto send;167if ((1 || idle || tp->t_flags & TF_NODELAY) &&168len + off >= so->so_snd.sb_cc)169goto send;170if (tp->t_force)171goto send;172if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)173goto send;174if (SEQ_LT(tp->snd_nxt, tp->snd_max))175goto send;176}177178/*179* Compare available window to amount of window180* known to peer (as advertised window less181* next expected input). If the difference is at least two182* max size segments, or at least 50% of the maximum possible183* window, then want to send a window update to peer.184*/185if (win > 0) {186/*187* "adv" is the amount we can increase the window,188* taking into account that we are limited by189* TCP_MAXWIN << tp->rcv_scale.190*/191long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) -192(tp->rcv_adv - tp->rcv_nxt);193194if (adv >= (long)(2 * tp->t_maxseg))195goto send;196if (2 * adv >= (long)so->so_rcv.sb_datalen)197goto send;198}199200/*201* Send if we owe peer an ACK.202*/203if (tp->t_flags & TF_ACKNOW)204goto send;205if (flags & (TH_SYN | TH_RST))206goto send;207if (SEQ_GT(tp->snd_up, tp->snd_una))208goto send;209/*210* If our state indicates that FIN should be sent211* and we have not yet done so, or we're retransmitting the FIN,212* then we need to send.213*/214if (flags & TH_FIN &&215((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))216goto send;217218/*219* TCP window updates are not reliable, rather a polling protocol220* using ``persist'' packets is used to insure receipt of window221* updates. The three ``states'' for the output side are:222* idle not doing retransmits or persists223* persisting to move a small or zero window224* (re)transmitting and thereby not persisting225*226* tp->t_timer[TCPT_PERSIST]227* is set when we are in persist state.228* tp->t_force229* is set when we are called to send a persist packet.230* tp->t_timer[TCPT_REXMT]231* is set when we are retransmitting232* The output side is idle when both timers are zero.233*234* If send window is too small, there is data to transmit, and no235* retransmit or persist is pending, then go to persist state.236* If nothing happens soon, send when timer expires:237* if window is nonzero, transmit what we can,238* otherwise force out a byte.239*/240if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&241tp->t_timer[TCPT_PERSIST] == 0) {242tp->t_rxtshift = 0;243tcp_setpersist(tp);244}245246/*247* No reason to send a segment, just return.248*/249return (0);250251send:252/*253* Before ESTABLISHED, force sending of initial options254* unless TCP set not to do any options.255* NOTE: we assume that the IP/TCP header plus TCP options256* always fit in a single mbuf, leaving room for a maximum257* link header, i.e.258* max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN259*/260optlen = 0;261hdrlen = sizeof(struct tcpiphdr);262if (flags & TH_SYN) {263tp->snd_nxt = tp->iss;264if ((tp->t_flags & TF_NOOPT) == 0) {265uint16_t mss;266267opt[0] = TCPOPT_MAXSEG;268opt[1] = 4;269mss = htons((uint16_t)tcp_mss(tp, 0));270memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss));271optlen = 4;272}273}274275hdrlen += optlen;276277/*278* Adjust data length if insertion of options will279* bump the packet length beyond the t_maxseg length.280*/281if (len > tp->t_maxseg - optlen) {282len = tp->t_maxseg - optlen;283sendalot = 1;284}285286/*287* Grab a header mbuf, attaching a copy of data to288* be transmitted, and initialize the header from289* the template for sends on this connection.290*/291if (len) {292m = m_get(so->slirp);293if (m == NULL) {294error = 1;295goto out;296}297m->m_data += IF_MAXLINKHDR;298m->m_len = hdrlen;299300sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen);301m->m_len += len;302303/*304* If we're sending everything we've got, set PUSH.305* (This will keep happy those implementations which only306* give data to the user when a buffer fills or307* a PUSH comes in.)308*/309if (off + len == so->so_snd.sb_cc)310flags |= TH_PUSH;311} else {312m = m_get(so->slirp);313if (m == NULL) {314error = 1;315goto out;316}317m->m_data += IF_MAXLINKHDR;318m->m_len = hdrlen;319}320321ti = mtod(m, struct tcpiphdr *);322323memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr));324325/*326* Fill in fields, remembering maximum advertised327* window for use in delaying messages about window sizes.328* If resending a FIN, be sure not to use a new sequence number.329*/330if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&331tp->snd_nxt == tp->snd_max)332tp->snd_nxt--;333/*334* If we are doing retransmissions, then snd_nxt will335* not reflect the first unsent octet. For ACK only336* packets, we do not want the sequence number of the337* retransmitted packet, we want the sequence number338* of the next unsent octet. So, if there is no data339* (and no SYN or FIN), use snd_max instead of snd_nxt340* when filling in ti_seq. But if we are in persist341* state, snd_max might reflect one byte beyond the342* right edge of the window, so use snd_nxt in that343* case, since we know we aren't doing a retransmission.344* (retransmit and persist are mutually exclusive...)345*/346if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST])347ti->ti_seq = htonl(tp->snd_nxt);348else349ti->ti_seq = htonl(tp->snd_max);350ti->ti_ack = htonl(tp->rcv_nxt);351if (optlen) {352memcpy((char *)(ti + 1), (char *)opt, optlen);353ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2;354}355ti->ti_flags = flags;356/*357* Calculate receive window. Don't shrink window,358* but avoid silly window syndrome.359*/360if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg)361win = 0;362if (win > (long)TCP_MAXWIN << tp->rcv_scale)363win = (long)TCP_MAXWIN << tp->rcv_scale;364if (win < (long)(tp->rcv_adv - tp->rcv_nxt))365win = (long)(tp->rcv_adv - tp->rcv_nxt);366ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale));367368if (SEQ_GT(tp->snd_up, tp->snd_una)) {369ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq)));370ti->ti_flags |= TH_URG;371} else372/*373* If no urgent pointer to send, then we pull374* the urgent pointer to the left edge of the send window375* so that it doesn't drift into the send window on sequence376* number wraparound.377*/378tp->snd_up = tp->snd_una; /* drag it along */379380/*381* Put TCP length in extended header, and then382* checksum extended header and data.383*/384if (len + optlen)385ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len));386ti->ti_sum = cksum(m, (int)(hdrlen + len));387388/*389* In transmit state, time the transmission and arrange for390* the retransmit. In persist state, just set snd_max.391*/392if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {393tcp_seq startseq = tp->snd_nxt;394395/*396* Advance snd_nxt over sequence space of this segment.397*/398if (flags & (TH_SYN | TH_FIN)) {399if (flags & TH_SYN)400tp->snd_nxt++;401if (flags & TH_FIN) {402tp->snd_nxt++;403tp->t_flags |= TF_SENTFIN;404}405}406tp->snd_nxt += len;407if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {408tp->snd_max = tp->snd_nxt;409/*410* Time this transmission if not a retransmission and411* not currently timing anything.412*/413if (tp->t_rtt == 0) {414tp->t_rtt = 1;415tp->t_rtseq = startseq;416}417}418419/*420* Set retransmit timer if not currently set,421* and not doing an ack or a keep-alive probe.422* Initial value for retransmit timer is smoothed423* round-trip time + 2 * round-trip time variance.424* Initialize shift counter which is used for backoff425* of retransmit time.426*/427if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) {428tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;429if (tp->t_timer[TCPT_PERSIST]) {430tp->t_timer[TCPT_PERSIST] = 0;431tp->t_rxtshift = 0;432}433}434} else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))435tp->snd_max = tp->snd_nxt + len;436437/*438* Fill in IP length and desired time to live and439* send to IP level. There should be a better way440* to handle ttl and tos; we could keep them in441* the template, but need a way to checksum without them.442*/443m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */444tcpiph_save = *mtod(m, struct tcpiphdr *);445446switch (so->so_ffamily) {447case AF_INET:448m->m_data +=449sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip);450m->m_len -=451sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip);452ip = mtod(m, struct ip *);453454ip->ip_len = m->m_len;455ip->ip_dst = tcpiph_save.ti_dst;456ip->ip_src = tcpiph_save.ti_src;457ip->ip_p = tcpiph_save.ti_pr;458459ip->ip_ttl = IPDEFTTL;460ip->ip_tos = so->so_iptos;461error = ip_output(so, m);462break;463464case AF_INET6:465m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) -466sizeof(struct ip6);467m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) -468sizeof(struct ip6);469ip6 = mtod(m, struct ip6 *);470471ip6->ip_pl = tcpiph_save.ti_len;472ip6->ip_dst = tcpiph_save.ti_dst6;473ip6->ip_src = tcpiph_save.ti_src6;474ip6->ip_nh = tcpiph_save.ti_nh6;475476error = ip6_output(so, m, 0);477break;478479default:480g_assert_not_reached();481}482483if (error) {484out:485return (error);486}487488/*489* Data sent (as far as we can tell).490* If this advertises a larger window than any other segment,491* then remember the size of the advertised window.492* Any pending ACK has now been sent.493*/494if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv))495tp->rcv_adv = tp->rcv_nxt + win;496tp->last_ack_sent = tp->rcv_nxt;497tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);498if (sendalot)499goto again;500501return (0);502}503504void tcp_setpersist(struct tcpcb *tp)505{506int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;507508TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift],509TCPTV_PERSMIN, TCPTV_PERSMAX);510if (tp->t_rxtshift < TCP_MAXRXTSHIFT)511tp->t_rxtshift++;512}513514515