Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
folium-app
GitHub Repository: folium-app/Folium
Path: blob/a-new-beginning/SharedDependencies/Sources/libslirp/tcp_input.c
2 views
1
/* SPDX-License-Identifier: BSD-3-Clause */
2
/*
3
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
4
* The Regents of the University of California. All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* 3. Neither the name of the University nor the names of its contributors
15
* may be used to endorse or promote products derived from this software
16
* without specific prior written permission.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
* SUCH DAMAGE.
29
*
30
* @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
31
* tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
32
*/
33
34
/*
35
* Changes and additions relating to SLiRP
36
* Copyright (c) 1995 Danny Gasparovski.
37
*/
38
39
#include "slirp.h"
40
#include "ip_icmp.h"
41
42
/*
 * Duplicate-ACK count at which tcp_input assumes a segment was lost and
 * retransmits it (compared against tp->t_dupacks in the ACK processing).
 */
#define TCPREXMTTHRESH 3
43
44
/*
 * 24 * 24 * 60 * 60 seconds (24 days) scaled by PR_SLOWHZ.
 * NOTE(review): presumably an idle limit in slow-timer ticks for PAWS
 * timestamp checks; it is not referenced in the visible part of this
 * file -- confirm against its users.
 */
#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
45
46
/* for modulo comparisons of timestamps */
47
/* Nonzero when (a) - (b), reinterpreted as int, is negative: a precedes b. */
#define TSTMP_LT(a, b) (0 > (int)((a) - (b)))
48
/* Nonzero when (a) - (b), reinterpreted as int, is not negative: a is at or after b. */
#define TSTMP_GEQ(a, b) (0 <= (int)((a) - (b)))
49
50
/*
51
* Insert segment ti into reassembly queue of tcp with
52
* control block tp. Return TH_FIN if reassembly now includes
53
* a segment with FIN.
54
* Set DELACK for segments received in order, but ack immediately
55
* when segments are out of order (so fast retransmit can work).
56
*/
57
58
/*
 * Forward declaration; the definition lies outside this part of the file.
 * tcp_input calls it with a pointer to the TCP options that follow the
 * fixed header (cp), their length in bytes (cnt) and the segment header.
 */
static void tcp_dooptions(struct tcpcb *tp,
                          uint8_t *cp,
                          int cnt,
                          struct tcpiphdr *ti);
60
/*
 * Forward declaration; the definition lies outside this part of the file.
 * tcp_input calls it with tp->t_rtt when an ACK covers the timed sequence
 * number (tp->t_rtseq).
 */
static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);
61
62
static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
63
struct mbuf *m)
64
{
65
register struct tcpiphdr *q;
66
struct socket *so = tp->t_socket;
67
int flags;
68
69
/*
70
* Call with ti==NULL after become established to
71
* force pre-ESTABLISHED data up to user socket.
72
*/
73
if (ti == NULL)
74
goto present;
75
76
/*
77
* Find a segment which begins after this one does.
78
*/
79
for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);
80
q = tcpiphdr_next(q))
81
if (SEQ_GT(q->ti_seq, ti->ti_seq))
82
break;
83
84
/*
85
* If there is a preceding segment, it may provide some of
86
* our data already. If so, drop the data from the incoming
87
* segment. If it provides all of our data, drop us.
88
*/
89
if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {
90
register int i;
91
q = tcpiphdr_prev(q);
92
/* conversion to int (in i) handles seq wraparound */
93
i = q->ti_seq + q->ti_len - ti->ti_seq;
94
if (i > 0) {
95
if (i >= ti->ti_len) {
96
m_free(m);
97
/*
98
* Try to present any queued data
99
* at the left window edge to the user.
100
* This is needed after the 3-WHS
101
* completes.
102
*/
103
goto present; /* ??? */
104
}
105
m_adj(m, i);
106
ti->ti_len -= i;
107
ti->ti_seq += i;
108
}
109
q = tcpiphdr_next(q);
110
}
111
ti->ti_mbuf = m;
112
113
/*
114
* While we overlap succeeding segments trim them or,
115
* if they are completely covered, dequeue them.
116
*/
117
while (!tcpfrag_list_end(q, tp)) {
118
register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
119
if (i <= 0)
120
break;
121
if (i < q->ti_len) {
122
q->ti_seq += i;
123
q->ti_len -= i;
124
m_adj(q->ti_mbuf, i);
125
break;
126
}
127
q = tcpiphdr_next(q);
128
m = tcpiphdr_prev(q)->ti_mbuf;
129
slirp_remque(tcpiphdr2qlink(tcpiphdr_prev(q)));
130
m_free(m);
131
}
132
133
/*
134
* Stick new segment in its place.
135
*/
136
slirp_insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));
137
138
present:
139
/*
140
* Present data to user, advancing rcv_nxt through
141
* completed sequence space.
142
*/
143
if (!TCPS_HAVEESTABLISHED(tp->t_state))
144
return (0);
145
ti = tcpfrag_list_first(tp);
146
if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)
147
return (0);
148
if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
149
return (0);
150
do {
151
tp->rcv_nxt += ti->ti_len;
152
flags = ti->ti_flags & TH_FIN;
153
slirp_remque(tcpiphdr2qlink(ti));
154
m = ti->ti_mbuf;
155
ti = tcpiphdr_next(ti);
156
if (so->so_state & SS_FCANTSENDMORE)
157
m_free(m);
158
else {
159
if (so->so_emu) {
160
if (tcp_emu(so, m))
161
sbappend(so, m);
162
} else
163
sbappend(so, m);
164
}
165
} while (!tcpfrag_list_end(ti, tp) && ti->ti_seq == tp->rcv_nxt);
166
return (flags);
167
}
168
169
/*
170
* TCP input routine, follows pages 65-76 of the
171
* protocol specification dated September, 1981 very closely.
172
*/
173
void tcp_input(struct mbuf *m, int iphlen, struct socket *inso,
174
unsigned short af)
175
{
176
struct ip save_ip, *ip;
177
struct ip6 save_ip6, *ip6;
178
register struct tcpiphdr *ti;
179
char *optp = NULL;
180
int optlen = 0;
181
int len, tlen, off;
182
register struct tcpcb *tp = NULL;
183
register int tiflags;
184
struct socket *so = NULL;
185
int todrop, acked, ourfinisacked, needoutput = 0;
186
int iss = 0;
187
uint32_t tiwin;
188
int ret;
189
struct sockaddr_storage lhost, fhost;
190
struct sockaddr_in *lhost4, *fhost4;
191
struct sockaddr_in6 *lhost6, *fhost6;
192
struct gfwd_list *ex_ptr;
193
Slirp *slirp;
194
195
DEBUG_CALL("tcp_input");
196
DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso);
197
198
memset(&lhost, 0, sizeof(struct sockaddr_storage));
199
memset(&fhost, 0, sizeof(struct sockaddr_storage));
200
201
/*
202
* If called with m == 0, then we're continuing the connect
203
*/
204
if (m == NULL) {
205
so = inso;
206
slirp = so->slirp;
207
208
/* Re-set a few variables */
209
tp = sototcpcb(so);
210
m = so->so_m;
211
so->so_m = NULL;
212
ti = so->so_ti;
213
tiwin = ti->ti_win;
214
tiflags = ti->ti_flags;
215
216
goto cont_conn;
217
}
218
slirp = m->slirp;
219
switch (af) {
220
case AF_INET:
221
M_DUP_DEBUG(slirp, m, 0,
222
sizeof(struct qlink) + sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr));
223
break;
224
case AF_INET6:
225
M_DUP_DEBUG(slirp, m, 0,
226
sizeof(struct qlink) + sizeof(struct tcpiphdr) - sizeof(struct ip6) - sizeof(struct tcphdr));
227
break;
228
}
229
230
ip = mtod(m, struct ip *);
231
ip6 = mtod(m, struct ip6 *);
232
233
switch (af) {
234
case AF_INET:
235
if (iphlen > sizeof(struct ip)) {
236
ip_stripoptions(m);
237
iphlen = sizeof(struct ip);
238
}
239
/* XXX Check if too short */
240
241
242
/*
243
* Save a copy of the IP header in case we want restore it
244
* for sending an ICMP error message in response.
245
*/
246
save_ip = *ip;
247
save_ip.ip_len += iphlen;
248
249
/*
250
* Get IP and TCP header together in first mbuf.
251
* Note: IP leaves IP header in first mbuf.
252
*/
253
m->m_data -=
254
sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr);
255
m->m_len +=
256
sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr);
257
ti = mtod(m, struct tcpiphdr *);
258
259
/*
260
* Checksum extended TCP header and data.
261
*/
262
tlen = ip->ip_len;
263
tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;
264
memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr));
265
memset(&ti->ti, 0, sizeof(ti->ti));
266
ti->ti_x0 = 0;
267
ti->ti_src = save_ip.ip_src;
268
ti->ti_dst = save_ip.ip_dst;
269
ti->ti_pr = save_ip.ip_p;
270
ti->ti_len = htons((uint16_t)tlen);
271
break;
272
273
case AF_INET6:
274
/*
275
* Save a copy of the IP header in case we want restore it
276
* for sending an ICMP error message in response.
277
*/
278
save_ip6 = *ip6;
279
/*
280
* Get IP and TCP header together in first mbuf.
281
* Note: IP leaves IP header in first mbuf.
282
*/
283
m->m_data -= sizeof(struct tcpiphdr) -
284
(sizeof(struct ip6) + sizeof(struct tcphdr));
285
m->m_len += sizeof(struct tcpiphdr) -
286
(sizeof(struct ip6) + sizeof(struct tcphdr));
287
ti = mtod(m, struct tcpiphdr *);
288
289
tlen = ip6->ip_pl;
290
tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;
291
memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr));
292
memset(&ti->ti, 0, sizeof(ti->ti));
293
ti->ti_x0 = 0;
294
ti->ti_src6 = save_ip6.ip_src;
295
ti->ti_dst6 = save_ip6.ip_dst;
296
ti->ti_nh6 = save_ip6.ip_nh;
297
ti->ti_len = htons((uint16_t)tlen);
298
break;
299
300
default:
301
g_assert_not_reached();
302
}
303
304
len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen);
305
if (cksum(m, len)) {
306
goto drop;
307
}
308
309
/*
310
* Check that TCP offset makes sense,
311
* pull out TCP options and adjust length. XXX
312
*/
313
off = ti->ti_off << 2;
314
if (off < sizeof(struct tcphdr) || off > tlen) {
315
goto drop;
316
}
317
tlen -= off;
318
ti->ti_len = tlen;
319
if (off > sizeof(struct tcphdr)) {
320
optlen = off - sizeof(struct tcphdr);
321
optp = mtod(m, char *) + sizeof(struct tcpiphdr);
322
}
323
tiflags = ti->ti_flags;
324
325
/*
326
* Convert TCP protocol specific fields to host format.
327
*/
328
NTOHL(ti->ti_seq);
329
NTOHL(ti->ti_ack);
330
NTOHS(ti->ti_win);
331
NTOHS(ti->ti_urp);
332
333
/*
334
* Drop TCP, IP headers and TCP options.
335
*/
336
m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);
337
m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);
338
339
/*
340
* Locate pcb for segment.
341
*/
342
findso:
343
lhost.ss_family = af;
344
fhost.ss_family = af;
345
switch (af) {
346
case AF_INET:
347
lhost4 = (struct sockaddr_in *)&lhost;
348
lhost4->sin_addr = ti->ti_src;
349
lhost4->sin_port = ti->ti_sport;
350
fhost4 = (struct sockaddr_in *)&fhost;
351
fhost4->sin_addr = ti->ti_dst;
352
fhost4->sin_port = ti->ti_dport;
353
break;
354
case AF_INET6:
355
lhost6 = (struct sockaddr_in6 *)&lhost;
356
lhost6->sin6_addr = ti->ti_src6;
357
lhost6->sin6_port = ti->ti_sport;
358
fhost6 = (struct sockaddr_in6 *)&fhost;
359
fhost6->sin6_addr = ti->ti_dst6;
360
fhost6->sin6_port = ti->ti_dport;
361
break;
362
default:
363
g_assert_not_reached();
364
}
365
366
so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost);
367
368
/*
369
* If the state is CLOSED (i.e., TCB does not exist) then
370
* all data in the incoming segment is discarded.
371
* If the TCB exists but is in CLOSED state, it is embryonic,
372
* but should either do a listen or a connect soon.
373
*
374
* state == CLOSED means we've done socreate() but haven't
375
* attached it to a protocol yet...
376
*
377
* XXX If a TCB does not exist, and the TH_SYN flag is
378
* the only flag set, then create a session, mark it
379
* as if it was LISTENING, and continue...
380
*/
381
if (so == NULL) {
382
/* TODO: IPv6 */
383
if (slirp->restricted) {
384
/* Any hostfwds will have an existing socket, so we only get here
385
* for non-hostfwd connections. These should be dropped, unless it
386
* happens to be a guestfwd.
387
*/
388
for (ex_ptr = slirp->guestfwd_list; ex_ptr;
389
ex_ptr = ex_ptr->ex_next) {
390
if (ex_ptr->ex_fport == ti->ti_dport &&
391
ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
392
break;
393
}
394
}
395
if (!ex_ptr) {
396
goto dropwithreset;
397
}
398
}
399
400
if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN)
401
goto dropwithreset;
402
403
so = socreate(slirp, IPPROTO_TCP);
404
tcp_attach(so);
405
406
sbreserve(&so->so_snd, TCP_SNDSPACE);
407
sbreserve(&so->so_rcv, TCP_RCVSPACE);
408
409
so->lhost.ss = lhost;
410
so->fhost.ss = fhost;
411
412
so->so_iptos = tcp_tos(so);
413
if (so->so_iptos == 0) {
414
switch (af) {
415
case AF_INET:
416
so->so_iptos = ((struct ip *)ti)->ip_tos;
417
break;
418
case AF_INET6:
419
break;
420
default:
421
g_assert_not_reached();
422
}
423
}
424
425
tp = sototcpcb(so);
426
tp->t_state = TCPS_LISTEN;
427
}
428
429
/*
430
* If this is a still-connecting socket, this probably
431
* a retransmit of the SYN. Whether it's a retransmit SYN
432
* or something else, we nuke it.
433
*/
434
if (so->so_state & SS_ISFCONNECTING)
435
goto drop;
436
437
tp = sototcpcb(so);
438
439
/* XXX Should never fail */
440
if (tp == NULL)
441
goto dropwithreset;
442
if (tp->t_state == TCPS_CLOSED)
443
goto drop;
444
445
tiwin = ti->ti_win;
446
447
/*
448
* Segment received on connection.
449
* Reset idle time and keep-alive timer.
450
*/
451
tp->t_idle = 0;
452
if (slirp_do_keepalive)
453
tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
454
else
455
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
456
457
/*
458
* Process options if not in LISTEN state,
459
* else do it below (after getting remote address).
460
*/
461
if (optp && tp->t_state != TCPS_LISTEN)
462
tcp_dooptions(tp, (uint8_t *)optp, optlen, ti);
463
464
/*
465
* Header prediction: check for the two common cases
466
* of a uni-directional data xfer. If the packet has
467
* no control flags, is in-sequence, the window didn't
468
* change and we're not retransmitting, it's a
469
* candidate. If the length is zero and the ack moved
470
* forward, we're the sender side of the xfer. Just
471
* free the data acked & wake any higher level process
472
* that was blocked waiting for space. If the length
473
* is non-zero and the ack didn't move, we're the
474
* receiver side. If we're getting packets in-order
475
* (the reassembly queue is empty), add the data to
476
* the socket buffer and note that we need a delayed ack.
477
*
478
* XXX Some of these tests are not needed
479
* eg: the tiwin == tp->snd_wnd prevents many more
480
* predictions.. with no *real* advantage..
481
*/
482
if (tp->t_state == TCPS_ESTABLISHED &&
483
(tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK &&
484
ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd &&
485
tp->snd_nxt == tp->snd_max) {
486
if (ti->ti_len == 0) {
487
if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
488
SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
489
tp->snd_cwnd >= tp->snd_wnd) {
490
/*
491
* this is a pure ack for outstanding data.
492
*/
493
if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
494
tcp_xmit_timer(tp, tp->t_rtt);
495
acked = ti->ti_ack - tp->snd_una;
496
sodrop(so, acked);
497
tp->snd_una = ti->ti_ack;
498
m_free(m);
499
500
/*
501
* If all outstanding data are acked, stop
502
* retransmit timer, otherwise restart timer
503
* using current (possibly backed-off) value.
504
* If process is waiting for space,
505
* wakeup/selwakeup/signal. If data
506
* are ready to send, let tcp_output
507
* decide between more output or persist.
508
*/
509
if (tp->snd_una == tp->snd_max)
510
tp->t_timer[TCPT_REXMT] = 0;
511
else if (tp->t_timer[TCPT_PERSIST] == 0)
512
tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
513
514
/*
515
* This is called because sowwakeup might have
516
* put data into so_snd. Since we don't so sowwakeup,
517
* we don't need this.. XXX???
518
*/
519
if (so->so_snd.sb_cc)
520
tcp_output(tp);
521
522
return;
523
}
524
} else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) &&
525
ti->ti_len <= sbspace(&so->so_rcv)) {
526
/*
527
* this is a pure, in-sequence data packet
528
* with nothing on the reassembly queue and
529
* we have enough buffer space to take it.
530
*/
531
tp->rcv_nxt += ti->ti_len;
532
/*
533
* Add data to socket buffer.
534
*/
535
if (so->so_emu) {
536
if (tcp_emu(so, m))
537
sbappend(so, m);
538
} else
539
sbappend(so, m);
540
541
/*
542
* If this is a short packet, then ACK now - with Nagel
543
* congestion avoidance sender won't send more until
544
* he gets an ACK.
545
*
546
* It is better to not delay acks at all to maximize
547
* TCP throughput. See RFC 2581.
548
*/
549
tp->t_flags |= TF_ACKNOW;
550
tcp_output(tp);
551
return;
552
}
553
} /* header prediction */
554
/*
555
* Calculate amount of space in receive window,
556
* and then do TCP input processing.
557
* Receive window is amount of space in rcv queue,
558
* but not less than advertised window.
559
*/
560
{
561
int win;
562
win = sbspace(&so->so_rcv);
563
if (win < 0)
564
win = 0;
565
tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt));
566
}
567
568
switch (tp->t_state) {
569
/*
570
* If the state is LISTEN then ignore segment if it contains an RST.
571
* If the segment contains an ACK then it is bad and send a RST.
572
* If it does not contain a SYN then it is not interesting; drop it.
573
* Don't bother responding if the destination was a broadcast.
574
* Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
575
* tp->iss, and send a segment:
576
* <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
577
* Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
578
* Fill in remote peer address fields if not previously specified.
579
* Enter SYN_RECEIVED state, and process any other fields of this
580
* segment in this state.
581
*/
582
case TCPS_LISTEN: {
583
if (tiflags & TH_RST)
584
goto drop;
585
if (tiflags & TH_ACK)
586
goto dropwithreset;
587
if ((tiflags & TH_SYN) == 0)
588
goto drop;
589
590
/*
591
* This has way too many gotos...
592
* But a bit of spaghetti code never hurt anybody :)
593
*/
594
595
/*
596
* If this is destined for the control address, then flag to
597
* tcp_ctl once connected, otherwise connect
598
*/
599
/* TODO: IPv6 */
600
if (af == AF_INET &&
601
(so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
602
slirp->vnetwork_addr.s_addr) {
603
if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr &&
604
so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) {
605
/* May be an add exec */
606
for (ex_ptr = slirp->guestfwd_list; ex_ptr;
607
ex_ptr = ex_ptr->ex_next) {
608
if (ex_ptr->ex_fport == so->so_fport &&
609
so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) {
610
so->so_state |= SS_CTL;
611
break;
612
}
613
}
614
if (so->so_state & SS_CTL) {
615
goto cont_input;
616
}
617
}
618
/* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
619
}
620
621
if (so->so_emu & EMU_NOCONNECT) {
622
so->so_emu &= ~EMU_NOCONNECT;
623
goto cont_input;
624
}
625
626
if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) &&
627
(errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
628
uint8_t code;
629
DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno));
630
if (errno == ECONNREFUSED) {
631
/* ACK the SYN, send RST to refuse the connection */
632
tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0,
633
TH_RST | TH_ACK, af);
634
} else {
635
switch (af) {
636
case AF_INET:
637
code = ICMP_UNREACH_NET;
638
if (errno == EHOSTUNREACH) {
639
code = ICMP_UNREACH_HOST;
640
}
641
break;
642
case AF_INET6:
643
code = ICMP6_UNREACH_NO_ROUTE;
644
if (errno == EHOSTUNREACH) {
645
code = ICMP6_UNREACH_ADDRESS;
646
}
647
break;
648
default:
649
g_assert_not_reached();
650
}
651
HTONL(ti->ti_seq); /* restore tcp header */
652
HTONL(ti->ti_ack);
653
HTONS(ti->ti_win);
654
HTONS(ti->ti_urp);
655
m->m_data -=
656
sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);
657
m->m_len +=
658
sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr);
659
switch (af) {
660
case AF_INET:
661
m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) -
662
sizeof(struct tcphdr);
663
m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) -
664
sizeof(struct tcphdr);
665
*ip = save_ip;
666
icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno));
667
break;
668
case AF_INET6:
669
m->m_data += sizeof(struct tcpiphdr) -
670
(sizeof(struct ip6) + sizeof(struct tcphdr));
671
m->m_len -= sizeof(struct tcpiphdr) -
672
(sizeof(struct ip6) + sizeof(struct tcphdr));
673
*ip6 = save_ip6;
674
icmp6_send_error(m, ICMP6_UNREACH, code);
675
break;
676
default:
677
g_assert_not_reached();
678
}
679
}
680
tcp_close(tp);
681
m_free(m);
682
} else {
683
/*
684
* Haven't connected yet, save the current mbuf
685
* and ti, and return
686
* XXX Some OS's don't tell us whether the connect()
687
* succeeded or not. So we must time it out.
688
*/
689
so->so_m = m;
690
so->so_ti = ti;
691
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
692
tp->t_state = TCPS_SYN_RECEIVED;
693
/*
694
* Initialize receive sequence numbers now so that we can send a
695
* valid RST if the remote end rejects our connection.
696
*/
697
tp->irs = ti->ti_seq;
698
tcp_rcvseqinit(tp);
699
tcp_template(tp);
700
}
701
return;
702
703
cont_conn:
704
/* m==NULL
705
* Check if the connect succeeded
706
*/
707
if (so->so_state & SS_NOFDREF) {
708
tp = tcp_close(tp);
709
goto dropwithreset;
710
}
711
cont_input:
712
tcp_template(tp);
713
714
if (optp)
715
tcp_dooptions(tp, (uint8_t *)optp, optlen, ti);
716
717
if (iss)
718
tp->iss = iss;
719
else
720
tp->iss = slirp->tcp_iss;
721
slirp->tcp_iss += TCP_ISSINCR / 2;
722
tp->irs = ti->ti_seq;
723
tcp_sendseqinit(tp);
724
tcp_rcvseqinit(tp);
725
tp->t_flags |= TF_ACKNOW;
726
tp->t_state = TCPS_SYN_RECEIVED;
727
tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
728
goto trimthenstep6;
729
} /* case TCPS_LISTEN */
730
731
/*
732
* If the state is SYN_SENT:
733
* if seg contains an ACK, but not for our SYN, drop the input.
734
* if seg contains a RST, then drop the connection.
735
* if seg does not contain SYN, then drop it.
736
* Otherwise this is an acceptable SYN segment
737
* initialize tp->rcv_nxt and tp->irs
738
* if seg contains ack then advance tp->snd_una
739
* if SYN has been acked change to ESTABLISHED else SYN_RCVD state
740
* arrange for segment to be acked (eventually)
741
* continue processing rest of data/controls, beginning with URG
742
*/
743
case TCPS_SYN_SENT:
744
if (getenv("SLIRP_FUZZING") &&
745
/* Align seq numbers on what the fuzzing trace says */
746
tp->iss == 1 && ti->ti_ack != 0) {
747
tp->iss = ti->ti_ack - 1;
748
tp->snd_max = tp->iss + 1;
749
}
750
751
if ((tiflags & TH_ACK) &&
752
(SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max)))
753
goto dropwithreset;
754
755
if (tiflags & TH_RST) {
756
if (tiflags & TH_ACK) {
757
tcp_drop(tp, 0); /* XXX Check t_softerror! */
758
}
759
goto drop;
760
}
761
762
if ((tiflags & TH_SYN) == 0)
763
goto drop;
764
if (tiflags & TH_ACK) {
765
tp->snd_una = ti->ti_ack;
766
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
767
tp->snd_nxt = tp->snd_una;
768
}
769
770
tp->t_timer[TCPT_REXMT] = 0;
771
tp->irs = ti->ti_seq;
772
tcp_rcvseqinit(tp);
773
tp->t_flags |= TF_ACKNOW;
774
if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
775
soisfconnected(so);
776
tp->t_state = TCPS_ESTABLISHED;
777
778
tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
779
/*
780
* if we didn't have to retransmit the SYN,
781
* use its rtt as our initial srtt & rtt var.
782
*/
783
if (tp->t_rtt)
784
tcp_xmit_timer(tp, tp->t_rtt);
785
} else
786
tp->t_state = TCPS_SYN_RECEIVED;
787
788
trimthenstep6:
789
/*
790
* Advance ti->ti_seq to correspond to first data byte.
791
* If data, trim to stay within window,
792
* dropping FIN if necessary.
793
*/
794
ti->ti_seq++;
795
if (ti->ti_len > tp->rcv_wnd) {
796
todrop = ti->ti_len - tp->rcv_wnd;
797
m_adj(m, -todrop);
798
ti->ti_len = tp->rcv_wnd;
799
tiflags &= ~TH_FIN;
800
}
801
tp->snd_wl1 = ti->ti_seq - 1;
802
tp->rcv_up = ti->ti_seq;
803
goto step6;
804
} /* switch tp->t_state */
805
/*
806
* States other than LISTEN or SYN_SENT.
807
* Check that at least some bytes of segment are within
808
* receive window. If segment begins before rcv_nxt,
809
* drop leading data (and SYN); if nothing left, just ack.
810
*/
811
todrop = tp->rcv_nxt - ti->ti_seq;
812
if (todrop > 0) {
813
if (tiflags & TH_SYN) {
814
tiflags &= ~TH_SYN;
815
ti->ti_seq++;
816
if (ti->ti_urp > 1)
817
ti->ti_urp--;
818
else
819
tiflags &= ~TH_URG;
820
todrop--;
821
}
822
/*
823
* Following if statement from Stevens, vol. 2, p. 960.
824
*/
825
if (todrop > ti->ti_len ||
826
(todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
827
/*
828
* Any valid FIN must be to the left of the window.
829
* At this point the FIN must be a duplicate or out
830
* of sequence; drop it.
831
*/
832
tiflags &= ~TH_FIN;
833
834
/*
835
* Send an ACK to resynchronize and drop any data.
836
* But keep on processing for RST or ACK.
837
*/
838
tp->t_flags |= TF_ACKNOW;
839
todrop = ti->ti_len;
840
}
841
m_adj(m, todrop);
842
ti->ti_seq += todrop;
843
ti->ti_len -= todrop;
844
if (ti->ti_urp > todrop)
845
ti->ti_urp -= todrop;
846
else {
847
tiflags &= ~TH_URG;
848
ti->ti_urp = 0;
849
}
850
}
851
/*
852
* If new data are received on a connection after the
853
* user processes are gone, then RST the other end.
854
*/
855
if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT &&
856
ti->ti_len) {
857
tp = tcp_close(tp);
858
goto dropwithreset;
859
}
860
861
/*
862
* If segment ends after window, drop trailing data
863
* (and PUSH and FIN); if nothing left, just ACK.
864
*/
865
todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd);
866
if (todrop > 0) {
867
if (todrop >= ti->ti_len) {
868
/*
869
* If a new connection request is received
870
* while in TIME_WAIT, drop the old connection
871
* and start over if the sequence numbers
872
* are above the previous ones.
873
*/
874
if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT &&
875
SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
876
iss = tp->rcv_nxt + TCP_ISSINCR;
877
tp = tcp_close(tp);
878
goto findso;
879
}
880
/*
881
* If window is closed can only take segments at
882
* window edge, and have to drop data and PUSH from
883
* incoming segments. Continue processing, but
884
* remember to ack. Otherwise, drop segment
885
* and ack.
886
*/
887
if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
888
tp->t_flags |= TF_ACKNOW;
889
} else {
890
goto dropafterack;
891
}
892
}
893
m_adj(m, -todrop);
894
ti->ti_len -= todrop;
895
tiflags &= ~(TH_PUSH | TH_FIN);
896
}
897
898
/*
899
* If the RST bit is set examine the state:
900
* SYN_RECEIVED STATE:
901
* If passive open, return to LISTEN state.
902
* If active open, inform user that connection was refused.
903
* ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
904
* Inform user that connection was reset, and close tcb.
905
* CLOSING, LAST_ACK, TIME_WAIT STATES
906
* Close the tcb.
907
*/
908
if (tiflags & TH_RST)
909
switch (tp->t_state) {
910
case TCPS_SYN_RECEIVED:
911
case TCPS_ESTABLISHED:
912
case TCPS_FIN_WAIT_1:
913
case TCPS_FIN_WAIT_2:
914
case TCPS_CLOSE_WAIT:
915
tp->t_state = TCPS_CLOSED;
916
tcp_close(tp);
917
goto drop;
918
919
case TCPS_CLOSING:
920
case TCPS_LAST_ACK:
921
case TCPS_TIME_WAIT:
922
tcp_close(tp);
923
goto drop;
924
}
925
926
/*
927
* If a SYN is in the window, then this is an
928
* error and we send an RST and drop the connection.
929
*/
930
if (tiflags & TH_SYN) {
931
tp = tcp_drop(tp, 0);
932
goto dropwithreset;
933
}
934
935
/*
936
* If the ACK bit is off we drop the segment and return.
937
*/
938
if ((tiflags & TH_ACK) == 0)
939
goto drop;
940
941
/*
942
* Ack processing.
943
*/
944
switch (tp->t_state) {
945
/*
946
* In SYN_RECEIVED state if the ack ACKs our SYN then enter
947
* ESTABLISHED state and continue processing, otherwise
948
* send an RST. una<=ack<=max
949
*/
950
case TCPS_SYN_RECEIVED:
951
if (getenv("SLIRP_FUZZING") &&
952
/* Align seq numbers on what the fuzzing trace says */
953
tp->iss == 1 && ti->ti_ack != 0) {
954
tp->iss = ti->ti_ack - 1;
955
tp->snd_max = tp->iss + 1;
956
tp->snd_una = ti->ti_ack;
957
}
958
959
if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max))
960
goto dropwithreset;
961
tp->t_state = TCPS_ESTABLISHED;
962
/*
963
* The sent SYN is ack'ed with our sequence number +1
964
* The first data byte already in the buffer will get
965
* lost if no correction is made. This is only needed for
966
* SS_CTL since the buffer is empty otherwise.
967
* tp->snd_una++; or:
968
*/
969
tp->snd_una = ti->ti_ack;
970
if (so->so_state & SS_CTL) {
971
/* So tcp_ctl reports the right state */
972
ret = tcp_ctl(so);
973
if (ret == 1) {
974
soisfconnected(so);
975
so->so_state &= ~SS_CTL; /* success XXX */
976
} else if (ret == 2) {
977
so->so_state &= SS_PERSISTENT_MASK;
978
so->so_state |= SS_NOFDREF; /* CTL_CMD */
979
} else {
980
needoutput = 1;
981
tp->t_state = TCPS_FIN_WAIT_1;
982
}
983
} else {
984
soisfconnected(so);
985
}
986
987
tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
988
tp->snd_wl1 = ti->ti_seq - 1;
989
/* Avoid ack processing; snd_una==ti_ack => dup ack */
990
goto synrx_to_est;
991
/* fall into ... */
992
993
/*
994
* In ESTABLISHED state: drop duplicate ACKs; ACK out of range
995
* ACKs. If the ack is in the range
996
* tp->snd_una < ti->ti_ack <= tp->snd_max
997
* then advance tp->snd_una to ti->ti_ack and drop
998
* data from the retransmission queue. If this ACK reflects
999
* more up to date window information we update our window information.
1000
*/
1001
case TCPS_ESTABLISHED:
1002
case TCPS_FIN_WAIT_1:
1003
case TCPS_FIN_WAIT_2:
1004
case TCPS_CLOSE_WAIT:
1005
case TCPS_CLOSING:
1006
case TCPS_LAST_ACK:
1007
case TCPS_TIME_WAIT:
1008
1009
if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
1010
if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
1011
DEBUG_MISC(" dup ack m = %p so = %p", m, so);
1012
/*
1013
* If we have outstanding data (other than
1014
* a window probe), this is a completely
1015
* duplicate ack (ie, window info didn't
1016
* change), the ack is the biggest we've
1017
* seen and we've seen exactly our rexmt
1018
* threshold of them, assume a packet
1019
* has been dropped and retransmit it.
1020
* Kludge snd_nxt & the congestion
1021
* window so we send only this one
1022
* packet.
1023
*
1024
* We know we're losing at the current
1025
* window size so do congestion avoidance
1026
* (set ssthresh to half the current window
1027
* and pull our congestion window back to
1028
* the new ssthresh).
1029
*
1030
* Dup acks mean that packets have left the
1031
* network (they're now cached at the receiver)
1032
* so bump cwnd by the amount in the receiver
1033
* to keep a constant cwnd packets in the
1034
* network.
1035
*/
1036
if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una)
1037
tp->t_dupacks = 0;
1038
else if (++tp->t_dupacks == TCPREXMTTHRESH) {
1039
tcp_seq onxt = tp->snd_nxt;
1040
unsigned win =
1041
MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
1042
1043
if (win < 2)
1044
win = 2;
1045
tp->snd_ssthresh = win * tp->t_maxseg;
1046
tp->t_timer[TCPT_REXMT] = 0;
1047
tp->t_rtt = 0;
1048
tp->snd_nxt = ti->ti_ack;
1049
tp->snd_cwnd = tp->t_maxseg;
1050
tcp_output(tp);
1051
tp->snd_cwnd =
1052
tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks;
1053
if (SEQ_GT(onxt, tp->snd_nxt))
1054
tp->snd_nxt = onxt;
1055
goto drop;
1056
} else if (tp->t_dupacks > TCPREXMTTHRESH) {
1057
tp->snd_cwnd += tp->t_maxseg;
1058
tcp_output(tp);
1059
goto drop;
1060
}
1061
} else
1062
tp->t_dupacks = 0;
1063
break;
1064
}
1065
synrx_to_est:
1066
/*
1067
* If the congestion window was inflated to account
1068
* for the other side's cached packets, retract it.
1069
*/
1070
if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh)
1071
tp->snd_cwnd = tp->snd_ssthresh;
1072
tp->t_dupacks = 0;
1073
if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1074
goto dropafterack;
1075
}
1076
acked = ti->ti_ack - tp->snd_una;
1077
1078
/*
1079
* If transmit timer is running and timed sequence
1080
* number was acked, update smoothed round trip time.
1081
* Since we now have an rtt measurement, cancel the
1082
* timer backoff (cf., Phil Karn's retransmit alg.).
1083
* Recompute the initial retransmit timer.
1084
*/
1085
if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
1086
tcp_xmit_timer(tp, tp->t_rtt);
1087
1088
/*
1089
* If all outstanding data is acked, stop retransmit
1090
* timer and remember to restart (more output or persist).
1091
* If there is more data to be acked, restart retransmit
1092
* timer, using current (possibly backed-off) value.
1093
*/
1094
if (ti->ti_ack == tp->snd_max) {
1095
tp->t_timer[TCPT_REXMT] = 0;
1096
needoutput = 1;
1097
} else if (tp->t_timer[TCPT_PERSIST] == 0)
1098
tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1099
/*
1100
* When new data is acked, open the congestion window.
1101
* If the window gives us less than ssthresh packets
1102
* in flight, open exponentially (maxseg per packet).
1103
* Otherwise open linearly: maxseg per window
1104
* (maxseg^2 / cwnd per packet).
1105
*/
1106
{
1107
register unsigned cw = tp->snd_cwnd;
1108
register unsigned incr = tp->t_maxseg;
1109
1110
if (cw > tp->snd_ssthresh)
1111
incr = incr * incr / cw;
1112
tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale);
1113
}
1114
if (acked > so->so_snd.sb_cc) {
1115
tp->snd_wnd -= so->so_snd.sb_cc;
1116
sodrop(so, (int)so->so_snd.sb_cc);
1117
ourfinisacked = 1;
1118
} else {
1119
sodrop(so, acked);
1120
tp->snd_wnd -= acked;
1121
ourfinisacked = 0;
1122
}
1123
tp->snd_una = ti->ti_ack;
1124
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1125
tp->snd_nxt = tp->snd_una;
1126
1127
switch (tp->t_state) {
1128
/*
1129
* In FIN_WAIT_1 STATE in addition to the processing
1130
* for the ESTABLISHED state if our FIN is now acknowledged
1131
* then enter FIN_WAIT_2.
1132
*/
1133
case TCPS_FIN_WAIT_1:
1134
if (ourfinisacked) {
1135
/*
1136
* If we can't receive any more
1137
* data, then closing user can proceed.
1138
* Starting the timer is contrary to the
1139
* specification, but if we don't get a FIN
1140
* we'll hang forever.
1141
*/
1142
if (so->so_state & SS_FCANTRCVMORE) {
1143
tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
1144
}
1145
tp->t_state = TCPS_FIN_WAIT_2;
1146
}
1147
break;
1148
1149
/*
1150
* In CLOSING STATE in addition to the processing for
1151
* the ESTABLISHED state if the ACK acknowledges our FIN
1152
* then enter the TIME-WAIT state, otherwise ignore
1153
* the segment.
1154
*/
1155
case TCPS_CLOSING:
1156
if (ourfinisacked) {
1157
tp->t_state = TCPS_TIME_WAIT;
1158
tcp_canceltimers(tp);
1159
tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1160
}
1161
break;
1162
1163
/*
1164
* In LAST_ACK, we may still be waiting for data to drain
1165
* and/or to be acked, as well as for the ack of our FIN.
1166
* If our FIN is now acknowledged, delete the TCB,
1167
* enter the closed state and return.
1168
*/
1169
case TCPS_LAST_ACK:
1170
if (ourfinisacked) {
1171
tcp_close(tp);
1172
goto drop;
1173
}
1174
break;
1175
1176
/*
1177
* In TIME_WAIT state the only thing that should arrive
1178
* is a retransmission of the remote FIN. Acknowledge
1179
* it and restart the finack timer.
1180
*/
1181
case TCPS_TIME_WAIT:
1182
tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1183
goto dropafterack;
1184
}
1185
} /* switch(tp->t_state) */
1186
1187
step6:
1188
/*
1189
* Update window information.
1190
* Don't look at window if no ACK: TAC's send garbage on first SYN.
1191
*/
1192
if ((tiflags & TH_ACK) &&
1193
(SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1194
(tp->snd_wl1 == ti->ti_seq &&
1195
(SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1196
(tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1197
tp->snd_wnd = tiwin;
1198
tp->snd_wl1 = ti->ti_seq;
1199
tp->snd_wl2 = ti->ti_ack;
1200
if (tp->snd_wnd > tp->max_sndwnd)
1201
tp->max_sndwnd = tp->snd_wnd;
1202
needoutput = 1;
1203
}
1204
1205
/*
1206
* Process segments with URG.
1207
*/
1208
if ((tiflags & TH_URG) && ti->ti_urp &&
1209
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1210
/*
1211
* This is a kludge, but if we receive and accept
1212
* random urgent pointers, we'll crash in
1213
* soreceive. It's hard to imagine someone
1214
* actually wanting to send this much urgent data.
1215
*/
1216
if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
1217
ti->ti_urp = 0;
1218
tiflags &= ~TH_URG;
1219
goto dodata;
1220
}
1221
/*
1222
* If this segment advances the known urgent pointer,
1223
* then mark the data stream. This should not happen
1224
* in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1225
* a FIN has been received from the remote side.
1226
* In these states we ignore the URG.
1227
*
1228
* According to RFC961 (Assigned Protocols),
1229
* the urgent pointer points to the last octet
1230
* of urgent data. We continue, however,
1231
* to consider it to indicate the first octet
1232
* of data past the urgent section as the original
1233
* spec states (in one of two places).
1234
*/
1235
if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) {
1236
tp->rcv_up = ti->ti_seq + ti->ti_urp;
1237
so->so_urgc =
1238
so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */
1239
tp->rcv_up = ti->ti_seq + ti->ti_urp;
1240
}
1241
} else
1242
/*
1243
* If no out of band data is expected,
1244
* pull receive urgent pointer along
1245
* with the receive window.
1246
*/
1247
if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1248
tp->rcv_up = tp->rcv_nxt;
1249
dodata:
1250
1251
/*
1252
* If this is a small packet, then ACK now - with Nagle
1253
* congestion avoidance sender won't send more until
1254
* he gets an ACK.
1255
*/
1256
if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
1257
((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
1258
tp->t_flags |= TF_ACKNOW;
1259
}
1260
1261
/*
1262
* Process the segment text, merging it into the TCP sequencing queue,
1263
* and arranging for acknowledgment of receipt if necessary.
1264
* This process logically involves adjusting tp->rcv_wnd as data
1265
* is presented to the user (this happens in tcp_usrreq.c,
1266
* case PRU_RCVD). If a FIN has already been received on this
1267
* connection then we just ignore the text.
1268
*/
1269
if ((ti->ti_len || (tiflags & TH_FIN)) &&
1270
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1271
1272
/*
1273
* segment is the next to be received on an established
1274
* connection, and the queue is empty, avoid linkage into and
1275
* removal from the queue and repetition of various
1276
* conversions from tcp_reass().
1277
*/
1278
if (ti->ti_seq == tp->rcv_nxt && tcpfrag_list_empty(tp) &&
1279
tp->t_state == TCPS_ESTABLISHED) {
1280
tp->t_flags |= TF_DELACK;
1281
tp->rcv_nxt += ti->ti_len;
1282
tiflags = ti->ti_flags & TH_FIN;
1283
if (so->so_emu) {
1284
if (tcp_emu(so, m))
1285
sbappend(so, m);
1286
} else
1287
sbappend(so, m);
1288
} else {
1289
tiflags = tcp_reass(tp, ti, m);
1290
tp->t_flags |= TF_ACKNOW;
1291
}
1292
} else {
1293
m_free(m);
1294
tiflags &= ~TH_FIN;
1295
}
1296
1297
/*
1298
* If FIN is received ACK the FIN and let the user know
1299
* that the connection is closing.
1300
*/
1301
if (tiflags & TH_FIN) {
1302
if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1303
/*
1304
* If we receive a FIN we can't send more data,
1305
* set it SS_FDRAIN
1306
* Shutdown the socket if there is no rx data in the
1307
* buffer.
1308
* soread() is called on completion of shutdown() and
1309
* will go to TCPS_LAST_ACK, and use tcp_output()
1310
* to send the FIN.
1311
*/
1312
sofwdrain(so);
1313
1314
tp->t_flags |= TF_ACKNOW;
1315
tp->rcv_nxt++;
1316
}
1317
switch (tp->t_state) {
1318
/*
1319
* In SYN_RECEIVED and ESTABLISHED STATES
1320
* enter the CLOSE_WAIT state.
1321
*/
1322
case TCPS_SYN_RECEIVED:
1323
case TCPS_ESTABLISHED:
1324
if (so->so_emu == EMU_CTL) /* no shutdown on socket */
1325
tp->t_state = TCPS_LAST_ACK;
1326
else
1327
tp->t_state = TCPS_CLOSE_WAIT;
1328
break;
1329
1330
/*
1331
* If still in FIN_WAIT_1 STATE FIN has not been acked so
1332
* enter the CLOSING state.
1333
*/
1334
case TCPS_FIN_WAIT_1:
1335
tp->t_state = TCPS_CLOSING;
1336
break;
1337
1338
/*
1339
* In FIN_WAIT_2 state enter the TIME_WAIT state,
1340
* starting the time-wait timer, turning off the other
1341
* standard timers.
1342
*/
1343
case TCPS_FIN_WAIT_2:
1344
tp->t_state = TCPS_TIME_WAIT;
1345
tcp_canceltimers(tp);
1346
tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1347
break;
1348
1349
/*
1350
* In TIME_WAIT state restart the 2 MSL time_wait timer.
1351
*/
1352
case TCPS_TIME_WAIT:
1353
tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1354
break;
1355
}
1356
}
1357
1358
/*
1359
* Return any desired output.
1360
*/
1361
if (needoutput || (tp->t_flags & TF_ACKNOW)) {
1362
tcp_output(tp);
1363
}
1364
return;
1365
1366
dropafterack:
1367
/*
1368
* Generate an ACK dropping incoming segment if it occupies
1369
* sequence space, where the ACK reflects our state.
1370
*/
1371
if (tiflags & TH_RST)
1372
goto drop;
1373
m_free(m);
1374
tp->t_flags |= TF_ACKNOW;
1375
tcp_output(tp);
1376
return;
1377
1378
dropwithreset:
1379
/* reuses m if m!=NULL, m_free() unnecessary */
1380
if (tiflags & TH_ACK)
1381
tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af);
1382
else {
1383
if (tiflags & TH_SYN)
1384
ti->ti_len++;
1385
tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0,
1386
TH_RST | TH_ACK, af);
1387
}
1388
1389
return;
1390
1391
drop:
1392
/*
1393
* Drop space held by incoming segment and return.
1394
*/
1395
m_free(m);
1396
}
1397
1398
static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt,
1399
struct tcpiphdr *ti)
1400
{
1401
uint16_t mss;
1402
int opt, optlen;
1403
1404
DEBUG_CALL("tcp_dooptions");
1405
DEBUG_ARG("tp = %p cnt=%i", tp, cnt);
1406
1407
for (; cnt > 0; cnt -= optlen, cp += optlen) {
1408
opt = cp[0];
1409
if (opt == TCPOPT_EOL)
1410
break;
1411
if (opt == TCPOPT_NOP)
1412
optlen = 1;
1413
else {
1414
optlen = cp[1];
1415
if (optlen <= 0)
1416
break;
1417
}
1418
switch (opt) {
1419
default:
1420
continue;
1421
1422
case TCPOPT_MAXSEG:
1423
if (optlen != TCPOLEN_MAXSEG)
1424
continue;
1425
if (!(ti->ti_flags & TH_SYN))
1426
continue;
1427
memcpy((char *)&mss, (char *)cp + 2, sizeof(mss));
1428
NTOHS(mss);
1429
tcp_mss(tp, mss); /* sets t_maxseg */
1430
break;
1431
}
1432
}
1433
}
1434
1435
/*
1436
* Collect new round-trip time estimate
1437
* and update averages and current timeout.
1438
*/
1439
1440
static void tcp_xmit_timer(register struct tcpcb *tp, int rtt)
1441
{
1442
register short delta;
1443
1444
DEBUG_CALL("tcp_xmit_timer");
1445
DEBUG_ARG("tp = %p", tp);
1446
DEBUG_ARG("rtt = %d", rtt);
1447
1448
if (tp->t_srtt != 0) {
1449
/*
1450
* srtt is stored as fixed point with 3 bits after the
1451
* binary point (i.e., scaled by 8). The following magic
1452
* is equivalent to the smoothing algorithm in rfc793 with
1453
* an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1454
* point). Adjust rtt to origin 0.
1455
*/
1456
delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1457
if ((tp->t_srtt += delta) <= 0)
1458
tp->t_srtt = 1;
1459
/*
1460
* We accumulate a smoothed rtt variance (actually, a
1461
* smoothed mean difference), then set the retransmit
1462
* timer to smoothed rtt + 4 times the smoothed variance.
1463
* rttvar is stored as fixed point with 2 bits after the
1464
* binary point (scaled by 4). The following is
1465
* equivalent to rfc793 smoothing with an alpha of .75
1466
* (rttvar = rttvar*3/4 + |delta| / 4). This replaces
1467
* rfc793's wired-in beta.
1468
*/
1469
if (delta < 0)
1470
delta = -delta;
1471
delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1472
if ((tp->t_rttvar += delta) <= 0)
1473
tp->t_rttvar = 1;
1474
} else {
1475
/*
1476
* No rtt measurement yet - use the unsmoothed rtt.
1477
* Set the variance to half the rtt (so our first
1478
* retransmit happens at 3*rtt).
1479
*/
1480
tp->t_srtt = rtt << TCP_RTT_SHIFT;
1481
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1482
}
1483
tp->t_rtt = 0;
1484
tp->t_rxtshift = 0;
1485
1486
/*
1487
* the retransmit should happen at rtt + 4 * rttvar.
1488
* Because of the way we do the smoothing, srtt and rttvar
1489
* will each average +1/2 tick of bias. When we compute
1490
* the retransmit timer, we want 1/2 tick of rounding and
1491
* 1 extra tick because of +-1/2 tick uncertainty in the
1492
* firing of the timer. The bias will give us exactly the
1493
* 1.5 tick we need. But, because the bias is
1494
* statistical, we have to test that we don't drop below
1495
* the minimum feasible timer (which is 2 ticks).
1496
*/
1497
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin,
1498
TCPTV_REXMTMAX); /* XXX */
1499
1500
/*
1501
* We received an ack for a packet that wasn't retransmitted;
1502
* it is probably safe to discard any error indications we've
1503
* received recently. This isn't quite right, but close enough
1504
* for now (a route might have failed after we sent a segment,
1505
* and the return path might not be symmetrical).
1506
*/
1507
tp->t_softerror = 0;
1508
}
1509
1510
/*
1511
* Determine a reasonable value for maxseg size.
1512
* If the route is known, check route for mtu.
1513
* If none, use an mss that can be handled on the outgoing
1514
* interface without forcing IP to fragment; if bigger than
1515
* an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
1516
* to utilize large mbufs. If no route is found, route has no mtu,
1517
* or the destination isn't local, use a default, hopefully conservative
1518
* size (usually 512 or the default IP max size, but no more than the mtu
1519
* of the interface), as we can't discover anything about intervening
1520
* gateways or networks. We also initialize the congestion/slow start
1521
* window to be a single segment if the destination isn't local.
1522
* While looking at the routing entry, we also initialize other path-dependent
1523
* parameters from pre-set or cached values in the routing entry.
1524
*/
1525
1526
int tcp_mss(struct tcpcb *tp, unsigned offer)
1527
{
1528
struct socket *so = tp->t_socket;
1529
int mss;
1530
1531
DEBUG_CALL("tcp_mss");
1532
DEBUG_ARG("tp = %p", tp);
1533
DEBUG_ARG("offer = %d", offer);
1534
1535
switch (so->so_ffamily) {
1536
case AF_INET:
1537
mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) -
1538
sizeof(struct tcphdr) - sizeof(struct ip);
1539
break;
1540
case AF_INET6:
1541
mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) -
1542
sizeof(struct tcphdr) - sizeof(struct ip6);
1543
break;
1544
default:
1545
g_assert_not_reached();
1546
}
1547
1548
if (offer)
1549
mss = MIN(mss, offer);
1550
mss = MAX(mss, 32);
1551
if (mss < tp->t_maxseg || offer != 0)
1552
tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX);
1553
1554
tp->snd_cwnd = mss;
1555
1556
sbreserve(&so->so_snd,
1557
TCP_SNDSPACE +
1558
((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0));
1559
sbreserve(&so->so_rcv,
1560
TCP_RCVSPACE +
1561
((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0));
1562
1563
DEBUG_MISC(" returning mss = %d", mss);
1564
1565
return mss;
1566
}
1567
1568