Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet/cc/cc_chd.c
39476 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2009-2010
5
* Swinburne University of Technology, Melbourne, Australia
6
* Copyright (c) 2010-2011 The FreeBSD Foundation
7
* All rights reserved.
8
*
9
* This software was developed at the Centre for Advanced Internet
10
* Architectures, Swinburne University of Technology, by David Hayes and
11
* Lawrence Stewart, made possible in part by a grant from the Cisco University
12
* Research Program Fund at Community Foundation Silicon Valley.
13
*
14
* Portions of this software were developed at the Centre for Advanced Internet
15
* Architectures, Swinburne University of Technology, Melbourne, Australia by
16
* David Hayes under sponsorship from the FreeBSD Foundation.
17
*
18
* Redistribution and use in source and binary forms, with or without
19
* modification, are permitted provided that the following conditions
20
* are met:
21
* 1. Redistributions of source code must retain the above copyright
22
* notice, this list of conditions and the following disclaimer.
23
* 2. Redistributions in binary form must reproduce the above copyright
24
* notice, this list of conditions and the following disclaimer in the
25
* documentation and/or other materials provided with the distribution.
26
*
27
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
28
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
31
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37
* SUCH DAMAGE.
38
*/
39
40
/*
41
* An implementation of the CAIA-Hamilton delay based congestion control
42
* algorithm, based on "Improved coexistence and loss tolerance for delay based
43
* TCP congestion control" by D. A. Hayes and G. Armitage, in 35th Annual IEEE
44
* Conference on Local Computer Networks (LCN 2010), Denver, Colorado, USA,
45
* 11-14 October 2010.
46
*
47
* Originally released as part of the NewTCP research project at Swinburne
48
* University of Technology's Centre for Advanced Internet Architectures,
49
* Melbourne, Australia, which was made possible in part by a grant from the
50
* Cisco University Research Program Fund at Community Foundation Silicon
51
* Valley. More details are available at:
52
* http://caia.swin.edu.au/urp/newtcp/
53
*/
54
55
#include <sys/param.h>
56
#include <sys/kernel.h>
57
#include <sys/khelp.h>
58
#include <sys/limits.h>
59
#include <sys/malloc.h>
60
#include <sys/module.h>
61
#include <sys/prng.h>
62
#include <sys/queue.h>
63
#include <sys/socket.h>
64
#include <sys/socketvar.h>
65
#include <sys/sysctl.h>
66
#include <sys/systm.h>
67
68
#include <net/vnet.h>
69
70
#include <net/route.h>
71
#include <net/route/nhop.h>
72
73
#include <netinet/in_pcb.h>
74
#include <netinet/tcp.h>
75
#include <netinet/tcp_seq.h>
76
#include <netinet/tcp_timer.h>
77
#include <netinet/tcp_var.h>
78
#include <netinet/cc/cc.h>
79
#include <netinet/cc/cc_module.h>
80
81
#include <netinet/khelp/h_ertt.h>
82
83
/*
84
* Private signal type for the delay based congestion signal.
85
* See <netinet/cc/cc.h> for the appropriate bit-range to use for private signals.
86
*/
87
#define CC_CHD_DELAY 0x02000000
88
89
/* Largest possible number returned by prng32(). */
90
#define RANDOM_MAX UINT32_MAX
91
92
static void chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type);
93
static void chd_cb_destroy(struct cc_var *ccv);
94
static int chd_cb_init(struct cc_var *ccv, void *ptr);
95
static void chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type);
96
static void chd_conn_init(struct cc_var *ccv);
97
static int chd_mod_init(void);
98
static size_t chd_data_sz(void);
99
100
struct chd {
101
/*
102
* Shadow window - keeps track of what the NewReno congestion window
103
* would have been if delay-based cwnd backoffs had not been made. This
104
* functionality aids coexistence with loss-based TCP flows which may be
105
* sharing links along the path.
106
*/
107
unsigned long shadow_w;
108
/*
109
* Loss-based TCP compatibility flag - When set, it turns on the shadow
110
* window functionality.
111
*/
112
int loss_compete;
113
/* The maximum round trip time seen within a measured rtt period. */
114
int maxrtt_in_rtt;
115
/* The previous qdly that caused cwnd to backoff. */
116
int prev_backoff_qdly;
117
};
118
119
static int ertt_id;
120
121
VNET_DEFINE_STATIC(uint32_t, chd_qmin) = 5;
122
VNET_DEFINE_STATIC(uint32_t, chd_pmax) = 50;
123
VNET_DEFINE_STATIC(uint32_t, chd_loss_fair) = 1;
124
VNET_DEFINE_STATIC(uint32_t, chd_use_max) = 1;
125
VNET_DEFINE_STATIC(uint32_t, chd_qthresh) = 20;
126
#define V_chd_qthresh VNET(chd_qthresh)
127
#define V_chd_qmin VNET(chd_qmin)
128
#define V_chd_pmax VNET(chd_pmax)
129
#define V_chd_loss_fair VNET(chd_loss_fair)
130
#define V_chd_use_max VNET(chd_use_max)
131
132
133
struct cc_algo chd_cc_algo = {
134
.name = "chd",
135
.ack_received = chd_ack_received,
136
.cb_destroy = chd_cb_destroy,
137
.cb_init = chd_cb_init,
138
.cong_signal = chd_cong_signal,
139
.conn_init = chd_conn_init,
140
.mod_init = chd_mod_init,
141
.cc_data_sz = chd_data_sz,
142
.after_idle = newreno_cc_after_idle,
143
.post_recovery = newreno_cc_post_recovery,
144
};
145
146
static __inline void
147
chd_window_decrease(struct cc_var *ccv)
148
{
149
unsigned long win;
150
uint32_t mss = tcp_fixed_maxseg(ccv->tp);
151
152
win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / mss;
153
win -= max((win / 2), 1);
154
CCV(ccv, snd_ssthresh) = max(win, 2) * mss;
155
}
156
157
/*
158
* Probabilistic backoff function. Returns 1 if we should backoff or 0
159
* otherwise. The calculation of p is similar to the calculation of p in cc_hd.
160
*/
161
static __inline int
162
should_backoff(int qdly, int maxqdly, struct chd *chd_data)
163
{
164
uint32_t rand, p;
165
166
rand = prng32();
167
168
if (qdly < V_chd_qthresh) {
169
chd_data->loss_compete = 0;
170
p = (((RANDOM_MAX / 100) * V_chd_pmax) /
171
(V_chd_qthresh - V_chd_qmin)) *
172
(qdly - V_chd_qmin);
173
} else {
174
if (qdly > V_chd_qthresh) {
175
p = (((RANDOM_MAX / 100) * V_chd_pmax) /
176
(maxqdly - V_chd_qthresh)) *
177
(maxqdly - qdly);
178
if (V_chd_loss_fair && rand < p)
179
chd_data->loss_compete = 1;
180
} else {
181
p = (RANDOM_MAX / 100) * V_chd_pmax;
182
chd_data->loss_compete = 0;
183
}
184
}
185
186
return (rand < p);
187
}
188
189
static __inline void
190
chd_window_increase(struct cc_var *ccv, int new_measurement)
191
{
192
struct chd *chd_data;
193
int incr;
194
uint32_t mss = tcp_fixed_maxseg(ccv->tp);
195
196
chd_data = ccv->cc_data;
197
incr = 0;
198
199
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
200
/* Adapted from NewReno slow start. */
201
if (V_tcp_do_rfc3465) {
202
/* In slow-start with ABC enabled. */
203
if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) {
204
/* Not due to RTO. */
205
incr = min(ccv->bytes_this_ack,
206
V_tcp_abc_l_var * mss);
207
} else {
208
/* Due to RTO. */
209
incr = min(ccv->bytes_this_ack, mss);
210
}
211
} else
212
incr = mss;
213
214
} else { /* Congestion avoidance. */
215
if (V_tcp_do_rfc3465) {
216
if (ccv->flags & CCF_ABC_SENTAWND) {
217
ccv->flags &= ~CCF_ABC_SENTAWND;
218
incr = mss;
219
}
220
} else if (new_measurement)
221
incr = mss;
222
}
223
224
if (chd_data->shadow_w > 0) {
225
/* Track NewReno window. */
226
chd_data->shadow_w = min(chd_data->shadow_w + incr,
227
TCP_MAXWIN << CCV(ccv, snd_scale));
228
}
229
230
CCV(ccv,snd_cwnd) = min(CCV(ccv, snd_cwnd) + incr,
231
TCP_MAXWIN << CCV(ccv, snd_scale));
232
}
233
234
/*
235
* All ACK signals are used for timing measurements to determine delay-based
236
* congestion. However, window increases are only performed when
237
* ack_type == CC_ACK.
238
*/
239
static void
240
chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
241
{
242
struct chd *chd_data;
243
struct ertt *e_t;
244
int backoff, new_measurement, qdly, rtt;
245
246
e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
247
chd_data = ccv->cc_data;
248
new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT;
249
backoff = qdly = 0;
250
251
chd_data->maxrtt_in_rtt = imax(e_t->rtt, chd_data->maxrtt_in_rtt);
252
253
if (new_measurement) {
254
/*
255
* There is a new per RTT measurement, so check to see if there
256
* is delay based congestion.
257
*/
258
rtt = V_chd_use_max ? chd_data->maxrtt_in_rtt : e_t->rtt;
259
chd_data->maxrtt_in_rtt = 0;
260
261
if (rtt && e_t->minrtt && !IN_RECOVERY(CCV(ccv, t_flags))) {
262
qdly = rtt - e_t->minrtt;
263
if (qdly > V_chd_qmin) {
264
/*
265
* Probabilistic delay based congestion
266
* indication.
267
*/
268
backoff = should_backoff(qdly,
269
e_t->maxrtt - e_t->minrtt, chd_data);
270
} else
271
chd_data->loss_compete = 0;
272
}
273
/* Reset per RTT measurement flag to start a new measurement. */
274
e_t->flags &= ~ERTT_NEW_MEASUREMENT;
275
}
276
277
if (backoff) {
278
/*
279
* Update shadow_w before delay based backoff.
280
*/
281
if (chd_data->loss_compete ||
282
qdly > chd_data->prev_backoff_qdly) {
283
/*
284
* Delay is higher than when we backed off previously,
285
* so it is possible that this flow is competing with
286
* loss based flows.
287
*/
288
chd_data->shadow_w = max(CCV(ccv, snd_cwnd),
289
chd_data->shadow_w);
290
} else {
291
/*
292
* Reset shadow_w, as it is probable that this flow is
293
* not competing with loss based flows at the moment.
294
*/
295
chd_data->shadow_w = 0;
296
}
297
298
chd_data->prev_backoff_qdly = qdly;
299
/*
300
* Send delay-based congestion signal to the congestion signal
301
* handler.
302
*/
303
chd_cong_signal(ccv, CC_CHD_DELAY);
304
305
} else if (ack_type == CC_ACK)
306
chd_window_increase(ccv, new_measurement);
307
}
308
309
static void
310
chd_cb_destroy(struct cc_var *ccv)
311
{
312
free(ccv->cc_data, M_CC_MEM);
313
}
314
315
size_t
316
chd_data_sz(void)
317
{
318
return (sizeof(struct chd));
319
}
320
321
static int
322
chd_cb_init(struct cc_var *ccv, void *ptr)
323
{
324
struct chd *chd_data;
325
326
INP_WLOCK_ASSERT(tptoinpcb(ccv->tp));
327
if (ptr == NULL) {
328
chd_data = malloc(sizeof(struct chd), M_CC_MEM, M_NOWAIT);
329
if (chd_data == NULL)
330
return (ENOMEM);
331
} else
332
chd_data = ptr;
333
334
chd_data->shadow_w = 0;
335
ccv->cc_data = chd_data;
336
337
return (0);
338
}
339
340
static void
341
chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
342
{
343
struct ertt *e_t;
344
struct chd *chd_data;
345
int qdly;
346
347
e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
348
chd_data = ccv->cc_data;
349
qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt;
350
351
switch((int)signal_type) {
352
case CC_CHD_DELAY:
353
chd_window_decrease(ccv); /* Set new ssthresh. */
354
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
355
CCV(ccv, snd_recover) = CCV(ccv, snd_max);
356
ENTER_CONGRECOVERY(CCV(ccv, t_flags));
357
break;
358
359
case CC_NDUPACK: /* Packet loss. */
360
/*
361
* Only react to loss as a congestion signal if qdly >
362
* V_chd_qthresh. If qdly is less than qthresh, presume that
363
* this is a non congestion related loss. If qdly is greater
364
* than qthresh, assume that we are competing with loss based
365
* tcp flows and restore window from any unnecessary backoffs,
366
* before the decrease.
367
*/
368
if (!IN_RECOVERY(CCV(ccv, t_flags)) && qdly > V_chd_qthresh) {
369
if (chd_data->loss_compete) {
370
CCV(ccv, snd_cwnd) = max(CCV(ccv, snd_cwnd),
371
chd_data->shadow_w);
372
}
373
chd_window_decrease(ccv);
374
} else {
375
/*
376
* This loss isn't congestion related, or already
377
* recovering from congestion.
378
*/
379
CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
380
CCV(ccv, snd_recover) = CCV(ccv, snd_max);
381
}
382
383
if (chd_data->shadow_w > 0) {
384
uint32_t mss = tcp_fixed_maxseg(ccv->tp);
385
chd_data->shadow_w = max(chd_data->shadow_w /
386
mss / 2, 2) * mss;
387
}
388
ENTER_FASTRECOVERY(CCV(ccv, t_flags));
389
break;
390
391
default:
392
newreno_cc_cong_signal(ccv, signal_type);
393
break;
394
}
395
}
396
397
static void
398
chd_conn_init(struct cc_var *ccv)
399
{
400
struct chd *chd_data;
401
402
chd_data = ccv->cc_data;
403
chd_data->prev_backoff_qdly = 0;
404
chd_data->maxrtt_in_rtt = 0;
405
chd_data->loss_compete = 0;
406
/*
407
* Initialise the shadow_cwnd to be equal to snd_cwnd in case we are
408
* competing with loss based flows from the start.
409
*/
410
chd_data->shadow_w = CCV(ccv, snd_cwnd);
411
}
412
413
static int
414
chd_mod_init(void)
415
{
416
417
ertt_id = khelp_get_id("ertt");
418
if (ertt_id <= 0) {
419
printf("%s: h_ertt module not found\n", __func__);
420
return (ENOENT);
421
}
422
return (0);
423
}
424
425
static int
426
chd_loss_fair_handler(SYSCTL_HANDLER_ARGS)
427
{
428
int error;
429
uint32_t new;
430
431
new = V_chd_loss_fair;
432
error = sysctl_handle_int(oidp, &new, 0, req);
433
if (error == 0 && req->newptr != NULL) {
434
if (new > 1)
435
error = EINVAL;
436
else
437
V_chd_loss_fair = new;
438
}
439
440
return (error);
441
}
442
443
static int
444
chd_pmax_handler(SYSCTL_HANDLER_ARGS)
445
{
446
int error;
447
uint32_t new;
448
449
new = V_chd_pmax;
450
error = sysctl_handle_int(oidp, &new, 0, req);
451
if (error == 0 && req->newptr != NULL) {
452
if (new == 0 || new > 100)
453
error = EINVAL;
454
else
455
V_chd_pmax = new;
456
}
457
458
return (error);
459
}
460
461
static int
462
chd_qthresh_handler(SYSCTL_HANDLER_ARGS)
463
{
464
int error;
465
uint32_t new;
466
467
new = V_chd_qthresh;
468
error = sysctl_handle_int(oidp, &new, 0, req);
469
if (error == 0 && req->newptr != NULL) {
470
if (new <= V_chd_qmin)
471
error = EINVAL;
472
else
473
V_chd_qthresh = new;
474
}
475
476
return (error);
477
}
478
479
SYSCTL_DECL(_net_inet_tcp_cc_chd);
480
SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, chd, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
481
"CAIA Hamilton delay-based congestion control related settings");
482
483
SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, loss_fair,
484
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
485
&VNET_NAME(chd_loss_fair), 1, &chd_loss_fair_handler,
486
"IU", "Flag to enable shadow window functionality.");
487
488
SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, pmax,
489
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
490
&VNET_NAME(chd_pmax), 5, &chd_pmax_handler,
491
"IU", "Per RTT maximum backoff probability as a percentage");
492
493
SYSCTL_PROC(_net_inet_tcp_cc_chd, OID_AUTO, queue_threshold,
494
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
495
&VNET_NAME(chd_qthresh), 20, &chd_qthresh_handler,
496
"IU", "Queueing congestion threshold in ticks");
497
498
SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, queue_min,
499
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(chd_qmin), 5,
500
"Minimum queueing delay threshold in ticks");
501
502
SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, use_max,
503
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(chd_use_max), 1,
504
"Use the maximum RTT seen within the measurement period (RTT) "
505
"as the basic delay measurement for the algorithm.");
506
507
DECLARE_CC_MODULE(chd, &chd_cc_algo);
508
MODULE_VERSION(chd, 2);
509
MODULE_DEPEND(chd, ertt, 1, 1, 1);
510
511