Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet/cc/cc_cdg.c
39485 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2009-2013
5
* Swinburne University of Technology, Melbourne, Australia
6
* All rights reserved.
7
*
8
* This software was developed at the Centre for Advanced Internet
9
* Architectures, Swinburne University of Technology, by David Hayes, made
10
* possible in part by a gift from The Cisco University Research Program Fund,
11
* a corporate advised fund of Silicon Valley Community Foundation. Development
12
* and testing were further assisted by a grant from the FreeBSD Foundation.
13
*
14
* Redistribution and use in source and binary forms, with or without
15
* modification, are permitted provided that the following conditions
16
* are met:
17
* 1. Redistributions of source code must retain the above copyright
18
* notice, this list of conditions and the following disclaimer.
19
* 2. Redistributions in binary form must reproduce the above copyright
20
* notice, this list of conditions and the following disclaimer in the
21
* documentation and/or other materials provided with the distribution.
22
*
23
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
* SUCH DAMAGE.
34
*/
35
36
/*
37
* CAIA Delay-Gradient (CDG) congestion control algorithm
38
*
39
* An implemention of the delay-gradient congestion control algorithm proposed
40
* in the following paper:
41
*
42
* D. A. Hayes and G. Armitage, "Revisiting TCP Congestion Control using Delay
43
* Gradients", in IFIP Networking, Valencia, Spain, 9-13 May 2011.
44
*
45
* Developed as part of the NewTCP research project at Swinburne University of
46
* Technology's Centre for Advanced Internet Architectures, Melbourne,
47
* Australia. More details are available at:
48
* http://caia.swin.edu.au/urp/newtcp/
49
*/
50
51
#include <sys/param.h>
52
#include <sys/hhook.h>
53
#include <sys/kernel.h>
54
#include <sys/khelp.h>
55
#include <sys/limits.h>
56
#include <sys/lock.h>
57
#include <sys/malloc.h>
58
#include <sys/module.h>
59
#include <sys/queue.h>
60
#include <sys/prng.h>
61
#include <sys/socket.h>
62
#include <sys/socketvar.h>
63
#include <sys/sysctl.h>
64
#include <sys/systm.h>
65
66
#include <net/vnet.h>
67
68
#include <net/route.h>
69
#include <net/route/nhop.h>
70
71
#include <netinet/in_pcb.h>
72
#include <netinet/tcp.h>
73
#include <netinet/tcp_seq.h>
74
#include <netinet/tcp_timer.h>
75
#include <netinet/tcp_var.h>
76
#include <netinet/cc/cc.h>
77
#include <netinet/cc/cc_module.h>
78
79
#include <netinet/khelp/h_ertt.h>
80
81
#include <vm/uma.h>
82
83
#define CDG_VERSION "0.1"
84
85
/* Private delay-gradient induced congestion control signal. */
86
#define CC_CDG_DELAY 0x01000000
87
88
/* NewReno window deflation factor on loss (as a percentage). */
89
#define RENO_BETA 50
90
91
/* Queue states. */
92
#define CDG_Q_EMPTY 1
93
#define CDG_Q_RISING 2
94
#define CDG_Q_FALLING 3
95
#define CDG_Q_FULL 4
96
#define CDG_Q_UNKNOWN 9999
97
98
/* Number of bit shifts used in probexp lookup table. */
99
#define EXP_PREC 15
100
101
/* Largest gradient represented in probexp lookup table. */
102
#define MAXGRAD 5
103
104
/*
105
* Delay Precision Enhance - number of bit shifts used for qtrend related
106
* integer arithmetic precision.
107
*/
108
#define D_P_E 7
109
110
struct qdiff_sample {
111
long qdiff;
112
STAILQ_ENTRY(qdiff_sample) qdiff_lnk;
113
};
114
115
struct cdg {
116
long max_qtrend;
117
long min_qtrend;
118
STAILQ_HEAD(minrtts_head, qdiff_sample) qdiffmin_q;
119
STAILQ_HEAD(maxrtts_head, qdiff_sample) qdiffmax_q;
120
long window_incr;
121
/* rttcount for window increase when in congestion avoidance */
122
long rtt_count;
123
/* maximum measured rtt within an rtt period */
124
int maxrtt_in_rtt;
125
/* maximum measured rtt within prev rtt period */
126
int maxrtt_in_prevrtt;
127
/* minimum measured rtt within an rtt period */
128
int minrtt_in_rtt;
129
/* minimum measured rtt within prev rtt period */
130
int minrtt_in_prevrtt;
131
/* consecutive congestion episode counter */
132
uint32_t consec_cong_cnt;
133
/* when tracking a new reno type loss window */
134
uint32_t shadow_w;
135
/* maximum number of samples in the moving average queue */
136
int sample_q_size;
137
/* number of samples in the moving average queue */
138
int num_samples;
139
/* estimate of the queue state of the path */
140
int queue_state;
141
};
142
143
/*
144
* Lookup table for:
145
* (1 - exp(-x)) << EXP_PREC, where x = [0,MAXGRAD] in 2^-7 increments
146
*
147
* Note: probexp[0] is set to 10 (not 0) as a safety for very low increase
148
* gradients.
149
*/
150
static const int probexp[641] = {
151
10,255,508,759,1008,1255,1501,1744,1985,2225,2463,2698,2932,3165,3395,3624,
152
3850,4075,4299,4520,4740,4958,5175,5389,5602,5814,6024,6232,6438,6643,6846,
153
7048,7248,7447,7644,7839,8033,8226,8417,8606,8794,8981,9166,9350,9532,9713,
154
9892,10070,10247,10422,10596,10769,10940,11110,11278,11445,11611,11776,11939,
155
12101,12262,12422,12580,12737,12893,13048,13201,13354,13505,13655,13803,13951,
156
14097,14243,14387,14530,14672,14813,14952,15091,15229,15365,15500,15635,15768,
157
15900,16032,16162,16291,16419,16547,16673,16798,16922,17046,17168,17289,17410,
158
17529,17648,17766,17882,17998,18113,18227,18340,18453,18564,18675,18784,18893,
159
19001,19108,19215,19320,19425,19529,19632,19734,19835,19936,20036,20135,20233,
160
20331,20427,20523,20619,20713,20807,20900,20993,21084,21175,21265,21355,21444,
161
21532,21619,21706,21792,21878,21962,22046,22130,22213,22295,22376,22457,22537,
162
22617,22696,22774,22852,22929,23006,23082,23157,23232,23306,23380,23453,23525,
163
23597,23669,23739,23810,23879,23949,24017,24085,24153,24220,24286,24352,24418,
164
24483,24547,24611,24675,24738,24800,24862,24924,24985,25045,25106,25165,25224,
165
25283,25341,25399,25456,25513,25570,25626,25681,25737,25791,25846,25899,25953,
166
26006,26059,26111,26163,26214,26265,26316,26366,26416,26465,26514,26563,26611,
167
26659,26707,26754,26801,26847,26893,26939,26984,27029,27074,27118,27162,27206,
168
27249,27292,27335,27377,27419,27460,27502,27543,27583,27624,27664,27703,27743,
169
27782,27821,27859,27897,27935,27973,28010,28047,28084,28121,28157,28193,28228,
170
28263,28299,28333,28368,28402,28436,28470,28503,28536,28569,28602,28634,28667,
171
28699,28730,28762,28793,28824,28854,28885,28915,28945,28975,29004,29034,29063,
172
29092,29120,29149,29177,29205,29232,29260,29287,29314,29341,29368,29394,29421,
173
29447,29472,29498,29524,29549,29574,29599,29623,29648,29672,29696,29720,29744,
174
29767,29791,29814,29837,29860,29882,29905,29927,29949,29971,29993,30014,30036,
175
30057,30078,30099,30120,30141,30161,30181,30201,30221,30241,30261,30280,30300,
176
30319,30338,30357,30376,30394,30413,30431,30449,30467,30485,30503,30521,30538,
177
30555,30573,30590,30607,30624,30640,30657,30673,30690,30706,30722,30738,30753,
178
30769,30785,30800,30815,30831,30846,30861,30876,30890,30905,30919,30934,30948,
179
30962,30976,30990,31004,31018,31031,31045,31058,31072,31085,31098,31111,31124,
180
31137,31149,31162,31174,31187,31199,31211,31223,31235,31247,31259,31271,31283,
181
31294,31306,31317,31328,31339,31351,31362,31373,31383,31394,31405,31416,31426,
182
31436,31447,31457,31467,31477,31487,31497,31507,31517,31527,31537,31546,31556,
183
31565,31574,31584,31593,31602,31611,31620,31629,31638,31647,31655,31664,31673,
184
31681,31690,31698,31706,31715,31723,31731,31739,31747,31755,31763,31771,31778,
185
31786,31794,31801,31809,31816,31824,31831,31838,31846,31853,31860,31867,31874,
186
31881,31888,31895,31902,31908,31915,31922,31928,31935,31941,31948,31954,31960,
187
31967,31973,31979,31985,31991,31997,32003,32009,32015,32021,32027,32033,32038,
188
32044,32050,32055,32061,32066,32072,32077,32083,32088,32093,32098,32104,32109,
189
32114,32119,32124,32129,32134,32139,32144,32149,32154,32158,32163,32168,32173,
190
32177,32182,32186,32191,32195,32200,32204,32209,32213,32217,32222,32226,32230,
191
32234,32238,32242,32247,32251,32255,32259,32263,32267,32270,32274,32278,32282,
192
32286,32290,32293,32297,32301,32304,32308,32311,32315,32318,32322,32325,32329,
193
32332,32336,32339,32342,32346,32349,32352,32356,32359,32362,32365,32368,32371,
194
32374,32377,32381,32384,32387,32389,32392,32395,32398,32401,32404,32407,32410,
195
32412,32415,32418,32421,32423,32426,32429,32431,32434,32437,32439,32442,32444,
196
32447,32449,32452,32454,32457,32459,32461,32464,32466,32469,32471,32473,32476,
197
32478,32480,32482,32485,32487,32489,32491,32493,32495,32497,32500,32502,32504,
198
32506,32508,32510,32512,32514,32516,32518,32520,32522,32524,32526,32527,32529,
199
32531,32533,32535,32537,32538,32540,32542,32544,32545,32547};
200
201
static uma_zone_t qdiffsample_zone;
202
static int ertt_id;
203
204
VNET_DEFINE_STATIC(uint32_t, cdg_alpha_inc);
205
VNET_DEFINE_STATIC(uint32_t, cdg_beta_delay);
206
VNET_DEFINE_STATIC(uint32_t, cdg_beta_loss);
207
VNET_DEFINE_STATIC(uint32_t, cdg_smoothing_factor);
208
VNET_DEFINE_STATIC(uint32_t, cdg_exp_backoff_scale);
209
VNET_DEFINE_STATIC(uint32_t, cdg_consec_cong);
210
VNET_DEFINE_STATIC(uint32_t, cdg_hold_backoff);
211
#define V_cdg_alpha_inc VNET(cdg_alpha_inc)
212
#define V_cdg_beta_delay VNET(cdg_beta_delay)
213
#define V_cdg_beta_loss VNET(cdg_beta_loss)
214
#define V_cdg_smoothing_factor VNET(cdg_smoothing_factor)
215
#define V_cdg_exp_backoff_scale VNET(cdg_exp_backoff_scale)
216
#define V_cdg_consec_cong VNET(cdg_consec_cong)
217
#define V_cdg_hold_backoff VNET(cdg_hold_backoff)
218
219
/* Function prototypes. */
220
static int cdg_mod_init(void);
221
static int cdg_mod_destroy(void);
222
static void cdg_conn_init(struct cc_var *ccv);
223
static int cdg_cb_init(struct cc_var *ccv, void *ptr);
224
static void cdg_cb_destroy(struct cc_var *ccv);
225
static void cdg_cong_signal(struct cc_var *ccv, ccsignal_t signal_type);
226
static void cdg_ack_received(struct cc_var *ccv, ccsignal_t ack_type);
227
static size_t cdg_data_sz(void);
228
229
struct cc_algo cdg_cc_algo = {
230
.name = "cdg",
231
.mod_init = cdg_mod_init,
232
.ack_received = cdg_ack_received,
233
.cb_destroy = cdg_cb_destroy,
234
.cb_init = cdg_cb_init,
235
.conn_init = cdg_conn_init,
236
.cong_signal = cdg_cong_signal,
237
.mod_destroy = cdg_mod_destroy,
238
.cc_data_sz = cdg_data_sz,
239
.post_recovery = newreno_cc_post_recovery,
240
.after_idle = newreno_cc_after_idle,
241
};
242
243
/* Vnet created and being initialised. */
244
static void
245
cdg_init_vnet(const void *unused __unused)
246
{
247
248
V_cdg_alpha_inc = 0;
249
V_cdg_beta_delay = 70;
250
V_cdg_beta_loss = 50;
251
V_cdg_smoothing_factor = 8;
252
V_cdg_exp_backoff_scale = 3;
253
V_cdg_consec_cong = 5;
254
V_cdg_hold_backoff = 5;
255
}
256
257
static int
258
cdg_mod_init(void)
259
{
260
VNET_ITERATOR_DECL(v);
261
262
ertt_id = khelp_get_id("ertt");
263
if (ertt_id <= 0)
264
return (EINVAL);
265
266
qdiffsample_zone = uma_zcreate("cdg_qdiffsample",
267
sizeof(struct qdiff_sample), NULL, NULL, NULL, NULL, 0, 0);
268
269
VNET_LIST_RLOCK();
270
VNET_FOREACH(v) {
271
CURVNET_SET(v);
272
cdg_init_vnet(NULL);
273
CURVNET_RESTORE();
274
}
275
VNET_LIST_RUNLOCK();
276
return (0);
277
}
278
279
static int
280
cdg_mod_destroy(void)
281
{
282
283
uma_zdestroy(qdiffsample_zone);
284
return (0);
285
}
286
287
static size_t
288
cdg_data_sz(void)
289
{
290
return (sizeof(struct cdg));
291
}
292
293
static int
294
cdg_cb_init(struct cc_var *ccv, void *ptr)
295
{
296
struct cdg *cdg_data;
297
298
INP_WLOCK_ASSERT(tptoinpcb(ccv->tp));
299
if (ptr == NULL) {
300
cdg_data = malloc(sizeof(struct cdg), M_CC_MEM, M_NOWAIT);
301
if (cdg_data == NULL)
302
return (ENOMEM);
303
} else {
304
cdg_data = ptr;
305
}
306
cdg_data->shadow_w = 0;
307
cdg_data->max_qtrend = 0;
308
cdg_data->min_qtrend = 0;
309
cdg_data->queue_state = CDG_Q_UNKNOWN;
310
cdg_data->maxrtt_in_rtt = 0;
311
cdg_data->maxrtt_in_prevrtt = 0;
312
cdg_data->minrtt_in_rtt = INT_MAX;
313
cdg_data->minrtt_in_prevrtt = 0;
314
cdg_data->window_incr = 0;
315
cdg_data->rtt_count = 0;
316
cdg_data->consec_cong_cnt = 0;
317
cdg_data->sample_q_size = V_cdg_smoothing_factor;
318
cdg_data->num_samples = 0;
319
STAILQ_INIT(&cdg_data->qdiffmin_q);
320
STAILQ_INIT(&cdg_data->qdiffmax_q);
321
322
ccv->cc_data = cdg_data;
323
324
return (0);
325
}
326
327
static void
328
cdg_conn_init(struct cc_var *ccv)
329
{
330
struct cdg *cdg_data = ccv->cc_data;
331
332
/*
333
* Initialise the shadow_cwnd in case we are competing with loss based
334
* flows from the start
335
*/
336
cdg_data->shadow_w = CCV(ccv, snd_cwnd);
337
}
338
339
static void
340
cdg_cb_destroy(struct cc_var *ccv)
341
{
342
struct cdg *cdg_data;
343
struct qdiff_sample *qds, *qds_n;
344
345
cdg_data = ccv->cc_data;
346
347
qds = STAILQ_FIRST(&cdg_data->qdiffmin_q);
348
while (qds != NULL) {
349
qds_n = STAILQ_NEXT(qds, qdiff_lnk);
350
uma_zfree(qdiffsample_zone,qds);
351
qds = qds_n;
352
}
353
354
qds = STAILQ_FIRST(&cdg_data->qdiffmax_q);
355
while (qds != NULL) {
356
qds_n = STAILQ_NEXT(qds, qdiff_lnk);
357
uma_zfree(qdiffsample_zone,qds);
358
qds = qds_n;
359
}
360
361
free(ccv->cc_data, M_CC_MEM);
362
}
363
364
static int
365
cdg_beta_handler(SYSCTL_HANDLER_ARGS)
366
{
367
int error;
368
uint32_t new;
369
370
new = *(uint32_t *)arg1;
371
error = sysctl_handle_int(oidp, &new, 0, req);
372
if (error == 0 && req->newptr != NULL) {
373
if (new == 0 || new > 100)
374
error = EINVAL;
375
else
376
*(uint32_t *)arg1 = new;
377
}
378
379
return (error);
380
}
381
382
static int
383
cdg_exp_backoff_scale_handler(SYSCTL_HANDLER_ARGS)
384
{
385
int error;
386
uint32_t new;
387
388
new = *(uint32_t *)arg1;
389
error = sysctl_handle_int(oidp, &new, 0, req);
390
if (error == 0 && req->newptr != NULL) {
391
if (new < 1)
392
error = EINVAL;
393
else
394
*(uint32_t *)arg1 = new;
395
}
396
397
return (error);
398
}
399
400
static inline uint32_t
401
cdg_window_decrease(struct cc_var *ccv, unsigned long owin, unsigned int beta)
402
{
403
404
return ((ulmin(CCV(ccv, snd_wnd), owin) * beta) / 100);
405
}
406
407
/*
408
* Window increase function
409
* This window increase function is independent of the initial window size
410
* to ensure small window flows are not discriminated against (i.e. fairness).
411
* It increases at 1pkt/rtt like Reno for alpha_inc rtts, and then 2pkts/rtt for
412
* the next alpha_inc rtts, etc.
413
*/
414
static void
415
cdg_window_increase(struct cc_var *ccv, int new_measurement)
416
{
417
struct cdg *cdg_data;
418
int incr, s_w_incr;
419
uint32_t mss = tcp_fixed_maxseg(ccv->tp);
420
421
cdg_data = ccv->cc_data;
422
incr = s_w_incr = 0;
423
424
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
425
/* Slow start. */
426
incr = mss;
427
s_w_incr = incr;
428
cdg_data->window_incr = cdg_data->rtt_count = 0;
429
} else {
430
/* Congestion avoidance. */
431
if (new_measurement) {
432
s_w_incr = mss;
433
if (V_cdg_alpha_inc == 0) {
434
incr = mss;
435
} else {
436
if (++cdg_data->rtt_count >= V_cdg_alpha_inc) {
437
cdg_data->window_incr++;
438
cdg_data->rtt_count = 0;
439
}
440
incr = mss *
441
cdg_data->window_incr;
442
}
443
}
444
}
445
446
if (cdg_data->shadow_w > 0)
447
cdg_data->shadow_w = ulmin(cdg_data->shadow_w + s_w_incr,
448
TCP_MAXWIN << CCV(ccv, snd_scale));
449
450
CCV(ccv, snd_cwnd) = ulmin(CCV(ccv, snd_cwnd) + incr,
451
TCP_MAXWIN << CCV(ccv, snd_scale));
452
}
453
454
static void
455
cdg_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
456
{
457
struct cdg *cdg_data = ccv->cc_data;
458
459
switch((int)signal_type) {
460
case CC_CDG_DELAY:
461
CCV(ccv, snd_ssthresh) = cdg_window_decrease(ccv,
462
CCV(ccv, snd_cwnd), V_cdg_beta_delay);
463
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
464
CCV(ccv, snd_recover) = CCV(ccv, snd_max);
465
cdg_data->window_incr = cdg_data->rtt_count = 0;
466
ENTER_CONGRECOVERY(CCV(ccv, t_flags));
467
break;
468
case CC_NDUPACK:
469
/*
470
* If already responding to congestion OR we have guessed no
471
* queue in the path is full.
472
*/
473
if (IN_CONGRECOVERY(CCV(ccv, t_flags)) ||
474
cdg_data->queue_state < CDG_Q_FULL) {
475
CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
476
CCV(ccv, snd_recover) = CCV(ccv, snd_max);
477
} else {
478
/*
479
* Loss is likely to be congestion related. We have
480
* inferred a queue full state, so have shadow window
481
* react to loss as NewReno would.
482
*/
483
if (cdg_data->shadow_w > 0)
484
cdg_data->shadow_w = cdg_window_decrease(ccv,
485
cdg_data->shadow_w, RENO_BETA);
486
487
CCV(ccv, snd_ssthresh) = max(cdg_data->shadow_w,
488
cdg_window_decrease(ccv, CCV(ccv, snd_cwnd),
489
V_cdg_beta_loss));
490
491
cdg_data->window_incr = cdg_data->rtt_count = 0;
492
}
493
ENTER_RECOVERY(CCV(ccv, t_flags));
494
break;
495
default:
496
newreno_cc_cong_signal(ccv, signal_type);
497
break;
498
}
499
}
500
501
/*
502
* Using a negative exponential probabilistic backoff so that sources with
503
* varying RTTs which share the same link will, on average, have the same
504
* probability of backoff over time.
505
*
506
* Prob_backoff = 1 - exp(-qtrend / V_cdg_exp_backoff_scale), where
507
* V_cdg_exp_backoff_scale is the average qtrend for the exponential backoff.
508
*/
509
static inline int
510
prob_backoff(long qtrend)
511
{
512
int backoff, idx;
513
uint32_t p;
514
515
backoff = (qtrend > ((MAXGRAD * V_cdg_exp_backoff_scale) << D_P_E));
516
517
if (!backoff) {
518
if (V_cdg_exp_backoff_scale > 1)
519
idx = (qtrend + V_cdg_exp_backoff_scale / 2) /
520
V_cdg_exp_backoff_scale;
521
else
522
idx = qtrend;
523
524
/* Backoff probability proportional to rate of queue growth. */
525
p = (UINT32_MAX / (1 << EXP_PREC)) * probexp[idx];
526
backoff = (prng32() < p);
527
}
528
529
return (backoff);
530
}
531
532
static inline void
533
calc_moving_average(struct cdg *cdg_data, long qdiff_max, long qdiff_min)
534
{
535
struct qdiff_sample *qds;
536
537
++cdg_data->num_samples;
538
if (cdg_data->num_samples > cdg_data->sample_q_size) {
539
/* Minimum RTT. */
540
qds = STAILQ_FIRST(&cdg_data->qdiffmin_q);
541
cdg_data->min_qtrend = cdg_data->min_qtrend +
542
(qdiff_min - qds->qdiff) / cdg_data->sample_q_size;
543
STAILQ_REMOVE_HEAD(&cdg_data->qdiffmin_q, qdiff_lnk);
544
qds->qdiff = qdiff_min;
545
STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds, qdiff_lnk);
546
547
/* Maximum RTT. */
548
qds = STAILQ_FIRST(&cdg_data->qdiffmax_q);
549
cdg_data->max_qtrend = cdg_data->max_qtrend +
550
(qdiff_max - qds->qdiff) / cdg_data->sample_q_size;
551
STAILQ_REMOVE_HEAD(&cdg_data->qdiffmax_q, qdiff_lnk);
552
qds->qdiff = qdiff_max;
553
STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds, qdiff_lnk);
554
--cdg_data->num_samples;
555
} else {
556
qds = uma_zalloc(qdiffsample_zone, M_NOWAIT);
557
if (qds != NULL) {
558
cdg_data->min_qtrend = cdg_data->min_qtrend +
559
qdiff_min / cdg_data->sample_q_size;
560
qds->qdiff = qdiff_min;
561
STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds,
562
qdiff_lnk);
563
}
564
565
qds = uma_zalloc(qdiffsample_zone, M_NOWAIT);
566
if (qds) {
567
cdg_data->max_qtrend = cdg_data->max_qtrend +
568
qdiff_max / cdg_data->sample_q_size;
569
qds->qdiff = qdiff_max;
570
STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds,
571
qdiff_lnk);
572
}
573
}
574
}
575
576
static void
577
cdg_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
578
{
579
struct cdg *cdg_data;
580
struct ertt *e_t;
581
long qdiff_max, qdiff_min;
582
int congestion, new_measurement, slowstart;
583
584
cdg_data = ccv->cc_data;
585
e_t = (struct ertt *)khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
586
new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT;
587
congestion = 0;
588
cdg_data->maxrtt_in_rtt = imax(e_t->rtt, cdg_data->maxrtt_in_rtt);
589
cdg_data->minrtt_in_rtt = imin(e_t->rtt, cdg_data->minrtt_in_rtt);
590
591
if (new_measurement) {
592
slowstart = (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh));
593
/*
594
* Update smoothed gradient measurements. Since we are only
595
* using one measurement per RTT, use max or min rtt_in_rtt.
596
* This is also less noisy than a sample RTT measurement. Max
597
* RTT measurements can have trouble due to OS issues.
598
*/
599
if (cdg_data->maxrtt_in_prevrtt) {
600
qdiff_max = ((long)(cdg_data->maxrtt_in_rtt -
601
cdg_data->maxrtt_in_prevrtt) << D_P_E );
602
qdiff_min = ((long)(cdg_data->minrtt_in_rtt -
603
cdg_data->minrtt_in_prevrtt) << D_P_E );
604
605
if (cdg_data->sample_q_size == 0) {
606
cdg_data->max_qtrend = qdiff_max;
607
cdg_data->min_qtrend = qdiff_min;
608
} else
609
calc_moving_average(cdg_data, qdiff_max, qdiff_min);
610
611
/* Probabilistic backoff with respect to gradient. */
612
if (slowstart && qdiff_min > 0)
613
congestion = prob_backoff(qdiff_min);
614
else if (cdg_data->min_qtrend > 0)
615
congestion = prob_backoff(cdg_data->min_qtrend);
616
else if (slowstart && qdiff_max > 0)
617
congestion = prob_backoff(qdiff_max);
618
else if (cdg_data->max_qtrend > 0)
619
congestion = prob_backoff(cdg_data->max_qtrend);
620
621
/* Update estimate of queue state. */
622
if (cdg_data->min_qtrend > 0 &&
623
cdg_data->max_qtrend <= 0) {
624
cdg_data->queue_state = CDG_Q_FULL;
625
} else if (cdg_data->min_qtrend >= 0 &&
626
cdg_data->max_qtrend < 0) {
627
cdg_data->queue_state = CDG_Q_EMPTY;
628
cdg_data->shadow_w = 0;
629
} else if (cdg_data->min_qtrend > 0 &&
630
cdg_data->max_qtrend > 0) {
631
cdg_data->queue_state = CDG_Q_RISING;
632
} else if (cdg_data->min_qtrend < 0 &&
633
cdg_data->max_qtrend < 0) {
634
cdg_data->queue_state = CDG_Q_FALLING;
635
}
636
637
if (cdg_data->min_qtrend < 0 ||
638
cdg_data->max_qtrend < 0)
639
cdg_data->consec_cong_cnt = 0;
640
}
641
642
cdg_data->minrtt_in_prevrtt = cdg_data->minrtt_in_rtt;
643
cdg_data->minrtt_in_rtt = INT_MAX;
644
cdg_data->maxrtt_in_prevrtt = cdg_data->maxrtt_in_rtt;
645
cdg_data->maxrtt_in_rtt = 0;
646
e_t->flags &= ~ERTT_NEW_MEASUREMENT;
647
}
648
649
if (congestion) {
650
cdg_data->consec_cong_cnt++;
651
if (!IN_RECOVERY(CCV(ccv, t_flags))) {
652
if (cdg_data->consec_cong_cnt <= V_cdg_consec_cong)
653
cdg_cong_signal(ccv, CC_CDG_DELAY);
654
else
655
/*
656
* We have been backing off but the queue is not
657
* falling. Assume we are competing with
658
* loss-based flows and don't back off for the
659
* next V_cdg_hold_backoff RTT periods.
660
*/
661
if (cdg_data->consec_cong_cnt >=
662
V_cdg_consec_cong + V_cdg_hold_backoff)
663
cdg_data->consec_cong_cnt = 0;
664
665
/* Won't see effect until 2nd RTT. */
666
cdg_data->maxrtt_in_prevrtt = 0;
667
/*
668
* Resync shadow window in case we are competing with a
669
* loss based flow
670
*/
671
cdg_data->shadow_w = ulmax(CCV(ccv, snd_cwnd),
672
cdg_data->shadow_w);
673
}
674
} else if (ack_type == CC_ACK)
675
cdg_window_increase(ccv, new_measurement);
676
}
677
678
/* When a vnet is created and being initialised, init the per-stack CDG vars. */
679
VNET_SYSINIT(cdg_init_vnet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
680
cdg_init_vnet, NULL);
681
682
SYSCTL_DECL(_net_inet_tcp_cc_cdg);
683
SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, cdg, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
684
"CAIA delay-gradient congestion control related settings");
685
686
SYSCTL_STRING(_net_inet_tcp_cc_cdg, OID_AUTO, version,
687
CTLFLAG_RD, CDG_VERSION, sizeof(CDG_VERSION) - 1,
688
"Current algorithm/implementation version number");
689
690
SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, alpha_inc,
691
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_alpha_inc), 0,
692
"Increment the window increase factor alpha by 1 MSS segment every "
693
"alpha_inc RTTs during congestion avoidance mode.");
694
695
SYSCTL_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_delay,
696
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
697
&VNET_NAME(cdg_beta_delay), 70, &cdg_beta_handler, "IU",
698
"Delay-based window decrease factor as a percentage "
699
"(on delay-based backoff, w = w * beta_delay / 100)");
700
701
SYSCTL_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_loss,
702
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
703
&VNET_NAME(cdg_beta_loss), 50, &cdg_beta_handler, "IU",
704
"Loss-based window decrease factor as a percentage "
705
"(on loss-based backoff, w = w * beta_loss / 100)");
706
707
SYSCTL_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, exp_backoff_scale,
708
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
709
&VNET_NAME(cdg_exp_backoff_scale), 2, &cdg_exp_backoff_scale_handler, "IU",
710
"Scaling parameter for the probabilistic exponential backoff");
711
712
SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, smoothing_factor,
713
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_smoothing_factor), 8,
714
"Number of samples used for moving average smoothing (0 = no smoothing)");
715
716
SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_consec_cong,
717
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_consec_cong), 5,
718
"Number of consecutive delay-gradient based congestion episodes which will "
719
"trigger loss based CC compatibility");
720
721
SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_hold_backoff,
722
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_hold_backoff), 5,
723
"Number of consecutive delay-gradient based congestion episodes to hold "
724
"the window backoff for loss based CC compatibility");
725
726
DECLARE_CC_MODULE(cdg, &cdg_cc_algo);
727
MODULE_VERSION(cdg, 2);
728
MODULE_DEPEND(cdg, ertt, 1, 1, 1);
729
730