Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet/cc/cc.c
39476 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2007-2008
5
* Swinburne University of Technology, Melbourne, Australia.
6
* Copyright (c) 2009-2010 Lawrence Stewart <[email protected]>
7
* Copyright (c) 2010 The FreeBSD Foundation
8
* All rights reserved.
9
*
10
* This software was developed at the Centre for Advanced Internet
11
* Architectures, Swinburne University of Technology, by Lawrence Stewart and
12
* James Healy, made possible in part by a grant from the Cisco University
13
* Research Program Fund at Community Foundation Silicon Valley.
14
*
15
* Portions of this software were developed at the Centre for Advanced
16
* Internet Architectures, Swinburne University of Technology, Melbourne,
17
* Australia by David Hayes under sponsorship from the FreeBSD Foundation.
18
*
19
* Redistribution and use in source and binary forms, with or without
20
* modification, are permitted provided that the following conditions
21
* are met:
22
* 1. Redistributions of source code must retain the above copyright
23
* notice, this list of conditions and the following disclaimer.
24
* 2. Redistributions in binary form must reproduce the above copyright
25
* notice, this list of conditions and the following disclaimer in the
26
* documentation and/or other materials provided with the distribution.
27
*
28
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
29
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38
* SUCH DAMAGE.
39
*/
40
41
/*
42
* This software was first released in 2007 by James Healy and Lawrence Stewart
43
* whilst working on the NewTCP research project at Swinburne University of
44
* Technology's Centre for Advanced Internet Architectures, Melbourne,
45
* Australia, which was made possible in part by a grant from the Cisco
46
* University Research Program Fund at Community Foundation Silicon Valley.
47
* More details are available at:
48
* http://caia.swin.edu.au/urp/newtcp/
49
*/
50
51
#include <sys/cdefs.h>
52
#include <opt_cc.h>
53
#include <sys/param.h>
54
#include <sys/kernel.h>
55
#include <sys/libkern.h>
56
#include <sys/lock.h>
57
#include <sys/malloc.h>
58
#include <sys/module.h>
59
#include <sys/mutex.h>
60
#include <sys/queue.h>
61
#include <sys/rwlock.h>
62
#include <sys/sbuf.h>
63
#include <sys/socket.h>
64
#include <sys/socketvar.h>
65
#include <sys/sysctl.h>
66
67
#include <net/vnet.h>
68
69
#include <netinet/in.h>
70
#include <netinet/in_pcb.h>
71
#include <netinet/tcp.h>
72
#include <netinet/tcp_seq.h>
73
#include <netinet/tcp_var.h>
74
#include <netinet/tcp_log_buf.h>
75
#include <netinet/tcp_hpts.h>
76
#include <netinet/cc/cc.h>
77
#include <netinet/cc/cc_module.h>
78
79
/*
80
* Have a sane default if no CC_DEFAULT is specified in the kernel config file.
81
*/
82
#ifndef CC_DEFAULT
83
#define CC_DEFAULT "cubic"
84
#endif
85
86
uint32_t hystart_minrtt_thresh = 4000;
87
uint32_t hystart_maxrtt_thresh = 16000;
88
uint32_t hystart_n_rttsamples = 8;
89
uint32_t hystart_css_growth_div = 4;
90
uint32_t hystart_css_rounds = 5;
91
uint32_t hystart_bblogs = 0;
92
93
MALLOC_DEFINE(M_CC_MEM, "CC Mem", "Congestion Control State memory");
94
95
/*
96
* List of available cc algorithms on the current system. First element
97
* is used as the system default CC algorithm.
98
*/
99
struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
100
101
/* Protects the cc_list TAILQ. */
102
struct rwlock cc_list_lock;
103
104
VNET_DEFINE(struct cc_algo *, default_cc_ptr) = NULL;
105
106
VNET_DEFINE(uint32_t, newreno_beta) = 50;
107
#define V_newreno_beta VNET(newreno_beta)
108
VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
109
110
void
111
cc_refer(struct cc_algo *algo)
112
{
113
CC_LIST_LOCK_ASSERT();
114
refcount_acquire(&algo->cc_refcount);
115
}
116
117
void
118
cc_release(struct cc_algo *algo)
119
{
120
CC_LIST_LOCK_ASSERT();
121
refcount_release(&algo->cc_refcount);
122
}
123
124
125
void
126
cc_attach(struct tcpcb *tp, struct cc_algo *algo)
127
{
128
/*
129
* Attach the tcpcb to the algorithm.
130
*/
131
CC_LIST_RLOCK();
132
CC_ALGO(tp) = algo;
133
cc_refer(algo);
134
CC_LIST_RUNLOCK();
135
}
136
137
void
138
cc_detach(struct tcpcb *tp)
139
{
140
struct cc_algo *algo;
141
142
CC_LIST_RLOCK();
143
algo = CC_ALGO(tp);
144
CC_ALGO(tp) = NULL;
145
cc_release(algo);
146
CC_LIST_RUNLOCK();
147
}
148
149
/*
150
* Sysctl handler to show and change the default CC algorithm.
151
*/
152
static int
153
cc_default_algo(SYSCTL_HANDLER_ARGS)
154
{
155
char default_cc[TCP_CA_NAME_MAX];
156
struct cc_algo *funcs;
157
int error;
158
159
/* Get the current default: */
160
CC_LIST_RLOCK();
161
if (CC_DEFAULT_ALGO() != NULL)
162
strlcpy(default_cc, CC_DEFAULT_ALGO()->name, sizeof(default_cc));
163
else
164
memset(default_cc, 0, TCP_CA_NAME_MAX);
165
CC_LIST_RUNLOCK();
166
167
error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
168
169
/* Check for error or no change */
170
if (error != 0 || req->newptr == NULL)
171
goto done;
172
173
error = ESRCH;
174
/* Find algo with specified name and set it to default. */
175
CC_LIST_RLOCK();
176
STAILQ_FOREACH(funcs, &cc_list, entries) {
177
if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
178
continue;
179
if (funcs->flags & CC_MODULE_BEING_REMOVED) {
180
/* Its being removed, its not eligible */
181
continue;
182
}
183
V_default_cc_ptr = funcs;
184
error = 0;
185
break;
186
}
187
CC_LIST_RUNLOCK();
188
done:
189
return (error);
190
}
191
192
/*
193
* Sysctl handler to display the list of available CC algorithms.
194
*/
195
static int
196
cc_list_available(SYSCTL_HANDLER_ARGS)
197
{
198
struct cc_algo *algo;
199
int error, nalgos;
200
int linesz;
201
char *buffer, *cp;
202
size_t bufsz, outsz;
203
204
error = nalgos = 0;
205
CC_LIST_RLOCK();
206
STAILQ_FOREACH(algo, &cc_list, entries) {
207
nalgos++;
208
}
209
CC_LIST_RUNLOCK();
210
if (nalgos == 0) {
211
return (ENOENT);
212
}
213
bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1);
214
buffer = malloc(bufsz, M_TEMP, M_WAITOK);
215
cp = buffer;
216
217
linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D',
218
"PCB count");
219
cp += linesz;
220
bufsz -= linesz;
221
outsz = linesz;
222
CC_LIST_RLOCK();
223
STAILQ_FOREACH(algo, &cc_list, entries) {
224
linesz = snprintf(cp, bufsz, "%-16s%c %u\n",
225
algo->name,
226
(algo == CC_DEFAULT_ALGO()) ? '*' : ' ',
227
algo->cc_refcount);
228
if (linesz >= bufsz) {
229
error = EOVERFLOW;
230
break;
231
}
232
cp += linesz;
233
bufsz -= linesz;
234
outsz += linesz;
235
}
236
CC_LIST_RUNLOCK();
237
if (error == 0)
238
error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
239
free(buffer, M_TEMP);
240
return (error);
241
}
242
243
/*
244
* Return the number of times a proposed removal_cc is
245
* being used as the default.
246
*/
247
static int
248
cc_check_default(struct cc_algo *remove_cc)
249
{
250
int cnt = 0;
251
VNET_ITERATOR_DECL(vnet_iter);
252
253
CC_LIST_LOCK_ASSERT();
254
255
VNET_LIST_RLOCK_NOSLEEP();
256
VNET_FOREACH(vnet_iter) {
257
CURVNET_SET(vnet_iter);
258
if ((CC_DEFAULT_ALGO() != NULL) &&
259
strncmp(CC_DEFAULT_ALGO()->name,
260
remove_cc->name,
261
TCP_CA_NAME_MAX) == 0) {
262
cnt++;
263
}
264
CURVNET_RESTORE();
265
}
266
VNET_LIST_RUNLOCK_NOSLEEP();
267
return (cnt);
268
}
269
270
/*
271
* Initialise CC subsystem on system boot.
272
*/
273
static void
274
cc_init(void)
275
{
276
CC_LIST_LOCK_INIT();
277
STAILQ_INIT(&cc_list);
278
}
279
280
/*
281
* Returns non-zero on success, 0 on failure.
282
*/
283
static int
284
cc_deregister_algo_locked(struct cc_algo *remove_cc)
285
{
286
struct cc_algo *funcs;
287
int found = 0;
288
289
/* This is unlikely to fail */
290
STAILQ_FOREACH(funcs, &cc_list, entries) {
291
if (funcs == remove_cc)
292
found = 1;
293
}
294
if (found == 0) {
295
/* Nothing to remove? */
296
return (ENOENT);
297
}
298
/* We assert it should have been MOD_QUIESCE'd */
299
KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED),
300
("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc));
301
if (cc_check_default(remove_cc)) {
302
return(EBUSY);
303
}
304
if (remove_cc->cc_refcount != 0) {
305
return (EBUSY);
306
}
307
/* Remove algo from cc_list so that new connections can't use it. */
308
STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries);
309
return (0);
310
}
311
312
/*
 * Locked wrapper around cc_deregister_algo_locked(): takes the CC list
 * write lock, attempts the removal and drops the lock again.
 *
 * Returns 0 on success, non-zero (the errno value produced by
 * cc_deregister_algo_locked()) on failure.
 */
int
cc_deregister_algo(struct cc_algo *remove_cc)
{
	int ret;

	CC_LIST_WLOCK();
	ret = cc_deregister_algo_locked(remove_cc);
	CC_LIST_WUNLOCK();
	return (ret);
}
325
326
/*
327
* Returns 0 on success, non-zero on failure.
328
*/
329
int
330
cc_register_algo(struct cc_algo *add_cc)
331
{
332
struct cc_algo *funcs;
333
int err;
334
335
err = 0;
336
337
/*
338
* Iterate over list of registered CC algorithms and make sure
339
* we're not trying to add a duplicate.
340
*/
341
CC_LIST_WLOCK();
342
STAILQ_FOREACH(funcs, &cc_list, entries) {
343
if (funcs == add_cc ||
344
strncmp(funcs->name, add_cc->name,
345
TCP_CA_NAME_MAX) == 0) {
346
err = EEXIST;
347
break;
348
}
349
}
350
/* Init its reference count */
351
if (err == 0)
352
refcount_init(&add_cc->cc_refcount, 0);
353
/*
354
* The first loaded congestion control module will become
355
* the default until we find the "CC_DEFAULT" defined in
356
* the config (if we do).
357
*/
358
if (!err) {
359
STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
360
if (strcmp(add_cc->name, CC_DEFAULT) == 0) {
361
V_default_cc_ptr = add_cc;
362
} else if (V_default_cc_ptr == NULL) {
363
V_default_cc_ptr = add_cc;
364
}
365
}
366
CC_LIST_WUNLOCK();
367
368
return (err);
369
}
370
371
static void
372
vnet_cc_sysinit(void *arg)
373
{
374
struct cc_algo *cc;
375
376
if (IS_DEFAULT_VNET(curvnet))
377
return;
378
379
CURVNET_SET(vnet0);
380
cc = V_default_cc_ptr;
381
CURVNET_RESTORE();
382
383
V_default_cc_ptr = cc;
384
}
385
VNET_SYSINIT(vnet_cc_sysinit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
386
vnet_cc_sysinit, NULL);
387
388
/*
389
* Perform any necessary tasks before we exit congestion recovery.
390
*/
391
void
392
newreno_cc_post_recovery(struct cc_var *ccv)
393
{
394
int pipe;
395
uint32_t mss = tcp_fixed_maxseg(ccv->tp);
396
397
if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
398
/*
399
* Fast recovery will conclude after returning from this
400
* function. Window inflation should have left us with
401
* approximately snd_ssthresh outstanding data. But in case we
402
* would be inclined to send a burst, better to do it via the
403
* slow start mechanism.
404
*/
405
pipe = tcp_compute_pipe(ccv->tp);
406
if (pipe < CCV(ccv, snd_ssthresh))
407
/*
408
* Ensure that cwnd does not collapse to 1 MSS under
409
* adverse conditions. Implements RFC6582
410
*/
411
CCV(ccv, snd_cwnd) = max(pipe, mss) + mss;
412
else
413
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
414
}
415
}
416
417
void
418
newreno_cc_after_idle(struct cc_var *ccv)
419
{
420
uint32_t rw;
421
/*
422
* If we've been idle for more than one retransmit timeout the old
423
* congestion window is no longer current and we have to reduce it to
424
* the restart window before we can transmit again.
425
*
426
* The restart window is the initial window or the last CWND, whichever
427
* is smaller.
428
*
429
* This is done to prevent us from flooding the path with a full CWND at
430
* wirespeed, overloading router and switch buffers along the way.
431
*
432
* See RFC5681 Section 4.1. "Restarting Idle Connections".
433
*
434
* In addition, per RFC2861 Section 2, the ssthresh is set to the
435
* maximum of the former ssthresh or 3/4 of the old cwnd, to
436
* not exit slow-start prematurely.
437
*/
438
rw = tcp_compute_initwnd(tcp_fixed_maxseg(ccv->tp));
439
440
CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh),
441
CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2));
442
443
CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
444
}
445
446
/*
447
* Get a new congestion window size on a multiplicative decrease event.
448
* */
449
u_int
450
newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv, uint32_t mss)
451
{
452
uint32_t cwin, factor;
453
454
cwin = CCV(ccv, snd_cwnd);
455
/*
456
* Other TCP congestion controls use newreno_cong_signal(), but
457
* with their own private cc_data. Make sure the cc_data is used
458
* correctly.
459
*/
460
factor = V_newreno_beta;
461
462
return max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), 2) * mss;
463
}
464
465
/*
466
* Perform any necessary tasks before we enter congestion recovery.
467
*/
468
void
469
newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type)
470
{
471
uint32_t cwin, mss, pipe;
472
473
mss = tcp_fixed_maxseg(ccv->tp);
474
475
/* Catch algos which mistakenly leak private signal types. */
476
KASSERT((type & CC_SIGPRIVMASK) == 0,
477
("%s: congestion signal type 0x%08x is private\n", __func__, type));
478
479
cwin = newreno_cc_cwnd_on_multiplicative_decrease(ccv, mss);
480
481
switch (type) {
482
case CC_NDUPACK:
483
if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
484
if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
485
CCV(ccv, snd_ssthresh) = cwin;
486
ENTER_RECOVERY(CCV(ccv, t_flags));
487
}
488
break;
489
case CC_ECN:
490
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
491
CCV(ccv, snd_ssthresh) = cwin;
492
CCV(ccv, snd_cwnd) = cwin;
493
ENTER_CONGRECOVERY(CCV(ccv, t_flags));
494
}
495
break;
496
case CC_RTO:
497
if (CCV(ccv, t_rxtshift) == 1) {
498
pipe = tcp_compute_pipe(ccv->tp);
499
CCV(ccv, snd_ssthresh) = max(2,
500
min(CCV(ccv, snd_wnd), pipe) / 2 / mss) * mss;
501
}
502
CCV(ccv, snd_cwnd) = mss;
503
break;
504
default:
505
break;
506
}
507
}
508
509
u_int
510
newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv)
511
{
512
u_int cw = CCV(ccv, snd_cwnd);
513
u_int incr = tcp_fixed_maxseg(ccv->tp);
514
515
KASSERT(cw > CCV(ccv, snd_ssthresh),
516
("congestion control state not in congestion avoidance\n"));
517
518
/*
519
* Regular in-order ACK, open the congestion window.
520
* The congestion control state we're in is congestion avoidance.
521
*
522
* Check if ABC (RFC 3465) is enabled.
523
* cong avoid: cwnd > ssthresh
524
*
525
* cong avoid and ABC (RFC 3465):
526
* Grow cwnd linearly by maxseg per RTT for each
527
* cwnd worth of ACKed data.
528
*
529
* cong avoid without ABC (RFC 5681):
530
* Grow cwnd linearly by approximately maxseg per RTT using
531
* maxseg^2 / cwnd per ACK as the increment.
532
* If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
533
* avoid capping cwnd.
534
*/
535
if (V_tcp_do_rfc3465) {
536
if (ccv->flags & CCF_ABC_SENTAWND)
537
ccv->flags &= ~CCF_ABC_SENTAWND;
538
else
539
incr = 0;
540
} else
541
incr = max((incr * incr / cw), 1);
542
/* ABC is on by default, so incr equals 0 frequently. */
543
if (incr > 0)
544
return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale));
545
else
546
return cw;
547
}
548
549
u_int
550
newreno_cc_cwnd_in_slow_start(struct cc_var *ccv)
551
{
552
u_int cw = CCV(ccv, snd_cwnd);
553
u_int mss = tcp_fixed_maxseg(ccv->tp);
554
u_int incr = mss;
555
556
KASSERT(cw <= CCV(ccv, snd_ssthresh),
557
("congestion control state not in slow start\n"));
558
559
/*
560
* Regular in-order ACK, open the congestion window.
561
* The congestion control state we're in is slow start.
562
*
563
* slow start: cwnd <= ssthresh
564
*
565
* slow start and ABC (RFC 3465):
566
* Grow cwnd exponentially by the amount of data
567
* ACKed capping the max increment per ACK to
568
* (abc_l_var * maxseg) bytes.
569
*
570
* slow start without ABC (RFC 5681):
571
* Grow cwnd exponentially by maxseg per ACK.
572
*/
573
if (V_tcp_do_rfc3465) {
574
/*
575
* In slow-start with ABC enabled and no RTO in sight?
576
* (Must not use abc_l_var > 1 if slow starting after
577
* an RTO. On RTO, snd_nxt = snd_una, so the
578
* snd_nxt == snd_max check is sufficient to
579
* handle this).
580
*
581
* XXXLAS: Find a way to signal SS after RTO that
582
* doesn't rely on tcpcb vars.
583
*/
584
uint16_t abc_val;
585
586
if (ccv->flags & CCF_USE_LOCAL_ABC)
587
abc_val = ccv->labc;
588
else
589
abc_val = V_tcp_abc_l_var;
590
if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
591
incr = min(ccv->bytes_this_ack,
592
ccv->nsegs * abc_val * mss);
593
else
594
incr = min(ccv->bytes_this_ack, mss);
595
}
596
/* ABC is on by default, so incr equals 0 frequently. */
597
if (incr > 0)
598
return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale));
599
else
600
return cw;
601
}
602
603
void
604
newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type)
605
{
606
if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
607
(ccv->flags & CCF_CWND_LIMITED)) {
608
if (CCV(ccv, snd_cwnd) > CCV(ccv, snd_ssthresh)) {
609
CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_cong_avoid(ccv);
610
} else {
611
CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_slow_start(ccv);
612
}
613
}
614
}
615
616
static int
617
cc_stop_new_assignments(struct cc_algo *algo)
618
{
619
CC_LIST_WLOCK();
620
if (cc_check_default(algo)) {
621
/* A default cannot be removed */
622
CC_LIST_WUNLOCK();
623
return (EBUSY);
624
}
625
algo->flags |= CC_MODULE_BEING_REMOVED;
626
CC_LIST_WUNLOCK();
627
return (0);
628
}
629
630
/*
631
* Handles kld related events. Returns 0 on success, non-zero on failure.
632
*/
633
int
634
cc_modevent(module_t mod, int event_type, void *data)
635
{
636
struct cc_algo *algo;
637
int err;
638
639
err = 0;
640
algo = (struct cc_algo *)data;
641
642
switch(event_type) {
643
case MOD_LOAD:
644
if ((algo->cc_data_sz == NULL) && (algo->cb_init != NULL)) {
645
/*
646
* A module must have a cc_data_sz function
647
* even if it has no data it should return 0.
648
*/
649
printf("Module Load Fails, it lacks a cc_data_sz() function but has a cb_init()!\n");
650
err = EINVAL;
651
break;
652
}
653
if (algo->mod_init != NULL)
654
err = algo->mod_init();
655
if (!err)
656
err = cc_register_algo(algo);
657
break;
658
659
case MOD_SHUTDOWN:
660
break;
661
case MOD_QUIESCE:
662
/* Stop any new assignments */
663
err = cc_stop_new_assignments(algo);
664
break;
665
case MOD_UNLOAD:
666
/*
667
* Deregister and remove the module from the list
668
*/
669
CC_LIST_WLOCK();
670
/* Even with -f we can't unload if its the default */
671
if (cc_check_default(algo)) {
672
/* A default cannot be removed */
673
CC_LIST_WUNLOCK();
674
return (EBUSY);
675
}
676
/*
677
* If -f was used and users are still attached to
678
* the algorithm things are going to go boom.
679
*/
680
err = cc_deregister_algo_locked(algo);
681
CC_LIST_WUNLOCK();
682
if ((err == 0) && (algo->mod_destroy != NULL)) {
683
algo->mod_destroy();
684
}
685
break;
686
default:
687
err = EINVAL;
688
break;
689
}
690
691
return (err);
692
}
693
694
SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
695
696
/* Declare sysctl tree and populate it. */
697
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
698
"Congestion control related settings");
699
700
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
701
CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
702
NULL, 0, cc_default_algo, "A",
703
"Default congestion control algorithm");
704
705
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available,
706
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
707
NULL, 0, cc_list_available, "A",
708
"List available congestion control algorithms");
709
710
SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, hystartplusplus,
711
CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
712
"New Reno related HyStart++ settings");
713
714
SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, minrtt_thresh,
715
CTLFLAG_RW,
716
&hystart_minrtt_thresh, 4000,
717
"HyStarts++ minimum RTT thresh used in clamp (in microseconds)");
718
719
SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, maxrtt_thresh,
720
CTLFLAG_RW,
721
&hystart_maxrtt_thresh, 16000,
722
"HyStarts++ maximum RTT thresh used in clamp (in microseconds)");
723
724
SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, n_rttsamples,
725
CTLFLAG_RW,
726
&hystart_n_rttsamples, 8,
727
"The number of RTT samples that must be seen to consider HyStart++");
728
729
SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_growth_div,
730
CTLFLAG_RW,
731
&hystart_css_growth_div, 4,
732
"The divisor to the growth when in Hystart++ CSS");
733
734
SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_rounds,
735
CTLFLAG_RW,
736
&hystart_css_rounds, 5,
737
"The number of rounds HyStart++ lasts in CSS before falling to CA");
738
739
SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, bblogs,
740
CTLFLAG_RW,
741
&hystart_bblogs, 0,
742
"Do we enable HyStart++ Black Box logs to be generated if BB logging is on");
743
744
VNET_DEFINE(int, cc_do_abe) = 0;
745
SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW,
746
&VNET_NAME(cc_do_abe), 0,
747
"Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)");
748
749
VNET_DEFINE(int, cc_abe_frlossreduce) = 0;
750
SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW,
751
&VNET_NAME(cc_abe_frlossreduce), 0,
752
"Apply standard beta instead of ABE-beta during ECN-signalled congestion "
753
"recovery episodes if loss also needs to be repaired");
754
755