GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/net/altq/altq_rmclass.c
/*-
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Network Research
 *      Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by [email protected], May 1997.
 * For questions and/or comments, please send mail to [email protected]
 * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>

#include <net/altq/if_altq.h>
#include <net/altq/altq.h>
#include <net/altq/altq_codel.h>
#include <net/altq/altq_rmclass.h>
#include <net/altq/altq_rmclass_debug.h>
#include <net/altq/altq_red.h>
#include <net/altq/altq_rio.h>

/*
 * Local Macros
 */
#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied(struct rm_class *, struct timeval *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int	_rmc_addq(rm_class_t *, mbuf_t *);
static void	_rmc_dropq(rm_class_t *);
static mbuf_t	*_rmc_getq(rm_class_t *);
static mbuf_t	*_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timeval *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(void *);
static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowing from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * now the heuristics for setting the top-level variable (cutoff_) become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */
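
/*
 * Illustrative note (not part of the original source): on a two-level
 * tree with the root at depth 2, interior classes at depth 1 and leaf
 * classes at depth 0, the heuristics above play out as follows:
 *	- a packet arrives for an underlimit leaf      -> cutoff_ = 0
 *	  (no borrowing is needed to send it)
 *	- a packet arrives for an overlimit leaf whose
 *	  depth-1 parent is underlimit                 -> cutoff_ = 1
 *	- the scheduler finds no underlimit class at
 *	  the current cutoff level                     -> cutoff_++, retry
 * reset_cutoff() returns cutoff_ to RM_MAXDEPTH, i.e., borrowing is
 * unrestricted again.
 */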

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte	is the data rate of the interface in nanoseconds/byte.
 *		E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then nsecPerByte is set to
 *		nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
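
/*
 * Illustrative note (not part of the original source): a worked example
 * of the rate parameters above.  A 10Mb/s link moves 1.25e6 bytes/sec,
 * so nsPerByte = 1e9 / 1.25e6 = 800.  A class allotted f = 25% of that
 * link is created with nsecPerByte = 800 / 0.25 = 3200, and the
 * constructor below derives allotment_ = RM_NS_PER_SEC / 3200 =
 * 312500 bytes/sec, i.e., a quarter of the link rate.
 */
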
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
	struct rm_class	*cl;
	struct rm_class	*peer;
	int		 s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_CODEL
	if (flags & RMCF_CODEL) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: CODEL not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif

	cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl == NULL)
		return (NULL);
	CALLOUT_INIT(&cl->callout_);
	cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl->q_ == NULL) {
		free(cl, M_DEVBUF);
		return (NULL);
	}

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0,
			    qlimit(cl->q_) * 10/100,
			    qlimit(cl->q_) * 30/100,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
	if (flags & RMCF_CODEL) {
		cl->codel_ = codel_alloc(5, 100, 0);
		if (cl->codel_ != NULL)
			qtype(cl->q_) = Q_CODEL;
	}
#endif

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
	return (cl);
}

int
rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
    int minidle, u_int offtime, int pktsize)
{
	struct rm_ifdat	*ifd;
	u_int		 old_allotment;
	int		 s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round robin weights for the CBQ weighted round
 *	robin algorithm.
 *
 *	Returns:	NONE
 */

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int		 i;
	struct rm_class	*cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
			    (ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
					    ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}
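
/*
 * Illustrative note (not part of the original source): a worked weight
 * computation, assuming maxpkt_ = 1500 and two classes at priority i
 * with allotments of 312500 and 625000 bytes/sec (so alloc_[i] =
 * 937500 and num_[i] = 2):
 *
 *	M_[i] = 937500 / (2 * 1500) = 312
 *	w_allotment_ = 312500 / 312 = 1001	(first class)
 *	w_allotment_ = 625000 / 312 = 2003	(second class)
 *
 * The second class may send roughly twice as many bytes per round,
 * matching the 2:1 ratio of the configured allotments.
 */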

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 *	Returns:	NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t	*t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}
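
/*
 * Illustrative note (not part of the original source): leaves have
 * depth 0 and every interior class sits one level above its deepest
 * child.  Adding a leaf under a parent whose depth is already >= 1
 * changes nothing; adding the first child to a leaf walks up the tree,
 * bumping each ancestor whose depth is no longer strictly greater than
 * its child's.
 */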

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 *	Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t	*p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t	*t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 *	Returns:	NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class	*p, *head, *previous;
	int		 s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 * re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	IFQ_UNLOCK(ifd->ifq_);
	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->q_))
			codel_destroy(cl->codel_);
#endif
	}
	free(cl->q_, M_DEVBUF);
	free(cl, M_DEVBUF);
}

/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 *	Returns:	NONE
 */

void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    nsecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 *	Returns:	0 on successful queueing
 *			-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval	 now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
				    cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int		 i;
	rm_class_t	*p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p = cl;
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/*
				 * cutoff is taking effect, just
				 * return false without calling
				 * the delay action.
				 */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}
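
/*
 * Illustrative note (not part of the original source): a sample walk of
 * the borrow loop above.  Suppose leaf L is overlimit, L->borrow_ is
 * its parent P (depth 1), and cutoff_ = 1.  If P is underlimit, the
 * loop exits with cl == P, borrowed_[qi_] records P, and L may send.
 * If P is also overlimit and P->borrow_ is the root (depth 2 >
 * cutoff_), the walk stops; with ADJUST_CUTOFF defined the function
 * just returns 0 and leaves it to the scheduler to raise cutoff_ and
 * retry.
 */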

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 *	packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}
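
/*
 * Illustrative note (not part of the original source): the deficit flag
 * above implements a two-pass round.  Assume w_allotment_ = 1001 and a
 * 1500-byte packet at the head of an underlimit class: on the first
 * pass bytes_alloc_ tops up to 1001, which is > 0, so the class sends
 * and bytes_alloc_ drops to -499; it must top up again before sending
 * next round.  If a class is found underlimit but its allocation is
 * still <= 0 after topping up, deficit is set to 1 and the level is
 * scanned a second time (deficit == 2), during which an underlimit
 * class may send regardless of its byte allocation.
 */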

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
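
/*
 * Illustrative note (not part of the original source): the three shifts
 * sum to 1/1024 + 1/65536 + 1/131072 ~= 0.00099945, within 0.06% of
 * 1/1000.  E.g., NSEC_TO_USEC(1000000) = 976 + 15 + 7 = 998, versus the
 * exact 1000 usec.
 */
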
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rmc_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
			    (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}
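
/*
 * Illustrative note (not part of the original source): a worked pass of
 * the avgidle filter above, assuming RM_FILTER_GAIN = 5 (so the EWMA is
 * avgidle += idle - avgidle/32).  Take a class with pkt_time = 1200
 * usec whose previous avgidle is 640.  If the gap since its last
 * transmission was 1000 usec, then idle = 1000 - 1200 = -200 and
 * avgidle becomes 640 + (-200) - (640 >> 5) = 420: still positive, so
 * the class stays underlimit.  Sustained sending above the class rate
 * keeps idle negative until avgidle crosses 0, at which point
 * undertime_ is set and the class is overlimit.
 */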

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns:	NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

static int
hzto(struct timeval *tv)
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
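
/*
 * Illustrative note (not part of the original source): hzto() converts
 * an absolute timeval into a tick count for CALLOUT_RESET().  Assuming
 * hz = 1000 (a 1 ms tick), a cl->undertime_ lying 5.3 ms in the future
 * yields a delta of roughly {0, 5300}, and tvtohz() rounds the result
 * up (to 6 or 7 ticks) so the callout cannot fire early.
 */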

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is the result of a borrowed class being
 *	overlimit, then delay for the offtime of that borrowed class.
 *
 *	Returns:	NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t, rmc_restart, cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.  Under bsd, locking is done
 *	by raising IPL to splimp so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 *	Returns:	NONE
 */

static void
rmc_restart(void *arg)
{
	struct rm_class *cl = arg;
	struct rm_ifdat	*ifd = cl->ifdat_;
	struct epoch_tracker et;
	int		 s;

	s = splnet();
	NET_EPOCH_ENTER(et);
	IFQ_LOCK(ifd->ifq_);
	CURVNET_SET(ifd->ifq_->altq_ifp->if_vnet);
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	CURVNET_RESTORE();
	IFQ_UNLOCK(ifd->ifq_);
	NET_EPOCH_EXIT(et);
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link-sharing structure.
 *
 *	Returns:	NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *	effects on the code of whether queues are RED queues or droptail
 *	queues.
 */

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->q_))
		return codel_addq(cl->codel_, cl->q_, m);
#endif

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	mbuf_t	*m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->q_))
		return codel_getq(cl->codel_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace		*cbqtrace_ptr = NULL;
int			 cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void	*func;
	char	*name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *
rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(int counter)
{
	int	 i, *p;
	char	*cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i=0; i<20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n",*p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
    defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}
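
/*
 * Illustrative note (not part of the original source): these queues are
 * circular singly-linked lists threaded through m_nextpkt, with only a
 * tail pointer kept in the queue head.  After _addq() of A, B and C:
 *
 *	qtail(q) --> C --m_nextpkt--> A --> B --> C (back around)
 *
 * so the head is always qtail(q)->m_nextpkt, enqueue and dequeue are
 * both O(1), and _getq_tail()/_removeq() pay O(n) to find the
 * predecessor.
 */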

mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = arc4random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
	mbuf_t	*m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ || ALTQ_CODEL */