/*-1* Copyright (c) 1991-1997 Regents of the University of California.2* All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions6* are met:7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9* 2. Redistributions in binary form must reproduce the above copyright10* notice, this list of conditions and the following disclaimer in the11* documentation and/or other materials provided with the distribution.12* 3. All advertising materials mentioning features or use of this software13* must display the following acknowledgement:14* This product includes software developed by the Network Research15* Group at Lawrence Berkeley Laboratory.16* 4. Neither the name of the University nor of the Laboratory may be used17* to endorse or promote products derived from this software without18* specific prior written permission.19*20* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND21* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE22* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE23* ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE24* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL25* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS26* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)27* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT28* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY29* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF30* SUCH DAMAGE.31*32* LBL code modified by [email protected], May 1977.33* For questions and/or comments, please send mail to [email protected]34* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $35*/36#include "opt_altq.h"37#include "opt_inet.h"38#include "opt_inet6.h"39#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */4041#include <sys/param.h>42#include <sys/malloc.h>43#include <sys/mbuf.h>44#include <sys/socket.h>45#include <sys/systm.h>46#include <sys/errno.h>47#include <sys/time.h>4849#include <net/if.h>50#include <net/if_var.h>51#include <net/if_private.h>5253#include <net/altq/if_altq.h>54#include <net/altq/altq.h>55#include <net/altq/altq_codel.h>56#include <net/altq/altq_rmclass.h>57#include <net/altq/altq_rmclass_debug.h>58#include <net/altq/altq_red.h>59#include <net/altq/altq_rio.h>6061/*62* Local Macros63*/64#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }6566/*67* Local routines.68*/6970static int rmc_satisfied(struct rm_class *, struct timeval *);71static void rmc_wrr_set_weights(struct rm_ifdat *);72static void rmc_depth_compute(struct rm_class *);73static void rmc_depth_recompute(rm_class_t *);7475static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);76static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);7778static int _rmc_addq(rm_class_t *, mbuf_t *);79static void _rmc_dropq(rm_class_t *);80static mbuf_t *_rmc_getq(rm_class_t *);81static mbuf_t *_rmc_pollq(rm_class_t *);8283static int 
rmc_under_limit(struct rm_class *, struct timeval *);84static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);85static void rmc_drop_action(struct rm_class *);86static void rmc_restart(void *);87static void rmc_root_overlimit(struct rm_class *, struct rm_class *);8889#define BORROW_OFFTIME90/*91* BORROW_OFFTIME (experimental):92* borrow the offtime of the class borrowing from.93* the reason is that when its own offtime is set, the class is unable94* to borrow much, especially when cutoff is taking effect.95* but when the borrowed class is overloaded (advidle is close to minidle),96* use the borrowing class's offtime to avoid overload.97*/98#define ADJUST_CUTOFF99/*100* ADJUST_CUTOFF (experimental):101* if no underlimit class is found due to cutoff, increase cutoff and102* retry the scheduling loop.103* also, don't invoke delay_actions while cutoff is taking effect,104* since a sleeping class won't have a chance to be scheduled in the105* next loop.106*107* now heuristics for setting the top-level variable (cutoff_) becomes:108* 1. if a packet arrives for a not-overlimit class, set cutoff109* to the depth of the class.110* 2. if cutoff is i, and a packet arrives for an overlimit class111* with an underlimit ancestor at a lower level than i (say j),112* then set cutoff to j.113* 3. at scheduling a packet, if there is no underlimit class114* due to the current cutoff level, increase cutoff by 1 and115* then try to schedule again.116*/117118/*119* rm_class_t *120* rmc_newclass(...) - Create a new resource management class at priority121* 'pri' on the interface given by 'ifd'.122*123* nsecPerByte is the data rate of the interface in nanoseconds/byte.124* E.g., 800 for a 10Mb/s ethernet. If the class gets less125* than 100% of the bandwidth, this number should be the126* 'effective' rate for the class. Let f be the127* bandwidth fraction allocated to this class, and let128* nsPerByte be the data rate of the output link in129* nanoseconds/byte. 
Then nsecPerByte is set to130* nsPerByte / f. E.g., 1600 (= 800 / .5)131* for a class that gets 50% of an ethernet's bandwidth.132*133* action the routine to call when the class is over limit.134*135* maxq max allowable queue size for class (in packets).136*137* parent parent class pointer.138*139* borrow class to borrow from (should be either 'parent' or null).140*141* maxidle max value allowed for class 'idle' time estimate (this142* parameter determines how large an initial burst of packets143* can be before overlimit action is invoked.144*145* offtime how long 'delay' action will delay when class goes over146* limit (this parameter determines the steady-state burst147* size when a class is running over its limit).148*149* Maxidle and offtime have to be computed from the following: If the150* average packet size is s, the bandwidth fraction allocated to this151* class is f, we want to allow b packet bursts, and the gain of the152* averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:153*154* ptime = s * nsPerByte * (1 - f) / f155* maxidle = ptime * (1 - g^b) / g^b156* minidle = -ptime * (1 / (f - 1))157* offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)158*159* Operationally, it's convenient to specify maxidle & offtime in units160* independent of the link bandwidth so the maxidle & offtime passed to161* this routine are the above values multiplied by 8*f/(1000*nsPerByte).162* (The constant factor is a scale factor needed to make the parameters163* integers. This scaling also means that the 'unscaled' values of164* maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,165* not nanoseconds.) Also note that the 'idle' filter computation keeps166* an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of167* maxidle also must be scaled upward by this value. 
Thus, the passed168* values for maxidle and offtime can be computed as follows:169*170* maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)171* offtime = offtime * 8 / (1000 * nsecPerByte)172*173* When USE_HRTIME is employed, then maxidle and offtime become:174* maxidle = maxilde * (8.0 / nsecPerByte);175* offtime = offtime * (8.0 / nsecPerByte);176*/177struct rm_class *178rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,179void (*action)(rm_class_t *, rm_class_t *), int maxq,180struct rm_class *parent, struct rm_class *borrow, u_int maxidle,181int minidle, u_int offtime, int pktsize, int flags)182{183struct rm_class *cl;184struct rm_class *peer;185int s;186187if (pri >= RM_MAXPRIO)188return (NULL);189#ifndef ALTQ_RED190if (flags & RMCF_RED) {191#ifdef ALTQ_DEBUG192printf("rmc_newclass: RED not configured for CBQ!\n");193#endif194return (NULL);195}196#endif197#ifndef ALTQ_RIO198if (flags & RMCF_RIO) {199#ifdef ALTQ_DEBUG200printf("rmc_newclass: RIO not configured for CBQ!\n");201#endif202return (NULL);203}204#endif205#ifndef ALTQ_CODEL206if (flags & RMCF_CODEL) {207#ifdef ALTQ_DEBUG208printf("rmc_newclass: CODEL not configured for CBQ!\n");209#endif210return (NULL);211}212#endif213214cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);215if (cl == NULL)216return (NULL);217CALLOUT_INIT(&cl->callout_);218cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);219if (cl->q_ == NULL) {220free(cl, M_DEVBUF);221return (NULL);222}223224/*225* Class initialization.226*/227cl->children_ = NULL;228cl->parent_ = parent;229cl->borrow_ = borrow;230cl->leaf_ = 1;231cl->ifdat_ = ifd;232cl->pri_ = pri;233cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */234cl->depth_ = 0;235cl->qthresh_ = 0;236cl->ns_per_byte_ = nsecPerByte;237238qlimit(cl->q_) = maxq;239qtype(cl->q_) = Q_DROPHEAD;240qlen(cl->q_) = 0;241cl->flags_ = flags;242243#if 1 /* minidle is also scaled in ALTQ */244cl->minidle_ = (minidle * (int)nsecPerByte) 
/ 8;245if (cl->minidle_ > 0)246cl->minidle_ = 0;247#else248cl->minidle_ = minidle;249#endif250cl->maxidle_ = (maxidle * nsecPerByte) / 8;251if (cl->maxidle_ == 0)252cl->maxidle_ = 1;253#if 1 /* offtime is also scaled in ALTQ */254cl->avgidle_ = cl->maxidle_;255cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;256if (cl->offtime_ == 0)257cl->offtime_ = 1;258#else259cl->avgidle_ = 0;260cl->offtime_ = (offtime * nsecPerByte) / 8;261#endif262cl->overlimit = action;263264#ifdef ALTQ_RED265if (flags & (RMCF_RED|RMCF_RIO)) {266int red_flags, red_pkttime;267268red_flags = 0;269if (flags & RMCF_ECN)270red_flags |= REDF_ECN;271if (flags & RMCF_FLOWVALVE)272red_flags |= REDF_FLOWVALVE;273#ifdef ALTQ_RIO274if (flags & RMCF_CLEARDSCP)275red_flags |= RIOF_CLEARDSCP;276#endif277red_pkttime = nsecPerByte * pktsize / 1000;278279if (flags & RMCF_RED) {280cl->red_ = red_alloc(0, 0,281qlimit(cl->q_) * 10/100,282qlimit(cl->q_) * 30/100,283red_flags, red_pkttime);284if (cl->red_ != NULL)285qtype(cl->q_) = Q_RED;286}287#ifdef ALTQ_RIO288else {289cl->red_ = (red_t *)rio_alloc(0, NULL,290red_flags, red_pkttime);291if (cl->red_ != NULL)292qtype(cl->q_) = Q_RIO;293}294#endif295}296#endif /* ALTQ_RED */297#ifdef ALTQ_CODEL298if (flags & RMCF_CODEL) {299cl->codel_ = codel_alloc(5, 100, 0);300if (cl->codel_ != NULL)301qtype(cl->q_) = Q_CODEL;302}303#endif304305/*306* put the class into the class tree307*/308s = splnet();309IFQ_LOCK(ifd->ifq_);310if ((peer = ifd->active_[pri]) != NULL) {311/* find the last class at this pri */312cl->peer_ = peer;313while (peer->peer_ != ifd->active_[pri])314peer = peer->peer_;315peer->peer_ = cl;316} else {317ifd->active_[pri] = cl;318cl->peer_ = cl;319}320321if (cl->parent_) {322cl->next_ = parent->children_;323parent->children_ = cl;324parent->leaf_ = 0;325}326327/*328* Compute the depth of this class and its ancestors in the class329* hierarchy.330*/331rmc_depth_compute(cl);332333/*334* If CBQ's WRR is enabled, then initialize the class WRR 
state.335*/336if (ifd->wrr_) {337ifd->num_[pri]++;338ifd->alloc_[pri] += cl->allotment_;339rmc_wrr_set_weights(ifd);340}341IFQ_UNLOCK(ifd->ifq_);342splx(s);343return (cl);344}345346int347rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,348int minidle, u_int offtime, int pktsize)349{350struct rm_ifdat *ifd;351u_int old_allotment;352int s;353354ifd = cl->ifdat_;355old_allotment = cl->allotment_;356357s = splnet();358IFQ_LOCK(ifd->ifq_);359cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */360cl->qthresh_ = 0;361cl->ns_per_byte_ = nsecPerByte;362363qlimit(cl->q_) = maxq;364365#if 1 /* minidle is also scaled in ALTQ */366cl->minidle_ = (minidle * nsecPerByte) / 8;367if (cl->minidle_ > 0)368cl->minidle_ = 0;369#else370cl->minidle_ = minidle;371#endif372cl->maxidle_ = (maxidle * nsecPerByte) / 8;373if (cl->maxidle_ == 0)374cl->maxidle_ = 1;375#if 1 /* offtime is also scaled in ALTQ */376cl->avgidle_ = cl->maxidle_;377cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;378if (cl->offtime_ == 0)379cl->offtime_ = 1;380#else381cl->avgidle_ = 0;382cl->offtime_ = (offtime * nsecPerByte) / 8;383#endif384385/*386* If CBQ's WRR is enabled, then initialize the class WRR state.387*/388if (ifd->wrr_) {389ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;390rmc_wrr_set_weights(ifd);391}392IFQ_UNLOCK(ifd->ifq_);393splx(s);394return (0);395}396397/*398* static void399* rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes400* the appropriate run robin weights for the CBQ weighted round robin401* algorithm.402*403* Returns: NONE404*/405406static void407rmc_wrr_set_weights(struct rm_ifdat *ifd)408{409int i;410struct rm_class *cl, *clh;411412for (i = 0; i < RM_MAXPRIO; i++) {413/*414* This is inverted from that of the simulator to415* maintain precision.416*/417if (ifd->num_[i] == 0)418ifd->M_[i] = 0;419else420ifd->M_[i] = ifd->alloc_[i] /421(ifd->num_[i] * ifd->maxpkt_);422/*423* Compute the weighted allotment for 
each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
					    ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

/*
 * int
 * rmc_get_weight(struct rm_ifdat *ifd, int pri) - Return the WRR weight
 *	(M_) currently computed for priority level 'pri', or 0 if 'pri'
 *	is out of range.
 */
int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 * Returns:	NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t *t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 * Walk upward, bumping each parent's depth_ to child depth + 1;
	 * stop as soon as a parent is already deep enough.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 * Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t *p, *t;

	/*
	 * Walk from 'cl' toward the root; at each node, set depth_ to
	 * 1 + max(children's depth_).  Stop early if a node's depth
	 * is already consistent (ancestors are then consistent too).
	 */
	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t *t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 * Returns:	NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;
	int s;

	/* only leaf classes may be deleted */
	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			/* cl is the only child */
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ
 */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	IFQ_UNLOCK(ifd->ifq_);
	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->q_))
			codel_destroy(cl->codel_);
#endif
	}
	free(cl->q_, M_DEVBUF);
	free(cl, M_DEVBUF);
}

/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 * Returns:	NONE
 */

void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	/* bound the estimated-completion-time drift to ~16 MTU times */
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    nsecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return ;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 * Returns:	0 on successful queueing
 *		-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			/* class is underlimit: lower cutoff to its depth */
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
				    cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		/* queue went non-empty: one more active class at this pri */
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.  Sets cutoff_ to the depth of
 *	the first unsatisfied class found, or resets it if all are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int i;
	rm_class_t *p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied. 
0, otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		/* leaf class: satisfied unless awake with a deep queue */
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	/* interior class: satisfied only if every child is satisfied */
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		/* the delay period has expired; wake the class up */
		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	/* climb the borrow chain while this class is overlimit */
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	/* record the ancestor actually borrowed from, if any */
	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
 *	Packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorites.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int deficit;
	int cpri;
	mbuf_t *m;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_PPOLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it. 
This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				/* send if underlimit or able to borrow */
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link. 
If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to send.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int idle, avgidle, pktlen;
	int pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval *nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use 
approximation */1281pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;1282pkt_time = NSEC_TO_USEC(pkt_time);1283#else1284pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;1285#endif1286#if 1 /* ALTQ4PPP */1287if (TV_LT(nowp, &ifd->ifnow_)) {1288int iftime;12891290/*1291* make sure the estimated completion time does not go1292* too far. it can happen when the link layer supports1293* data compression or the interface speed is set to1294* a much lower value.1295*/1296TV_DELTA(&ifd->ifnow_, nowp, iftime);1297if (iftime+pkt_time < ifd->maxiftime_) {1298TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);1299} else {1300TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);1301}1302} else {1303TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);1304}1305#else1306if (TV_LT(nowp, &ifd->ifnow_)) {1307TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);1308} else {1309TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);1310}1311#endif13121313while (cl != NULL) {1314TV_DELTA(&ifd->ifnow_, &cl->last_, idle);1315if (idle >= 2000000)1316/*1317* this class is idle enough, reset avgidle.1318* (TV_DELTA returns 2000000 us when delta is large.)1319*/1320cl->avgidle_ = cl->maxidle_;13211322/* get pkt_time (for class) in usec */1323#if 1 /* use approximation */1324pkt_time = pktlen * cl->ns_per_byte_;1325pkt_time = NSEC_TO_USEC(pkt_time);1326#else1327pkt_time = pktlen * cl->ns_per_byte_ / 1000;1328#endif1329idle -= pkt_time;13301331avgidle = cl->avgidle_;1332avgidle += idle - (avgidle >> RM_FILTER_GAIN);1333cl->avgidle_ = avgidle;13341335/* Are we overlimit ? 
*/1336if (avgidle <= 0) {1337CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);1338#if 1 /* ALTQ */1339/*1340* need some lower bound for avgidle, otherwise1341* a borrowing class gets unbounded penalty.1342*/1343if (avgidle < cl->minidle_)1344avgidle = cl->avgidle_ = cl->minidle_;1345#endif1346/* set next idle to make avgidle 0 */1347tidle = pkt_time +1348(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);1349TV_ADD_DELTA(nowp, tidle, &cl->undertime_);1350++cl->stats_.over;1351} else {1352cl->avgidle_ =1353(avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;1354cl->undertime_.tv_sec = 0;1355if (cl->sleeping_) {1356CALLOUT_STOP(&cl->callout_);1357cl->sleeping_ = 0;1358}1359}13601361if (borrows != NULL) {1362if (borrows != cl)1363++cl->stats_.borrows;1364else1365borrows = NULL;1366}1367cl->last_ = ifd->ifnow_;1368cl->last_pkttime_ = pkt_time;13691370#if 11371if (cl->parent_ == NULL) {1372/* take stats of root class */1373PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);1374}1375#endif13761377cl = cl->parent_;1378}13791380/*1381* Check to see if cutoff needs to set to a new level.1382*/1383cl = ifd->class_[ifd->qo_];1384if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {1385#if 1 /* ALTQ */1386if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {1387rmc_tl_satisfied(ifd, nowp);1388CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);1389} else {1390ifd->cutoff_ = borrowed->depth_;1391CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);1392}1393#else /* !ALTQ */1394if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {1395reset_cutoff(ifd);1396#ifdef notdef1397rmc_tl_satisfied(ifd, &now);1398#endif1399CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);1400} else {1401ifd->cutoff_ = borrowed->depth_;1402CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);1403}1404#endif /* !ALTQ */1405}14061407/*1408* Release class slot1409*/1410ifd->borrowed_[ifd->qo_] = NULL;1411ifd->class_[ifd->qo_] = NULL;1412ifd->qo_ = (ifd->qo_ + 1) % 
ifd->maxqueued_;1413ifd->queued_--;1414}14151416/*1417* void1418* rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)1419* over-limit action routines. These get invoked by rmc_under_limit()1420* if a class with packets to send if over its bandwidth limit & can't1421* borrow from a parent class.1422*1423* Returns: NONE1424*/14251426static void1427rmc_drop_action(struct rm_class *cl)1428{1429struct rm_ifdat *ifd = cl->ifdat_;14301431ASSERT(qlen(cl->q_) > 0);1432_rmc_dropq(cl);1433if (qempty(cl->q_))1434ifd->na_[cl->pri_]--;1435}14361437void rmc_dropall(struct rm_class *cl)1438{1439struct rm_ifdat *ifd = cl->ifdat_;14401441if (!qempty(cl->q_)) {1442_flushq(cl->q_);14431444ifd->na_[cl->pri_]--;1445}1446}14471448static int1449hzto(struct timeval *tv)1450{1451struct timeval t2;14521453getmicrotime(&t2);1454t2.tv_sec = tv->tv_sec - t2.tv_sec;1455t2.tv_usec = tv->tv_usec - t2.tv_usec;1456return (tvtohz(&t2));1457}14581459/*1460* void1461* rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ1462* delay action routine. 
It is invoked via rmc_under_limit when the1463* packet is discoverd to be overlimit.1464*1465* If the delay action is result of borrow class being overlimit, then1466* delay for the offtime of the borrowing class that is overlimit.1467*1468* Returns: NONE1469*/14701471void1472rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)1473{1474int delay, t, extradelay;14751476cl->stats_.overactions++;1477TV_DELTA(&cl->undertime_, &cl->overtime_, delay);1478#ifndef BORROW_OFFTIME1479delay += cl->offtime_;1480#endif14811482if (!cl->sleeping_) {1483CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);1484#ifdef BORROW_OFFTIME1485if (borrow != NULL)1486extradelay = borrow->offtime_;1487else1488#endif1489extradelay = cl->offtime_;14901491#ifdef ALTQ1492/*1493* XXX recalculate suspend time:1494* current undertime is (tidle + pkt_time) calculated1495* from the last transmission.1496* tidle: time required to bring avgidle back to 01497* pkt_time: target waiting time for this class1498* we need to replace pkt_time by offtime1499*/1500extradelay -= cl->last_pkttime_;1501#endif1502if (extradelay > 0) {1503TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);1504delay += extradelay;1505}15061507cl->sleeping_ = 1;1508cl->stats_.delays++;15091510/*1511* Since packets are phased randomly with respect to the1512* clock, 1 tick (the next clock tick) can be an arbitrarily1513* short time so we have to wait for at least two ticks.1514* NOTE: If there's no other traffic, we need the timer as1515* a 'backstop' to restart this class.1516*/1517if (delay > tick * 2) {1518/* FreeBSD rounds up the tick */1519t = hzto(&cl->undertime_);1520} else1521t = 2;1522CALLOUT_RESET(&cl->callout_, t, rmc_restart, cl);1523}1524}15251526/*1527* void1528* rmc_restart() - is just a helper routine for rmc_delay_action -- it is1529* called by the system timer code & is responsible checking if the1530* class is still sleeping (it might have been restarted as a side1531* effect of the queue scan on a 
packet arrival) and, if so, restarting1532* output for the class. Inspecting the class state & restarting output1533* require locking the class structure. In general the driver is1534* responsible for locking but this is the only routine that is not1535* called directly or indirectly from the interface driver so it has1536* know about system locking conventions. Under bsd, locking is done1537* by raising IPL to splimp so that's what's implemented here. On a1538* different system this would probably need to be changed.1539*1540* Returns: NONE1541*/15421543static void1544rmc_restart(void *arg)1545{1546struct rm_class *cl = arg;1547struct rm_ifdat *ifd = cl->ifdat_;1548struct epoch_tracker et;1549int s;15501551s = splnet();1552NET_EPOCH_ENTER(et);1553IFQ_LOCK(ifd->ifq_);1554CURVNET_SET(ifd->ifq_->altq_ifp->if_vnet);1555if (cl->sleeping_) {1556cl->sleeping_ = 0;1557cl->undertime_.tv_sec = 0;15581559if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {1560CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);1561(ifd->restart)(ifd->ifq_);1562}1563}1564CURVNET_RESTORE();1565IFQ_UNLOCK(ifd->ifq_);1566NET_EPOCH_EXIT(et);1567splx(s);1568}15691570/*1571* void1572* rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit1573* handling routine for the root class of the link sharing structure.1574*1575* Returns: NONE1576*/15771578static void1579rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)1580{1581panic("rmc_root_overlimit");1582}15831584/*1585* Packet Queue handling routines. 
Eventually, this is to localize the1586* effects on the code whether queues are red queues or droptail1587* queues.1588*/15891590static int1591_rmc_addq(rm_class_t *cl, mbuf_t *m)1592{1593#ifdef ALTQ_RIO1594if (q_is_rio(cl->q_))1595return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);1596#endif1597#ifdef ALTQ_RED1598if (q_is_red(cl->q_))1599return red_addq(cl->red_, cl->q_, m, cl->pktattr_);1600#endif /* ALTQ_RED */1601#ifdef ALTQ_CODEL1602if (q_is_codel(cl->q_))1603return codel_addq(cl->codel_, cl->q_, m);1604#endif16051606if (cl->flags_ & RMCF_CLEARDSCP)1607write_dsfield(m, cl->pktattr_, 0);16081609_addq(cl->q_, m);1610return (0);1611}16121613/* note: _rmc_dropq is not called for red */1614static void1615_rmc_dropq(rm_class_t *cl)1616{1617mbuf_t *m;16181619if ((m = _getq(cl->q_)) != NULL)1620m_freem(m);1621}16221623static mbuf_t *1624_rmc_getq(rm_class_t *cl)1625{1626#ifdef ALTQ_RIO1627if (q_is_rio(cl->q_))1628return rio_getq((rio_t *)cl->red_, cl->q_);1629#endif1630#ifdef ALTQ_RED1631if (q_is_red(cl->q_))1632return red_getq(cl->red_, cl->q_);1633#endif1634#ifdef ALTQ_CODEL1635if (q_is_codel(cl->q_))1636return codel_getq(cl->codel_, cl->q_);1637#endif1638return _getq(cl->q_);1639}16401641static mbuf_t *1642_rmc_pollq(rm_class_t *cl)1643{1644return qhead(cl->q_);1645}16461647#ifdef CBQ_TRACE16481649struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];1650struct cbqtrace *cbqtrace_ptr = NULL;1651int cbqtrace_count;16521653/*1654* DDB hook to trace cbq events:1655* the last 1024 events are held in a circular buffer.1656* use "call cbqtrace_dump(N)" to display 20 events from Nth event.1657*/1658void cbqtrace_dump(int);1659static char *rmc_funcname(void *);16601661static struct rmc_funcs {1662void *func;1663char *name;1664} rmc_funcs[] =1665{1666rmc_init, "rmc_init",1667rmc_queue_packet, "rmc_queue_packet",1668rmc_under_limit, "rmc_under_limit",1669rmc_update_class_util, "rmc_update_class_util",1670rmc_delay_action, "rmc_delay_action",1671rmc_restart, 
"rmc_restart",1672_rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",1673NULL, NULL1674};16751676static char *rmc_funcname(void *func)1677{1678struct rmc_funcs *fp;16791680for (fp = rmc_funcs; fp->func != NULL; fp++)1681if (fp->func == func)1682return (fp->name);1683return ("unknown");1684}16851686void cbqtrace_dump(int counter)1687{1688int i, *p;1689char *cp;16901691counter = counter % NCBQTRACE;1692p = (int *)&cbqtrace_buffer[counter];16931694for (i=0; i<20; i++) {1695printf("[0x%x] ", *p++);1696printf("%s: ", rmc_funcname((void *)*p++));1697cp = (char *)p++;1698printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);1699printf("%d\n",*p++);17001701if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])1702p = (int *)cbqtrace_buffer;1703}1704}1705#endif /* CBQ_TRACE */1706#endif /* ALTQ_CBQ */17071708#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \1709defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)1710#if !defined(__GNUC__) || defined(ALTQ_DEBUG)17111712void1713_addq(class_queue_t *q, mbuf_t *m)1714{1715mbuf_t *m0;17161717if ((m0 = qtail(q)) != NULL)1718m->m_nextpkt = m0->m_nextpkt;1719else1720m0 = m;1721m0->m_nextpkt = m;1722qtail(q) = m;1723qlen(q)++;1724}17251726mbuf_t *1727_getq(class_queue_t *q)1728{1729mbuf_t *m, *m0;17301731if ((m = qtail(q)) == NULL)1732return (NULL);1733if ((m0 = m->m_nextpkt) != m)1734m->m_nextpkt = m0->m_nextpkt;1735else {1736ASSERT(qlen(q) == 1);1737qtail(q) = NULL;1738}1739qlen(q)--;1740m0->m_nextpkt = NULL;1741return (m0);1742}17431744/* drop a packet at the tail of the queue */1745mbuf_t *1746_getq_tail(class_queue_t *q)1747{1748mbuf_t *m, *m0, *prev;17491750if ((m = m0 = qtail(q)) == NULL)1751return NULL;1752do {1753prev = m0;1754m0 = m0->m_nextpkt;1755} while (m0 != m);1756prev->m_nextpkt = m->m_nextpkt;1757if (prev == m) {1758ASSERT(qlen(q) == 1);1759qtail(q) = NULL;1760} else1761qtail(q) = prev;1762qlen(q)--;1763m->m_nextpkt = NULL;1764return (m);1765}17661767/* randomly select a packet in the queue */1768mbuf_t 
*1769_getq_random(class_queue_t *q)1770{1771struct mbuf *m;1772int i, n;17731774if ((m = qtail(q)) == NULL)1775return NULL;1776if (m->m_nextpkt == m) {1777ASSERT(qlen(q) == 1);1778qtail(q) = NULL;1779} else {1780struct mbuf *prev = NULL;17811782n = arc4random() % qlen(q) + 1;1783for (i = 0; i < n; i++) {1784prev = m;1785m = m->m_nextpkt;1786}1787prev->m_nextpkt = m->m_nextpkt;1788if (m == qtail(q))1789qtail(q) = prev;1790}1791qlen(q)--;1792m->m_nextpkt = NULL;1793return (m);1794}17951796void1797_removeq(class_queue_t *q, mbuf_t *m)1798{1799mbuf_t *m0, *prev;18001801m0 = qtail(q);1802do {1803prev = m0;1804m0 = m0->m_nextpkt;1805} while (m0 != m);1806prev->m_nextpkt = m->m_nextpkt;1807if (prev == m)1808qtail(q) = NULL;1809else if (qtail(q) == m)1810qtail(q) = prev;1811qlen(q)--;1812}18131814void1815_flushq(class_queue_t *q)1816{1817mbuf_t *m;18181819while ((m = _getq(q)) != NULL)1820m_freem(m);1821ASSERT(qlen(q) == 0);1822}18231824#endif /* !__GNUC__ || ALTQ_DEBUG */1825#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */182618271828