Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/sched/sch_cbq.c
15111 views
1
/*
2
* net/sched/sch_cbq.c Class-Based Queueing discipline.
3
*
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License
6
* as published by the Free Software Foundation; either version
7
* 2 of the License, or (at your option) any later version.
8
*
9
* Authors: Alexey Kuznetsov, <[email protected]>
10
*
11
*/
12
13
#include <linux/module.h>
14
#include <linux/slab.h>
15
#include <linux/types.h>
16
#include <linux/kernel.h>
17
#include <linux/string.h>
18
#include <linux/errno.h>
19
#include <linux/skbuff.h>
20
#include <net/netlink.h>
21
#include <net/pkt_sched.h>
22
23
24
/* Class-Based Queueing (CBQ) algorithm.
25
=======================================
26
27
Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
28
Management Models for Packet Networks",
29
IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30
31
[2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32
33
[3] Sally Floyd, "Notes on Class-Based Queueing: Setting
34
Parameters", 1996
35
36
[4] Sally Floyd and Michael Speer, "Experimental Results
37
for Class-Based Queueing", 1998, not published.
38
39
-----------------------------------------------------------------------
40
41
Algorithm skeleton was taken from NS simulator cbq.cc.
42
If someone wants to check this code against the LBL version,
43
he should take into account that ONLY the skeleton was borrowed,
44
the implementation is different. Particularly:
45
46
--- The WRR algorithm is different. Our version looks more
47
reasonable (I hope) and works when quanta are allowed to be
48
less than MTU, which is always the case when real time classes
49
have small rates. Note, that the statement of [3] is
50
incomplete, delay may actually be estimated even if class
51
per-round allotment is less than MTU. Namely, if per-round
52
allotment is W*r_i, and r_1+...+r_k = r < 1
53
54
delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55
56
In the worst case we have IntServ estimate with D = W*r+k*MTU
57
and C = MTU*r. The proof (if correct at all) is trivial.
58
59
60
--- It seems that cbq-2.0 is not very accurate. At least, I cannot
61
interpret some places, which look like wrong translations
62
from NS. Anyone is advised to find these differences
63
and explain to me, why I am wrong 8).
64
65
--- Linux has no EOI event, so that we cannot estimate true class
66
idle time. Workaround is to consider the next dequeue event
67
as sign that previous packet is finished. This is wrong because of
68
internal device queueing, but on a permanently loaded link it is true.
69
Moreover, combined with clock integrator, this scheme looks
70
very close to an ideal solution. */
71
72
struct cbq_sched_data;
73
74
75
/* Per-class state of the CBQ discipline: configuration fed in from
 * userspace (priority, rate, link-sharing parameters), tree/ring links,
 * and the run-time variables driven by cbq_update()/cbq_dequeue().
 */
struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;	/* which TC_CBQ_OVL_* overlimit action is configured */
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;		/* bitmap of TC_PRIO_* values this class is default for */

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain (circular) */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;	/* non-zero while an overlimit delay is in force */
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;	/* time when the class becomes underlimit again */
	long			avgidle;	/* scaled EWMA of idle time, see cbq_update() */
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;	/* when a priority penalty expires (see cbq_undelay_prio) */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;	/* number of filters referencing this class */

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];	/* per-TC_PRIO default classes of a split node */
};
143
144
/* Qdisc-wide CBQ state: the class hash, per-priority-band bookkeeping,
 * the cached virtual/real clocks, and the delay/watchdog timers.
 */
struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];	/* number of classes per band (cbq_addprio/rmprio) */
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];	/* sum of class weights per band */

	struct cbq_class	link;			/* built-in root ("link") class */

	unsigned int		activemask;		/* bitmap of bands with backlogged classes */
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog (tail pointers
								   of circular rings) */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;		/* class of the packet being enqueued */
#endif
	struct cbq_class	*tx_class;		/* class that emitted the last packet */
	struct cbq_class	*tx_borrowed;		/* class it borrowed bandwidth from */
	int			tx_len;			/* length of that packet */
	psched_time_t		now;			/* Cached timestamp (virtual, integrator-warped) */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned int		pmask;			/* bitmap of bands holding penalized classes */

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;	/* soonest wakeup wanted by overlimit actions */
	int			toplevel;	/* top-level link-sharing cutoff */
	u32			hgenerator;	/* presumably a generator for automatic class ids — not used in this chunk, verify against the rest of the file */
};
174
175
176
/* Length-to-time: transmission time of a @len byte packet according to
 * the class's configured rate table.
 */
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
177
178
static inline struct cbq_class *
179
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
180
{
181
struct Qdisc_class_common *clc;
182
183
clc = qdisc_class_find(&q->clhash, classid);
184
if (clc == NULL)
185
return NULL;
186
return container_of(clc, struct cbq_class, common);
187
}
188
189
#ifdef CONFIG_NET_CLS_ACT
190
191
static struct cbq_class *
192
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
193
{
194
struct cbq_class *cl;
195
196
for (cl = this->tparent; cl; cl = cl->tparent) {
197
struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
198
199
if (new != NULL && new != this)
200
return new;
201
}
202
return NULL;
203
}
204
205
#endif
206
207
/* Classify packet. The procedure is pretty complicated, but
208
* it allows us to combine link sharing and priority scheduling
209
* transparently.
210
*
211
* Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
212
* so that it resolves to split nodes. Then packets are classified
213
* by logical priority, or a more specific classifier may be attached
214
* to the split node.
215
*/
216
217
/* Map an skb to a CBQ class.  Tries, in order: a direct classid match in
 * skb->priority, the attached filter chain (repeated down through split
 * nodes), the split node's per-TC_PRIO defaults, and finally the node
 * itself.  Sets *qerr for the bypass/stolen cases.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 * Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			/* Filter returned no class object: resolve classid,
			 * falling back to the split node's default map.
			 */
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through -- packet consumed, report no class */
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 * apply agency specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
292
293
/*
294
* A packet has just been enqueued on the empty class.
295
* cbq_activate_class adds it to the tail of active class list
296
* of its priority band.
297
*/
298
299
static inline void cbq_activate_class(struct cbq_class *cl)
300
{
301
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
302
int prio = cl->cpriority;
303
struct cbq_class *cl_tail;
304
305
cl_tail = q->active[prio];
306
q->active[prio] = cl;
307
308
if (cl_tail != NULL) {
309
cl->next_alive = cl_tail->next_alive;
310
cl_tail->next_alive = cl;
311
} else {
312
cl->next_alive = cl;
313
q->activemask |= (1<<prio);
314
}
315
}
316
317
/*
318
* Unlink class from active chain.
319
* Note that this same procedure is done directly in cbq_dequeue*
320
* during round-robin procedure.
321
*/
322
323
/* Unlink @this from its band's circular active list.  Mirrors the inline
 * unlink done in cbq_dequeue_prio(); also retires the band when @this was
 * its only member.
 */
static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	/* Walk the ring starting at the tail until we find @this. */
	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				/* @this was the tail pointer: step it back. */
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					/* Ring had one element: band is idle now. */
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
348
349
/* Lower the top-level link-sharing cutoff to the level of the first
 * class on @cl's borrow chain that is already underlimit, allowing
 * descendants to borrow again.
 */
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now;
		psched_tdiff_t incr;

		/* Advance the cached virtual clock by real time elapsed. */
		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}
370
371
/* Qdisc ->enqueue(): classify the skb, hand it to the class's inner
 * qdisc, and on success update qlen, the toplevel cutoff, and the band's
 * active list.  Drop statistics are charged on failure.
 */
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	/* Remember the class for a possible cbq_reshape_fail() callback. */
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		/* next_alive == NULL means the class was idle: activate it. */
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}
407
408
/* Overlimit actions */
409
410
/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
411
412
/* TC_CBQ_OVL_CLASSIC overlimit action: push the class's undertime out by
 * offtime (plus the avgidle debt) and arm the qdisc watchdog for the
 * earliest useful wakeup.
 */
static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * Class goes to sleep, so that it will have no
		 * chance to work avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a crap in this
		 * place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	/* Keep the earliest pending expiry for the watchdog. */
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * real available rate, rather than leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
461
462
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
463
* they go overlimit
464
*/
465
466
/* TC_CBQ_OVL_RCLASSIC overlimit action: intended to penalize a class on
 * the borrow chain rather than the leaf.
 *
 * NOTE(review): both ways out of the loop leave cl == NULL (the break
 * sets it, and the while condition only exits on NULL), so the penalty
 * appears to always land on the original class — preserved as-is.
 */
static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
482
483
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
484
485
/* TC_CBQ_OVL_DELAY overlimit action: move the class to the penalty band
 * (TC_CBQ_MAXPRIO) until it goes underlimit, waking it via delay_timer
 * (see cbq_undelay()).  Falls back to the watchdog path when no positive
 * delay results.
 */
static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	/* Nothing to schedule while the device is being deactivated. */
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		/* Same avgidle-debt adjustment as cbq_ovl_classic(). */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			/* Only pull the timer earlier, never push it out. */
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
528
529
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
530
531
static void cbq_ovl_lowprio(struct cbq_class *cl)
532
{
533
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
534
535
cl->penalized = q->now + cl->penalty;
536
537
if (cl->cpriority != cl->priority2) {
538
cl->cpriority = cl->priority2;
539
q->pmask |= (1<<cl->cpriority);
540
cl->xstats.overactions++;
541
}
542
cbq_ovl_classic(cl);
543
}
544
545
/* TC_CBQ_OVL_DROP: penalize class by dropping */
546
547
static void cbq_ovl_drop(struct cbq_class *cl)
548
{
549
if (cl->q->ops->drop)
550
if (cl->q->ops->drop(cl->q))
551
cl->qdisc->q.qlen--;
552
cl->xstats.overactions++;
553
cbq_ovl_classic(cl);
554
}
555
556
/* Scan one penalty band: re-activate every class whose penalty has
 * expired (restoring its normal priority), and return how long until the
 * earliest still-pending penalty in this band expires (0 if none).
 */
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			/* Penalty expired: unlink from the penalty ring and
			 * put the class back on its own band.
			 */
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					/* Band emptied out. */
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			/* Track the earliest future expiry. */
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
590
591
/* delay_timer callback: release expired penalties in every penalized
 * band, re-arm the timer for the earliest remaining expiry, and kick the
 * qdisc so dequeue runs again.
 */
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	/* Take ownership of the pending set; bands with remaining
	 * penalties are added back below.
	 */
	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}
631
632
#ifdef CONFIG_NET_CLS_ACT
633
/* Called by a child qdisc when policing bounced a packet: try to
 * reclassify it into an ancestor's default class and re-enqueue there.
 * Returns 0 when the packet was consumed (enqueued or dropped with stats
 * charged), -1 when the caller should drop it.
 */
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
664
#endif
665
666
/*
667
* It is mission critical procedure.
668
*
669
* We "regenerate" toplevel cutoff, if transmitting class
670
* has backlog and it is not regulated. It is not part of
671
* original CBQ description, but looks more reasonable.
672
* Probably, it is wrong. This question needs further investigation.
673
*/
674
675
/* "Regenerate" the toplevel cutoff: if the transmitting class still has
 * backlog and some class on the borrow chain is unregulated
 * (undertime == PSCHED_PASTPERFECT), lower the cutoff to that level.
 * Not part of the original CBQ description — see the comment above.
 */
static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
696
697
/* Core estimator: after a packet from q->tx_class finished, walk the
 * link-sharing chain (->share), update each class's EWMA of idle time
 * (avgidle) and compute undertime — the moment the class will be allowed
 * to send again if it is at/over limit.
 */
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is total time between packet right edges.
		 * (last_pktlen/rate) is "virtual" busy time, so that
		 *
		 * idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			/* Idle for a very long time: saturate the estimate. */
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
		 * cl->avgidle == true_avgidle/W,
		 * hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			 * will be allowed to send.
			 * It will occur, when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime virtual clock,
			 * necessary to complete transmitted packet.
			 * (len/phys_bandwidth has been already passed
			 * to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
779
780
/* Return the class that may actually transmit for @cl: @cl itself when
 * underlimit, an underlimit ancestor on the borrow chain, or NULL when
 * no bandwidth can be borrowed (the leaf's overlimit action is invoked
 * in that case).
 */
static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	/* The root class is never limited. */
	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is very suspicious place. Now overlimit
		 * action is generated for not bounded classes
		 * only if link is completely congested.
		 * Though it is in agree with ancestor-only paradigm,
		 * it looks very stupid. Particularly,
		 * it means that this chunk of code will either
		 * never be called or result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no another solution exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
818
819
/* Run one (or more) WRR rounds over the circular ring of backlogged
 * classes in @prio, returning the first dequeued packet.  Classes that
 * are empty, penalized, or cannot borrow are skipped (and possibly
 * unlinked); a round is replayed while some class merely exhausted its
 * quantum (deficit).
 */
static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0
			 * f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			/* Remember who transmitted (and who lent bandwidth)
			 * for the next cbq_update() pass.
			 */
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				/* Quantum used up: rotate the ring so the
				 * next class starts the following call.
				 */
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				/* A penalized class with backlog re-enters on
				 * its (new) effective priority band.
				 */
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
921
922
static inline struct sk_buff *
923
cbq_dequeue_1(struct Qdisc *sch)
924
{
925
struct cbq_sched_data *q = qdisc_priv(sch);
926
struct sk_buff *skb;
927
unsigned int activemask;
928
929
activemask = q->activemask & 0xFF;
930
while (activemask) {
931
int prio = ffz(~activemask);
932
activemask &= ~(1<<prio);
933
skb = cbq_dequeue_prio(sch, prio);
934
if (skb)
935
return skb;
936
}
937
return NULL;
938
}
939
940
/* Qdisc ->dequeue(): advance the integrated virtual clock, account the
 * previous transmission via cbq_update(), then pull a packet.  When
 * everything is overlimit, reset the toplevel cutoff once and retry; if
 * still nothing and backlog remains, arm the watchdog.
 */
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		 * by adding expected packet transmission time.
		 * If real time is greater, we warp artificial clock,
		 * so that:
		 *
		 * cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible, if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset 2d and 3d conditions and retry.
		 *
		 * Note, that NS and cbq-2.0 are buggy, peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for toplevel algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes, but it is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start watchdog timer in the last case.
	 */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
1018
1019
/* CBQ class maintenance routines */
1020
1021
static void cbq_adjust_levels(struct cbq_class *this)
1022
{
1023
if (this == NULL)
1024
return;
1025
1026
do {
1027
int level = 0;
1028
struct cbq_class *cl;
1029
1030
cl = this->children;
1031
if (cl) {
1032
do {
1033
if (cl->level > level)
1034
level = cl->level;
1035
} while ((cl = cl->sibling) != this->children);
1036
}
1037
this->level = level + 1;
1038
} while ((this = this->tparent) != NULL);
1039
}
1040
1041
/* Recompute per-round WRR quanta for every class in band @prio from its
 * weight relative to the band's total weight, clamping obviously bad
 * values to a sane default based on the device MTU.
 */
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffer of
			 * arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			/* Repair nonsensical quanta (zero, negative, or
			 * absurdly larger than the MTU).
			 */
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
					   cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
1067
1068
/* Rebuild the per-TC_PRIO default-class table of @cl's split node after
 * @cl's defmap changed: drop stale entries pointing at @cl, then refill
 * empty slots with the lowest-level class claiming each priority.
 */
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	/* Remove defaults @cl no longer claims. */
	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		/* Pick the lowest-level class under this split that
		 * claims priority i.
		 */
		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
1104
1105
/* Change which priorities @cl is default for.  @splitid selects the
 * split node (0 keeps the current one); it must be an ancestor of @cl.
 * Bits of @def selected by @mask replace the corresponding defmap bits.
 */
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		split = cl->split;
		if (!split)
			return;
		splitid = split->common.classid;
	}

	/* Locate the requested split node among @cl's ancestors. */
	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		/* Moving to another split node: first retract all old
		 * defaults, then attach with the new map.
		 */
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def & mask;
	} else
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
}
1135
1136
/* Remove @this from the class hash and from its parent's circular
 * sibling ring, fixing the parent's children pointer when needed.
 */
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		/* Walk the ring to find the link pointing at @this and
		 * make it skip over us.
		 */
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			/* @this was the only child. */
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		/* A root-level class must be a singleton ring. */
		WARN_ON(this->sibling != this);
	}
}
1163
1164
static void cbq_link_class(struct cbq_class *this)
1165
{
1166
struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1167
struct cbq_class *parent = this->tparent;
1168
1169
this->sibling = this;
1170
qdisc_class_hash_insert(&q->clhash, &this->common);
1171
1172
if (parent == NULL)
1173
return;
1174
1175
if (parent->children == NULL) {
1176
parent->children = this;
1177
} else {
1178
this->sibling = parent->children->sibling;
1179
parent->children->sibling = this;
1180
}
1181
}
1182
1183
/* Qdisc ->drop(): drop one packet from the lowest-importance backlogged
 * class (scanning bands from TC_CBQ_MAXPRIO down).  Returns the dropped
 * packet's length, or 0 if nothing could be dropped.
 */
static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		cl_head = q->active[prio];
		if (!cl_head)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				/* Class went empty: take it off the ring. */
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
1207
1208
/* Qdisc ->reset(): cancel timers, clear the active/penalty bookkeeping,
 * reset the clocks, and reset every class (and its inner qdisc) to its
 * configured initial state.
 */
static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
1243
1244
1245
/*
 * Apply link-sharing (LSS) parameters from a TCA_CBQ_LSSOPT attribute.
 * Only fields flagged in lss->change are modified.  Always returns 0.
 */
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change & TCF_CBQ_LSS_FLAGS) {
		/* ISOLATED: don't lend bandwidth; BOUNDED: don't borrow. */
		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change & TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change & TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change & TCF_CBQ_LSS_MINIDLE)
		/* minidle travels as a positive value but is stored negated. */
		cl->minidle = -(long)lss->minidle;
	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change & TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1267
{
1268
q->nclasses[cl->priority]--;
1269
q->quanta[cl->priority] -= cl->weight;
1270
cbq_normalize_quanta(q, cl->priority);
1271
}
1272
1273
static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1274
{
1275
q->nclasses[cl->priority]++;
1276
q->quanta[cl->priority] += cl->weight;
1277
cbq_normalize_quanta(q, cl->priority);
1278
}
1279
1280
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1281
{
1282
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1283
1284
if (wrr->allot)
1285
cl->allot = wrr->allot;
1286
if (wrr->weight)
1287
cl->weight = wrr->weight;
1288
if (wrr->priority) {
1289
cl->priority = wrr->priority - 1;
1290
cl->cpriority = cl->priority;
1291
if (cl->priority >= cl->priority2)
1292
cl->priority2 = TC_CBQ_MAXPRIO - 1;
1293
}
1294
1295
cbq_addprio(q, cl);
1296
return 0;
1297
}
1298
1299
/*
 * Select the class's overlimit action from a TCA_CBQ_OVL_STRATEGY
 * attribute.  Returns 0 on success, -EINVAL for an unknown strategy or
 * an invalid priority2 for the LOWPRIO strategy.
 */
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		/* priority2 is 1-based on the wire; the demoted priority
		 * must be below TC_CBQ_MAXPRIO and strictly lower (numerically
		 * higher) than the class's own priority.
		 */
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_ACT
/*
 * Apply the policing mode from a TCA_CBQ_POLICE attribute.  For
 * TC_POLICE_RECLASSIFY, install the reshape_fail hook on the leaf qdisc
 * so over-limit packets can be pushed back for reclassification.
 */
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
/* Apply TCA_CBQ_FOPT: repoint the class's defmap at a new split class.
 * Always returns 0.
 */
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
/* Netlink attribute validation policy for CBQ options (minimum lengths
 * enforced by nla_parse_nested()).
 */
static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1360
{
1361
struct cbq_sched_data *q = qdisc_priv(sch);
1362
struct nlattr *tb[TCA_CBQ_MAX + 1];
1363
struct tc_ratespec *r;
1364
int err;
1365
1366
err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1367
if (err < 0)
1368
return err;
1369
1370
if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1371
return -EINVAL;
1372
1373
r = nla_data(tb[TCA_CBQ_RATE]);
1374
1375
if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1376
return -EINVAL;
1377
1378
err = qdisc_class_hash_init(&q->clhash);
1379
if (err < 0)
1380
goto put_rtab;
1381
1382
q->link.refcnt = 1;
1383
q->link.sibling = &q->link;
1384
q->link.common.classid = sch->handle;
1385
q->link.qdisc = sch;
1386
q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1387
sch->handle);
1388
if (!q->link.q)
1389
q->link.q = &noop_qdisc;
1390
1391
q->link.priority = TC_CBQ_MAXPRIO - 1;
1392
q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1393
q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1394
q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1395
q->link.overlimit = cbq_ovl_classic;
1396
q->link.allot = psched_mtu(qdisc_dev(sch));
1397
q->link.quantum = q->link.allot;
1398
q->link.weight = q->link.R_tab->rate.rate;
1399
1400
q->link.ewma_log = TC_CBQ_DEF_EWMA;
1401
q->link.avpkt = q->link.allot/2;
1402
q->link.minidle = -0x7FFFFFFF;
1403
1404
qdisc_watchdog_init(&q->watchdog, sch);
1405
hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1406
q->delay_timer.function = cbq_undelay;
1407
q->toplevel = TC_CBQ_MAXLEVEL;
1408
q->now = psched_get_time();
1409
q->now_rt = q->now;
1410
1411
cbq_link_class(&q->link);
1412
1413
if (tb[TCA_CBQ_LSSOPT])
1414
cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1415
1416
cbq_addprio(q, &q->link);
1417
return 0;
1418
1419
put_rtab:
1420
qdisc_put_rtab(q->link.R_tab);
1421
return err;
1422
}
1423
1424
/* Dump the class's rate spec into skb.
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
/* Dump link-sharing parameters (inverse of cbq_set_lss()).
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	/* minidle is stored negated; un-negate for the wire format. */
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1462
{
1463
unsigned char *b = skb_tail_pointer(skb);
1464
struct tc_cbq_wrropt opt;
1465
1466
opt.flags = 0;
1467
opt.allot = cl->allot;
1468
opt.priority = cl->priority + 1;
1469
opt.cpriority = cl->cpriority + 1;
1470
opt.weight = cl->weight;
1471
NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1472
return skb->len;
1473
1474
nla_put_failure:
1475
nlmsg_trim(skb, b);
1476
return -1;
1477
}
1478
1479
/* Dump the overlimit strategy (inverse of cbq_set_overlimit()).
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	/* Internal priority2 is 0-based; wire format is 1-based. */
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
/* Dump filter/defmap options; emitted only when a split or defmap is set.
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#ifdef CONFIG_NET_CLS_ACT
/* Dump the policing mode; emitted only when policing is configured.
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
/* Dump all per-class attributes; returns -1 as soon as one dump helper
 * fails (skb out of room), 0 on success.
 */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
/* Qdisc ->dump handler: emit the root (link) class's attributes inside
 * a nested TCA_OPTIONS attribute.
 */
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
/* Qdisc ->dump_stats handler: export the root class's CBQ-specific
 * extended statistics.
 */
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	/* Refresh the snapshot before copying it out. */
	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}
/* Class ->dump handler: fill in the tcmsg header fields for one class
 * and emit its attributes inside a nested TCA_OPTIONS attribute.
 */
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
/* Class ->dump_stats handler: export basic, rate-estimator, queue and
 * CBQ-specific statistics for one class.
 */
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	/* undertime is reported relative to the current scheduler clock. */
	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
/* Class ->graft handler: attach a new leaf qdisc to a class, returning
 * the previous one in *old.  A NULL new qdisc is replaced by a fresh
 * default pfifo.
 */
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		/* Preserve the reclassify hook on the replacement leaf. */
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	/* Propagate the queue-length drop up the tree before resetting. */
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}
static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1651
{
1652
struct cbq_class *cl = (struct cbq_class *)arg;
1653
1654
return cl->q;
1655
}
1656
1657
static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1658
{
1659
struct cbq_class *cl = (struct cbq_class *)arg;
1660
1661
if (cl->q->q.qlen == 0)
1662
cbq_deactivate_class(cl);
1663
}
1664
1665
/* Class ->get handler: look up a class by classid and take a reference.
 * Returns the class pointer as an opaque handle, or 0 if not found.
 */
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl == NULL)
		return 0;

	cl->refcnt++;
	return (unsigned long)cl;
}
/* Free one class: its filter chain, leaf qdisc, rate table and rate
 * estimator.  The embedded root (link) class is not kfree'd since it
 * lives inside the qdisc's private area.
 */
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	/* All filters must have been unbound by now. */
	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}
/* Qdisc ->destroy handler: tear down every class and the class hash. */
static void cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	/* _safe variant: cbq_destroy_class() unlinks and frees entries. */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}
/* Class ->put handler: drop a reference; destroy the class when the
 * last reference goes away.
 */
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		/* Clear a possibly dangling rx_class under the root lock. */
		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
/*
 * Class ->change handler: modify an existing class (*arg != 0) or create
 * a new one.  On creation, returns the new class handle through *arg.
 * Configuration changes to live classes are applied under the tree lock.
 */
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent: reparenting an existing class is not allowed. */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			/* Re-account priority band: remove, change, re-add. */
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	/* Creation path: a new class cannot itself be the root. */
	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		/* Must belong to this qdisc's major handle and be unused. */
		if (TC_H_MAJ(classid ^ sch->handle) ||
		    cbq_class_lookup(q, classid))
			goto failure;
	} else {
		/* No classid given: auto-generate one in the 0x8000+ range. */
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;	/* ownership transferred to the class */
	cl->refcnt = 1;
	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (!cl->q)
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	/* Inherit unset parameters from the root (link) class. */
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
/*
 * Class ->delete handler: detach a class from the hierarchy.  Refuses
 * while filters or children still reference it, and never removes the
 * root (link) class.  The actual free happens via cbq_put().
 */
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	/* Forget any in-flight transmit state pointing at this class. */
	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1973
{
1974
struct cbq_sched_data *q = qdisc_priv(sch);
1975
struct cbq_class *cl = (struct cbq_class *)arg;
1976
1977
if (cl == NULL)
1978
cl = &q->link;
1979
1980
return &cl->filter_list;
1981
}
1982
1983
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1984
u32 classid)
1985
{
1986
struct cbq_sched_data *q = qdisc_priv(sch);
1987
struct cbq_class *p = (struct cbq_class *)parent;
1988
struct cbq_class *cl = cbq_class_lookup(q, classid);
1989
1990
if (cl) {
1991
if (p && p->level <= cl->level)
1992
return 0;
1993
cl->filters++;
1994
return (unsigned long)cl;
1995
}
1996
return 0;
1997
}
1998
1999
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2000
{
2001
struct cbq_class *cl = (struct cbq_class *)arg;
2002
2003
cl->filters--;
2004
}
2005
2006
/* Class ->walk handler: invoke arg->fn for every class, honouring the
 * walker's skip/count/stop protocol.
 */
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* Skip the first arg->skip classes. */
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
/* Per-class operations exported to the qdisc core. */
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		= cbq_graft,
	.leaf		= cbq_leaf,
	.qlen_notify	= cbq_qlen_notify,
	.get		= cbq_get,
	.put		= cbq_put,
	.change		= cbq_change_class,
	.delete		= cbq_delete,
	.walk		= cbq_walk,
	.tcf_chain	= cbq_find_tcf,
	.bind_tcf	= cbq_bind_filter,
	.unbind_tcf	= cbq_unbind_filter,
	.dump		= cbq_dump_class,
	.dump_stats	= cbq_dump_class_stats,
};
/* Qdisc operations table registered with the packet scheduler core. */
static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &cbq_class_ops,
	.id		= "cbq",
	.priv_size	= sizeof(struct cbq_sched_data),
	.enqueue	= cbq_enqueue,
	.dequeue	= cbq_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= cbq_drop,
	.init		= cbq_init,
	.reset		= cbq_reset,
	.destroy	= cbq_destroy,
	.change		= NULL,
	.dump		= cbq_dump,
	.dump_stats	= cbq_dump_stats,
	.owner		= THIS_MODULE,
};
/* Module entry point: register the "cbq" qdisc type. */
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
/* Module exit point: unregister the "cbq" qdisc type. */
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");