CoCalc -- sch

GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/sched/sch_netem.c
¹⁷⁵³⁹ views
1
/*
2
 * net/sched/sch_netem.c	Network emulator
3
 *
4
 * 		This program is free software; you can redistribute it and/or
5
 * 		modify it under the terms of the GNU General Public License
6
 * 		as published by the Free Software Foundation; either version
7
 * 		2 of the License.
8
 *
9
 *  		Many of the algorithms and ideas for this came from
10
 *		NIST Net which is not copyrighted.
11
 *
12
 * Authors:	Stephen Hemminger <[email protected]>
13
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
14
 */
15

16
#include <linux/module.h>
17
#include <linux/slab.h>
18
#include <linux/types.h>
19
#include <linux/kernel.h>
20
#include <linux/errno.h>
21
#include <linux/skbuff.h>
22
#include <linux/vmalloc.h>
23
#include <linux/rtnetlink.h>
24

25
#include <net/netlink.h>
26
#include <net/pkt_sched.h>
27

28
#define VERSION "1.3"
29

30
/*	Network Emulation Queuing algorithm.
31
	====================================
32

33
	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
34
		 Network Emulation Tool"
35
		 [2] Luigi Rizzo, DummyNet for FreeBSD
36

37
	 ----------------------------------------------------------------
38

39
	 This started out as a simple way to delay outgoing packets to
40
	 test TCP but has grown to include most of the functionality
41
	 of a full blown network emulator like NISTnet. It can delay
42
	 packets and add random jitter (and correlation). The random
43
	 distribution can be loaded from a table as well to provide
44
	 normal, Pareto, or experimental curves. Packet loss,
45
	 duplication, and reordering can also be emulated.
46

47
	 This qdisc does not do classification that can be handled in
48
	 layering other disciplines.  It does not need to do bandwidth
49
	 control either since that can be handled by using token
50
	 bucket or other rate control.
51

52
     Correlated Loss Generator models
53

54
	Added generation of correlated loss according to the
55
	4-state Markov (GI) model or the 2-state "Gilbert-Elliot" model.
56

57
	References:
58
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
59
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
60
	and intuitive loss model for packet networks and its implementation
61
	in the Netem module in the Linux kernel", available in [1]
62

63
	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
64
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
65
*/
66

67
struct netem_sched_data {
68
	struct Qdisc	*qdisc;
69
	struct qdisc_watchdog watchdog;
70

71
	psched_tdiff_t latency;
72
	psched_tdiff_t jitter;
73

74
	u32 loss;
75
	u32 limit;
76
	u32 counter;
77
	u32 gap;
78
	u32 duplicate;
79
	u32 reorder;
80
	u32 corrupt;
81

82
	struct crndstate {
83
		u32 last;
84
		u32 rho;
85
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
86

87
	struct disttable {
88
		u32  size;
89
		s16 table[0];
90
	} *delay_dist;
91

92
	enum  {
93
		CLG_RANDOM,
94
		CLG_4_STATES,
95
		CLG_GILB_ELL,
96
	} loss_model;
97

98
	/* Correlated Loss Generation models */
99
	struct clgstate {
100
		/* state of the Markov chain */
101
		u8 state;
102

103
		/* 4-states and Gilbert-Elliot models */
104
		u32 a1;	/* p13 for 4-states or p for GE */
105
		u32 a2;	/* p31 for 4-states or r for GE */
106
		u32 a3;	/* p32 for 4-states or h for GE */
107
		u32 a4;	/* p14 for 4-states or 1-k for GE */
108
		u32 a5; /* p23 used only in 4-states */
109
	} clg;
110

111
};
112

113
/* Time stamp put into socket buffer control block */
114
struct netem_skb_cb {
115
	psched_time_t	time_to_send;
116
};
117

118
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
119
{
120
	BUILD_BUG_ON(sizeof(skb->cb) <
121
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
122
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
123
}
124

125
/* init_crandom - initialize correlated random number generator
126
 * Use entropy source for initial seed.
127
 */
128
static void init_crandom(struct crndstate *state, unsigned long rho)
129
{
130
	state->rho = rho;
131
	state->last = net_random();
132
}
133

134
/* get_crandom - correlated random number generator
135
 * Next number depends on last value.
136
 * rho is scaled to avoid floating point.
137
 */
138
static u32 get_crandom(struct crndstate *state)
139
{
140
	u64 value, rho;
141
	unsigned long answer;
142

143
	if (state->rho == 0)	/* no correlation */
144
		return net_random();
145

146
	value = net_random();
147
	rho = (u64)state->rho + 1;
148
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
149
	state->last = answer;
150
	return answer;
151
}
152

153
/* loss_4state - 4-state model loss generator
154
 * Generates losses according to the 4-state Markov chain adopted in
155
 * the GI (General and Intuitive) loss model.
156
 */
157
static bool loss_4state(struct netem_sched_data *q)
158
{
159
	struct clgstate *clg = &q->clg;
160
	u32 rnd = net_random();
161

162
	/*
163
	 * Makes a comparison between rnd and the transition
164
	 * probabilities outgoing from the current state, then decides the
165
	 * next state and if the next packet has to be transmitted or lost.
166
	 * The four states correspond to:
167
	 *   1 => successfully transmitted packets within a gap period
168
	 *   4 => isolated losses within a gap period
169
	 *   3 => lost packets within a burst period
170
	 *   2 => successfully transmitted packets within a burst period
171
	 */
172
	switch (clg->state) {
173
	case 1:
174
		if (rnd < clg->a4) {
175
			clg->state = 4;
176
			return true;
177
		} else if (clg->a4 < rnd && rnd < clg->a1) {
178
			clg->state = 3;
179
			return true;
180
		} else if (clg->a1 < rnd)
181
			clg->state = 1;
182

183
		break;
184
	case 2:
185
		if (rnd < clg->a5) {
186
			clg->state = 3;
187
			return true;
188
		} else
189
			clg->state = 2;
190

191
		break;
192
	case 3:
193
		if (rnd < clg->a3)
194
			clg->state = 2;
195
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
196
			clg->state = 1;
197
			return true;
198
		} else if (clg->a2 + clg->a3 < rnd) {
199
			clg->state = 3;
200
			return true;
201
		}
202
		break;
203
	case 4:
204
		clg->state = 1;
205
		break;
206
	}
207

208
	return false;
209
}
210

211
/* loss_gilb_ell - Gilbert-Elliot model loss generator
212
 * Generates losses according to the Gilbert-Elliot loss model or
213
 * its special cases  (Gilbert or Simple Gilbert)
214
 *
215
 * Makes a comparison between random number and the transition
216
 * probabilities outgoing from the current state, then decides the
217
 * next state. A second random number is extracted and the comparison
218
 * with the loss probability of the current state decides if the next
219
 * packet will be transmitted or lost.
220
 */
221
static bool loss_gilb_ell(struct netem_sched_data *q)
222
{
223
	struct clgstate *clg = &q->clg;
224

225
	switch (clg->state) {
226
	case 1:
227
		if (net_random() < clg->a1)
228
			clg->state = 2;
229
		if (net_random() < clg->a4)
230
			return true;
231
	case 2:
232
		if (net_random() < clg->a2)
233
			clg->state = 1;
234
		if (clg->a3 > net_random())
235
			return true;
236
	}
237

238
	return false;
239
}
240

241
static bool loss_event(struct netem_sched_data *q)
242
{
243
	switch (q->loss_model) {
244
	case CLG_RANDOM:
245
		/* Random packet drop 0 => none, ~0 => all */
246
		return q->loss && q->loss >= get_crandom(&q->loss_cor);
247

248
	case CLG_4_STATES:
249
		/* 4state loss model algorithm (used also for GI model)
250
		* Extracts a value from the markov 4 state loss generator,
251
		* if it is 1 drops a packet and if needed writes the event in
252
		* the kernel logs
253
		*/
254
		return loss_4state(q);
255

256
	case CLG_GILB_ELL:
257
		/* Gilbert-Elliot loss model algorithm
258
		* Extracts a value from the Gilbert-Elliot loss generator,
259
		* if it is 1 drops a packet and if needed writes the event in
260
		* the kernel logs
261
		*/
262
		return loss_gilb_ell(q);
263
	}
264

265
	return false;	/* not reached */
266
}
267

268

269
/* tabledist - return a pseudo-randomly distributed value with mean mu and
270
 * std deviation sigma.  Uses table lookup to approximate the desired
271
 * distribution, and a uniformly-distributed pseudo-random source.
272
 */
273
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
274
				struct crndstate *state,
275
				const struct disttable *dist)
276
{
277
	psched_tdiff_t x;
278
	long t;
279
	u32 rnd;
280

281
	if (sigma == 0)
282
		return mu;
283

284
	rnd = get_crandom(state);
285

286
	/* default uniform distribution */
287
	if (dist == NULL)
288
		return (rnd % (2*sigma)) - sigma + mu;
289

290
	t = dist->table[rnd % dist->size];
291
	x = (sigma % NETEM_DIST_SCALE) * t;
292
	if (x >= 0)
293
		x += NETEM_DIST_SCALE/2;
294
	else
295
		x -= NETEM_DIST_SCALE/2;
296

297
	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
298
}
299

300
/*
301
 * Insert one skb into qdisc.
302
 * Note: parent depends on return value to account for queue length.
303
 * 	NET_XMIT_DROP: queue length didn't change.
304
 *      NET_XMIT_SUCCESS: one skb was queued.
305
 */
306
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
307
{
308
	struct netem_sched_data *q = qdisc_priv(sch);
309
	/* We don't fill cb now as skb_unshare() may invalidate it */
310
	struct netem_skb_cb *cb;
311
	struct sk_buff *skb2;
312
	int ret;
313
	int count = 1;
314

315
	/* Random duplication */
316
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
317
		++count;
318

319
	/* Drop packet? */
320
	if (loss_event(q))
321
		--count;
322

323
	if (count == 0) {
324
		sch->qstats.drops++;
325
		kfree_skb(skb);
326
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
327
	}
328

329
	skb_orphan(skb);
330

331
	/*
332
	 * If we need to duplicate packet, then re-insert at top of the
333
	 * qdisc tree, since parent queuer expects that only one
334
	 * skb will be queued.
335
	 */
336
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
337
		struct Qdisc *rootq = qdisc_root(sch);
338
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
339
		q->duplicate = 0;
340

341
		qdisc_enqueue_root(skb2, rootq);
342
		q->duplicate = dupsave;
343
	}
344

345
	/*
346
	 * Randomized packet corruption.
347
	 * Make copy if needed since we are modifying
348
	 * If packet is going to be hardware checksummed, then
349
	 * do it now in software before we mangle it.
350
	 */
351
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
352
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
353
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
354
		     skb_checksum_help(skb))) {
355
			sch->qstats.drops++;
356
			return NET_XMIT_DROP;
357
		}
358

359
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
360
	}
361

362
	cb = netem_skb_cb(skb);
363
	if (q->gap == 0 ||		/* not doing reordering */
364
	    q->counter < q->gap ||	/* inside last reordering gap */
365
	    q->reorder < get_crandom(&q->reorder_cor)) {
366
		psched_time_t now;
367
		psched_tdiff_t delay;
368

369
		delay = tabledist(q->latency, q->jitter,
370
				  &q->delay_cor, q->delay_dist);
371

372
		now = psched_get_time();
373
		cb->time_to_send = now + delay;
374
		++q->counter;
375
		ret = qdisc_enqueue(skb, q->qdisc);
376
	} else {
377
		/*
378
		 * Do re-ordering by putting one out of N packets at the front
379
		 * of the queue.
380
		 */
381
		cb->time_to_send = psched_get_time();
382
		q->counter = 0;
383

384
		__skb_queue_head(&q->qdisc->q, skb);
385
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
386
		q->qdisc->qstats.requeues++;
387
		ret = NET_XMIT_SUCCESS;
388
	}
389

390
	if (ret != NET_XMIT_SUCCESS) {
391
		if (net_xmit_drop_count(ret)) {
392
			sch->qstats.drops++;
393
			return ret;
394
		}
395
	}
396

397
	sch->q.qlen++;
398
	return NET_XMIT_SUCCESS;
399
}
400

401
static unsigned int netem_drop(struct Qdisc *sch)
402
{
403
	struct netem_sched_data *q = qdisc_priv(sch);
404
	unsigned int len = 0;
405

406
	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
407
		sch->q.qlen--;
408
		sch->qstats.drops++;
409
	}
410
	return len;
411
}
412

413
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
414
{
415
	struct netem_sched_data *q = qdisc_priv(sch);
416
	struct sk_buff *skb;
417

418
	if (qdisc_is_throttled(sch))
419
		return NULL;
420

421
	skb = q->qdisc->ops->peek(q->qdisc);
422
	if (skb) {
423
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
424
		psched_time_t now = psched_get_time();
425

426
		/* if more time remaining? */
427
		if (cb->time_to_send <= now) {
428
			skb = qdisc_dequeue_peeked(q->qdisc);
429
			if (unlikely(!skb))
430
				return NULL;
431

432
#ifdef CONFIG_NET_CLS_ACT
433
			/*
434
			 * If it's at ingress let's pretend the delay is
435
			 * from the network (tstamp will be updated).
436
			 */
437
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
438
				skb->tstamp.tv64 = 0;
439
#endif
440

441
			sch->q.qlen--;
442
			qdisc_unthrottled(sch);
443
			qdisc_bstats_update(sch, skb);
444
			return skb;
445
		}
446

447
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
448
	}
449

450
	return NULL;
451
}
452

453
static void netem_reset(struct Qdisc *sch)
454
{
455
	struct netem_sched_data *q = qdisc_priv(sch);
456

457
	qdisc_reset(q->qdisc);
458
	sch->q.qlen = 0;
459
	qdisc_watchdog_cancel(&q->watchdog);
460
}
461

462
/*
 * dist_free - release a distribution table
 *
 * The table may come from kmalloc() or vmalloc() (see
 * get_dist_table()), so the matching free routine is chosen from the
 * address.  A NULL pointer is ignored.
 */
static void dist_free(struct disttable *d)
{
	if (!d)
		return;

	if (is_vmalloc_addr(d)) {
		vfree(d);
		return;
	}

	kfree(d);
}
471

472
/*
473
 * Distribution data is a variable size payload containing
474
 * signed 16 bit values.
475
 */
476
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
477
{
478
	struct netem_sched_data *q = qdisc_priv(sch);
479
	size_t n = nla_len(attr)/sizeof(__s16);
480
	const __s16 *data = nla_data(attr);
481
	spinlock_t *root_lock;
482
	struct disttable *d;
483
	int i;
484
	size_t s;
485

486
	if (n > NETEM_DIST_MAX)
487
		return -EINVAL;
488

489
	s = sizeof(struct disttable) + n * sizeof(s16);
490
	d = kmalloc(s, GFP_KERNEL);
491
	if (!d)
492
		d = vmalloc(s);
493
	if (!d)
494
		return -ENOMEM;
495

496
	d->size = n;
497
	for (i = 0; i < n; i++)
498
		d->table[i] = data[i];
499

500
	root_lock = qdisc_root_sleeping_lock(sch);
501

502
	spin_lock_bh(root_lock);
503
	dist_free(q->delay_dist);
504
	q->delay_dist = d;
505
	spin_unlock_bh(root_lock);
506
	return 0;
507
}
508

509
static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
510
{
511
	struct netem_sched_data *q = qdisc_priv(sch);
512
	const struct tc_netem_corr *c = nla_data(attr);
513

514
	init_crandom(&q->delay_cor, c->delay_corr);
515
	init_crandom(&q->loss_cor, c->loss_corr);
516
	init_crandom(&q->dup_cor, c->dup_corr);
517
}
518

519
static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
520
{
521
	struct netem_sched_data *q = qdisc_priv(sch);
522
	const struct tc_netem_reorder *r = nla_data(attr);
523

524
	q->reorder = r->probability;
525
	init_crandom(&q->reorder_cor, r->correlation);
526
}
527

528
static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
529
{
530
	struct netem_sched_data *q = qdisc_priv(sch);
531
	const struct tc_netem_corrupt *r = nla_data(attr);
532

533
	q->corrupt = r->probability;
534
	init_crandom(&q->corrupt_cor, r->correlation);
535
}
536

537
static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
538
{
539
	struct netem_sched_data *q = qdisc_priv(sch);
540
	const struct nlattr *la;
541
	int rem;
542

543
	nla_for_each_nested(la, attr, rem) {
544
		u16 type = nla_type(la);
545

546
		switch(type) {
547
		case NETEM_LOSS_GI: {
548
			const struct tc_netem_gimodel *gi = nla_data(la);
549

550
			if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
551
				pr_info("netem: incorrect gi model size\n");
552
				return -EINVAL;
553
			}
554

555
			q->loss_model = CLG_4_STATES;
556

557
			q->clg.state = 1;
558
			q->clg.a1 = gi->p13;
559
			q->clg.a2 = gi->p31;
560
			q->clg.a3 = gi->p32;
561
			q->clg.a4 = gi->p14;
562
			q->clg.a5 = gi->p23;
563
			break;
564
		}
565

566
		case NETEM_LOSS_GE: {
567
			const struct tc_netem_gemodel *ge = nla_data(la);
568

569
			if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
570
				pr_info("netem: incorrect gi model size\n");
571
				return -EINVAL;
572
			}
573

574
			q->loss_model = CLG_GILB_ELL;
575
			q->clg.state = 1;
576
			q->clg.a1 = ge->p;
577
			q->clg.a2 = ge->r;
578
			q->clg.a3 = ge->h;
579
			q->clg.a4 = ge->k1;
580
			break;
581
		}
582

583
		default:
584
			pr_info("netem: unknown loss type %u\n", type);
585
			return -EINVAL;
586
		}
587
	}
588

589
	return 0;
590
}
591

592
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
593
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
594
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
595
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
596
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
597
};
598

599
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
600
		      const struct nla_policy *policy, int len)
601
{
602
	int nested_len = nla_len(nla) - NLA_ALIGN(len);
603

604
	if (nested_len < 0) {
605
		pr_info("netem: invalid attributes len %d\n", nested_len);
606
		return -EINVAL;
607
	}
608

609
	if (nested_len >= nla_attr_size(0))
610
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
611
				 nested_len, policy);
612

613
	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
614
	return 0;
615
}
616

617
/* Parse netlink message to set options */
618
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
619
{
620
	struct netem_sched_data *q = qdisc_priv(sch);
621
	struct nlattr *tb[TCA_NETEM_MAX + 1];
622
	struct tc_netem_qopt *qopt;
623
	int ret;
624

625
	if (opt == NULL)
626
		return -EINVAL;
627

628
	qopt = nla_data(opt);
629
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
630
	if (ret < 0)
631
		return ret;
632

633
	ret = fifo_set_limit(q->qdisc, qopt->limit);
634
	if (ret) {
635
		pr_info("netem: can't set fifo limit\n");
636
		return ret;
637
	}
638

639
	q->latency = qopt->latency;
640
	q->jitter = qopt->jitter;
641
	q->limit = qopt->limit;
642
	q->gap = qopt->gap;
643
	q->counter = 0;
644
	q->loss = qopt->loss;
645
	q->duplicate = qopt->duplicate;
646

647
	/* for compatibility with earlier versions.
648
	 * if gap is set, need to assume 100% probability
649
	 */
650
	if (q->gap)
651
		q->reorder = ~0;
652

653
	if (tb[TCA_NETEM_CORR])
654
		get_correlation(sch, tb[TCA_NETEM_CORR]);
655

656
	if (tb[TCA_NETEM_DELAY_DIST]) {
657
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
658
		if (ret)
659
			return ret;
660
	}
661

662
	if (tb[TCA_NETEM_REORDER])
663
		get_reorder(sch, tb[TCA_NETEM_REORDER]);
664

665
	if (tb[TCA_NETEM_CORRUPT])
666
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
667

668
	q->loss_model = CLG_RANDOM;
669
	if (tb[TCA_NETEM_LOSS])
670
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
671

672
	return ret;
673
}
674

675
/*
676
 * Special case version of FIFO queue for use by netem.
677
 * It queues in order based on timestamps in skb's
678
 */
679
struct fifo_sched_data {
680
	u32 limit;
681
	psched_time_t oldest;
682
};
683

684
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
685
{
686
	struct fifo_sched_data *q = qdisc_priv(sch);
687
	struct sk_buff_head *list = &sch->q;
688
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
689
	struct sk_buff *skb;
690

691
	if (likely(skb_queue_len(list) < q->limit)) {
692
		/* Optimize for add at tail */
693
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
694
			q->oldest = tnext;
695
			return qdisc_enqueue_tail(nskb, sch);
696
		}
697

698
		skb_queue_reverse_walk(list, skb) {
699
			const struct netem_skb_cb *cb = netem_skb_cb(skb);
700

701
			if (tnext >= cb->time_to_send)
702
				break;
703
		}
704

705
		__skb_queue_after(list, skb, nskb);
706

707
		sch->qstats.backlog += qdisc_pkt_len(nskb);
708

709
		return NET_XMIT_SUCCESS;
710
	}
711

712
	return qdisc_reshape_fail(nskb, sch);
713
}
714

715
static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
716
{
717
	struct fifo_sched_data *q = qdisc_priv(sch);
718

719
	if (opt) {
720
		struct tc_fifo_qopt *ctl = nla_data(opt);
721
		if (nla_len(opt) < sizeof(*ctl))
722
			return -EINVAL;
723

724
		q->limit = ctl->limit;
725
	} else
726
		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
727

728
	q->oldest = PSCHED_PASTPERFECT;
729
	return 0;
730
}
731

732
static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
733
{
734
	struct fifo_sched_data *q = qdisc_priv(sch);
735
	struct tc_fifo_qopt opt = { .limit = q->limit };
736

737
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
738
	return skb->len;
739

740
nla_put_failure:
741
	return -1;
742
}
743

744
/* Operations of the time-ordered FIFO used as netem's inner qdisc. */
static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		= "tfifo",
	.priv_size	= sizeof(struct fifo_sched_data),

	/* data path */
	.enqueue	= tfifo_enqueue,
	.dequeue	= qdisc_dequeue_head,
	.peek		= qdisc_peek_head,
	.drop		= qdisc_queue_drop,

	/* control path; init doubles as the change handler */
	.init		= tfifo_init,
	.change		= tfifo_init,
	.reset		= qdisc_reset_queue,
	.dump		= tfifo_dump,
};
756

757
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
758
{
759
	struct netem_sched_data *q = qdisc_priv(sch);
760
	int ret;
761

762
	if (!opt)
763
		return -EINVAL;
764

765
	qdisc_watchdog_init(&q->watchdog, sch);
766

767
	q->loss_model = CLG_RANDOM;
768
	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
769
				     TC_H_MAKE(sch->handle, 1));
770
	if (!q->qdisc) {
771
		pr_notice("netem: qdisc create tfifo qdisc failed\n");
772
		return -ENOMEM;
773
	}
774

775
	ret = netem_change(sch, opt);
776
	if (ret) {
777
		pr_info("netem: change failed\n");
778
		qdisc_destroy(q->qdisc);
779
	}
780
	return ret;
781
}
782

783
static void netem_destroy(struct Qdisc *sch)
784
{
785
	struct netem_sched_data *q = qdisc_priv(sch);
786

787
	qdisc_watchdog_cancel(&q->watchdog);
788
	qdisc_destroy(q->qdisc);
789
	dist_free(q->delay_dist);
790
}
791

792
static int dump_loss_model(const struct netem_sched_data *q,
793
			   struct sk_buff *skb)
794
{
795
	struct nlattr *nest;
796

797
	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
798
	if (nest == NULL)
799
		goto nla_put_failure;
800

801
	switch (q->loss_model) {
802
	case CLG_RANDOM:
803
		/* legacy loss model */
804
		nla_nest_cancel(skb, nest);
805
		return 0;	/* no data */
806

807
	case CLG_4_STATES: {
808
		struct tc_netem_gimodel gi = {
809
			.p13 = q->clg.a1,
810
			.p31 = q->clg.a2,
811
			.p32 = q->clg.a3,
812
			.p14 = q->clg.a4,
813
			.p23 = q->clg.a5,
814
		};
815

816
		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
817
		break;
818
	}
819
	case CLG_GILB_ELL: {
820
		struct tc_netem_gemodel ge = {
821
			.p = q->clg.a1,
822
			.r = q->clg.a2,
823
			.h = q->clg.a3,
824
			.k1 = q->clg.a4,
825
		};
826

827
		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
828
		break;
829
	}
830
	}
831

832
	nla_nest_end(skb, nest);
833
	return 0;
834

835
nla_put_failure:
836
	nla_nest_cancel(skb, nest);
837
	return -1;
838
}
839

840
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
841
{
842
	const struct netem_sched_data *q = qdisc_priv(sch);
843
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
844
	struct tc_netem_qopt qopt;
845
	struct tc_netem_corr cor;
846
	struct tc_netem_reorder reorder;
847
	struct tc_netem_corrupt corrupt;
848

849
	qopt.latency = q->latency;
850
	qopt.jitter = q->jitter;
851
	qopt.limit = q->limit;
852
	qopt.loss = q->loss;
853
	qopt.gap = q->gap;
854
	qopt.duplicate = q->duplicate;
855
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
856

857
	cor.delay_corr = q->delay_cor.rho;
858
	cor.loss_corr = q->loss_cor.rho;
859
	cor.dup_corr = q->dup_cor.rho;
860
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
861

862
	reorder.probability = q->reorder;
863
	reorder.correlation = q->reorder_cor.rho;
864
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
865

866
	corrupt.probability = q->corrupt;
867
	corrupt.correlation = q->corrupt_cor.rho;
868
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
869

870
	if (dump_loss_model(q, skb) != 0)
871
		goto nla_put_failure;
872

873
	return nla_nest_end(skb, nla);
874

875
nla_put_failure:
876
	nlmsg_trim(skb, nla);
877
	return -1;
878
}
879

880
static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
881
			  struct sk_buff *skb, struct tcmsg *tcm)
882
{
883
	struct netem_sched_data *q = qdisc_priv(sch);
884

885
	if (cl != 1) 	/* only one class */
886
		return -ENOENT;
887

888
	tcm->tcm_handle |= TC_H_MIN(1);
889
	tcm->tcm_info = q->qdisc->handle;
890

891
	return 0;
892
}
893

894
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
895
		     struct Qdisc **old)
896
{
897
	struct netem_sched_data *q = qdisc_priv(sch);
898

899
	if (new == NULL)
900
		new = &noop_qdisc;
901

902
	sch_tree_lock(sch);
903
	*old = q->qdisc;
904
	q->qdisc = new;
905
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
906
	qdisc_reset(*old);
907
	sch_tree_unlock(sch);
908

909
	return 0;
910
}
911

912
static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
913
{
914
	struct netem_sched_data *q = qdisc_priv(sch);
915
	return q->qdisc;
916
}
917

918
/* netem_get - every classid maps to the single class, handle 1 */
static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	/* neither argument is looked at */
	return 1;
}
922

923
/* netem_put - counterpart of netem_get(); there is nothing to release */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
	/* intentionally empty */
}
926

927
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
928
{
929
	if (!walker->stop) {
930
		if (walker->count >= walker->skip)
931
			if (walker->fn(sch, 1, walker) < 0) {
932
				walker->stop = 1;
933
				return;
934
			}
935
		walker->count++;
936
	}
937
}
938

939
static const struct Qdisc_class_ops netem_class_ops = {
940
	.graft		=	netem_graft,
941
	.leaf		=	netem_leaf,
942
	.get		=	netem_get,
943
	.put		=	netem_put,
944
	.walk		=	netem_walk,
945
	.dump		=	netem_dump_class,
946
};
947

948
/* Operations table registered for the "netem" qdisc. */
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		= "netem",
	.owner		= THIS_MODULE,
	.cl_ops		= &netem_class_ops,
	.priv_size	= sizeof(struct netem_sched_data),

	/* data path */
	.enqueue	= netem_enqueue,
	.dequeue	= netem_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= netem_drop,

	/* control path */
	.init		= netem_init,
	.change		= netem_change,
	.reset		= netem_reset,
	.destroy	= netem_destroy,
	.dump		= netem_dump,
};
963

964

965
/* Module entry point: log the version and register the netem qdisc. */
static int __init netem_module_init(void)
{
	int err;

	pr_info("netem: version " VERSION "\n");

	err = register_qdisc(&netem_qdisc_ops);
	return err;
}
970
/* Module exit point: unregister the netem qdisc. */
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
974
module_init(netem_module_init)
975
module_exit(netem_module_exit)
976
MODULE_LICENSE("GPL");
977

978
Product

Resources

Company