Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/sched/cls_rsvp.h
15111 views
1
/*
2
* net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3
*
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License
6
* as published by the Free Software Foundation; either version
7
* 2 of the License, or (at your option) any later version.
8
*
9
* Authors: Alexey Kuznetsov, <[email protected]>
10
*/
11
12
/*
13
Comparing to general packet classification problem,
14
RSVP needs only several relatively simple rules:
15
16
* (dst, protocol) are always specified,
17
so that we are able to hash them.
18
* src may be exact, or may be wildcard, so that
19
we can keep a hash table plus one wildcard entry.
20
* source port (or flow label) is important only if src is given.
21
22
IMPLEMENTATION.
23
24
We use a two level hash table: The top level is keyed by
25
destination address and protocol ID, every bucket contains a list
26
of "rsvp sessions", identified by destination address, protocol and
27
DPI(="Destination Port ID"): triple (key, mask, offset).
28
29
Every bucket has a smaller hash table keyed by source address
30
(cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31
Every bucket is again a list of "RSVP flows", selected by
32
source address and SPI(="Source Port ID" here rather than
33
"security parameter index"): triple (key, mask, offset).
34
35
36
NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37
and all fragmented packets go to the best-effort traffic class.
38
39
40
NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
41
only one "Generalized Port Identifier". So that for classic
42
ah, esp (and udp,tcp) both *pi should coincide or one of them
43
should be wildcard.
44
45
At first sight, this redundancy is just a waste of CPU
46
resources. But DPI and SPI add the possibility to assign different
47
priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50
NOTE 3. One complication is the case of tunneled packets.
51
We implement it as follows: if the first lookup
52
matches a special session with "tunnelhdr" value not zero,
53
flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54
In this case, we pull tunnelhdr bytes and restart lookup
55
with tunnel ID added to the list of keys. Simple and stupid 8)8)
56
It's enough for PIMREG and IPIP.
57
58
59
NOTE 4. Two GPIs make it possible to parse even GRE packets.
60
F.e. DPI can select ETH_P_IP (and necessary flags to make
61
tunnelhdr correct) in GRE protocol field and SPI matches
62
GRE key. Is it not nice? 8)8)
63
64
65
Well, as result, despite its simplicity, we get a pretty
66
powerful classification engine. */
67
68
69
struct rsvp_head {
70
u32 tmap[256/32];
71
u32 hgenerator;
72
u8 tgenerator;
73
struct rsvp_session *ht[256];
74
};
75
76
struct rsvp_session {
77
struct rsvp_session *next;
78
__be32 dst[RSVP_DST_LEN];
79
struct tc_rsvp_gpi dpi;
80
u8 protocol;
81
u8 tunnelid;
82
/* 16 (src,sport) hash slots, and one wildcard source slot */
83
struct rsvp_filter *ht[16 + 1];
84
};
85
86
87
struct rsvp_filter {
88
struct rsvp_filter *next;
89
__be32 src[RSVP_DST_LEN];
90
struct tc_rsvp_gpi spi;
91
u8 tunnelhdr;
92
93
struct tcf_result res;
94
struct tcf_exts exts;
95
96
u32 handle;
97
struct rsvp_session *sess;
98
};
99
100
/*
 * Hash a session key into one of the 256 top-level buckets.
 *
 * Only the last 32-bit word of the destination address takes part; it is
 * folded down to a byte and mixed with the protocol and the tunnel id.
 */
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
{
	unsigned int folded = (__force __u32)dst[RSVP_DST_LEN - 1];

	folded ^= folded >> 16;
	folded ^= folded >> 8;
	folded ^= protocol;
	folded ^= tunnelid;

	return folded & 0xFF;
}
108
109
/*
 * Hash a source address into one of the 16 per-session source slots.
 *
 * Only the last 32-bit word of the address is used; it is folded down
 * to four bits.
 */
static inline unsigned int hash_src(__be32 *src)
{
	unsigned int folded = (__force __u32)src[RSVP_DST_LEN - 1];

	folded ^= folded >> 16;
	folded ^= folded >> 8;

	return (folded ^ (folded >> 4)) & 0xF;
}
118
119
static struct tcf_ext_map rsvp_ext_map = {
120
.police = TCA_RSVP_POLICE,
121
.action = TCA_RSVP_ACT
122
};
123
124
/*
 * Run the extensions attached to the current filter.
 *
 * Must be expanded directly inside a loop over filters, in a function
 * returning int, with `skb`, `f` and `res` in scope:
 *   - negative result: `continue` with the next filter of that loop;
 *   - positive result: returned from the enclosing function;
 *   - zero: execution falls through to the code after the macro.
 *
 * This is deliberately a bare brace block and not do { } while (0):
 * the `continue` has to bind to the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
	if (verdict > 0)					\
		return verdict;					\
	if (verdict < 0)					\
		continue;					\
}
132
133
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
134
struct tcf_result *res)
135
{
136
struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
137
struct rsvp_session *s;
138
struct rsvp_filter *f;
139
unsigned int h1, h2;
140
__be32 *dst, *src;
141
u8 protocol;
142
u8 tunnelid = 0;
143
u8 *xprt;
144
#if RSVP_DST_LEN == 4
145
struct ipv6hdr *nhptr;
146
147
if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
148
return -1;
149
nhptr = ipv6_hdr(skb);
150
#else
151
struct iphdr *nhptr;
152
153
if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
154
return -1;
155
nhptr = ip_hdr(skb);
156
#endif
157
158
restart:
159
160
#if RSVP_DST_LEN == 4
161
src = &nhptr->saddr.s6_addr32[0];
162
dst = &nhptr->daddr.s6_addr32[0];
163
protocol = nhptr->nexthdr;
164
xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
165
#else
166
src = &nhptr->saddr;
167
dst = &nhptr->daddr;
168
protocol = nhptr->protocol;
169
xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
170
if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
171
return -1;
172
#endif
173
174
h1 = hash_dst(dst, protocol, tunnelid);
175
h2 = hash_src(src);
176
177
for (s = sht[h1]; s; s = s->next) {
178
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179
protocol == s->protocol &&
180
!(s->dpi.mask &
181
(*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182
#if RSVP_DST_LEN == 4
183
dst[0] == s->dst[0] &&
184
dst[1] == s->dst[1] &&
185
dst[2] == s->dst[2] &&
186
#endif
187
tunnelid == s->tunnelid) {
188
189
for (f = s->ht[h2]; f; f = f->next) {
190
if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
191
!(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
192
#if RSVP_DST_LEN == 4
193
&&
194
src[0] == f->src[0] &&
195
src[1] == f->src[1] &&
196
src[2] == f->src[2]
197
#endif
198
) {
199
*res = f->res;
200
RSVP_APPLY_RESULT();
201
202
matched:
203
if (f->tunnelhdr == 0)
204
return 0;
205
206
tunnelid = f->res.classid;
207
nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
208
goto restart;
209
}
210
}
211
212
/* And wildcard bucket... */
213
for (f = s->ht[16]; f; f = f->next) {
214
*res = f->res;
215
RSVP_APPLY_RESULT();
216
goto matched;
217
}
218
return -1;
219
}
220
}
221
return -1;
222
}
223
224
/*
 * Look a filter up by handle.
 *
 * The low byte of the handle selects the session bucket and the next byte
 * the source slot (0..16).  Returns the filter pointer cast to
 * unsigned long, or 0 if the slot is out of range or no filter carries
 * this handle.
 */
static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
{
	struct rsvp_head *head = tp->root;
	unsigned int sess_bucket = handle & 0xFF;
	unsigned int src_slot = (handle >> 8) & 0xFF;
	struct rsvp_session *s;
	struct rsvp_filter *f;

	if (src_slot > 16)
		return 0;

	for (s = head->ht[sess_bucket]; s; s = s->next)
		for (f = s->ht[src_slot]; f; f = f->next)
			if (f->handle == handle)
				return (unsigned long)f;

	return 0;
}
243
244
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
247
248
static int rsvp_init(struct tcf_proto *tp)
249
{
250
struct rsvp_head *data;
251
252
data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
253
if (data) {
254
tp->root = data;
255
return 0;
256
}
257
return -ENOBUFS;
258
}
259
260
static void
261
rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
262
{
263
tcf_unbind_filter(tp, &f->res);
264
tcf_exts_destroy(tp, &f->exts);
265
kfree(f);
266
}
267
268
static void rsvp_destroy(struct tcf_proto *tp)
269
{
270
struct rsvp_head *data = xchg(&tp->root, NULL);
271
struct rsvp_session **sht;
272
int h1, h2;
273
274
if (data == NULL)
275
return;
276
277
sht = data->ht;
278
279
for (h1 = 0; h1 < 256; h1++) {
280
struct rsvp_session *s;
281
282
while ((s = sht[h1]) != NULL) {
283
sht[h1] = s->next;
284
285
for (h2 = 0; h2 <= 16; h2++) {
286
struct rsvp_filter *f;
287
288
while ((f = s->ht[h2]) != NULL) {
289
s->ht[h2] = f->next;
290
rsvp_delete_filter(tp, f);
291
}
292
}
293
kfree(s);
294
}
295
}
296
kfree(data);
297
}
298
299
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
300
{
301
struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
302
unsigned int h = f->handle;
303
struct rsvp_session **sp;
304
struct rsvp_session *s = f->sess;
305
int i;
306
307
for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
308
if (*fp == f) {
309
tcf_tree_lock(tp);
310
*fp = f->next;
311
tcf_tree_unlock(tp);
312
rsvp_delete_filter(tp, f);
313
314
/* Strip tree */
315
316
for (i = 0; i <= 16; i++)
317
if (s->ht[i])
318
return 0;
319
320
/* OK, session has no flows */
321
for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
322
*sp; sp = &(*sp)->next) {
323
if (*sp == s) {
324
tcf_tree_lock(tp);
325
*sp = s->next;
326
tcf_tree_unlock(tp);
327
328
kfree(s);
329
return 0;
330
}
331
}
332
333
return 0;
334
}
335
}
336
return 0;
337
}
338
339
static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
340
{
341
struct rsvp_head *data = tp->root;
342
int i = 0xFFFF;
343
344
while (i-- > 0) {
345
u32 h;
346
347
if ((data->hgenerator += 0x10000) == 0)
348
data->hgenerator = 0x10000;
349
h = data->hgenerator|salt;
350
if (rsvp_get(tp, h) == 0)
351
return h;
352
}
353
return 0;
354
}
355
356
static int tunnel_bts(struct rsvp_head *data)
357
{
358
int n = data->tgenerator >> 5;
359
u32 b = 1 << (data->tgenerator & 0x1F);
360
361
if (data->tmap[n] & b)
362
return 0;
363
data->tmap[n] |= b;
364
return 1;
365
}
366
367
static void tunnel_recycle(struct rsvp_head *data)
368
{
369
struct rsvp_session **sht = data->ht;
370
u32 tmap[256/32];
371
int h1, h2;
372
373
memset(tmap, 0, sizeof(tmap));
374
375
for (h1 = 0; h1 < 256; h1++) {
376
struct rsvp_session *s;
377
for (s = sht[h1]; s; s = s->next) {
378
for (h2 = 0; h2 <= 16; h2++) {
379
struct rsvp_filter *f;
380
381
for (f = s->ht[h2]; f; f = f->next) {
382
if (f->tunnelhdr == 0)
383
continue;
384
data->tgenerator = f->res.classid;
385
tunnel_bts(data);
386
}
387
}
388
}
389
}
390
391
memcpy(data->tmap, tmap, sizeof(tmap));
392
}
393
394
static u32 gen_tunnel(struct rsvp_head *data)
395
{
396
int i, k;
397
398
for (k = 0; k < 2; k++) {
399
for (i = 255; i > 0; i--) {
400
if (++data->tgenerator == 0)
401
data->tgenerator = 1;
402
if (tunnel_bts(data))
403
return data->tgenerator;
404
}
405
tunnel_recycle(data);
406
}
407
return 0;
408
}
409
410
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
411
[TCA_RSVP_CLASSID] = { .type = NLA_U32 },
412
[TCA_RSVP_DST] = { .type = NLA_BINARY,
413
.len = RSVP_DST_LEN * sizeof(u32) },
414
[TCA_RSVP_SRC] = { .type = NLA_BINARY,
415
.len = RSVP_DST_LEN * sizeof(u32) },
416
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
417
};
418
419
static int rsvp_change(struct tcf_proto *tp, unsigned long base,
420
u32 handle,
421
struct nlattr **tca,
422
unsigned long *arg)
423
{
424
struct rsvp_head *data = tp->root;
425
struct rsvp_filter *f, **fp;
426
struct rsvp_session *s, **sp;
427
struct tc_rsvp_pinfo *pinfo = NULL;
428
struct nlattr *opt = tca[TCA_OPTIONS-1];
429
struct nlattr *tb[TCA_RSVP_MAX + 1];
430
struct tcf_exts e;
431
unsigned int h1, h2;
432
__be32 *dst;
433
int err;
434
435
if (opt == NULL)
436
return handle ? -EINVAL : 0;
437
438
err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
439
if (err < 0)
440
return err;
441
442
err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
443
if (err < 0)
444
return err;
445
446
f = (struct rsvp_filter *)*arg;
447
if (f) {
448
/* Node exists: adjust only classid */
449
450
if (f->handle != handle && handle)
451
goto errout2;
452
if (tb[TCA_RSVP_CLASSID-1]) {
453
f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
454
tcf_bind_filter(tp, &f->res, base);
455
}
456
457
tcf_exts_change(tp, &f->exts, &e);
458
return 0;
459
}
460
461
/* Now more serious part... */
462
err = -EINVAL;
463
if (handle)
464
goto errout2;
465
if (tb[TCA_RSVP_DST-1] == NULL)
466
goto errout2;
467
468
err = -ENOBUFS;
469
f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
470
if (f == NULL)
471
goto errout2;
472
473
h2 = 16;
474
if (tb[TCA_RSVP_SRC-1]) {
475
memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
476
h2 = hash_src(f->src);
477
}
478
if (tb[TCA_RSVP_PINFO-1]) {
479
pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
480
f->spi = pinfo->spi;
481
f->tunnelhdr = pinfo->tunnelhdr;
482
}
483
if (tb[TCA_RSVP_CLASSID-1])
484
f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
485
486
dst = nla_data(tb[TCA_RSVP_DST-1]);
487
h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
488
489
err = -ENOMEM;
490
if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
491
goto errout;
492
493
if (f->tunnelhdr) {
494
err = -EINVAL;
495
if (f->res.classid > 255)
496
goto errout;
497
498
err = -ENOMEM;
499
if (f->res.classid == 0 &&
500
(f->res.classid = gen_tunnel(data)) == 0)
501
goto errout;
502
}
503
504
for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
505
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
506
pinfo && pinfo->protocol == s->protocol &&
507
memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
508
#if RSVP_DST_LEN == 4
509
dst[0] == s->dst[0] &&
510
dst[1] == s->dst[1] &&
511
dst[2] == s->dst[2] &&
512
#endif
513
pinfo->tunnelid == s->tunnelid) {
514
515
insert:
516
/* OK, we found appropriate session */
517
518
fp = &s->ht[h2];
519
520
f->sess = s;
521
if (f->tunnelhdr == 0)
522
tcf_bind_filter(tp, &f->res, base);
523
524
tcf_exts_change(tp, &f->exts, &e);
525
526
for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
527
if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
528
break;
529
f->next = *fp;
530
wmb();
531
*fp = f;
532
533
*arg = (unsigned long)f;
534
return 0;
535
}
536
}
537
538
/* No session found. Create new one. */
539
540
err = -ENOBUFS;
541
s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
542
if (s == NULL)
543
goto errout;
544
memcpy(s->dst, dst, sizeof(s->dst));
545
546
if (pinfo) {
547
s->dpi = pinfo->dpi;
548
s->protocol = pinfo->protocol;
549
s->tunnelid = pinfo->tunnelid;
550
}
551
for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
552
if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
553
break;
554
}
555
s->next = *sp;
556
wmb();
557
*sp = s;
558
559
goto insert;
560
561
errout:
562
kfree(f);
563
errout2:
564
tcf_exts_destroy(tp, &e);
565
return err;
566
}
567
568
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
569
{
570
struct rsvp_head *head = tp->root;
571
unsigned int h, h1;
572
573
if (arg->stop)
574
return;
575
576
for (h = 0; h < 256; h++) {
577
struct rsvp_session *s;
578
579
for (s = head->ht[h]; s; s = s->next) {
580
for (h1 = 0; h1 <= 16; h1++) {
581
struct rsvp_filter *f;
582
583
for (f = s->ht[h1]; f; f = f->next) {
584
if (arg->count < arg->skip) {
585
arg->count++;
586
continue;
587
}
588
if (arg->fn(tp, (unsigned long)f, arg) < 0) {
589
arg->stop = 1;
590
return;
591
}
592
arg->count++;
593
}
594
}
595
}
596
}
597
}
598
599
static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
600
struct sk_buff *skb, struct tcmsg *t)
601
{
602
struct rsvp_filter *f = (struct rsvp_filter *)fh;
603
struct rsvp_session *s;
604
unsigned char *b = skb_tail_pointer(skb);
605
struct nlattr *nest;
606
struct tc_rsvp_pinfo pinfo;
607
608
if (f == NULL)
609
return skb->len;
610
s = f->sess;
611
612
t->tcm_handle = f->handle;
613
614
nest = nla_nest_start(skb, TCA_OPTIONS);
615
if (nest == NULL)
616
goto nla_put_failure;
617
618
NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
619
pinfo.dpi = s->dpi;
620
pinfo.spi = f->spi;
621
pinfo.protocol = s->protocol;
622
pinfo.tunnelid = s->tunnelid;
623
pinfo.tunnelhdr = f->tunnelhdr;
624
pinfo.pad = 0;
625
NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
626
if (f->res.classid)
627
NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
628
if (((f->handle >> 8) & 0xFF) != 16)
629
NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
630
631
if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
632
goto nla_put_failure;
633
634
nla_nest_end(skb, nest);
635
636
if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
637
goto nla_put_failure;
638
return skb->len;
639
640
nla_put_failure:
641
nlmsg_trim(skb, b);
642
return -1;
643
}
644
645
static struct tcf_proto_ops RSVP_OPS = {
646
.next = NULL,
647
.kind = RSVP_ID,
648
.classify = rsvp_classify,
649
.init = rsvp_init,
650
.destroy = rsvp_destroy,
651
.get = rsvp_get,
652
.put = rsvp_put,
653
.change = rsvp_change,
654
.delete = rsvp_delete,
655
.walk = rsvp_walk,
656
.dump = rsvp_dump,
657
.owner = THIS_MODULE,
658
};
659
660
/* Module entry point: register the classifier operations. */
static int __init init_rsvp(void)
{
	int err = register_tcf_proto_ops(&RSVP_OPS);

	return err;
}
664
665
/* Module exit point: unregister the classifier operations. */
static void __exit exit_rsvp(void)
{
	unregister_tcf_proto_ops(&RSVP_OPS);
}
669
670
module_init(init_rsvp)
671
module_exit(exit_rsvp)
672
673