Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/net/ipv4/fib_rules.c
26282 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* INET An implementation of the TCP/IP protocol suite for the LINUX
4
* operating system. INET is implemented using the BSD Socket
5
* interface as the means of communication with the user level.
6
*
7
* IPv4 Forwarding Information Base: policy rules.
8
*
9
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10
 *		Thomas Graf <tgraf@suug.ch>
11
*
12
* Fixes:
13
* Rani Assaf : local_rule cannot be deleted
14
* Marc Boucher : routing by fwmark
15
*/
16
17
#include <linux/types.h>
18
#include <linux/kernel.h>
19
#include <linux/netdevice.h>
20
#include <linux/netlink.h>
21
#include <linux/inetdevice.h>
22
#include <linux/init.h>
23
#include <linux/list.h>
24
#include <linux/rcupdate.h>
25
#include <linux/export.h>
26
#include <net/inet_dscp.h>
27
#include <net/ip.h>
28
#include <net/route.h>
29
#include <net/tcp.h>
30
#include <net/ip_fib.h>
31
#include <net/nexthop.h>
32
#include <net/fib_rules.h>
33
#include <linux/indirect_call_wrapper.h>
34
35
struct fib4_rule {
36
struct fib_rule common;
37
u8 dst_len;
38
u8 src_len;
39
dscp_t dscp;
40
dscp_t dscp_mask;
41
u8 dscp_full:1; /* DSCP or TOS selector */
42
__be32 src;
43
__be32 srcmask;
44
__be32 dst;
45
__be32 dstmask;
46
#ifdef CONFIG_IP_ROUTE_CLASSID
47
u32 tclassid;
48
#endif
49
};
50
51
static bool fib4_rule_matchall(const struct fib_rule *rule)
52
{
53
struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
54
55
if (r->dst_len || r->src_len || r->dscp)
56
return false;
57
return fib_rule_matchall(rule);
58
}
59
60
bool fib4_rule_default(const struct fib_rule *rule)
61
{
62
if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
63
rule->l3mdev)
64
return false;
65
if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN &&
66
rule->table != RT_TABLE_DEFAULT)
67
return false;
68
return true;
69
}
70
EXPORT_SYMBOL_GPL(fib4_rule_default);
71
72
int fib4_rules_dump(struct net *net, struct notifier_block *nb,
73
struct netlink_ext_ack *extack)
74
{
75
return fib_rules_dump(net, nb, AF_INET, extack);
76
}
77
78
unsigned int fib4_rules_seq_read(const struct net *net)
79
{
80
return fib_rules_seq_read(net, AF_INET);
81
}
82
83
int __fib_lookup(struct net *net, struct flowi4 *flp,
84
struct fib_result *res, unsigned int flags)
85
{
86
struct fib_lookup_arg arg = {
87
.result = res,
88
.flags = flags,
89
};
90
int err;
91
92
/* update flow if oif or iif point to device enslaved to l3mdev */
93
l3mdev_update_flow(net, flowi4_to_flowi(flp));
94
95
err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
96
#ifdef CONFIG_IP_ROUTE_CLASSID
97
if (arg.rule)
98
res->tclassid = ((struct fib4_rule *)arg.rule)->tclassid;
99
else
100
res->tclassid = 0;
101
#endif
102
103
if (err == -ESRCH)
104
err = -ENETUNREACH;
105
106
return err;
107
}
108
EXPORT_SYMBOL_GPL(__fib_lookup);
109
110
INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule,
111
struct flowi *flp, int flags,
112
struct fib_lookup_arg *arg)
113
{
114
int err = -EAGAIN;
115
struct fib_table *tbl;
116
u32 tb_id;
117
118
switch (rule->action) {
119
case FR_ACT_TO_TBL:
120
break;
121
122
case FR_ACT_UNREACHABLE:
123
return -ENETUNREACH;
124
125
case FR_ACT_PROHIBIT:
126
return -EACCES;
127
128
case FR_ACT_BLACKHOLE:
129
default:
130
return -EINVAL;
131
}
132
133
rcu_read_lock();
134
135
tb_id = fib_rule_get_table(rule, arg);
136
tbl = fib_get_table(rule->fr_net, tb_id);
137
if (tbl)
138
err = fib_table_lookup(tbl, &flp->u.ip4,
139
(struct fib_result *)arg->result,
140
arg->flags);
141
142
rcu_read_unlock();
143
return err;
144
}
145
146
/* Decide whether the lookup result in @arg must be discarded for @rule.
 *
 * The result is suppressed when its prefix length does not exceed
 * rule->suppress_prefixlen, or when rule->suppress_ifgroup is set (not -1)
 * and the device of the route's first nexthop belongs to that group.
 *
 * On suppression the fib_info reference is dropped unless the lookup was
 * done with FIB_LOOKUP_NOREF. Returns true when the result is suppressed.
 */
INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
						int flags,
						struct fib_lookup_arg *arg)
{
	struct fib_result *res = arg->result;
	struct net_device *out_dev = NULL;
	bool suppress;

	if (res->fi)
		out_dev = fib_info_nhc(res->fi, 0)->nhc_dev;

	/* too short a prefix disqualifies the route outright */
	suppress = res->prefixlen <= rule->suppress_prefixlen;

	/* otherwise reject routes through a forbidden interface group */
	if (!suppress)
		suppress = rule->suppress_ifgroup != -1 && out_dev &&
			   out_dev->group == rule->suppress_ifgroup;

	if (!suppress)
		return false;

	if (!(arg->flags & FIB_LOOKUP_NOREF))
		fib_info_put(res->fi);
	return true;
}
178
179
INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
180
struct flowi *fl, int flags)
181
{
182
struct fib4_rule *r = (struct fib4_rule *) rule;
183
struct flowi4 *fl4 = &fl->u.ip4;
184
__be32 daddr = fl4->daddr;
185
__be32 saddr = fl4->saddr;
186
187
if (((saddr ^ r->src) & r->srcmask) ||
188
((daddr ^ r->dst) & r->dstmask))
189
return 0;
190
191
/* When DSCP selector is used we need to match on the entire DSCP field
192
* in the flow information structure. When TOS selector is used we need
193
* to mask the upper three DSCP bits prior to matching to maintain
194
* legacy behavior.
195
*/
196
if (r->dscp_full &&
197
(r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask)
198
return 0;
199
else if (!r->dscp_full && r->dscp &&
200
!fib_dscp_masked_match(r->dscp, fl4))
201
return 0;
202
203
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
204
return 0;
205
206
if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask,
207
fl4->fl4_sport))
208
return 0;
209
210
if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask,
211
fl4->fl4_dport))
212
return 0;
213
214
return 1;
215
}
216
217
static struct fib_table *fib_empty_table(struct net *net)
218
{
219
u32 id = 1;
220
221
while (1) {
222
if (!fib_get_table(net, id))
223
return fib_new_table(net, id);
224
225
if (id++ == RT_TABLE_MAX)
226
break;
227
}
228
return NULL;
229
}
230
231
static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
232
struct netlink_ext_ack *extack)
233
{
234
if (rule4->dscp) {
235
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
236
return -EINVAL;
237
}
238
239
rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
240
rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
241
rule4->dscp_full = true;
242
243
return 0;
244
}
245
246
static int fib4_nl2rule_dscp_mask(const struct nlattr *nla,
247
struct fib4_rule *rule4,
248
struct netlink_ext_ack *extack)
249
{
250
dscp_t dscp_mask;
251
252
if (!rule4->dscp_full) {
253
NL_SET_ERR_MSG_ATTR(extack, nla,
254
"Cannot specify DSCP mask without DSCP value");
255
return -EINVAL;
256
}
257
258
dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
259
if (rule4->dscp & ~dscp_mask) {
260
NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
261
return -EINVAL;
262
}
263
264
rule4->dscp_mask = dscp_mask;
265
266
return 0;
267
}
268
269
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
270
struct fib_rule_hdr *frh,
271
struct nlattr **tb,
272
struct netlink_ext_ack *extack)
273
{
274
struct fib4_rule *rule4 = (struct fib4_rule *)rule;
275
struct net *net = rule->fr_net;
276
int err = -EINVAL;
277
278
if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) {
279
NL_SET_ERR_MSG(extack,
280
"Flow label cannot be specified for IPv4 FIB rules");
281
goto errout;
282
}
283
284
if (!inet_validate_dscp(frh->tos)) {
285
NL_SET_ERR_MSG(extack,
286
"Invalid dsfield (tos): ECN bits must be 0");
287
goto errout;
288
}
289
/* IPv4 currently doesn't handle high order DSCP bits correctly */
290
if (frh->tos & ~IPTOS_TOS_MASK) {
291
NL_SET_ERR_MSG(extack, "Invalid tos");
292
goto errout;
293
}
294
rule4->dscp = inet_dsfield_to_dscp(frh->tos);
295
296
if (tb[FRA_DSCP] &&
297
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
298
goto errout;
299
300
if (tb[FRA_DSCP_MASK] &&
301
fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0)
302
goto errout;
303
304
/* split local/main if they are not already split */
305
err = fib_unmerge(net);
306
if (err)
307
goto errout;
308
309
if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) {
310
if (rule->action == FR_ACT_TO_TBL) {
311
struct fib_table *table;
312
313
table = fib_empty_table(net);
314
if (!table) {
315
err = -ENOBUFS;
316
goto errout;
317
}
318
319
rule->table = table->tb_id;
320
}
321
}
322
323
if (frh->src_len)
324
rule4->src = nla_get_in_addr(tb[FRA_SRC]);
325
326
if (frh->dst_len)
327
rule4->dst = nla_get_in_addr(tb[FRA_DST]);
328
329
#ifdef CONFIG_IP_ROUTE_CLASSID
330
if (tb[FRA_FLOW]) {
331
rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
332
if (rule4->tclassid)
333
atomic_inc(&net->ipv4.fib_num_tclassid_users);
334
}
335
#endif
336
337
if (fib_rule_requires_fldissect(rule))
338
net->ipv4.fib_rules_require_fldissect++;
339
340
rule4->src_len = frh->src_len;
341
rule4->srcmask = inet_make_mask(rule4->src_len);
342
rule4->dst_len = frh->dst_len;
343
rule4->dstmask = inet_make_mask(rule4->dst_len);
344
345
net->ipv4.fib_has_custom_rules = true;
346
347
err = 0;
348
errout:
349
return err;
350
}
351
352
static int fib4_rule_delete(struct fib_rule *rule)
353
{
354
struct net *net = rule->fr_net;
355
int err;
356
357
/* split local/main if they are not already split */
358
err = fib_unmerge(net);
359
if (err)
360
goto errout;
361
362
#ifdef CONFIG_IP_ROUTE_CLASSID
363
if (((struct fib4_rule *)rule)->tclassid)
364
atomic_dec(&net->ipv4.fib_num_tclassid_users);
365
#endif
366
net->ipv4.fib_has_custom_rules = true;
367
368
if (net->ipv4.fib_rules_require_fldissect &&
369
fib_rule_requires_fldissect(rule))
370
net->ipv4.fib_rules_require_fldissect--;
371
errout:
372
return err;
373
}
374
375
static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
376
struct nlattr **tb)
377
{
378
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
379
380
if (frh->src_len && (rule4->src_len != frh->src_len))
381
return 0;
382
383
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
384
return 0;
385
386
if (frh->tos &&
387
(rule4->dscp_full ||
388
inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
389
return 0;
390
391
if (tb[FRA_DSCP]) {
392
dscp_t dscp;
393
394
dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
395
if (!rule4->dscp_full || rule4->dscp != dscp)
396
return 0;
397
}
398
399
if (tb[FRA_DSCP_MASK]) {
400
dscp_t dscp_mask;
401
402
dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
403
if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask)
404
return 0;
405
}
406
407
#ifdef CONFIG_IP_ROUTE_CLASSID
408
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
409
return 0;
410
#endif
411
412
if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC])))
413
return 0;
414
415
if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST])))
416
return 0;
417
418
return 1;
419
}
420
421
static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
422
struct fib_rule_hdr *frh)
423
{
424
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
425
426
frh->dst_len = rule4->dst_len;
427
frh->src_len = rule4->src_len;
428
429
if (rule4->dscp_full) {
430
frh->tos = 0;
431
if (nla_put_u8(skb, FRA_DSCP,
432
inet_dscp_to_dsfield(rule4->dscp) >> 2) ||
433
nla_put_u8(skb, FRA_DSCP_MASK,
434
inet_dscp_to_dsfield(rule4->dscp_mask) >> 2))
435
goto nla_put_failure;
436
} else {
437
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
438
}
439
440
if ((rule4->dst_len &&
441
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
442
(rule4->src_len &&
443
nla_put_in_addr(skb, FRA_SRC, rule4->src)))
444
goto nla_put_failure;
445
#ifdef CONFIG_IP_ROUTE_CLASSID
446
if (rule4->tclassid &&
447
nla_put_u32(skb, FRA_FLOW, rule4->tclassid))
448
goto nla_put_failure;
449
#endif
450
return 0;
451
452
nla_put_failure:
453
return -ENOBUFS;
454
}
455
456
/* Space reserved for the IPv4-specific attributes of a rule message; the
 * figure is the same for every rule (@rule is not consulted).
 */
static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
{
	size_t len = 0;

	len += nla_total_size(4);	/* dst */
	len += nla_total_size(4);	/* src */
	len += nla_total_size(4);	/* flow */
	len += nla_total_size(1);	/* dscp */
	len += nla_total_size(1);	/* dscp mask */

	return len;
}
464
465
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
466
{
467
rt_cache_flush(ops->fro_net);
468
}
469
470
/* Template handed to fib_rules_register() by fib4_rules_init(). */
static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
	.family		= AF_INET,
	.rule_size	= sizeof(struct fib4_rule),
	.addr_size	= sizeof(u32),

	/* lookup path */
	.action		= fib4_rule_action,
	.suppress	= fib4_rule_suppress,
	.match		= fib4_rule_match,

	/* netlink configuration and dump */
	.configure	= fib4_rule_configure,
	.delete		= fib4_rule_delete,
	.compare	= fib4_rule_compare,
	.fill		= fib4_rule_fill,
	.nlmsg_payload	= fib4_rule_nlmsg_payload,

	.flush_cache	= fib4_rule_flush_cache,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.owner		= THIS_MODULE,
};
486
487
static int fib_default_rules_init(struct fib_rules_ops *ops)
488
{
489
int err;
490
491
err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL);
492
if (err < 0)
493
return err;
494
err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN);
495
if (err < 0)
496
return err;
497
err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT);
498
if (err < 0)
499
return err;
500
return 0;
501
}
502
503
/* Per-netns setup of the IPv4 policy rules: register the rules ops, add
 * the default rules and reset the per-netns rule state.
 *
 * Returns 0 on success or a negative error code. When adding the default
 * rules fails, the freshly registered ops are unregistered again.
 */
int __net_init fib4_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	int err;

	ops = fib_rules_register(&fib4_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	err = fib_default_rules_init(ops);
	if (err < 0) {
		/* also cleans all rules already added */
		fib_rules_unregister(ops);
		return err;
	}

	net->ipv4.rules_ops = ops;
	net->ipv4.fib_has_custom_rules = false;
	net->ipv4.fib_rules_require_fldissect = 0;
	return 0;
}
525
526
/* Per-netns teardown: unregister the IPv4 rules ops of @net. */
void __net_exit fib4_rules_exit(struct net *net)
{
	struct fib_rules_ops *ops = net->ipv4.rules_ops;

	fib_rules_unregister(ops);
}
530
531