Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/samples/bpf/sockex3_kern.c
25924 views
1
/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2
*
3
* This program is free software; you can redistribute it and/or
4
* modify it under the terms of version 2 of the GNU General Public
5
* License as published by the Free Software Foundation.
6
*/
7
#include <uapi/linux/bpf.h>
8
#include <uapi/linux/in.h>
9
#include <uapi/linux/if.h>
10
#include <uapi/linux/if_ether.h>
11
#include <uapi/linux/ip.h>
12
#include <uapi/linux/ipv6.h>
13
#include <uapi/linux/if_tunnel.h>
14
#include <uapi/linux/mpls.h>
15
#include <bpf/bpf_helpers.h>
16
#include "bpf_legacy.h"
17
/* Bits of the IPv4 frag_off field tested by ip_is_fragment(). */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

/* Slot numbers of the per-protocol parsers inside prog_array_init. */
#define PARSE_VLAN	1
#define PARSE_MPLS	2
#define PARSE_IP	3
#define PARSE_IPV6	4
24
25
struct vlan_hdr {
26
__be16 h_vlan_TCI;
27
__be16 h_vlan_encapsulated_proto;
28
};
29
30
struct flow_key_record {
31
__be32 src;
32
__be32 dst;
33
union {
34
__be32 ports;
35
__be16 port16[2];
36
};
37
__u32 ip_proto;
38
};
39
40
/* Forward declaration: the dispatcher is defined after the parsers it jumps to. */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
41
42
/*
 * Test whether the IPv4 header at @nhoff describes a fragment.
 *
 * Returns the frag_off field masked with IP_MF | IP_OFFSET, i.e. non-zero
 * when either the more-fragments flag or a fragment offset is present.
 */
static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	const __u64 frag_field = nhoff + offsetof(struct iphdr, frag_off);

	return load_half(ctx, frag_field) & (IP_MF | IP_OFFSET);
}
47
48
/*
 * Fold the 16 bytes starting at @off into 32 bits by XOR-ing the four
 * consecutive 32-bit words together.
 */
static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 folded;

	folded  = load_word(ctx, off);
	folded ^= load_word(ctx, off + 4);
	folded ^= load_word(ctx, off + 8);
	folded ^= load_word(ctx, off + 12);

	return (__u32)folded;
}
57
58
struct globals {
59
struct flow_key_record flow;
60
};
61
62
struct {
63
__uint(type, BPF_MAP_TYPE_ARRAY);
64
__type(key, __u32);
65
__type(value, struct globals);
66
__uint(max_entries, 32);
67
} percpu_map SEC(".maps");
68
69
/* user poor man's per_cpu until native support is ready */
70
static struct globals *this_cpu_globals(void)
71
{
72
u32 key = bpf_get_smp_processor_id();
73
74
return bpf_map_lookup_elem(&percpu_map, &key);
75
}
76
77
/* some simple stats for user space consumption */
78
struct pair {
79
__u64 packets;
80
__u64 bytes;
81
};
82
83
struct {
84
__uint(type, BPF_MAP_TYPE_HASH);
85
__type(key, struct flow_key_record);
86
__type(value, struct pair);
87
__uint(max_entries, 1024);
88
} hash_map SEC(".maps");
89
90
static void update_stats(struct __sk_buff *skb, struct globals *g)
91
{
92
struct flow_key_record key = g->flow;
93
struct pair *value;
94
95
value = bpf_map_lookup_elem(&hash_map, &key);
96
if (value) {
97
__sync_fetch_and_add(&value->packets, 1);
98
__sync_fetch_and_add(&value->bytes, skb->len);
99
} else {
100
struct pair val = {1, skb->len};
101
102
bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
103
}
104
}
105
106
/*
 * Handle the protocol that follows an IPv4/IPv6 header.
 *
 * @skb:      packet; skb->cb[0] holds the offset of the header to parse
 * @g:        scratch state whose flow key is being filled in
 * @ip_proto: protocol number taken from the preceding IP header
 *
 * Tunnel protocols (GRE, IPIP, IPv6-in-IP) hand the packet back to
 * parse_eth_proto() for the inner header.  TCP, UDP and ICMP complete the
 * flow key and update the statistics.  Anything else is ignored.
 */
static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
	__u32 nhoff = skb->cb[0];

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		/*
		 * NOTE(review): load_half() is believed to return the value in
		 * host byte order, while the GRE_* flag macros may be defined
		 * as big-endian constants - confirm against if_tunnel.h that
		 * the tests below match on little-endian hosts.
		 */
		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		/* Skip the 4-byte base header plus each optional 4-byte field. */
		nhoff += 4;
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

		skb->cb[0] = nhoff;
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* The first 32-bit word of the L4 header is stored as the ports. */
		g->flow.ports = load_word(skb, nhoff);
		/* fall through */
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}
156
157
/*
 * IPv4 parser (slot PARSE_IP).
 *
 * Reads the IPv4 header at skb->cb[0], records the addresses in the
 * per-CPU flow key, advances skb->cb[0] past the header and continues
 * with parse_ip_proto().  Fragments are not processed.  Always returns 0.
 */
SEC("socket")
int bpf_func_ip(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 hdr_off, first_byte, l4_proto;

	if (!g)
		return 0;

	hdr_off = skb->cb[0];
	if (unlikely(ip_is_fragment(skb, hdr_off)))
		return 0;

	l4_proto = load_byte(skb, hdr_off + offsetof(struct iphdr, protocol));

	/* The addresses are not recorded when the payload is GRE. */
	if (l4_proto != IPPROTO_GRE) {
		g->flow.src = load_word(skb,
					hdr_off + offsetof(struct iphdr, saddr));
		g->flow.dst = load_word(skb,
					hdr_off + offsetof(struct iphdr, daddr));
	}

	/* Low nibble of the first header byte is the header length in words. */
	first_byte = load_byte(skb, hdr_off + 0/*offsetof(struct iphdr, ihl)*/);
	hdr_off += (first_byte & 0xF) << 2;

	skb->cb[0] = hdr_off;
	parse_ip_proto(skb, g, l4_proto);
	return 0;
}
185
186
/*
 * IPv6 parser (slot PARSE_IPV6).
 *
 * Reads the next-header value, stores a 32-bit hash of each address in
 * the per-CPU flow key, advances skb->cb[0] past the fixed IPv6 header
 * and continues with parse_ip_proto().  Always returns 0.
 */
SEC("socket")
int bpf_func_ipv6(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 hdr_off, next_hdr;

	if (!g)
		return 0;

	hdr_off = skb->cb[0];

	next_hdr = load_byte(skb, hdr_off + offsetof(struct ipv6hdr, nexthdr));
	g->flow.src = ipv6_addr_hash(skb,
				     hdr_off + offsetof(struct ipv6hdr, saddr));
	g->flow.dst = ipv6_addr_hash(skb,
				     hdr_off + offsetof(struct ipv6hdr, daddr));

	skb->cb[0] = hdr_off + sizeof(struct ipv6hdr);
	parse_ip_proto(skb, g, next_hdr);
	return 0;
}
209
210
/*
 * VLAN parser (slot PARSE_VLAN).
 *
 * Reads the encapsulated protocol from the tag at skb->cb[0], moves
 * skb->cb[0] past the tag and dispatches on that protocol.
 * Always returns 0.
 */
SEC("socket")
int bpf_func_vlan(struct __sk_buff *skb)
{
	__u32 tag_off = skb->cb[0];
	__u32 inner_proto;

	inner_proto = load_half(skb,
				tag_off + offsetof(struct vlan_hdr,
						   h_vlan_encapsulated_proto));
	skb->cb[0] = tag_off + sizeof(struct vlan_hdr);

	parse_eth_proto(skb, inner_proto);

	return 0;
}
226
227
/*
 * MPLS parser (slot PARSE_MPLS).
 *
 * Reads one label stack entry at skb->cb[0] and advances skb->cb[0] past
 * it.  If the entry carries the bottom-of-stack bit, the payload is
 * classified by the version nibble of its first byte: 4 is parsed as
 * IPv4, everything else as IPv6.  Otherwise the next label is parsed.
 * Always returns 0.
 */
SEC("socket")
int bpf_func_mpls(struct __sk_buff *skb)
{
	__u32 nhoff, label;

	nhoff = skb->cb[0];

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
	skb->cb[0] = nhoff;

	if (label & MPLS_LS_S_MASK) {
		__u8 verlen = load_byte(skb, nhoff);

		/*
		 * The version lives in the high nibble, so the masked value
		 * for IPv4 is 0x40.  The previous comparison against 4 could
		 * never match and sent every payload to the IPv6 parser.
		 */
		if ((verlen & 0xF0) == 0x40)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}
250
251
struct {
252
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
253
__uint(key_size, sizeof(u32));
254
__uint(max_entries, 8);
255
__array(values, u32 (void *));
256
} prog_array_init SEC(".maps") = {
257
.values = {
258
[PARSE_VLAN] = (void *)&bpf_func_vlan,
259
[PARSE_IP] = (void *)&bpf_func_ip,
260
[PARSE_IPV6] = (void *)&bpf_func_ipv6,
261
[PARSE_MPLS] = (void *)&bpf_func_mpls,
262
},
263
};
264
265
/* Protocol dispatch routine. It tail-calls next BPF program depending
266
* on eth proto. Note, we could have used ...
267
*
268
* bpf_tail_call(skb, &prog_array_init, proto);
269
*
270
* ... but it would need large prog_array and cannot be optimised given
271
* the map key is not static.
272
*/
273
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
274
{
275
switch (proto) {
276
case ETH_P_8021Q:
277
case ETH_P_8021AD:
278
bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
279
break;
280
case ETH_P_MPLS_UC:
281
case ETH_P_MPLS_MC:
282
bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
283
break;
284
case ETH_P_IP:
285
bpf_tail_call(skb, &prog_array_init, PARSE_IP);
286
break;
287
case ETH_P_IPV6:
288
bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
289
break;
290
}
291
}
292
293
/*
 * Entry point: reads the 16-bit protocol field at byte 12 of the frame,
 * sets skb->cb[0] to the first byte after the Ethernet header and
 * dispatches on the protocol.  Always returns 0.
 */
SEC("socket")
int main_prog(struct __sk_buff *skb)
{
	__u32 eth_type = load_half(skb, 12);

	skb->cb[0] = ETH_HLEN;
	parse_eth_proto(skb, eth_type);

	return 0;
}
303
304
/* License string placed in the "license" section of the object file. */
char _license[] SEC("license") = "GPL";
305
306