/*
 * Monitoring code for network dropped packet alerts
 *
 * Copyright (C) 2009 Neil Horman <[email protected]>
 */

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <net/genetlink.h>
#include <net/netevent.h>

#include <trace/events/skb.h>
#include <trace/events/napi.h>

#include <asm/unaligned.h>

#define TRACE_ON 1
#define TRACE_OFF 0

static void send_dm_alert(struct work_struct *unused);

/*
 * Globals: the current trace state and the lock that protects
 * changes to it.
 */
static int trace_state = TRACE_OFF;
static DEFINE_SPINLOCK(trace_state_lock);

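/*
 * Per-CPU monitor state: a pre-built alert skb, the budget of hits
 * (dm_hit_count) that may still be recorded into it, the timer that
 * provides send hysteresis, and the work item that ships the alert
 * from process context.
 */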
struct per_cpu_dm_data {
	struct work_struct dm_alert_work;
	struct sk_buff *skb;
	atomic_t dm_hit_count;
	struct timer_list send_timer;
};

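/*
 * One of these is kept for every registered net_device; it remembers
 * the device's rx_dropped count and when we last sampled it, so the
 * NAPI poll hook can notice hardware-level drops.
 */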
struct dm_hw_stat_delta {
	struct net_device *dev;
	unsigned long last_rx;
	struct list_head list;
	struct rcu_head rcu;
	unsigned long last_drop_val;
};

static struct genl_family net_drop_monitor_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = "NET_DM",
	.version = 2,
	.maxattr = NET_DM_CMD_MAX,
};

static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);

static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);

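/*
 * (Re)build the pre-allocated alert message for one CPU. The skb is
 * laid out as nlmsghdr / genlmsghdr / one NLA_UNSPEC attribute whose
 * payload is a struct net_dm_alert_msg followed by room for up to
 * dm_hit_limit struct net_dm_drop_point entries; trace_drop_common()
 * below walks this layout directly.
 */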
static void reset_per_cpu_data(struct per_cpu_dm_data *data)
{
	size_t al;
	struct net_dm_alert_msg *msg;
	struct nlattr *nla;

	al = sizeof(struct net_dm_alert_msg);
	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
	al += sizeof(struct nlattr);

	data->skb = genlmsg_new(al, GFP_KERNEL);
	genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
			0, NET_DM_CMD_ALERT);
	nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
	msg = nla_data(nla);
	memset(msg, 0, al);
	atomic_set(&data->dm_hit_count, dm_hit_limit);
}
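
/*
 * Note: genlmsg_new() can return NULL under memory pressure, and this
 * version uses the result above without checking it, so allocation is
 * effectively assumed to succeed.
 */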

static void send_dm_alert(struct work_struct *unused)
{
	struct sk_buff *skb;
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	/*
	 * Grab the skb we're about to send
	 */
	skb = data->skb;

	/*
	 * Replace it with a new one
	 */
	reset_per_cpu_data(data);

	/*
	 * Ship it!
	 */
	genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
}
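
/*
 * genlmsg_multicast() consumes the skb, so once the old buffer has
 * been swapped out above, ownership passes to the netlink layer.
 */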

/*
 * This is the timer function to delay the sending of an alert
 * in the event that more drops will arrive during the
 * hysteresis period. Note that it operates under the timer interrupt
 * so we don't need to disable preemption here
 */
static void sched_send_work(unsigned long unused)
{
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	schedule_work(&data->dm_alert_work);
}
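
/*
 * Record one drop. Each hit consumes a unit of the per-CPU budget;
 * a drop at a program counter we have already seen just bumps that
 * entry's count, otherwise a new drop point is appended. The first
 * hit in a quiet period arms the hysteresis timer so that subsequent
 * drops are batched into the same alert.
 */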
static void trace_drop_common(struct sk_buff *skb, void *location)
{
	struct net_dm_alert_msg *msg;
	struct nlmsghdr *nlh;
	struct nlattr *nla;
	int i;
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
		/*
		 * we're already at zero, discard this hit
		 */
		goto out;
	}

	nlh = (struct nlmsghdr *)data->skb->data;
	nla = genlmsg_data(nlmsg_data(nlh));
	msg = nla_data(nla);
	for (i = 0; i < msg->entries; i++) {
		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
			msg->points[i].count++;
			goto out;
		}
	}

	/*
	 * We need to create a new entry
	 */
	__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
	msg->points[msg->entries].count = 1;
	msg->entries++;

	if (!timer_pending(&data->send_timer)) {
		data->send_timer.expires = jiffies + dm_delay * HZ;
		add_timer_on(&data->send_timer, smp_processor_id());
	}

out:
	return;
}
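
/*
 * The unchecked __nla_reserve_nohdr() above appears to be bounded by
 * the hit-count budget: at most dm_hit_limit entries can be recorded
 * per alert, and reset_per_cpu_data() sized the skb for exactly that
 * many drop points.
 */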

static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
{
	trace_drop_common(skb, location);
}

static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
{
	struct dm_hw_stat_delta *new_stat;

	/*
	 * Don't check napi structures with no associated device
	 */
	if (!napi->dev)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
		/*
		 * Only add a note to our monitor buffer if:
		 * 1) this is the dev we received on
		 * 2) we're past the last_rx check interval
		 * 3) our rx_dropped count has gone up
		 */
		if ((new_stat->dev == napi->dev) &&
		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
			new_stat->last_rx = jiffies;
			break;
		}
	}
	rcu_read_unlock();
}
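
/*
 * Flip tracing on or off under trace_state_lock: (un)register the
 * kfree_skb and napi_poll tracepoint hooks, and on the way off wait
 * for in-flight probes to finish before freeing any device entries
 * that were orphaned while tracing was active.
 */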
static int set_all_monitor_traces(int state)
{
	int rc = 0;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *temp;

	spin_lock(&trace_state_lock);

	if (state == trace_state) {
		rc = -EAGAIN;
		goto out_unlock;
	}

	switch (state) {
	case TRACE_ON:
		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
		break;
	case TRACE_OFF:
		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);

		tracepoint_synchronize_unregister();

		/*
		 * Clean the device list
		 */
		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
			if (new_stat->dev == NULL) {
				list_del_rcu(&new_stat->list);
				kfree_rcu(new_stat, rcu);
			}
		}
		break;
	default:
		rc = 1;
		break;
	}

	if (!rc)
		trace_state = state;
	else
		rc = -EINPROGRESS;

out_unlock:
	spin_unlock(&trace_state_lock);

	return rc;
}

static int net_dm_cmd_config(struct sk_buff *skb,
			struct genl_info *info)
{
	return -ENOTSUPP;
}

static int net_dm_cmd_trace(struct sk_buff *skb,
			struct genl_info *info)
{
	switch (info->genlhdr->cmd) {
	case NET_DM_CMD_START:
		return set_all_monitor_traces(TRACE_ON);
	case NET_DM_CMD_STOP:
		return set_all_monitor_traces(TRACE_OFF);
	}

	return -ENOTSUPP;
}
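
/*
 * Userspace drives this over generic netlink: resolve the "NET_DM"
 * family, send NET_DM_CMD_START or NET_DM_CMD_STOP, and listen for
 * NET_DM_CMD_ALERT multicasts. For example, the dropwatch utility
 * speaks this protocol; a typical session (exact syntax depends on
 * the installed version) looks roughly like:
 *
 *	$ dropwatch -l kas
 *	dropwatch> start	(sends NET_DM_CMD_START)
 *	dropwatch> stop		(sends NET_DM_CMD_STOP)
 */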

static int dropmon_net_event(struct notifier_block *ev_block,
			unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *tmp;

	switch (event) {
	case NETDEV_REGISTER:
		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);

		if (!new_stat)
			goto out;

		new_stat->dev = dev;
		new_stat->last_rx = jiffies;
		spin_lock(&trace_state_lock);
		list_add_rcu(&new_stat->list, &hw_stats_list);
		spin_unlock(&trace_state_lock);
		break;
	case NETDEV_UNREGISTER:
		spin_lock(&trace_state_lock);
		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
			if (new_stat->dev == dev) {
				new_stat->dev = NULL;
				if (trace_state == TRACE_OFF) {
					list_del_rcu(&new_stat->list);
					kfree_rcu(new_stat, rcu);
					break;
				}
			}
		}
		spin_unlock(&trace_state_lock);
		break;
	}
out:
	return NOTIFY_DONE;
}
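
/*
 * On NETDEV_UNREGISTER an entry is only unhooked (dev = NULL) while
 * tracing is active, since the NAPI hook may still be walking the
 * list under RCU; set_all_monitor_traces(TRACE_OFF) frees such
 * orphaned entries once the tracepoints have been quiesced.
 */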

static struct genl_ops dropmon_ops[] = {
	{
		.cmd = NET_DM_CMD_CONFIG,
		.doit = net_dm_cmd_config,
	},
	{
		.cmd = NET_DM_CMD_START,
		.doit = net_dm_cmd_trace,
	},
	{
		.cmd = NET_DM_CMD_STOP,
		.doit = net_dm_cmd_trace,
	},
};

static struct notifier_block dropmon_net_notifier = {
	.notifier_call = dropmon_net_event
};

static int __init init_net_drop_monitor(void)
{
	struct per_cpu_dm_data *data;
	int cpu, rc;

	printk(KERN_INFO "Initializing network drop monitor service\n");

	if (sizeof(void *) > 8) {
		printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
		return -ENOSPC;
	}

	rc = genl_register_family_with_ops(&net_drop_monitor_family,
					dropmon_ops,
					ARRAY_SIZE(dropmon_ops));
	if (rc) {
		printk(KERN_ERR "Could not create drop monitor netlink family\n");
		return rc;
	}

	rc = register_netdevice_notifier(&dropmon_net_notifier);
	if (rc < 0) {
		printk(KERN_CRIT "Failed to register netdevice notifier\n");
		goto out_unreg;
	}

	rc = 0;

	for_each_present_cpu(cpu) {
		data = &per_cpu(dm_cpu_data, cpu);
		reset_per_cpu_data(data);
		INIT_WORK(&data->dm_alert_work, send_dm_alert);
		init_timer(&data->send_timer);
		data->send_timer.data = cpu;
		data->send_timer.function = sched_send_work;
	}

	goto out;

out_unreg:
	genl_unregister_family(&net_drop_monitor_family);
out:
	return rc;
}
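
/*
 * Registered as a late_initcall, by which point the generic netlink
 * core and the netdevice notifier infrastructure are both up.
 */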
late_initcall(init_net_drop_monitor);