GitHub Repository: torvalds/linux
Path: blob/master/net/core/drop_monitor.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Monitoring code for network dropped packet alerts
 *
 * Copyright (C) 2009 Neil Horman <[email protected]>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
#include <linux/bitfield.h>
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/genetlink.h>
#include <net/netevent.h>
#include <net/flow_offload.h>
#include <net/dropreason.h>
#include <net/devlink.h>

#include <trace/events/skb.h>
#include <trace/events/napi.h>
#include <trace/events/devlink.h>

#include <linux/unaligned.h>

#define TRACE_ON 1
#define TRACE_OFF 0

/*
 * Globals, our netlink socket pointer
 * and the work handle that will send up
 * netlink alerts
 */
static int trace_state = TRACE_OFF;
static bool monitor_hw;

/* net_dm_mutex
 *
 * An overall lock guarding every operation coming from userspace.
 */
static DEFINE_MUTEX(net_dm_mutex);

struct net_dm_stats {
	u64_stats_t dropped;
	struct u64_stats_sync syncp;
};

#define NET_DM_MAX_HW_TRAP_NAME_LEN 40

struct net_dm_hw_entry {
	char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
	u32 count;
};

struct net_dm_hw_entries {
	u32 num_entries;
	struct net_dm_hw_entry entries[];
};

struct per_cpu_dm_data {
	raw_spinlock_t lock;	/* Protects 'skb', 'hw_entries' and
				 * 'send_timer'
				 */
	union {
		struct sk_buff *skb;
		struct net_dm_hw_entries *hw_entries;
	};
	struct sk_buff_head drop_queue;
	struct work_struct dm_alert_work;
	struct timer_list send_timer;
	struct net_dm_stats stats;
};

struct dm_hw_stat_delta {
	unsigned long last_rx;
	unsigned long last_drop_val;
	struct rcu_head rcu;
};

static struct genl_family net_drop_monitor_family;

static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);

static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;

static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
static u32 net_dm_queue_len = 1000;

struct net_dm_alert_ops {
	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
				void *location,
				enum skb_drop_reason reason,
				struct sock *rx_sk);
	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
				int work, int budget);
	void (*work_item_func)(struct work_struct *work);
	void (*hw_work_item_func)(struct work_struct *work);
	void (*hw_trap_probe)(void *ignore, const struct devlink *devlink,
			      struct sk_buff *skb,
			      const struct devlink_trap_metadata *metadata);
};

struct net_dm_skb_cb {
	union {
		struct devlink_trap_metadata *hw_metadata;
		void *pc;
	};
	enum skb_drop_reason reason;
};

#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))

static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
{
	size_t al;
	struct net_dm_alert_msg *msg;
	struct nlattr *nla;
	struct sk_buff *skb;
	unsigned long flags;
	void *msg_header;

	al = sizeof(struct net_dm_alert_msg);
	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
	al += sizeof(struct nlattr);

	skb = genlmsg_new(al, GFP_KERNEL);

	if (!skb)
		goto err;

	msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
				 0, NET_DM_CMD_ALERT);
	if (!msg_header) {
		nlmsg_free(skb);
		skb = NULL;
		goto err;
	}
	nla = nla_reserve(skb, NLA_UNSPEC,
			  sizeof(struct net_dm_alert_msg));
	if (!nla) {
		nlmsg_free(skb);
		skb = NULL;
		goto err;
	}
	msg = nla_data(nla);
	memset(msg, 0, al);
	goto out;

err:
	mod_timer(&data->send_timer, jiffies + HZ / 10);
out:
	raw_spin_lock_irqsave(&data->lock, flags);
	swap(data->skb, skb);
	raw_spin_unlock_irqrestore(&data->lock, flags);

	if (skb) {
		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
		struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh);

		genlmsg_end(skb, genlmsg_data(gnlh));
	}

	return skb;
}
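
/* Summary mode keeps one pre-built netlink alert skb per CPU: the message
 * built above reserves an NLA_UNSPEC attribute holding a struct
 * net_dm_alert_msg, and the skb is allocated large enough to grow that
 * attribute by up to dm_hit_limit drop points, which trace_drop_common()
 * appends as drops are observed. The skb swapped out here is closed with
 * genlmsg_end() and returned to the caller for multicast.
 */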

static const struct genl_multicast_group dropmon_mcgrps[] = {
	{ .name = "events", .flags = GENL_MCAST_CAP_SYS_ADMIN, },
};

static void send_dm_alert(struct work_struct *work)
{
	struct sk_buff *skb;
	struct per_cpu_dm_data *data;

	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);

	skb = reset_per_cpu_data(data);

	if (skb)
		genlmsg_multicast(&net_drop_monitor_family, skb, 0,
				  0, GFP_KERNEL);
}

/*
 * This is the timer function to delay the sending of an alert
 * in the event that more drops will arrive during the
 * hysteresis period.
 */
static void sched_send_work(struct timer_list *t)
{
	struct per_cpu_dm_data *data = timer_container_of(data, t, send_timer);

	schedule_work(&data->dm_alert_work);
}

static void trace_drop_common(struct sk_buff *skb, void *location)
{
	struct net_dm_alert_msg *msg;
	struct net_dm_drop_point *point;
	struct nlmsghdr *nlh;
	struct nlattr *nla;
	int i;
	struct sk_buff *dskb;
	struct per_cpu_dm_data *data;
	unsigned long flags;

	local_irq_save(flags);
	data = this_cpu_ptr(&dm_cpu_data);
	raw_spin_lock(&data->lock);
	dskb = data->skb;

	if (!dskb)
		goto out;

	nlh = (struct nlmsghdr *)dskb->data;
	nla = genlmsg_data(nlmsg_data(nlh));
	msg = nla_data(nla);
	point = msg->points;
	for (i = 0; i < msg->entries; i++) {
		if (!memcmp(&location, &point->pc, sizeof(void *))) {
			point->count++;
			goto out;
		}
		point++;
	}
	if (msg->entries == dm_hit_limit)
		goto out;
	/*
	 * We need to create a new entry
	 */
	__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
	memcpy(point->pc, &location, sizeof(void *));
	point->count = 1;
	msg->entries++;

	if (!timer_pending(&data->send_timer)) {
		data->send_timer.expires = jiffies + dm_delay * HZ;
		add_timer(&data->send_timer);
	}

out:
	raw_spin_unlock_irqrestore(&data->lock, flags);
}

static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
				void *location,
				enum skb_drop_reason reason,
				struct sock *rx_sk)
{
	trace_drop_common(skb, location);
}

static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
				int work, int budget)
{
	struct net_device *dev = napi->dev;
	struct dm_hw_stat_delta *stat;
	/*
	 * Don't check napi structures with no associated device
	 */
	if (!dev)
		return;

	rcu_read_lock();
	stat = rcu_dereference(dev->dm_private);
	if (stat) {
		/*
		 * only add a note to our monitor buffer if:
		 * 1) its after the last_rx delta
		 * 2) our rx_dropped count has gone up
		 */
		if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
		    (dev->stats.rx_dropped != stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
			stat->last_drop_val = dev->stats.rx_dropped;
			stat->last_rx = jiffies;
		}
	}
	rcu_read_unlock();
}

static struct net_dm_hw_entries *
net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
{
	struct net_dm_hw_entries *hw_entries;
	unsigned long flags;

	hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
			     GFP_KERNEL);
	if (!hw_entries) {
		/* If the memory allocation failed, we try to perform another
		 * allocation in 1/10 second. Otherwise, the probe function
		 * will constantly bail out.
		 */
		mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
	}

	raw_spin_lock_irqsave(&hw_data->lock, flags);
	swap(hw_data->hw_entries, hw_entries);
	raw_spin_unlock_irqrestore(&hw_data->lock, flags);

	return hw_entries;
}

static int net_dm_hw_entry_put(struct sk_buff *msg,
			       const struct net_dm_hw_entry *hw_entry)
{
	struct nlattr *attr;

	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
	if (!attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
		goto nla_put_failure;

	if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
		goto nla_put_failure;

	nla_nest_end(msg, attr);

	return 0;

nla_put_failure:
	nla_nest_cancel(msg, attr);
	return -EMSGSIZE;
}

static int net_dm_hw_entries_put(struct sk_buff *msg,
				 const struct net_dm_hw_entries *hw_entries)
{
	struct nlattr *attr;
	int i;

	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
	if (!attr)
		return -EMSGSIZE;

	for (i = 0; i < hw_entries->num_entries; i++) {
		int rc;

		rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
		if (rc)
			goto nla_put_failure;
	}

	nla_nest_end(msg, attr);

	return 0;

nla_put_failure:
	nla_nest_cancel(msg, attr);
	return -EMSGSIZE;
}

static int
net_dm_hw_summary_report_fill(struct sk_buff *msg,
			      const struct net_dm_hw_entries *hw_entries)
{
	struct net_dm_alert_msg anc_hdr = { 0 };
	void *hdr;
	int rc;

	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
			  NET_DM_CMD_ALERT);
	if (!hdr)
		return -EMSGSIZE;

	/* We need to put the ancillary header in order not to break user
	 * space.
	 */
	if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
		goto nla_put_failure;

	rc = net_dm_hw_entries_put(msg, hw_entries);
	if (rc)
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

static void net_dm_hw_summary_work(struct work_struct *work)
{
	struct net_dm_hw_entries *hw_entries;
	struct per_cpu_dm_data *hw_data;
	struct sk_buff *msg;
	int rc;

	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);

	hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
	if (!hw_entries)
		return;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		goto out;

	rc = net_dm_hw_summary_report_fill(msg, hw_entries);
	if (rc) {
		nlmsg_free(msg);
		goto out;
	}

	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);

out:
	kfree(hw_entries);
}

static void
net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
			     struct sk_buff *skb,
			     const struct devlink_trap_metadata *metadata)
{
	struct net_dm_hw_entries *hw_entries;
	struct net_dm_hw_entry *hw_entry;
	struct per_cpu_dm_data *hw_data;
	unsigned long flags;
	int i;

	if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL)
		return;

	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
	raw_spin_lock_irqsave(&hw_data->lock, flags);
	hw_entries = hw_data->hw_entries;

	if (!hw_entries)
		goto out;

	for (i = 0; i < hw_entries->num_entries; i++) {
		hw_entry = &hw_entries->entries[i];
		if (!strncmp(hw_entry->trap_name, metadata->trap_name,
			     NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
			hw_entry->count++;
			goto out;
		}
	}
	if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
		goto out;

	hw_entry = &hw_entries->entries[hw_entries->num_entries];
	strscpy(hw_entry->trap_name, metadata->trap_name,
		NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
	hw_entry->count = 1;
	hw_entries->num_entries++;

	if (!timer_pending(&hw_data->send_timer)) {
		hw_data->send_timer.expires = jiffies + dm_delay * HZ;
		add_timer(&hw_data->send_timer);
	}

out:
	raw_spin_unlock_irqrestore(&hw_data->lock, flags);
}

static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
	.kfree_skb_probe = trace_kfree_skb_hit,
	.napi_poll_probe = trace_napi_poll_hit,
	.work_item_func = send_dm_alert,
	.hw_work_item_func = net_dm_hw_summary_work,
	.hw_trap_probe = net_dm_hw_trap_summary_probe,
};
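
/* The ops above implement NET_DM_ALERT_MODE_SUMMARY: software drops are
 * aggregated per program counter and hardware drops per trap name, and only
 * periodic count summaries are multicast to user space. The probes below
 * implement NET_DM_ALERT_MODE_PACKET, where each dropped packet is cloned,
 * queued per CPU and reported individually together with its metadata.
 */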

static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
					      struct sk_buff *skb,
					      void *location,
					      enum skb_drop_reason reason,
					      struct sock *rx_sk)
{
	ktime_t tstamp = ktime_get_real();
	struct per_cpu_dm_data *data;
	struct net_dm_skb_cb *cb;
	struct sk_buff *nskb;
	unsigned long flags;

	if (!skb_mac_header_was_set(skb))
		return;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return;

	cb = NET_DM_SKB_CB(nskb);
	cb->reason = reason;
	cb->pc = location;
	/* Override the timestamp because we care about the time when the
	 * packet was dropped.
	 */
	nskb->tstamp = tstamp;

	data = this_cpu_ptr(&dm_cpu_data);

	spin_lock_irqsave(&data->drop_queue.lock, flags);
	if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
		__skb_queue_tail(&data->drop_queue, nskb);
	else
		goto unlock_free;
	spin_unlock_irqrestore(&data->drop_queue.lock, flags);

	schedule_work(&data->dm_alert_work);

	return;

unlock_free:
	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
	u64_stats_update_begin(&data->stats.syncp);
	u64_stats_inc(&data->stats.dropped);
	u64_stats_update_end(&data->stats.syncp);
	consume_skb(nskb);
}

static void net_dm_packet_trace_napi_poll_hit(void *ignore,
					      struct napi_struct *napi,
					      int work, int budget)
{
}

static size_t net_dm_in_port_size(void)
{
	/* NET_DM_ATTR_IN_PORT nest */
	return nla_total_size(0) +
	       /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
	       nla_total_size(sizeof(u32)) +
	       /* NET_DM_ATTR_PORT_NETDEV_NAME */
	       nla_total_size(IFNAMSIZ + 1);
}

#define NET_DM_MAX_SYMBOL_LEN 40
#define NET_DM_MAX_REASON_LEN 50

static size_t net_dm_packet_report_size(size_t payload_len)
{
	size_t size;

	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);

	return NLMSG_ALIGN(size) +
	       /* NET_DM_ATTR_ORIGIN */
	       nla_total_size(sizeof(u16)) +
	       /* NET_DM_ATTR_PC */
	       nla_total_size(sizeof(u64)) +
	       /* NET_DM_ATTR_SYMBOL */
	       nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
	       /* NET_DM_ATTR_IN_PORT */
	       net_dm_in_port_size() +
	       /* NET_DM_ATTR_TIMESTAMP */
	       nla_total_size(sizeof(u64)) +
	       /* NET_DM_ATTR_ORIG_LEN */
	       nla_total_size(sizeof(u32)) +
	       /* NET_DM_ATTR_PROTO */
	       nla_total_size(sizeof(u16)) +
	       /* NET_DM_ATTR_REASON */
	       nla_total_size(NET_DM_MAX_REASON_LEN + 1) +
	       /* NET_DM_ATTR_PAYLOAD */
	       nla_total_size(payload_len);
}

static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
					    const char *name)
{
	struct nlattr *attr;

	attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
	if (!attr)
		return -EMSGSIZE;

	if (ifindex &&
	    nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
		goto nla_put_failure;

	if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
		goto nla_put_failure;

	nla_nest_end(msg, attr);

	return 0;

nla_put_failure:
	nla_nest_cancel(msg, attr);
	return -EMSGSIZE;
}

static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
				     size_t payload_len)
{
	struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
	const struct drop_reason_list *list = NULL;
	unsigned int subsys, subsys_reason;
	char buf[NET_DM_MAX_SYMBOL_LEN];
	struct nlattr *attr;
	void *hdr;
	int rc;

	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
			  NET_DM_CMD_PACKET_ALERT);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
		goto nla_put_failure;

	if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc,
			      NET_DM_ATTR_PAD))
		goto nla_put_failure;

	rcu_read_lock();
	subsys = u32_get_bits(cb->reason, SKB_DROP_REASON_SUBSYS_MASK);
	if (subsys < SKB_DROP_REASON_SUBSYS_NUM)
		list = rcu_dereference(drop_reasons_by_subsys[subsys]);
	subsys_reason = cb->reason & ~SKB_DROP_REASON_SUBSYS_MASK;
	if (!list ||
	    subsys_reason >= list->n_reasons ||
	    !list->reasons[subsys_reason] ||
	    strlen(list->reasons[subsys_reason]) > NET_DM_MAX_REASON_LEN) {
		list = rcu_dereference(drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_CORE]);
		subsys_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	}
	if (nla_put_string(msg, NET_DM_ATTR_REASON,
			   list->reasons[subsys_reason])) {
		rcu_read_unlock();
		goto nla_put_failure;
	}
	rcu_read_unlock();

	snprintf(buf, sizeof(buf), "%pS", cb->pc);
	if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
		goto nla_put_failure;

	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
	if (rc)
		goto nla_put_failure;

	if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
			      ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
		goto nla_put_failure;

	if (!payload_len)
		goto out;

	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
		goto nla_put_failure;

	attr = skb_put(msg, nla_total_size(payload_len));
	attr->nla_type = NET_DM_ATTR_PAYLOAD;
	attr->nla_len = nla_attr_size(payload_len);
	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
		goto nla_put_failure;

out:
	genlmsg_end(msg, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)

static void net_dm_packet_report(struct sk_buff *skb)
{
	struct sk_buff *msg;
	size_t payload_len;
	int rc;

	/* Make sure we start copying the packet from the MAC header */
	if (skb->data > skb_mac_header(skb))
		skb_push(skb, skb->data - skb_mac_header(skb));
	else
		skb_pull(skb, skb_mac_header(skb) - skb->data);

	/* Ensure packet fits inside a single netlink attribute */
	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
	if (net_dm_trunc_len)
		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);

	msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
	if (!msg)
		goto out;

	rc = net_dm_packet_report_fill(msg, skb, payload_len);
	if (rc) {
		nlmsg_free(msg);
		goto out;
	}

	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);

out:
	consume_skb(skb);
}

static void net_dm_packet_work(struct work_struct *work)
{
	struct per_cpu_dm_data *data;
	struct sk_buff_head list;
	struct sk_buff *skb;
	unsigned long flags;

	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);

	__skb_queue_head_init(&list);

	spin_lock_irqsave(&data->drop_queue.lock, flags);
	skb_queue_splice_tail_init(&data->drop_queue, &list);
	spin_unlock_irqrestore(&data->drop_queue.lock, flags);

	while ((skb = __skb_dequeue(&list)))
		net_dm_packet_report(skb);
}

static size_t
net_dm_flow_action_cookie_size(const struct devlink_trap_metadata *hw_metadata)
{
	return hw_metadata->fa_cookie ?
	       nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0;
}

static size_t
net_dm_hw_packet_report_size(size_t payload_len,
			     const struct devlink_trap_metadata *hw_metadata)
{
	size_t size;

	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);

	return NLMSG_ALIGN(size) +
	       /* NET_DM_ATTR_ORIGIN */
	       nla_total_size(sizeof(u16)) +
	       /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
	       nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
	       /* NET_DM_ATTR_HW_TRAP_NAME */
	       nla_total_size(strlen(hw_metadata->trap_name) + 1) +
	       /* NET_DM_ATTR_IN_PORT */
	       net_dm_in_port_size() +
	       /* NET_DM_ATTR_FLOW_ACTION_COOKIE */
	       net_dm_flow_action_cookie_size(hw_metadata) +
	       /* NET_DM_ATTR_TIMESTAMP */
	       nla_total_size(sizeof(u64)) +
	       /* NET_DM_ATTR_ORIG_LEN */
	       nla_total_size(sizeof(u32)) +
	       /* NET_DM_ATTR_PROTO */
	       nla_total_size(sizeof(u16)) +
	       /* NET_DM_ATTR_PAYLOAD */
	       nla_total_size(payload_len);
}

static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
					struct sk_buff *skb, size_t payload_len)
{
	struct devlink_trap_metadata *hw_metadata;
	struct nlattr *attr;
	void *hdr;

	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;

	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
			  NET_DM_CMD_PACKET_ALERT);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
		goto nla_put_failure;

	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
			   hw_metadata->trap_group_name))
		goto nla_put_failure;

	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
			   hw_metadata->trap_name))
		goto nla_put_failure;

	if (hw_metadata->input_dev) {
		struct net_device *dev = hw_metadata->input_dev;
		int rc;

		rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
						      dev->name);
		if (rc)
			goto nla_put_failure;
	}

	if (hw_metadata->fa_cookie &&
	    nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE,
		    hw_metadata->fa_cookie->cookie_len,
		    hw_metadata->fa_cookie->cookie))
		goto nla_put_failure;

	if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
			      ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
		goto nla_put_failure;

	if (!payload_len)
		goto out;

	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
		goto nla_put_failure;

	attr = skb_put(msg, nla_total_size(payload_len));
	attr->nla_type = NET_DM_ATTR_PAYLOAD;
	attr->nla_len = nla_attr_size(payload_len);
	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
		goto nla_put_failure;

out:
	genlmsg_end(msg, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

static struct devlink_trap_metadata *
net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
{
	const struct flow_action_cookie *fa_cookie;
	struct devlink_trap_metadata *hw_metadata;
	const char *trap_group_name;
	const char *trap_name;

	hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC);
	if (!hw_metadata)
		return NULL;

	trap_group_name = kstrdup(metadata->trap_group_name, GFP_ATOMIC);
	if (!trap_group_name)
		goto free_hw_metadata;
	hw_metadata->trap_group_name = trap_group_name;

	trap_name = kstrdup(metadata->trap_name, GFP_ATOMIC);
	if (!trap_name)
		goto free_trap_group;
	hw_metadata->trap_name = trap_name;

	if (metadata->fa_cookie) {
		size_t cookie_size = sizeof(*fa_cookie) +
				     metadata->fa_cookie->cookie_len;

		fa_cookie = kmemdup(metadata->fa_cookie, cookie_size,
				    GFP_ATOMIC);
		if (!fa_cookie)
			goto free_trap_name;
		hw_metadata->fa_cookie = fa_cookie;
	}

	hw_metadata->input_dev = metadata->input_dev;
	netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker,
		    GFP_ATOMIC);

	return hw_metadata;

free_trap_name:
	kfree(trap_name);
free_trap_group:
	kfree(trap_group_name);
free_hw_metadata:
	kfree(hw_metadata);
	return NULL;
}

static void
net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata)
{
	netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker);
	kfree(hw_metadata->fa_cookie);
	kfree(hw_metadata->trap_name);
	kfree(hw_metadata->trap_group_name);
	kfree(hw_metadata);
}

static void net_dm_hw_packet_report(struct sk_buff *skb)
{
	struct devlink_trap_metadata *hw_metadata;
	struct sk_buff *msg;
	size_t payload_len;
	int rc;

	if (skb->data > skb_mac_header(skb))
		skb_push(skb, skb->data - skb_mac_header(skb));
	else
		skb_pull(skb, skb_mac_header(skb) - skb->data);

	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
	if (net_dm_trunc_len)
		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);

	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
	msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
			GFP_KERNEL);
	if (!msg)
		goto out;

	rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
	if (rc) {
		nlmsg_free(msg);
		goto out;
	}

	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);

out:
	net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
	consume_skb(skb);
}

static void net_dm_hw_packet_work(struct work_struct *work)
{
	struct per_cpu_dm_data *hw_data;
	struct sk_buff_head list;
	struct sk_buff *skb;
	unsigned long flags;

	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);

	__skb_queue_head_init(&list);

	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
	skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);

	while ((skb = __skb_dequeue(&list)))
		net_dm_hw_packet_report(skb);
}

static void
net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink,
			    struct sk_buff *skb,
			    const struct devlink_trap_metadata *metadata)
{
	struct devlink_trap_metadata *n_hw_metadata;
	ktime_t tstamp = ktime_get_real();
	struct per_cpu_dm_data *hw_data;
	struct sk_buff *nskb;
	unsigned long flags;

	if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL)
		return;

	if (!skb_mac_header_was_set(skb))
		return;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return;

	n_hw_metadata = net_dm_hw_metadata_copy(metadata);
	if (!n_hw_metadata)
		goto free;

	NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
	nskb->tstamp = tstamp;

	hw_data = this_cpu_ptr(&dm_hw_cpu_data);

	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
	if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
		__skb_queue_tail(&hw_data->drop_queue, nskb);
	else
		goto unlock_free;
	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);

	schedule_work(&hw_data->dm_alert_work);

	return;

unlock_free:
	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
	u64_stats_update_begin(&hw_data->stats.syncp);
	u64_stats_inc(&hw_data->stats.dropped);
	u64_stats_update_end(&hw_data->stats.syncp);
	net_dm_hw_metadata_free(n_hw_metadata);
free:
	consume_skb(nskb);
}

static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
	.kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
	.napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
	.work_item_func = net_dm_packet_work,
	.hw_work_item_func = net_dm_hw_packet_work,
	.hw_trap_probe = net_dm_hw_trap_packet_probe,
};

static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
	[NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
	[NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
};
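
/* The active entry of this array is selected by the NET_DM_ATTR_ALERT_MODE
 * attribute of NET_DM_CMD_CONFIG (see net_dm_alert_mode_set() below); the
 * mode can only be changed while neither software nor hardware monitoring
 * is running.
 */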

#if IS_ENABLED(CONFIG_NET_DEVLINK)
static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops)
{
	return register_trace_devlink_trap_report(ops->hw_trap_probe, NULL);
}

static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops)
{
	unregister_trace_devlink_trap_report(ops->hw_trap_probe, NULL);
	tracepoint_synchronize_unregister();
}
#else
static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops)
{
	return -EOPNOTSUPP;
}

static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops)
{
}
#endif

static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
{
	const struct net_dm_alert_ops *ops;
	int cpu, rc;

	if (monitor_hw) {
		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
		return -EAGAIN;
	}

	ops = net_dm_alert_ops_arr[net_dm_alert_mode];

	if (!try_module_get(THIS_MODULE)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
		return -ENODEV;
	}

	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
		struct net_dm_hw_entries *hw_entries;

		INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
		timer_setup(&hw_data->send_timer, sched_send_work, 0);
		hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
		kfree(hw_entries);
	}

	rc = net_dm_hw_probe_register(ops);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to devlink_trap_probe() tracepoint");
		goto err_module_put;
	}

	monitor_hw = true;

	return 0;

err_module_put:
	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
		struct sk_buff *skb;

		timer_delete_sync(&hw_data->send_timer);
		cancel_work_sync(&hw_data->dm_alert_work);
		while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
			struct devlink_trap_metadata *hw_metadata;

			hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
			net_dm_hw_metadata_free(hw_metadata);
			consume_skb(skb);
		}
	}
	module_put(THIS_MODULE);
	return rc;
}

static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
{
	const struct net_dm_alert_ops *ops;
	int cpu;

	if (!monitor_hw) {
		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
		return;
	}

	ops = net_dm_alert_ops_arr[net_dm_alert_mode];

	monitor_hw = false;

	net_dm_hw_probe_unregister(ops);

	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
		struct sk_buff *skb;

		timer_delete_sync(&hw_data->send_timer);
		cancel_work_sync(&hw_data->dm_alert_work);
		while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
			struct devlink_trap_metadata *hw_metadata;

			hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
			net_dm_hw_metadata_free(hw_metadata);
			consume_skb(skb);
		}
	}

	module_put(THIS_MODULE);
}

static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
{
	const struct net_dm_alert_ops *ops;
	int cpu, rc;

	ops = net_dm_alert_ops_arr[net_dm_alert_mode];

	if (!try_module_get(THIS_MODULE)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
		return -ENODEV;
	}

	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
		struct sk_buff *skb;

		INIT_WORK(&data->dm_alert_work, ops->work_item_func);
		timer_setup(&data->send_timer, sched_send_work, 0);
		/* Allocate a new per-CPU skb for the summary alert message and
		 * free the old one which might contain stale data from
		 * previous tracing.
		 */
		skb = reset_per_cpu_data(data);
		consume_skb(skb);
	}

	rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
		goto err_module_put;
	}

	rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
		goto err_unregister_trace;
	}

	return 0;

err_unregister_trace:
	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
err_module_put:
	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
		struct sk_buff *skb;

		timer_delete_sync(&data->send_timer);
		cancel_work_sync(&data->dm_alert_work);
		while ((skb = __skb_dequeue(&data->drop_queue)))
			consume_skb(skb);
	}
	module_put(THIS_MODULE);
	return rc;
}

static void net_dm_trace_off_set(void)
{
	const struct net_dm_alert_ops *ops;
	int cpu;

	ops = net_dm_alert_ops_arr[net_dm_alert_mode];

	unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);

	tracepoint_synchronize_unregister();

	/* Make sure we do not send notifications to user space after request
	 * to stop tracing returns.
	 */
	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
		struct sk_buff *skb;

		timer_delete_sync(&data->send_timer);
		cancel_work_sync(&data->dm_alert_work);
		while ((skb = __skb_dequeue(&data->drop_queue)))
			consume_skb(skb);
	}

	module_put(THIS_MODULE);
}

static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
{
	int rc = 0;

	if (state == trace_state) {
		NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
		return -EAGAIN;
	}

	switch (state) {
	case TRACE_ON:
		rc = net_dm_trace_on_set(extack);
		break;
	case TRACE_OFF:
		net_dm_trace_off_set();
		break;
	default:
		rc = 1;
		break;
	}

	if (!rc)
		trace_state = state;
	else
		rc = -EINPROGRESS;

	return rc;
}

static bool net_dm_is_monitoring(void)
{
	return trace_state == TRACE_ON || monitor_hw;
}

static int net_dm_alert_mode_get_from_info(struct genl_info *info,
					   enum net_dm_alert_mode *p_alert_mode)
{
	u8 val;

	val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);

	switch (val) {
	case NET_DM_ALERT_MODE_SUMMARY:
	case NET_DM_ALERT_MODE_PACKET:
		*p_alert_mode = val;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int net_dm_alert_mode_set(struct genl_info *info)
{
	struct netlink_ext_ack *extack = info->extack;
	enum net_dm_alert_mode alert_mode;
	int rc;

	if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
		return 0;

	rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
		return -EINVAL;
	}

	net_dm_alert_mode = alert_mode;

	return 0;
}

static void net_dm_trunc_len_set(struct genl_info *info)
{
	if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
		return;

	net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
}

static void net_dm_queue_len_set(struct genl_info *info)
{
	if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
		return;

	net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
}

static int net_dm_cmd_config(struct sk_buff *skb,
			     struct genl_info *info)
{
	struct netlink_ext_ack *extack = info->extack;
	int rc;

	if (net_dm_is_monitoring()) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
		return -EBUSY;
	}

	rc = net_dm_alert_mode_set(info);
	if (rc)
		return rc;

	net_dm_trunc_len_set(info);

	net_dm_queue_len_set(info);

	return 0;
}

static int net_dm_monitor_start(bool set_sw, bool set_hw,
				struct netlink_ext_ack *extack)
{
	bool sw_set = false;
	int rc;

	if (set_sw) {
		rc = set_all_monitor_traces(TRACE_ON, extack);
		if (rc)
			return rc;
		sw_set = true;
	}

	if (set_hw) {
		rc = net_dm_hw_monitor_start(extack);
		if (rc)
			goto err_monitor_hw;
	}

	return 0;

err_monitor_hw:
	if (sw_set)
		set_all_monitor_traces(TRACE_OFF, extack);
	return rc;
}

static void net_dm_monitor_stop(bool set_sw, bool set_hw,
				struct netlink_ext_ack *extack)
{
	if (set_hw)
		net_dm_hw_monitor_stop(extack);
	if (set_sw)
		set_all_monitor_traces(TRACE_OFF, extack);
}

static int net_dm_cmd_trace(struct sk_buff *skb,
			    struct genl_info *info)
{
	bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
	bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
	struct netlink_ext_ack *extack = info->extack;

	/* To maintain backward compatibility, we start / stop monitoring of
	 * software drops if no flag is specified.
	 */
	if (!set_sw && !set_hw)
		set_sw = true;

	switch (info->genlhdr->cmd) {
	case NET_DM_CMD_START:
		return net_dm_monitor_start(set_sw, set_hw, extack);
	case NET_DM_CMD_STOP:
		net_dm_monitor_stop(set_sw, set_hw, extack);
		return 0;
	}

	return -EOPNOTSUPP;
}

static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
{
	void *hdr;

	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
			  &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
		goto nla_put_failure;

	if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
		goto nla_put_failure;

	if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	int rc;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	rc = net_dm_config_fill(msg, info);
	if (rc)
		goto free_msg;

	return genlmsg_reply(msg, info);

free_msg:
	nlmsg_free(msg);
	return rc;
}

static void net_dm_stats_read(struct net_dm_stats *stats)
{
	int cpu;

	memset(stats, 0, sizeof(*stats));
	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
		struct net_dm_stats *cpu_stats = &data->stats;
		unsigned int start;
		u64 dropped;

		do {
			start = u64_stats_fetch_begin(&cpu_stats->syncp);
			dropped = u64_stats_read(&cpu_stats->dropped);
		} while (u64_stats_fetch_retry(&cpu_stats->syncp, start));

		u64_stats_add(&stats->dropped, dropped);
	}
}

static int net_dm_stats_put(struct sk_buff *msg)
{
	struct net_dm_stats stats;
	struct nlattr *attr;

	net_dm_stats_read(&stats);

	attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
	if (!attr)
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
			      u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
		goto nla_put_failure;

	nla_nest_end(msg, attr);

	return 0;

nla_put_failure:
	nla_nest_cancel(msg, attr);
	return -EMSGSIZE;
}

static void net_dm_hw_stats_read(struct net_dm_stats *stats)
{
	int cpu;

	memset(stats, 0, sizeof(*stats));
	for_each_possible_cpu(cpu) {
		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
		struct net_dm_stats *cpu_stats = &hw_data->stats;
		unsigned int start;
		u64 dropped;

		do {
			start = u64_stats_fetch_begin(&cpu_stats->syncp);
			dropped = u64_stats_read(&cpu_stats->dropped);
		} while (u64_stats_fetch_retry(&cpu_stats->syncp, start));

		u64_stats_add(&stats->dropped, dropped);
	}
}

static int net_dm_hw_stats_put(struct sk_buff *msg)
{
	struct net_dm_stats stats;
	struct nlattr *attr;

	net_dm_hw_stats_read(&stats);

	attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
	if (!attr)
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
			      u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
		goto nla_put_failure;

	nla_nest_end(msg, attr);

	return 0;

nla_put_failure:
	nla_nest_cancel(msg, attr);
	return -EMSGSIZE;
}

static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
{
	void *hdr;
	int rc;

	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
			  &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
	if (!hdr)
		return -EMSGSIZE;

	rc = net_dm_stats_put(msg);
	if (rc)
		goto nla_put_failure;

	rc = net_dm_hw_stats_put(msg);
	if (rc)
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	int rc;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	rc = net_dm_stats_fill(msg, info);
	if (rc)
		goto free_msg;

	return genlmsg_reply(msg, info);

free_msg:
	nlmsg_free(msg);
	return rc;
}

static int dropmon_net_event(struct notifier_block *ev_block,
			     unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct dm_hw_stat_delta *stat;

	switch (event) {
	case NETDEV_REGISTER:
		if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
			break;
		stat = kzalloc(sizeof(*stat), GFP_KERNEL);
		if (!stat)
			break;

		stat->last_rx = jiffies;
		rcu_assign_pointer(dev->dm_private, stat);

		break;
	case NETDEV_UNREGISTER:
		stat = rtnl_dereference(dev->dm_private);
		if (stat) {
			rcu_assign_pointer(dev->dm_private, NULL);
			kfree_rcu(stat, rcu);
		}
		break;
	}
	return NOTIFY_DONE;
}

static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
	[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
	[NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG },
	[NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG },
};

static const struct genl_small_ops dropmon_ops[] = {
	{
		.cmd = NET_DM_CMD_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = net_dm_cmd_config,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = NET_DM_CMD_START,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = net_dm_cmd_trace,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = NET_DM_CMD_STOP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = net_dm_cmd_trace,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = NET_DM_CMD_CONFIG_GET,
		.doit = net_dm_cmd_config_get,
	},
	{
		.cmd = NET_DM_CMD_STATS_GET,
		.doit = net_dm_cmd_stats_get,
	},
};
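
/* NET_DM_CMD_CONFIG, _START and _STOP require GENL_ADMIN_PERM, while the
 * CONFIG_GET and STATS_GET queries are unprivileged. All doit handlers are
 * serialized by net_dm_mutex via the pre_doit/post_doit hooks below.
 */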

static int net_dm_nl_pre_doit(const struct genl_split_ops *ops,
			      struct sk_buff *skb, struct genl_info *info)
{
	mutex_lock(&net_dm_mutex);

	return 0;
}

static void net_dm_nl_post_doit(const struct genl_split_ops *ops,
				struct sk_buff *skb, struct genl_info *info)
{
	mutex_unlock(&net_dm_mutex);
}

static struct genl_family net_drop_monitor_family __ro_after_init = {
	.hdrsize = 0,
	.name = "NET_DM",
	.version = 2,
	.maxattr = NET_DM_ATTR_MAX,
	.policy = net_dm_nl_policy,
	.pre_doit = net_dm_nl_pre_doit,
	.post_doit = net_dm_nl_post_doit,
	.module = THIS_MODULE,
	.small_ops = dropmon_ops,
	.n_small_ops = ARRAY_SIZE(dropmon_ops),
	.resv_start_op = NET_DM_CMD_STATS_GET + 1,
	.mcgrps = dropmon_mcgrps,
	.n_mcgrps = ARRAY_SIZE(dropmon_mcgrps),
};

static struct notifier_block dropmon_net_notifier = {
	.notifier_call = dropmon_net_event
};

static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
{
	raw_spin_lock_init(&data->lock);
	skb_queue_head_init(&data->drop_queue);
	u64_stats_init(&data->stats.syncp);
}

static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
{
	WARN_ON(!skb_queue_empty(&data->drop_queue));
}

static void net_dm_cpu_data_init(int cpu)
{
	struct per_cpu_dm_data *data;

	data = &per_cpu(dm_cpu_data, cpu);
	__net_dm_cpu_data_init(data);
}

static void net_dm_cpu_data_fini(int cpu)
{
	struct per_cpu_dm_data *data;

	data = &per_cpu(dm_cpu_data, cpu);
	/* At this point, we should have exclusive access
	 * to this struct and can free the skb inside it.
	 */
	consume_skb(data->skb);
	__net_dm_cpu_data_fini(data);
}

static void net_dm_hw_cpu_data_init(int cpu)
{
	struct per_cpu_dm_data *hw_data;

	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
	__net_dm_cpu_data_init(hw_data);
}

static void net_dm_hw_cpu_data_fini(int cpu)
{
	struct per_cpu_dm_data *hw_data;

	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
	kfree(hw_data->hw_entries);
	__net_dm_cpu_data_fini(hw_data);
}

static int __init init_net_drop_monitor(void)
{
	int cpu, rc;

	pr_info("Initializing network drop monitor service\n");

	if (sizeof(void *) > 8) {
		pr_err("Unable to store program counters on this arch, Drop monitor failed\n");
		return -ENOSPC;
	}

	for_each_possible_cpu(cpu) {
		net_dm_cpu_data_init(cpu);
		net_dm_hw_cpu_data_init(cpu);
	}

	rc = register_netdevice_notifier(&dropmon_net_notifier);
	if (rc < 0) {
		pr_crit("Failed to register netdevice notifier\n");
		return rc;
	}

	rc = genl_register_family(&net_drop_monitor_family);
	if (rc) {
		pr_err("Could not create drop monitor netlink family\n");
		goto out_unreg;
	}
	WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);

	rc = 0;

	goto out;

out_unreg:
	WARN_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
out:
	return rc;
}

static void exit_net_drop_monitor(void)
{
	int cpu;

	/*
	 * Because of the module_get/put we do in the trace state change path
	 * we are guaranteed not to have any current users when we get here
	 */
	BUG_ON(genl_unregister_family(&net_drop_monitor_family));

	BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));

	for_each_possible_cpu(cpu) {
		net_dm_hw_cpu_data_fini(cpu);
		net_dm_cpu_data_fini(cpu);
	}
}

module_init(init_net_drop_monitor);
module_exit(exit_net_drop_monitor);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Neil Horman <[email protected]>");
MODULE_ALIAS_GENL_FAMILY("NET_DM");
MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts");