GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/netlink/af_netlink.c
1
/*
2
* NETLINK Kernel-user communication protocol.
3
*
4
* Authors: Alan Cox <[email protected]>
5
* Alexey Kuznetsov <[email protected]>
6
*
7
* This program is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU General Public License
9
* as published by the Free Software Foundation; either version
10
* 2 of the License, or (at your option) any later version.
11
*
12
* Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13
* added netlink_proto_exit
14
* Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <[email protected]>
15
* use nlk_sk, as sk->protinfo is on a diet 8)
16
* Fri Jul 22 19:51:12 MEST 2005 Harald Welte <[email protected]>
17
* - inc module use count of module that owns
18
* the kernel socket in case userspace opens
19
* socket of same protocol
20
* - remove all module support, since netlink is
21
* mandatory if CONFIG_NET=y these days
22
*/
23
24
#include <linux/module.h>
25
26
#include <linux/capability.h>
27
#include <linux/kernel.h>
28
#include <linux/init.h>
29
#include <linux/signal.h>
30
#include <linux/sched.h>
31
#include <linux/errno.h>
32
#include <linux/string.h>
33
#include <linux/stat.h>
34
#include <linux/socket.h>
35
#include <linux/un.h>
36
#include <linux/fcntl.h>
37
#include <linux/termios.h>
38
#include <linux/sockios.h>
39
#include <linux/net.h>
40
#include <linux/fs.h>
41
#include <linux/slab.h>
42
#include <asm/uaccess.h>
43
#include <linux/skbuff.h>
44
#include <linux/netdevice.h>
45
#include <linux/rtnetlink.h>
46
#include <linux/proc_fs.h>
47
#include <linux/seq_file.h>
48
#include <linux/notifier.h>
49
#include <linux/security.h>
50
#include <linux/jhash.h>
51
#include <linux/jiffies.h>
52
#include <linux/random.h>
53
#include <linux/bitops.h>
54
#include <linux/mm.h>
55
#include <linux/types.h>
56
#include <linux/audit.h>
57
#include <linux/mutex.h>
58
59
#include <net/net_namespace.h>
60
#include <net/sock.h>
61
#include <net/scm.h>
62
#include <net/netlink.h>
63
64
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
65
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
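/*
 * The two macros above size the per-socket multicast group bitmaps:
 * NLGRPSZ() rounds the group count up to whole unsigned longs and
 * converts it to bytes, NLGRPLONGS() expresses the same size in longs.
 * E.g. on a 64-bit machine NLGRPSZ(32) = ALIGN(32, 64) / 8 = 8 bytes
 * and NLGRPLONGS(32) = 1.
 */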
66
67
struct netlink_sock {
68
/* struct sock has to be the first member of netlink_sock */
69
struct sock sk;
70
u32 pid;
71
u32 dst_pid;
72
u32 dst_group;
73
u32 flags;
74
u32 subscriptions;
75
u32 ngroups;
76
unsigned long *groups;
77
unsigned long state;
78
wait_queue_head_t wait;
79
struct netlink_callback *cb;
80
struct mutex *cb_mutex;
81
struct mutex cb_def_mutex;
82
void (*netlink_rcv)(struct sk_buff *skb);
83
struct module *module;
84
};
85
86
struct listeners {
87
struct rcu_head rcu;
88
unsigned long masks[0];
89
};
90
91
#define NETLINK_KERNEL_SOCKET 0x1
92
#define NETLINK_RECV_PKTINFO 0x2
93
#define NETLINK_BROADCAST_SEND_ERROR 0x4
94
#define NETLINK_RECV_NO_ENOBUFS 0x8
95
96
static inline struct netlink_sock *nlk_sk(struct sock *sk)
97
{
98
return container_of(sk, struct netlink_sock, sk);
99
}
100
101
static inline int netlink_is_kernel(struct sock *sk)
102
{
103
return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
104
}
105
106
struct nl_pid_hash {
107
struct hlist_head *table;
108
unsigned long rehash_time;
109
110
unsigned int mask;
111
unsigned int shift;
112
113
unsigned int entries;
114
unsigned int max_shift;
115
116
u32 rnd;
117
};
118
119
struct netlink_table {
120
struct nl_pid_hash hash;
121
struct hlist_head mc_list;
122
struct listeners __rcu *listeners;
123
unsigned int nl_nonroot;
124
unsigned int groups;
125
struct mutex *cb_mutex;
126
struct module *module;
127
int registered;
128
};
129
130
static struct netlink_table *nl_table;
131
132
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
133
134
static int netlink_dump(struct sock *sk);
135
static void netlink_destroy_callback(struct netlink_callback *cb);
136
137
static DEFINE_RWLOCK(nl_table_lock);
138
static atomic_t nl_table_users = ATOMIC_INIT(0);
139
140
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
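/*
 * Multicast group numbers are 1-based: group N maps to bit N-1, so
 * netlink_group_mask(3) is 0x4, and group 0 ("no group") yields an
 * empty mask.
 */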
141
142
static u32 netlink_group_mask(u32 group)
143
{
144
return group ? 1 << (group - 1) : 0;
145
}
146
147
static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
148
{
149
return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
150
}
151
152
static void netlink_sock_destruct(struct sock *sk)
153
{
154
struct netlink_sock *nlk = nlk_sk(sk);
155
156
if (nlk->cb) {
157
if (nlk->cb->done)
158
nlk->cb->done(nlk->cb);
159
netlink_destroy_callback(nlk->cb);
160
}
161
162
skb_queue_purge(&sk->sk_receive_queue);
163
164
if (!sock_flag(sk, SOCK_DEAD)) {
165
printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
166
return;
167
}
168
169
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
170
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
171
WARN_ON(nlk_sk(sk)->groups);
172
}
173
174
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
175
* SMP. Look, when several writers sleep and reader wakes them up, all but one
176
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
177
* this, _but_ remember, it adds useless work on UP machines.
178
*/
179
180
void netlink_table_grab(void)
181
__acquires(nl_table_lock)
182
{
183
might_sleep();
184
185
write_lock_irq(&nl_table_lock);
186
187
if (atomic_read(&nl_table_users)) {
188
DECLARE_WAITQUEUE(wait, current);
189
190
add_wait_queue_exclusive(&nl_table_wait, &wait);
191
for (;;) {
192
set_current_state(TASK_UNINTERRUPTIBLE);
193
if (atomic_read(&nl_table_users) == 0)
194
break;
195
write_unlock_irq(&nl_table_lock);
196
schedule();
197
write_lock_irq(&nl_table_lock);
198
}
199
200
__set_current_state(TASK_RUNNING);
201
remove_wait_queue(&nl_table_wait, &wait);
202
}
203
}
204
205
void netlink_table_ungrab(void)
206
__releases(nl_table_lock)
207
{
208
write_unlock_irq(&nl_table_lock);
209
wake_up(&nl_table_wait);
210
}
211
212
static inline void
213
netlink_lock_table(void)
214
{
215
/* read_lock() synchronizes us to netlink_table_grab */
216
217
read_lock(&nl_table_lock);
218
atomic_inc(&nl_table_users);
219
read_unlock(&nl_table_lock);
220
}
221
222
static inline void
223
netlink_unlock_table(void)
224
{
225
if (atomic_dec_and_test(&nl_table_users))
226
wake_up(&nl_table_wait);
227
}
228
229
static inline struct sock *netlink_lookup(struct net *net, int protocol,
230
u32 pid)
231
{
232
struct nl_pid_hash *hash = &nl_table[protocol].hash;
233
struct hlist_head *head;
234
struct sock *sk;
235
struct hlist_node *node;
236
237
read_lock(&nl_table_lock);
238
head = nl_pid_hashfn(hash, pid);
239
sk_for_each(sk, node, head) {
240
if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
241
sock_hold(sk);
242
goto found;
243
}
244
}
245
sk = NULL;
246
found:
247
read_unlock(&nl_table_lock);
248
return sk;
249
}
250
251
static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
252
{
253
if (size <= PAGE_SIZE)
254
return kzalloc(size, GFP_ATOMIC);
255
else
256
return (struct hlist_head *)
257
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
258
get_order(size));
259
}
260
261
static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
262
{
263
if (size <= PAGE_SIZE)
264
kfree(table);
265
else
266
free_pages((unsigned long)table, get_order(size));
267
}
268
269
static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
270
{
271
unsigned int omask, mask, shift;
272
size_t osize, size;
273
struct hlist_head *otable, *table;
274
int i;
275
276
omask = mask = hash->mask;
277
osize = size = (mask + 1) * sizeof(*table);
278
shift = hash->shift;
279
280
if (grow) {
281
if (++shift > hash->max_shift)
282
return 0;
283
mask = mask * 2 + 1;
284
size *= 2;
285
}
286
287
table = nl_pid_hash_zalloc(size);
288
if (!table)
289
return 0;
290
291
otable = hash->table;
292
hash->table = table;
293
hash->mask = mask;
294
hash->shift = shift;
295
get_random_bytes(&hash->rnd, sizeof(hash->rnd));
296
297
for (i = 0; i <= omask; i++) {
298
struct sock *sk;
299
struct hlist_node *node, *tmp;
300
301
sk_for_each_safe(sk, node, tmp, &otable[i])
302
__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
303
}
304
305
nl_pid_hash_free(otable, osize);
306
hash->rehash_time = jiffies + 10 * 60 * HZ;
307
return 1;
308
}
309
310
static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
311
{
312
int avg = hash->entries >> hash->shift;
313
314
if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
315
return 1;
316
317
if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
318
nl_pid_hash_rehash(hash, 0);
319
return 1;
320
}
321
322
return 0;
323
}
324
325
static const struct proto_ops netlink_ops;
326
327
static void
328
netlink_update_listeners(struct sock *sk)
329
{
330
struct netlink_table *tbl = &nl_table[sk->sk_protocol];
331
struct hlist_node *node;
332
unsigned long mask;
333
unsigned int i;
334
335
for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
336
mask = 0;
337
sk_for_each_bound(sk, node, &tbl->mc_list) {
338
if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
339
mask |= nlk_sk(sk)->groups[i];
340
}
341
tbl->listeners->masks[i] = mask;
342
}
343
/* this function is only called with the netlink table "grabbed", which
344
* makes sure updates are visible before bind or setsockopt return. */
345
}
346
347
static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
348
{
349
struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
350
struct hlist_head *head;
351
int err = -EADDRINUSE;
352
struct sock *osk;
353
struct hlist_node *node;
354
int len;
355
356
netlink_table_grab();
357
head = nl_pid_hashfn(hash, pid);
358
len = 0;
359
sk_for_each(osk, node, head) {
360
if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
361
break;
362
len++;
363
}
364
if (node)
365
goto err;
366
367
err = -EBUSY;
368
if (nlk_sk(sk)->pid)
369
goto err;
370
371
err = -ENOMEM;
372
if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
373
goto err;
374
375
if (len && nl_pid_hash_dilute(hash, len))
376
head = nl_pid_hashfn(hash, pid);
377
hash->entries++;
378
nlk_sk(sk)->pid = pid;
379
sk_add_node(sk, head);
380
err = 0;
381
382
err:
383
netlink_table_ungrab();
384
return err;
385
}
386
387
static void netlink_remove(struct sock *sk)
388
{
389
netlink_table_grab();
390
if (sk_del_node_init(sk))
391
nl_table[sk->sk_protocol].hash.entries--;
392
if (nlk_sk(sk)->subscriptions)
393
__sk_del_bind_node(sk);
394
netlink_table_ungrab();
395
}
396
397
static struct proto netlink_proto = {
398
.name = "NETLINK",
399
.owner = THIS_MODULE,
400
.obj_size = sizeof(struct netlink_sock),
401
};
402
403
static int __netlink_create(struct net *net, struct socket *sock,
404
struct mutex *cb_mutex, int protocol)
405
{
406
struct sock *sk;
407
struct netlink_sock *nlk;
408
409
sock->ops = &netlink_ops;
410
411
sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
412
if (!sk)
413
return -ENOMEM;
414
415
sock_init_data(sock, sk);
416
417
nlk = nlk_sk(sk);
418
if (cb_mutex)
419
nlk->cb_mutex = cb_mutex;
420
else {
421
nlk->cb_mutex = &nlk->cb_def_mutex;
422
mutex_init(nlk->cb_mutex);
423
}
424
init_waitqueue_head(&nlk->wait);
425
426
sk->sk_destruct = netlink_sock_destruct;
427
sk->sk_protocol = protocol;
428
return 0;
429
}
430
431
static int netlink_create(struct net *net, struct socket *sock, int protocol,
432
int kern)
433
{
434
struct module *module = NULL;
435
struct mutex *cb_mutex;
436
struct netlink_sock *nlk;
437
int err = 0;
438
439
sock->state = SS_UNCONNECTED;
440
441
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
442
return -ESOCKTNOSUPPORT;
443
444
if (protocol < 0 || protocol >= MAX_LINKS)
445
return -EPROTONOSUPPORT;
446
447
netlink_lock_table();
448
#ifdef CONFIG_MODULES
449
if (!nl_table[protocol].registered) {
450
netlink_unlock_table();
451
request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
452
netlink_lock_table();
453
}
454
#endif
455
if (nl_table[protocol].registered &&
456
try_module_get(nl_table[protocol].module))
457
module = nl_table[protocol].module;
458
else
459
err = -EPROTONOSUPPORT;
460
cb_mutex = nl_table[protocol].cb_mutex;
461
netlink_unlock_table();
462
463
if (err < 0)
464
goto out;
465
466
err = __netlink_create(net, sock, cb_mutex, protocol);
467
if (err < 0)
468
goto out_module;
469
470
local_bh_disable();
471
sock_prot_inuse_add(net, &netlink_proto, 1);
472
local_bh_enable();
473
474
nlk = nlk_sk(sock->sk);
475
nlk->module = module;
476
out:
477
return err;
478
479
out_module:
480
module_put(module);
481
goto out;
482
}
483
484
static int netlink_release(struct socket *sock)
485
{
486
struct sock *sk = sock->sk;
487
struct netlink_sock *nlk;
488
489
if (!sk)
490
return 0;
491
492
netlink_remove(sk);
493
sock_orphan(sk);
494
nlk = nlk_sk(sk);
495
496
/*
497
* OK. Socket is unlinked, any packets that arrive now
498
* will be purged.
499
*/
500
501
sock->sk = NULL;
502
wake_up_interruptible_all(&nlk->wait);
503
504
skb_queue_purge(&sk->sk_write_queue);
505
506
if (nlk->pid) {
507
struct netlink_notify n = {
508
.net = sock_net(sk),
509
.protocol = sk->sk_protocol,
510
.pid = nlk->pid,
511
};
512
atomic_notifier_call_chain(&netlink_chain,
513
NETLINK_URELEASE, &n);
514
}
515
516
module_put(nlk->module);
517
518
netlink_table_grab();
519
if (netlink_is_kernel(sk)) {
520
BUG_ON(nl_table[sk->sk_protocol].registered == 0);
521
if (--nl_table[sk->sk_protocol].registered == 0) {
522
kfree(nl_table[sk->sk_protocol].listeners);
523
nl_table[sk->sk_protocol].module = NULL;
524
nl_table[sk->sk_protocol].registered = 0;
525
}
526
} else if (nlk->subscriptions)
527
netlink_update_listeners(sk);
528
netlink_table_ungrab();
529
530
kfree(nlk->groups);
531
nlk->groups = NULL;
532
533
local_bh_disable();
534
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
535
local_bh_enable();
536
sock_put(sk);
537
return 0;
538
}
539
540
static int netlink_autobind(struct socket *sock)
541
{
542
struct sock *sk = sock->sk;
543
struct net *net = sock_net(sk);
544
struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
545
struct hlist_head *head;
546
struct sock *osk;
547
struct hlist_node *node;
548
s32 pid = task_tgid_vnr(current);
549
int err;
550
static s32 rover = -4097;
551
552
retry:
553
cond_resched();
554
netlink_table_grab();
555
head = nl_pid_hashfn(hash, pid);
556
sk_for_each(osk, node, head) {
557
if (!net_eq(sock_net(osk), net))
558
continue;
559
if (nlk_sk(osk)->pid == pid) {
560
/* Bind collision, search negative pid values. */
561
pid = rover--;
562
if (rover > -4097)
563
rover = -4097;
564
netlink_table_ungrab();
565
goto retry;
566
}
567
}
568
netlink_table_ungrab();
569
570
err = netlink_insert(sk, net, pid);
571
if (err == -EADDRINUSE)
572
goto retry;
573
574
/* If 2 threads race to autobind, that is fine. */
575
if (err == -EBUSY)
576
err = 0;
577
578
return err;
579
}
580
581
static inline int netlink_capable(struct socket *sock, unsigned int flag)
582
{
583
return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
584
capable(CAP_NET_ADMIN);
585
}
586
587
static void
588
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
589
{
590
struct netlink_sock *nlk = nlk_sk(sk);
591
592
if (nlk->subscriptions && !subscriptions)
593
__sk_del_bind_node(sk);
594
else if (!nlk->subscriptions && subscriptions)
595
sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
596
nlk->subscriptions = subscriptions;
597
}
598
599
static int netlink_realloc_groups(struct sock *sk)
600
{
601
struct netlink_sock *nlk = nlk_sk(sk);
602
unsigned int groups;
603
unsigned long *new_groups;
604
int err = 0;
605
606
netlink_table_grab();
607
608
groups = nl_table[sk->sk_protocol].groups;
609
if (!nl_table[sk->sk_protocol].registered) {
610
err = -ENOENT;
611
goto out_unlock;
612
}
613
614
if (nlk->ngroups >= groups)
615
goto out_unlock;
616
617
new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
618
if (new_groups == NULL) {
619
err = -ENOMEM;
620
goto out_unlock;
621
}
622
memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
623
NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
624
625
nlk->groups = new_groups;
626
nlk->ngroups = groups;
627
out_unlock:
628
netlink_table_ungrab();
629
return err;
630
}
631
632
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
633
int addr_len)
634
{
635
struct sock *sk = sock->sk;
636
struct net *net = sock_net(sk);
637
struct netlink_sock *nlk = nlk_sk(sk);
638
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
639
int err;
640
641
if (nladdr->nl_family != AF_NETLINK)
642
return -EINVAL;
643
644
/* Only superuser is allowed to listen to multicasts */
645
if (nladdr->nl_groups) {
646
if (!netlink_capable(sock, NL_NONROOT_RECV))
647
return -EPERM;
648
err = netlink_realloc_groups(sk);
649
if (err)
650
return err;
651
}
652
653
if (nlk->pid) {
654
if (nladdr->nl_pid != nlk->pid)
655
return -EINVAL;
656
} else {
657
err = nladdr->nl_pid ?
658
netlink_insert(sk, net, nladdr->nl_pid) :
659
netlink_autobind(sock);
660
if (err)
661
return err;
662
}
663
664
if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
665
return 0;
666
667
netlink_table_grab();
668
netlink_update_subscriptions(sk, nlk->subscriptions +
669
hweight32(nladdr->nl_groups) -
670
hweight32(nlk->groups[0]));
671
nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
672
netlink_update_listeners(sk);
673
netlink_table_ungrab();
674
675
return 0;
676
}
677
678
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
679
int alen, int flags)
680
{
681
int err = 0;
682
struct sock *sk = sock->sk;
683
struct netlink_sock *nlk = nlk_sk(sk);
684
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
685
686
if (alen < sizeof(addr->sa_family))
687
return -EINVAL;
688
689
if (addr->sa_family == AF_UNSPEC) {
690
sk->sk_state = NETLINK_UNCONNECTED;
691
nlk->dst_pid = 0;
692
nlk->dst_group = 0;
693
return 0;
694
}
695
if (addr->sa_family != AF_NETLINK)
696
return -EINVAL;
697
698
/* Only superuser is allowed to send multicasts */
699
if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
700
return -EPERM;
701
702
if (!nlk->pid)
703
err = netlink_autobind(sock);
704
705
if (err == 0) {
706
sk->sk_state = NETLINK_CONNECTED;
707
nlk->dst_pid = nladdr->nl_pid;
708
nlk->dst_group = ffs(nladdr->nl_groups);
709
}
710
711
return err;
712
}
713
714
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
715
int *addr_len, int peer)
716
{
717
struct sock *sk = sock->sk;
718
struct netlink_sock *nlk = nlk_sk(sk);
719
DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
720
721
nladdr->nl_family = AF_NETLINK;
722
nladdr->nl_pad = 0;
723
*addr_len = sizeof(*nladdr);
724
725
if (peer) {
726
nladdr->nl_pid = nlk->dst_pid;
727
nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
728
} else {
729
nladdr->nl_pid = nlk->pid;
730
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
731
}
732
return 0;
733
}
734
735
static void netlink_overrun(struct sock *sk)
736
{
737
struct netlink_sock *nlk = nlk_sk(sk);
738
739
if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
740
if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
741
sk->sk_err = ENOBUFS;
742
sk->sk_error_report(sk);
743
}
744
}
745
atomic_inc(&sk->sk_drops);
746
}
747
748
static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
749
{
750
struct sock *sock;
751
struct netlink_sock *nlk;
752
753
sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
754
if (!sock)
755
return ERR_PTR(-ECONNREFUSED);
756
757
/* Don't bother queuing skb if kernel socket has no input function */
758
nlk = nlk_sk(sock);
759
if (sock->sk_state == NETLINK_CONNECTED &&
760
nlk->dst_pid != nlk_sk(ssk)->pid) {
761
sock_put(sock);
762
return ERR_PTR(-ECONNREFUSED);
763
}
764
return sock;
765
}
766
767
struct sock *netlink_getsockbyfilp(struct file *filp)
768
{
769
struct inode *inode = filp->f_path.dentry->d_inode;
770
struct sock *sock;
771
772
if (!S_ISSOCK(inode->i_mode))
773
return ERR_PTR(-ENOTSOCK);
774
775
sock = SOCKET_I(inode)->sk;
776
if (sock->sk_family != AF_NETLINK)
777
return ERR_PTR(-EINVAL);
778
779
sock_hold(sock);
780
return sock;
781
}
782
783
/*
784
* Attach a skb to a netlink socket.
785
* The caller must hold a reference to the destination socket. On error, the
786
* reference is dropped. The skb is not sent to the destination; all
787
* error checks are performed and memory in the queue is reserved.
788
* Return values:
789
* < 0: error. skb freed, reference to sock dropped.
790
* 0: continue
791
* 1: repeat lookup - reference dropped while waiting for socket memory.
792
*/
793
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
794
long *timeo, struct sock *ssk)
795
{
796
struct netlink_sock *nlk;
797
798
nlk = nlk_sk(sk);
799
800
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
801
test_bit(0, &nlk->state)) {
802
DECLARE_WAITQUEUE(wait, current);
803
if (!*timeo) {
804
if (!ssk || netlink_is_kernel(ssk))
805
netlink_overrun(sk);
806
sock_put(sk);
807
kfree_skb(skb);
808
return -EAGAIN;
809
}
810
811
__set_current_state(TASK_INTERRUPTIBLE);
812
add_wait_queue(&nlk->wait, &wait);
813
814
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
815
test_bit(0, &nlk->state)) &&
816
!sock_flag(sk, SOCK_DEAD))
817
*timeo = schedule_timeout(*timeo);
818
819
__set_current_state(TASK_RUNNING);
820
remove_wait_queue(&nlk->wait, &wait);
821
sock_put(sk);
822
823
if (signal_pending(current)) {
824
kfree_skb(skb);
825
return sock_intr_errno(*timeo);
826
}
827
return 1;
828
}
829
skb_set_owner_r(skb, sk);
830
return 0;
831
}
832
833
int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
834
{
835
int len = skb->len;
836
837
skb_queue_tail(&sk->sk_receive_queue, skb);
838
sk->sk_data_ready(sk, len);
839
sock_put(sk);
840
return len;
841
}
842
843
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
844
{
845
kfree_skb(skb);
846
sock_put(sk);
847
}
848
849
static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
850
gfp_t allocation)
851
{
852
int delta;
853
854
skb_orphan(skb);
855
856
delta = skb->end - skb->tail;
857
if (delta * 2 < skb->truesize)
858
return skb;
859
860
if (skb_shared(skb)) {
861
struct sk_buff *nskb = skb_clone(skb, allocation);
862
if (!nskb)
863
return skb;
864
kfree_skb(skb);
865
skb = nskb;
866
}
867
868
if (!pskb_expand_head(skb, 0, -delta, allocation))
869
skb->truesize -= delta;
870
871
return skb;
872
}
873
874
static inline void netlink_rcv_wake(struct sock *sk)
875
{
876
struct netlink_sock *nlk = nlk_sk(sk);
877
878
if (skb_queue_empty(&sk->sk_receive_queue))
879
clear_bit(0, &nlk->state);
880
if (!test_bit(0, &nlk->state))
881
wake_up_interruptible(&nlk->wait);
882
}
883
884
static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
885
{
886
int ret;
887
struct netlink_sock *nlk = nlk_sk(sk);
888
889
ret = -ECONNREFUSED;
890
if (nlk->netlink_rcv != NULL) {
891
ret = skb->len;
892
skb_set_owner_r(skb, sk);
893
nlk->netlink_rcv(skb);
894
}
895
kfree_skb(skb);
896
sock_put(sk);
897
return ret;
898
}
899
900
int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
901
u32 pid, int nonblock)
902
{
903
struct sock *sk;
904
int err;
905
long timeo;
906
907
skb = netlink_trim(skb, gfp_any());
908
909
timeo = sock_sndtimeo(ssk, nonblock);
910
retry:
911
sk = netlink_getsockbypid(ssk, pid);
912
if (IS_ERR(sk)) {
913
kfree_skb(skb);
914
return PTR_ERR(sk);
915
}
916
if (netlink_is_kernel(sk))
917
return netlink_unicast_kernel(sk, skb);
918
919
if (sk_filter(sk, skb)) {
920
err = skb->len;
921
kfree_skb(skb);
922
sock_put(sk);
923
return err;
924
}
925
926
err = netlink_attachskb(sk, skb, &timeo, ssk);
927
if (err == 1)
928
goto retry;
929
if (err)
930
return err;
931
932
return netlink_sendskb(sk, skb);
933
}
934
EXPORT_SYMBOL(netlink_unicast);
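/*
 * Minimal usage sketch (illustrative only, not part of this file): a
 * kernel-side caller typically builds an nlmsg and unicasts it to the
 * pid recorded in the triggering request.  kernel_sk, dst_pid, value
 * and MY_MSG_TYPE are hypothetical.
 *
 *	struct sk_buff *skb = nlmsg_new(sizeof(u32), GFP_KERNEL);
 *	struct nlmsghdr *nlh;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	nlh = nlmsg_put(skb, 0, 0, MY_MSG_TYPE, sizeof(u32), 0);
 *	*(u32 *)nlmsg_data(nlh) = value;
 *	netlink_unicast(kernel_sk, skb, dst_pid, MSG_DONTWAIT);
 */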
935
936
int netlink_has_listeners(struct sock *sk, unsigned int group)
937
{
938
int res = 0;
939
struct listeners *listeners;
940
941
BUG_ON(!netlink_is_kernel(sk));
942
943
rcu_read_lock();
944
listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
945
946
if (group - 1 < nl_table[sk->sk_protocol].groups)
947
res = test_bit(group - 1, listeners->masks);
948
949
rcu_read_unlock();
950
951
return res;
952
}
953
EXPORT_SYMBOL_GPL(netlink_has_listeners);
954
955
static inline int netlink_broadcast_deliver(struct sock *sk,
956
struct sk_buff *skb)
957
{
958
struct netlink_sock *nlk = nlk_sk(sk);
959
960
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
961
!test_bit(0, &nlk->state)) {
962
skb_set_owner_r(skb, sk);
963
skb_queue_tail(&sk->sk_receive_queue, skb);
964
sk->sk_data_ready(sk, skb->len);
965
return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
966
}
967
return -1;
968
}
969
970
struct netlink_broadcast_data {
971
struct sock *exclude_sk;
972
struct net *net;
973
u32 pid;
974
u32 group;
975
int failure;
976
int delivery_failure;
977
int congested;
978
int delivered;
979
gfp_t allocation;
980
struct sk_buff *skb, *skb2;
981
int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
982
void *tx_data;
983
};
984
985
static inline int do_one_broadcast(struct sock *sk,
986
struct netlink_broadcast_data *p)
987
{
988
struct netlink_sock *nlk = nlk_sk(sk);
989
int val;
990
991
if (p->exclude_sk == sk)
992
goto out;
993
994
if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
995
!test_bit(p->group - 1, nlk->groups))
996
goto out;
997
998
if (!net_eq(sock_net(sk), p->net))
999
goto out;
1000
1001
if (p->failure) {
1002
netlink_overrun(sk);
1003
goto out;
1004
}
1005
1006
sock_hold(sk);
1007
if (p->skb2 == NULL) {
1008
if (skb_shared(p->skb)) {
1009
p->skb2 = skb_clone(p->skb, p->allocation);
1010
} else {
1011
p->skb2 = skb_get(p->skb);
1012
/*
1013
* skb ownership may have been set when
1014
* delivered to a previous socket.
1015
*/
1016
skb_orphan(p->skb2);
1017
}
1018
}
1019
if (p->skb2 == NULL) {
1020
netlink_overrun(sk);
1021
/* Clone failed. Notify ALL listeners. */
1022
p->failure = 1;
1023
if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1024
p->delivery_failure = 1;
1025
} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1026
kfree_skb(p->skb2);
1027
p->skb2 = NULL;
1028
} else if (sk_filter(sk, p->skb2)) {
1029
kfree_skb(p->skb2);
1030
p->skb2 = NULL;
1031
} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1032
netlink_overrun(sk);
1033
if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1034
p->delivery_failure = 1;
1035
} else {
1036
p->congested |= val;
1037
p->delivered = 1;
1038
p->skb2 = NULL;
1039
}
1040
sock_put(sk);
1041
1042
out:
1043
return 0;
1044
}
1045
1046
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
1047
u32 group, gfp_t allocation,
1048
int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1049
void *filter_data)
1050
{
1051
struct net *net = sock_net(ssk);
1052
struct netlink_broadcast_data info;
1053
struct hlist_node *node;
1054
struct sock *sk;
1055
1056
skb = netlink_trim(skb, allocation);
1057
1058
info.exclude_sk = ssk;
1059
info.net = net;
1060
info.pid = pid;
1061
info.group = group;
1062
info.failure = 0;
1063
info.delivery_failure = 0;
1064
info.congested = 0;
1065
info.delivered = 0;
1066
info.allocation = allocation;
1067
info.skb = skb;
1068
info.skb2 = NULL;
1069
info.tx_filter = filter;
1070
info.tx_data = filter_data;
1071
1072
/* While we sleep in clone, do not allow the socket list to change */
1073
1074
netlink_lock_table();
1075
1076
sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1077
do_one_broadcast(sk, &info);
1078
1079
consume_skb(skb);
1080
1081
netlink_unlock_table();
1082
1083
if (info.delivery_failure) {
1084
kfree_skb(info.skb2);
1085
return -ENOBUFS;
1086
} else
1087
consume_skb(info.skb2);
1088
1089
if (info.delivered) {
1090
if (info.congested && (allocation & __GFP_WAIT))
1091
yield();
1092
return 0;
1093
}
1094
return -ESRCH;
1095
}
1096
EXPORT_SYMBOL(netlink_broadcast_filtered);
1097
1098
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1099
u32 group, gfp_t allocation)
1100
{
1101
return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
1102
NULL, NULL);
1103
}
1104
EXPORT_SYMBOL(netlink_broadcast);
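/*
 * Minimal usage sketch (illustrative only): pushing one skb to every
 * current member of a multicast group.  kernel_sk and MY_GROUP are
 * hypothetical.  netlink_broadcast() always consumes the skb; -ESRCH
 * only means that nobody is listening right now.
 *
 *	err = netlink_broadcast(kernel_sk, skb, 0, MY_GROUP, GFP_KERNEL);
 *	if (err == -ESRCH)
 *		err = 0;
 */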
1105
1106
struct netlink_set_err_data {
1107
struct sock *exclude_sk;
1108
u32 pid;
1109
u32 group;
1110
int code;
1111
};
1112
1113
static inline int do_one_set_err(struct sock *sk,
1114
struct netlink_set_err_data *p)
1115
{
1116
struct netlink_sock *nlk = nlk_sk(sk);
1117
int ret = 0;
1118
1119
if (sk == p->exclude_sk)
1120
goto out;
1121
1122
if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1123
goto out;
1124
1125
if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
1126
!test_bit(p->group - 1, nlk->groups))
1127
goto out;
1128
1129
if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1130
ret = 1;
1131
goto out;
1132
}
1133
1134
sk->sk_err = p->code;
1135
sk->sk_error_report(sk);
1136
out:
1137
return ret;
1138
}
1139
1140
/**
1141
* netlink_set_err - report error to broadcast listeners
1142
* @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1143
* @pid: the PID of a process that we want to skip (if any)
1144
* @group: the broadcast group that will notice the error
1145
* @code: error code, must be negative (as usual in kernelspace)
1146
*
1147
* This function returns the number of broadcast listeners that have set the
1148
* NETLINK_RECV_NO_ENOBUFS socket option.
1149
*/
1150
int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
1151
{
1152
struct netlink_set_err_data info;
1153
struct hlist_node *node;
1154
struct sock *sk;
1155
int ret = 0;
1156
1157
info.exclude_sk = ssk;
1158
info.pid = pid;
1159
info.group = group;
1160
/* sk->sk_err wants a positive error value */
1161
info.code = -code;
1162
1163
read_lock(&nl_table_lock);
1164
1165
sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1166
ret += do_one_set_err(sk, &info);
1167
1168
read_unlock(&nl_table_lock);
1169
return ret;
1170
}
1171
EXPORT_SYMBOL(netlink_set_err);
1172
1173
/* must be called with netlink table grabbed */
1174
static void netlink_update_socket_mc(struct netlink_sock *nlk,
1175
unsigned int group,
1176
int is_new)
1177
{
1178
int old, new = !!is_new, subscriptions;
1179
1180
old = test_bit(group - 1, nlk->groups);
1181
subscriptions = nlk->subscriptions - old + new;
1182
if (new)
1183
__set_bit(group - 1, nlk->groups);
1184
else
1185
__clear_bit(group - 1, nlk->groups);
1186
netlink_update_subscriptions(&nlk->sk, subscriptions);
1187
netlink_update_listeners(&nlk->sk);
1188
}
1189
1190
static int netlink_setsockopt(struct socket *sock, int level, int optname,
1191
char __user *optval, unsigned int optlen)
1192
{
1193
struct sock *sk = sock->sk;
1194
struct netlink_sock *nlk = nlk_sk(sk);
1195
unsigned int val = 0;
1196
int err;
1197
1198
if (level != SOL_NETLINK)
1199
return -ENOPROTOOPT;
1200
1201
if (optlen >= sizeof(int) &&
1202
get_user(val, (unsigned int __user *)optval))
1203
return -EFAULT;
1204
1205
switch (optname) {
1206
case NETLINK_PKTINFO:
1207
if (val)
1208
nlk->flags |= NETLINK_RECV_PKTINFO;
1209
else
1210
nlk->flags &= ~NETLINK_RECV_PKTINFO;
1211
err = 0;
1212
break;
1213
case NETLINK_ADD_MEMBERSHIP:
1214
case NETLINK_DROP_MEMBERSHIP: {
1215
if (!netlink_capable(sock, NL_NONROOT_RECV))
1216
return -EPERM;
1217
err = netlink_realloc_groups(sk);
1218
if (err)
1219
return err;
1220
if (!val || val - 1 >= nlk->ngroups)
1221
return -EINVAL;
1222
netlink_table_grab();
1223
netlink_update_socket_mc(nlk, val,
1224
optname == NETLINK_ADD_MEMBERSHIP);
1225
netlink_table_ungrab();
1226
err = 0;
1227
break;
1228
}
1229
case NETLINK_BROADCAST_ERROR:
1230
if (val)
1231
nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1232
else
1233
nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1234
err = 0;
1235
break;
1236
case NETLINK_NO_ENOBUFS:
1237
if (val) {
1238
nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1239
clear_bit(0, &nlk->state);
1240
wake_up_interruptible(&nlk->wait);
1241
} else
1242
nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1243
err = 0;
1244
break;
1245
default:
1246
err = -ENOPROTOOPT;
1247
}
1248
return err;
1249
}
1250
1251
static int netlink_getsockopt(struct socket *sock, int level, int optname,
1252
char __user *optval, int __user *optlen)
1253
{
1254
struct sock *sk = sock->sk;
1255
struct netlink_sock *nlk = nlk_sk(sk);
1256
int len, val, err;
1257
1258
if (level != SOL_NETLINK)
1259
return -ENOPROTOOPT;
1260
1261
if (get_user(len, optlen))
1262
return -EFAULT;
1263
if (len < 0)
1264
return -EINVAL;
1265
1266
switch (optname) {
1267
case NETLINK_PKTINFO:
1268
if (len < sizeof(int))
1269
return -EINVAL;
1270
len = sizeof(int);
1271
val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1272
if (put_user(len, optlen) ||
1273
put_user(val, optval))
1274
return -EFAULT;
1275
err = 0;
1276
break;
1277
case NETLINK_BROADCAST_ERROR:
1278
if (len < sizeof(int))
1279
return -EINVAL;
1280
len = sizeof(int);
1281
val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1282
if (put_user(len, optlen) ||
1283
put_user(val, optval))
1284
return -EFAULT;
1285
err = 0;
1286
break;
1287
case NETLINK_NO_ENOBUFS:
1288
if (len < sizeof(int))
1289
return -EINVAL;
1290
len = sizeof(int);
1291
val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1292
if (put_user(len, optlen) ||
1293
put_user(val, optval))
1294
return -EFAULT;
1295
err = 0;
1296
break;
1297
default:
1298
err = -ENOPROTOOPT;
1299
}
1300
return err;
1301
}
1302
1303
static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1304
{
1305
struct nl_pktinfo info;
1306
1307
info.group = NETLINK_CB(skb).dst_group;
1308
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1309
}
1310
1311
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1312
struct msghdr *msg, size_t len)
1313
{
1314
struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1315
struct sock *sk = sock->sk;
1316
struct netlink_sock *nlk = nlk_sk(sk);
1317
struct sockaddr_nl *addr = msg->msg_name;
1318
u32 dst_pid;
1319
u32 dst_group;
1320
struct sk_buff *skb;
1321
int err;
1322
struct scm_cookie scm;
1323
1324
if (msg->msg_flags&MSG_OOB)
1325
return -EOPNOTSUPP;
1326
1327
if (NULL == siocb->scm) {
1328
siocb->scm = &scm;
1329
memset(&scm, 0, sizeof(scm));
1330
}
1331
err = scm_send(sock, msg, siocb->scm);
1332
if (err < 0)
1333
return err;
1334
1335
if (msg->msg_namelen) {
1336
err = -EINVAL;
1337
if (addr->nl_family != AF_NETLINK)
1338
goto out;
1339
dst_pid = addr->nl_pid;
1340
dst_group = ffs(addr->nl_groups);
1341
err = -EPERM;
1342
if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
1343
goto out;
1344
} else {
1345
dst_pid = nlk->dst_pid;
1346
dst_group = nlk->dst_group;
1347
}
1348
1349
if (!nlk->pid) {
1350
err = netlink_autobind(sock);
1351
if (err)
1352
goto out;
1353
}
1354
1355
err = -EMSGSIZE;
1356
if (len > sk->sk_sndbuf - 32)
1357
goto out;
1358
err = -ENOBUFS;
1359
skb = alloc_skb(len, GFP_KERNEL);
1360
if (skb == NULL)
1361
goto out;
1362
1363
NETLINK_CB(skb).pid = nlk->pid;
1364
NETLINK_CB(skb).dst_group = dst_group;
1365
memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1366
1367
err = -EFAULT;
1368
if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1369
kfree_skb(skb);
1370
goto out;
1371
}
1372
1373
err = security_netlink_send(sk, skb);
1374
if (err) {
1375
kfree_skb(skb);
1376
goto out;
1377
}
1378
1379
if (dst_group) {
1380
atomic_inc(&skb->users);
1381
netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
1382
}
1383
err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
1384
1385
out:
1386
scm_destroy(siocb->scm);
1387
return err;
1388
}
1389
1390
static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1391
struct msghdr *msg, size_t len,
1392
int flags)
1393
{
1394
struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1395
struct scm_cookie scm;
1396
struct sock *sk = sock->sk;
1397
struct netlink_sock *nlk = nlk_sk(sk);
1398
int noblock = flags&MSG_DONTWAIT;
1399
size_t copied;
1400
struct sk_buff *skb, *data_skb;
1401
int err, ret;
1402
1403
if (flags&MSG_OOB)
1404
return -EOPNOTSUPP;
1405
1406
copied = 0;
1407
1408
skb = skb_recv_datagram(sk, flags, noblock, &err);
1409
if (skb == NULL)
1410
goto out;
1411
1412
data_skb = skb;
1413
1414
#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1415
if (unlikely(skb_shinfo(skb)->frag_list)) {
1416
/*
1417
* If this skb has a frag_list, it means that we
1418
* will have to use the frag_list skb's data for compat tasks
1419
* and the regular skb's data for normal (non-compat) tasks.
1420
*
1421
* If we need to send the compat skb, assign it to the
1422
* 'data_skb' variable so that it will be used below for data
1423
* copying. We keep 'skb' for everything else, including
1424
* freeing both later.
1425
*/
1426
if (flags & MSG_CMSG_COMPAT)
1427
data_skb = skb_shinfo(skb)->frag_list;
1428
}
1429
#endif
1430
1431
msg->msg_namelen = 0;
1432
1433
copied = data_skb->len;
1434
if (len < copied) {
1435
msg->msg_flags |= MSG_TRUNC;
1436
copied = len;
1437
}
1438
1439
skb_reset_transport_header(data_skb);
1440
err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
1441
1442
if (msg->msg_name) {
1443
struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1444
addr->nl_family = AF_NETLINK;
1445
addr->nl_pad = 0;
1446
addr->nl_pid = NETLINK_CB(skb).pid;
1447
addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1448
msg->msg_namelen = sizeof(*addr);
1449
}
1450
1451
if (nlk->flags & NETLINK_RECV_PKTINFO)
1452
netlink_cmsg_recv_pktinfo(msg, skb);
1453
1454
if (NULL == siocb->scm) {
1455
memset(&scm, 0, sizeof(scm));
1456
siocb->scm = &scm;
1457
}
1458
siocb->scm->creds = *NETLINK_CREDS(skb);
1459
if (flags & MSG_TRUNC)
1460
copied = data_skb->len;
1461
1462
skb_free_datagram(sk, skb);
1463
1464
if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1465
ret = netlink_dump(sk);
1466
if (ret) {
1467
sk->sk_err = ret;
1468
sk->sk_error_report(sk);
1469
}
1470
}
1471
1472
scm_recv(sock, msg, siocb->scm, flags);
1473
out:
1474
netlink_rcv_wake(sk);
1475
return err ? : copied;
1476
}
1477
1478
static void netlink_data_ready(struct sock *sk, int len)
1479
{
1480
BUG();
1481
}
1482
1483
/*
1484
* We export these functions to other modules. They provide a
1485
* complete set of kernel non-blocking support for message
1486
* queueing.
1487
*/
1488
1489
struct sock *
1490
netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1491
void (*input)(struct sk_buff *skb),
1492
struct mutex *cb_mutex, struct module *module)
1493
{
1494
struct socket *sock;
1495
struct sock *sk;
1496
struct netlink_sock *nlk;
1497
struct listeners *listeners = NULL;
1498
1499
BUG_ON(!nl_table);
1500
1501
if (unit < 0 || unit >= MAX_LINKS)
1502
return NULL;
1503
1504
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1505
return NULL;
1506
1507
/*
1508
* We only need to hold a reference on the net from sk, but must not
1509
* get_net it. Besides, we cannot get and then put the net here.
1510
* So we create one inside init_net and then move it to net.
1511
*/
1512
1513
if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1514
goto out_sock_release_nosk;
1515
1516
sk = sock->sk;
1517
sk_change_net(sk, net);
1518
1519
if (groups < 32)
1520
groups = 32;
1521
1522
listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1523
if (!listeners)
1524
goto out_sock_release;
1525
1526
sk->sk_data_ready = netlink_data_ready;
1527
if (input)
1528
nlk_sk(sk)->netlink_rcv = input;
1529
1530
if (netlink_insert(sk, net, 0))
1531
goto out_sock_release;
1532
1533
nlk = nlk_sk(sk);
1534
nlk->flags |= NETLINK_KERNEL_SOCKET;
1535
1536
netlink_table_grab();
1537
if (!nl_table[unit].registered) {
1538
nl_table[unit].groups = groups;
1539
rcu_assign_pointer(nl_table[unit].listeners, listeners);
1540
nl_table[unit].cb_mutex = cb_mutex;
1541
nl_table[unit].module = module;
1542
nl_table[unit].registered = 1;
1543
} else {
1544
kfree(listeners);
1545
nl_table[unit].registered++;
1546
}
1547
netlink_table_ungrab();
1548
return sk;
1549
1550
out_sock_release:
1551
kfree(listeners);
1552
netlink_kernel_release(sk);
1553
return NULL;
1554
1555
out_sock_release_nosk:
1556
sock_release(sock);
1557
return NULL;
1558
}
1559
EXPORT_SYMBOL(netlink_kernel_create);
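/*
 * Minimal usage sketch (illustrative only): a subsystem creates its
 * kernel-side socket once at init time.  MY_PROTO, MY_GROUPS and
 * my_rcv_msg() are hypothetical; netlink_rcv_skb() handles the
 * per-message parsing for request/ack style protocols.
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_rcv_msg);
 *	}
 *
 *	my_sk = netlink_kernel_create(&init_net, MY_PROTO, MY_GROUPS,
 *				      my_input, NULL, THIS_MODULE);
 *	if (!my_sk)
 *		return -ENOMEM;
 */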
1560
1561
1562
void
1563
netlink_kernel_release(struct sock *sk)
1564
{
1565
sk_release_kernel(sk);
1566
}
1567
EXPORT_SYMBOL(netlink_kernel_release);
1568
1569
int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1570
{
1571
struct listeners *new, *old;
1572
struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1573
1574
if (groups < 32)
1575
groups = 32;
1576
1577
if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1578
new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1579
if (!new)
1580
return -ENOMEM;
1581
old = rcu_dereference_raw(tbl->listeners);
1582
memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1583
rcu_assign_pointer(tbl->listeners, new);
1584
1585
kfree_rcu(old, rcu);
1586
}
1587
tbl->groups = groups;
1588
1589
return 0;
1590
}
1591
1592
/**
1593
* netlink_change_ngroups - change number of multicast groups
1594
*
1595
* This changes the number of multicast groups that are available
1596
* on a certain netlink family. Note that it is not possible to
1597
* change the number of groups to below 32. Also note that it does
1598
* not implicitly call netlink_clear_multicast_users() when the
1599
* number of groups is reduced.
1600
*
1601
* @sk: The kernel netlink socket, as returned by netlink_kernel_create().
1602
* @groups: The new number of groups.
1603
*/
1604
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1605
{
1606
int err;
1607
1608
netlink_table_grab();
1609
err = __netlink_change_ngroups(sk, groups);
1610
netlink_table_ungrab();
1611
1612
return err;
1613
}
1614
1615
void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1616
{
1617
struct sock *sk;
1618
struct hlist_node *node;
1619
struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1620
1621
sk_for_each_bound(sk, node, &tbl->mc_list)
1622
netlink_update_socket_mc(nlk_sk(sk), group, 0);
1623
}
1624
1625
/**
1626
* netlink_clear_multicast_users - remove all listeners from a multicast group
1627
*
1628
* This function removes all listeners from the given group.
1629
* @ksk: The kernel netlink socket, as returned by
1630
* netlink_kernel_create().
1631
* @group: The multicast group to clear.
1632
*/
1633
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1634
{
1635
netlink_table_grab();
1636
__netlink_clear_multicast_users(ksk, group);
1637
netlink_table_ungrab();
1638
}
1639
1640
void netlink_set_nonroot(int protocol, unsigned int flags)
1641
{
1642
if ((unsigned int)protocol < MAX_LINKS)
1643
nl_table[protocol].nl_nonroot = flags;
1644
}
1645
EXPORT_SYMBOL(netlink_set_nonroot);
1646
1647
static void netlink_destroy_callback(struct netlink_callback *cb)
1648
{
1649
kfree_skb(cb->skb);
1650
kfree(cb);
1651
}
1652
1653
/*
1654
* It looks a bit ugly.
1655
* It would be better to create a kernel thread.
1656
*/
1657
1658
static int netlink_dump(struct sock *sk)
1659
{
1660
struct netlink_sock *nlk = nlk_sk(sk);
1661
struct netlink_callback *cb;
1662
struct sk_buff *skb;
1663
struct nlmsghdr *nlh;
1664
int len, err = -ENOBUFS;
1665
1666
skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
1667
if (!skb)
1668
goto errout;
1669
1670
mutex_lock(nlk->cb_mutex);
1671
1672
cb = nlk->cb;
1673
if (cb == NULL) {
1674
err = -EINVAL;
1675
goto errout_skb;
1676
}
1677
1678
len = cb->dump(skb, cb);
1679
1680
if (len > 0) {
1681
mutex_unlock(nlk->cb_mutex);
1682
1683
if (sk_filter(sk, skb))
1684
kfree_skb(skb);
1685
else {
1686
skb_queue_tail(&sk->sk_receive_queue, skb);
1687
sk->sk_data_ready(sk, skb->len);
1688
}
1689
return 0;
1690
}
1691
1692
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1693
if (!nlh)
1694
goto errout_skb;
1695
1696
memcpy(nlmsg_data(nlh), &len, sizeof(len));
1697
1698
if (sk_filter(sk, skb))
1699
kfree_skb(skb);
1700
else {
1701
skb_queue_tail(&sk->sk_receive_queue, skb);
1702
sk->sk_data_ready(sk, skb->len);
1703
}
1704
1705
if (cb->done)
1706
cb->done(cb);
1707
nlk->cb = NULL;
1708
mutex_unlock(nlk->cb_mutex);
1709
1710
netlink_destroy_callback(cb);
1711
return 0;
1712
1713
errout_skb:
1714
mutex_unlock(nlk->cb_mutex);
1715
kfree_skb(skb);
1716
errout:
1717
return err;
1718
}
1719
1720
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1721
const struct nlmsghdr *nlh,
1722
int (*dump)(struct sk_buff *skb,
1723
struct netlink_callback *),
1724
int (*done)(struct netlink_callback *))
1725
{
1726
struct netlink_callback *cb;
1727
struct sock *sk;
1728
struct netlink_sock *nlk;
1729
int ret;
1730
1731
cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1732
if (cb == NULL)
1733
return -ENOBUFS;
1734
1735
cb->dump = dump;
1736
cb->done = done;
1737
cb->nlh = nlh;
1738
atomic_inc(&skb->users);
1739
cb->skb = skb;
1740
1741
sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
1742
if (sk == NULL) {
1743
netlink_destroy_callback(cb);
1744
return -ECONNREFUSED;
1745
}
1746
nlk = nlk_sk(sk);
1747
/* A dump is in progress... */
1748
mutex_lock(nlk->cb_mutex);
1749
if (nlk->cb) {
1750
mutex_unlock(nlk->cb_mutex);
1751
netlink_destroy_callback(cb);
1752
sock_put(sk);
1753
return -EBUSY;
1754
}
1755
nlk->cb = cb;
1756
mutex_unlock(nlk->cb_mutex);
1757
1758
ret = netlink_dump(sk);
1759
1760
sock_put(sk);
1761
1762
if (ret)
1763
return ret;
1764
1765
/* We successfully started a dump; by returning -EINTR we
1766
* signal not to send an ACK even if one was requested.
1767
*/
1768
return -EINTR;
1769
}
1770
EXPORT_SYMBOL(netlink_dump_start);
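/*
 * Minimal usage sketch (illustrative only): a request handler starts a
 * dump instead of replying directly.  my_sk, my_dump() and my_done()
 * are hypothetical; returning the netlink_dump_start() result
 * propagates the -EINTR "do not ACK" convention described above.
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP)
 *		return netlink_dump_start(my_sk, skb, nlh,
 *					  my_dump, my_done);
 */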
1771
1772
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1773
{
1774
struct sk_buff *skb;
1775
struct nlmsghdr *rep;
1776
struct nlmsgerr *errmsg;
1777
size_t payload = sizeof(*errmsg);
1778
1779
/* error messages get the original request appended */
1780
if (err)
1781
payload += nlmsg_len(nlh);
1782
1783
skb = nlmsg_new(payload, GFP_KERNEL);
1784
if (!skb) {
1785
struct sock *sk;
1786
1787
sk = netlink_lookup(sock_net(in_skb->sk),
1788
in_skb->sk->sk_protocol,
1789
NETLINK_CB(in_skb).pid);
1790
if (sk) {
1791
sk->sk_err = ENOBUFS;
1792
sk->sk_error_report(sk);
1793
sock_put(sk);
1794
}
1795
return;
1796
}
1797
1798
rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1799
NLMSG_ERROR, payload, 0);
1800
errmsg = nlmsg_data(rep);
1801
errmsg->error = err;
1802
memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1803
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1804
}
1805
EXPORT_SYMBOL(netlink_ack);
1806
1807
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1808
struct nlmsghdr *))
1809
{
1810
struct nlmsghdr *nlh;
1811
int err;
1812
1813
while (skb->len >= nlmsg_total_size(0)) {
1814
int msglen;
1815
1816
nlh = nlmsg_hdr(skb);
1817
err = 0;
1818
1819
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1820
return 0;
1821
1822
/* Only requests are handled by the kernel */
1823
if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1824
goto ack;
1825
1826
/* Skip control messages */
1827
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1828
goto ack;
1829
1830
err = cb(skb, nlh);
1831
if (err == -EINTR)
1832
goto skip;
1833
1834
ack:
1835
if (nlh->nlmsg_flags & NLM_F_ACK || err)
1836
netlink_ack(skb, nlh, err);
1837
1838
skip:
1839
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1840
if (msglen > skb->len)
1841
msglen = skb->len;
1842
skb_pull(skb, msglen);
1843
}
1844
1845
return 0;
1846
}
1847
EXPORT_SYMBOL(netlink_rcv_skb);
1848
1849
/**
1850
* nlmsg_notify - send a notification netlink message
1851
* @sk: netlink socket to use
1852
* @skb: notification message
1853
* @pid: destination netlink pid for reports or 0
1854
* @group: destination multicast group or 0
1855
* @report: 1 to report back, 0 to disable
1856
* @flags: allocation flags
1857
*/
1858
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1859
unsigned int group, int report, gfp_t flags)
1860
{
1861
int err = 0;
1862
1863
if (group) {
1864
int exclude_pid = 0;
1865
1866
if (report) {
1867
atomic_inc(&skb->users);
1868
exclude_pid = pid;
1869
}
1870
1871
/* errors reported via destination sk->sk_err, but propagate
1872
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
1873
err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1874
}
1875
1876
if (report) {
1877
int err2;
1878
1879
err2 = nlmsg_unicast(sk, skb, pid);
1880
if (!err || err == -ESRCH)
1881
err = err2;
1882
}
1883
1884
return err;
1885
}
1886
EXPORT_SYMBOL(nlmsg_notify);
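/*
 * Minimal usage sketch (illustrative only): the common "notify the
 * group and optionally echo back to the requester" pattern.  my_sk,
 * MY_GROUP and oskb (the original request skb) are hypothetical.
 *
 *	return nlmsg_notify(my_sk, skb, NETLINK_CB(oskb).pid, MY_GROUP,
 *			    nlh->nlmsg_flags & NLM_F_ECHO, GFP_KERNEL);
 */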
1887
1888
#ifdef CONFIG_PROC_FS
1889
struct nl_seq_iter {
1890
struct seq_net_private p;
1891
int link;
1892
int hash_idx;
1893
};
1894
1895
static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1896
{
1897
struct nl_seq_iter *iter = seq->private;
1898
int i, j;
1899
struct sock *s;
1900
struct hlist_node *node;
1901
loff_t off = 0;
1902
1903
for (i = 0; i < MAX_LINKS; i++) {
1904
struct nl_pid_hash *hash = &nl_table[i].hash;
1905
1906
for (j = 0; j <= hash->mask; j++) {
1907
sk_for_each(s, node, &hash->table[j]) {
1908
if (sock_net(s) != seq_file_net(seq))
1909
continue;
1910
if (off == pos) {
1911
iter->link = i;
1912
iter->hash_idx = j;
1913
return s;
1914
}
1915
++off;
1916
}
1917
}
1918
}
1919
return NULL;
1920
}
1921
1922
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1923
__acquires(nl_table_lock)
1924
{
1925
read_lock(&nl_table_lock);
1926
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1927
}
1928
1929
static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1930
{
1931
struct sock *s;
1932
struct nl_seq_iter *iter;
1933
int i, j;
1934
1935
++*pos;
1936
1937
if (v == SEQ_START_TOKEN)
1938
return netlink_seq_socket_idx(seq, 0);
1939
1940
iter = seq->private;
1941
s = v;
1942
do {
1943
s = sk_next(s);
1944
} while (s && sock_net(s) != seq_file_net(seq));
1945
if (s)
1946
return s;
1947
1948
i = iter->link;
1949
j = iter->hash_idx + 1;
1950
1951
do {
1952
struct nl_pid_hash *hash = &nl_table[i].hash;
1953
1954
for (; j <= hash->mask; j++) {
1955
s = sk_head(&hash->table[j]);
1956
while (s && sock_net(s) != seq_file_net(seq))
1957
s = sk_next(s);
1958
if (s) {
1959
iter->link = i;
1960
iter->hash_idx = j;
1961
return s;
1962
}
1963
}
1964
1965
j = 0;
1966
} while (++i < MAX_LINKS);
1967
1968
return NULL;
1969
}
1970
1971
static void netlink_seq_stop(struct seq_file *seq, void *v)
1972
__releases(nl_table_lock)
1973
{
1974
read_unlock(&nl_table_lock);
1975
}
1976
1977
1978
static int netlink_seq_show(struct seq_file *seq, void *v)
1979
{
1980
if (v == SEQ_START_TOKEN)
1981
seq_puts(seq,
1982
"sk Eth Pid Groups "
1983
"Rmem Wmem Dump Locks Drops Inode\n");
1984
else {
1985
struct sock *s = v;
1986
struct netlink_sock *nlk = nlk_sk(s);
1987
1988
seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
1989
s,
1990
s->sk_protocol,
1991
nlk->pid,
1992
nlk->groups ? (u32)nlk->groups[0] : 0,
1993
sk_rmem_alloc_get(s),
1994
sk_wmem_alloc_get(s),
1995
nlk->cb,
1996
atomic_read(&s->sk_refcnt),
1997
atomic_read(&s->sk_drops),
1998
sock_i_ino(s)
1999
);
2000
2001
}
2002
return 0;
2003
}
2004
2005
static const struct seq_operations netlink_seq_ops = {
2006
.start = netlink_seq_start,
2007
.next = netlink_seq_next,
2008
.stop = netlink_seq_stop,
2009
.show = netlink_seq_show,
2010
};
2011
2012
2013
static int netlink_seq_open(struct inode *inode, struct file *file)
2014
{
2015
return seq_open_net(inode, file, &netlink_seq_ops,
2016
sizeof(struct nl_seq_iter));
2017
}
2018
2019
static const struct file_operations netlink_seq_fops = {
2020
.owner = THIS_MODULE,
2021
.open = netlink_seq_open,
2022
.read = seq_read,
2023
.llseek = seq_lseek,
2024
.release = seq_release_net,
2025
};
2026
2027
#endif
2028
2029
int netlink_register_notifier(struct notifier_block *nb)
2030
{
2031
return atomic_notifier_chain_register(&netlink_chain, nb);
2032
}
2033
EXPORT_SYMBOL(netlink_register_notifier);
2034
2035
int netlink_unregister_notifier(struct notifier_block *nb)
2036
{
2037
return atomic_notifier_chain_unregister(&netlink_chain, nb);
2038
}
2039
EXPORT_SYMBOL(netlink_unregister_notifier);
2040
2041
static const struct proto_ops netlink_ops = {
2042
.family = PF_NETLINK,
2043
.owner = THIS_MODULE,
2044
.release = netlink_release,
2045
.bind = netlink_bind,
2046
.connect = netlink_connect,
2047
.socketpair = sock_no_socketpair,
2048
.accept = sock_no_accept,
2049
.getname = netlink_getname,
2050
.poll = datagram_poll,
2051
.ioctl = sock_no_ioctl,
2052
.listen = sock_no_listen,
2053
.shutdown = sock_no_shutdown,
2054
.setsockopt = netlink_setsockopt,
2055
.getsockopt = netlink_getsockopt,
2056
.sendmsg = netlink_sendmsg,
2057
.recvmsg = netlink_recvmsg,
2058
.mmap = sock_no_mmap,
2059
.sendpage = sock_no_sendpage,
2060
};
2061
2062
static const struct net_proto_family netlink_family_ops = {
2063
.family = PF_NETLINK,
2064
.create = netlink_create,
2065
.owner = THIS_MODULE, /* for consistency 8) */
2066
};
2067
2068
static int __net_init netlink_net_init(struct net *net)
2069
{
2070
#ifdef CONFIG_PROC_FS
2071
if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2072
return -ENOMEM;
2073
#endif
2074
return 0;
2075
}
2076
2077
static void __net_exit netlink_net_exit(struct net *net)
2078
{
2079
#ifdef CONFIG_PROC_FS
2080
proc_net_remove(net, "netlink");
2081
#endif
2082
}
2083
2084
static void __init netlink_add_usersock_entry(void)
2085
{
2086
struct listeners *listeners;
2087
int groups = 32;
2088
2089
listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2090
if (!listeners)
2091
panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2092
2093
netlink_table_grab();
2094
2095
nl_table[NETLINK_USERSOCK].groups = groups;
2096
rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2097
nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2098
nl_table[NETLINK_USERSOCK].registered = 1;
2099
2100
netlink_table_ungrab();
2101
}
2102
2103
static struct pernet_operations __net_initdata netlink_net_ops = {
2104
.init = netlink_net_init,
2105
.exit = netlink_net_exit,
2106
};
2107
2108
static int __init netlink_proto_init(void)
2109
{
2110
struct sk_buff *dummy_skb;
2111
int i;
2112
unsigned long limit;
2113
unsigned int order;
2114
int err = proto_register(&netlink_proto, 0);
2115
2116
if (err != 0)
2117
goto out;
2118
2119
BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2120
2121
nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2122
if (!nl_table)
2123
goto panic;
2124
2125
if (totalram_pages >= (128 * 1024))
2126
limit = totalram_pages >> (21 - PAGE_SHIFT);
2127
else
2128
limit = totalram_pages >> (23 - PAGE_SHIFT);
2129
2130
order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2131
limit = (1UL << order) / sizeof(struct hlist_head);
2132
order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2133
2134
for (i = 0; i < MAX_LINKS; i++) {
2135
struct nl_pid_hash *hash = &nl_table[i].hash;
2136
2137
hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
2138
if (!hash->table) {
2139
while (i-- > 0)
2140
nl_pid_hash_free(nl_table[i].hash.table,
2141
1 * sizeof(*hash->table));
2142
kfree(nl_table);
2143
goto panic;
2144
}
2145
hash->max_shift = order;
2146
hash->shift = 0;
2147
hash->mask = 0;
2148
hash->rehash_time = jiffies;
2149
}
2150
2151
netlink_add_usersock_entry();
2152
2153
sock_register(&netlink_family_ops);
2154
register_pernet_subsys(&netlink_net_ops);
2155
/* The netlink device handler may be needed early. */
2156
rtnetlink_init();
2157
out:
2158
return err;
2159
panic:
2160
panic("netlink_init: Cannot allocate nl_table\n");
2161
}
2162
2163
core_initcall(netlink_proto_init);
2164
2165