GitHub Repository: awilliam/linux-vfio
Path: blob/master/security/device_cgroup.c
/*
 * device_cgroup.c - device cgroup subsystem
 *
 * Copyright 2007 IBM Corp
 */

#include <linux/device_cgroup.h>
#include <linux/cgroup.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>

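/*
 * Access bits and device types used in whitelist entries; they correspond to
 * the 'r'/'w'/'m' and 'b'/'c'/'a' characters accepted by the control files.
 */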
#define ACC_MKNOD 1
#define ACC_READ  2
#define ACC_WRITE 4
#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)

#define DEV_BLOCK 1
#define DEV_CHAR  2
#define DEV_ALL   4  /* this represents all devices */

static DEFINE_MUTEX(devcgroup_mutex);

/*
 * whitelist locking rules:
 * hold devcgroup_mutex for update/read.
 * hold rcu_read_lock() for read.
 */

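/*
 * A whitelist entry: @type is DEV_BLOCK, DEV_CHAR or DEV_ALL, @access is a
 * mask of ACC_* bits, and a @major or @minor of ~0 acts as a wildcard.
 */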
struct dev_whitelist_item {
        u32 major, minor;
        short type;
        short access;
        struct list_head list;
        struct rcu_head rcu;
};

struct dev_cgroup {
        struct cgroup_subsys_state css;
        struct list_head whitelist;
};

static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
{
        return container_of(s, struct dev_cgroup, css);
}

static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
{
        return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}

static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
{
        return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
}

struct cgroup_subsys devices_subsys;

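/*
 * A task may only be moved into a device cgroup by itself or by a
 * CAP_SYS_ADMIN holder.
 */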
static int devcgroup_can_attach(struct cgroup_subsys *ss,
                struct cgroup *new_cgroup, struct task_struct *task)
{
        if (current != task && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

/*
 * called under devcgroup_mutex
 */
static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
{
        struct dev_whitelist_item *wh, *tmp, *new;

        list_for_each_entry(wh, orig, list) {
                new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
                if (!new)
                        goto free_and_exit;
                list_add_tail(&new->list, dest);
        }

        return 0;

free_and_exit:
        list_for_each_entry_safe(wh, tmp, dest, list) {
                list_del(&wh->list);
                kfree(wh);
        }
        return -ENOMEM;
}

/* Stupid prototype - don't bother combining existing entries */
/*
 * called under devcgroup_mutex
 */
static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
                        struct dev_whitelist_item *wh)
{
        struct dev_whitelist_item *whcopy, *walk;

        whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
        if (!whcopy)
                return -ENOMEM;

        list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
                if (walk->type != wh->type)
                        continue;
                if (walk->major != wh->major)
                        continue;
                if (walk->minor != wh->minor)
                        continue;

                walk->access |= wh->access;
                kfree(whcopy);
                whcopy = NULL;
        }

        if (whcopy != NULL)
                list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
        return 0;
}

static void whitelist_item_free(struct rcu_head *rcu)
{
        struct dev_whitelist_item *item;

        item = container_of(rcu, struct dev_whitelist_item, rcu);
        kfree(item);
}

/*
 * called under devcgroup_mutex
 */
static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
                        struct dev_whitelist_item *wh)
{
        struct dev_whitelist_item *walk, *tmp;

        list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
                if (walk->type == DEV_ALL)
                        goto remove;
                if (walk->type != wh->type)
                        continue;
                if (walk->major != ~0 && walk->major != wh->major)
                        continue;
                if (walk->minor != ~0 && walk->minor != wh->minor)
                        continue;

remove:
                walk->access &= ~wh->access;
                if (!walk->access) {
                        list_del_rcu(&walk->list);
                        call_rcu(&walk->rcu, whitelist_item_free);
                }
        }
}

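/*
 * The root cgroup starts out with a single wildcard "allow everything" entry;
 * a child cgroup starts with a copy of its parent's whitelist.
 */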
/*
 * called from kernel/cgroup.c with cgroup_lock() held.
 */
static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
                                                struct cgroup *cgroup)
{
        struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
        struct cgroup *parent_cgroup;
        int ret;

        dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
        if (!dev_cgroup)
                return ERR_PTR(-ENOMEM);
        INIT_LIST_HEAD(&dev_cgroup->whitelist);
        parent_cgroup = cgroup->parent;

        if (parent_cgroup == NULL) {
                struct dev_whitelist_item *wh;
                wh = kmalloc(sizeof(*wh), GFP_KERNEL);
                if (!wh) {
                        kfree(dev_cgroup);
                        return ERR_PTR(-ENOMEM);
                }
                wh->minor = wh->major = ~0;
                wh->type = DEV_ALL;
                wh->access = ACC_MASK;
                list_add(&wh->list, &dev_cgroup->whitelist);
        } else {
                parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
                mutex_lock(&devcgroup_mutex);
                ret = dev_whitelist_copy(&dev_cgroup->whitelist,
                                &parent_dev_cgroup->whitelist);
                mutex_unlock(&devcgroup_mutex);
                if (ret) {
                        kfree(dev_cgroup);
                        return ERR_PTR(ret);
                }
        }

        return &dev_cgroup->css;
}

static void devcgroup_destroy(struct cgroup_subsys *ss,
                        struct cgroup *cgroup)
{
        struct dev_cgroup *dev_cgroup;
        struct dev_whitelist_item *wh, *tmp;

        dev_cgroup = cgroup_to_devcgroup(cgroup);
        list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) {
                list_del(&wh->list);
                kfree(wh);
        }
        kfree(dev_cgroup);
}

#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
#define DEVCG_LIST 3

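/*
 * Buffer sizes used when formatting whitelist entries for the "list" file:
 * MAJMINLEN comfortably holds a 32-bit major/minor in decimal (or "*") plus
 * the terminating NUL, and ACCLEN holds at most "rwm" plus the NUL.
 */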
#define MAJMINLEN 13
#define ACCLEN 4

static void set_access(char *acc, short access)
{
        int idx = 0;
        memset(acc, 0, ACCLEN);
        if (access & ACC_READ)
                acc[idx++] = 'r';
        if (access & ACC_WRITE)
                acc[idx++] = 'w';
        if (access & ACC_MKNOD)
                acc[idx++] = 'm';
}

static char type_to_char(short type)
{
        if (type == DEV_ALL)
                return 'a';
        if (type == DEV_CHAR)
                return 'c';
        if (type == DEV_BLOCK)
                return 'b';
        return 'X';
}

static void set_majmin(char *str, unsigned m)
{
        if (m == ~0)
                strcpy(str, "*");
        else
                sprintf(str, "%u", m);
}

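/*
 * Print the whitelist for the "list" control file, one entry per line in the
 * form "<type> <major>:<minor> <access>", e.g. "c 1:3 rwm" or "a *:* rwm".
 */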
static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
                                struct seq_file *m)
{
        struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
        struct dev_whitelist_item *wh;
        char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];

        rcu_read_lock();
        list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
                set_access(acc, wh->access);
                set_majmin(maj, wh->major);
                set_majmin(min, wh->minor);
                seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
                           maj, min, acc);
        }
        rcu_read_unlock();

        return 0;
}

/*
 * may_access_whitelist:
 * does the access granted to dev_cgroup c contain the access
 * requested in whitelist item refwh.
 * return 1 if yes, 0 if no.
 * call with devcgroup_mutex held
 */
static int may_access_whitelist(struct dev_cgroup *c,
                                struct dev_whitelist_item *refwh)
{
        struct dev_whitelist_item *whitem;

        list_for_each_entry(whitem, &c->whitelist, list) {
                if (whitem->type & DEV_ALL)
                        return 1;
                if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK))
                        continue;
                if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR))
                        continue;
                if (whitem->major != ~0 && whitem->major != refwh->major)
                        continue;
                if (whitem->minor != ~0 && whitem->minor != refwh->minor)
                        continue;
                if (refwh->access & (~whitem->access))
                        continue;
                return 1;
        }
        return 0;
}

/*
 * parent_has_perm:
 * when adding a new allow rule to a device whitelist, the rule
 * must be allowed in the parent device
 */
static int parent_has_perm(struct dev_cgroup *childcg,
                           struct dev_whitelist_item *wh)
{
        struct cgroup *pcg = childcg->css.cgroup->parent;
        struct dev_cgroup *parent;

        if (!pcg)
                return 1;
        parent = cgroup_to_devcgroup(pcg);
        return may_access_whitelist(parent, wh);
}

/*
 * Modify the whitelist using allow/deny rules.
 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD
 * so we can give a container CAP_MKNOD to let it create devices but not
 * modify the whitelist.
 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN
 *
 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting
 * new access is only allowed if you're in the top-level cgroup, or your
 * parent cgroup has the access you're asking for.
 */
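/*
 * Rules are written as "<type> <major>:<minor> <access>", where <type> is
 * 'a', 'b' or 'c', '*' may stand for any major or minor, and <access> is up
 * to three of 'r', 'w' and 'm', e.g. "c 1:3 mr" or "b *:* rwm". A leading
 * 'a' means all devices with full access.
 */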
static int devcgroup_update_access(struct dev_cgroup *devcgroup,
                                   int filetype, const char *buffer)
{
        const char *b;
        char *endp;
        int count;
        struct dev_whitelist_item wh;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        memset(&wh, 0, sizeof(wh));
        b = buffer;

        switch (*b) {
        case 'a':
                wh.type = DEV_ALL;
                wh.access = ACC_MASK;
                wh.major = ~0;
                wh.minor = ~0;
                goto handle;
        case 'b':
                wh.type = DEV_BLOCK;
                break;
        case 'c':
                wh.type = DEV_CHAR;
                break;
        default:
                return -EINVAL;
        }
        b++;
        if (!isspace(*b))
                return -EINVAL;
        b++;
        if (*b == '*') {
                wh.major = ~0;
                b++;
        } else if (isdigit(*b)) {
                wh.major = simple_strtoul(b, &endp, 10);
                b = endp;
        } else {
                return -EINVAL;
        }
        if (*b != ':')
                return -EINVAL;
        b++;

        /* read minor */
        if (*b == '*') {
                wh.minor = ~0;
                b++;
        } else if (isdigit(*b)) {
                wh.minor = simple_strtoul(b, &endp, 10);
                b = endp;
        } else {
                return -EINVAL;
        }
        if (!isspace(*b))
                return -EINVAL;
        for (b++, count = 0; count < 3; count++, b++) {
                switch (*b) {
                case 'r':
                        wh.access |= ACC_READ;
                        break;
                case 'w':
                        wh.access |= ACC_WRITE;
                        break;
                case 'm':
                        wh.access |= ACC_MKNOD;
                        break;
                case '\n':
                case '\0':
                        count = 3;
                        break;
                default:
                        return -EINVAL;
                }
        }

handle:
        switch (filetype) {
        case DEVCG_ALLOW:
                if (!parent_has_perm(devcgroup, &wh))
                        return -EPERM;
                return dev_whitelist_add(devcgroup, &wh);
        case DEVCG_DENY:
                dev_whitelist_rm(devcgroup, &wh);
                break;
        default:
                return -EINVAL;
        }
        return 0;
}

static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
                                  const char *buffer)
{
        int retval;

        mutex_lock(&devcgroup_mutex);
        retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
                                         cft->private, buffer);
        mutex_unlock(&devcgroup_mutex);
        return retval;
}

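/*
 * Control files exposed by this subsystem; with the "devices" prefix they
 * appear as devices.allow, devices.deny and devices.list.
 */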
static struct cftype dev_cgroup_files[] = {
        {
                .name = "allow",
                .write_string = devcgroup_access_write,
                .private = DEVCG_ALLOW,
        },
        {
                .name = "deny",
                .write_string = devcgroup_access_write,
                .private = DEVCG_DENY,
        },
        {
                .name = "list",
                .read_seq_string = devcgroup_seq_read,
                .private = DEVCG_LIST,
        },
};

static int devcgroup_populate(struct cgroup_subsys *ss,
                              struct cgroup *cgroup)
{
        return cgroup_add_files(cgroup, ss, dev_cgroup_files,
                                ARRAY_SIZE(dev_cgroup_files));
}

struct cgroup_subsys devices_subsys = {
        .name = "devices",
        .can_attach = devcgroup_can_attach,
        .create = devcgroup_create,
        .destroy = devcgroup_destroy,
        .populate = devcgroup_populate,
        .subsys_id = devices_subsys_id,
};

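/*
 * Check whether the current task's device cgroup allows the access in @mask
 * (MAY_READ/MAY_WRITE) to the device special file @inode; returns 0 if some
 * whitelist entry matches, -EPERM otherwise.
 */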
int __devcgroup_inode_permission(struct inode *inode, int mask)
{
        struct dev_cgroup *dev_cgroup;
        struct dev_whitelist_item *wh;

        rcu_read_lock();

        dev_cgroup = task_devcgroup(current);

        list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
                if (wh->type & DEV_ALL)
                        goto found;
                if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
                        continue;
                if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode))
                        continue;
                if (wh->major != ~0 && wh->major != imajor(inode))
                        continue;
                if (wh->minor != ~0 && wh->minor != iminor(inode))
                        continue;

                if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE))
                        continue;
                if ((mask & MAY_READ) && !(wh->access & ACC_READ))
                        continue;
found:
                rcu_read_unlock();
                return 0;
        }

        rcu_read_unlock();

        return -EPERM;
}

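/*
 * Check whether the current task's device cgroup allows creating the device
 * node described by @mode and @dev; non-device modes are always allowed.
 */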
int devcgroup_inode_mknod(int mode, dev_t dev)
{
        struct dev_cgroup *dev_cgroup;
        struct dev_whitelist_item *wh;

        if (!S_ISBLK(mode) && !S_ISCHR(mode))
                return 0;

        rcu_read_lock();

        dev_cgroup = task_devcgroup(current);

        list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
                if (wh->type & DEV_ALL)
                        goto found;
                if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode))
                        continue;
                if ((wh->type & DEV_CHAR) && !S_ISCHR(mode))
                        continue;
                if (wh->major != ~0 && wh->major != MAJOR(dev))
                        continue;
                if (wh->minor != ~0 && wh->minor != MINOR(dev))
                        continue;

                if (!(wh->access & ACC_MKNOD))
                        continue;
found:
                rcu_read_unlock();
                return 0;
        }

        rcu_read_unlock();

        return -EPERM;
}