Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/security/commoncap.c
49039 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/* Common capabilities, needed by capability.o.
3
*/
4
5
#include <linux/capability.h>
6
#include <linux/audit.h>
7
#include <linux/init.h>
8
#include <linux/kernel.h>
9
#include <linux/lsm_hooks.h>
10
#include <linux/file.h>
11
#include <linux/mm.h>
12
#include <linux/mman.h>
13
#include <linux/pagemap.h>
14
#include <linux/swap.h>
15
#include <linux/skbuff.h>
16
#include <linux/netlink.h>
17
#include <linux/ptrace.h>
18
#include <linux/xattr.h>
19
#include <linux/hugetlb.h>
20
#include <linux/mount.h>
21
#include <linux/sched.h>
22
#include <linux/prctl.h>
23
#include <linux/securebits.h>
24
#include <linux/user_namespace.h>
25
#include <linux/binfmts.h>
26
#include <linux/personality.h>
27
#include <linux/mnt_idmapping.h>
28
#include <uapi/linux/lsm.h>
29
30
#define CREATE_TRACE_POINTS
31
#include <trace/events/capability.h>
32
33
/*
34
* If a non-root user executes a setuid-root binary in
35
* !secure(SECURE_NOROOT) mode, then we raise capabilities.
36
* However if fE is also set, then the intent is for only
37
* the file capabilities to be applied, and the setuid-root
38
* bit is left on either to change the uid (plausible) or
39
* to get full privilege on a kernel without file capabilities
40
* support. So in that case we do not raise capabilities.
41
*
42
* Warn if that happens, once per boot.
43
*/
44
static void warn_setuid_and_fcaps_mixed(const char *fname)
45
{
46
static int warned;
47
if (!warned) {
48
printk(KERN_INFO "warning: `%s' has both setuid-root and"
49
" effective capabilities. Therefore not raising all"
50
" capabilities.\n", fname);
51
warned = 1;
52
}
53
}
54
55
/**
56
* cap_capable_helper - Determine whether a task has a particular effective
57
* capability.
58
* @cred: The credentials to use
59
* @target_ns: The user namespace of the resource being accessed
60
* @cred_ns: The user namespace of the credentials
61
* @cap: The capability to check for
62
*
63
* Determine whether the nominated task has the specified capability amongst
64
* its effective set, returning 0 if it does, -ve if it does not.
65
*
66
* See cap_capable for more details.
67
*/
68
static inline int cap_capable_helper(const struct cred *cred,
69
struct user_namespace *target_ns,
70
const struct user_namespace *cred_ns,
71
int cap)
72
{
73
struct user_namespace *ns = target_ns;
74
75
/* See if cred has the capability in the target user namespace
76
* by examining the target user namespace and all of the target
77
* user namespace's parents.
78
*/
79
for (;;) {
80
/* Do we have the necessary capabilities? */
81
if (likely(ns == cred_ns))
82
return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
83
84
/*
85
* If we're already at a lower level than we're looking for,
86
* we're done searching.
87
*/
88
if (ns->level <= cred_ns->level)
89
return -EPERM;
90
91
/*
92
* The owner of the user namespace in the parent of the
93
* user namespace has all caps.
94
*/
95
if ((ns->parent == cred_ns) && uid_eq(ns->owner, cred->euid))
96
return 0;
97
98
/*
99
* If you have a capability in a parent user ns, then you have
100
* it over all children user namespaces as well.
101
*/
102
ns = ns->parent;
103
}
104
105
/* We never get here */
106
}
107
108
/**
109
* cap_capable - Determine whether a task has a particular effective capability
110
* @cred: The credentials to use
111
* @target_ns: The user namespace of the resource being accessed
112
* @cap: The capability to check for
113
* @opts: Bitmask of options defined in include/linux/security.h (unused)
114
*
115
* Determine whether the nominated task has the specified capability amongst
116
* its effective set, returning 0 if it does, -ve if it does not.
117
*
118
* NOTE WELL: cap_capable() has reverse semantics to the capable() call
119
* and friends. That is cap_capable() returns an int 0 when a task has
120
* a capability, while the kernel's capable(), has_ns_capability(),
121
* has_ns_capability_noaudit(), and has_capability_noaudit() return a
122
* bool true (1) for this case.
123
*/
124
int cap_capable(const struct cred *cred, struct user_namespace *target_ns,
125
int cap, unsigned int opts)
126
{
127
const struct user_namespace *cred_ns = cred->user_ns;
128
int ret = cap_capable_helper(cred, target_ns, cred_ns, cap);
129
130
trace_cap_capable(cred, target_ns, cred_ns, cap, ret);
131
return ret;
132
}
133
134
/**
135
* cap_settime - Determine whether the current process may set the system clock
136
* @ts: The time to set
137
* @tz: The timezone to set
138
*
139
* Determine whether the current process may set the system clock and timezone
140
* information, returning 0 if permission granted, -ve if denied.
141
*/
142
int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
143
{
144
if (!capable(CAP_SYS_TIME))
145
return -EPERM;
146
return 0;
147
}
148
149
/**
150
* cap_ptrace_access_check - Determine whether the current process may access
151
* another
152
* @child: The process to be accessed
153
* @mode: The mode of attachment.
154
*
155
* If we are in the same or an ancestor user_ns and have all the target
156
* task's capabilities, then ptrace access is allowed.
157
* If we have the ptrace capability to the target user_ns, then ptrace
158
* access is allowed.
159
* Else denied.
160
*
161
* Determine whether a process may access another, returning 0 if permission
162
* granted, -ve if denied.
163
*/
164
int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
165
{
166
int ret = 0;
167
const struct cred *cred, *child_cred;
168
const kernel_cap_t *caller_caps;
169
170
rcu_read_lock();
171
cred = current_cred();
172
child_cred = __task_cred(child);
173
if (mode & PTRACE_MODE_FSCREDS)
174
caller_caps = &cred->cap_effective;
175
else
176
caller_caps = &cred->cap_permitted;
177
if (cred->user_ns == child_cred->user_ns &&
178
cap_issubset(child_cred->cap_permitted, *caller_caps))
179
goto out;
180
if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
181
goto out;
182
ret = -EPERM;
183
out:
184
rcu_read_unlock();
185
return ret;
186
}
187
188
/**
189
* cap_ptrace_traceme - Determine whether another process may trace the current
190
* @parent: The task proposed to be the tracer
191
*
192
* If parent is in the same or an ancestor user_ns and has all current's
193
* capabilities, then ptrace access is allowed.
194
* If parent has the ptrace capability to current's user_ns, then ptrace
195
* access is allowed.
196
* Else denied.
197
*
198
* Determine whether the nominated task is permitted to trace the current
199
* process, returning 0 if permission is granted, -ve if denied.
200
*/
201
int cap_ptrace_traceme(struct task_struct *parent)
202
{
203
int ret = 0;
204
const struct cred *cred, *child_cred;
205
206
rcu_read_lock();
207
cred = __task_cred(parent);
208
child_cred = current_cred();
209
if (cred->user_ns == child_cred->user_ns &&
210
cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
211
goto out;
212
if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
213
goto out;
214
ret = -EPERM;
215
out:
216
rcu_read_unlock();
217
return ret;
218
}
219
220
/**
221
* cap_capget - Retrieve a task's capability sets
222
* @target: The task from which to retrieve the capability sets
223
* @effective: The place to record the effective set
224
* @inheritable: The place to record the inheritable set
225
* @permitted: The place to record the permitted set
226
*
227
* This function retrieves the capabilities of the nominated task and returns
228
* them to the caller.
229
*/
230
int cap_capget(const struct task_struct *target, kernel_cap_t *effective,
	       kernel_cap_t *inheritable, kernel_cap_t *permitted)
{
	const struct cred *tcred;

	/*
	 * Derived from kernel/capability.c:sys_capget.  The three sets are
	 * copied out while the RCU read lock is held.
	 */
	rcu_read_lock();
	tcred = __task_cred(target);
	*permitted = tcred->cap_permitted;
	*inheritable = tcred->cap_inheritable;
	*effective = tcred->cap_effective;
	rcu_read_unlock();

	return 0;
}
244
245
/*
246
* Determine whether the inheritable capabilities are limited to the old
247
* permitted set. Returns 1 if they are limited, 0 if they are not.
248
*/
249
static inline int cap_inh_is_capped(void)
250
{
251
/* they are so limited unless the current task has the CAP_SETPCAP
252
* capability
253
*/
254
if (cap_capable(current_cred(), current_cred()->user_ns,
255
CAP_SETPCAP, CAP_OPT_NONE) == 0)
256
return 0;
257
return 1;
258
}
259
260
/**
261
* cap_capset - Validate and apply proposed changes to current's capabilities
262
* @new: The proposed new credentials; alterations should be made here
263
* @old: The current task's current credentials
264
* @effective: A pointer to the proposed new effective capabilities set
265
* @inheritable: A pointer to the proposed new inheritable capabilities set
266
* @permitted: A pointer to the proposed new permitted capabilities set
267
*
268
* This function validates and applies a proposed mass change to the current
269
* process's capability sets. The changes are made to the proposed new
270
* credentials, and assuming no error, will be committed by the caller of LSM.
271
*/
272
int cap_capset(struct cred *new,
273
const struct cred *old,
274
const kernel_cap_t *effective,
275
const kernel_cap_t *inheritable,
276
const kernel_cap_t *permitted)
277
{
278
if (cap_inh_is_capped() &&
279
!cap_issubset(*inheritable,
280
cap_combine(old->cap_inheritable,
281
old->cap_permitted)))
282
/* incapable of using this inheritable set */
283
return -EPERM;
284
285
if (!cap_issubset(*inheritable,
286
cap_combine(old->cap_inheritable,
287
old->cap_bset)))
288
/* no new pI capabilities outside bounding set */
289
return -EPERM;
290
291
/* verify restrictions on target's new Permitted set */
292
if (!cap_issubset(*permitted, old->cap_permitted))
293
return -EPERM;
294
295
/* verify the _new_Effective_ is a subset of the _new_Permitted_ */
296
if (!cap_issubset(*effective, *permitted))
297
return -EPERM;
298
299
new->cap_effective = *effective;
300
new->cap_inheritable = *inheritable;
301
new->cap_permitted = *permitted;
302
303
/*
304
* Mask off ambient bits that are no longer both permitted and
305
* inheritable.
306
*/
307
new->cap_ambient = cap_intersect(new->cap_ambient,
308
cap_intersect(*permitted,
309
*inheritable));
310
if (WARN_ON(!cap_ambient_invariant_ok(new)))
311
return -EINVAL;
312
return 0;
313
}
314
315
/**
316
* cap_inode_need_killpriv - Determine if inode change affects privileges
317
* @dentry: The inode/dentry being changed with change marked ATTR_KILL_PRIV
318
*
319
* Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
320
* affects the security markings on that inode, and if it is, should
321
* inode_killpriv() be invoked or the change rejected.
322
*
323
* Return: 1 if security.capability has a value, meaning inode_killpriv()
324
* is required, 0 otherwise, meaning inode_killpriv() is not required.
325
*/
326
int cap_inode_need_killpriv(struct dentry *dentry)
327
{
328
struct inode *inode = d_backing_inode(dentry);
329
int error;
330
331
error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
332
return error > 0;
333
}
334
335
/**
336
* cap_inode_killpriv - Erase the security markings on an inode
337
*
338
* @idmap: idmap of the mount the inode was found from
339
* @dentry: The inode/dentry to alter
340
*
341
* Erase the privilege-enhancing security markings on an inode.
342
*
343
* If the inode has been found through an idmapped mount the idmap of
344
* the vfsmount must be passed through @idmap. This function will then
345
* take care to map the inode according to @idmap before checking
346
* permissions. On non-idmapped mounts or if permission checking is to be
347
* performed on the raw inode simply pass @nop_mnt_idmap.
348
*
349
* Return: 0 if successful, -ve on error.
350
*/
351
int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)
352
{
353
int error;
354
355
error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS);
356
if (error == -EOPNOTSUPP)
357
error = 0;
358
return error;
359
}
360
361
/**
362
* kuid_root_in_ns - check whether the given kuid is root in the given ns
363
* @kuid: the kuid to be tested
364
* @ns: the user namespace to test against
365
*
366
* Returns true if @kuid represents the root user in @ns, false otherwise.
367
*/
368
static bool kuid_root_in_ns(kuid_t kuid, struct user_namespace *ns)
{
	/*
	 * Test @ns and then each of its ancestors; give up once
	 * init_user_ns itself has been tested without a match.
	 */
	while (from_kuid(ns, kuid) != 0) {
		if (ns == &init_user_ns)
			return false;
		ns = ns->parent;
	}

	return true;
}
379
380
/*
 * Return true if @vfsuid is valid and is root in the caller's user
 * namespace or one of its ancestors, as decided by kuid_root_in_ns().
 */
static bool vfsuid_root_in_currentns(vfsuid_t vfsuid)
{
	return vfsuid_valid(vfsuid) &&
	       kuid_root_in_ns(vfsuid_into_kuid(vfsuid), current_user_ns());
}
389
390
/* Return @m with the VFS_CAP_FLAGS_EFFECTIVE bit cleared. */
static __u32 sansflags(__u32 m)
{
	m &= ~VFS_CAP_FLAGS_EFFECTIVE;
	return m;
}
394
395
static bool is_v2header(int size, const struct vfs_cap_data *cap)
396
{
397
if (size != XATTR_CAPS_SZ_2)
398
return false;
399
return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
400
}
401
402
static bool is_v3header(int size, const struct vfs_cap_data *cap)
403
{
404
if (size != XATTR_CAPS_SZ_3)
405
return false;
406
return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
407
}
408
409
/*
410
* getsecurity: We are called for security.* before any attempt to read the
411
* xattr from the inode itself.
412
*
413
* This gives us a chance to read the on-disk value and convert it. If we
414
* return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
415
*
416
* Note we are not called by vfs_getxattr_alloc(), but that is only called
417
* by the integrity subsystem, which really wants the unconverted values -
418
* so that's good.
419
*/
420
int cap_inode_getsecurity(struct mnt_idmap *idmap,
421
struct inode *inode, const char *name, void **buffer,
422
bool alloc)
423
{
424
int size;
425
kuid_t kroot;
426
vfsuid_t vfsroot;
427
u32 nsmagic, magic;
428
uid_t root, mappedroot;
429
char *tmpbuf = NULL;
430
struct vfs_cap_data *cap;
431
struct vfs_ns_cap_data *nscap = NULL;
432
struct dentry *dentry;
433
struct user_namespace *fs_ns;
434
435
if (strcmp(name, "capability") != 0)
436
return -EOPNOTSUPP;
437
438
dentry = d_find_any_alias(inode);
439
if (!dentry)
440
return -EINVAL;
441
size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf,
442
sizeof(struct vfs_ns_cap_data), GFP_NOFS);
443
dput(dentry);
444
/* gcc11 complains if we don't check for !tmpbuf */
445
if (size < 0 || !tmpbuf)
446
goto out_free;
447
448
fs_ns = inode->i_sb->s_user_ns;
449
cap = (struct vfs_cap_data *) tmpbuf;
450
if (is_v2header(size, cap)) {
451
root = 0;
452
} else if (is_v3header(size, cap)) {
453
nscap = (struct vfs_ns_cap_data *) tmpbuf;
454
root = le32_to_cpu(nscap->rootid);
455
} else {
456
size = -EINVAL;
457
goto out_free;
458
}
459
460
kroot = make_kuid(fs_ns, root);
461
462
/* If this is an idmapped mount shift the kuid. */
463
vfsroot = make_vfsuid(idmap, fs_ns, kroot);
464
465
/* If the root kuid maps to a valid uid in current ns, then return
466
* this as a nscap. */
467
mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot));
468
if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
469
size = sizeof(struct vfs_ns_cap_data);
470
if (alloc) {
471
if (!nscap) {
472
/* v2 -> v3 conversion */
473
nscap = kzalloc(size, GFP_ATOMIC);
474
if (!nscap) {
475
size = -ENOMEM;
476
goto out_free;
477
}
478
nsmagic = VFS_CAP_REVISION_3;
479
magic = le32_to_cpu(cap->magic_etc);
480
if (magic & VFS_CAP_FLAGS_EFFECTIVE)
481
nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
482
memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
483
nscap->magic_etc = cpu_to_le32(nsmagic);
484
} else {
485
/* use allocated v3 buffer */
486
tmpbuf = NULL;
487
}
488
nscap->rootid = cpu_to_le32(mappedroot);
489
*buffer = nscap;
490
}
491
goto out_free;
492
}
493
494
if (!vfsuid_root_in_currentns(vfsroot)) {
495
size = -EOVERFLOW;
496
goto out_free;
497
}
498
499
/* This comes from a parent namespace. Return as a v2 capability */
500
size = sizeof(struct vfs_cap_data);
501
if (alloc) {
502
if (nscap) {
503
/* v3 -> v2 conversion */
504
cap = kzalloc(size, GFP_ATOMIC);
505
if (!cap) {
506
size = -ENOMEM;
507
goto out_free;
508
}
509
magic = VFS_CAP_REVISION_2;
510
nsmagic = le32_to_cpu(nscap->magic_etc);
511
if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
512
magic |= VFS_CAP_FLAGS_EFFECTIVE;
513
memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
514
cap->magic_etc = cpu_to_le32(magic);
515
} else {
516
/* use unconverted v2 */
517
tmpbuf = NULL;
518
}
519
*buffer = cap;
520
}
521
out_free:
522
kfree(tmpbuf);
523
return size;
524
}
525
526
/**
527
* rootid_from_xattr - translate root uid of vfs caps
528
*
529
* @value: vfs caps value which may be modified by this function
530
* @size: size of @ivalue
531
* @task_ns: user namespace of the caller
532
*/
533
static vfsuid_t rootid_from_xattr(const void *value, size_t size,
534
struct user_namespace *task_ns)
535
{
536
const struct vfs_ns_cap_data *nscap = value;
537
uid_t rootid = 0;
538
539
if (size == XATTR_CAPS_SZ_3)
540
rootid = le32_to_cpu(nscap->rootid);
541
542
return VFSUIDT_INIT(make_kuid(task_ns, rootid));
543
}
544
545
/* Return true if @cap, of @size bytes, is a valid v2 or v3 header. */
static bool validheader(size_t size, const struct vfs_cap_data *cap)
{
	if (is_v2header(size, cap))
		return true;

	return is_v3header(size, cap);
}
549
550
/**
551
* cap_convert_nscap - check vfs caps
552
*
553
* @idmap: idmap of the mount the inode was found from
554
* @dentry: used to retrieve inode to check permissions on
555
* @ivalue: vfs caps value which may be modified by this function
556
* @size: size of @ivalue
557
*
558
* User requested a write of security.capability. If needed, update the
559
* xattr to change from v2 to v3, or to fixup the v3 rootid.
560
*
561
* If the inode has been found through an idmapped mount the idmap of
562
* the vfsmount must be passed through @idmap. This function will then
563
* take care to map the inode according to @idmap before checking
564
* permissions. On non-idmapped mounts or if permission checking is to be
565
* performed on the raw inode simply pass @nop_mnt_idmap.
566
*
567
* Return: On success, return the new size; on error, return < 0.
568
*/
569
int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
		      const void **ivalue, size_t size)
{
	const struct vfs_cap_data *cap = *ivalue;
	struct inode *inode = d_backing_inode(dentry);
	struct user_namespace *task_ns = current_user_ns();
	struct user_namespace *fs_ns = inode->i_sb->s_user_ns;
	struct vfs_ns_cap_data *nscap;
	__u32 magic, nsmagic;
	vfsuid_t vfsrootid;
	kuid_t rootid;
	uid_t nsrootid;
	size_t newsize;

	if (!*ivalue)
		return -EINVAL;
	if (!validheader(size, cap))
		return -EINVAL;
	if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
		return -EPERM;

	/*
	 * A v2 xattr on a non-idmapped mount, written by someone holding
	 * CAP_SETFCAP over the filesystem's user namespace, is stored as is.
	 */
	if (size == XATTR_CAPS_SZ_2 && idmap == &nop_mnt_idmap &&
	    ns_capable(fs_ns, CAP_SETFCAP))
		return size;

	/* Resolve the requested root uid as seen from the caller's ns. */
	vfsrootid = rootid_from_xattr(*ivalue, size, task_ns);
	if (!vfsuid_valid(vfsrootid))
		return -EINVAL;

	/* Undo any idmapping of the mount to get the filesystem kuid. */
	rootid = from_vfsuid(idmap, fs_ns, vfsrootid);
	if (!uid_valid(rootid))
		return -EINVAL;

	/* The value written out is the uid as seen by the filesystem's ns. */
	nsrootid = from_kuid(fs_ns, rootid);
	if (nsrootid == (uid_t)-1)
		return -EINVAL;

	newsize = sizeof(struct vfs_ns_cap_data);
	nscap = kmalloc(newsize, GFP_ATOMIC);
	if (!nscap)
		return -ENOMEM;

	/* Build the v3 header, carrying over the effective flag. */
	nsmagic = VFS_CAP_REVISION_3;
	magic = le32_to_cpu(cap->magic_etc);
	if (magic & VFS_CAP_FLAGS_EFFECTIVE)
		nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
	nscap->magic_etc = cpu_to_le32(nsmagic);
	nscap->rootid = cpu_to_le32(nsrootid);
	memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);

	*ivalue = nscap;
	return newsize;
}
621
622
/*
623
* Calculate the new process capability sets from the capability sets attached
624
* to a file.
625
*/
626
static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
627
struct linux_binprm *bprm,
628
bool *effective,
629
bool *has_fcap)
630
{
631
struct cred *new = bprm->cred;
632
int ret = 0;
633
634
if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
635
*effective = true;
636
637
if (caps->magic_etc & VFS_CAP_REVISION_MASK)
638
*has_fcap = true;
639
640
/*
641
* pP' = (X & fP) | (pI & fI)
642
* The addition of pA' is handled later.
643
*/
644
new->cap_permitted.val =
645
(new->cap_bset.val & caps->permitted.val) |
646
(new->cap_inheritable.val & caps->inheritable.val);
647
648
if (caps->permitted.val & ~new->cap_permitted.val)
649
/* insufficient to execute correctly */
650
ret = -EPERM;
651
652
/*
653
* For legacy apps, with no internal support for recognizing they
654
* do not have enough capabilities, we return an error if they are
655
* missing some "forced" (aka file-permitted) capabilities.
656
*/
657
return *effective ? ret : 0;
658
}
659
660
/**
661
* get_vfs_caps_from_disk - retrieve vfs caps from disk
662
*
663
* @idmap: idmap of the mount the inode was found from
664
* @dentry: dentry from which @inode is retrieved
665
* @cpu_caps: vfs capabilities
666
*
667
* Extract the on-exec-apply capability sets for an executable file.
668
*
669
* If the inode has been found through an idmapped mount the idmap of
670
* the vfsmount must be passed through @idmap. This function will then
671
* take care to map the inode according to @idmap before checking
672
* permissions. On non-idmapped mounts or if permission checking is to be
673
* performed on the raw inode simply pass @nop_mnt_idmap.
674
*/
675
int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
676
const struct dentry *dentry,
677
struct cpu_vfs_cap_data *cpu_caps)
678
{
679
struct inode *inode = d_backing_inode(dentry);
680
__u32 magic_etc;
681
int size;
682
struct vfs_ns_cap_data data, *nscaps = &data;
683
struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
684
kuid_t rootkuid;
685
vfsuid_t rootvfsuid;
686
struct user_namespace *fs_ns;
687
688
memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
689
690
if (!inode)
691
return -ENODATA;
692
693
fs_ns = inode->i_sb->s_user_ns;
694
size = __vfs_getxattr((struct dentry *)dentry, inode,
695
XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
696
if (size == -ENODATA || size == -EOPNOTSUPP)
697
/* no data, that's ok */
698
return -ENODATA;
699
700
if (size < 0)
701
return size;
702
703
if (size < sizeof(magic_etc))
704
return -EINVAL;
705
706
cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
707
708
rootkuid = make_kuid(fs_ns, 0);
709
switch (magic_etc & VFS_CAP_REVISION_MASK) {
710
case VFS_CAP_REVISION_1:
711
if (size != XATTR_CAPS_SZ_1)
712
return -EINVAL;
713
break;
714
case VFS_CAP_REVISION_2:
715
if (size != XATTR_CAPS_SZ_2)
716
return -EINVAL;
717
break;
718
case VFS_CAP_REVISION_3:
719
if (size != XATTR_CAPS_SZ_3)
720
return -EINVAL;
721
rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
722
break;
723
724
default:
725
return -EINVAL;
726
}
727
728
rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);
729
if (!vfsuid_valid(rootvfsuid))
730
return -ENODATA;
731
732
/* Limit the caps to the mounter of the filesystem
733
* or the more limited uid specified in the xattr.
734
*/
735
if (!vfsuid_root_in_currentns(rootvfsuid))
736
return -ENODATA;
737
738
cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
739
cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
740
741
/*
742
* Rev1 had just a single 32-bit word, later expanded
743
* to a second one for the high bits
744
*/
745
if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
746
cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
747
cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
748
}
749
750
cpu_caps->permitted.val &= CAP_VALID_MASK;
751
cpu_caps->inheritable.val &= CAP_VALID_MASK;
752
753
cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);
754
755
return 0;
756
}
757
758
/*
759
* Attempt to get the on-exec apply capability sets for an executable file from
760
* its xattrs and, if present, apply them to the proposed credentials being
761
* constructed by execve().
762
*/
763
static int get_file_caps(struct linux_binprm *bprm, const struct file *file,
			 bool *effective, bool *has_fcap)
{
	struct cpu_vfs_cap_data vcaps;
	int rc;

	cap_clear(bprm->cred->cap_permitted);

	if (!file_caps_enabled)
		return 0;

	if (!mnt_may_suid(file->f_path.mnt))
		return 0;

	/*
	 * This check is redundant with mnt_may_suid() but is kept to make
	 * explicit that capability bits are limited to s_user_ns and its
	 * descendants.
	 */
	if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
		return 0;

	rc = get_vfs_caps_from_disk(file_mnt_idmap(file),
				    file->f_path.dentry, &vcaps);

	/* No file capabilities: succeed, leaving pP cleared as above. */
	if (rc == -ENODATA)
		return 0;

	if (rc < 0) {
		if (rc == -EINVAL)
			printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
			       bprm->filename);
	} else {
		rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
	}

	/* On any failure make sure no permitted bits are left behind. */
	if (rc)
		cap_clear(bprm->cred->cap_permitted);

	return rc;
}
804
805
/* True unless the SECURE_NOROOT securebit is set. */
static inline bool root_privileged(void)
{
	return !issecure(SECURE_NOROOT);
}
806
807
/* True if @cred's real uid equals @uid. */
static inline bool __is_real(kuid_t uid, struct cred *cred)
{
	return uid_eq(cred->uid, uid);
}
809
810
/* True if @cred's effective uid equals @uid. */
static inline bool __is_eff(kuid_t uid, struct cred *cred)
{
	return uid_eq(cred->euid, uid);
}
812
813
/* True if @cred's effective uid is @uid while its real uid is not. */
static inline bool __is_suid(kuid_t uid, struct cred *cred)
{
	if (__is_real(uid, cred))
		return false;

	return __is_eff(uid, cred);
}
815
816
/*
817
* handle_privileged_root - Handle case of privileged root
818
* @bprm: The execution parameters, including the proposed creds
819
* @has_fcap: Are any file capabilities set?
820
* @effective: Do we have effective root privilege?
821
* @root_uid: This namespace's root UID WRT initial USER namespace
822
*
823
* Handle the case where root is privileged and hasn't been neutered by
824
* SECURE_NOROOT. If file capabilities are set, they won't be combined with
825
* set UID root and nothing is changed. If we are root, cap_permitted is
826
* updated. If we have become set UID root, the effective bit is set.
827
*/
828
static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
				   bool *effective, kuid_t root_uid)
{
	const struct cred *old = current_cred();
	struct cred *new = bprm->cred;
	bool eff_root, real_root;

	if (!root_privileged())
		return;

	eff_root = __is_eff(root_uid, new);
	real_root = __is_real(root_uid, new);

	/*
	 * If the legacy file capability is set, then don't set privs
	 * for a setuid root binary run by a non-root user. Do set it
	 * for a root user just to cause least surprise to an admin.
	 */
	if (has_fcap && !real_root && eff_root) {
		warn_setuid_and_fcaps_mixed(bprm->filename);
		return;
	}

	/*
	 * To support inheritance of root-permissions and suid-root
	 * executables under compatibility mode, we override the
	 * capability sets for the file.
	 */
	if (eff_root || real_root) {
		/* pP' = (cap_bset & ~0) | (pI & ~0) */
		new->cap_permitted = cap_combine(old->cap_bset,
						 old->cap_inheritable);
	}

	/* If only the real uid is 0, we do not set the effective bit. */
	if (eff_root)
		*effective = true;
}
861
862
/*
 * Capability-set comparison helpers.  Pointer arguments and the whole
 * expansion are parenthesized so the macros behave as single expressions
 * whatever is passed in (e.g. "&cred") and wherever they are used.
 *
 * __cap_gained(field, target, source): target's cap_<field> has bits that
 *	source's cap_<field> lacks.
 * __cap_grew(target, source, cred): within one cred, cap_<target> has bits
 *	that cap_<source> lacks.
 * __cap_full(field, cred): cred's cap_<field> contains all of CAP_FULL_SET.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
868
869
/*
870
* 1) Audit candidate if current->cap_effective is set
871
*
872
* We do not bother to audit if 3 things are true:
873
* 1) cap_effective has all caps
874
* 2) we became root *OR* were already root
875
* 3) root is supposed to have all caps (SECURE_NOROOT)
876
* Since this is just a normal root execing a process.
877
*
878
* Number 1 above might fail if you don't have a full bset, but I think
879
* that is interesting information to audit.
880
*
881
* A number of other conditions require logging:
882
* 2) something prevented setuid root getting all caps
883
* 3) non-setuid root gets fcaps
884
* 4) non-setuid root gets ambient
885
*/
886
static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
887
kuid_t root, bool has_fcap)
888
{
889
bool ret = false;
890
891
if ((__cap_grew(effective, ambient, new) &&
892
!(__cap_full(effective, new) &&
893
(__is_eff(root, new) || __is_real(root, new)) &&
894
root_privileged())) ||
895
(root_privileged() &&
896
__is_suid(root, new) &&
897
!__cap_full(effective, new)) ||
898
(uid_eq(new->euid, old->euid) &&
899
((has_fcap &&
900
__cap_gained(permitted, new, old)) ||
901
__cap_gained(ambient, new, old))))
902
903
ret = true;
904
905
return ret;
906
}
907
908
/**
909
* cap_bprm_creds_from_file - Set up the proposed credentials for execve().
910
* @bprm: The execution parameters, including the proposed creds
911
* @file: The file to pull the credentials from
912
*
913
* Set up the proposed credentials for a new execution context being
914
* constructed by execve(). The proposed creds in @bprm->cred is altered,
915
* which won't take effect immediately.
916
*
917
* Return: 0 if successful, -ve on error.
918
*/
919
int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file)
920
{
921
/* Process setpcap binaries and capabilities for uid 0 */
922
const struct cred *old = current_cred();
923
struct cred *new = bprm->cred;
924
bool effective = false, has_fcap = false, id_changed;
925
int ret;
926
kuid_t root_uid;
927
928
if (WARN_ON(!cap_ambient_invariant_ok(old)))
929
return -EPERM;
930
931
ret = get_file_caps(bprm, file, &effective, &has_fcap);
932
if (ret < 0)
933
return ret;
934
935
root_uid = make_kuid(new->user_ns, 0);
936
937
handle_privileged_root(bprm, has_fcap, &effective, root_uid);
938
939
/* if we have fs caps, clear dangerous personality flags */
940
if (__cap_gained(permitted, new, old))
941
bprm->per_clear |= PER_CLEAR_ON_SETID;
942
943
/* Don't let someone trace a set[ug]id/setpcap binary with the revised
944
* credentials unless they have the appropriate permit.
945
*
946
* In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
947
*/
948
id_changed = !uid_eq(new->euid, old->euid) || !in_group_p(new->egid);
949
950
if ((id_changed || __cap_gained(permitted, new, old)) &&
951
((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
952
!ptracer_capable(current, new->user_ns))) {
953
/* downgrade; they get no more than they had, and maybe less */
954
if (!ns_capable(new->user_ns, CAP_SETUID) ||
955
(bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
956
new->euid = new->uid;
957
new->egid = new->gid;
958
}
959
new->cap_permitted = cap_intersect(new->cap_permitted,
960
old->cap_permitted);
961
}
962
963
new->suid = new->fsuid = new->euid;
964
new->sgid = new->fsgid = new->egid;
965
966
/* File caps or setid cancels ambient. */
967
if (has_fcap || id_changed)
968
cap_clear(new->cap_ambient);
969
970
/*
971
* Now that we've computed pA', update pP' to give:
972
* pP' = (X & fP) | (pI & fI) | pA'
973
*/
974
new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
975
976
/*
977
* Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set,
978
* this is the same as pE' = (fE ? pP' : 0) | pA'.
979
*/
980
if (effective)
981
new->cap_effective = new->cap_permitted;
982
else
983
new->cap_effective = new->cap_ambient;
984
985
if (WARN_ON(!cap_ambient_invariant_ok(new)))
986
return -EPERM;
987
988
if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
989
ret = audit_log_bprm_fcaps(bprm, new, old);
990
if (ret < 0)
991
return ret;
992
}
993
994
new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
995
996
if (WARN_ON(!cap_ambient_invariant_ok(new)))
997
return -EPERM;
998
999
/* Check for privilege-elevated exec. */
1000
if (id_changed ||
1001
!uid_eq(new->euid, old->uid) ||
1002
!gid_eq(new->egid, old->gid) ||
1003
(!__is_real(root_uid, new) &&
1004
(effective ||
1005
__cap_grew(permitted, ambient, new))))
1006
bprm->secureexec = 1;
1007
1008
return 0;
1009
}
1010
1011
/**
1012
* cap_inode_setxattr - Determine whether an xattr may be altered
1013
* @dentry: The inode/dentry being altered
1014
* @name: The name of the xattr to be changed
1015
* @value: The value that the xattr will be changed to
1016
* @size: The size of value
1017
* @flags: The replacement flag
1018
*
1019
* Determine whether an xattr may be altered or set on an inode, returning 0 if
1020
* permission is granted, -ve if denied.
1021
*
1022
* This is used to make sure security xattrs don't get updated or set by those
1023
* who aren't privileged to do so.
1024
*/
1025
int cap_inode_setxattr(struct dentry *dentry, const char *name,
1026
const void *value, size_t size, int flags)
1027
{
1028
struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1029
1030
/* Ignore non-security xattrs */
1031
if (strncmp(name, XATTR_SECURITY_PREFIX,
1032
XATTR_SECURITY_PREFIX_LEN) != 0)
1033
return 0;
1034
1035
/*
1036
* For XATTR_NAME_CAPS the check will be done in
1037
* cap_convert_nscap(), called by setxattr()
1038
*/
1039
if (strcmp(name, XATTR_NAME_CAPS) == 0)
1040
return 0;
1041
1042
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1043
return -EPERM;
1044
return 0;
1045
}
1046
1047
/**
1048
* cap_inode_removexattr - Determine whether an xattr may be removed
1049
*
1050
* @idmap: idmap of the mount the inode was found from
1051
* @dentry: The inode/dentry being altered
1052
* @name: The name of the xattr to be changed
1053
*
1054
* Determine whether an xattr may be removed from an inode, returning 0 if
1055
* permission is granted, -ve if denied.
1056
*
1057
* If the inode has been found through an idmapped mount the idmap of
1058
* the vfsmount must be passed through @idmap. This function will then
1059
* take care to map the inode according to @idmap before checking
1060
* permissions. On non-idmapped mounts or if permission checking is to be
1061
* performed on the raw inode simply pass @nop_mnt_idmap.
1062
*
1063
* This is used to make sure security xattrs don't get removed by those who
1064
* aren't privileged to remove them.
1065
*/
1066
int cap_inode_removexattr(struct mnt_idmap *idmap,
1067
struct dentry *dentry, const char *name)
1068
{
1069
struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1070
1071
/* Ignore non-security xattrs */
1072
if (strncmp(name, XATTR_SECURITY_PREFIX,
1073
XATTR_SECURITY_PREFIX_LEN) != 0)
1074
return 0;
1075
1076
if (strcmp(name, XATTR_NAME_CAPS) == 0) {
1077
/* security.capability gets namespaced */
1078
struct inode *inode = d_backing_inode(dentry);
1079
if (!inode)
1080
return -EINVAL;
1081
if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
1082
return -EPERM;
1083
return 0;
1084
}
1085
1086
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1087
return -EPERM;
1088
return 0;
1089
}
1090
1091
/*
1092
* cap_emulate_setxuid() fixes the effective / permitted capabilities of
1093
* a process after a call to setuid, setreuid, or setresuid.
1094
*
1095
* 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
1096
* {r,e,s}uid != 0, the permitted and effective capabilities are
1097
* cleared.
1098
*
1099
* 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
1100
* capabilities of the process are cleared.
1101
*
1102
* 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
1103
* capabilities are set to the permitted capabilities.
1104
*
1105
* fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
1106
* never happen.
1107
*
1108
* -astor
1109
*
1110
* cevans - New behaviour, Oct '99
1111
* A process may, via prctl(), elect to keep its capabilities when it
1112
* calls setuid() and switches away from uid==0. Both permitted and
1113
* effective sets will be retained.
1114
* Without this change, it was impossible for a daemon to drop only some
1115
* of its privilege. The call to setuid(!=0) would drop all privileges!
1116
* Keeping uid 0 is not an option because uid 0 owns too many vital
1117
* files..
1118
* Thanks to Olaf Kirch and Peter Benie for spotting this.
1119
*/
1120
static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
1121
{
1122
kuid_t root_uid = make_kuid(old->user_ns, 0);
1123
1124
if ((uid_eq(old->uid, root_uid) ||
1125
uid_eq(old->euid, root_uid) ||
1126
uid_eq(old->suid, root_uid)) &&
1127
(!uid_eq(new->uid, root_uid) &&
1128
!uid_eq(new->euid, root_uid) &&
1129
!uid_eq(new->suid, root_uid))) {
1130
if (!issecure(SECURE_KEEP_CAPS)) {
1131
cap_clear(new->cap_permitted);
1132
cap_clear(new->cap_effective);
1133
}
1134
1135
/*
1136
* Pre-ambient programs expect setresuid to nonroot followed
1137
* by exec to drop capabilities. We should make sure that
1138
* this remains the case.
1139
*/
1140
cap_clear(new->cap_ambient);
1141
}
1142
if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1143
cap_clear(new->cap_effective);
1144
if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1145
new->cap_effective = new->cap_permitted;
1146
}
1147
1148
/**
1149
* cap_task_fix_setuid - Fix up the results of setuid() call
1150
* @new: The proposed credentials
1151
* @old: The current task's current credentials
1152
* @flags: Indications of what has changed
1153
*
1154
* Fix up the results of setuid() call before the credential changes are
1155
* actually applied.
1156
*
1157
* Return: 0 to grant the changes, -ve to deny them.
1158
*/
1159
int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1160
{
1161
switch (flags) {
1162
case LSM_SETID_RE:
1163
case LSM_SETID_ID:
1164
case LSM_SETID_RES:
1165
/* juggle the capabilities to follow [RES]UID changes unless
1166
* otherwise suppressed */
1167
if (!issecure(SECURE_NO_SETUID_FIXUP))
1168
cap_emulate_setxuid(new, old);
1169
break;
1170
1171
case LSM_SETID_FS:
1172
/* juggle the capabilities to follow FSUID changes, unless
1173
* otherwise suppressed
1174
*
1175
* FIXME - is fsuser used for all CAP_FS_MASK capabilities?
1176
* if not, we might be a bit too harsh here.
1177
*/
1178
if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1179
kuid_t root_uid = make_kuid(old->user_ns, 0);
1180
if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1181
new->cap_effective =
1182
cap_drop_fs_set(new->cap_effective);
1183
1184
if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1185
new->cap_effective =
1186
cap_raise_fs_set(new->cap_effective,
1187
new->cap_permitted);
1188
}
1189
break;
1190
1191
default:
1192
return -EINVAL;
1193
}
1194
1195
return 0;
1196
}
1197
1198
/*
1199
* Rationale: code calling task_setscheduler, task_setioprio, and
1200
* task_setnice, assumes that
1201
* . if capable(cap_sys_nice), then those actions should be allowed
1202
* . if not capable(cap_sys_nice), but acting on your own processes,
1203
* then those actions should be allowed
1204
* This is insufficient now since you can call code without suid, but
1205
* yet with increased caps.
1206
* So we check for increased caps on the target process.
1207
*/
1208
static int cap_safe_nice(struct task_struct *p)
1209
{
1210
int is_subset, ret = 0;
1211
1212
rcu_read_lock();
1213
is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1214
current_cred()->cap_permitted);
1215
if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1216
ret = -EPERM;
1217
rcu_read_unlock();
1218
1219
return ret;
1220
}
1221
1222
/**
 * cap_task_setscheduler - Determine if scheduler policy change is permitted
 * @p: The task to affect
 *
 * Applies the cap_safe_nice() check to @p to decide whether its scheduler
 * policy may be changed.
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1235
1236
/**
 * cap_task_setioprio - Determine if I/O priority change is permitted
 * @p: The task to affect
 * @ioprio: The I/O priority to set (not examined here)
 *
 * Applies the cap_safe_nice() check to @p to decide whether its I/O
 * priority may be changed.
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1250
1251
/**
 * cap_task_setnice - Determine if task priority change is permitted
 * @p: The task to affect
 * @nice: The nice value to set (not examined here)
 *
 * Applies the cap_safe_nice() check to @p to decide whether its priority
 * may be changed.
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1265
1266
/*
1267
* Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from
1268
* the current task's bounding set. Returns 0 on success, -ve on error.
1269
*/
1270
static int cap_prctl_drop(unsigned long cap)
1271
{
1272
struct cred *new;
1273
1274
if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1275
return -EPERM;
1276
if (!cap_valid(cap))
1277
return -EINVAL;
1278
1279
new = prepare_creds();
1280
if (!new)
1281
return -ENOMEM;
1282
cap_lower(new->cap_bset, cap);
1283
return commit_creds(new);
1284
}
1285
1286
/**
1287
* cap_task_prctl - Implement process control functions for this security module
1288
* @option: The process control function requested
1289
* @arg2: The argument data for this function
1290
* @arg3: The argument data for this function
1291
* @arg4: The argument data for this function
1292
* @arg5: The argument data for this function
1293
*
1294
* Allow process control functions (sys_prctl()) to alter capabilities; may
1295
* also deny access to other functions not otherwise implemented here.
1296
*
1297
* Return: 0 or +ve on success, -ENOSYS if this function is not implemented
1298
* here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM
1299
* modules will consider performing the function.
1300
*/
1301
int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1302
unsigned long arg4, unsigned long arg5)
1303
{
1304
const struct cred *old = current_cred();
1305
struct cred *new;
1306
1307
switch (option) {
1308
case PR_CAPBSET_READ:
1309
if (!cap_valid(arg2))
1310
return -EINVAL;
1311
return !!cap_raised(old->cap_bset, arg2);
1312
1313
case PR_CAPBSET_DROP:
1314
return cap_prctl_drop(arg2);
1315
1316
/*
1317
* The next four prctl's remain to assist with transitioning a
1318
* system from legacy UID=0 based privilege (when filesystem
1319
* capabilities are not in use) to a system using filesystem
1320
* capabilities only - as the POSIX.1e draft intended.
1321
*
1322
* Note:
1323
*
1324
* PR_SET_SECUREBITS =
1325
* issecure_mask(SECURE_KEEP_CAPS_LOCKED)
1326
* | issecure_mask(SECURE_NOROOT)
1327
* | issecure_mask(SECURE_NOROOT_LOCKED)
1328
* | issecure_mask(SECURE_NO_SETUID_FIXUP)
1329
* | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
1330
*
1331
* will ensure that the current process and all of its
1332
* children will be locked into a pure
1333
* capability-based-privilege environment.
1334
*/
1335
case PR_SET_SECUREBITS:
1336
if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1337
& (old->securebits ^ arg2)) /*[1]*/
1338
|| ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/
1339
|| (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
1340
/*
1341
* [1] no changing of bits that are locked
1342
* [2] no unlocking of locks
1343
* [3] no setting of unsupported bits
1344
*/
1345
)
1346
/* cannot change a locked bit */
1347
return -EPERM;
1348
1349
/*
1350
* Doing anything requires privilege (go read about the
1351
* "sendmail capabilities bug"), except for unprivileged bits.
1352
* Indeed, the SECURE_ALL_UNPRIVILEGED bits are not
1353
* restrictions enforced by the kernel but by user space on
1354
* itself.
1355
*/
1356
if (cap_capable(current_cred(), current_cred()->user_ns,
1357
CAP_SETPCAP, CAP_OPT_NONE) != 0) {
1358
const unsigned long unpriv_and_locks =
1359
SECURE_ALL_UNPRIVILEGED |
1360
SECURE_ALL_UNPRIVILEGED << 1;
1361
const unsigned long changed = old->securebits ^ arg2;
1362
1363
/* For legacy reason, denies non-change. */
1364
if (!changed)
1365
return -EPERM;
1366
1367
/* Denies privileged changes. */
1368
if (changed & ~unpriv_and_locks)
1369
return -EPERM;
1370
}
1371
1372
new = prepare_creds();
1373
if (!new)
1374
return -ENOMEM;
1375
new->securebits = arg2;
1376
return commit_creds(new);
1377
1378
case PR_GET_SECUREBITS:
1379
return old->securebits;
1380
1381
case PR_GET_KEEPCAPS:
1382
return !!issecure(SECURE_KEEP_CAPS);
1383
1384
case PR_SET_KEEPCAPS:
1385
if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
1386
return -EINVAL;
1387
if (issecure(SECURE_KEEP_CAPS_LOCKED))
1388
return -EPERM;
1389
1390
new = prepare_creds();
1391
if (!new)
1392
return -ENOMEM;
1393
if (arg2)
1394
new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1395
else
1396
new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1397
return commit_creds(new);
1398
1399
case PR_CAP_AMBIENT:
1400
if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1401
if (arg3 | arg4 | arg5)
1402
return -EINVAL;
1403
1404
new = prepare_creds();
1405
if (!new)
1406
return -ENOMEM;
1407
cap_clear(new->cap_ambient);
1408
return commit_creds(new);
1409
}
1410
1411
if (((!cap_valid(arg3)) | arg4 | arg5))
1412
return -EINVAL;
1413
1414
if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1415
return !!cap_raised(current_cred()->cap_ambient, arg3);
1416
} else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1417
arg2 != PR_CAP_AMBIENT_LOWER) {
1418
return -EINVAL;
1419
} else {
1420
if (arg2 == PR_CAP_AMBIENT_RAISE &&
1421
(!cap_raised(current_cred()->cap_permitted, arg3) ||
1422
!cap_raised(current_cred()->cap_inheritable,
1423
arg3) ||
1424
issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1425
return -EPERM;
1426
1427
new = prepare_creds();
1428
if (!new)
1429
return -ENOMEM;
1430
if (arg2 == PR_CAP_AMBIENT_RAISE)
1431
cap_raise(new->cap_ambient, arg3);
1432
else
1433
cap_lower(new->cap_ambient, arg3);
1434
return commit_creds(new);
1435
}
1436
1437
default:
1438
/* No functionality available - continue with default */
1439
return -ENOSYS;
1440
}
1441
}
1442
1443
/**
1444
* cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
1445
* @mm: The VM space in which the new mapping is to be made
1446
* @pages: The size of the mapping
1447
*
1448
* Determine whether the allocation of a new virtual mapping by the current
1449
* task is permitted.
1450
*
1451
* Return: 0 if permission granted, negative error code if not.
1452
*/
1453
int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1454
{
1455
return cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
1456
CAP_OPT_NOAUDIT);
1457
}
1458
1459
/**
1460
* cap_mmap_addr - check if able to map given addr
1461
* @addr: address attempting to be mapped
1462
*
1463
* If the process is attempting to map memory below dac_mmap_min_addr they need
1464
* CAP_SYS_RAWIO. The other parameters to this function are unused by the
1465
* capability security module.
1466
*
1467
* Return: 0 if this mapping should be allowed or -EPERM if not.
1468
*/
1469
int cap_mmap_addr(unsigned long addr)
1470
{
1471
int ret = 0;
1472
1473
if (addr < dac_mmap_min_addr) {
1474
ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1475
CAP_OPT_NONE);
1476
/* set PF_SUPERPRIV if it turns out we allow the low mmap */
1477
if (ret == 0)
1478
current->flags |= PF_SUPERPRIV;
1479
}
1480
return ret;
1481
}
1482
1483
#ifdef CONFIG_SECURITY
1484
1485
static const struct lsm_id capability_lsmid = {
1486
.name = "capability",
1487
.id = LSM_ID_CAPABILITY,
1488
};
1489
1490
/*
 * Hook table handed to security_add_hooks() by capability_init(): each
 * entry pairs an LSM hook name with the cap_*() function implementing it.
 */
static struct security_hook_list capability_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(capable,			cap_capable),
	LSM_HOOK_INIT(settime,			cap_settime),
	LSM_HOOK_INIT(ptrace_access_check,	cap_ptrace_access_check),
	LSM_HOOK_INIT(ptrace_traceme,		cap_ptrace_traceme),
	LSM_HOOK_INIT(capget,			cap_capget),
	LSM_HOOK_INIT(capset,			cap_capset),
	LSM_HOOK_INIT(bprm_creds_from_file,	cap_bprm_creds_from_file),
	LSM_HOOK_INIT(inode_need_killpriv,	cap_inode_need_killpriv),
	LSM_HOOK_INIT(inode_killpriv,		cap_inode_killpriv),
	LSM_HOOK_INIT(inode_getsecurity,	cap_inode_getsecurity),
	LSM_HOOK_INIT(mmap_addr,		cap_mmap_addr),
	LSM_HOOK_INIT(task_fix_setuid,		cap_task_fix_setuid),
	LSM_HOOK_INIT(task_prctl,		cap_task_prctl),
	LSM_HOOK_INIT(task_setscheduler,	cap_task_setscheduler),
	LSM_HOOK_INIT(task_setioprio,		cap_task_setioprio),
	LSM_HOOK_INIT(task_setnice,		cap_task_setnice),
	LSM_HOOK_INIT(vm_enough_memory,		cap_vm_enough_memory),
};
1509
1510
/*
 * Register every entry of capability_hooks[] under capability_lsmid.
 * Always returns 0.
 */
static int __init capability_init(void)
{
	security_add_hooks(capability_hooks,
			   ARRAY_SIZE(capability_hooks),
			   &capability_lsmid);

	return 0;
}
1516
1517
DEFINE_LSM(capability) = {
1518
.id = &capability_lsmid,
1519
.order = LSM_ORDER_FIRST,
1520
.init = capability_init,
1521
};
1522
1523
#endif /* CONFIG_SECURITY */
1524
1525