// SPDX-License-Identifier: GPL-2.0-or-later1/* Common capabilities, needed by capability.o.2*/34#include <linux/capability.h>5#include <linux/audit.h>6#include <linux/init.h>7#include <linux/kernel.h>8#include <linux/lsm_hooks.h>9#include <linux/file.h>10#include <linux/mm.h>11#include <linux/mman.h>12#include <linux/pagemap.h>13#include <linux/swap.h>14#include <linux/skbuff.h>15#include <linux/netlink.h>16#include <linux/ptrace.h>17#include <linux/xattr.h>18#include <linux/hugetlb.h>19#include <linux/mount.h>20#include <linux/sched.h>21#include <linux/prctl.h>22#include <linux/securebits.h>23#include <linux/user_namespace.h>24#include <linux/binfmts.h>25#include <linux/personality.h>26#include <linux/mnt_idmapping.h>27#include <uapi/linux/lsm.h>2829#define CREATE_TRACE_POINTS30#include <trace/events/capability.h>3132/*33* If a non-root user executes a setuid-root binary in34* !secure(SECURE_NOROOT) mode, then we raise capabilities.35* However if fE is also set, then the intent is for only36* the file capabilities to be applied, and the setuid-root37* bit is left on either to change the uid (plausible) or38* to get full privilege on a kernel without file capabilities39* support. So in that case we do not raise capabilities.40*41* Warn if that happens, once per boot.42*/43static void warn_setuid_and_fcaps_mixed(const char *fname)44{45static int warned;46if (!warned) {47printk(KERN_INFO "warning: `%s' has both setuid-root and"48" effective capabilities. Therefore not raising all"49" capabilities.\n", fname);50warned = 1;51}52}5354/**55* cap_capable_helper - Determine whether a task has a particular effective56* capability.57* @cred: The credentials to use58* @target_ns: The user namespace of the resource being accessed59* @cred_ns: The user namespace of the credentials60* @cap: The capability to check for61*62* Determine whether the nominated task has the specified capability amongst63* its effective set, returning 0 if it does, -ve if it does not.64*65* See cap_capable for more details.66*/67static inline int cap_capable_helper(const struct cred *cred,68struct user_namespace *target_ns,69const struct user_namespace *cred_ns,70int cap)71{72struct user_namespace *ns = target_ns;7374/* See if cred has the capability in the target user namespace75* by examining the target user namespace and all of the target76* user namespace's parents.77*/78for (;;) {79/* Do we have the necessary capabilities? */80if (likely(ns == cred_ns))81return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;8283/*84* If we're already at a lower level than we're looking for,85* we're done searching.86*/87if (ns->level <= cred_ns->level)88return -EPERM;8990/*91* The owner of the user namespace in the parent of the92* user namespace has all caps.93*/94if ((ns->parent == cred_ns) && uid_eq(ns->owner, cred->euid))95return 0;9697/*98* If you have a capability in a parent user ns, then you have99* it over all children user namespaces as well.100*/101ns = ns->parent;102}103104/* We never get here */105}106107/**108* cap_capable - Determine whether a task has a particular effective capability109* @cred: The credentials to use110* @target_ns: The user namespace of the resource being accessed111* @cap: The capability to check for112* @opts: Bitmask of options defined in include/linux/security.h (unused)113*114* Determine whether the nominated task has the specified capability amongst115* its effective set, returning 0 if it does, -ve if it does not.116*117* NOTE WELL: cap_capable() has reverse semantics to the capable() call118* and friends. That is cap_capable() returns an int 0 when a task has119* a capability, while the kernel's capable(), has_ns_capability(),120* has_ns_capability_noaudit(), and has_capability_noaudit() return a121* bool true (1) for this case.122*/123int cap_capable(const struct cred *cred, struct user_namespace *target_ns,124int cap, unsigned int opts)125{126const struct user_namespace *cred_ns = cred->user_ns;127int ret = cap_capable_helper(cred, target_ns, cred_ns, cap);128129trace_cap_capable(cred, target_ns, cred_ns, cap, ret);130return ret;131}132133/**134* cap_settime - Determine whether the current process may set the system clock135* @ts: The time to set136* @tz: The timezone to set137*138* Determine whether the current process may set the system clock and timezone139* information, returning 0 if permission granted, -ve if denied.140*/141int cap_settime(const struct timespec64 *ts, const struct timezone *tz)142{143if (!capable(CAP_SYS_TIME))144return -EPERM;145return 0;146}147148/**149* cap_ptrace_access_check - Determine whether the current process may access150* another151* @child: The process to be accessed152* @mode: The mode of attachment.153*154* If we are in the same or an ancestor user_ns and have all the target155* task's capabilities, then ptrace access is allowed.156* If we have the ptrace capability to the target user_ns, then ptrace157* access is allowed.158* Else denied.159*160* Determine whether a process may access another, returning 0 if permission161* granted, -ve if denied.162*/163int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)164{165int ret = 0;166const struct cred *cred, *child_cred;167const kernel_cap_t *caller_caps;168169rcu_read_lock();170cred = current_cred();171child_cred = __task_cred(child);172if (mode & PTRACE_MODE_FSCREDS)173caller_caps = &cred->cap_effective;174else175caller_caps = &cred->cap_permitted;176if (cred->user_ns == child_cred->user_ns &&177cap_issubset(child_cred->cap_permitted, *caller_caps))178goto out;179if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))180goto out;181ret = -EPERM;182out:183rcu_read_unlock();184return ret;185}186187/**188* cap_ptrace_traceme - Determine whether another process may trace the current189* @parent: The task proposed to be the tracer190*191* If parent is in the same or an ancestor user_ns and has all current's192* capabilities, then ptrace access is allowed.193* If parent has the ptrace capability to current's user_ns, then ptrace194* access is allowed.195* Else denied.196*197* Determine whether the nominated task is permitted to trace the current198* process, returning 0 if permission is granted, -ve if denied.199*/200int cap_ptrace_traceme(struct task_struct *parent)201{202int ret = 0;203const struct cred *cred, *child_cred;204205rcu_read_lock();206cred = __task_cred(parent);207child_cred = current_cred();208if (cred->user_ns == child_cred->user_ns &&209cap_issubset(child_cred->cap_permitted, cred->cap_permitted))210goto out;211if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))212goto out;213ret = -EPERM;214out:215rcu_read_unlock();216return ret;217}218219/**220* cap_capget - Retrieve a task's capability sets221* @target: The task from which to retrieve the capability sets222* @effective: The place to record the effective set223* @inheritable: The place to record the inheritable set224* @permitted: The place to record the permitted set225*226* This function retrieves the capabilities of the nominated task and returns227* them to the caller.228*/229int cap_capget(const struct task_struct *target, kernel_cap_t *effective,230kernel_cap_t *inheritable, kernel_cap_t *permitted)231{232const struct cred *cred;233234/* Derived from kernel/capability.c:sys_capget. */235rcu_read_lock();236cred = __task_cred(target);237*effective = cred->cap_effective;238*inheritable = cred->cap_inheritable;239*permitted = cred->cap_permitted;240rcu_read_unlock();241return 0;242}243244/*245* Determine whether the inheritable capabilities are limited to the old246* permitted set. Returns 1 if they are limited, 0 if they are not.247*/248static inline int cap_inh_is_capped(void)249{250/* they are so limited unless the current task has the CAP_SETPCAP251* capability252*/253if (cap_capable(current_cred(), current_cred()->user_ns,254CAP_SETPCAP, CAP_OPT_NONE) == 0)255return 0;256return 1;257}258259/**260* cap_capset - Validate and apply proposed changes to current's capabilities261* @new: The proposed new credentials; alterations should be made here262* @old: The current task's current credentials263* @effective: A pointer to the proposed new effective capabilities set264* @inheritable: A pointer to the proposed new inheritable capabilities set265* @permitted: A pointer to the proposed new permitted capabilities set266*267* This function validates and applies a proposed mass change to the current268* process's capability sets. The changes are made to the proposed new269* credentials, and assuming no error, will be committed by the caller of LSM.270*/271int cap_capset(struct cred *new,272const struct cred *old,273const kernel_cap_t *effective,274const kernel_cap_t *inheritable,275const kernel_cap_t *permitted)276{277if (cap_inh_is_capped() &&278!cap_issubset(*inheritable,279cap_combine(old->cap_inheritable,280old->cap_permitted)))281/* incapable of using this inheritable set */282return -EPERM;283284if (!cap_issubset(*inheritable,285cap_combine(old->cap_inheritable,286old->cap_bset)))287/* no new pI capabilities outside bounding set */288return -EPERM;289290/* verify restrictions on target's new Permitted set */291if (!cap_issubset(*permitted, old->cap_permitted))292return -EPERM;293294/* verify the _new_Effective_ is a subset of the _new_Permitted_ */295if (!cap_issubset(*effective, *permitted))296return -EPERM;297298new->cap_effective = *effective;299new->cap_inheritable = *inheritable;300new->cap_permitted = *permitted;301302/*303* Mask off ambient bits that are no longer both permitted and304* inheritable.305*/306new->cap_ambient = cap_intersect(new->cap_ambient,307cap_intersect(*permitted,308*inheritable));309if (WARN_ON(!cap_ambient_invariant_ok(new)))310return -EINVAL;311return 0;312}313314/**315* cap_inode_need_killpriv - Determine if inode change affects privileges316* @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV317*318* Determine if an inode having a change applied that's marked ATTR_KILL_PRIV319* affects the security markings on that inode, and if it is, should320* inode_killpriv() be invoked or the change rejected.321*322* Return: 1 if security.capability has a value, meaning inode_killpriv()323* is required, 0 otherwise, meaning inode_killpriv() is not required.324*/325int cap_inode_need_killpriv(struct dentry *dentry)326{327struct inode *inode = d_backing_inode(dentry);328int error;329330error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);331return error > 0;332}333334/**335* cap_inode_killpriv - Erase the security markings on an inode336*337* @idmap: idmap of the mount the inode was found from338* @dentry: The inode/dentry to alter339*340* Erase the privilege-enhancing security markings on an inode.341*342* If the inode has been found through an idmapped mount the idmap of343* the vfsmount must be passed through @idmap. This function will then344* take care to map the inode according to @idmap before checking345* permissions. On non-idmapped mounts or if permission checking is to be346* performed on the raw inode simply pass @nop_mnt_idmap.347*348* Return: 0 if successful, -ve on error.349*/350int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)351{352int error;353354error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS);355if (error == -EOPNOTSUPP)356error = 0;357return error;358}359360/**361* kuid_root_in_ns - check whether the given kuid is root in the given ns362* @kuid: the kuid to be tested363* @ns: the user namespace to test against364*365* Returns true if @kuid represents the root user in @ns, false otherwise.366*/367static bool kuid_root_in_ns(kuid_t kuid, struct user_namespace *ns)368{369for (;; ns = ns->parent) {370if (from_kuid(ns, kuid) == 0)371return true;372if (ns == &init_user_ns)373break;374}375376return false;377}378379static bool vfsuid_root_in_currentns(vfsuid_t vfsuid)380{381kuid_t kuid;382383if (!vfsuid_valid(vfsuid))384return false;385kuid = vfsuid_into_kuid(vfsuid);386return kuid_root_in_ns(kuid, current_user_ns());387}388389static __u32 sansflags(__u32 m)390{391return m & ~VFS_CAP_FLAGS_EFFECTIVE;392}393394static bool is_v2header(int size, const struct vfs_cap_data *cap)395{396if (size != XATTR_CAPS_SZ_2)397return false;398return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;399}400401static bool is_v3header(int size, const struct vfs_cap_data *cap)402{403if (size != XATTR_CAPS_SZ_3)404return false;405return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;406}407408/*409* getsecurity: We are called for security.* before any attempt to read the410* xattr from the inode itself.411*412* This gives us a chance to read the on-disk value and convert it. If we413* return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.414*415* Note we are not called by vfs_getxattr_alloc(), but that is only called416* by the integrity subsystem, which really wants the unconverted values -417* so that's good.418*/419int cap_inode_getsecurity(struct mnt_idmap *idmap,420struct inode *inode, const char *name, void **buffer,421bool alloc)422{423int size;424kuid_t kroot;425vfsuid_t vfsroot;426u32 nsmagic, magic;427uid_t root, mappedroot;428char *tmpbuf = NULL;429struct vfs_cap_data *cap;430struct vfs_ns_cap_data *nscap = NULL;431struct dentry *dentry;432struct user_namespace *fs_ns;433434if (strcmp(name, "capability") != 0)435return -EOPNOTSUPP;436437dentry = d_find_any_alias(inode);438if (!dentry)439return -EINVAL;440size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf,441sizeof(struct vfs_ns_cap_data), GFP_NOFS);442dput(dentry);443/* gcc11 complains if we don't check for !tmpbuf */444if (size < 0 || !tmpbuf)445goto out_free;446447fs_ns = inode->i_sb->s_user_ns;448cap = (struct vfs_cap_data *) tmpbuf;449if (is_v2header(size, cap)) {450root = 0;451} else if (is_v3header(size, cap)) {452nscap = (struct vfs_ns_cap_data *) tmpbuf;453root = le32_to_cpu(nscap->rootid);454} else {455size = -EINVAL;456goto out_free;457}458459kroot = make_kuid(fs_ns, root);460461/* If this is an idmapped mount shift the kuid. */462vfsroot = make_vfsuid(idmap, fs_ns, kroot);463464/* If the root kuid maps to a valid uid in current ns, then return465* this as a nscap. */466mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot));467if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {468size = sizeof(struct vfs_ns_cap_data);469if (alloc) {470if (!nscap) {471/* v2 -> v3 conversion */472nscap = kzalloc(size, GFP_ATOMIC);473if (!nscap) {474size = -ENOMEM;475goto out_free;476}477nsmagic = VFS_CAP_REVISION_3;478magic = le32_to_cpu(cap->magic_etc);479if (magic & VFS_CAP_FLAGS_EFFECTIVE)480nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;481memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);482nscap->magic_etc = cpu_to_le32(nsmagic);483} else {484/* use allocated v3 buffer */485tmpbuf = NULL;486}487nscap->rootid = cpu_to_le32(mappedroot);488*buffer = nscap;489}490goto out_free;491}492493if (!vfsuid_root_in_currentns(vfsroot)) {494size = -EOVERFLOW;495goto out_free;496}497498/* This comes from a parent namespace. Return as a v2 capability */499size = sizeof(struct vfs_cap_data);500if (alloc) {501if (nscap) {502/* v3 -> v2 conversion */503cap = kzalloc(size, GFP_ATOMIC);504if (!cap) {505size = -ENOMEM;506goto out_free;507}508magic = VFS_CAP_REVISION_2;509nsmagic = le32_to_cpu(nscap->magic_etc);510if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)511magic |= VFS_CAP_FLAGS_EFFECTIVE;512memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);513cap->magic_etc = cpu_to_le32(magic);514} else {515/* use unconverted v2 */516tmpbuf = NULL;517}518*buffer = cap;519}520out_free:521kfree(tmpbuf);522return size;523}524525/**526* rootid_from_xattr - translate root uid of vfs caps527*528* @value: vfs caps value which may be modified by this function529* @size: size of @ivalue530* @task_ns: user namespace of the caller531*/532static vfsuid_t rootid_from_xattr(const void *value, size_t size,533struct user_namespace *task_ns)534{535const struct vfs_ns_cap_data *nscap = value;536uid_t rootid = 0;537538if (size == XATTR_CAPS_SZ_3)539rootid = le32_to_cpu(nscap->rootid);540541return VFSUIDT_INIT(make_kuid(task_ns, rootid));542}543544static bool validheader(size_t size, const struct vfs_cap_data *cap)545{546return is_v2header(size, cap) || is_v3header(size, cap);547}548549/**550* cap_convert_nscap - check vfs caps551*552* @idmap: idmap of the mount the inode was found from553* @dentry: used to retrieve inode to check permissions on554* @ivalue: vfs caps value which may be modified by this function555* @size: size of @ivalue556*557* User requested a write of security.capability. If needed, update the558* xattr to change from v2 to v3, or to fixup the v3 rootid.559*560* If the inode has been found through an idmapped mount the idmap of561* the vfsmount must be passed through @idmap. This function will then562* take care to map the inode according to @idmap before checking563* permissions. On non-idmapped mounts or if permission checking is to be564* performed on the raw inode simply pass @nop_mnt_idmap.565*566* Return: On success, return the new size; on error, return < 0.567*/568int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,569const void **ivalue, size_t size)570{571struct vfs_ns_cap_data *nscap;572uid_t nsrootid;573const struct vfs_cap_data *cap = *ivalue;574__u32 magic, nsmagic;575struct inode *inode = d_backing_inode(dentry);576struct user_namespace *task_ns = current_user_ns(),577*fs_ns = inode->i_sb->s_user_ns;578kuid_t rootid;579vfsuid_t vfsrootid;580size_t newsize;581582if (!*ivalue)583return -EINVAL;584if (!validheader(size, cap))585return -EINVAL;586if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))587return -EPERM;588if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap))589if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))590/* user is privileged, just write the v2 */591return size;592593vfsrootid = rootid_from_xattr(*ivalue, size, task_ns);594if (!vfsuid_valid(vfsrootid))595return -EINVAL;596597rootid = from_vfsuid(idmap, fs_ns, vfsrootid);598if (!uid_valid(rootid))599return -EINVAL;600601nsrootid = from_kuid(fs_ns, rootid);602if (nsrootid == -1)603return -EINVAL;604605newsize = sizeof(struct vfs_ns_cap_data);606nscap = kmalloc(newsize, GFP_ATOMIC);607if (!nscap)608return -ENOMEM;609nscap->rootid = cpu_to_le32(nsrootid);610nsmagic = VFS_CAP_REVISION_3;611magic = le32_to_cpu(cap->magic_etc);612if (magic & VFS_CAP_FLAGS_EFFECTIVE)613nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;614nscap->magic_etc = cpu_to_le32(nsmagic);615memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);616617*ivalue = nscap;618return newsize;619}620621/*622* Calculate the new process capability sets from the capability sets attached623* to a file.624*/625static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,626struct linux_binprm *bprm,627bool *effective,628bool *has_fcap)629{630struct cred *new = bprm->cred;631int ret = 0;632633if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)634*effective = true;635636if (caps->magic_etc & VFS_CAP_REVISION_MASK)637*has_fcap = true;638639/*640* pP' = (X & fP) | (pI & fI)641* The addition of pA' is handled later.642*/643new->cap_permitted.val =644(new->cap_bset.val & caps->permitted.val) |645(new->cap_inheritable.val & caps->inheritable.val);646647if (caps->permitted.val & ~new->cap_permitted.val)648/* insufficient to execute correctly */649ret = -EPERM;650651/*652* For legacy apps, with no internal support for recognizing they653* do not have enough capabilities, we return an error if they are654* missing some "forced" (aka file-permitted) capabilities.655*/656return *effective ? ret : 0;657}658659/**660* get_vfs_caps_from_disk - retrieve vfs caps from disk661*662* @idmap: idmap of the mount the inode was found from663* @dentry: dentry from which @inode is retrieved664* @cpu_caps: vfs capabilities665*666* Extract the on-exec-apply capability sets for an executable file.667*668* If the inode has been found through an idmapped mount the idmap of669* the vfsmount must be passed through @idmap. This function will then670* take care to map the inode according to @idmap before checking671* permissions. On non-idmapped mounts or if permission checking is to be672* performed on the raw inode simply pass @nop_mnt_idmap.673*/674int get_vfs_caps_from_disk(struct mnt_idmap *idmap,675const struct dentry *dentry,676struct cpu_vfs_cap_data *cpu_caps)677{678struct inode *inode = d_backing_inode(dentry);679__u32 magic_etc;680int size;681struct vfs_ns_cap_data data, *nscaps = &data;682struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;683kuid_t rootkuid;684vfsuid_t rootvfsuid;685struct user_namespace *fs_ns;686687memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));688689if (!inode)690return -ENODATA;691692fs_ns = inode->i_sb->s_user_ns;693size = __vfs_getxattr((struct dentry *)dentry, inode,694XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);695if (size == -ENODATA || size == -EOPNOTSUPP)696/* no data, that's ok */697return -ENODATA;698699if (size < 0)700return size;701702if (size < sizeof(magic_etc))703return -EINVAL;704705cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);706707rootkuid = make_kuid(fs_ns, 0);708switch (magic_etc & VFS_CAP_REVISION_MASK) {709case VFS_CAP_REVISION_1:710if (size != XATTR_CAPS_SZ_1)711return -EINVAL;712break;713case VFS_CAP_REVISION_2:714if (size != XATTR_CAPS_SZ_2)715return -EINVAL;716break;717case VFS_CAP_REVISION_3:718if (size != XATTR_CAPS_SZ_3)719return -EINVAL;720rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));721break;722723default:724return -EINVAL;725}726727rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);728if (!vfsuid_valid(rootvfsuid))729return -ENODATA;730731/* Limit the caps to the mounter of the filesystem732* or the more limited uid specified in the xattr.733*/734if (!vfsuid_root_in_currentns(rootvfsuid))735return -ENODATA;736737cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);738cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);739740/*741* Rev1 had just a single 32-bit word, later expanded742* to a second one for the high bits743*/744if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {745cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;746cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;747}748749cpu_caps->permitted.val &= CAP_VALID_MASK;750cpu_caps->inheritable.val &= CAP_VALID_MASK;751752cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);753754return 0;755}756757/*758* Attempt to get the on-exec apply capability sets for an executable file from759* its xattrs and, if present, apply them to the proposed credentials being760* constructed by execve().761*/762static int get_file_caps(struct linux_binprm *bprm, const struct file *file,763bool *effective, bool *has_fcap)764{765int rc = 0;766struct cpu_vfs_cap_data vcaps;767768cap_clear(bprm->cred->cap_permitted);769770if (!file_caps_enabled)771return 0;772773if (!mnt_may_suid(file->f_path.mnt))774return 0;775776/*777* This check is redundant with mnt_may_suid() but is kept to make778* explicit that capability bits are limited to s_user_ns and its779* descendants.780*/781if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))782return 0;783784rc = get_vfs_caps_from_disk(file_mnt_idmap(file),785file->f_path.dentry, &vcaps);786if (rc < 0) {787if (rc == -EINVAL)788printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",789bprm->filename);790else if (rc == -ENODATA)791rc = 0;792goto out;793}794795rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);796797out:798if (rc)799cap_clear(bprm->cred->cap_permitted);800801return rc;802}803804static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }805806static inline bool __is_real(kuid_t uid, struct cred *cred)807{ return uid_eq(cred->uid, uid); }808809static inline bool __is_eff(kuid_t uid, struct cred *cred)810{ return uid_eq(cred->euid, uid); }811812static inline bool __is_suid(kuid_t uid, struct cred *cred)813{ return !__is_real(uid, cred) && __is_eff(uid, cred); }814815/*816* handle_privileged_root - Handle case of privileged root817* @bprm: The execution parameters, including the proposed creds818* @has_fcap: Are any file capabilities set?819* @effective: Do we have effective root privilege?820* @root_uid: This namespace' root UID WRT initial USER namespace821*822* Handle the case where root is privileged and hasn't been neutered by823* SECURE_NOROOT. If file capabilities are set, they won't be combined with824* set UID root and nothing is changed. If we are root, cap_permitted is825* updated. If we have become set UID root, the effective bit is set.826*/827static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,828bool *effective, kuid_t root_uid)829{830const struct cred *old = current_cred();831struct cred *new = bprm->cred;832833if (!root_privileged())834return;835/*836* If the legacy file capability is set, then don't set privs837* for a setuid root binary run by a non-root user. Do set it838* for a root user just to cause least surprise to an admin.839*/840if (has_fcap && __is_suid(root_uid, new)) {841warn_setuid_and_fcaps_mixed(bprm->filename);842return;843}844/*845* To support inheritance of root-permissions and suid-root846* executables under compatibility mode, we override the847* capability sets for the file.848*/849if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {850/* pP' = (cap_bset & ~0) | (pI & ~0) */851new->cap_permitted = cap_combine(old->cap_bset,852old->cap_inheritable);853}854/*855* If only the real uid is 0, we do not set the effective bit.856*/857if (__is_eff(root_uid, new))858*effective = true;859}860861#define __cap_gained(field, target, source) \862!cap_issubset(target->cap_##field, source->cap_##field)863#define __cap_grew(target, source, cred) \864!cap_issubset(cred->cap_##target, cred->cap_##source)865#define __cap_full(field, cred) \866cap_issubset(CAP_FULL_SET, cred->cap_##field)867868/*869* 1) Audit candidate if current->cap_effective is set870*871* We do not bother to audit if 3 things are true:872* 1) cap_effective has all caps873* 2) we became root *OR* are were already root874* 3) root is supposed to have all caps (SECURE_NOROOT)875* Since this is just a normal root execing a process.876*877* Number 1 above might fail if you don't have a full bset, but I think878* that is interesting information to audit.879*880* A number of other conditions require logging:881* 2) something prevented setuid root getting all caps882* 3) non-setuid root gets fcaps883* 4) non-setuid root gets ambient884*/885static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,886kuid_t root, bool has_fcap)887{888bool ret = false;889890if ((__cap_grew(effective, ambient, new) &&891!(__cap_full(effective, new) &&892(__is_eff(root, new) || __is_real(root, new)) &&893root_privileged())) ||894(root_privileged() &&895__is_suid(root, new) &&896!__cap_full(effective, new)) ||897(uid_eq(new->euid, old->euid) &&898((has_fcap &&899__cap_gained(permitted, new, old)) ||900__cap_gained(ambient, new, old))))901902ret = true;903904return ret;905}906907/**908* cap_bprm_creds_from_file - Set up the proposed credentials for execve().909* @bprm: The execution parameters, including the proposed creds910* @file: The file to pull the credentials from911*912* Set up the proposed credentials for a new execution context being913* constructed by execve(). The proposed creds in @bprm->cred is altered,914* which won't take effect immediately.915*916* Return: 0 if successful, -ve on error.917*/918int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file)919{920/* Process setpcap binaries and capabilities for uid 0 */921const struct cred *old = current_cred();922struct cred *new = bprm->cred;923bool effective = false, has_fcap = false, id_changed;924int ret;925kuid_t root_uid;926927if (WARN_ON(!cap_ambient_invariant_ok(old)))928return -EPERM;929930ret = get_file_caps(bprm, file, &effective, &has_fcap);931if (ret < 0)932return ret;933934root_uid = make_kuid(new->user_ns, 0);935936handle_privileged_root(bprm, has_fcap, &effective, root_uid);937938/* if we have fs caps, clear dangerous personality flags */939if (__cap_gained(permitted, new, old))940bprm->per_clear |= PER_CLEAR_ON_SETID;941942/* Don't let someone trace a set[ug]id/setpcap binary with the revised943* credentials unless they have the appropriate permit.944*945* In addition, if NO_NEW_PRIVS, then ensure we get no new privs.946*/947id_changed = !uid_eq(new->euid, old->euid) || !in_group_p(new->egid);948949if ((id_changed || __cap_gained(permitted, new, old)) &&950((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||951!ptracer_capable(current, new->user_ns))) {952/* downgrade; they get no more than they had, and maybe less */953if (!ns_capable(new->user_ns, CAP_SETUID) ||954(bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {955new->euid = new->uid;956new->egid = new->gid;957}958new->cap_permitted = cap_intersect(new->cap_permitted,959old->cap_permitted);960}961962new->suid = new->fsuid = new->euid;963new->sgid = new->fsgid = new->egid;964965/* File caps or setid cancels ambient. */966if (has_fcap || id_changed)967cap_clear(new->cap_ambient);968969/*970* Now that we've computed pA', update pP' to give:971* pP' = (X & fP) | (pI & fI) | pA'972*/973new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);974975/*976* Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set,977* this is the same as pE' = (fE ? pP' : 0) | pA'.978*/979if (effective)980new->cap_effective = new->cap_permitted;981else982new->cap_effective = new->cap_ambient;983984if (WARN_ON(!cap_ambient_invariant_ok(new)))985return -EPERM;986987if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {988ret = audit_log_bprm_fcaps(bprm, new, old);989if (ret < 0)990return ret;991}992993new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);994995if (WARN_ON(!cap_ambient_invariant_ok(new)))996return -EPERM;997998/* Check for privilege-elevated exec. */999if (id_changed ||1000!uid_eq(new->euid, old->uid) ||1001!gid_eq(new->egid, old->gid) ||1002(!__is_real(root_uid, new) &&1003(effective ||1004__cap_grew(permitted, ambient, new))))1005bprm->secureexec = 1;10061007return 0;1008}10091010/**1011* cap_inode_setxattr - Determine whether an xattr may be altered1012* @dentry: The inode/dentry being altered1013* @name: The name of the xattr to be changed1014* @value: The value that the xattr will be changed to1015* @size: The size of value1016* @flags: The replacement flag1017*1018* Determine whether an xattr may be altered or set on an inode, returning 0 if1019* permission is granted, -ve if denied.1020*1021* This is used to make sure security xattrs don't get updated or set by those1022* who aren't privileged to do so.1023*/1024int cap_inode_setxattr(struct dentry *dentry, const char *name,1025const void *value, size_t size, int flags)1026{1027struct user_namespace *user_ns = dentry->d_sb->s_user_ns;10281029/* Ignore non-security xattrs */1030if (strncmp(name, XATTR_SECURITY_PREFIX,1031XATTR_SECURITY_PREFIX_LEN) != 0)1032return 0;10331034/*1035* For XATTR_NAME_CAPS the check will be done in1036* cap_convert_nscap(), called by setxattr()1037*/1038if (strcmp(name, XATTR_NAME_CAPS) == 0)1039return 0;10401041if (!ns_capable(user_ns, CAP_SYS_ADMIN))1042return -EPERM;1043return 0;1044}10451046/**1047* cap_inode_removexattr - Determine whether an xattr may be removed1048*1049* @idmap: idmap of the mount the inode was found from1050* @dentry: The inode/dentry being altered1051* @name: The name of the xattr to be changed1052*1053* Determine whether an xattr may be removed from an inode, returning 0 if1054* permission is granted, -ve if denied.1055*1056* If the inode has been found through an idmapped mount the idmap of1057* the vfsmount must be passed through @idmap. This function will then1058* take care to map the inode according to @idmap before checking1059* permissions. On non-idmapped mounts or if permission checking is to be1060* performed on the raw inode simply pass @nop_mnt_idmap.1061*1062* This is used to make sure security xattrs don't get removed by those who1063* aren't privileged to remove them.1064*/1065int cap_inode_removexattr(struct mnt_idmap *idmap,1066struct dentry *dentry, const char *name)1067{1068struct user_namespace *user_ns = dentry->d_sb->s_user_ns;10691070/* Ignore non-security xattrs */1071if (strncmp(name, XATTR_SECURITY_PREFIX,1072XATTR_SECURITY_PREFIX_LEN) != 0)1073return 0;10741075if (strcmp(name, XATTR_NAME_CAPS) == 0) {1076/* security.capability gets namespaced */1077struct inode *inode = d_backing_inode(dentry);1078if (!inode)1079return -EINVAL;1080if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))1081return -EPERM;1082return 0;1083}10841085if (!ns_capable(user_ns, CAP_SYS_ADMIN))1086return -EPERM;1087return 0;1088}10891090/*1091* cap_emulate_setxuid() fixes the effective / permitted capabilities of1092* a process after a call to setuid, setreuid, or setresuid.1093*1094* 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of1095* {r,e,s}uid != 0, the permitted and effective capabilities are1096* cleared.1097*1098* 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective1099* capabilities of the process are cleared.1100*1101* 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective1102* capabilities are set to the permitted capabilities.1103*1104* fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should1105* never happen.1106*1107* -astor1108*1109* cevans - New behaviour, Oct '991110* A process may, via prctl(), elect to keep its capabilities when it1111* calls setuid() and switches away from uid==0. Both permitted and1112* effective sets will be retained.1113* Without this change, it was impossible for a daemon to drop only some1114* of its privilege. The call to setuid(!=0) would drop all privileges!1115* Keeping uid 0 is not an option because uid 0 owns too many vital1116* files..1117* Thanks to Olaf Kirch and Peter Benie for spotting this.1118*/1119static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)1120{1121kuid_t root_uid = make_kuid(old->user_ns, 0);11221123if ((uid_eq(old->uid, root_uid) ||1124uid_eq(old->euid, root_uid) ||1125uid_eq(old->suid, root_uid)) &&1126(!uid_eq(new->uid, root_uid) &&1127!uid_eq(new->euid, root_uid) &&1128!uid_eq(new->suid, root_uid))) {1129if (!issecure(SECURE_KEEP_CAPS)) {1130cap_clear(new->cap_permitted);1131cap_clear(new->cap_effective);1132}11331134/*1135* Pre-ambient programs expect setresuid to nonroot followed1136* by exec to drop capabilities. We should make sure that1137* this remains the case.1138*/1139cap_clear(new->cap_ambient);1140}1141if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))1142cap_clear(new->cap_effective);1143if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))1144new->cap_effective = new->cap_permitted;1145}11461147/**1148* cap_task_fix_setuid - Fix up the results of setuid() call1149* @new: The proposed credentials1150* @old: The current task's current credentials1151* @flags: Indications of what has changed1152*1153* Fix up the results of setuid() call before the credential changes are1154* actually applied.1155*1156* Return: 0 to grant the changes, -ve to deny them.1157*/1158int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)1159{1160switch (flags) {1161case LSM_SETID_RE:1162case LSM_SETID_ID:1163case LSM_SETID_RES:1164/* juggle the capabilities to follow [RES]UID changes unless1165* otherwise suppressed */1166if (!issecure(SECURE_NO_SETUID_FIXUP))1167cap_emulate_setxuid(new, old);1168break;11691170case LSM_SETID_FS:1171/* juggle the capabilities to follow FSUID changes, unless1172* otherwise suppressed1173*1174* FIXME - is fsuser used for all CAP_FS_MASK capabilities?1175* if not, we might be a bit too harsh here.1176*/1177if (!issecure(SECURE_NO_SETUID_FIXUP)) {1178kuid_t root_uid = make_kuid(old->user_ns, 0);1179if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))1180new->cap_effective =1181cap_drop_fs_set(new->cap_effective);11821183if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))1184new->cap_effective =1185cap_raise_fs_set(new->cap_effective,1186new->cap_permitted);1187}1188break;11891190default:1191return -EINVAL;1192}11931194return 0;1195}11961197/*1198* Rationale: code calling task_setscheduler, task_setioprio, and1199* task_setnice, assumes that1200* . if capable(cap_sys_nice), then those actions should be allowed1201* . if not capable(cap_sys_nice), but acting on your own processes,1202* then those actions should be allowed1203* This is insufficient now since you can call code without suid, but1204* yet with increased caps.1205* So we check for increased caps on the target process.1206*/1207static int cap_safe_nice(struct task_struct *p)1208{1209int is_subset, ret = 0;12101211rcu_read_lock();1212is_subset = cap_issubset(__task_cred(p)->cap_permitted,1213current_cred()->cap_permitted);1214if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))1215ret = -EPERM;1216rcu_read_unlock();12171218return ret;1219}12201221/**1222* cap_task_setscheduler - Determine if scheduler policy change is permitted1223* @p: The task to affect1224*1225* Determine if the requested scheduler policy change is permitted for the1226* specified task.1227*1228* Return: 0 if permission is granted, -ve if denied.1229*/1230int cap_task_setscheduler(struct task_struct *p)1231{1232return cap_safe_nice(p);1233}12341235/**1236* cap_task_setioprio - Determine if I/O priority change is permitted1237* @p: The task to affect1238* @ioprio: The I/O priority to set1239*1240* Determine if the requested I/O priority change is permitted for the specified1241* task.1242*1243* Return: 0 if permission is granted, -ve if denied.1244*/1245int cap_task_setioprio(struct task_struct *p, int ioprio)1246{1247return cap_safe_nice(p);1248}12491250/**1251* cap_task_setnice - Determine if task priority change is permitted1252* @p: The task to affect1253* @nice: The nice value to set1254*1255* Determine if the requested task priority change is permitted for the1256* specified task.1257*1258* Return: 0 if permission is granted, -ve if denied.1259*/1260int cap_task_setnice(struct task_struct *p, int nice)1261{1262return cap_safe_nice(p);1263}12641265/*1266* Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from1267* the current task's bounding set. Returns 0 on success, -ve on error.1268*/1269static int cap_prctl_drop(unsigned long cap)1270{1271struct cred *new;12721273if (!ns_capable(current_user_ns(), CAP_SETPCAP))1274return -EPERM;1275if (!cap_valid(cap))1276return -EINVAL;12771278new = prepare_creds();1279if (!new)1280return -ENOMEM;1281cap_lower(new->cap_bset, cap);1282return commit_creds(new);1283}12841285/**1286* cap_task_prctl - Implement process control functions for this security module1287* @option: The process control function requested1288* @arg2: The argument data for this function1289* @arg3: The argument data for this function1290* @arg4: The argument data for this function1291* @arg5: The argument data for this function1292*1293* Allow process control functions (sys_prctl()) to alter capabilities; may1294* also deny access to other functions not otherwise implemented here.1295*1296* Return: 0 or +ve on success, -ENOSYS if this function is not implemented1297* here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM1298* modules will consider performing the function.1299*/1300int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,1301unsigned long arg4, unsigned long arg5)1302{1303const struct cred *old = current_cred();1304struct cred *new;13051306switch (option) {1307case PR_CAPBSET_READ:1308if (!cap_valid(arg2))1309return -EINVAL;1310return !!cap_raised(old->cap_bset, arg2);13111312case PR_CAPBSET_DROP:1313return cap_prctl_drop(arg2);13141315/*1316* The next four prctl's remain to assist with transitioning a1317* system from legacy UID=0 based privilege (when filesystem1318* capabilities are not in use) to a system using filesystem1319* capabilities only - as the POSIX.1e draft intended.1320*1321* Note:1322*1323* PR_SET_SECUREBITS =1324* issecure_mask(SECURE_KEEP_CAPS_LOCKED)1325* | issecure_mask(SECURE_NOROOT)1326* | issecure_mask(SECURE_NOROOT_LOCKED)1327* | issecure_mask(SECURE_NO_SETUID_FIXUP)1328* | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)1329*1330* will ensure that the current process and all of its1331* children will be locked into a pure1332* capability-based-privilege environment.1333*/1334case PR_SET_SECUREBITS:1335if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)1336& (old->securebits ^ arg2)) /*[1]*/1337|| ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/1338|| (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/1339/*1340* [1] no changing of bits that are locked1341* [2] no unlocking of locks1342* [3] no setting of unsupported bits1343*/1344)1345/* cannot change a locked bit */1346return -EPERM;13471348/*1349* Doing anything requires privilege (go read about the1350* "sendmail capabilities bug"), except for unprivileged bits.1351* Indeed, the SECURE_ALL_UNPRIVILEGED bits are not1352* restrictions enforced by the kernel but by user space on1353* itself.1354*/1355if (cap_capable(current_cred(), current_cred()->user_ns,1356CAP_SETPCAP, CAP_OPT_NONE) != 0) {1357const unsigned long unpriv_and_locks =1358SECURE_ALL_UNPRIVILEGED |1359SECURE_ALL_UNPRIVILEGED << 1;1360const unsigned long changed = old->securebits ^ arg2;13611362/* For legacy reason, denies non-change. */1363if (!changed)1364return -EPERM;13651366/* Denies privileged changes. */1367if (changed & ~unpriv_and_locks)1368return -EPERM;1369}13701371new = prepare_creds();1372if (!new)1373return -ENOMEM;1374new->securebits = arg2;1375return commit_creds(new);13761377case PR_GET_SECUREBITS:1378return old->securebits;13791380case PR_GET_KEEPCAPS:1381return !!issecure(SECURE_KEEP_CAPS);13821383case PR_SET_KEEPCAPS:1384if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */1385return -EINVAL;1386if (issecure(SECURE_KEEP_CAPS_LOCKED))1387return -EPERM;13881389new = prepare_creds();1390if (!new)1391return -ENOMEM;1392if (arg2)1393new->securebits |= issecure_mask(SECURE_KEEP_CAPS);1394else1395new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);1396return commit_creds(new);13971398case PR_CAP_AMBIENT:1399if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {1400if (arg3 | arg4 | arg5)1401return -EINVAL;14021403new = prepare_creds();1404if (!new)1405return -ENOMEM;1406cap_clear(new->cap_ambient);1407return commit_creds(new);1408}14091410if (((!cap_valid(arg3)) | arg4 | arg5))1411return -EINVAL;14121413if (arg2 == PR_CAP_AMBIENT_IS_SET) {1414return !!cap_raised(current_cred()->cap_ambient, arg3);1415} else if (arg2 != PR_CAP_AMBIENT_RAISE &&1416arg2 != PR_CAP_AMBIENT_LOWER) {1417return -EINVAL;1418} else {1419if (arg2 == PR_CAP_AMBIENT_RAISE &&1420(!cap_raised(current_cred()->cap_permitted, arg3) ||1421!cap_raised(current_cred()->cap_inheritable,1422arg3) ||1423issecure(SECURE_NO_CAP_AMBIENT_RAISE)))1424return -EPERM;14251426new = prepare_creds();1427if (!new)1428return -ENOMEM;1429if (arg2 == PR_CAP_AMBIENT_RAISE)1430cap_raise(new->cap_ambient, arg3);1431else1432cap_lower(new->cap_ambient, arg3);1433return commit_creds(new);1434}14351436default:1437/* No functionality available - continue with default */1438return -ENOSYS;1439}1440}14411442/**1443* cap_vm_enough_memory - Determine whether a new virtual mapping is permitted1444* @mm: The VM space in which the new mapping is to be made1445* @pages: The size of the mapping1446*1447* Determine whether the allocation of a new virtual mapping by the current1448* task is permitted.1449*1450* Return: 0 if permission granted, negative error code if not.1451*/1452int cap_vm_enough_memory(struct mm_struct *mm, long pages)1453{1454return cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,1455CAP_OPT_NOAUDIT);1456}14571458/**1459* cap_mmap_addr - check if able to map given addr1460* @addr: address attempting to be mapped1461*1462* If the process is attempting to map memory below dac_mmap_min_addr they need1463* CAP_SYS_RAWIO. The other parameters to this function are unused by the1464* capability security module.1465*1466* Return: 0 if this mapping should be allowed or -EPERM if not.1467*/1468int cap_mmap_addr(unsigned long addr)1469{1470int ret = 0;14711472if (addr < dac_mmap_min_addr) {1473ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,1474CAP_OPT_NONE);1475/* set PF_SUPERPRIV if it turns out we allow the low mmap */1476if (ret == 0)1477current->flags |= PF_SUPERPRIV;1478}1479return ret;1480}14811482#ifdef CONFIG_SECURITY14831484static const struct lsm_id capability_lsmid = {1485.name = "capability",1486.id = LSM_ID_CAPABILITY,1487};14881489static struct security_hook_list capability_hooks[] __ro_after_init = {1490LSM_HOOK_INIT(capable, cap_capable),1491LSM_HOOK_INIT(settime, cap_settime),1492LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),1493LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),1494LSM_HOOK_INIT(capget, cap_capget),1495LSM_HOOK_INIT(capset, cap_capset),1496LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),1497LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),1498LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),1499LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),1500LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),1501LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),1502LSM_HOOK_INIT(task_prctl, cap_task_prctl),1503LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),1504LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),1505LSM_HOOK_INIT(task_setnice, cap_task_setnice),1506LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),1507};15081509static int __init capability_init(void)1510{1511security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),1512&capability_lsmid);1513return 0;1514}15151516DEFINE_LSM(capability) = {1517.id = &capability_lsmid,1518.order = LSM_ORDER_FIRST,1519.init = capability_init,1520};15211522#endif /* CONFIG_SECURITY */152315241525