Path: blob/main/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
48775 views
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 *
 * Extended attributes (xattr) on Solaris are implemented as files
 * which exist in a hidden xattr directory.  These extended attributes
 * can be accessed using the attropen() system call which opens
 * the extended attribute.  It can then be manipulated just like
 * a standard file descriptor.  This has a couple advantages such
 * as practically no size limit on the file, and the extended
 * attributes permissions may differ from those of the parent file.
 * This interface is really quite clever, but it's also completely
 * different than what is supported on Linux.  It also comes with a
 * steep performance penalty when accessing small xattrs because they
 * are not stored with the parent file.
 *
 * Under Linux extended attributes are manipulated by the system
 * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
 * extended attributes to be name/value pairs where the name is a
 * NULL terminated string.  The name must also include one of the
 * following namespace prefixes:
 *
 *   user     - No restrictions and is available to user applications.
 *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
 *   system   - Used for access control lists (system.nfs4_acl, etc).
 *   security - Used by SELinux to store a file's security context.
 *
 * The value under Linux is limited to 65536 bytes of binary data.
 * In practice, individual xattrs tend to be much smaller than this
 * and are typically less than 100 bytes.  A good example of this
 * are the security.selinux xattrs which are less than 100 bytes and
 * exist for every file when xattr labeling is enabled.
 *
 * The Linux xattr implementation has been written to take advantage of
 * this typical usage.  When the dataset property 'xattr=sa' is set,
 * then xattrs will be preferentially stored as System Attributes (SA).
 * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
 * up to 64k of xattrs to be stored in the spill block.  If additional
 * xattr space is required, which is unlikely under Linux, they will
 * be stored using the traditional directory approach.
 *
 * This optimization results in roughly a 3x performance improvement
 * when accessing xattrs because it avoids the need to perform a seek
 * for every xattr value.  When multiple xattrs are stored per-file
 * the performance improvements are even greater because all of the
 * xattrs stored in the spill block will be cached.
 *
 * However, by default SA based xattrs are disabled in the Linux port
 * to maximize compatibility with other implementations.
If you do69* enable SA based xattrs then they will not be visible on platforms70* which do not support this feature.71*72* NOTE: One additional consequence of the xattr directory implementation73* is that when an extended attribute is manipulated an inode is created.74* This inode will exist in the Linux inode cache but there will be no75* associated entry in the dentry cache which references it. This is76* safe but it may result in some confusion. Enabling SA based xattrs77* largely avoids the issue except in the overflow case.78*/7980#include <sys/zfs_znode.h>81#include <sys/zfs_vfsops.h>82#include <sys/zfs_vnops.h>83#include <sys/zap.h>84#include <sys/vfs.h>85#include <sys/zpl.h>86#include <linux/vfs_compat.h>8788enum xattr_permission {89XAPERM_DENY,90XAPERM_ALLOW,91XAPERM_COMPAT,92};9394typedef struct xattr_filldir {95size_t size;96size_t offset;97char *buf;98struct dentry *dentry;99} xattr_filldir_t;100101static enum xattr_permission zpl_xattr_permission(xattr_filldir_t *,102const char *, int);103104static int zfs_xattr_compat = 0;105106/*107* Determine is a given xattr name should be visible and if so copy it108* in to the provided buffer (xf->buf).109*/110static int111zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)112{113enum xattr_permission perm;114115/* Check permissions using the per-namespace list xattr handler. */116perm = zpl_xattr_permission(xf, name, name_len);117if (perm == XAPERM_DENY)118return (0);119120/* Prefix the name with "user." if it does not have a namespace. */121if (perm == XAPERM_COMPAT) {122if (xf->buf) {123if (xf->offset + XATTR_USER_PREFIX_LEN + 1 > xf->size)124return (-ERANGE);125126memcpy(xf->buf + xf->offset, XATTR_USER_PREFIX,127XATTR_USER_PREFIX_LEN);128xf->buf[xf->offset + XATTR_USER_PREFIX_LEN] = '\0';129}130131xf->offset += XATTR_USER_PREFIX_LEN;132}133134/* When xf->buf is NULL only calculate the required size. 
*/135if (xf->buf) {136if (xf->offset + name_len + 1 > xf->size)137return (-ERANGE);138139memcpy(xf->buf + xf->offset, name, name_len);140xf->buf[xf->offset + name_len] = '\0';141}142143xf->offset += (name_len + 1);144145return (0);146}147148/*149* Read as many directory entry names as will fit in to the provided buffer,150* or when no buffer is provided calculate the required buffer size.151*/152static int153zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)154{155zap_cursor_t zc;156zap_attribute_t *zap = zap_attribute_alloc();157int error;158159zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);160161while ((error = -zap_cursor_retrieve(&zc, zap)) == 0) {162163if (zap->za_integer_length != 8 || zap->za_num_integers != 1) {164error = -ENXIO;165break;166}167168error = zpl_xattr_filldir(xf, zap->za_name,169strlen(zap->za_name));170if (error)171break;172173zap_cursor_advance(&zc);174}175176zap_cursor_fini(&zc);177zap_attribute_free(zap);178179if (error == -ENOENT)180error = 0;181182return (error);183}184185static ssize_t186zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)187{188struct inode *ip = xf->dentry->d_inode;189struct inode *dxip = NULL;190znode_t *dxzp;191int error;192193/* Lookup the xattr directory */194error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,195cr, NULL, NULL);196if (error) {197if (error == -ENOENT)198error = 0;199200return (error);201}202203dxip = ZTOI(dxzp);204error = zpl_xattr_readdir(dxip, xf);205iput(dxip);206207return (error);208}209210static ssize_t211zpl_xattr_list_sa(xattr_filldir_t *xf)212{213znode_t *zp = ITOZ(xf->dentry->d_inode);214nvpair_t *nvp = NULL;215int error = 0;216217mutex_enter(&zp->z_lock);218if (zp->z_xattr_cached == NULL)219error = -zfs_sa_get_xattr(zp);220mutex_exit(&zp->z_lock);221222if (error)223return (error);224225ASSERT(zp->z_xattr_cached);226227while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {228ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);229230error = 
zpl_xattr_filldir(xf, nvpair_name(nvp),231strlen(nvpair_name(nvp)));232if (error)233return (error);234}235236return (0);237}238239ssize_t240zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)241{242znode_t *zp = ITOZ(dentry->d_inode);243zfsvfs_t *zfsvfs = ZTOZSB(zp);244xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };245cred_t *cr = CRED();246fstrans_cookie_t cookie;247int error = 0;248249crhold(cr);250cookie = spl_fstrans_mark();251if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)252goto out1;253rw_enter(&zp->z_xattr_lock, RW_READER);254255if (zfsvfs->z_use_sa && zp->z_is_sa) {256error = zpl_xattr_list_sa(&xf);257if (error)258goto out;259}260261error = zpl_xattr_list_dir(&xf, cr);262if (error)263goto out;264265error = xf.offset;266out:267268rw_exit(&zp->z_xattr_lock);269zpl_exit(zfsvfs, FTAG);270out1:271spl_fstrans_unmark(cookie);272crfree(cr);273274return (error);275}276277static int278zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,279size_t size, cred_t *cr)280{281fstrans_cookie_t cookie;282struct inode *xip = NULL;283znode_t *dxzp = NULL;284znode_t *xzp = NULL;285int error;286287/* Lookup the xattr directory */288error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,289cr, NULL, NULL);290if (error)291goto out;292293/* Lookup a specific xattr name in the directory */294error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);295if (error)296goto out;297298xip = ZTOI(xzp);299if (!size) {300error = i_size_read(xip);301goto out;302}303304if (size < i_size_read(xip)) {305error = -ERANGE;306goto out;307}308309struct iovec iov;310iov.iov_base = (void *)value;311iov.iov_len = size;312313zfs_uio_t uio;314zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);315316cookie = spl_fstrans_mark();317error = -zfs_read(ITOZ(xip), &uio, 0, cr);318spl_fstrans_unmark(cookie);319320if (error == 0)321error = size - zfs_uio_resid(&uio);322out:323if (xzp)324zrele(xzp);325326if (dxzp)327zrele(dxzp);328329return 
(error);330}331332static int333zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)334{335znode_t *zp = ITOZ(ip);336uchar_t *nv_value;337uint_t nv_size;338int error = 0;339340ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));341342mutex_enter(&zp->z_lock);343if (zp->z_xattr_cached == NULL)344error = -zfs_sa_get_xattr(zp);345mutex_exit(&zp->z_lock);346347if (error)348return (error);349350ASSERT(zp->z_xattr_cached);351error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,352&nv_value, &nv_size);353if (error)354return (error);355356if (size == 0 || value == NULL)357return (nv_size);358359if (size < nv_size)360return (-ERANGE);361362memcpy(value, nv_value, nv_size);363364return (nv_size);365}366367static int368__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,369cred_t *cr)370{371znode_t *zp = ITOZ(ip);372zfsvfs_t *zfsvfs = ZTOZSB(zp);373int error;374375ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));376377if (zfsvfs->z_use_sa && zp->z_is_sa) {378error = zpl_xattr_get_sa(ip, name, value, size);379if (error != -ENOENT)380goto out;381}382383error = zpl_xattr_get_dir(ip, name, value, size, cr);384out:385if (error == -ENOENT)386error = -ENODATA;387388return (error);389}390391#define XATTR_NOENT 0x0392#define XATTR_IN_SA 0x1393#define XATTR_IN_DIR 0x2394/* check where the xattr resides */395static int396__zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)397{398znode_t *zp = ITOZ(ip);399zfsvfs_t *zfsvfs = ZTOZSB(zp);400int error;401402ASSERT(where);403ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));404405*where = XATTR_NOENT;406if (zfsvfs->z_use_sa && zp->z_is_sa) {407error = zpl_xattr_get_sa(ip, name, NULL, 0);408if (error >= 0)409*where |= XATTR_IN_SA;410else if (error != -ENOENT)411return (error);412}413414error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);415if (error >= 0)416*where |= XATTR_IN_DIR;417else if (error != -ENOENT)418return (error);419420if (*where == (XATTR_IN_SA|XATTR_IN_DIR))421cmn_err(CE_WARN, "ZFS: 
inode %p has xattr \"%s\""422" in both SA and dir", ip, name);423if (*where == XATTR_NOENT)424error = -ENODATA;425else426error = 0;427return (error);428}429430static int431zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)432{433znode_t *zp = ITOZ(ip);434zfsvfs_t *zfsvfs = ZTOZSB(zp);435cred_t *cr = CRED();436fstrans_cookie_t cookie;437int error;438439crhold(cr);440cookie = spl_fstrans_mark();441if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)442goto out;443rw_enter(&zp->z_xattr_lock, RW_READER);444error = __zpl_xattr_get(ip, name, value, size, cr);445rw_exit(&zp->z_xattr_lock);446zpl_exit(zfsvfs, FTAG);447out:448spl_fstrans_unmark(cookie);449crfree(cr);450451return (error);452}453454static int455zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,456size_t size, int flags, cred_t *cr)457{458znode_t *dxzp = NULL;459znode_t *xzp = NULL;460vattr_t *vap = NULL;461int lookup_flags, error;462const int xattr_mode = S_IFREG | 0644;463loff_t pos = 0;464465/*466* Lookup the xattr directory. When we're adding an entry pass467* CREATE_XATTR_DIR to ensure the xattr directory is created.468* When removing an entry this flag is not passed to avoid469* unnecessarily creating a new xattr directory.470*/471lookup_flags = LOOKUP_XATTR;472if (value != NULL)473lookup_flags |= CREATE_XATTR_DIR;474475error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,476cr, NULL, NULL);477if (error)478goto out;479480/* Lookup a specific xattr name in the directory */481error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);482if (error && (error != -ENOENT))483goto out;484485error = 0;486487/* Remove a specific name xattr when value is set to NULL. */488if (value == NULL) {489if (xzp)490error = -zfs_remove(dxzp, (char *)name, cr, 0);491492goto out;493}494495/* Lookup failed create a new xattr. 
*/496if (xzp == NULL) {497vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);498vap->va_mode = xattr_mode;499vap->va_mask = ATTR_MODE;500vap->va_uid = crgetuid(cr);501vap->va_gid = crgetgid(cr);502503error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,504cr, ATTR_NOACLCHECK, NULL, zfs_init_idmap);505if (error)506goto out;507}508509ASSERT(xzp != NULL);510511error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);512if (error)513goto out;514515error = -zfs_write_simple(xzp, value, size, pos, NULL);516out:517if (error == 0) {518zpl_inode_set_ctime_to_ts(ip, current_time(ip));519zfs_mark_inode_dirty(ip);520}521522if (vap)523kmem_free(vap, sizeof (vattr_t));524525if (xzp)526zrele(xzp);527528if (dxzp)529zrele(dxzp);530531if (error == -ENOENT)532error = -ENODATA;533534ASSERT3S(error, <=, 0);535536return (error);537}538539static int540zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,541size_t size, int flags, cred_t *cr)542{543znode_t *zp = ITOZ(ip);544nvlist_t *nvl;545size_t sa_size;546int error = 0;547548mutex_enter(&zp->z_lock);549if (zp->z_xattr_cached == NULL)550error = -zfs_sa_get_xattr(zp);551mutex_exit(&zp->z_lock);552553if (error)554return (error);555556ASSERT(zp->z_xattr_cached);557nvl = zp->z_xattr_cached;558559if (value == NULL) {560error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);561if (error == -ENOENT)562error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);563} else {564/* Limited to 32k to keep nvpair memory allocations small */565if (size > DXATTR_MAX_ENTRY_SIZE)566return (-EFBIG);567568/* Prevent the DXATTR SA from consuming the entire SA region */569error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);570if (error)571return (error);572573if (sa_size > DXATTR_MAX_SA_SIZE)574return (-EFBIG);575576error = -nvlist_add_byte_array(nvl, name,577(uchar_t *)value, size);578}579580/*581* Update the SA for additions, modifications, and removals. 
On582* error drop the inconsistent cached version of the nvlist, it583* will be reconstructed from the ARC when next accessed.584*/585if (error == 0)586error = -zfs_sa_set_xattr(zp, name, value, size);587588if (error) {589nvlist_free(nvl);590zp->z_xattr_cached = NULL;591}592593ASSERT3S(error, <=, 0);594595return (error);596}597598static int599zpl_xattr_set(struct inode *ip, const char *name, const void *value,600size_t size, int flags)601{602znode_t *zp = ITOZ(ip);603zfsvfs_t *zfsvfs = ZTOZSB(zp);604cred_t *cr = CRED();605fstrans_cookie_t cookie;606int where;607int error;608609crhold(cr);610cookie = spl_fstrans_mark();611if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)612goto out1;613rw_enter(&zp->z_xattr_lock, RW_WRITER);614615/*616* Before setting the xattr check to see if it already exists.617* This is done to ensure the following optional flags are honored.618*619* XATTR_CREATE: fail if xattr already exists620* XATTR_REPLACE: fail if xattr does not exist621*622* We also want to know if it resides in sa or dir, so we can make623* sure we don't end up with duplicate in both places.624*/625error = __zpl_xattr_where(ip, name, &where, cr);626if (error < 0) {627if (error != -ENODATA)628goto out;629if (flags & XATTR_REPLACE)630goto out;631632/* The xattr to be removed already doesn't exist */633error = 0;634if (value == NULL)635goto out;636} else {637error = -EEXIST;638if (flags & XATTR_CREATE)639goto out;640}641642/* Preferentially store the xattr as a SA for better performance */643if (zfsvfs->z_use_sa && zp->z_is_sa &&644(zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {645error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);646if (error == 0) {647/*648* Successfully put into SA, we need to clear the one649* in dir.650*/651if (where & XATTR_IN_DIR)652zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);653goto out;654}655}656657error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);658/*659* Successfully put into dir, we need to clear the 
one in SA.660*/661if (error == 0 && (where & XATTR_IN_SA))662zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);663out:664rw_exit(&zp->z_xattr_lock);665zpl_exit(zfsvfs, FTAG);666out1:667spl_fstrans_unmark(cookie);668crfree(cr);669ASSERT3S(error, <=, 0);670671return (error);672}673674/*675* Extended user attributes676*677* "Extended user attributes may be assigned to files and directories for678* storing arbitrary additional information such as the mime type,679* character set or encoding of a file. The access permissions for user680* attributes are defined by the file permission bits: read permission681* is required to retrieve the attribute value, and writer permission is682* required to change it.683*684* The file permission bits of regular files and directories are685* interpreted differently from the file permission bits of special686* files and symbolic links. For regular files and directories the file687* permission bits define access to the file's contents, while for688* device special files they define access to the device described by689* the special file. The file permissions of symbolic links are not690* used in access checks. These differences would allow users to691* consume filesystem resources in a way not controllable by disk quotas692* for group or world writable special files and directories.693*694* For this reason, extended user attributes are allowed only for695* regular files and directories, and access to extended user attributes696* is restricted to the owner and to users with appropriate capabilities697* for directories with the sticky bit set (see the chmod(1) manual page698* for an explanation of the sticky bit)." 
- xattr(7)699*700* ZFS allows extended user attributes to be disabled administratively701* by setting the 'xattr=off' property on the dataset.702*/703static int704__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,705const char *name, size_t name_len)706{707return (ITOZSB(ip)->z_flags & ZSB_XATTR);708}709ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);710711static int712__zpl_xattr_user_get(struct inode *ip, const char *name,713void *value, size_t size)714{715int error;716/* xattr_resolve_name will do this for us if this is defined */717if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))718return (-EINVAL);719if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))720return (-EOPNOTSUPP);721722/*723* Try to look up the name with the namespace prefix first for724* compatibility with xattrs from this platform. If that fails,725* try again without the namespace prefix for compatibility with726* other platforms.727*/728char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);729error = zpl_xattr_get(ip, xattr_name, value, size);730kmem_strfree(xattr_name);731if (error == -ENODATA)732error = zpl_xattr_get(ip, name, value, size);733734return (error);735}736ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);737738static int739__zpl_xattr_user_set(zidmap_t *user_ns,740struct inode *ip, const char *name,741const void *value, size_t size, int flags)742{743(void) user_ns;744int error = 0;745/* xattr_resolve_name will do this for us if this is defined */746if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))747return (-EINVAL);748if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))749return (-EOPNOTSUPP);750751/*752* Remove alternate compat version of the xattr so we only set the753* version specified by the zfs_xattr_compat tunable.754*755* The following flags must be handled correctly:756*757* XATTR_CREATE: fail if xattr already exists758* XATTR_REPLACE: fail if xattr does not exist759*/760char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);761const char *clear_name, *set_name;762if (zfs_xattr_compat) 
{763clear_name = prefixed_name;764set_name = name;765} else {766clear_name = name;767set_name = prefixed_name;768}769/*770* Clear the old value with the alternative name format, if it exists.771*/772error = zpl_xattr_set(ip, clear_name, NULL, 0, flags);773/*774* XATTR_CREATE was specified and we failed to clear the xattr775* because it already exists. Stop here.776*/777if (error == -EEXIST)778goto out;779/*780* If XATTR_REPLACE was specified and we succeeded to clear781* an xattr, we don't need to replace anything when setting782* the new value. If we failed with -ENODATA that's fine,783* there was nothing to be cleared and we can ignore the error.784*/785if (error == 0)786flags &= ~XATTR_REPLACE;787/*788* Set the new value with the configured name format.789*/790error = zpl_xattr_set(ip, set_name, value, size, flags);791out:792kmem_strfree(prefixed_name);793return (error);794}795ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);796797static xattr_handler_t zpl_xattr_user_handler =798{799.prefix = XATTR_USER_PREFIX,800.list = zpl_xattr_user_list,801.get = zpl_xattr_user_get,802.set = zpl_xattr_user_set,803};804805/*806* Trusted extended attributes807*808* "Trusted extended attributes are visible and accessible only to809* processes that have the CAP_SYS_ADMIN capability. Attributes in this810* class are used to implement mechanisms in user space (i.e., outside811* the kernel) which keep information in extended attributes to which812* ordinary processes should not have access." 
- xattr(7)813*/814static int815__zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,816const char *name, size_t name_len)817{818return (capable(CAP_SYS_ADMIN));819}820ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);821822static int823__zpl_xattr_trusted_get(struct inode *ip, const char *name,824void *value, size_t size)825{826char *xattr_name;827int error;828829if (!capable(CAP_SYS_ADMIN))830return (-EACCES);831/* xattr_resolve_name will do this for us if this is defined */832xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);833error = zpl_xattr_get(ip, xattr_name, value, size);834kmem_strfree(xattr_name);835836return (error);837}838ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);839840static int841__zpl_xattr_trusted_set(zidmap_t *user_ns,842struct inode *ip, const char *name,843const void *value, size_t size, int flags)844{845(void) user_ns;846char *xattr_name;847int error;848849if (!capable(CAP_SYS_ADMIN))850return (-EACCES);851/* xattr_resolve_name will do this for us if this is defined */852xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);853error = zpl_xattr_set(ip, xattr_name, value, size, flags);854kmem_strfree(xattr_name);855856return (error);857}858ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);859860static xattr_handler_t zpl_xattr_trusted_handler = {861.prefix = XATTR_TRUSTED_PREFIX,862.list = zpl_xattr_trusted_list,863.get = zpl_xattr_trusted_get,864.set = zpl_xattr_trusted_set,865};866867/*868* Extended security attributes869*870* "The security attribute namespace is used by kernel security modules,871* such as Security Enhanced Linux, and also to implement file872* capabilities (see capabilities(7)). Read and write access873* permissions to security attributes depend on the policy implemented874* for each security attribute by the security module. 
When no security875* module is loaded, all processes have read access to extended security876* attributes, and write access is limited to processes that have the877* CAP_SYS_ADMIN capability." - xattr(7)878*/879static int880__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,881const char *name, size_t name_len)882{883return (1);884}885ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);886887static int888__zpl_xattr_security_get(struct inode *ip, const char *name,889void *value, size_t size)890{891char *xattr_name;892int error;893/* xattr_resolve_name will do this for us if this is defined */894xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);895error = zpl_xattr_get(ip, xattr_name, value, size);896kmem_strfree(xattr_name);897898return (error);899}900ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);901902static int903__zpl_xattr_security_set(zidmap_t *user_ns,904struct inode *ip, const char *name,905const void *value, size_t size, int flags)906{907(void) user_ns;908char *xattr_name;909int error;910/* xattr_resolve_name will do this for us if this is defined */911xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);912error = zpl_xattr_set(ip, xattr_name, value, size, flags);913kmem_strfree(xattr_name);914915return (error);916}917ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);918919static int920zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,921void *fs_info)922{923const struct xattr *xattr;924int error = 0;925926for (xattr = xattrs; xattr->name != NULL; xattr++) {927error = __zpl_xattr_security_set(NULL, ip,928xattr->name, xattr->value, xattr->value_len, 0);929930if (error < 0)931break;932}933934return (error);935}936937int938zpl_xattr_security_init(struct inode *ip, struct inode *dip,939const struct qstr *qstr)940{941return security_inode_init_security(ip, dip, qstr,942&zpl_xattr_security_init_impl, NULL);943}944945/*946* Security xattr namespace handlers.947*/948static xattr_handler_t 
zpl_xattr_security_handler = {949.prefix = XATTR_SECURITY_PREFIX,950.list = zpl_xattr_security_list,951.get = zpl_xattr_security_get,952.set = zpl_xattr_security_set,953};954955/*956* Extended system attributes957*958* "Extended system attributes are used by the kernel to store system959* objects such as Access Control Lists. Read and write access permissions960* to system attributes depend on the policy implemented for each system961* attribute implemented by filesystems in the kernel." - xattr(7)962*/963#ifdef CONFIG_FS_POSIX_ACL964static int965zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)966{967char *name, *value = NULL;968int error = 0;969size_t size = 0;970971if (S_ISLNK(ip->i_mode))972return (-EOPNOTSUPP);973974switch (type) {975case ACL_TYPE_ACCESS:976name = XATTR_NAME_POSIX_ACL_ACCESS;977if (acl) {978umode_t mode = ip->i_mode;979error = posix_acl_equiv_mode(acl, &mode);980if (error < 0) {981return (error);982} else {983/*984* The mode bits will have been set by985* ->zfs_setattr()->zfs_acl_chmod_setattr()986* using the ZFS ACL conversion. If they987* differ from the Posix ACL conversion dirty988* the inode to write the Posix mode bits.989*/990if (ip->i_mode != mode) {991ip->i_mode = ITOZ(ip)->z_mode = mode;992zpl_inode_set_ctime_to_ts(ip,993current_time(ip));994zfs_mark_inode_dirty(ip);995}996997if (error == 0)998acl = NULL;999}1000}1001break;10021003case ACL_TYPE_DEFAULT:1004name = XATTR_NAME_POSIX_ACL_DEFAULT;1005if (!S_ISDIR(ip->i_mode))1006return (acl ? 
-EACCES : 0);1007break;10081009default:1010return (-EINVAL);1011}10121013if (acl) {1014size = posix_acl_xattr_size(acl->a_count);1015value = kmem_alloc(size, KM_SLEEP);10161017error = zpl_acl_to_xattr(acl, value, size);1018if (error < 0) {1019kmem_free(value, size);1020return (error);1021}1022}10231024error = zpl_xattr_set(ip, name, value, size, 0);1025if (value)1026kmem_free(value, size);10271028if (!error) {1029if (acl)1030set_cached_acl(ip, type, acl);1031else1032forget_cached_acl(ip, type);1033}10341035return (error);1036}10371038int1039#ifdef HAVE_SET_ACL_USERNS1040zpl_set_acl(struct user_namespace *userns, struct inode *ip,1041struct posix_acl *acl, int type)1042#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)1043zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,1044struct posix_acl *acl, int type)1045#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)1046zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,1047struct posix_acl *acl, int type)1048#else1049zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)1050#endif /* HAVE_SET_ACL_USERNS */1051{1052#ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG21053return (zpl_set_acl_impl(d_inode(dentry), acl, type));1054#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)1055return (zpl_set_acl_impl(d_inode(dentry), acl, type));1056#else1057return (zpl_set_acl_impl(ip, acl, type));1058#endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */1059}10601061static struct posix_acl *1062zpl_get_acl_impl(struct inode *ip, int type)1063{1064struct posix_acl *acl;1065void *value = NULL;1066char *name;10671068switch (type) {1069case ACL_TYPE_ACCESS:1070name = XATTR_NAME_POSIX_ACL_ACCESS;1071break;1072case ACL_TYPE_DEFAULT:1073name = XATTR_NAME_POSIX_ACL_DEFAULT;1074break;1075default:1076return (ERR_PTR(-EINVAL));1077}10781079int size = zpl_xattr_get(ip, name, NULL, 0);1080if (size > 0) {1081value = kmem_alloc(size, KM_SLEEP);1082size = zpl_xattr_get(ip, name, value, size);1083}10841085if (size > 0) {1086acl = zpl_acl_from_xattr(value, 
size);1087} else if (size == -ENODATA || size == -ENOSYS) {1088acl = NULL;1089} else {1090acl = ERR_PTR(-EIO);1091}10921093if (size > 0)1094kmem_free(value, size);10951096return (acl);1097}10981099#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)1100struct posix_acl *1101zpl_get_acl(struct inode *ip, int type, bool rcu)1102{1103if (rcu)1104return (ERR_PTR(-ECHILD));11051106return (zpl_get_acl_impl(ip, type));1107}1108#elif defined(HAVE_GET_ACL)1109struct posix_acl *1110zpl_get_acl(struct inode *ip, int type)1111{1112return (zpl_get_acl_impl(ip, type));1113}1114#else1115#error "Unsupported iops->get_acl() implementation"1116#endif /* HAVE_GET_ACL_RCU */11171118int1119zpl_init_acl(struct inode *ip, struct inode *dir)1120{1121struct posix_acl *acl = NULL;1122int error = 0;11231124if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1125return (0);11261127if (!S_ISLNK(ip->i_mode)) {1128acl = zpl_get_acl_impl(dir, ACL_TYPE_DEFAULT);1129if (IS_ERR(acl))1130return (PTR_ERR(acl));1131if (!acl) {1132ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());1133zpl_inode_set_ctime_to_ts(ip, current_time(ip));1134zfs_mark_inode_dirty(ip);1135return (0);1136}1137}11381139if (acl) {1140umode_t mode;11411142if (S_ISDIR(ip->i_mode)) {1143error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);1144if (error)1145goto out;1146}11471148mode = ip->i_mode;1149error = __posix_acl_create(&acl, GFP_KERNEL, &mode);1150if (error >= 0) {1151ip->i_mode = ITOZ(ip)->z_mode = mode;1152zfs_mark_inode_dirty(ip);1153if (error > 0) {1154error = zpl_set_acl_impl(ip, acl,1155ACL_TYPE_ACCESS);1156}1157}1158}1159out:1160zpl_posix_acl_release(acl);11611162return (error);1163}11641165int1166zpl_chmod_acl(struct inode *ip)1167{1168struct posix_acl *acl;1169int error;11701171if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1172return (0);11731174if (S_ISLNK(ip->i_mode))1175return (-EOPNOTSUPP);11761177acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);1178if (IS_ERR(acl) || !acl)1179return (PTR_ERR(acl));11801181error 
= __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);1182if (!error)1183error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);11841185zpl_posix_acl_release(acl);11861187return (error);1188}11891190static int1191__zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,1192const char *name, size_t name_len)1193{1194char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;1195size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);11961197if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1198return (0);11991200if (list && xattr_size <= list_size)1201memcpy(list, xattr_name, xattr_size);12021203return (xattr_size);1204}1205ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);12061207static int1208__zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,1209const char *name, size_t name_len)1210{1211char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;1212size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);12131214if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1215return (0);12161217if (list && xattr_size <= list_size)1218memcpy(list, xattr_name, xattr_size);12191220return (xattr_size);1221}1222ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);12231224static int1225__zpl_xattr_acl_get_access(struct inode *ip, const char *name,1226void *buffer, size_t size)1227{1228struct posix_acl *acl;1229int type = ACL_TYPE_ACCESS;1230int error;1231/* xattr_resolve_name will do this for us if this is defined */1232if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1233return (-EOPNOTSUPP);12341235acl = zpl_get_acl_impl(ip, type);1236if (IS_ERR(acl))1237return (PTR_ERR(acl));1238if (acl == NULL)1239return (-ENODATA);12401241error = zpl_acl_to_xattr(acl, buffer, size);1242zpl_posix_acl_release(acl);12431244return (error);1245}1246ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);12471248static int1249__zpl_xattr_acl_get_default(struct inode *ip, const char *name,1250void *buffer, size_t size)1251{1252struct posix_acl *acl;1253int type = ACL_TYPE_DEFAULT;1254int 
error;1255/* xattr_resolve_name will do this for us if this is defined */1256if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1257return (-EOPNOTSUPP);12581259acl = zpl_get_acl_impl(ip, type);1260if (IS_ERR(acl))1261return (PTR_ERR(acl));1262if (acl == NULL)1263return (-ENODATA);12641265error = zpl_acl_to_xattr(acl, buffer, size);1266zpl_posix_acl_release(acl);12671268return (error);1269}1270ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);12711272static int1273__zpl_xattr_acl_set_access(zidmap_t *mnt_ns,1274struct inode *ip, const char *name,1275const void *value, size_t size, int flags)1276{1277struct posix_acl *acl;1278int type = ACL_TYPE_ACCESS;1279int error = 0;1280/* xattr_resolve_name will do this for us if this is defined */1281if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1282return (-EOPNOTSUPP);12831284#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)1285if (!zpl_inode_owner_or_capable(mnt_ns, ip))1286return (-EPERM);1287#else1288(void) mnt_ns;1289if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))1290return (-EPERM);1291#endif12921293if (value) {1294acl = zpl_acl_from_xattr(value, size);1295if (IS_ERR(acl))1296return (PTR_ERR(acl));1297else if (acl) {1298error = posix_acl_valid(ip->i_sb->s_user_ns, acl);1299if (error) {1300zpl_posix_acl_release(acl);1301return (error);1302}1303}1304} else {1305acl = NULL;1306}1307error = zpl_set_acl_impl(ip, acl, type);1308zpl_posix_acl_release(acl);13091310return (error);1311}1312ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);13131314static int1315__zpl_xattr_acl_set_default(zidmap_t *mnt_ns,1316struct inode *ip, const char *name,1317const void *value, size_t size, int flags)1318{1319struct posix_acl *acl;1320int type = ACL_TYPE_DEFAULT;1321int error = 0;1322/* xattr_resolve_name will do this for us if this is defined */1323if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)1324return (-EOPNOTSUPP);13251326#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)1327if 
(!zpl_inode_owner_or_capable(mnt_ns, ip))1328return (-EPERM);1329#else1330(void) mnt_ns;1331if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))1332return (-EPERM);1333#endif13341335if (value) {1336acl = zpl_acl_from_xattr(value, size);1337if (IS_ERR(acl))1338return (PTR_ERR(acl));1339else if (acl) {1340error = posix_acl_valid(ip->i_sb->s_user_ns, acl);1341if (error) {1342zpl_posix_acl_release(acl);1343return (error);1344}1345}1346} else {1347acl = NULL;1348}13491350error = zpl_set_acl_impl(ip, acl, type);1351zpl_posix_acl_release(acl);13521353return (error);1354}1355ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);13561357/*1358* ACL access xattr namespace handlers.1359*1360* Use .name instead of .prefix when available. xattr_resolve_name will match1361* whole name and reject anything that has .name only as prefix.1362*/1363static xattr_handler_t zpl_xattr_acl_access_handler = {1364.name = XATTR_NAME_POSIX_ACL_ACCESS,1365.list = zpl_xattr_acl_list_access,1366.get = zpl_xattr_acl_get_access,1367.set = zpl_xattr_acl_set_access,1368.flags = ACL_TYPE_ACCESS,1369};13701371/*1372* ACL default xattr namespace handlers.1373*1374* Use .name instead of .prefix. 
xattr_resolve_name will match whole name and1375* reject anything that has .name only as prefix.1376*/1377static xattr_handler_t zpl_xattr_acl_default_handler = {1378.name = XATTR_NAME_POSIX_ACL_DEFAULT,1379.list = zpl_xattr_acl_list_default,1380.get = zpl_xattr_acl_get_default,1381.set = zpl_xattr_acl_set_default,1382.flags = ACL_TYPE_DEFAULT,1383};13841385#endif /* CONFIG_FS_POSIX_ACL */13861387xattr_handler_t *zpl_xattr_handlers[] = {1388&zpl_xattr_security_handler,1389&zpl_xattr_trusted_handler,1390&zpl_xattr_user_handler,1391#ifdef CONFIG_FS_POSIX_ACL1392&zpl_xattr_acl_access_handler,1393&zpl_xattr_acl_default_handler,1394#endif /* CONFIG_FS_POSIX_ACL */1395NULL1396};13971398static const struct xattr_handler *1399zpl_xattr_handler(const char *name)1400{1401if (strncmp(name, XATTR_USER_PREFIX,1402XATTR_USER_PREFIX_LEN) == 0)1403return (&zpl_xattr_user_handler);14041405if (strncmp(name, XATTR_TRUSTED_PREFIX,1406XATTR_TRUSTED_PREFIX_LEN) == 0)1407return (&zpl_xattr_trusted_handler);14081409if (strncmp(name, XATTR_SECURITY_PREFIX,1410XATTR_SECURITY_PREFIX_LEN) == 0)1411return (&zpl_xattr_security_handler);14121413#ifdef CONFIG_FS_POSIX_ACL1414if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,1415sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)1416return (&zpl_xattr_acl_access_handler);14171418if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,1419sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)1420return (&zpl_xattr_acl_default_handler);1421#endif /* CONFIG_FS_POSIX_ACL */14221423return (NULL);1424}14251426static enum xattr_permission1427zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)1428{1429const struct xattr_handler *handler;1430struct dentry *d __maybe_unused = xf->dentry;1431enum xattr_permission perm = XAPERM_ALLOW;14321433handler = zpl_xattr_handler(name);1434if (handler == NULL) {1435/* Do not expose FreeBSD system namespace xattrs. 
*/1436if (ZFS_XA_NS_PREFIX_MATCH(FREEBSD, name))1437return (XAPERM_DENY);1438/*1439* Anything that doesn't match a known namespace gets put in the1440* user namespace for compatibility with other platforms.1441*/1442perm = XAPERM_COMPAT;1443handler = &zpl_xattr_user_handler;1444}14451446if (handler->list) {1447if (!handler->list(d))1448return (XAPERM_DENY);1449}14501451return (perm);1452}14531454#ifdef CONFIG_FS_POSIX_ACL14551456struct acl_rel_struct {1457struct acl_rel_struct *next;1458struct posix_acl *acl;1459clock_t time;1460};14611462#define ACL_REL_GRACE (60*HZ)1463#define ACL_REL_WINDOW (1*HZ)1464#define ACL_REL_SCHED (ACL_REL_GRACE+ACL_REL_WINDOW)14651466/*1467* Lockless multi-producer single-consumer fifo list.1468* Nodes are added to tail and removed from head. Tail pointer is our1469* synchronization point. It always points to the next pointer of the last1470* node, or head if list is empty.1471*/1472static struct acl_rel_struct *acl_rel_head = NULL;1473static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;14741475static void1476zpl_posix_acl_free(void *arg)1477{1478struct acl_rel_struct *freelist = NULL;1479struct acl_rel_struct *a;1480clock_t new_time;1481boolean_t refire = B_FALSE;14821483ASSERT3P(acl_rel_head, !=, NULL);1484while (acl_rel_head) {1485a = acl_rel_head;1486if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {1487/*1488* If a is the last node we need to reset tail, but we1489* need to use cmpxchg to make sure it is still the1490* last node.1491*/1492if (acl_rel_tail == &a->next) {1493acl_rel_head = NULL;1494if (cmpxchg(&acl_rel_tail, &a->next,1495&acl_rel_head) == &a->next) {1496ASSERT0P(a->next);1497a->next = freelist;1498freelist = a;1499break;1500}1501}1502/*1503* a is not last node, make sure next pointer is set1504* by the adder and advance the head.1505*/1506while (READ_ONCE(a->next) == NULL)1507cpu_relax();1508acl_rel_head = a->next;1509a->next = freelist;1510freelist = a;1511} else {1512/*1513* a is still in grace period. 
We are responsible to1514* reschedule the free task, since adder will only do1515* so if list is empty.1516*/1517new_time = a->time + ACL_REL_SCHED;1518refire = B_TRUE;1519break;1520}1521}15221523if (refire)1524taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,1525NULL, TQ_SLEEP, new_time);15261527while (freelist) {1528a = freelist;1529freelist = a->next;1530kfree(a->acl);1531kmem_free(a, sizeof (struct acl_rel_struct));1532}1533}15341535void1536zpl_posix_acl_release_impl(struct posix_acl *acl)1537{1538struct acl_rel_struct *a, **prev;15391540a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);1541a->next = NULL;1542a->acl = acl;1543a->time = ddi_get_lbolt();1544/* atomically points tail to us and get the previous tail */1545prev = xchg(&acl_rel_tail, &a->next);1546ASSERT0P(*prev);1547*prev = a;1548/* if it was empty before, schedule the free task */1549if (prev == &acl_rel_head)1550taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,1551NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);1552}1553#endif15541555ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,1556"Use legacy ZFS xattr naming for writing new user namespace xattrs");155715581559