Path: blob/main/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
108106 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/21/*22* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.23* Copyright (c) 2013 by Delphix. All rights reserved.24* Copyright 2014 Nexenta Systems, Inc. 
All rights reserved.25*/262728#include <sys/types.h>29#include <sys/param.h>30#include <sys/time.h>31#include <sys/sysmacros.h>32#include <sys/vfs.h>33#include <sys/vnode.h>34#include <sys/sid.h>35#include <sys/file.h>36#include <sys/stat.h>37#include <sys/kmem.h>38#include <sys/cmn_err.h>39#include <sys/errno.h>40#include <sys/fs/zfs.h>41#include <sys/policy.h>42#include <sys/zfs_znode.h>43#include <sys/zfs_fuid.h>44#include <sys/zfs_acl.h>45#include <sys/zfs_dir.h>46#include <sys/zfs_quota.h>47#include <sys/zfs_vfsops.h>48#include <sys/dmu.h>49#include <sys/dnode.h>50#include <sys/zap.h>51#include <sys/sa.h>52#include <sys/trace_acl.h>53#include <sys/zpl.h>5455#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE56#define DENY ACE_ACCESS_DENIED_ACE_TYPE57#define MAX_ACE_TYPE ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE58#define MIN_ACE_TYPE ALLOW5960#define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP)61#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \62ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)63#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \64ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)65#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \66ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)6768#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \69ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \70ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \71ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)7273#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)74#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \75ACE_DELETE|ACE_DELETE_CHILD)76#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)7778#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \79ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)8081#define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| 
\82ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)8384#define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \85ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)8687#define RESTRICTED_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER)8889#define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\90ZFS_ACL_PROTECTED)9192#define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\93ZFS_ACL_OBJ_ACE)9495#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)9697#define IDMAP_WK_CREATOR_OWNER_UID 2147483648U9899static uint16_t100zfs_ace_v0_get_type(void *acep)101{102return (((zfs_oldace_t *)acep)->z_type);103}104105static uint16_t106zfs_ace_v0_get_flags(void *acep)107{108return (((zfs_oldace_t *)acep)->z_flags);109}110111static uint32_t112zfs_ace_v0_get_mask(void *acep)113{114return (((zfs_oldace_t *)acep)->z_access_mask);115}116117static uint64_t118zfs_ace_v0_get_who(void *acep)119{120return (((zfs_oldace_t *)acep)->z_fuid);121}122123static void124zfs_ace_v0_set_type(void *acep, uint16_t type)125{126((zfs_oldace_t *)acep)->z_type = type;127}128129static void130zfs_ace_v0_set_flags(void *acep, uint16_t flags)131{132((zfs_oldace_t *)acep)->z_flags = flags;133}134135static void136zfs_ace_v0_set_mask(void *acep, uint32_t mask)137{138((zfs_oldace_t *)acep)->z_access_mask = mask;139}140141static void142zfs_ace_v0_set_who(void *acep, uint64_t who)143{144((zfs_oldace_t *)acep)->z_fuid = who;145}146147static size_t148zfs_ace_v0_size(void *acep)149{150(void) acep;151return (sizeof (zfs_oldace_t));152}153154static size_t155zfs_ace_v0_abstract_size(void)156{157return (sizeof (zfs_oldace_t));158}159160static int161zfs_ace_v0_mask_off(void)162{163return (offsetof(zfs_oldace_t, z_access_mask));164}165166static int167zfs_ace_v0_data(void *acep, void **datap)168{169(void) acep;170*datap = NULL;171return (0);172}173174static const acl_ops_t zfs_acl_v0_ops = {175.ace_mask_get = zfs_ace_v0_get_mask,176.ace_mask_set = 
zfs_ace_v0_set_mask,177.ace_flags_get = zfs_ace_v0_get_flags,178.ace_flags_set = zfs_ace_v0_set_flags,179.ace_type_get = zfs_ace_v0_get_type,180.ace_type_set = zfs_ace_v0_set_type,181.ace_who_get = zfs_ace_v0_get_who,182.ace_who_set = zfs_ace_v0_set_who,183.ace_size = zfs_ace_v0_size,184.ace_abstract_size = zfs_ace_v0_abstract_size,185.ace_mask_off = zfs_ace_v0_mask_off,186.ace_data = zfs_ace_v0_data187};188189static uint16_t190zfs_ace_fuid_get_type(void *acep)191{192return (((zfs_ace_hdr_t *)acep)->z_type);193}194195static uint16_t196zfs_ace_fuid_get_flags(void *acep)197{198return (((zfs_ace_hdr_t *)acep)->z_flags);199}200201static uint32_t202zfs_ace_fuid_get_mask(void *acep)203{204return (((zfs_ace_hdr_t *)acep)->z_access_mask);205}206207static uint64_t208zfs_ace_fuid_get_who(void *args)209{210uint16_t entry_type;211zfs_ace_t *acep = args;212213entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;214215if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||216entry_type == ACE_EVERYONE)217return (-1);218return (((zfs_ace_t *)acep)->z_fuid);219}220221static void222zfs_ace_fuid_set_type(void *acep, uint16_t type)223{224((zfs_ace_hdr_t *)acep)->z_type = type;225}226227static void228zfs_ace_fuid_set_flags(void *acep, uint16_t flags)229{230((zfs_ace_hdr_t *)acep)->z_flags = flags;231}232233static void234zfs_ace_fuid_set_mask(void *acep, uint32_t mask)235{236((zfs_ace_hdr_t *)acep)->z_access_mask = mask;237}238239static void240zfs_ace_fuid_set_who(void *arg, uint64_t who)241{242zfs_ace_t *acep = arg;243244uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;245246if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||247entry_type == ACE_EVERYONE)248return;249acep->z_fuid = who;250}251252static size_t253zfs_ace_fuid_size(void *acep)254{255zfs_ace_hdr_t *zacep = acep;256uint16_t entry_type;257258switch (zacep->z_type) {259case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:260case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:261case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:262case 
ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:263return (sizeof (zfs_object_ace_t));264case ALLOW:265case DENY:266entry_type =267(((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);268if (entry_type == ACE_OWNER ||269entry_type == OWNING_GROUP ||270entry_type == ACE_EVERYONE)271return (sizeof (zfs_ace_hdr_t));272zfs_fallthrough;273default:274return (sizeof (zfs_ace_t));275}276}277278static size_t279zfs_ace_fuid_abstract_size(void)280{281return (sizeof (zfs_ace_hdr_t));282}283284static int285zfs_ace_fuid_mask_off(void)286{287return (offsetof(zfs_ace_hdr_t, z_access_mask));288}289290static int291zfs_ace_fuid_data(void *acep, void **datap)292{293zfs_ace_t *zacep = acep;294zfs_object_ace_t *zobjp;295296switch (zacep->z_hdr.z_type) {297case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:298case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:299case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:300case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:301zobjp = acep;302*datap = (caddr_t)zobjp + sizeof (zfs_ace_t);303return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));304default:305*datap = NULL;306return (0);307}308}309310static const acl_ops_t zfs_acl_fuid_ops = {311.ace_mask_get = zfs_ace_fuid_get_mask,312.ace_mask_set = zfs_ace_fuid_set_mask,313.ace_flags_get = zfs_ace_fuid_get_flags,314.ace_flags_set = zfs_ace_fuid_set_flags,315.ace_type_get = zfs_ace_fuid_get_type,316.ace_type_set = zfs_ace_fuid_set_type,317.ace_who_get = zfs_ace_fuid_get_who,318.ace_who_set = zfs_ace_fuid_set_who,319.ace_size = zfs_ace_fuid_size,320.ace_abstract_size = zfs_ace_fuid_abstract_size,321.ace_mask_off = zfs_ace_fuid_mask_off,322.ace_data = zfs_ace_fuid_data323};324325/*326* The following three functions are provided for compatibility with327* older ZPL version in order to determine if the file use to have328* an external ACL and what version of ACL previously existed on the329* file. 
Would really be nice to not need this, sigh.330*/331uint64_t332zfs_external_acl(znode_t *zp)333{334zfs_acl_phys_t acl_phys;335int error;336337if (zp->z_is_sa)338return (0);339340/*341* Need to deal with a potential342* race where zfs_sa_upgrade could cause343* z_isa_sa to change.344*345* If the lookup fails then the state of z_is_sa should have346* changed.347*/348349if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)),350&acl_phys, sizeof (acl_phys))) == 0)351return (acl_phys.z_acl_extern_obj);352else {353/*354* after upgrade the SA_ZPL_ZNODE_ACL should have been355* removed356*/357VERIFY(zp->z_is_sa && error == ENOENT);358return (0);359}360}361362/*363* Determine size of ACL in bytes364*365* This is more complicated than it should be since we have to deal366* with old external ACLs.367*/368static int369zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,370zfs_acl_phys_t *aclphys)371{372zfsvfs_t *zfsvfs = ZTOZSB(zp);373uint64_t acl_count;374int size;375int error;376377ASSERT(MUTEX_HELD(&zp->z_acl_lock));378if (zp->z_is_sa) {379if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),380&size)) != 0)381return (error);382*aclsize = size;383if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),384&acl_count, sizeof (acl_count))) != 0)385return (error);386*aclcount = acl_count;387} else {388if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),389aclphys, sizeof (*aclphys))) != 0)390return (error);391392if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {393*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);394*aclcount = aclphys->z_acl_size;395} else {396*aclsize = aclphys->z_acl_size;397*aclcount = aclphys->z_acl_count;398}399}400return (0);401}402403int404zfs_znode_acl_version(znode_t *zp)405{406zfs_acl_phys_t acl_phys;407408if (zp->z_is_sa)409return (ZFS_ACL_VERSION_FUID);410else {411int error;412413/*414* Need to deal with a potential415* race where zfs_sa_upgrade could cause416* z_isa_sa to change.417*418* If the lookup 
fails then the state of z_is_sa should have419* changed.420*/421if ((error = sa_lookup(zp->z_sa_hdl,422SA_ZPL_ZNODE_ACL(ZTOZSB(zp)),423&acl_phys, sizeof (acl_phys))) == 0)424return (acl_phys.z_acl_version);425else {426/*427* After upgrade SA_ZPL_ZNODE_ACL should have428* been removed.429*/430VERIFY(zp->z_is_sa && error == ENOENT);431return (ZFS_ACL_VERSION_FUID);432}433}434}435436static int437zfs_acl_version(int version)438{439if (version < ZPL_VERSION_FUID)440return (ZFS_ACL_VERSION_INITIAL);441else442return (ZFS_ACL_VERSION_FUID);443}444445static int446zfs_acl_version_zp(znode_t *zp)447{448return (zfs_acl_version(ZTOZSB(zp)->z_version));449}450451zfs_acl_t *452zfs_acl_alloc(int vers)453{454zfs_acl_t *aclp;455456aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);457list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),458offsetof(zfs_acl_node_t, z_next));459aclp->z_version = vers;460if (vers == ZFS_ACL_VERSION_FUID)461aclp->z_ops = &zfs_acl_fuid_ops;462else463aclp->z_ops = &zfs_acl_v0_ops;464return (aclp);465}466467zfs_acl_node_t *468zfs_acl_node_alloc(size_t bytes)469{470zfs_acl_node_t *aclnode;471472aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);473if (bytes) {474aclnode->z_acldata = kmem_zalloc(bytes, KM_SLEEP);475aclnode->z_allocdata = aclnode->z_acldata;476aclnode->z_allocsize = bytes;477aclnode->z_size = bytes;478}479480return (aclnode);481}482483static void484zfs_acl_node_free(zfs_acl_node_t *aclnode)485{486if (aclnode->z_allocsize)487kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);488kmem_free(aclnode, sizeof (zfs_acl_node_t));489}490491static void492zfs_acl_release_nodes(zfs_acl_t *aclp)493{494zfs_acl_node_t *aclnode;495496while ((aclnode = list_remove_head(&aclp->z_acl)))497zfs_acl_node_free(aclnode);498aclp->z_acl_count = 0;499aclp->z_acl_bytes = 0;500}501502void503zfs_acl_free(zfs_acl_t *aclp)504{505zfs_acl_release_nodes(aclp);506list_destroy(&aclp->z_acl);507kmem_free(aclp, sizeof (zfs_acl_t));508}509510static 
boolean_t511zfs_acl_valid_ace_type(uint_t type, uint_t flags)512{513uint16_t entry_type;514515switch (type) {516case ALLOW:517case DENY:518case ACE_SYSTEM_AUDIT_ACE_TYPE:519case ACE_SYSTEM_ALARM_ACE_TYPE:520entry_type = flags & ACE_TYPE_FLAGS;521return (entry_type == ACE_OWNER ||522entry_type == OWNING_GROUP ||523entry_type == ACE_EVERYONE || entry_type == 0 ||524entry_type == ACE_IDENTIFIER_GROUP);525default:526if (type <= MAX_ACE_TYPE)527return (B_TRUE);528}529return (B_FALSE);530}531532static boolean_t533zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)534{535/*536* first check type of entry537*/538539if (!zfs_acl_valid_ace_type(type, iflags))540return (B_FALSE);541542switch (type) {543case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:544case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:545case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:546case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:547if (aclp->z_version < ZFS_ACL_VERSION_FUID)548return (B_FALSE);549aclp->z_hints |= ZFS_ACL_OBJ_ACE;550}551552/*553* next check inheritance level flags554*/555556if (S_ISDIR(obj_mode) &&557(iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))558aclp->z_hints |= ZFS_INHERIT_ACE;559560if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {561if ((iflags & (ACE_FILE_INHERIT_ACE|562ACE_DIRECTORY_INHERIT_ACE)) == 0) {563return (B_FALSE);564}565}566567return (B_TRUE);568}569570static void *571zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,572uint32_t *access_mask, uint16_t *iflags, uint16_t *type)573{574zfs_acl_node_t *aclnode;575576ASSERT(aclp);577578if (start == NULL) {579aclnode = list_head(&aclp->z_acl);580if (aclnode == NULL)581return (NULL);582583aclp->z_next_ace = aclnode->z_acldata;584aclp->z_curr_node = aclnode;585aclnode->z_ace_idx = 0;586}587588aclnode = aclp->z_curr_node;589590if (aclnode == NULL)591return (NULL);592593if (aclnode->z_ace_idx >= aclnode->z_ace_count) {594aclnode = list_next(&aclp->z_acl, aclnode);595if (aclnode == NULL)596return 
(NULL);597else {598aclp->z_curr_node = aclnode;599aclnode->z_ace_idx = 0;600aclp->z_next_ace = aclnode->z_acldata;601}602}603604if (aclnode->z_ace_idx < aclnode->z_ace_count) {605void *acep = aclp->z_next_ace;606size_t ace_size;607608/*609* Make sure we don't overstep our bounds610*/611ace_size = aclp->z_ops->ace_size(acep);612613if (((caddr_t)acep + ace_size) >614((caddr_t)aclnode->z_acldata + aclnode->z_size)) {615return (NULL);616}617618*iflags = aclp->z_ops->ace_flags_get(acep);619*type = aclp->z_ops->ace_type_get(acep);620*access_mask = aclp->z_ops->ace_mask_get(acep);621*who = aclp->z_ops->ace_who_get(acep);622aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;623aclnode->z_ace_idx++;624625return ((void *)acep);626}627return (NULL);628}629630static uintptr_t631zfs_ace_walk(void *datap, uintptr_t cookie, int aclcnt,632uint16_t *flags, uint16_t *type, uint32_t *mask)633{634(void) aclcnt;635zfs_acl_t *aclp = datap;636zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)cookie;637uint64_t who;638639acep = zfs_acl_next_ace(aclp, acep, &who, mask,640flags, type);641return ((uintptr_t)acep);642}643644/*645* Copy ACE to internal ZFS format.646* While processing the ACL each ACE will be validated for correctness.647* ACE FUIDs will be created later.648*/649static int650zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *aclp,651void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,652zfs_fuid_info_t **fuidp, cred_t *cr)653{654int i;655uint16_t entry_type;656zfs_ace_t *aceptr = z_acl;657ace_t *acep = datap;658zfs_object_ace_t *zobjacep;659ace_object_t *aceobjp;660661for (i = 0; i != aclcnt; i++) {662aceptr->z_hdr.z_access_mask = acep->a_access_mask;663aceptr->z_hdr.z_flags = acep->a_flags;664aceptr->z_hdr.z_type = acep->a_type;665entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;666if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&667entry_type != ACE_EVERYONE) {668aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,669cr, (entry_type == 0) 
?670ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);671}672673/*674* Make sure ACE is valid675*/676if (zfs_ace_valid(obj_mode, aclp, aceptr->z_hdr.z_type,677aceptr->z_hdr.z_flags) != B_TRUE)678return (SET_ERROR(EINVAL));679680switch (acep->a_type) {681case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:682case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:683case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:684case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:685zobjacep = (zfs_object_ace_t *)aceptr;686aceobjp = (ace_object_t *)acep;687688memcpy(zobjacep->z_object_type, aceobjp->a_obj_type,689sizeof (aceobjp->a_obj_type));690memcpy(zobjacep->z_inherit_type,691aceobjp->a_inherit_obj_type,692sizeof (aceobjp->a_inherit_obj_type));693acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));694break;695default:696acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));697}698699aceptr = (zfs_ace_t *)((caddr_t)aceptr +700aclp->z_ops->ace_size(aceptr));701}702703*size = (caddr_t)aceptr - (caddr_t)z_acl;704705return (0);706}707708/*709* Copy ZFS ACEs to fixed size ace_t layout710*/711static void712zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,713void *datap, int filter)714{715uint64_t who;716uint32_t access_mask;717uint16_t iflags, type;718zfs_ace_hdr_t *zacep = NULL;719ace_t *acep = datap;720ace_object_t *objacep;721zfs_object_ace_t *zobjacep;722size_t ace_size;723uint16_t entry_type;724725while ((zacep = zfs_acl_next_ace(aclp, zacep,726&who, &access_mask, &iflags, &type))) {727728switch (type) {729case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:730case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:731case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:732case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:733if (filter) {734continue;735}736zobjacep = (zfs_object_ace_t *)zacep;737objacep = (ace_object_t *)acep;738memcpy(objacep->a_obj_type,739zobjacep->z_object_type,740sizeof (zobjacep->z_object_type));741memcpy(objacep->a_inherit_obj_type,742zobjacep->z_inherit_type,743sizeof (zobjacep->z_inherit_type));744ace_size = sizeof 
(ace_object_t);745break;746default:747ace_size = sizeof (ace_t);748break;749}750751entry_type = (iflags & ACE_TYPE_FLAGS);752if ((entry_type != ACE_OWNER &&753entry_type != OWNING_GROUP &&754entry_type != ACE_EVERYONE)) {755acep->a_who = zfs_fuid_map_id(zfsvfs, who,756cr, (entry_type & ACE_IDENTIFIER_GROUP) ?757ZFS_ACE_GROUP : ZFS_ACE_USER);758} else {759acep->a_who = (uid_t)(int64_t)who;760}761acep->a_access_mask = access_mask;762acep->a_flags = iflags;763acep->a_type = type;764acep = (ace_t *)((caddr_t)acep + ace_size);765}766}767768static int769zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep,770zfs_oldace_t *z_acl, int aclcnt, size_t *size)771{772int i;773zfs_oldace_t *aceptr = z_acl;774775for (i = 0; i != aclcnt; i++, aceptr++) {776aceptr->z_access_mask = acep[i].a_access_mask;777aceptr->z_type = acep[i].a_type;778aceptr->z_flags = acep[i].a_flags;779aceptr->z_fuid = acep[i].a_who;780/*781* Make sure ACE is valid782*/783if (zfs_ace_valid(obj_mode, aclp, aceptr->z_type,784aceptr->z_flags) != B_TRUE)785return (SET_ERROR(EINVAL));786}787*size = (caddr_t)aceptr - (caddr_t)z_acl;788return (0);789}790791/*792* convert old ACL format to new793*/794void795zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)796{797zfs_oldace_t *oldaclp;798int i;799uint16_t type, iflags;800uint32_t access_mask;801uint64_t who;802void *cookie = NULL;803zfs_acl_node_t *newaclnode;804805ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);806/*807* First create the ACE in a contiguous piece of memory808* for zfs_copy_ace_2_fuid().809*810* We only convert an ACL once, so this won't happen811* every time.812*/813oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,814KM_SLEEP);815i = 0;816while ((cookie = zfs_acl_next_ace(aclp, cookie, &who,817&access_mask, &iflags, &type))) {818oldaclp[i].z_flags = iflags;819oldaclp[i].z_type = type;820oldaclp[i].z_fuid = who;821oldaclp[i++].z_access_mask = access_mask;822}823824newaclnode = 
zfs_acl_node_alloc(aclp->z_acl_count *825sizeof (zfs_object_ace_t));826aclp->z_ops = &zfs_acl_fuid_ops;827VERIFY(zfs_copy_ace_2_fuid(ZTOZSB(zp), ZTOI(zp)->i_mode,828aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count,829&newaclnode->z_size, NULL, cr) == 0);830newaclnode->z_ace_count = aclp->z_acl_count;831aclp->z_version = ZFS_ACL_VERSION;832kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));833834/*835* Release all previous ACL nodes836*/837838zfs_acl_release_nodes(aclp);839840list_insert_head(&aclp->z_acl, newaclnode);841842aclp->z_acl_bytes = newaclnode->z_size;843aclp->z_acl_count = newaclnode->z_ace_count;844845}846847/*848* Convert unix access mask to v4 access mask849*/850static uint32_t851zfs_unix_to_v4(uint32_t access_mask)852{853uint32_t new_mask = 0;854855if (access_mask & S_IXOTH)856new_mask |= ACE_EXECUTE;857if (access_mask & S_IWOTH)858new_mask |= ACE_WRITE_DATA;859if (access_mask & S_IROTH)860new_mask |= ACE_READ_DATA;861return (new_mask);862}863864865static int866zfs_v4_to_unix(uint32_t access_mask, int *unmapped)867{868int new_mask = 0;869870*unmapped = access_mask &871(ACE_WRITE_OWNER | ACE_WRITE_ACL | ACE_DELETE);872873if (access_mask & WRITE_MASK)874new_mask |= S_IWOTH;875if (access_mask & ACE_READ_DATA)876new_mask |= S_IROTH;877if (access_mask & ACE_EXECUTE)878new_mask |= S_IXOTH;879880return (new_mask);881}882883884static void885zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,886uint16_t access_type, uint64_t fuid, uint16_t entry_type)887{888uint16_t type = entry_type & ACE_TYPE_FLAGS;889890aclp->z_ops->ace_mask_set(acep, access_mask);891aclp->z_ops->ace_type_set(acep, access_type);892aclp->z_ops->ace_flags_set(acep, entry_type);893if ((type != ACE_OWNER && type != OWNING_GROUP &&894type != ACE_EVERYONE))895aclp->z_ops->ace_who_set(acep, fuid);896}897898/*899* Determine mode of file based on ACL.900*/901uint64_t902zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,903uint64_t *pflags, uint64_t fuid, uint64_t 
fgid)904{905int entry_type;906mode_t mode;907mode_t seen = 0;908zfs_ace_hdr_t *acep = NULL;909uint64_t who;910uint16_t iflags, type;911uint32_t access_mask;912boolean_t an_exec_denied = B_FALSE;913914mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));915916while ((acep = zfs_acl_next_ace(aclp, acep, &who,917&access_mask, &iflags, &type))) {918919if (!zfs_acl_valid_ace_type(type, iflags))920continue;921922entry_type = (iflags & ACE_TYPE_FLAGS);923924/*925* Skip over any inherit_only ACEs926*/927if (iflags & ACE_INHERIT_ONLY_ACE)928continue;929930if (entry_type == ACE_OWNER || (entry_type == 0 &&931who == fuid)) {932if ((access_mask & ACE_READ_DATA) &&933(!(seen & S_IRUSR))) {934seen |= S_IRUSR;935if (type == ALLOW) {936mode |= S_IRUSR;937}938}939if ((access_mask & ACE_WRITE_DATA) &&940(!(seen & S_IWUSR))) {941seen |= S_IWUSR;942if (type == ALLOW) {943mode |= S_IWUSR;944}945}946if ((access_mask & ACE_EXECUTE) &&947(!(seen & S_IXUSR))) {948seen |= S_IXUSR;949if (type == ALLOW) {950mode |= S_IXUSR;951}952}953} else if (entry_type == OWNING_GROUP ||954(entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {955if ((access_mask & ACE_READ_DATA) &&956(!(seen & S_IRGRP))) {957seen |= S_IRGRP;958if (type == ALLOW) {959mode |= S_IRGRP;960}961}962if ((access_mask & ACE_WRITE_DATA) &&963(!(seen & S_IWGRP))) {964seen |= S_IWGRP;965if (type == ALLOW) {966mode |= S_IWGRP;967}968}969if ((access_mask & ACE_EXECUTE) &&970(!(seen & S_IXGRP))) {971seen |= S_IXGRP;972if (type == ALLOW) {973mode |= S_IXGRP;974}975}976} else if (entry_type == ACE_EVERYONE) {977if ((access_mask & ACE_READ_DATA)) {978if (!(seen & S_IRUSR)) {979seen |= S_IRUSR;980if (type == ALLOW) {981mode |= S_IRUSR;982}983}984if (!(seen & S_IRGRP)) {985seen |= S_IRGRP;986if (type == ALLOW) {987mode |= S_IRGRP;988}989}990if (!(seen & S_IROTH)) {991seen |= S_IROTH;992if (type == ALLOW) {993mode |= S_IROTH;994}995}996}997if ((access_mask & ACE_WRITE_DATA)) {998if (!(seen & S_IWUSR)) {999seen |= S_IWUSR;1000if (type == 
ALLOW) {1001mode |= S_IWUSR;1002}1003}1004if (!(seen & S_IWGRP)) {1005seen |= S_IWGRP;1006if (type == ALLOW) {1007mode |= S_IWGRP;1008}1009}1010if (!(seen & S_IWOTH)) {1011seen |= S_IWOTH;1012if (type == ALLOW) {1013mode |= S_IWOTH;1014}1015}1016}1017if ((access_mask & ACE_EXECUTE)) {1018if (!(seen & S_IXUSR)) {1019seen |= S_IXUSR;1020if (type == ALLOW) {1021mode |= S_IXUSR;1022}1023}1024if (!(seen & S_IXGRP)) {1025seen |= S_IXGRP;1026if (type == ALLOW) {1027mode |= S_IXGRP;1028}1029}1030if (!(seen & S_IXOTH)) {1031seen |= S_IXOTH;1032if (type == ALLOW) {1033mode |= S_IXOTH;1034}1035}1036}1037} else {1038/*1039* Only care if this IDENTIFIER_GROUP or1040* USER ACE denies execute access to someone,1041* mode is not affected1042*/1043if ((access_mask & ACE_EXECUTE) && type == DENY)1044an_exec_denied = B_TRUE;1045}1046}10471048/*1049* Failure to allow is effectively a deny, so execute permission1050* is denied if it was never mentioned or if we explicitly1051* weren't allowed it.1052*/1053if (!an_exec_denied &&1054((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||1055(mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))1056an_exec_denied = B_TRUE;10571058if (an_exec_denied)1059*pflags &= ~ZFS_NO_EXECS_DENIED;1060else1061*pflags |= ZFS_NO_EXECS_DENIED;10621063return (mode);1064}10651066/*1067* Read an external acl object. 
If the intent is to modify, always1068* create a new acl and leave any cached acl in place.1069*/1070int1071zfs_acl_node_read(struct znode *zp, boolean_t have_lock, zfs_acl_t **aclpp,1072boolean_t will_modify)1073{1074zfs_acl_t *aclp;1075int aclsize = 0;1076int acl_count = 0;1077zfs_acl_node_t *aclnode;1078zfs_acl_phys_t znode_acl;1079int version;1080int error;1081boolean_t drop_lock = B_FALSE;10821083ASSERT(MUTEX_HELD(&zp->z_acl_lock));10841085if (zp->z_acl_cached && !will_modify) {1086*aclpp = zp->z_acl_cached;1087return (0);1088}10891090/*1091* close race where znode could be upgrade while trying to1092* read the znode attributes.1093*1094* But this could only happen if the file isn't already an SA1095* znode1096*/1097if (!zp->z_is_sa && !have_lock) {1098mutex_enter(&zp->z_lock);1099drop_lock = B_TRUE;1100}1101version = zfs_znode_acl_version(zp);11021103if ((error = zfs_acl_znode_info(zp, &aclsize,1104&acl_count, &znode_acl)) != 0) {1105goto done;1106}11071108aclp = zfs_acl_alloc(version);11091110aclp->z_acl_count = acl_count;1111aclp->z_acl_bytes = aclsize;11121113aclnode = zfs_acl_node_alloc(aclsize);1114aclnode->z_ace_count = aclp->z_acl_count;1115aclnode->z_size = aclsize;11161117if (!zp->z_is_sa) {1118if (znode_acl.z_acl_extern_obj) {1119error = dmu_read(ZTOZSB(zp)->z_os,1120znode_acl.z_acl_extern_obj, 0, aclnode->z_size,1121aclnode->z_acldata, DMU_READ_PREFETCH);1122} else {1123memcpy(aclnode->z_acldata, znode_acl.z_ace_data,1124aclnode->z_size);1125}1126} else {1127error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(ZTOZSB(zp)),1128aclnode->z_acldata, aclnode->z_size);1129}11301131if (error != 0) {1132zfs_acl_free(aclp);1133zfs_acl_node_free(aclnode);1134/* convert checksum errors into IO errors */1135if (error == ECKSUM)1136error = SET_ERROR(EIO);1137goto done;1138}11391140list_insert_head(&aclp->z_acl, aclnode);11411142*aclpp = aclp;1143if (!will_modify)1144zp->z_acl_cached = aclp;1145done:1146if (drop_lock)1147mutex_exit(&zp->z_lock);1148return 
(error);1149}11501151void1152zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,1153boolean_t start, void *userdata)1154{1155(void) buflen;1156zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;11571158if (start) {1159cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);1160} else {1161cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,1162cb->cb_acl_node);1163}1164ASSERT3P(cb->cb_acl_node, !=, NULL);1165*dataptr = cb->cb_acl_node->z_acldata;1166*length = cb->cb_acl_node->z_size;1167}11681169int1170zfs_acl_chown_setattr(znode_t *zp)1171{1172int error;1173zfs_acl_t *aclp;11741175if (ZTOZSB(zp)->z_acl_type == ZFS_ACLTYPE_POSIX)1176return (0);11771178ASSERT(MUTEX_HELD(&zp->z_lock));1179ASSERT(MUTEX_HELD(&zp->z_acl_lock));11801181error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);1182if (error == 0 && aclp->z_acl_count > 0)1183zp->z_mode = ZTOI(zp)->i_mode =1184zfs_mode_compute(zp->z_mode, aclp,1185&zp->z_pflags, KUID_TO_SUID(ZTOI(zp)->i_uid),1186KGID_TO_SGID(ZTOI(zp)->i_gid));11871188/*1189* Some ZFS implementations (ZEVO) create neither a ZNODE_ACL1190* nor a DACL_ACES SA in which case ENOENT is returned from1191* zfs_acl_node_read() when the SA can't be located.1192* Allow chown/chgrp to succeed in these cases rather than1193* returning an error that makes no sense in the context of1194* the caller.1195*/1196if (error == ENOENT)1197return (0);11981199return (error);1200}12011202typedef struct trivial_acl {1203uint32_t allow0; /* allow mask for bits only in owner */1204uint32_t deny1; /* deny mask for bits not in owner */1205uint32_t deny2; /* deny mask for bits not in group */1206uint32_t owner; /* allow mask matching mode */1207uint32_t group; /* allow mask matching mode */1208uint32_t everyone; /* allow mask matching mode */1209} trivial_acl_t;12101211static void1212acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)1213{1214uint32_t read_mask = ACE_READ_DATA;1215uint32_t write_mask = 
ACE_WRITE_DATA|ACE_APPEND_DATA;1216uint32_t execute_mask = ACE_EXECUTE;12171218if (isdir)1219write_mask |= ACE_DELETE_CHILD;12201221masks->deny1 = 0;12221223if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH)))1224masks->deny1 |= read_mask;1225if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH)))1226masks->deny1 |= write_mask;1227if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH)))1228masks->deny1 |= execute_mask;12291230masks->deny2 = 0;1231if (!(mode & S_IRGRP) && (mode & S_IROTH))1232masks->deny2 |= read_mask;1233if (!(mode & S_IWGRP) && (mode & S_IWOTH))1234masks->deny2 |= write_mask;1235if (!(mode & S_IXGRP) && (mode & S_IXOTH))1236masks->deny2 |= execute_mask;12371238masks->allow0 = 0;1239if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH)))1240masks->allow0 |= read_mask;1241if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH)))1242masks->allow0 |= write_mask;1243if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH)))1244masks->allow0 |= execute_mask;12451246masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|1247ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES|1248ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE;1249if (mode & S_IRUSR)1250masks->owner |= read_mask;1251if (mode & S_IWUSR)1252masks->owner |= write_mask;1253if (mode & S_IXUSR)1254masks->owner |= execute_mask;12551256masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS|1257ACE_SYNCHRONIZE;1258if (mode & S_IRGRP)1259masks->group |= read_mask;1260if (mode & S_IWGRP)1261masks->group |= write_mask;1262if (mode & S_IXGRP)1263masks->group |= execute_mask;12641265masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS|1266ACE_SYNCHRONIZE;1267if (mode & S_IROTH)1268masks->everyone |= read_mask;1269if (mode & S_IWOTH)1270masks->everyone |= write_mask;1271if (mode & S_IXOTH)1272masks->everyone |= execute_mask;1273}12741275/*1276* ace_trivial:1277* determine whether an ace_t acl is trivial1278*1279* Trivialness implies that the acl is 
/*
 * ace_trivial:
 * determine whether an ace_t acl is trivial
 *
 * Trivialness implies that the acl is composed of only
 * owner, group, everyone entries.  ACL can't
 * have read_acl denied, and write_owner/write_acl/write_attributes
 * can only be owner@ entry.
 *
 * The ACL is visited through the supplied walk callback, which returns a
 * non-zero cookie for every entry and zero when the walk is finished.
 *
 * Returns 0 if the ACL is trivial, 1 otherwise.
 */
static int
ace_trivial_common(void *acep, int aclcnt,
    uintptr_t (*walk)(void *, uintptr_t, int,
    uint16_t *, uint16_t *, uint32_t *))
{
	uint16_t flags;
	uint32_t mask;
	uint16_t type;
	uint64_t cookie = 0;

	while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) {
		/* Only owner@, group@ and everyone@ entries are trivial. */
		switch (flags & ACE_TYPE_FLAGS) {
		case ACE_OWNER:
		case ACE_GROUP|ACE_IDENTIFIER_GROUP:
		case ACE_EVERYONE:
			break;
		default:
			return (1);
		}

		/* Any inheritance flag makes the ACL non-trivial. */
		if (flags & (ACE_FILE_INHERIT_ACE|
		    ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|
		    ACE_INHERIT_ONLY_ACE))
			return (1);

		/*
		 * Special check for some special bits
		 *
		 * Don't allow anybody to deny reading basic
		 * attributes or a files ACL.
		 */
		if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
		    (type == ACE_ACCESS_DENIED_ACE_TYPE))
			return (1);

		/*
		 * Delete permission is never set by default
		 */
		if (mask & ACE_DELETE)
			return (1);

		/*
		 * Child delete permission should be accompanied by write
		 */
		if ((mask & ACE_DELETE_CHILD) && !(mask & ACE_WRITE_DATA))
			return (1);

		/*
		 * only allow owner@ to have
		 * write_acl/write_owner/write_attributes/write_xattr/
		 */
		if (type == ACE_ACCESS_ALLOWED_ACE_TYPE &&
		    (!(flags & ACE_OWNER) && (mask &
		    (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES|
		    ACE_WRITE_NAMED_ATTRS))))
			return (1);

	}

	return (0);
}

/*
 * common code for setting ACLs.
 *
 * Recomputes the mode of zp from aclp, drops any cached ACL, and writes the
 * mode, flags, ctime and ACL data in one sa_bulk_update() under tx.  For
 * SA znodes the ACL is stored in the DACL_ACES/DACL_COUNT attributes; for
 * legacy znodes it goes into the ZNODE_ACL attribute, with the ACEs either
 * embedded or in an external DMU object when they exceed ZFS_ACE_SPACE.
 *
 * Returns 0 on success or the error from sa_lookup(), dmu_object_free() or
 * sa_bulk_update().
 *
 * NOTE(review): an earlier version of this comment described a non-NULL
 * "inherit pointer (ihp)" passed by zfs_setacl; the current signature has
 * no such argument.
 */
int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
	int			error;
	zfsvfs_t		*zfsvfs = ZTOZSB(zp);
	dmu_object_type_t	otype;
	zfs_acl_locator_cb_t	locate = { 0 };
	uint64_t		mode;
	sa_bulk_attr_t		bulk[5];
	uint64_t		ctime[2];
	int			count = 0;
	zfs_acl_phys_t		acl_phys;

	mode = zp->z_mode;

	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
	    KUID_TO_SUID(ZTOI(zp)->i_uid), KGID_TO_SGID(ZTOI(zp)->i_gid));

	zp->z_mode = ZTOI(zp)->i_mode = mode;
	/*
	 * The bulk entries hold pointers, so z_pflags and ctime may still
	 * be updated below before sa_bulk_update() reads them.
	 */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, sizeof (ctime));

	/* The cached ACL is about to become stale. */
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	/*
	 * Upgrade needed?
	 */
	if (!zfsvfs->z_use_fuids) {
		otype = DMU_OT_OLDACL;
	} else {
		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
			zfs_acl_xform(zp, aclp, cr);
		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
		otype = DMU_OT_ACL;
	}

	/*
	 * Arrgh, we have to handle old on disk format
	 * as well as newer (preferred) SA format.
	 */

	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
		locate.cb_aclp = aclp;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
	} else { /* Painful legacy way */
		zfs_acl_node_t *aclnode;
		uint64_t off = 0;
		uint64_t aoid;

		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
		    &acl_phys, sizeof (acl_phys))) != 0)
			return (error);

		aoid = acl_phys.z_acl_extern_obj;

		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			/*
			 * If ACL was previously external and we are now
			 * converting to new ACL format then release old
			 * ACL object and create a new one.
			 */
			if (aoid &&
			    aclp->z_version != acl_phys.z_acl_version) {
				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
				if (error)
					return (error);
				aoid = 0;
			}
			if (aoid == 0) {
				aoid = dmu_object_alloc(zfsvfs->z_os,
				    otype, aclp->z_acl_bytes,
				    otype == DMU_OT_ACL ?
				    DMU_OT_SYSACL : DMU_OT_NONE,
				    otype == DMU_OT_ACL ?
				    DN_OLD_MAX_BONUSLEN : 0, tx);
			} else {
				/* Return value deliberately ignored. */
				(void) dmu_object_set_blocksize(zfsvfs->z_os,
				    aoid, aclp->z_acl_bytes, 0, tx);
			}
			acl_phys.z_acl_extern_obj = aoid;
			/* Write the non-empty nodes back to back. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				dmu_write(zfsvfs->z_os, aoid, off,
				    aclnode->z_size, aclnode->z_acldata, tx,
				    DMU_READ_NO_PREFETCH);
				off += aclnode->z_size;
			}
		} else {
			void *start = acl_phys.z_ace_data;
			/*
			 * Migrating back embedded?
			 */
			if (acl_phys.z_acl_extern_obj) {
				error = dmu_object_free(zfsvfs->z_os,
				    acl_phys.z_acl_extern_obj, tx);
				if (error)
					return (error);
				acl_phys.z_acl_extern_obj = 0;
			}

			/* Copy the non-empty nodes into the embedded area. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				memcpy(start, aclnode->z_acldata,
				    aclnode->z_size);
				start = (caddr_t)start + aclnode->z_size;
			}
		}
		/*
		 * If Old version then swap count/bytes to match old
		 * layout of znode_acl_phys_t.
		 */
		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
			acl_phys.z_acl_size = aclp->z_acl_count;
			acl_phys.z_acl_count = aclp->z_acl_bytes;
		} else {
			acl_phys.z_acl_size = aclp->z_acl_bytes;
			acl_phys.z_acl_count = aclp->z_acl_count;
		}
		acl_phys.z_acl_version = aclp->z_version;

		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (acl_phys));
	}

	/*
	 * Replace ACL wide bits, but first clear them.
	 */
	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;

	zp->z_pflags |= aclp->z_hints;

	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
		zp->z_pflags |= ZFS_ACL_TRIVIAL;

	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
}
/*
 * Rewrite aclp in place so that it expresses the permission bits of mode.
 *
 * A single new ACL node is built: optional owner@ allow / owner@ deny /
 * group@ deny entries derived from the mode, then the surviving ACEs of
 * the existing ACL, then the three owner@/group@/everyone@ allow entries.
 * The old nodes are released and replaced by the new one.
 *
 *	isdir	the object is a directory
 *	mode	permission bits to express
 *	split	drop existing owner@/group@/everyone@ ACEs, keeping
 *		inheritable ones (on directories) as inherit-only copies
 *	trim	limit remaining ALLOW ACEs to the group permissions
 */
static void
zfs_acl_chmod(boolean_t isdir, uint64_t mode, boolean_t split, boolean_t trim,
    zfs_acl_t *aclp)
{
	void		*acep = NULL;
	uint64_t	who;
	int		new_count, new_bytes;
	int		ace_size;
	int		entry_type;
	uint16_t	iflags, type;
	uint32_t	access_mask;
	zfs_acl_node_t	*newnode;
	size_t		abstract_size = aclp->z_ops->ace_abstract_size();
	void		*zacep;
	trivial_acl_t	masks;

	new_count = new_bytes = 0;

	acl_trivial_access_masks((mode_t)mode, isdir, &masks);

	/* Room for up to six mode-derived ACEs plus every existing ACE. */
	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);

	zacep = newnode->z_acldata;
	if (masks.allow0) {
		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
		zacep = (void *)((uintptr_t)zacep + abstract_size);
		new_count++;
		new_bytes += abstract_size;
	}
	if (masks.deny1) {
		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
		zacep = (void *)((uintptr_t)zacep + abstract_size);
		new_count++;
		new_bytes += abstract_size;
	}
	if (masks.deny2) {
		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
		zacep = (void *)((uintptr_t)zacep + abstract_size);
		new_count++;
		new_bytes += abstract_size;
	}

	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type))) {
		entry_type = (iflags & ACE_TYPE_FLAGS);
		/*
		 * ACEs used to represent the file mode may be divided
		 * into an equivalent pair of inherit-only and regular
		 * ACEs, if they are inheritable.
		 * Skip regular ACEs, which are replaced by the new mode.
		 */
		if (split && (entry_type == ACE_OWNER ||
		    entry_type == OWNING_GROUP ||
		    entry_type == ACE_EVERYONE)) {
			if (!isdir || !(iflags &
			    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
				continue;
			/*
			 * We preserve owner@, group@, or everyone@
			 * permissions, if they are inheritable, by
			 * copying them to inherit_only ACEs. This
			 * prevents inheritable permissions from being
			 * altered along with the file mode.
			 */
			iflags |= ACE_INHERIT_ONLY_ACE;
		}

		/*
		 * If this ACL has any inheritable ACEs, mark that in
		 * the hints (which are later masked into the pflags)
		 * so create knows to do inheritance.
		 */
		if (isdir && (iflags &
		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
			aclp->z_hints |= ZFS_INHERIT_ACE;

		if ((type != ALLOW && type != DENY) ||
		    (iflags & ACE_INHERIT_ONLY_ACE)) {
			/* Record the presence of object ACEs in the hints. */
			switch (type) {
			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
				break;
			}
		} else {
			/*
			 * Limit permissions to be no greater than
			 * group permissions.
			 * The "aclinherit" and "aclmode" properties
			 * affect policy for create and chmod(2),
			 * respectively.
			 */
			if ((type == ALLOW) && trim)
				access_mask &= masks.group;
		}
		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
		ace_size = aclp->z_ops->ace_size(acep);
		zacep = (void *)((uintptr_t)zacep + ace_size);
		new_count++;
		new_bytes += ace_size;
	}
	/* Trailing owner@ / group@ / everyone@ allow entries. */
	zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
	zacep = (void *)((uintptr_t)zacep + abstract_size);
	zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
	zacep = (void *)((uintptr_t)zacep + abstract_size);
	zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);

	new_count += 3;
	new_bytes += abstract_size * 3;
	/* Swap the old node list for the freshly built node. */
	zfs_acl_release_nodes(aclp);
	aclp->z_acl_count = new_count;
	aclp->z_acl_bytes = new_bytes;
	newnode->z_ace_count = new_count;
	newnode->z_size = new_bytes;
	list_insert_tail(&aclp->z_acl, newnode);
}

/*
 * Build, in *aclp, the ACL that zp should carry after a chmod to mode.
 *
 * With aclmode=discard a fresh empty ACL is used, otherwise the current
 * ACL is read; either way it is then run through zfs_acl_chmod().
 * z_acl_lock and z_lock are taken and released internally.
 * Returns 0 or the error from zfs_acl_node_read().
 */
int
zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
{
	int error = 0;

	mutex_enter(&zp->z_acl_lock);
	mutex_enter(&zp->z_lock);
	if (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_DISCARD)
		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
	else
		error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);

	if (error == 0) {
		(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
		zfs_acl_chmod(S_ISDIR(ZTOI(zp)->i_mode), mode, B_TRUE,
		    (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
	}
	mutex_exit(&zp->z_lock);
	mutex_exit(&zp->z_acl_lock);

	return (error);
}

/*
 * Should ACE be inherited?
 *
 * Returns 1 if an ACE with flags acep_flags applies to a new object of
 * type obj_mode, 0 otherwise.
 */
static int
zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags)
{
	/* Only the low four flag bits are examined here. */
	int	iflags = (acep_flags & 0xf);

	if (S_ISDIR(obj_mode) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
		return (1);
	else if (iflags & ACE_FILE_INHERIT_ACE)
		return (!(S_ISDIR(obj_mode) &&
		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
	return (0);
}

/*
 * inherit inheritable ACEs from parent
 *
 * Returns a newly allocated ACL holding the ACEs of paclp that a new object
 * of type va_mode inherits under the dataset's aclinherit setting.
 * *need_chmod tells the caller whether zfs_acl_chmod() must still be applied
 * to the result.
 */
static zfs_acl_t *
zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t va_mode, zfs_acl_t *paclp,
    uint64_t mode, boolean_t *need_chmod)
{
	void		*pacep = NULL;
	void		*acep;
	zfs_acl_node_t  *aclnode;
	zfs_acl_t	*aclp = NULL;
	uint64_t	who;
	uint32_t	access_mask;
	uint16_t	iflags, newflags, type;
	size_t		ace_size;
	void		*data1, *data2;
	size_t		data1sz, data2sz;
	uint_t		aclinherit;
	boolean_t	isdir = S_ISDIR(va_mode);
	boolean_t	isreg = S_ISREG(va_mode);

	*need_chmod = B_TRUE;

	aclp = zfs_acl_alloc(paclp->z_version);
	aclinherit = zfsvfs->z_acl_inherit;
	/* Nothing is inherited with aclinherit=discard or by symlinks. */
	if (aclinherit == ZFS_ACL_DISCARD || S_ISLNK(va_mode))
		return (aclp);

	while ((pacep = zfs_acl_next_ace(paclp, pacep, &who,
	    &access_mask, &iflags, &type))) {

		/*
		 * don't inherit bogus ACEs
		 */
		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		/*
		 * Check if ACE is inheritable by this vnode
		 */
		if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
		    !zfs_ace_can_use(va_mode, iflags))
			continue;

		/*
		 * If owner@, group@, or everyone@ inheritable
		 * then zfs_acl_chmod() isn't needed.
		 */
		if ((aclinherit == ZFS_ACL_PASSTHROUGH ||
		    aclinherit == ZFS_ACL_PASSTHROUGH_X) &&
		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
		    ((iflags & OWNING_GROUP) == OWNING_GROUP)) &&
		    (isreg || (isdir && (iflags & ACE_DIRECTORY_INHERIT_ACE))))
			*need_chmod = B_FALSE;

		/*
		 * Strip inherited execute permission from file if
		 * not in mode
		 */
		if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
		    !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
			access_mask &= ~ACE_EXECUTE;
		}

		/*
		 * Strip write_acl and write_owner from permissions
		 * when inheriting an ACE
		 */
		if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
			access_mask &= ~RESTRICTED_CLEAR;
		}

		/* Each inherited ACE gets a node of its own. */
		ace_size = aclp->z_ops->ace_size(pacep);
		aclnode = zfs_acl_node_alloc(ace_size);
		list_insert_tail(&aclp->z_acl, aclnode);
		acep = aclnode->z_acldata;

		zfs_set_ace(aclp, acep, access_mask, type,
		    who, iflags|ACE_INHERITED_ACE);

		/*
		 * Copy special opaque data if any
		 */
		if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) {
			VERIFY((data2sz = aclp->z_ops->ace_data(acep,
			    &data2)) == data1sz);
			memcpy(data2, data1, data2sz);
		}

		aclp->z_acl_count++;
		aclnode->z_ace_count++;
		aclp->z_acl_bytes += aclnode->z_size;
		newflags = aclp->z_ops->ace_flags_get(acep);

		/*
		 * If ACE is not to be inherited further, or if the vnode is
		 * not a directory, remove all inheritance flags
		 */
		if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
			newflags &= ~ALL_INHERIT;
			aclp->z_ops->ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
			continue;
		}

		/*
		 * This directory has an inheritable ACE
		 */
		aclp->z_hints |= ZFS_INHERIT_ACE;

		/*
		 * If only FILE_INHERIT is set then turn on
		 * inherit_only
		 */
		if ((iflags & (ACE_FILE_INHERIT_ACE |
		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
			newflags |= ACE_INHERIT_ONLY_ACE;
			aclp->z_ops->ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
		} else {
			newflags &= ~ACE_INHERIT_ONLY_ACE;
			aclp->z_ops->ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
		}
	}
	/* With aclmode=restricted a non-empty inherited ACL is left alone. */
	if (zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
	    aclp->z_acl_count != 0) {
		*need_chmod = B_FALSE;
	}

	return (aclp);
}
/*
 * Create file system object initial permissions
 * including inheritable ACEs.
 * Also, create FUIDs for owner and group.
 *
 * acl_ids is zeroed and then filled in with the mode, owner/group ids,
 * ACL and FUID info for an object about to be created in directory dzp.
 * If vsecp is given, the ACL is taken from it instead of being inherited.
 * Returns 0 or the error from zfs_vsec_2_aclp().
 */
int
zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids, zidmap_t *mnt_ns)
{
	int		error;
	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
	zfs_acl_t	*paclp;
	gid_t		gid = vap->va_gid;
	boolean_t	need_chmod = B_TRUE;
	boolean_t	trim = B_FALSE;
	boolean_t	inherited = B_FALSE;

	memset(acl_ids, 0, sizeof (zfs_acl_ids_t));
	acl_ids->z_mode = vap->va_mode;

	if (vsecp)
		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_mode, vsecp,
		    cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
			return (error);

	acl_ids->z_fuid = vap->va_uid;
	acl_ids->z_fgid = vap->va_gid;
#ifdef HAVE_KSID
	/*
	 * Determine uid and gid.
	 */
	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
	    ((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) {
		acl_ids->z_fuid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid,
		    cr, ZFS_OWNER, &acl_ids->z_fuidp);
		acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
		    cr, ZFS_GROUP, &acl_ids->z_fuidp);
		gid = vap->va_gid;
	} else {
		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
		    cr, &acl_ids->z_fuidp);
		acl_ids->z_fgid = 0;
		if (vap->va_mask & AT_GID) {
			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
			gid = vap->va_gid;
			/*
			 * Discard the requested group unless it is the
			 * parent's group, the caller is a member of it, or
			 * policy allows it.
			 */
			if (acl_ids->z_fgid != KGID_TO_SGID(ZTOI(dzp)->i_gid) &&
			    !groupmember(vap->va_gid, cr) &&
			    secpolicy_vnode_create_gid(cr) != 0)
				acl_ids->z_fgid = 0;
		}
		if (acl_ids->z_fgid == 0) {
			if (dzp->z_mode & S_ISGID) {
				char		*domain;
				uint32_t	rid;

				/* Set-GID parent: take the parent's group. */
				acl_ids->z_fgid = KGID_TO_SGID(
				    ZTOI(dzp)->i_gid);
				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
				    cr, ZFS_GROUP);

				if (zfsvfs->z_use_fuids &&
				    IS_EPHEMERAL(acl_ids->z_fgid)) {
					domain = zfs_fuid_idx_domain(
					    &zfsvfs->z_fuid_idx,
					    FUID_INDEX(acl_ids->z_fgid));
					rid = FUID_RID(acl_ids->z_fgid);
					zfs_fuid_node_add(&acl_ids->z_fuidp,
					    domain, rid,
					    FUID_INDEX(acl_ids->z_fgid),
					    acl_ids->z_fgid, ZFS_GROUP);
				}
			} else {
				/* Otherwise use the caller's group. */
				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
				gid = crgetgid(cr);
			}
		}
	}
#endif /* HAVE_KSID */

	/*
	 * If we're creating a directory, and the parent directory has the
	 * set-GID bit set, set it on the new directory.
	 * Otherwise, if the user is neither privileged nor a member of the
	 * file's new group, clear the file's set-GID bit.
	 */

	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
	    (S_ISDIR(vap->va_mode))) {
		acl_ids->z_mode |= S_ISGID;
	} else {
		if ((acl_ids->z_mode & S_ISGID) &&
		    secpolicy_vnode_setids_setgids(cr, gid, mnt_ns,
		    zfs_i_user_ns(ZTOI(dzp))) != 0) {
			acl_ids->z_mode &= ~S_ISGID;
		}
	}

	/* No explicit ACL was supplied: inherit one or start trivial. */
	if (acl_ids->z_aclp == NULL) {
		mutex_enter(&dzp->z_acl_lock);
		mutex_enter(&dzp->z_lock);
		if (!(flag & IS_ROOT_NODE) &&
		    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
		    !(dzp->z_pflags & ZFS_XATTR)) {
			VERIFY0(zfs_acl_node_read(dzp, B_TRUE,
			    &paclp, B_FALSE));
			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
			    vap->va_mode, paclp, acl_ids->z_mode, &need_chmod);
			inherited = B_TRUE;
		} else {
			acl_ids->z_aclp =
			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
		}
		mutex_exit(&dzp->z_lock);
		mutex_exit(&dzp->z_acl_lock);

		if (need_chmod) {
			if (S_ISDIR(vap->va_mode))
				acl_ids->z_aclp->z_hints |=
				    ZFS_ACL_AUTO_INHERIT;

			if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
				trim = B_TRUE;
			zfs_acl_chmod(S_ISDIR(vap->va_mode), acl_ids->z_mode,
			    B_FALSE, trim, acl_ids->z_aclp);
		}
	}

	/* An inherited or explicit ACL determines the final mode. */
	if (inherited || vsecp) {
		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
		    acl_ids->z_fuid, acl_ids->z_fgid);
		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
	}

	return (0);
}

/*
 * Free ACL and fuid_infop, but not the acl_ids structure
 */
void
zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
{
	if (acl_ids->z_aclp)
		zfs_acl_free(acl_ids->z_aclp);
	if (acl_ids->z_fuidp)
		zfs_fuid_info_free(acl_ids->z_fuidp);
	/* Clear the pointers so a repeated call is harmless. */
	acl_ids->z_aclp = NULL;
	acl_ids->z_fuidp = NULL;
}

/*
 * Return B_TRUE if the owner, the group or (when projid is neither the
 * default nor the invalid project id) the project of the ids in acl_ids
 * is over quota.
 */
boolean_t
zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid)
{
	return (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid) ||
	    zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid) ||
	    (projid != ZFS_DEFAULT_PROJID && projid != ZFS_INVALID_PROJID &&
	    zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid)));
}

/*
 * Retrieve a file's ACL
 *
 * Which pieces are returned in vsecp is selected by vsecp->vsa_mask
 * (entry count, entries, ACL-wide flags).  When entries are requested the
 * buffer in vsa_aclentp is allocated here with kmem_alloc() and its size is
 * recorded in vsa_aclentsz.
 *
 * Returns 0, ENOSYS if nothing was requested, or the error from
 * zfs_zaccess() / zfs_acl_node_read().
 */
int
zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
{
	zfs_acl_t	*aclp;
	ulong_t		mask;
	int		error;
	int		count = 0;
	int		largeace = 0;

	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);

	if (mask == 0)
		return (SET_ERROR(ENOSYS));

	if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr,
	    zfs_init_idmap)))
		return (error);

	mutex_enter(&zp->z_acl_lock);

	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
	if (error != 0) {
		mutex_exit(&zp->z_acl_lock);
		return (error);
	}

	/*
	 * Scan ACL to determine number of ACEs
	 */
	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
		void *zacep = NULL;
		uint64_t who;
		uint32_t access_mask;
		uint16_t type, iflags;

		/* Object ACEs are tallied separately from regular ones. */
		while ((zacep = zfs_acl_next_ace(aclp, zacep,
		    &who, &access_mask, &iflags, &type))) {
			switch (type) {
			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
				largeace++;
				continue;
			default:
				count++;
			}
		}
		vsecp->vsa_aclcnt = count;
	} else
		count = (int)aclp->z_acl_count;

	if (mask & VSA_ACECNT) {
		vsecp->vsa_aclcnt = count;
	}

	if (mask & VSA_ACE) {
		size_t aclsz;

		aclsz = count * sizeof (ace_t) +
		    sizeof (ace_object_t) * largeace;

		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
		vsecp->vsa_aclentsz = aclsz;

		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
			zfs_copy_fuid_2_ace(ZTOZSB(zp), aclp, cr,
			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
		else {
			zfs_acl_node_t *aclnode;
			void *start = vsecp->vsa_aclentp;

			/* Other versions: copy the node data verbatim. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				memcpy(start, aclnode->z_acldata,
				    aclnode->z_size);
				start = (caddr_t)start + aclnode->z_size;
			}
			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
			    aclp->z_acl_bytes);
		}
	}
	if (mask & VSA_ACE_ACLFLAGS) {
		vsecp->vsa_aclflags = 0;
		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
			vsecp->vsa_aclflags |= ACL_DEFAULTED;
		if (zp->z_pflags & ZFS_ACL_PROTECTED)
			vsecp->vsa_aclflags |= ACL_PROTECTED;
		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
	}

	mutex_exit(&zp->z_acl_lock);

	return (0);
}
ACL_AUTO_INHERIT;2058}20592060mutex_exit(&zp->z_acl_lock);20612062return (0);2063}20642065int2066zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode,2067vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)2068{2069zfs_acl_t *aclp;2070zfs_acl_node_t *aclnode;2071int aclcnt = vsecp->vsa_aclcnt;2072int error;20732074if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)2075return (SET_ERROR(EINVAL));20762077aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));20782079aclp->z_hints = 0;2080aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));2081if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {2082if ((error = zfs_copy_ace_2_oldace(obj_mode, aclp,2083(ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,2084aclcnt, &aclnode->z_size)) != 0) {2085zfs_acl_free(aclp);2086zfs_acl_node_free(aclnode);2087return (error);2088}2089} else {2090if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_mode, aclp,2091vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,2092&aclnode->z_size, fuidp, cr)) != 0) {2093zfs_acl_free(aclp);2094zfs_acl_node_free(aclnode);2095return (error);2096}2097}2098aclp->z_acl_bytes = aclnode->z_size;2099aclnode->z_ace_count = aclcnt;2100aclp->z_acl_count = aclcnt;2101list_insert_head(&aclp->z_acl, aclnode);21022103/*2104* If flags are being set then add them to z_hints2105*/2106if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {2107if (vsecp->vsa_aclflags & ACL_PROTECTED)2108aclp->z_hints |= ZFS_ACL_PROTECTED;2109if (vsecp->vsa_aclflags & ACL_DEFAULTED)2110aclp->z_hints |= ZFS_ACL_DEFAULTED;2111if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)2112aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;2113}21142115*zaclp = aclp;21162117return (0);2118}21192120/*2121* Set a file's ACL2122*/2123int2124zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)2125{2126zfsvfs_t *zfsvfs = ZTOZSB(zp);2127zilog_t *zilog = zfsvfs->z_log;2128ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);2129dmu_tx_t *tx;2130int error;2131zfs_acl_t 
*aclp;2132zfs_fuid_info_t *fuidp = NULL;2133boolean_t fuid_dirtied;2134uint64_t acl_obj;21352136if (mask == 0)2137return (SET_ERROR(ENOSYS));21382139if (zp->z_pflags & ZFS_IMMUTABLE)2140return (SET_ERROR(EPERM));21412142if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,2143zfs_init_idmap)))2144return (error);21452146error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,2147&aclp);2148if (error)2149return (error);21502151/*2152* If ACL wide flags aren't being set then preserve any2153* existing flags.2154*/2155if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {2156aclp->z_hints |=2157(zp->z_pflags & V4_ACL_WIDE_FLAGS);2158}2159top:2160mutex_enter(&zp->z_acl_lock);2161mutex_enter(&zp->z_lock);21622163tx = dmu_tx_create(zfsvfs->z_os);21642165dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);21662167fuid_dirtied = zfsvfs->z_fuid_dirty;2168if (fuid_dirtied)2169zfs_fuid_txhold(zfsvfs, tx);21702171/*2172* If old version and ACL won't fit in bonus and we aren't2173* upgrading then take out necessary DMU holds2174*/21752176if ((acl_obj = zfs_external_acl(zp)) != 0) {2177if (zfsvfs->z_version >= ZPL_VERSION_FUID &&2178zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {2179dmu_tx_hold_free(tx, acl_obj, 0,2180DMU_OBJECT_END);2181dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,2182aclp->z_acl_bytes);2183} else {2184dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);2185}2186} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {2187dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);2188}21892190zfs_sa_upgrade_txholds(tx, zp);2191error = dmu_tx_assign(tx, DMU_TX_NOWAIT);2192if (error) {2193mutex_exit(&zp->z_acl_lock);2194mutex_exit(&zp->z_lock);21952196if (error == ERESTART) {2197dmu_tx_wait(tx);2198dmu_tx_abort(tx);2199goto top;2200}2201dmu_tx_abort(tx);2202zfs_acl_free(aclp);2203return (error);2204}22052206error = zfs_aclset_common(zp, aclp, cr, tx);2207ASSERT0(error);2208ASSERT0P(zp->z_acl_cached);2209zp->z_acl_cached = aclp;22102211if 
(fuid_dirtied)2212zfs_fuid_sync(zfsvfs, tx);22132214zfs_log_acl(zilog, tx, zp, vsecp, fuidp);22152216if (fuidp)2217zfs_fuid_info_free(fuidp);2218dmu_tx_commit(tx);22192220mutex_exit(&zp->z_lock);2221mutex_exit(&zp->z_acl_lock);22222223return (error);2224}22252226/*2227* Check accesses of interest (AoI) against attributes of the dataset2228* such as read-only. Returns zero if no AoI conflict with dataset2229* attributes, otherwise an appropriate errno is returned.2230*/2231static int2232zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)2233{2234if ((v4_mode & WRITE_MASK) && (zfs_is_readonly(ZTOZSB(zp))) &&2235(!Z_ISDEV(ZTOI(zp)->i_mode) || (v4_mode & WRITE_MASK_ATTRS))) {2236return (SET_ERROR(EROFS));2237}22382239/*2240* Intentionally allow ZFS_READONLY through here.2241* See zfs_zaccess_common().2242*/2243if ((v4_mode & WRITE_MASK_DATA) &&2244(zp->z_pflags & ZFS_IMMUTABLE)) {2245return (SET_ERROR(EPERM));2246}22472248if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&2249(zp->z_pflags & ZFS_NOUNLINK)) {2250return (SET_ERROR(EPERM));2251}22522253if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&2254(zp->z_pflags & ZFS_AV_QUARANTINED))) {2255return (SET_ERROR(EACCES));2256}22572258return (0);2259}22602261/*2262* The primary usage of this function is to loop through all of the2263* ACEs in the znode, determining what accesses of interest (AoI) to2264* the caller are allowed or denied. The AoI are expressed as bits in2265* the working_mode parameter. As each ACE is processed, bits covered2266* by that ACE are removed from the working_mode. This removal2267* facilitates two things. The first is that when the working mode is2268* empty (= 0), we know we've looked at all the AoI. The second is2269* that the ACE interpretation rules don't allow a later ACE to undo2270* something granted or denied by an earlier ACE. Removing the2271* discovered access or denial enforces this rule. 
/*
 * The primary usage of this function is to loop through all of the
 * ACEs in the znode, determining what accesses of interest (AoI) to
 * the caller are allowed or denied.  The AoI are expressed as bits in
 * the working_mode parameter.  As each ACE is processed, bits covered
 * by that ACE are removed from the working_mode.  This removal
 * facilitates two things.  The first is that when the working mode is
 * empty (= 0), we know we've looked at all the AoI.  The second is
 * that the ACE interpretation rules don't allow a later ACE to undo
 * something granted or denied by an earlier ACE.  Removing the
 * discovered access or denial enforces this rule.  At the end of
 * processing the ACEs, all AoI that were found to be denied are
 * placed into the working_mode, giving the caller a mask of denied
 * accesses.  Returns:
 *	0		if all AoI granted
 *	EACCES 		if the denied mask is non-zero
 *	-1		if some AoI were neither granted nor denied
 *	other error	if abnormal failure (e.g., IO error)
 *
 * A secondary usage of the function is to determine if any of the
 * AoI are granted.  If an ACE grants any access in
 * the working_mode, we immediately short circuit out of the function.
 * This mode is chosen by setting anyaccess to B_TRUE.  The
 * working_mode is not a denied access mask upon exit if the function
 * is used in this manner.
 */
static int
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
    boolean_t anyaccess, cred_t *cr, zidmap_t *mnt_ns)
{
	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
	zfs_acl_t	*aclp;
	int		error;
	uid_t		uid = crgetuid(cr);
	uint64_t	who;
	uint16_t	type, iflags;
	uint16_t	entry_type;
	uint32_t	access_mask;
	uint32_t	deny_mask = 0;
	zfs_ace_hdr_t	*acep = NULL;
	boolean_t	checkit;
	uid_t		gowner;
	uid_t		fowner;

	/* Resolve the file's owner and group as seen by the caller. */
	if (mnt_ns) {
		fowner = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
		    KUID_TO_SUID(ZTOI(zp)->i_uid));
		gowner = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
		    KGID_TO_SGID(ZTOI(zp)->i_gid));
	} else
		zfs_fuid_map_ids(zp, cr, &fowner, &gowner);

	mutex_enter(&zp->z_acl_lock);

	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
	if (error != 0) {
		mutex_exit(&zp->z_acl_lock);
		return (error);
	}

	ASSERT(zp->z_acl_cached);

	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type))) {
		uint32_t mask_matched;

		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		/* Inherit-only ACEs do not apply to the directory itself. */
		if (S_ISDIR(ZTOI(zp)->i_mode) &&
		    (iflags & ACE_INHERIT_ONLY_ACE))
			continue;

		/* Skip ACE if it does not affect any AoI */
		mask_matched = (access_mask & *working_mode);
		if (!mask_matched)
			continue;

		entry_type = (iflags & ACE_TYPE_FLAGS);

		checkit = B_FALSE;

		/* Decide whether this ACE applies to the caller. */
		switch (entry_type) {
		case ACE_OWNER:
			if (uid == fowner)
				checkit = B_TRUE;
			break;
		case OWNING_GROUP:
			who = gowner;
			zfs_fallthrough;
		case ACE_IDENTIFIER_GROUP:
			checkit = zfs_groupmember(zfsvfs, who, cr);
			break;
		case ACE_EVERYONE:
			checkit = B_TRUE;
			break;

		/* USER Entry */
		default:
			if (entry_type == 0) {
				uid_t newid;

				newid = zfs_fuid_map_id(zfsvfs, who, cr,
				    ZFS_ACE_USER);
				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
				    uid == newid)
					checkit = B_TRUE;
				break;
			} else {
				/* Unknown entry type. */
				mutex_exit(&zp->z_acl_lock);
				return (SET_ERROR(EIO));
			}
		}

		if (checkit) {
			if (type == DENY) {
				DTRACE_PROBE3(zfs__ace__denies,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				deny_mask |= mask_matched;
			} else {
				DTRACE_PROBE3(zfs__ace__allows,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				if (anyaccess) {
					mutex_exit(&zp->z_acl_lock);
					return (0);
				}
			}
			/* These bits are settled; later ACEs cannot alter them. */
			*working_mode &= ~mask_matched;
		}

		/* Are we done? */
		if (*working_mode == 0)
			break;
	}

	mutex_exit(&zp->z_acl_lock);

	/* Put the found 'denies' back on the working mode */
	if (deny_mask) {
		*working_mode |= deny_mask;
		return (SET_ERROR(EACCES));
	} else if (*working_mode) {
		/* Bits remain that no ACE granted or denied. */
		return (-1);
	}

	return (0);
}

/*
 * Return true if any access whatsoever granted, we don't actually
 * care what access is granted.
 */
boolean_t
zfs_has_access(znode_t *zp, cred_t *cr)
{
	uint32_t have = ACE_ALL_PERMS;

	/* If the ACL grants nothing, fall back to the policy check. */
	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr,
	    zfs_init_idmap) != 0) {
		uid_t owner;

		owner = zfs_fuid_map_id(ZTOZSB(zp),
		    KUID_TO_SUID(ZTOI(zp)->i_uid), cr, ZFS_OWNER);
		return (secpolicy_vnode_any_access(cr, ZTOI(zp), owner) == 0);
	}
	return (B_TRUE);
}
EACCES is used in zfs_zaccess_aces_check()2448* to indicate access check failed due to explicit DENY entry, and so2449* we want to avoid that here.2450*/2451static int2452zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr,2453zidmap_t *mnt_ns)2454{2455int err, mask;2456int unmapped = 0;24572458ASSERT(zp->z_pflags & ZFS_ACL_TRIVIAL);24592460mask = zfs_v4_to_unix(*working_mode, &unmapped);2461if (mask == 0 || unmapped) {2462*working_mode = unmapped;2463return (unmapped ? SET_ERROR(EPERM) : 0);2464}24652466#if (defined(HAVE_IOPS_PERMISSION_USERNS) || \2467defined(HAVE_IOPS_PERMISSION_IDMAP))2468err = generic_permission(mnt_ns, ZTOI(zp), mask);2469#else2470err = generic_permission(ZTOI(zp), mask);2471#endif2472if (err != 0) {2473return (SET_ERROR(EPERM));2474}24752476*working_mode = unmapped;24772478return (0);2479}24802481static int2482zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,2483boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr, zidmap_t *mnt_ns)2484{2485zfsvfs_t *zfsvfs = ZTOZSB(zp);2486int err;24872488*working_mode = v4_mode;2489*check_privs = B_TRUE;24902491/*2492* Short circuit empty requests2493*/2494if (v4_mode == 0 || zfsvfs->z_replay) {2495*working_mode = 0;2496return (0);2497}24982499if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {2500*check_privs = B_FALSE;2501return (err);2502}25032504/*2505* The caller requested that the ACL check be skipped. This2506* would only happen if the caller checked VOP_ACCESS() with a2507* 32 bit ACE mask and already had the appropriate permissions.2508*/2509if (skipaclchk) {2510*working_mode = 0;2511return (0);2512}25132514/*2515* Note: ZFS_READONLY represents the "DOS R/O" attribute.2516* When that flag is set, we should behave as if write access2517* were not granted by anything in the ACL. 
In particular:2518* We _must_ allow writes after opening the file r/w, then2519* setting the DOS R/O attribute, and writing some more.2520* (Similar to how you can write after fchmod(fd, 0444).)2521*2522* Therefore ZFS_READONLY is ignored in the dataset check2523* above, and checked here as if part of the ACL check.2524* Also note: DOS R/O is ignored for directories.2525*/2526if ((v4_mode & WRITE_MASK_DATA) &&2527!S_ISDIR(ZTOI(zp)->i_mode) &&2528(zp->z_pflags & ZFS_READONLY)) {2529return (SET_ERROR(EPERM));2530}25312532if (zp->z_pflags & ZFS_ACL_TRIVIAL)2533return (zfs_zaccess_trivial(zp, working_mode, cr, mnt_ns));25342535return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr, mnt_ns));2536}25372538static int2539zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,2540cred_t *cr, zidmap_t *mnt_ns)2541{2542if (*working_mode != ACE_WRITE_DATA)2543return (SET_ERROR(EACCES));25442545return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,2546check_privs, B_FALSE, cr, mnt_ns));2547}25482549int2550zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)2551{2552boolean_t owner = B_FALSE;2553boolean_t groupmbr = B_FALSE;2554boolean_t is_attr;2555uid_t uid = crgetuid(cr);2556int error;25572558if (zdp->z_pflags & ZFS_AV_QUARANTINED)2559return (SET_ERROR(EACCES));25602561is_attr = ((zdp->z_pflags & ZFS_XATTR) &&2562(S_ISDIR(ZTOI(zdp)->i_mode)));2563if (is_attr)2564goto slow;256525662567mutex_enter(&zdp->z_acl_lock);25682569if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {2570mutex_exit(&zdp->z_acl_lock);2571return (0);2572}25732574if (KUID_TO_SUID(ZTOI(zdp)->i_uid) != 0 ||2575KGID_TO_SGID(ZTOI(zdp)->i_gid) != 0) {2576mutex_exit(&zdp->z_acl_lock);2577goto slow;2578}25792580if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) {2581if (zdp->z_mode & S_IXUSR) {2582mutex_exit(&zdp->z_acl_lock);2583return (0);2584} else {2585mutex_exit(&zdp->z_acl_lock);2586goto slow;2587}2588}2589if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) {2590if (zdp->z_mode & S_IXGRP) 
{2591mutex_exit(&zdp->z_acl_lock);2592return (0);2593} else {2594mutex_exit(&zdp->z_acl_lock);2595goto slow;2596}2597}2598if (!owner && !groupmbr) {2599if (zdp->z_mode & S_IXOTH) {2600mutex_exit(&zdp->z_acl_lock);2601return (0);2602}2603}26042605mutex_exit(&zdp->z_acl_lock);26062607slow:2608DTRACE_PROBE(zfs__fastpath__execute__access__miss);2609if ((error = zfs_enter(ZTOZSB(zdp), FTAG)) != 0)2610return (error);2611error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,2612zfs_init_idmap);2613zfs_exit(ZTOZSB(zdp), FTAG);2614return (error);2615}26162617/*2618* Determine whether Access should be granted/denied.2619*2620* The least priv subsystem is always consulted as a basic privilege2621* can define any form of access.2622*/2623int2624zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr,2625zidmap_t *mnt_ns)2626{2627uint32_t working_mode;2628int error;2629int is_attr;2630boolean_t check_privs;2631znode_t *xzp;2632znode_t *check_zp = zp;2633mode_t needed_bits;2634uid_t owner;26352636is_attr = ((zp->z_pflags & ZFS_XATTR) && S_ISDIR(ZTOI(zp)->i_mode));26372638/*2639* If attribute then validate against base file2640*/2641if (is_attr) {2642if ((error = zfs_zget(ZTOZSB(zp),2643zp->z_xattr_parent, &xzp)) != 0) {2644return (error);2645}26462647check_zp = xzp;26482649/*2650* fixup mode to map to xattr perms2651*/26522653if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {2654mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);2655mode |= ACE_WRITE_NAMED_ATTRS;2656}26572658if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {2659mode &= ~(ACE_READ_DATA|ACE_EXECUTE);2660mode |= ACE_READ_NAMED_ATTRS;2661}2662}26632664owner = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),2665KUID_TO_SUID(ZTOI(zp)->i_uid));2666owner = zfs_fuid_map_id(ZTOZSB(zp), owner, cr, ZFS_OWNER);26672668/*2669* Map the bits required to the standard inode flags2670* S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits. 
Map the bits2671* mapped by working_mode (currently missing) in missing_bits.2672* Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),2673* needed_bits.2674*/2675needed_bits = 0;26762677working_mode = mode;2678if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&2679owner == crgetuid(cr))2680working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);26812682if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|2683ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))2684needed_bits |= S_IRUSR;2685if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|2686ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))2687needed_bits |= S_IWUSR;2688if (working_mode & ACE_EXECUTE)2689needed_bits |= S_IXUSR;26902691if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,2692&check_privs, skipaclchk, cr, mnt_ns)) == 0) {2693if (is_attr)2694zrele(xzp);2695return (secpolicy_vnode_access2(cr, ZTOI(zp), owner,2696needed_bits, needed_bits));2697}26982699if (error && !check_privs) {2700if (is_attr)2701zrele(xzp);2702return (error);2703}27042705if (error && (flags & V_APPEND)) {2706error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr,2707mnt_ns);2708}27092710if (error && check_privs) {2711mode_t checkmode = 0;27122713/*2714* First check for implicit owner permission on2715* read_acl/read_attributes2716*/27172718ASSERT(working_mode != 0);27192720if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&2721owner == crgetuid(cr)))2722working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);27232724if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|2725ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))2726checkmode |= S_IRUSR;2727if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|2728ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))2729checkmode |= S_IWUSR;2730if (working_mode & ACE_EXECUTE)2731checkmode |= S_IXUSR;27322733error = secpolicy_vnode_access2(cr, ZTOI(check_zp), owner,2734needed_bits & ~checkmode, needed_bits);27352736if (error == 0 && 
(working_mode & ACE_WRITE_OWNER))2737error = secpolicy_vnode_chown(cr, owner);2738if (error == 0 && (working_mode & ACE_WRITE_ACL))2739error = secpolicy_vnode_setdac(cr, owner);27402741if (error == 0 && (working_mode &2742(ACE_DELETE|ACE_DELETE_CHILD)))2743error = secpolicy_vnode_remove(cr);27442745if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {2746error = secpolicy_vnode_chown(cr, owner);2747}2748if (error == 0) {2749/*2750* See if any bits other than those already checked2751* for are still present. If so then return EACCES2752*/2753if (working_mode & ~(ZFS_CHECKED_MASKS)) {2754error = SET_ERROR(EACCES);2755}2756}2757} else if (error == 0) {2758error = secpolicy_vnode_access2(cr, ZTOI(zp), owner,2759needed_bits, needed_bits);2760}27612762if (is_attr)2763zrele(xzp);27642765return (error);2766}27672768/*2769* Translate traditional unix S_IRUSR/S_IWUSR/S_IXUSR mode into2770* NFSv4-style ZFS ACL format and call zfs_zaccess()2771*/2772int2773zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr,2774zidmap_t *mnt_ns)2775{2776return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr,2777mnt_ns));2778}27792780/*2781* Access function for secpolicy_vnode_setattr2782*/2783int2784zfs_zaccess_unix(void *zp, int mode, cred_t *cr)2785{2786int v4_mode = zfs_unix_to_v4(mode >> 6);27872788return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr, zfs_init_idmap));2789}27902791/* See zfs_zaccess_delete() */2792static const boolean_t zfs_write_implies_delete_child = B_TRUE;27932794/*2795* Determine whether delete access should be granted.2796*2797* The following chart outlines how we handle delete permissions which is2798* how recent versions of windows (Windows 2008) handles it. 
The efficiency2799* comes from not having to check the parent ACL where the object itself grants2800* delete:2801*2802* -------------------------------------------------------2803* | Parent Dir | Target Object Permissions |2804* | permissions | |2805* -------------------------------------------------------2806* | | ACL Allows | ACL Denies| Delete |2807* | | Delete | Delete | unspecified|2808* -------------------------------------------------------2809* | ACL Allows | Permit | Deny * | Permit |2810* | DELETE_CHILD | | | |2811* -------------------------------------------------------2812* | ACL Denies | Permit | Deny | Deny |2813* | DELETE_CHILD | | | |2814* -------------------------------------------------------2815* | ACL specifies | | | |2816* | only allow | Permit | Deny * | Permit |2817* | write and | | | |2818* | execute | | | |2819* -------------------------------------------------------2820* | ACL denies | | | |2821* | write and | Permit | Deny | Deny |2822* | execute | | | |2823* -------------------------------------------------------2824* ^2825* |2826* Re. execute permission on the directory: if that's missing,2827* the vnode lookup of the target will fail before we get here.2828*2829* Re [*] in the table above: NFSv4 would normally Permit delete for2830* these two cells of the matrix.2831* See acl.h for notes on which ACE_... flags should be checked for which2832* operations. Specifically, the NFSv4 committee recommendation is in2833* conflict with the Windows interpretation of DENY ACEs, where DENY ACEs2834* should take precedence ahead of ALLOW ACEs.2835*2836* This implementation always consults the target object's ACL first.2837* If a DENY ACE is present on the target object that specifies ACE_DELETE,2838* delete access is denied. If an ALLOW ACE with ACE_DELETE is present on2839* the target object, access is allowed. 
If and only if no entries with2840* ACE_DELETE are present in the object's ACL, check the container's ACL2841* for entries with ACE_DELETE_CHILD.2842*2843* A summary of the logic implemented from the table above is as follows:2844*2845* First check for DENY ACEs that apply.2846* If either target or container has a deny, EACCES.2847*2848* Delete access can then be summarized as follows:2849* 1: The object to be deleted grants ACE_DELETE, or2850* 2: The containing directory grants ACE_DELETE_CHILD.2851* In a Windows system, that would be the end of the story.2852* In this system, (2) has some complications...2853* 2a: "sticky" bit on a directory adds restrictions, and2854* 2b: existing ACEs from previous versions of ZFS may2855* not carry ACE_DELETE_CHILD where they should, so we2856* also allow delete when ACE_WRITE_DATA is granted.2857*2858* Note: 2b is technically a work-around for a prior bug,2859* which hopefully can go away some day. For those who2860* no longer need the work around, and for testing, this2861* work-around is made conditional via the tunable:2862* zfs_write_implies_delete_child2863*/2864int2865zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr, zidmap_t *mnt_ns)2866{2867uint32_t wanted_dirperms;2868uint32_t dzp_working_mode = 0;2869uint32_t zp_working_mode = 0;2870int dzp_error, zp_error;2871boolean_t dzpcheck_privs;2872boolean_t zpcheck_privs;28732874if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))2875return (SET_ERROR(EPERM));28762877/*2878* Case 1:2879* If target object grants ACE_DELETE then we are done. This is2880* indicated by a return value of 0. 
For this case we don't worry2881* about the sticky bit because sticky only applies to the parent2882* directory and this is the child access result.2883*2884* If we encounter a DENY ACE here, we're also done (EACCES).2885* Note that if we hit a DENY ACE here (on the target) it should2886* take precedence over a DENY ACE on the container, so that when2887* we have more complete auditing support we will be able to2888* report an access failure against the specific target.2889* (This is part of why we're checking the target first.)2890*/2891zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,2892&zpcheck_privs, B_FALSE, cr, mnt_ns);2893if (zp_error == EACCES) {2894/* We hit a DENY ACE. */2895if (!zpcheck_privs)2896return (SET_ERROR(zp_error));2897return (secpolicy_vnode_remove(cr));28982899}2900if (zp_error == 0)2901return (0);29022903/*2904* Case 2:2905* If the containing directory grants ACE_DELETE_CHILD,2906* or we're in backward compatibility mode and the2907* containing directory has ACE_WRITE_DATA, allow.2908* Case 2b is handled with wanted_dirperms.2909*/2910wanted_dirperms = ACE_DELETE_CHILD;2911if (zfs_write_implies_delete_child)2912wanted_dirperms |= ACE_WRITE_DATA;2913dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,2914&dzp_working_mode, &dzpcheck_privs, B_FALSE, cr, mnt_ns);2915if (dzp_error == EACCES) {2916/* We hit a DENY ACE. */2917if (!dzpcheck_privs)2918return (SET_ERROR(dzp_error));2919return (secpolicy_vnode_remove(cr));2920}29212922/*2923* Cases 2a, 2b (continued)2924*2925* Note: dzp_working_mode now contains any permissions2926* that were NOT granted. 
Therefore, if any of the2927* wanted_dirperms WERE granted, we will have:2928* dzp_working_mode != wanted_dirperms2929* We're really asking if ANY of those permissions2930* were granted, and if so, grant delete access.2931*/2932if (dzp_working_mode != wanted_dirperms)2933dzp_error = 0;29342935/*2936* dzp_error is 0 if the container granted us permissions to "modify".2937* If we do not have permission via one or more ACEs, our current2938* privileges may still permit us to modify the container.2939*2940* dzpcheck_privs is false when i.e. the FS is read-only.2941* Otherwise, do privilege checks for the container.2942*/2943if (dzp_error != 0 && dzpcheck_privs) {2944uid_t owner;29452946/*2947* The secpolicy call needs the requested access and2948* the current access mode of the container, but it2949* only knows about Unix-style modes (VEXEC, VWRITE),2950* so this must condense the fine-grained ACE bits into2951* Unix modes.2952*2953* The VEXEC flag is easy, because we know that has2954* always been checked before we get here (during the2955* lookup of the target vnode). The container has not2956* granted us permissions to "modify", so we do not set2957* the VWRITE flag in the current access mode.2958*/2959owner = zfs_fuid_map_id(ZTOZSB(dzp),2960KUID_TO_SUID(ZTOI(dzp)->i_uid), cr, ZFS_OWNER);2961dzp_error = secpolicy_vnode_access2(cr, ZTOI(dzp),2962owner, S_IXUSR, S_IWUSR|S_IXUSR);2963}2964if (dzp_error != 0) {2965/*2966* Note: We may have dzp_error = -1 here (from2967* zfs_zacess_common). 
Don't return that.2968*/2969return (SET_ERROR(EACCES));2970}297129722973/*2974* At this point, we know that the directory permissions allow2975* us to modify, but we still need to check for the additional2976* restrictions that apply when the "sticky bit" is set.2977*2978* Yes, zfs_sticky_remove_access() also checks this bit, but2979* checking it here and skipping the call below is nice when2980* you're watching all of this with dtrace.2981*/2982if ((dzp->z_mode & S_ISVTX) == 0)2983return (0);29842985/*2986* zfs_sticky_remove_access will succeed if:2987* 1. The sticky bit is absent.2988* 2. We pass the sticky bit restrictions.2989* 3. We have privileges that always allow file removal.2990*/2991return (zfs_sticky_remove_access(dzp, zp, cr));2992}29932994int2995zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,2996znode_t *tzp, cred_t *cr, zidmap_t *mnt_ns)2997{2998int add_perm;2999int error;30003001if (szp->z_pflags & ZFS_AV_QUARANTINED)3002return (SET_ERROR(EACCES));30033004add_perm = S_ISDIR(ZTOI(szp)->i_mode) ?3005ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;30063007/*3008* Rename permissions are combination of delete permission +3009* add file/subdir permission.3010*/30113012/*3013* first make sure we do the delete portion.3014*3015* If that succeeds then check for add_file/add_subdir permissions3016*/30173018if ((error = zfs_zaccess_delete(sdzp, szp, cr, mnt_ns)))3019return (error);30203021/*3022* If we have a tzp, see if we can delete it?3023*/3024if (tzp) {3025if ((error = zfs_zaccess_delete(tdzp, tzp, cr, mnt_ns)))3026return (error);3027}30283029/*3030* Now check for add permissions3031*/3032error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr, mnt_ns);30333034return (error);3035}303630373038