Path: blob/main/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
48774 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/21/*22* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.23* Copyright (c) 2012, 2018 by Delphix. All rights reserved.24*/2526/* Portions Copyright 2010 Robert Milkowski */2728#include <sys/types.h>29#include <sys/param.h>30#include <sys/sysmacros.h>31#include <sys/kmem.h>32#include <sys/pathname.h>33#include <sys/vnode.h>34#include <sys/vfs.h>35#include <sys/mntent.h>36#include <sys/cmn_err.h>37#include <sys/zfs_znode.h>38#include <sys/zfs_vnops.h>39#include <sys/zfs_dir.h>40#include <sys/zil.h>41#include <sys/fs/zfs.h>42#include <sys/dmu.h>43#include <sys/dsl_prop.h>44#include <sys/dsl_dataset.h>45#include <sys/dsl_deleg.h>46#include <sys/spa.h>47#include <sys/zap.h>48#include <sys/sa.h>49#include <sys/sa_impl.h>50#include <sys/policy.h>51#include <sys/atomic.h>52#include <sys/zfs_ioctl.h>53#include <sys/zfs_ctldir.h>54#include <sys/zfs_fuid.h>55#include <sys/zfs_quota.h>56#include <sys/sunddi.h>57#include <sys/dmu_objset.h>58#include <sys/dsl_dir.h>59#include <sys/objlist.h>60#include <sys/zfeature.h>61#include <sys/zpl.h>62#include <linux/vfs_compat.h>63#include <linux/fs.h>64#include 
"zfs_comutil.h"6566enum {67TOKEN_RO,68TOKEN_RW,69TOKEN_SETUID,70TOKEN_NOSETUID,71TOKEN_EXEC,72TOKEN_NOEXEC,73TOKEN_DEVICES,74TOKEN_NODEVICES,75TOKEN_DIRXATTR,76TOKEN_SAXATTR,77TOKEN_XATTR,78TOKEN_NOXATTR,79TOKEN_ATIME,80TOKEN_NOATIME,81TOKEN_RELATIME,82TOKEN_NORELATIME,83TOKEN_NBMAND,84TOKEN_NONBMAND,85TOKEN_MNTPOINT,86TOKEN_LAST,87};8889static const match_table_t zpl_tokens = {90{ TOKEN_RO, MNTOPT_RO },91{ TOKEN_RW, MNTOPT_RW },92{ TOKEN_SETUID, MNTOPT_SETUID },93{ TOKEN_NOSETUID, MNTOPT_NOSETUID },94{ TOKEN_EXEC, MNTOPT_EXEC },95{ TOKEN_NOEXEC, MNTOPT_NOEXEC },96{ TOKEN_DEVICES, MNTOPT_DEVICES },97{ TOKEN_NODEVICES, MNTOPT_NODEVICES },98{ TOKEN_DIRXATTR, MNTOPT_DIRXATTR },99{ TOKEN_SAXATTR, MNTOPT_SAXATTR },100{ TOKEN_XATTR, MNTOPT_XATTR },101{ TOKEN_NOXATTR, MNTOPT_NOXATTR },102{ TOKEN_ATIME, MNTOPT_ATIME },103{ TOKEN_NOATIME, MNTOPT_NOATIME },104{ TOKEN_RELATIME, MNTOPT_RELATIME },105{ TOKEN_NORELATIME, MNTOPT_NORELATIME },106{ TOKEN_NBMAND, MNTOPT_NBMAND },107{ TOKEN_NONBMAND, MNTOPT_NONBMAND },108{ TOKEN_MNTPOINT, MNTOPT_MNTPOINT "=%s" },109{ TOKEN_LAST, NULL },110};111112static void113zfsvfs_vfs_free(vfs_t *vfsp)114{115if (vfsp != NULL) {116if (vfsp->vfs_mntpoint != NULL)117kmem_strfree(vfsp->vfs_mntpoint);118mutex_destroy(&vfsp->vfs_mntpt_lock);119kmem_free(vfsp, sizeof (vfs_t));120}121}122123static int124zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)125{126switch (token) {127case TOKEN_RO:128vfsp->vfs_readonly = B_TRUE;129vfsp->vfs_do_readonly = B_TRUE;130break;131case TOKEN_RW:132vfsp->vfs_readonly = B_FALSE;133vfsp->vfs_do_readonly = B_TRUE;134break;135case TOKEN_SETUID:136vfsp->vfs_setuid = B_TRUE;137vfsp->vfs_do_setuid = B_TRUE;138break;139case TOKEN_NOSETUID:140vfsp->vfs_setuid = B_FALSE;141vfsp->vfs_do_setuid = B_TRUE;142break;143case TOKEN_EXEC:144vfsp->vfs_exec = B_TRUE;145vfsp->vfs_do_exec = B_TRUE;146break;147case TOKEN_NOEXEC:148vfsp->vfs_exec = B_FALSE;149vfsp->vfs_do_exec = B_TRUE;150break;151case 
TOKEN_DEVICES:152vfsp->vfs_devices = B_TRUE;153vfsp->vfs_do_devices = B_TRUE;154break;155case TOKEN_NODEVICES:156vfsp->vfs_devices = B_FALSE;157vfsp->vfs_do_devices = B_TRUE;158break;159case TOKEN_DIRXATTR:160vfsp->vfs_xattr = ZFS_XATTR_DIR;161vfsp->vfs_do_xattr = B_TRUE;162break;163case TOKEN_SAXATTR:164vfsp->vfs_xattr = ZFS_XATTR_SA;165vfsp->vfs_do_xattr = B_TRUE;166break;167case TOKEN_XATTR:168vfsp->vfs_xattr = ZFS_XATTR_SA;169vfsp->vfs_do_xattr = B_TRUE;170break;171case TOKEN_NOXATTR:172vfsp->vfs_xattr = ZFS_XATTR_OFF;173vfsp->vfs_do_xattr = B_TRUE;174break;175case TOKEN_ATIME:176vfsp->vfs_atime = B_TRUE;177vfsp->vfs_do_atime = B_TRUE;178break;179case TOKEN_NOATIME:180vfsp->vfs_atime = B_FALSE;181vfsp->vfs_do_atime = B_TRUE;182break;183case TOKEN_RELATIME:184vfsp->vfs_relatime = B_TRUE;185vfsp->vfs_do_relatime = B_TRUE;186break;187case TOKEN_NORELATIME:188vfsp->vfs_relatime = B_FALSE;189vfsp->vfs_do_relatime = B_TRUE;190break;191case TOKEN_NBMAND:192vfsp->vfs_nbmand = B_TRUE;193vfsp->vfs_do_nbmand = B_TRUE;194break;195case TOKEN_NONBMAND:196vfsp->vfs_nbmand = B_FALSE;197vfsp->vfs_do_nbmand = B_TRUE;198break;199case TOKEN_MNTPOINT:200if (vfsp->vfs_mntpoint != NULL)201kmem_strfree(vfsp->vfs_mntpoint);202vfsp->vfs_mntpoint = match_strdup(&args[0]);203if (vfsp->vfs_mntpoint == NULL)204return (SET_ERROR(ENOMEM));205break;206default:207break;208}209210return (0);211}212213/*214* Parse the raw mntopts and return a vfs_t describing the options.215*/216static int217zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)218{219vfs_t *tmp_vfsp;220int error;221222tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);223mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);224225if (mntopts != NULL) {226substring_t args[MAX_OPT_ARGS];227char *tmp_mntopts, *p, *t;228int token;229230tmp_mntopts = t = kmem_strdup(mntopts);231if (tmp_mntopts == NULL)232return (SET_ERROR(ENOMEM));233234while ((p = strsep(&t, ",")) != NULL) {235if (!*p)236continue;237238args[0].to = args[0].from 
= NULL;239token = match_token(p, zpl_tokens, args);240error = zfsvfs_parse_option(p, token, args, tmp_vfsp);241if (error) {242kmem_strfree(tmp_mntopts);243zfsvfs_vfs_free(tmp_vfsp);244return (error);245}246}247248kmem_strfree(tmp_mntopts);249}250251*vfsp = tmp_vfsp;252253return (0);254}255256boolean_t257zfs_is_readonly(zfsvfs_t *zfsvfs)258{259return (!!(zfsvfs->z_sb->s_flags & SB_RDONLY));260}261262int263zfs_sync(struct super_block *sb, int wait, cred_t *cr)264{265(void) cr;266zfsvfs_t *zfsvfs = sb->s_fs_info;267ASSERT3P(zfsvfs, !=, NULL);268269/*270* Semantically, the only requirement is that the sync be initiated.271* The DMU syncs out txgs frequently, so there's nothing to do.272*/273if (!wait)274return (0);275276int err = zfs_enter(zfsvfs, FTAG);277if (err != 0)278return (err);279280/*281* Sync any pending writes, but do not block if the pool is suspended.282* This is to help with shutting down with pools suspended, as we don't283* want to block in that case.284*/285err = zil_commit_flags(zfsvfs->z_log, 0, ZIL_COMMIT_NOW);286zfs_exit(zfsvfs, FTAG);287288return (err);289}290291static void292atime_changed_cb(void *arg, uint64_t newval)293{294zfsvfs_t *zfsvfs = arg;295struct super_block *sb = zfsvfs->z_sb;296297if (sb == NULL)298return;299/*300* Update SB_NOATIME bit in VFS super block. 
Since atime update is301* determined by atime_needs_update(), atime_needs_update() needs to302* return false if atime is turned off, and not unconditionally return303* false if atime is turned on.304*/305if (newval)306sb->s_flags &= ~SB_NOATIME;307else308sb->s_flags |= SB_NOATIME;309}310311static void312relatime_changed_cb(void *arg, uint64_t newval)313{314((zfsvfs_t *)arg)->z_relatime = newval;315}316317static void318xattr_changed_cb(void *arg, uint64_t newval)319{320zfsvfs_t *zfsvfs = arg;321322if (newval == ZFS_XATTR_OFF) {323zfsvfs->z_flags &= ~ZSB_XATTR;324} else {325zfsvfs->z_flags |= ZSB_XATTR;326327if (newval == ZFS_XATTR_SA)328zfsvfs->z_xattr_sa = B_TRUE;329else330zfsvfs->z_xattr_sa = B_FALSE;331}332}333334static void335acltype_changed_cb(void *arg, uint64_t newval)336{337zfsvfs_t *zfsvfs = arg;338339switch (newval) {340case ZFS_ACLTYPE_NFSV4:341case ZFS_ACLTYPE_OFF:342zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;343zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;344break;345case ZFS_ACLTYPE_POSIX:346#ifdef CONFIG_FS_POSIX_ACL347zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIX;348zfsvfs->z_sb->s_flags |= SB_POSIXACL;349#else350zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;351zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;352#endif /* CONFIG_FS_POSIX_ACL */353break;354default:355break;356}357}358359static void360blksz_changed_cb(void *arg, uint64_t newval)361{362zfsvfs_t *zfsvfs = arg;363ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));364ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);365ASSERT(ISP2(newval));366367zfsvfs->z_max_blksz = newval;368}369370static void371readonly_changed_cb(void *arg, uint64_t newval)372{373zfsvfs_t *zfsvfs = arg;374struct super_block *sb = zfsvfs->z_sb;375376if (sb == NULL)377return;378379if (newval)380sb->s_flags |= SB_RDONLY;381else382sb->s_flags &= ~SB_RDONLY;383}384385static void386devices_changed_cb(void *arg, uint64_t newval)387{388}389390static void391setuid_changed_cb(void *arg, uint64_t newval)392{393}394395static void396exec_changed_cb(void *arg, 
uint64_t newval)397{398}399400static void401nbmand_changed_cb(void *arg, uint64_t newval)402{403zfsvfs_t *zfsvfs = arg;404struct super_block *sb = zfsvfs->z_sb;405406if (sb == NULL)407return;408409if (newval == TRUE)410sb->s_flags |= SB_MANDLOCK;411else412sb->s_flags &= ~SB_MANDLOCK;413}414415static void416snapdir_changed_cb(void *arg, uint64_t newval)417{418((zfsvfs_t *)arg)->z_show_ctldir = newval;419}420421static void422acl_mode_changed_cb(void *arg, uint64_t newval)423{424zfsvfs_t *zfsvfs = arg;425426zfsvfs->z_acl_mode = newval;427}428429static void430acl_inherit_changed_cb(void *arg, uint64_t newval)431{432((zfsvfs_t *)arg)->z_acl_inherit = newval;433}434435static void436longname_changed_cb(void *arg, uint64_t newval)437{438((zfsvfs_t *)arg)->z_longname = newval;439}440441static int442zfs_register_callbacks(vfs_t *vfsp)443{444struct dsl_dataset *ds = NULL;445objset_t *os = NULL;446zfsvfs_t *zfsvfs = NULL;447int error = 0;448449ASSERT(vfsp);450zfsvfs = vfsp->vfs_data;451ASSERT(zfsvfs);452os = zfsvfs->z_os;453454/*455* The act of registering our callbacks will destroy any mount456* options we may have. In order to enable temporary overrides457* of mount options, we stash away the current values and458* restore them after we register the callbacks.459*/460if (zfs_is_readonly(zfsvfs) || !spa_writeable(dmu_objset_spa(os))) {461vfsp->vfs_do_readonly = B_TRUE;462vfsp->vfs_readonly = B_TRUE;463}464465/*466* Register property callbacks.467*468* It would probably be fine to just check for i/o error from469* the first prop_register(), but I guess I like to go470* overboard...471*/472ds = dmu_objset_ds(os);473dsl_pool_config_enter(dmu_objset_pool(os), FTAG);474error = dsl_prop_register(ds,475zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);476error = error ? error : dsl_prop_register(ds,477zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs);478error = error ? 
error : dsl_prop_register(ds,479zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);480error = error ? error : dsl_prop_register(ds,481zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);482error = error ? error : dsl_prop_register(ds,483zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);484error = error ? error : dsl_prop_register(ds,485zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);486error = error ? error : dsl_prop_register(ds,487zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);488error = error ? error : dsl_prop_register(ds,489zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);490error = error ? error : dsl_prop_register(ds,491zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);492error = error ? error : dsl_prop_register(ds,493zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zfsvfs);494error = error ? error : dsl_prop_register(ds,495zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);496error = error ? error : dsl_prop_register(ds,497zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,498zfsvfs);499error = error ? error : dsl_prop_register(ds,500zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs);501error = error ? 
error : dsl_prop_register(ds,502zfs_prop_to_name(ZFS_PROP_LONGNAME), longname_changed_cb, zfsvfs);503dsl_pool_config_exit(dmu_objset_pool(os), FTAG);504if (error)505goto unregister;506507/*508* Invoke our callbacks to restore temporary mount options.509*/510if (vfsp->vfs_do_readonly)511readonly_changed_cb(zfsvfs, vfsp->vfs_readonly);512if (vfsp->vfs_do_setuid)513setuid_changed_cb(zfsvfs, vfsp->vfs_setuid);514if (vfsp->vfs_do_exec)515exec_changed_cb(zfsvfs, vfsp->vfs_exec);516if (vfsp->vfs_do_devices)517devices_changed_cb(zfsvfs, vfsp->vfs_devices);518if (vfsp->vfs_do_xattr)519xattr_changed_cb(zfsvfs, vfsp->vfs_xattr);520if (vfsp->vfs_do_atime)521atime_changed_cb(zfsvfs, vfsp->vfs_atime);522if (vfsp->vfs_do_relatime)523relatime_changed_cb(zfsvfs, vfsp->vfs_relatime);524if (vfsp->vfs_do_nbmand)525nbmand_changed_cb(zfsvfs, vfsp->vfs_nbmand);526527return (0);528529unregister:530dsl_prop_unregister_all(ds, zfsvfs);531return (error);532}533534/*535* Takes a dataset, a property, a value and that value's setpoint as536* found in the ZAP. 
Checks if the property has been changed in the vfs.537* If so, val and setpoint will be overwritten with updated content.538* Otherwise, they are left unchanged.539*/540int541zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,542char *setpoint)543{544int error;545zfsvfs_t *zfvp;546vfs_t *vfsp;547objset_t *os;548uint64_t tmp = *val;549550error = dmu_objset_from_ds(ds, &os);551if (error != 0)552return (error);553554if (dmu_objset_type(os) != DMU_OST_ZFS)555return (EINVAL);556557mutex_enter(&os->os_user_ptr_lock);558zfvp = dmu_objset_get_user(os);559mutex_exit(&os->os_user_ptr_lock);560if (zfvp == NULL)561return (ESRCH);562563vfsp = zfvp->z_vfs;564565switch (zfs_prop) {566case ZFS_PROP_ATIME:567if (vfsp->vfs_do_atime)568tmp = vfsp->vfs_atime;569break;570case ZFS_PROP_RELATIME:571if (vfsp->vfs_do_relatime)572tmp = vfsp->vfs_relatime;573break;574case ZFS_PROP_DEVICES:575if (vfsp->vfs_do_devices)576tmp = vfsp->vfs_devices;577break;578case ZFS_PROP_EXEC:579if (vfsp->vfs_do_exec)580tmp = vfsp->vfs_exec;581break;582case ZFS_PROP_SETUID:583if (vfsp->vfs_do_setuid)584tmp = vfsp->vfs_setuid;585break;586case ZFS_PROP_READONLY:587if (vfsp->vfs_do_readonly)588tmp = vfsp->vfs_readonly;589break;590case ZFS_PROP_XATTR:591if (vfsp->vfs_do_xattr)592tmp = vfsp->vfs_xattr;593break;594case ZFS_PROP_NBMAND:595if (vfsp->vfs_do_nbmand)596tmp = vfsp->vfs_nbmand;597break;598default:599return (ENOENT);600}601602if (tmp != *val) {603if (setpoint)604(void) strcpy(setpoint, "temporary");605*val = tmp;606}607return (0);608}609610/*611* Associate this zfsvfs with the given objset, which must be owned.612* This will cache a bunch of on-disk state from the objset in the613* zfsvfs.614*/615static int616zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)617{618int error;619uint64_t val;620621zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;622zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;623zfsvfs->z_os = os;624625error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);626if (error 
!= 0)627return (error);628if (zfsvfs->z_version >629zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {630(void) printk("Can't mount a version %lld file system "631"on a version %lld pool\n. Pool must be upgraded to mount "632"this file system.\n", (u_longlong_t)zfsvfs->z_version,633(u_longlong_t)spa_version(dmu_objset_spa(os)));634return (SET_ERROR(ENOTSUP));635}636error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);637if (error != 0)638return (error);639zfsvfs->z_norm = (int)val;640641error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);642if (error != 0)643return (error);644zfsvfs->z_utf8 = (val != 0);645646error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);647if (error != 0)648return (error);649zfsvfs->z_case = (uint_t)val;650651if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val)) != 0)652return (error);653zfsvfs->z_acl_type = (uint_t)val;654655/*656* Fold case on file systems that are always or sometimes case657* insensitive.658*/659if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||660zfsvfs->z_case == ZFS_CASE_MIXED)661zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;662663zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);664zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);665666uint64_t sa_obj = 0;667if (zfsvfs->z_use_sa) {668/* should either have both of these objects or none */669error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,670&sa_obj);671if (error != 0)672return (error);673674error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);675if ((error == 0) && (val == ZFS_XATTR_SA))676zfsvfs->z_xattr_sa = B_TRUE;677}678679error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA,680&zfsvfs->z_defaultuserquota);681if (error != 0)682return (error);683684error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA,685&zfsvfs->z_defaultgroupquota);686if (error != 0)687return (error);688689error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA,690&zfsvfs->z_defaultprojectquota);691if (error != 0)692return (error);693694error = zfs_get_zplprop(os, 
ZFS_PROP_DEFAULTUSEROBJQUOTA,695&zfsvfs->z_defaultuserobjquota);696if (error != 0)697return (error);698699error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA,700&zfsvfs->z_defaultgroupobjquota);701if (error != 0)702return (error);703704error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA,705&zfsvfs->z_defaultprojectobjquota);706if (error != 0)707return (error);708709error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,710&zfsvfs->z_root);711if (error != 0)712return (error);713ASSERT(zfsvfs->z_root != 0);714715error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,716&zfsvfs->z_unlinkedobj);717if (error != 0)718return (error);719720error = zap_lookup(os, MASTER_NODE_OBJ,721zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],7228, 1, &zfsvfs->z_userquota_obj);723if (error == ENOENT)724zfsvfs->z_userquota_obj = 0;725else if (error != 0)726return (error);727728error = zap_lookup(os, MASTER_NODE_OBJ,729zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],7308, 1, &zfsvfs->z_groupquota_obj);731if (error == ENOENT)732zfsvfs->z_groupquota_obj = 0;733else if (error != 0)734return (error);735736error = zap_lookup(os, MASTER_NODE_OBJ,737zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],7388, 1, &zfsvfs->z_projectquota_obj);739if (error == ENOENT)740zfsvfs->z_projectquota_obj = 0;741else if (error != 0)742return (error);743744error = zap_lookup(os, MASTER_NODE_OBJ,745zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],7468, 1, &zfsvfs->z_userobjquota_obj);747if (error == ENOENT)748zfsvfs->z_userobjquota_obj = 0;749else if (error != 0)750return (error);751752error = zap_lookup(os, MASTER_NODE_OBJ,753zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],7548, 1, &zfsvfs->z_groupobjquota_obj);755if (error == ENOENT)756zfsvfs->z_groupobjquota_obj = 0;757else if (error != 0)758return (error);759760error = zap_lookup(os, MASTER_NODE_OBJ,761zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],7628, 1, &zfsvfs->z_projectobjquota_obj);763if (error == 
ENOENT)764zfsvfs->z_projectobjquota_obj = 0;765else if (error != 0)766return (error);767768error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,769&zfsvfs->z_fuid_obj);770if (error == ENOENT)771zfsvfs->z_fuid_obj = 0;772else if (error != 0)773return (error);774775error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,776&zfsvfs->z_shares_dir);777if (error == ENOENT)778zfsvfs->z_shares_dir = 0;779else if (error != 0)780return (error);781782error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,783&zfsvfs->z_attr_table);784if (error != 0)785return (error);786787if (zfsvfs->z_version >= ZPL_VERSION_SA)788sa_register_update_callback(os, zfs_sa_upgrade);789790return (0);791}792793int794zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)795{796objset_t *os;797zfsvfs_t *zfsvfs;798int error;799boolean_t ro = (readonly || (strchr(osname, '@') != NULL));800801zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);802803error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, &os);804if (error != 0) {805kmem_free(zfsvfs, sizeof (zfsvfs_t));806return (error);807}808809error = zfsvfs_create_impl(zfvp, zfsvfs, os);810811return (error);812}813814815/*816* Note: zfsvfs is assumed to be malloc'd, and will be freed by this function817* on a failure. 
Do not pass in a statically allocated zfsvfs.818*/819int820zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)821{822int error;823824zfsvfs->z_vfs = NULL;825zfsvfs->z_sb = NULL;826zfsvfs->z_parent = zfsvfs;827828mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);829mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);830list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),831offsetof(znode_t, z_link_node));832ZFS_TEARDOWN_INIT(zfsvfs);833rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);834rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);835836int size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),837ZFS_OBJ_MTX_MAX);838zfsvfs->z_hold_size = size;839zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,840KM_SLEEP);841zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);842for (int i = 0; i != size; i++) {843avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,844sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));845mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);846}847848error = zfsvfs_init(zfsvfs, os);849if (error != 0) {850dmu_objset_disown(os, B_TRUE, zfsvfs);851*zfvp = NULL;852zfsvfs_free(zfsvfs);853return (error);854}855856zfsvfs->z_drain_task = TASKQID_INVALID;857zfsvfs->z_draining = B_FALSE;858zfsvfs->z_drain_cancel = B_TRUE;859860*zfvp = zfsvfs;861return (0);862}863864static int865zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)866{867int error;868boolean_t readonly = zfs_is_readonly(zfsvfs);869870error = zfs_register_callbacks(zfsvfs->z_vfs);871if (error)872return (error);873874/*875* If we are not mounting (ie: online recv), then we don't876* have to worry about replaying the log as we blocked all877* operations out since we closed the ZIL.878*/879if (mounting) {880ASSERT0P(zfsvfs->z_kstat.dk_kstats);881error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);882if (error)883return (error);884zfsvfs->z_log = zil_open(zfsvfs->z_os, 
zfs_get_data,885&zfsvfs->z_kstat.dk_zil_sums);886887/*888* During replay we remove the read only flag to889* allow replays to succeed.890*/891if (readonly != 0) {892readonly_changed_cb(zfsvfs, B_FALSE);893} else {894zap_stats_t zs;895if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,896&zs) == 0) {897dataset_kstats_update_nunlinks_kstat(898&zfsvfs->z_kstat, zs.zs_num_entries);899dprintf_ds(zfsvfs->z_os->os_dsl_dataset,900"num_entries in unlinked set: %llu",901zs.zs_num_entries);902}903zfs_unlinked_drain(zfsvfs);904dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;905dd->dd_activity_cancelled = B_FALSE;906}907908/*909* Parse and replay the intent log.910*911* Because of ziltest, this must be done after912* zfs_unlinked_drain(). (Further note: ziltest913* doesn't use readonly mounts, where914* zfs_unlinked_drain() isn't called.) This is because915* ziltest causes spa_sync() to think it's committed,916* but actually it is not, so the intent log contains917* many txg's worth of changes.918*919* In particular, if object N is in the unlinked set in920* the last txg to actually sync, then it could be921* actually freed in a later txg and then reallocated922* in a yet later txg. This would write a "create923* object N" record to the intent log. Normally, this924* would be fine because the spa_sync() would have925* written out the fact that object N is free, before926* we could write the "create object N" intent log927* record.928*929* But when we are in ziltest mode, we advance the "open930* txg" without actually spa_sync()-ing the changes to931* disk. 
So we would see that object N is still932* allocated and in the unlinked set, and there is an933* intent log record saying to allocate it.934*/935if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {936if (zil_replay_disable) {937zil_destroy(zfsvfs->z_log, B_FALSE);938} else {939zfsvfs->z_replay = B_TRUE;940zil_replay(zfsvfs->z_os, zfsvfs,941zfs_replay_vector);942zfsvfs->z_replay = B_FALSE;943}944}945946/* restore readonly bit */947if (readonly != 0)948readonly_changed_cb(zfsvfs, B_TRUE);949} else {950ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);951zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,952&zfsvfs->z_kstat.dk_zil_sums);953}954955/*956* Set the objset user_ptr to track its zfsvfs.957*/958mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);959dmu_objset_set_user(zfsvfs->z_os, zfsvfs);960mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);961962return (0);963}964965void966zfsvfs_free(zfsvfs_t *zfsvfs)967{968int i, size = zfsvfs->z_hold_size;969970zfs_fuid_destroy(zfsvfs);971972mutex_destroy(&zfsvfs->z_znodes_lock);973mutex_destroy(&zfsvfs->z_lock);974list_destroy(&zfsvfs->z_all_znodes);975ZFS_TEARDOWN_DESTROY(zfsvfs);976rw_destroy(&zfsvfs->z_teardown_inactive_lock);977rw_destroy(&zfsvfs->z_fuid_lock);978for (i = 0; i != size; i++) {979avl_destroy(&zfsvfs->z_hold_trees[i]);980mutex_destroy(&zfsvfs->z_hold_locks[i]);981}982vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);983vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);984zfsvfs_vfs_free(zfsvfs->z_vfs);985dataset_kstats_destroy(&zfsvfs->z_kstat);986kmem_free(zfsvfs, sizeof (zfsvfs_t));987}988989static void990zfs_set_fuid_feature(zfsvfs_t *zfsvfs)991{992zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);993zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);994}995996static void997zfs_unregister_callbacks(zfsvfs_t *zfsvfs)998{999objset_t *os = zfsvfs->z_os;10001001if (!dmu_objset_is_snapshot(os))1002dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);1003}10041005#ifdef 
HAVE_MLSLABEL1006/*1007* Check that the hex label string is appropriate for the dataset being1008* mounted into the global_zone proper.1009*1010* Return an error if the hex label string is not default or1011* admin_low/admin_high. For admin_low labels, the corresponding1012* dataset must be readonly.1013*/1014int1015zfs_check_global_label(const char *dsname, const char *hexsl)1016{1017if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)1018return (0);1019if (strcasecmp(hexsl, ADMIN_HIGH) == 0)1020return (0);1021if (strcasecmp(hexsl, ADMIN_LOW) == 0) {1022/* must be readonly */1023uint64_t rdonly;10241025if (dsl_prop_get_integer(dsname,1026zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))1027return (SET_ERROR(EACCES));1028return (rdonly ? 0 : SET_ERROR(EACCES));1029}1030return (SET_ERROR(EACCES));1031}1032#endif /* HAVE_MLSLABEL */10331034static int1035zfs_statfs_project(zfsvfs_t *zfsvfs, znode_t *zp, struct kstatfs *statp,1036uint32_t bshift)1037{1038char buf[20 + DMU_OBJACCT_PREFIX_LEN];1039uint64_t offset = DMU_OBJACCT_PREFIX_LEN;1040uint64_t quota;1041uint64_t used;1042int err;10431044strlcpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);1045err = zfs_id_to_fuidstr(zfsvfs, NULL, zp->z_projid, buf + offset,1046sizeof (buf) - offset, B_FALSE);1047if (err)1048return (err);10491050if (zfsvfs->z_projectquota_obj == 0) {1051if (zfsvfs->z_defaultprojectquota == 0)1052goto objs;1053quota = zfsvfs->z_defaultprojectquota;1054} else {1055err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectquota_obj,1056buf + offset, 8, 1, "a);1057if (err && (quota = zfsvfs->z_defaultprojectquota) == 0) {1058if (err == ENOENT)1059goto objs;1060return (err);1061}1062}10631064err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,1065buf + offset, 8, 1, &used);1066if (unlikely(err == ENOENT)) {1067uint32_t blksize;1068u_longlong_t nblocks;10691070/*1071* Quota accounting is async, so it is possible race case.1072* There is at least one object with the given project 
ID.1073*/1074sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);1075if (unlikely(zp->z_blksz == 0))1076blksize = zfsvfs->z_max_blksz;10771078used = blksize * nblocks;1079} else if (err) {1080return (err);1081}10821083statp->f_blocks = quota >> bshift;1084statp->f_bfree = (quota > used) ? ((quota - used) >> bshift) : 0;1085statp->f_bavail = statp->f_bfree;10861087objs:10881089if (zfsvfs->z_projectobjquota_obj == 0) {1090if (zfsvfs->z_defaultprojectobjquota == 0)1091return (0);1092quota = zfsvfs->z_defaultprojectobjquota;1093} else {1094err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectobjquota_obj,1095buf + offset, 8, 1, "a);1096if (err && (quota = zfsvfs->z_defaultprojectobjquota) == 0) {1097if (err == ENOENT)1098return (0);1099return (err);1100}1101}110211031104err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,1105buf, 8, 1, &used);1106if (unlikely(err == ENOENT)) {1107/*1108* Quota accounting is async, so it is possible race case.1109* There is at least one object with the given project ID.1110*/1111used = 1;1112} else if (err) {1113return (err);1114}11151116statp->f_files = quota;1117statp->f_ffree = (quota > used) ? (quota - used) : 0;11181119return (0);1120}11211122int1123zfs_statvfs(struct inode *ip, struct kstatfs *statp)1124{1125zfsvfs_t *zfsvfs = ITOZSB(ip);1126uint64_t refdbytes, availbytes, usedobjs, availobjs;1127int err = 0;11281129if ((err = zfs_enter(zfsvfs, FTAG)) != 0)1130return (err);11311132dmu_objset_space(zfsvfs->z_os,1133&refdbytes, &availbytes, &usedobjs, &availobjs);11341135uint64_t fsid = dmu_objset_fsid_guid(zfsvfs->z_os);1136/*1137* The underlying storage pool actually uses multiple block1138* size. Under Solaris frsize (fragment size) is reported as1139* the smallest block size we support, and bsize (block size)1140* as the filesystem's maximum block size. Unfortunately,1141* under Linux the fragment size and block size are often used1142* interchangeably. 
Thus we are forced to report both of them1143* as the filesystem's maximum block size.1144*/1145statp->f_frsize = zfsvfs->z_max_blksz;1146statp->f_bsize = zfsvfs->z_max_blksz;1147uint32_t bshift = fls(statp->f_bsize) - 1;11481149/*1150* The following report "total" blocks of various kinds in1151* the file system, but reported in terms of f_bsize - the1152* "preferred" size.1153*/11541155/* Round up so we never have a filesystem using 0 blocks. */1156refdbytes = P2ROUNDUP(refdbytes, statp->f_bsize);1157statp->f_blocks = (refdbytes + availbytes) >> bshift;1158statp->f_bfree = availbytes >> bshift;1159statp->f_bavail = statp->f_bfree; /* no root reservation */11601161/*1162* statvfs() should really be called statufs(), because it assumes1163* static metadata. ZFS doesn't preallocate files, so the best1164* we can do is report the max that could possibly fit in f_files,1165* and that minus the number actually used in f_ffree.1166* For f_ffree, report the smaller of the number of objects available1167* and the number of blocks (each object will take at least a block).1168*/1169statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT);1170statp->f_files = statp->f_ffree + usedobjs;1171statp->f_fsid.val[0] = (uint32_t)fsid;1172statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);1173statp->f_type = ZFS_SUPER_MAGIC;1174statp->f_namelen =1175zfsvfs->z_longname ? 
(ZAP_MAXNAMELEN_NEW - 1) : (MAXNAMELEN - 1);11761177/*1178* We have all of 40 characters to stuff a string here.1179* Is there anything useful we could/should provide?1180*/1181memset(statp->f_spare, 0, sizeof (statp->f_spare));11821183if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&1184dmu_objset_projectquota_present(zfsvfs->z_os)) {1185znode_t *zp = ITOZ(ip);11861187if (zp->z_pflags & ZFS_PROJINHERIT && zp->z_projid &&1188zpl_is_valid_projid(zp->z_projid))1189err = zfs_statfs_project(zfsvfs, zp, statp, bshift);1190}11911192zfs_exit(zfsvfs, FTAG);1193return (err);1194}11951196static int1197zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)1198{1199znode_t *rootzp;1200int error;12011202if ((error = zfs_enter(zfsvfs, FTAG)) != 0)1203return (error);12041205error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);1206if (error == 0)1207*ipp = ZTOI(rootzp);12081209zfs_exit(zfsvfs, FTAG);1210return (error);1211}12121213/*1214* Dentry and inode caches referenced by a task in non-root memcg are1215* not going to be scanned by the kernel-provided shrinker. So, if1216* kernel prunes nothing, fall back to this manual walk to free dnodes.1217* To avoid scanning the same znodes multiple times they are always rotated1218* to the end of the z_all_znodes list. 
 * New znodes are inserted at the end of the list so we're always
 * scanning the oldest znodes first.
 */
static int
zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
{
	znode_t **zp_array, *zp;
	/*
	 * Bound the batch: at most nr_to_scan znodes, and no more
	 * pointers than fit in eight pages.
	 */
	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
	int objects = 0;
	int i = 0, j = 0;

	zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);

	/*
	 * Pass 1: under z_znodes_lock, rotate each examined znode to the
	 * tail of z_all_znodes (so repeated calls make forward progress)
	 * and collect an igrab() reference on each candidate.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {

		if ((i++ > nr_to_scan) || (j >= max_array))
			break;

		ASSERT(list_link_active(&zp->z_link_node));
		list_remove(&zfsvfs->z_all_znodes, zp);
		list_insert_tail(&zfsvfs->z_all_znodes, zp);

		/* Skip active znodes and .zfs entries */
		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
			continue;

		/* igrab() fails for inodes already being evicted. */
		if (igrab(ZTOI(zp)) == NULL)
			continue;

		zp_array[j] = zp;
		j++;
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Pass 2: without the lock, prune dentry aliases and drop our
	 * reference.  An i_count of 1 here means only our reference
	 * remained, i.e. the inode is now freeable — count it.
	 */
	for (i = 0; i < j; i++) {
		zp = zp_array[i];

		ASSERT3P(zp, !=, NULL);
		d_prune_aliases(ZTOI(zp));

		if (atomic_read(&ZTOI(zp)->i_count) == 1)
			objects++;

		zrele(zp);
	}

	vmem_free(zp_array, max_array * sizeof (znode_t *));

	return (objects);
}

/*
 * The ARC has requested that the filesystem drop entries from the dentry
 * and inode caches.  This can occur when the ARC needs to free meta data
 * blocks but can't because they are all pinned by entries in these caches.
 */
#if defined(HAVE_SUPER_BLOCK_S_SHRINK)
#define	S_SHRINK(sb)	(&(sb)->s_shrink)
#elif defined(HAVE_SUPER_BLOCK_S_SHRINK_PTR)
#define	S_SHRINK(sb)	((sb)->s_shrink)
#endif

int
zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
	zfsvfs_t *zfsvfs = sb->s_fs_info;
	int error = 0;
	struct shrinker *shrinker = S_SHRINK(sb);
	struct shrink_control sc = {
		.nr_to_scan = nr_to_scan,
		.gfp_mask = GFP_KERNEL,
	};

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);

#ifdef SHRINKER_NUMA_AWARE
	if (shrinker->flags & SHRINKER_NUMA_AWARE) {
		/*
		 * First sum the per-node object counts (tc is seeded at 1
		 * so the mult_frac() divisor below can never be zero), then
		 * scan each node for a share of nr_to_scan proportional to
		 * its count.
		 */
		long tc = 1;
		for_each_online_node(sc.nid) {
			long c = shrinker->count_objects(shrinker, &sc);
			if (c == 0 || c == SHRINK_EMPTY)
				continue;
			tc += c;
		}
		*objects = 0;
		for_each_online_node(sc.nid) {
			long c = shrinker->count_objects(shrinker, &sc);
			if (c == 0 || c == SHRINK_EMPTY)
				continue;
			/* Clamp the fraction c/tc to at most 1. */
			if (c > tc)
				tc = c;
			sc.nr_to_scan = mult_frac(nr_to_scan, c, tc) + 1;
			*objects += (*shrinker->scan_objects)(shrinker, &sc);
		}
	} else {
		*objects = (*shrinker->scan_objects)(shrinker, &sc);
	}
#else
	*objects = (*shrinker->scan_objects)(shrinker, &sc);
#endif

	/*
	 * Fall back to zfs_prune_aliases if kernel's shrinker did nothing
	 * due to dentry and inode caches being referenced by a task running
	 * in non-root memcg.
	 */
	if (*objects == 0)
		*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);

	zfs_exit(zfsvfs, FTAG);

	dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
	    nr_to_scan, *objects, error);

	return (error);
}

/*
 * Teardown the zfsvfs_t.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static
int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t *zp;

	zfs_unlinked_drain_stop_wait(zfsvfs);

	/*
	 * If someone has not already unmounted this file system,
	 * drain the zrele_taskq to ensure all active references to the
	 * zfsvfs_t have been handled only then can it be safely destroyed.
	 */
	if (zfsvfs->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but iputs run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely check z_all_znodes for being empty because the
		 * VFS has already blocked operations which add to it.
		 */
		int round = 0;
		while (!list_is_empty(&zfsvfs->z_all_znodes)) {
			taskq_wait_outstanding(dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)), 0);
			/* Non-unmount callers give up after two rounds. */
			if (++round > 1 && !unmounting)
				break;
		}
	}

	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's super block as the
		 * parent filesystem and all of its snapshots have their
		 * inode's super block set to the parent's filesystem's
		 * super block.  Note, 'z_parent' is self referential
		 * for non-snapshots.
		 */
		shrink_dcache_sb(zfsvfs->z_parent->z_sb);
	}

	/*
	 * Close the zil.  NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no VFS ops active, and any new VFS ops
	 * will fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs. We also grab an extra reference to all
	 * the remaining inodes so that the kernel does not attempt to free
	 * any inodes of a suspended fs. This can cause deadlocks since the
	 * zfs_resume_fs() process may involve starting threads, which might
	 * attempt to free unreferenced inodes to free up memory for the new
	 * thread.
	 */
	if (!unmounting) {
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			if (zp->z_sa_hdl)
				zfs_znode_dmu_fini(zp);
			/* z_suspended is cleared again in zfs_resume_fs(). */
			if (igrab(ZTOI(zp)) != NULL)
				zp->z_suspended = B_TRUE;

		}
		mutex_exit(&zfsvfs->z_znodes_lock);
	}

	/*
	 * If we are unmounting, set the unmounted flag and let new VFS ops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other VFS ops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data. We must write out any dirty data before
	 * disowning the dataset.
	 */
	objset_t *os = zfsvfs->z_os;
	boolean_t os_dirty = B_FALSE;
	for (int t = 0; t < TXG_SIZE; t++) {
		if (dmu_objset_is_dirty(os, t)) {
			os_dirty = B_TRUE;
			break;
		}
	}
	if (!zfs_is_readonly(zfsvfs) && os_dirty) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	}
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}

/* Monotonic suffix used to give each mount's BDI a unique name. */
static atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);

/*
 * Mount entry point: wire a dataset (zm->mnt_osname) up to the given
 * super block.  'silent' is accepted for VFS interface compatibility but
 * is not consulted here.  Returns 0 on success or an errno; on failure
 * sb->s_fs_info is cleared so zfs_preumount() sees no dangling zfsvfs.
 */
int
zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
{
	const char *osname = zm->mnt_osname;
	struct inode *root_inode = NULL;
	uint64_t recordsize;
	int error = 0;
	zfsvfs_t *zfsvfs = NULL;
	vfs_t *vfs = NULL;
	int canwrite;
	int dataset_visible_zone;

	ASSERT(zm);
	ASSERT(osname);

	dataset_visible_zone = zone_dataset_visible(osname, &canwrite);

	/*
	 * Refuse to mount a filesystem if we are in a namespace and the
	 * dataset is not visible or writable in that namespace.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!dataset_visible_zone || !canwrite)) {
		return (SET_ERROR(EPERM));
	}

	error = zfsvfs_parse_options(zm->mnt_data, &vfs);
	if (error)
		return (error);

	/*
	 * If a non-writable filesystem is being mounted without the
	 * read-only flag, pretend it was set, as done for snapshots.
	 */
	if (!canwrite)
		vfs->vfs_readonly = B_TRUE;

	error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
	if (error) {
		zfsvfs_vfs_free(vfs);
		goto out;
	}

	if ((error = dsl_prop_get_integer(osname, "recordsize",
	    &recordsize, NULL))) {
		zfsvfs_vfs_free(vfs);
		goto out;
	}

	vfs->vfs_data = zfsvfs;
	zfsvfs->z_vfs = vfs;
	zfsvfs->z_sb = sb;
	sb->s_fs_info = zfsvfs;
	sb->s_magic = ZFS_SUPER_MAGIC;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_time_gran = 1;
	/* Advertise the dataset's recordsize as the nominal block size. */
	sb->s_blocksize = recordsize;
	sb->s_blocksize_bits = ilog2(recordsize);

	/* super_setup_bdi_name() returns a negative errno; flip the sign. */
	error = -super_setup_bdi_name(sb, "%.28s-%ld", "zfs",
	    atomic_long_inc_return(&zfs_bdi_seq));
	if (error)
		goto out;

	/* Disable VFS read-ahead; the ARC handles prefetch itself. */
	sb->s_bdi->ra_pages = 0;

	/* Set callback operations for the file system. */
	sb->s_op = &zpl_super_operations;
	sb->s_xattr = zpl_xattr_handlers;
	sb->s_export_op = &zpl_export_operations;

#ifdef HAVE_SET_DEFAULT_D_OP
	set_default_d_op(sb, &zpl_dentry_operations);
#else
	sb->s_d_op = &zpl_dentry_operations;
#endif

	/* Set features for file system. */
	zfs_set_fuid_feature(zfsvfs);

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		uint64_t pval;

		/* Snapshots are always read-only with atime disabled. */
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if ((error = dsl_prop_get_integer(osname,
		    "xattr", &pval, NULL)))
			goto out;
		xattr_changed_cb(zfsvfs, pval);
		if ((error = dsl_prop_get_integer(osname,
		    "acltype", &pval, NULL)))
			goto out;
		acltype_changed_cb(zfsvfs, pval);
		zfsvfs->z_issnap = B_TRUE;
		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
		zfsvfs->z_snap_defer_time = jiffies;

		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
	} else {
		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
			goto out;
	}

	/* Allocate a root inode for the filesystem. */
	error = zfs_root(zfsvfs, &root_inode);
	if (error) {
		(void) zfs_umount(sb);
		zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
		goto out;
	}

	/* Allocate a root dentry for the filesystem */
	sb->s_root = d_make_root(root_inode);
	if (sb->s_root == NULL) {
		(void) zfs_umount(sb);
		zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
		error = SET_ERROR(ENOMEM);
		goto out;
	}

	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);

	zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
out:
	if (error) {
		if (zfsvfs != NULL) {
			dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
			zfsvfs_free(zfsvfs);
		}
		/*
		 * make sure we don't have dangling sb->s_fs_info which
		 * zfs_preumount will use.
		 */
		sb->s_fs_info = NULL;
	}

	return (error);
}

/*
 * Called when an unmount is requested and certain sanity checks have
 * already passed.  At this point no dentries or inodes have been reclaimed
 * from their respective caches.  We drop the extra reference on the .zfs
 * control directory to allow everything to be reclaimed.  All snapshots
All snapshots1634* must already have been unmounted to reach this point.1635*/1636void1637zfs_preumount(struct super_block *sb)1638{1639zfsvfs_t *zfsvfs = sb->s_fs_info;16401641/* zfsvfs is NULL when zfs_domount fails during mount */1642if (zfsvfs) {1643zfs_unlinked_drain_stop_wait(zfsvfs);1644zfsctl_destroy(sb->s_fs_info);1645/*1646* Wait for zrele_async before entering evict_inodes in1647* generic_shutdown_super. The reason we must finish before1648* evict_inodes is when lazytime is on, or when zfs_purgedir1649* calls zfs_zget, zrele would bump i_count from 0 to 1. This1650* would race with the i_count check in evict_inodes. This means1651* it could destroy the inode while we are still using it.1652*1653* We wait for two passes. xattr directories in the first pass1654* may add xattr entries in zfs_purgedir, so in the second pass1655* we wait for them. We don't use taskq_wait here because it is1656* a pool wide taskq. Other mounted filesystems can constantly1657* do zrele_async and there's no guarantee when taskq will be1658* empty.1659*/1660taskq_wait_outstanding(dsl_pool_zrele_taskq(1661dmu_objset_pool(zfsvfs->z_os)), 0);1662taskq_wait_outstanding(dsl_pool_zrele_taskq(1663dmu_objset_pool(zfsvfs->z_os)), 0);1664}1665}16661667/*1668* Called once all other unmount released tear down has occurred.1669* It is our responsibility to release any remaining infrastructure.1670*/1671int1672zfs_umount(struct super_block *sb)1673{1674zfsvfs_t *zfsvfs = sb->s_fs_info;1675objset_t *os;16761677if (zfsvfs->z_arc_prune != NULL)1678arc_remove_prune_callback(zfsvfs->z_arc_prune);1679VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));1680os = zfsvfs->z_os;16811682/*1683* z_os will be NULL if there was an error in1684* attempting to reopen zfsvfs.1685*/1686if (os != NULL) {1687/*1688* Unset the objset user_ptr.1689*/1690mutex_enter(&os->os_user_ptr_lock);1691dmu_objset_set_user(os, NULL);1692mutex_exit(&os->os_user_ptr_lock);16931694/*1695* Finally release the 
objset1696*/1697dmu_objset_disown(os, B_TRUE, zfsvfs);1698}16991700zfsvfs_free(zfsvfs);1701sb->s_fs_info = NULL;1702return (0);1703}17041705int1706zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)1707{1708zfsvfs_t *zfsvfs = sb->s_fs_info;1709vfs_t *vfsp;1710boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os);1711int error;17121713if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) &&1714!(*flags & SB_RDONLY)) {1715*flags |= SB_RDONLY;1716return (EROFS);1717}17181719error = zfsvfs_parse_options(zm->mnt_data, &vfsp);1720if (error)1721return (error);17221723if (!zfs_is_readonly(zfsvfs) && (*flags & SB_RDONLY))1724txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);17251726zfs_unregister_callbacks(zfsvfs);1727zfsvfs_vfs_free(zfsvfs->z_vfs);17281729vfsp->vfs_data = zfsvfs;1730zfsvfs->z_vfs = vfsp;1731if (!issnap)1732(void) zfs_register_callbacks(vfsp);17331734return (error);1735}17361737int1738zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)1739{1740zfsvfs_t *zfsvfs = sb->s_fs_info;1741znode_t *zp;1742uint64_t object = 0;1743uint64_t fid_gen = 0;1744uint64_t gen_mask;1745uint64_t zp_gen;1746int i, err;17471748*ipp = NULL;17491750if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {1751zfid_short_t *zfid = (zfid_short_t *)fidp;17521753for (i = 0; i < sizeof (zfid->zf_object); i++)1754object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);17551756for (i = 0; i < sizeof (zfid->zf_gen); i++)1757fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);1758} else {1759return (SET_ERROR(EINVAL));1760}17611762/* LONG_FID_LEN means snapdirs */1763if (fidp->fid_len == LONG_FID_LEN) {1764zfid_long_t *zlfid = (zfid_long_t *)fidp;1765uint64_t objsetid = 0;1766uint64_t setgen = 0;17671768for (i = 0; i < sizeof (zlfid->zf_setid); i++)1769objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);17701771for (i = 0; i < sizeof (zlfid->zf_setgen); i++)1772setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);17731774if (objsetid != 
ZFSCTL_INO_SNAPDIRS - object) {1775dprintf("snapdir fid: objsetid (%llu) != "1776"ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n",1777objsetid, ZFSCTL_INO_SNAPDIRS, object);17781779return (SET_ERROR(EINVAL));1780}17811782if (fid_gen > 1 || setgen != 0) {1783dprintf("snapdir fid: fid_gen (%llu) and setgen "1784"(%llu)\n", fid_gen, setgen);1785return (SET_ERROR(EINVAL));1786}17871788return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));1789}17901791if ((err = zfs_enter(zfsvfs, FTAG)) != 0)1792return (err);1793/* A zero fid_gen means we are in the .zfs control directories */1794if (fid_gen == 0 &&1795(object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {1796if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) {1797zfs_exit(zfsvfs, FTAG);1798return (SET_ERROR(ENOENT));1799}18001801*ipp = zfsvfs->z_ctldir;1802ASSERT(*ipp != NULL);18031804if (object == ZFSCTL_INO_SNAPDIR) {1805VERIFY0(zfsctl_root_lookup(*ipp, "snapshot", ipp,18060, kcred, NULL, NULL));1807} else {1808/*1809* Must have an existing ref, so igrab()1810* cannot return NULL1811*/1812VERIFY3P(igrab(*ipp), !=, NULL);1813}1814zfs_exit(zfsvfs, FTAG);1815return (0);1816}18171818gen_mask = -1ULL >> (64 - 8 * i);18191820dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);1821if ((err = zfs_zget(zfsvfs, object, &zp))) {1822zfs_exit(zfsvfs, FTAG);1823return (err);1824}18251826/* Don't export xattr stuff */1827if (zp->z_pflags & ZFS_XATTR) {1828zrele(zp);1829zfs_exit(zfsvfs, FTAG);1830return (SET_ERROR(ENOENT));1831}18321833(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,1834sizeof (uint64_t));1835zp_gen = zp_gen & gen_mask;1836if (zp_gen == 0)1837zp_gen = 1;1838if ((fid_gen == 0) && (zfsvfs->z_root == object))1839fid_gen = zp_gen;1840if (zp->z_unlinked || zp_gen != fid_gen) {1841dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,1842fid_gen);1843zrele(zp);1844zfs_exit(zfsvfs, FTAG);1845return (SET_ERROR(ENOENT));1846}18471848*ipp = ZTOI(zp);1849if 
(*ipp)1850zfs_znode_update_vfs(ITOZ(*ipp));18511852zfs_exit(zfsvfs, FTAG);1853return (0);1854}18551856/*1857* Block out VFS ops and close zfsvfs_t1858*1859* Note, if successful, then we return with the 'z_teardown_lock' and1860* 'z_teardown_inactive_lock' write held. We leave ownership of the underlying1861* dataset and objset intact so that they can be atomically handed off during1862* a subsequent rollback or recv operation and the resume thereafter.1863*/1864int1865zfs_suspend_fs(zfsvfs_t *zfsvfs)1866{1867int error;18681869if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)1870return (error);18711872return (0);1873}18741875/*1876* Rebuild SA and release VOPs. Note that ownership of the underlying dataset1877* is an invariant across any of the operations that can be performed while the1878* filesystem was suspended. Whether it succeeded or failed, the preconditions1879* are the same: the relevant objset and associated dataset are owned by1880* zfsvfs, held, and long held on entry.1881*/1882int1883zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)1884{1885int err, err2;1886znode_t *zp;18871888ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));1889ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));18901891/*1892* We already own this, so just update the objset_t, as the one we1893* had before may have been evicted.1894*/1895objset_t *os;1896VERIFY3P(ds->ds_owner, ==, zfsvfs);1897VERIFY(dsl_dataset_long_held(ds));1898dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));1899dsl_pool_config_enter(dp, FTAG);1900VERIFY0(dmu_objset_from_ds(ds, &os));1901dsl_pool_config_exit(dp, FTAG);19021903err = zfsvfs_init(zfsvfs, os);1904if (err != 0)1905goto bail;19061907ds->ds_dir->dd_activity_cancelled = B_FALSE;1908VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));19091910zfs_set_fuid_feature(zfsvfs);1911zfsvfs->z_rollback_time = jiffies;19121913/*1914* Attempt to re-establish all the active inodes with their1915* dbufs. If a zfs_rezget() fails, then we unhash the inode1916* and mark it stale. 
This prevents a collision if a new1917* inode/object is created which must use the same inode1918* number. The stale inode will be be released when the1919* VFS prunes the dentry holding the remaining references1920* on the stale inode.1921*/1922mutex_enter(&zfsvfs->z_znodes_lock);1923for (zp = list_head(&zfsvfs->z_all_znodes); zp;1924zp = list_next(&zfsvfs->z_all_znodes, zp)) {1925err2 = zfs_rezget(zp);1926if (err2) {1927zpl_d_drop_aliases(ZTOI(zp));1928remove_inode_hash(ZTOI(zp));1929}19301931/* see comment in zfs_suspend_fs() */1932if (zp->z_suspended) {1933zfs_zrele_async(zp);1934zp->z_suspended = B_FALSE;1935}1936}1937mutex_exit(&zfsvfs->z_znodes_lock);19381939if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {1940/*1941* zfs_suspend_fs() could have interrupted freeing1942* of dnodes. We need to restart this freeing so1943* that we don't "leak" the space.1944*/1945zfs_unlinked_drain(zfsvfs);1946}19471948/*1949* Most of the time zfs_suspend_fs is used for changing the contents1950* of the underlying dataset. ZFS rollback and receive operations1951* might create files for which negative dentries are present in1952* the cache. 
Since walking the dcache would require a lot of GPL-only1953* code duplication, it's much easier on these rather rare occasions1954* just to flush the whole dcache for the given dataset/filesystem.1955*/1956shrink_dcache_sb(zfsvfs->z_sb);19571958bail:1959if (err != 0)1960zfsvfs->z_unmounted = B_TRUE;19611962/* release the VFS ops */1963rw_exit(&zfsvfs->z_teardown_inactive_lock);1964ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);19651966if (err != 0) {1967/*1968* Since we couldn't setup the sa framework, try to force1969* unmount this file system.1970*/1971if (zfsvfs->z_os)1972(void) zfs_umount(zfsvfs->z_sb);1973}1974return (err);1975}19761977/*1978* Release VOPs and unmount a suspended filesystem.1979*/1980int1981zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)1982{1983ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));1984ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));19851986/*1987* We already own this, so just hold and rele it to update the1988* objset_t, as the one we had before may have been evicted.1989*/1990objset_t *os;1991VERIFY3P(ds->ds_owner, ==, zfsvfs);1992VERIFY(dsl_dataset_long_held(ds));1993dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));1994dsl_pool_config_enter(dp, FTAG);1995VERIFY0(dmu_objset_from_ds(ds, &os));1996dsl_pool_config_exit(dp, FTAG);1997zfsvfs->z_os = os;19981999/* release the VOPs */2000rw_exit(&zfsvfs->z_teardown_inactive_lock);2001ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);20022003/*2004* Try to force unmount this file system.2005*/2006(void) zfs_umount(zfsvfs->z_sb);2007zfsvfs->z_unmounted = B_TRUE;2008return (0);2009}20102011/*2012* Automounted snapshots rely on periodic revalidation2013* to defer snapshots from being automatically unmounted.2014*/20152016inline void2017zfs_exit_fs(zfsvfs_t *zfsvfs)2018{2019if (!zfsvfs->z_issnap)2020return;20212022if (time_after(jiffies, zfsvfs->z_snap_defer_time +2023MAX(zfs_expire_snapshot * HZ / 2, HZ))) {2024zfsvfs->z_snap_defer_time = 
		    jiffies;
		zfsctl_snapshot_unmount_delay(zfsvfs->z_os->os_spa,
		    dmu_objset_id(zfsvfs->z_os),
		    zfs_expire_snapshot);
	}
}

/*
 * Upgrade the on-disk ZPL version of this filesystem to 'newvers'.
 * Only upgrades are allowed (EINVAL otherwise), and the pool must be at
 * a SPA version new enough to support the requested ZPL version
 * (ENOTSUP otherwise).  Crossing the ZPL_VERSION_SA boundary also
 * creates the SA master node and registers the SA upgrade callback.
 */
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	if (newvers < zfsvfs->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		/* The tx was assigned, so it must be committed, not aborted. */
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %llu to %llu", zfsvfs->z_version, newvers);

	dmu_tx_commit(tx);

	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Persist a default quota value for the given quota property in the
 * master node ZAP (a value of 0 removes the entry), then mirror it into
 * the corresponding in-core zfsvfs field.
 */
int
zfs_set_default_quota(zfsvfs_t *zfsvfs, zfs_prop_t prop, uint64_t quota)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	const char *propstr = zfs_prop_to_name(prop);
	dmu_tx_t *tx;

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, propstr);
	error = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	if (quota == 0) {
		/* Clearing an entry that never existed is not an error. */
		error = zap_remove(os, MASTER_NODE_OBJ, propstr, tx);
		if (error == ENOENT)
			error = 0;
	} else {
		error = zap_update(os, MASTER_NODE_OBJ, propstr, 8, 1,
		    &quota, tx);
	}

	if (error)
		goto out;

	switch (prop) {
	case ZFS_PROP_DEFAULTUSERQUOTA:
		zfsvfs->z_defaultuserquota = quota;
		break;
	case ZFS_PROP_DEFAULTGROUPQUOTA:
		zfsvfs->z_defaultgroupquota = quota;
		break;
	case ZFS_PROP_DEFAULTPROJECTQUOTA:
		zfsvfs->z_defaultprojectquota = quota;
		break;
	case ZFS_PROP_DEFAULTUSEROBJQUOTA:
		zfsvfs->z_defaultuserobjquota = quota;
		break;
	case ZFS_PROP_DEFAULTGROUPOBJQUOTA:
		zfsvfs->z_defaultgroupobjquota = quota;
		break;
	case ZFS_PROP_DEFAULTPROJECTOBJQUOTA:
		zfsvfs->z_defaultprojectobjquota = quota;
		break;
	default:
		break;
	}

out:
	dmu_tx_commit(tx);
	return (error);
}

/*
 * Return true if the corresponding vfs's unmounted flag is set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);

	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_unmounted)
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	/*
	 * We don't need to do anything here, the devname is always current by
	 * virtue of zfsvfs->z_sb->s_op->show_devname.
	 */
	(void) oldname, (void) newname;
}

/*
 * Module-load initialization: set up the .zfs control dir and znode
 * caches, register the ZPL objset type, and register the filesystem.
 */
void
zfs_init(void)
{
	zfsctl_init();
	zfs_znode_init();
	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
	register_filesystem(&zpl_fs_type);
}

/*
 * Module-unload teardown; mirrors zfs_init() in reverse order.
 */
void
zfs_fini(void)
{
	/*
	 * we don't use outstanding because zpl_posix_acl_free might add more.
	 */
	taskq_wait(system_delay_taskq);
	taskq_wait(system_taskq);
	unregister_filesystem(&zpl_fs_type);
	zfs_znode_fini();
	zfsctl_fini();
}

#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_suspend_fs);
EXPORT_SYMBOL(zfs_resume_fs);
EXPORT_SYMBOL(zfs_set_version);
EXPORT_SYMBOL(zfsvfs_create);
EXPORT_SYMBOL(zfsvfs_free);
EXPORT_SYMBOL(zfs_is_readonly);
EXPORT_SYMBOL(zfs_domount);
EXPORT_SYMBOL(zfs_preumount);
EXPORT_SYMBOL(zfs_umount);
EXPORT_SYMBOL(zfs_remount);
EXPORT_SYMBOL(zfs_statvfs);
EXPORT_SYMBOL(zfs_vget);
EXPORT_SYMBOL(zfs_prune);
EXPORT_SYMBOL(zfs_set_default_quota);
#endif