Path: blob/main/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c
48378 views
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2022 by Delphix. All rights reserved.
 * Copyright 2016 Igor Kozhukhov <[email protected]>
 * Copyright 2017 RackTop Systems.
 * Copyright (c) 2018 Datto Inc.
 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * Routines to manage ZFS mounts. We separate all the nasty routines that have
 * to deal with the OS.
The following functions are the main entry points --35* they are used by mount and unmount and when changing a filesystem's36* mountpoint.37*38* zfs_is_mounted()39* zfs_mount()40* zfs_mount_at()41* zfs_unmount()42* zfs_unmountall()43*44* This file also contains the functions used to manage sharing filesystems:45*46* zfs_is_shared()47* zfs_share()48* zfs_unshare()49* zfs_unshareall()50* zfs_commit_shares()51*52* The following functions are available for pool consumers, and will53* mount/unmount and share/unshare all datasets within pool:54*55* zpool_enable_datasets()56* zpool_disable_datasets()57*/5859#include <dirent.h>60#include <dlfcn.h>61#include <errno.h>62#include <fcntl.h>63#include <libgen.h>64#include <libintl.h>65#include <stdio.h>66#include <stdlib.h>67#include <string.h>68#include <unistd.h>69#include <zone.h>70#include <sys/mntent.h>71#include <sys/mount.h>72#include <sys/stat.h>73#include <sys/vfs.h>74#include <sys/dsl_crypt.h>7576#include <libzfs.h>77#include <libzutil.h>7879#include "libzfs_impl.h"80#include <thread_pool.h>8182#include <libshare.h>83#include <sys/systeminfo.h>84#define MAXISALEN 257 /* based on sysinfo(2) man page */8586static void zfs_mount_task(void *);8788static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {89[SA_PROTOCOL_NFS] =90{ZFS_PROP_SHARENFS, EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},91[SA_PROTOCOL_SMB] =92{ZFS_PROP_SHARESMB, EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},93};9495static const enum sa_protocol share_all_proto[SA_PROTOCOL_COUNT + 1] = {96SA_PROTOCOL_NFS,97SA_PROTOCOL_SMB,98SA_NO_PROTOCOL99};100101102103static boolean_t104dir_is_empty_stat(const char *dirname)105{106struct stat st;107108/*109* We only want to return false if the given path is a non empty110* directory, all other errors are handled elsewhere.111*/112if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {113return (B_TRUE);114}115116/*117* An empty directory will still have two entries in it, one118* entry for each of "." 
and "..".119*/120if (st.st_size > 2) {121return (B_FALSE);122}123124return (B_TRUE);125}126127static boolean_t128dir_is_empty_readdir(const char *dirname)129{130DIR *dirp;131struct dirent64 *dp;132int dirfd;133134if ((dirfd = openat(AT_FDCWD, dirname,135O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {136return (B_TRUE);137}138139if ((dirp = fdopendir(dirfd)) == NULL) {140(void) close(dirfd);141return (B_TRUE);142}143144while ((dp = readdir64(dirp)) != NULL) {145146if (strcmp(dp->d_name, ".") == 0 ||147strcmp(dp->d_name, "..") == 0)148continue;149150(void) closedir(dirp);151return (B_FALSE);152}153154(void) closedir(dirp);155return (B_TRUE);156}157158/*159* Returns true if the specified directory is empty. If we can't open the160* directory at all, return true so that the mount can fail with a more161* informative error message.162*/163static boolean_t164dir_is_empty(const char *dirname)165{166struct statfs64 st;167168/*169* If the statvfs call fails or the filesystem is not a ZFS170* filesystem, fall back to the slow path which uses readdir.171*/172if ((statfs64(dirname, &st) != 0) ||173(st.f_type != ZFS_SUPER_MAGIC)) {174return (dir_is_empty_readdir(dirname));175}176177/*178* At this point, we know the provided path is on a ZFS179* filesystem, so we can use stat instead of readdir to180* determine if the directory is empty or not. We try to avoid181* using readdir because that requires opening "dirname"; this182* open file descriptor can potentially end up in a child183* process if there's a concurrent fork, thus preventing the184* zfs_mount() from otherwise succeeding (the open file185* descriptor inherited by the child process will cause the186* parent's mount to fail with EBUSY). The performance187* implications of replacing the open, read, and close with a188* single stat is nice; but is not the main motivation for the189* added complexity.190*/191return (dir_is_empty_stat(dirname));192}193194/*195* Checks to see if the mount is active. 
If the filesystem is mounted, we fill196* in 'where' with the current mountpoint, and return 1. Otherwise, we return197* 0.198*/199boolean_t200is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)201{202struct mnttab entry;203204if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)205return (B_FALSE);206207if (where != NULL)208*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);209210return (B_TRUE);211}212213boolean_t214zfs_is_mounted(zfs_handle_t *zhp, char **where)215{216return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));217}218219/*220* Checks any higher order concerns about whether the given dataset is221* mountable, false otherwise. zfs_is_mountable_internal specifically assumes222* that the caller has verified the sanity of mounting the dataset at223* its mountpoint to the extent the caller wants.224*/225static boolean_t226zfs_is_mountable_internal(zfs_handle_t *zhp)227{228if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&229getzoneid() == GLOBAL_ZONEID)230return (B_FALSE);231232return (B_TRUE);233}234235/*236* Returns true if the given dataset is mountable, false otherwise. 
Returns the237* mountpoint in 'buf'.238*/239static boolean_t240zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,241zprop_source_t *source, int flags)242{243char sourceloc[MAXNAMELEN];244zprop_source_t sourcetype;245246if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,247B_FALSE))248return (B_FALSE);249250verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,251&sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);252253if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||254strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)255return (B_FALSE);256257if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)258return (B_FALSE);259260if (!zfs_is_mountable_internal(zhp))261return (B_FALSE);262263if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))264return (B_FALSE);265266if (source)267*source = sourcetype;268269return (B_TRUE);270}271272/*273* The filesystem is mounted by invoking the system mount utility rather274* than by the system call mount(2). This ensures that the /etc/mtab275* file is correctly locked for the update. Performing our own locking276* and /etc/mtab update requires making an unsafe assumption about how277* the mount utility performs its locking. Unfortunately, this also means278* in the case of a mount failure we do not have the exact errno. 
We must279* make due with return value from the mount process.280*281* In the long term a shared library called libmount is under development282* which provides a common API to address the locking and errno issues.283* Once the standard mount utility has been updated to use this library284* we can add an autoconf check to conditionally use it.285*286* http://www.kernel.org/pub/linux/utils/util-linux/libmount-docs/index.html287*/288289static int290zfs_add_option(zfs_handle_t *zhp, char *options, int len,291zfs_prop_t prop, const char *on, const char *off)292{293const char *source;294uint64_t value;295296/* Skip adding duplicate default options */297if ((strstr(options, on) != NULL) || (strstr(options, off) != NULL))298return (0);299300/*301* zfs_prop_get_int() is not used to ensure our mount options302* are not influenced by the current /proc/self/mounts contents.303*/304value = getprop_uint64(zhp, prop, &source);305306(void) strlcat(options, ",", len);307(void) strlcat(options, value ? on : off, len);308309return (0);310}311312static int313zfs_add_options(zfs_handle_t *zhp, char *options, int len)314{315int error = 0;316317error = zfs_add_option(zhp, options, len,318ZFS_PROP_ATIME, MNTOPT_ATIME, MNTOPT_NOATIME);319/*320* don't add relatime/strictatime when atime=off, otherwise strictatime321* will force atime=on322*/323if (strstr(options, MNTOPT_NOATIME) == NULL) {324error = zfs_add_option(zhp, options, len,325ZFS_PROP_RELATIME, MNTOPT_RELATIME, MNTOPT_STRICTATIME);326}327error = error ? error : zfs_add_option(zhp, options, len,328ZFS_PROP_DEVICES, MNTOPT_DEVICES, MNTOPT_NODEVICES);329error = error ? error : zfs_add_option(zhp, options, len,330ZFS_PROP_EXEC, MNTOPT_EXEC, MNTOPT_NOEXEC);331error = error ? error : zfs_add_option(zhp, options, len,332ZFS_PROP_READONLY, MNTOPT_RO, MNTOPT_RW);333error = error ? error : zfs_add_option(zhp, options, len,334ZFS_PROP_SETUID, MNTOPT_SETUID, MNTOPT_NOSETUID);335error = error ? 
error : zfs_add_option(zhp, options, len,336ZFS_PROP_NBMAND, MNTOPT_NBMAND, MNTOPT_NONBMAND);337338return (error);339}340341int342zfs_mount(zfs_handle_t *zhp, const char *options, int flags)343{344char mountpoint[ZFS_MAXPROPLEN];345346if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,347flags))348return (0);349350return (zfs_mount_at(zhp, options, flags, mountpoint));351}352353/*354* Mount the given filesystem.355*/356int357zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags,358const char *mountpoint)359{360struct stat buf;361char mntopts[MNT_LINE_MAX];362char overlay[ZFS_MAXPROPLEN];363char prop_encroot[MAXNAMELEN];364boolean_t is_encroot;365zfs_handle_t *encroot_hp = zhp;366libzfs_handle_t *hdl = zhp->zfs_hdl;367uint64_t keystatus;368int remount = 0, rc;369370if (options == NULL) {371(void) strlcpy(mntopts, MNTOPT_DEFAULTS, sizeof (mntopts));372} else {373(void) strlcpy(mntopts, options, sizeof (mntopts));374}375376if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)377remount = 1;378379/* Potentially duplicates some checks if invoked by zfs_mount(). */380if (!zfs_is_mountable_internal(zhp))381return (0);382383/*384* If the pool is imported read-only then all mounts must be read-only385*/386if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))387(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));388389/*390* Append default mount options which apply to the mount point.391* This is done because under Linux (unlike Solaris) multiple mount392* points may reference a single super block. 
This means that just393* given a super block there is no back reference to update the per394* mount point options.395*/396rc = zfs_add_options(zhp, mntopts, sizeof (mntopts));397if (rc) {398zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,399"default options unavailable"));400return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,401dgettext(TEXT_DOMAIN, "cannot mount '%s'"),402mountpoint));403}404405/*406* If the filesystem is encrypted the key must be loaded in order to407* mount. If the key isn't loaded, the MS_CRYPT flag decides whether408* or not we attempt to load the keys. Note: we must call409* zfs_refresh_properties() here since some callers of this function410* (most notably zpool_enable_datasets()) may implicitly load our key411* by loading the parent's key first.412*/413if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {414zfs_refresh_properties(zhp);415keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);416417/*418* If the key is unavailable and MS_CRYPT is set give the419* user a chance to enter the key. 
Otherwise just fail420* immediately.421*/422if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {423if (flags & MS_CRYPT) {424rc = zfs_crypto_get_encryption_root(zhp,425&is_encroot, prop_encroot);426if (rc) {427zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,428"Failed to get encryption root for "429"'%s'."), zfs_get_name(zhp));430return (rc);431}432433if (!is_encroot) {434encroot_hp = zfs_open(hdl, prop_encroot,435ZFS_TYPE_DATASET);436if (encroot_hp == NULL)437return (hdl->libzfs_error);438}439440rc = zfs_crypto_load_key(encroot_hp,441B_FALSE, NULL);442443if (!is_encroot)444zfs_close(encroot_hp);445if (rc)446return (rc);447} else {448zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,449"encryption key not loaded"));450return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,451dgettext(TEXT_DOMAIN, "cannot mount '%s'"),452mountpoint));453}454}455456}457458/*459* Append zfsutil option so the mount helper allow the mount460*/461strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));462463/* Create the directory if it doesn't already exist */464if (lstat(mountpoint, &buf) != 0) {465if (mkdirp(mountpoint, 0755) != 0) {466zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,467"failed to create mountpoint: %s"),468zfs_strerror(errno));469return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,470dgettext(TEXT_DOMAIN, "cannot mount '%s'"),471mountpoint));472}473}474475/*476* Overlay mounts are enabled by default but may be disabled477* via the 'overlay' property. The -O flag remains for compatibility.478*/479if (!(flags & MS_OVERLAY)) {480if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,481sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {482if (strcmp(overlay, "on") == 0) {483flags |= MS_OVERLAY;484}485}486}487488/*489* Determine if the mountpoint is empty. If so, refuse to perform the490* mount. 
We don't perform this check if 'remount' is491* specified or if overlay option (-O) is given492*/493if ((flags & MS_OVERLAY) == 0 && !remount &&494!dir_is_empty(mountpoint)) {495zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,496"directory is not empty"));497return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,498dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));499}500501/* perform the mount */502rc = do_mount(zhp, mountpoint, mntopts, flags);503if (rc) {504/*505* Generic errors are nasty, but there are just way too many506* from mount(), and they're well-understood. We pick a few507* common ones to improve upon.508*/509if (rc == EBUSY) {510zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,511"mountpoint or dataset is busy"));512} else if (rc == EPERM) {513zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,514"Insufficient privileges"));515} else if (rc == ENOTSUP) {516int spa_version;517518VERIFY0(zfs_spa_version(zhp, &spa_version));519zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,520"Can't mount a version %llu "521"file system on a version %d pool. 
Pool must be"522" upgraded to mount this file system."),523(u_longlong_t)zfs_prop_get_int(zhp,524ZFS_PROP_VERSION), spa_version);525} else {526zfs_error_aux(hdl, "%s", zfs_strerror(rc));527}528return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,529dgettext(TEXT_DOMAIN, "cannot mount '%s'"),530zhp->zfs_name));531}532533/* remove the mounted entry before re-adding on remount */534if (remount)535libzfs_mnttab_remove(hdl, zhp->zfs_name);536537/* add the mounted entry into our cache */538libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts);539return (0);540}541542/*543* Unmount a single filesystem.544*/545static int546unmount_one(zfs_handle_t *zhp, const char *mountpoint, int flags)547{548int error;549550error = do_unmount(zhp, mountpoint, flags);551if (error != 0) {552int libzfs_err;553554switch (error) {555case EBUSY:556libzfs_err = EZFS_BUSY;557break;558case EIO:559libzfs_err = EZFS_IO;560break;561case ENOENT:562libzfs_err = EZFS_NOENT;563break;564case ENOMEM:565libzfs_err = EZFS_NOMEM;566break;567case EPERM:568libzfs_err = EZFS_PERM;569break;570default:571libzfs_err = EZFS_UMOUNTFAILED;572}573if (zhp) {574return (zfs_error_fmt(zhp->zfs_hdl, libzfs_err,575dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),576mountpoint));577} else {578return (-1);579}580}581582return (0);583}584585/*586* Unmount the given filesystem.587*/588int589zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)590{591libzfs_handle_t *hdl = zhp->zfs_hdl;592struct mnttab entry;593char *mntpt = NULL;594boolean_t encroot, unmounted = B_FALSE;595596/* check to see if we need to unmount the filesystem */597if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&598libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {599/*600* mountpoint may have come from a call to601* getmnt/getmntany if it isn't NULL. If it is NULL,602* we know it comes from libzfs_mnttab_find which can603* then get freed later. 
We strdup it to play it safe.604*/605if (mountpoint == NULL)606mntpt = zfs_strdup(hdl, entry.mnt_mountp);607else608mntpt = zfs_strdup(hdl, mountpoint);609610/*611* Unshare and unmount the filesystem612*/613if (zfs_unshare(zhp, mntpt, share_all_proto) != 0) {614free(mntpt);615return (-1);616}617zfs_commit_shares(NULL);618619if (unmount_one(zhp, mntpt, flags) != 0) {620free(mntpt);621(void) zfs_share(zhp, NULL);622zfs_commit_shares(NULL);623return (-1);624}625626libzfs_mnttab_remove(hdl, zhp->zfs_name);627free(mntpt);628unmounted = B_TRUE;629}630631/*632* If the MS_CRYPT flag is provided we must ensure we attempt to633* unload the dataset's key regardless of whether we did any work634* to unmount it. We only do this for encryption roots.635*/636if ((flags & MS_CRYPT) != 0 &&637zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {638zfs_refresh_properties(zhp);639640if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&641unmounted) {642(void) zfs_mount(zhp, NULL, 0);643return (-1);644}645646if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==647ZFS_KEYSTATUS_AVAILABLE &&648zfs_crypto_unload_key(zhp) != 0) {649(void) zfs_mount(zhp, NULL, 0);650return (-1);651}652}653654zpool_disable_volume_os(zhp->zfs_name);655656return (0);657}658659/*660* Unmount this filesystem and any children inheriting the mountpoint property.661* To do this, just act like we're changing the mountpoint property, but don't662* remount the filesystems afterwards.663*/664int665zfs_unmountall(zfs_handle_t *zhp, int flags)666{667prop_changelist_t *clp;668int ret;669670clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,671CL_GATHER_ITER_MOUNTED, flags);672if (clp == NULL)673return (-1);674675ret = changelist_prefix(clp);676changelist_free(clp);677678return (ret);679}680681/*682* Unshare a filesystem by mountpoint.683*/684static int685unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,686enum sa_protocol proto)687{688int err = sa_disable_share(mountpoint, 
proto);689if (err != SA_OK)690return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,691dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),692name, sa_errorstr(err)));693694return (0);695}696697/*698* Share the given filesystem according to the options in the specified699* protocol specific properties (sharenfs, sharesmb). We rely700* on "libshare" to do the dirty work for us.701*/702int703zfs_share(zfs_handle_t *zhp, const enum sa_protocol *proto)704{705char mountpoint[ZFS_MAXPROPLEN];706char shareopts[ZFS_MAXPROPLEN];707char sourcestr[ZFS_MAXPROPLEN];708const enum sa_protocol *curr_proto;709zprop_source_t sourcetype;710int err = 0;711712if (proto == NULL)713proto = share_all_proto;714715if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))716return (0);717718for (curr_proto = proto; *curr_proto != SA_NO_PROTOCOL; curr_proto++) {719/*720* Return success if there are no share options.721*/722if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,723shareopts, sizeof (shareopts), &sourcetype, sourcestr,724ZFS_MAXPROPLEN, B_FALSE) != 0 ||725strcmp(shareopts, "off") == 0)726continue;727728/*729* If the 'zoned' property is set, then zfs_is_mountable()730* will have already bailed out if we are in the global zone.731* But local zones cannot be NFS servers, so we ignore it for732* local zones as well.733*/734if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))735continue;736737err = sa_enable_share(zfs_get_name(zhp), mountpoint, shareopts,738*curr_proto);739if (err != SA_OK) {740return (zfs_error_fmt(zhp->zfs_hdl,741proto_table[*curr_proto].p_share_err,742dgettext(TEXT_DOMAIN, "cannot share '%s: %s'"),743zfs_get_name(zhp), sa_errorstr(err)));744}745746}747return (0);748}749750/*751* Check to see if the filesystem is currently shared.752*/753boolean_t754zfs_is_shared(zfs_handle_t *zhp, char **where,755const enum sa_protocol *proto)756{757char *mountpoint;758if (proto == NULL)759proto = share_all_proto;760761if (ZFS_IS_VOLUME(zhp))762return (B_FALSE);763764if 
(!zfs_is_mounted(zhp, &mountpoint))765return (B_FALSE);766767for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)768if (sa_is_shared(mountpoint, *p)) {769if (where != NULL)770*where = mountpoint;771else772free(mountpoint);773return (B_TRUE);774}775776free(mountpoint);777return (B_FALSE);778}779780void781zfs_commit_shares(const enum sa_protocol *proto)782{783if (proto == NULL)784proto = share_all_proto;785786for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)787sa_commit_shares(*p);788}789790void791zfs_truncate_shares(const enum sa_protocol *proto)792{793if (proto == NULL)794proto = share_all_proto;795796for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)797sa_truncate_shares(*p);798}799800/*801* Unshare the given filesystem.802*/803int804zfs_unshare(zfs_handle_t *zhp, const char *mountpoint,805const enum sa_protocol *proto)806{807libzfs_handle_t *hdl = zhp->zfs_hdl;808struct mnttab entry;809810if (proto == NULL)811proto = share_all_proto;812813if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&814libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {815816/* check to see if need to unmount the filesystem */817const char *mntpt = mountpoint ?: entry.mnt_mountp;818819for (const enum sa_protocol *curr_proto = proto;820*curr_proto != SA_NO_PROTOCOL; curr_proto++)821if (sa_is_shared(mntpt, *curr_proto) &&822unshare_one(hdl, zhp->zfs_name,823mntpt, *curr_proto) != 0)824return (-1);825}826827return (0);828}829830/*831* Same as zfs_unmountall(), but for NFS and SMB unshares.832*/833int834zfs_unshareall(zfs_handle_t *zhp, const enum sa_protocol *proto)835{836prop_changelist_t *clp;837int ret;838839if (proto == NULL)840proto = share_all_proto;841842clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);843if (clp == NULL)844return (-1);845846ret = changelist_unshare(clp, proto);847changelist_free(clp);848849return (ret);850}851852/*853* Remove the mountpoint associated with the current dataset, if 
necessary.854* We only remove the underlying directory if:855*856* - The mountpoint is not 'none' or 'legacy'857* - The mountpoint is non-empty858* - The mountpoint is the default or inherited859* - The 'zoned' property is set, or we're in a local zone860*861* Any other directories we leave alone.862*/863void864remove_mountpoint(zfs_handle_t *zhp)865{866char mountpoint[ZFS_MAXPROPLEN];867zprop_source_t source;868869if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),870&source, 0))871return;872873if (source == ZPROP_SRC_DEFAULT ||874source == ZPROP_SRC_INHERITED) {875/*876* Try to remove the directory, silently ignoring any errors.877* The filesystem may have since been removed or moved around,878* and this error isn't really useful to the administrator in879* any way.880*/881(void) rmdir(mountpoint);882}883}884885/*886* Add the given zfs handle to the cb_handles array, dynamically reallocating887* the array if it is out of space.888*/889void890libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)891{892if (cbp->cb_alloc == cbp->cb_used) {893size_t newsz;894zfs_handle_t **newhandles;895896newsz = cbp->cb_alloc != 0 ? 
cbp->cb_alloc * 2 : 64;897newhandles = zfs_realloc(zhp->zfs_hdl,898cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),899newsz * sizeof (zfs_handle_t *));900cbp->cb_handles = newhandles;901cbp->cb_alloc = newsz;902}903cbp->cb_handles[cbp->cb_used++] = zhp;904}905906/*907* Recursive helper function used during file system enumeration908*/909static int910zfs_iter_cb(zfs_handle_t *zhp, void *data)911{912get_all_cb_t *cbp = data;913914if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {915zfs_close(zhp);916return (0);917}918919if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {920zfs_close(zhp);921return (0);922}923924if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==925ZFS_KEYSTATUS_UNAVAILABLE) {926zfs_close(zhp);927return (0);928}929930/*931* If this filesystem is inconsistent and has a receive resume932* token, we can not mount it.933*/934if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&935zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,936NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {937zfs_close(zhp);938return (0);939}940941libzfs_add_handle(cbp, zhp);942if (zfs_iter_filesystems_v2(zhp, 0, zfs_iter_cb, cbp) != 0) {943zfs_close(zhp);944return (-1);945}946return (0);947}948949/*950* Sort comparator that compares two mountpoint paths. We sort these paths so951* that subdirectories immediately follow their parents. This means that we952* effectively treat the '/' character as the lowest value non-nul char.953* Since filesystems from non-global zones can have the same mountpoint954* as other filesystems, the comparator sorts global zone filesystems to955* the top of the list. This means that the global zone will traverse the956* filesystem list in the correct order and can stop when it sees the957* first zoned filesystem. 
In a non-global zone, only the delegated958* filesystems are seen.959*960* An example sorted list using this comparator would look like:961*962* /foo963* /foo/bar964* /foo/bar/baz965* /foo/baz966* /foo.bar967* /foo (NGZ1)968* /foo (NGZ2)969*970* The mounting code depends on this ordering to deterministically iterate971* over filesystems in order to spawn parallel mount tasks.972*/973static int974mountpoint_cmp(const void *arga, const void *argb)975{976zfs_handle_t *const *zap = arga;977zfs_handle_t *za = *zap;978zfs_handle_t *const *zbp = argb;979zfs_handle_t *zb = *zbp;980char mounta[MAXPATHLEN];981char mountb[MAXPATHLEN];982const char *a = mounta;983const char *b = mountb;984boolean_t gota, gotb;985uint64_t zoneda, zonedb;986987zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);988zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);989if (zoneda && !zonedb)990return (1);991if (!zoneda && zonedb)992return (-1);993994gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);995if (gota) {996verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,997sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);998}999gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);1000if (gotb) {1001verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,1002sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);1003}10041005if (gota && gotb) {1006while (*a != '\0' && (*a == *b)) {1007a++;1008b++;1009}1010if (*a == *b)1011return (0);1012if (*a == '\0')1013return (-1);1014if (*b == '\0')1015return (1);1016if (*a == '/')1017return (-1);1018if (*b == '/')1019return (1);1020return (*a < *b ? 
-1 : *a > *b);1021}10221023if (gota)1024return (-1);1025if (gotb)1026return (1);10271028/*1029* If neither filesystem has a mountpoint, revert to sorting by1030* dataset name.1031*/1032return (strcmp(zfs_get_name(za), zfs_get_name(zb)));1033}10341035/*1036* Return true if path2 is a child of path1 or path2 equals path1 or1037* path1 is "/" (path2 is always a child of "/").1038*/1039static boolean_t1040libzfs_path_contains(const char *path1, const char *path2)1041{1042return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 ||1043(strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'));1044}10451046/*1047* Given a mountpoint specified by idx in the handles array, find the first1048* non-descendent of that mountpoint and return its index. Descendant paths1049* start with the parent's path. This function relies on the ordering1050* enforced by mountpoint_cmp().1051*/1052static int1053non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)1054{1055char parent[ZFS_MAXPROPLEN];1056char child[ZFS_MAXPROPLEN];1057int i;10581059verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,1060sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);10611062for (i = idx + 1; i < num_handles; i++) {1063verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,1064sizeof (child), NULL, NULL, 0, B_FALSE) == 0);1065if (!libzfs_path_contains(parent, child))1066break;1067}1068return (i);1069}10701071typedef struct mnt_param {1072libzfs_handle_t *mnt_hdl;1073tpool_t *mnt_tp;1074zfs_handle_t **mnt_zhps; /* filesystems to mount */1075size_t mnt_num_handles;1076int mnt_idx; /* Index of selected entry to mount */1077zfs_iter_f mnt_func;1078void *mnt_data;1079} mnt_param_t;10801081/*1082* Allocate and populate the parameter struct for mount function, and1083* schedule mounting of the entry selected by idx.1084*/1085static void1086zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,1087size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t 
*tp)1088{1089mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));10901091mnt_param->mnt_hdl = hdl;1092mnt_param->mnt_tp = tp;1093mnt_param->mnt_zhps = handles;1094mnt_param->mnt_num_handles = num_handles;1095mnt_param->mnt_idx = idx;1096mnt_param->mnt_func = func;1097mnt_param->mnt_data = data;10981099if (tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param)) {1100/* Could not dispatch to thread pool; execute directly */1101zfs_mount_task((void*)mnt_param);1102}1103}11041105/*1106* This is the structure used to keep state of mounting or sharing operations1107* during a call to zpool_enable_datasets().1108*/1109typedef struct mount_state {1110/*1111* ms_mntstatus is set to -1 if any mount fails. While multiple threads1112* could update this variable concurrently, no synchronization is1113* needed as it's only ever set to -1.1114*/1115int ms_mntstatus;1116int ms_mntflags;1117const char *ms_mntopts;1118} mount_state_t;11191120static int1121zfs_mount_one(zfs_handle_t *zhp, void *arg)1122{1123mount_state_t *ms = arg;1124int ret = 0;11251126/*1127* don't attempt to mount encrypted datasets with1128* unloaded keys1129*/1130if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==1131ZFS_KEYSTATUS_UNAVAILABLE)1132return (0);11331134if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)1135ret = ms->ms_mntstatus = -1;1136return (ret);1137}11381139static int1140zfs_share_one(zfs_handle_t *zhp, void *arg)1141{1142mount_state_t *ms = arg;1143int ret = 0;11441145if (zfs_share(zhp, NULL) != 0)1146ret = ms->ms_mntstatus = -1;1147return (ret);1148}11491150/*1151* Thread pool function to mount one file system. On completion, it finds and1152* schedules its children to be mounted. This depends on the sorting done in1153* zfs_foreach_mountpoint(). 
Note that the degenerate case (chain of entries1154* each descending from the previous) will have no parallelism since we always1155* have to wait for the parent to finish mounting before we can schedule1156* its children.1157*/1158static void1159zfs_mount_task(void *arg)1160{1161mnt_param_t *mp = arg;1162int idx = mp->mnt_idx;1163zfs_handle_t **handles = mp->mnt_zhps;1164size_t num_handles = mp->mnt_num_handles;1165char mountpoint[ZFS_MAXPROPLEN];11661167verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,1168sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);11691170if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)1171goto out;11721173/*1174* We dispatch tasks to mount filesystems with mountpoints underneath1175* this one. We do this by dispatching the next filesystem with a1176* descendant mountpoint of the one we just mounted, then skip all of1177* its descendants, dispatch the next descendant mountpoint, and so on.1178* The non_descendant_idx() function skips over filesystems that are1179* descendants of the filesystem we just dispatched.1180*/1181for (int i = idx + 1; i < num_handles;1182i = non_descendant_idx(handles, num_handles, i)) {1183char child[ZFS_MAXPROPLEN];1184verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,1185child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);11861187if (!libzfs_path_contains(mountpoint, child))1188break; /* not a descendant, return */1189zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,1190mp->mnt_func, mp->mnt_data, mp->mnt_tp);1191}11921193out:1194free(mp);1195}11961197/*1198* Issue the func callback for each ZFS handle contained in the handles1199* array. This function is used to mount all datasets, and so this function1200* guarantees that filesystems for parent mountpoints are called before their1201* children. As such, before issuing any callbacks, we first sort the array1202* of handles by mountpoint.1203*1204* Callbacks are issued in one of two ways:1205*1206* 1. 
Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT1207* environment variable is set, then we issue callbacks sequentially.1208*1209* 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT1210* environment variable is not set, then we use a tpool to dispatch threads1211* to mount filesystems in parallel. This function dispatches tasks to mount1212* the filesystems at the top-level mountpoints, and these tasks in turn1213* are responsible for recursively mounting filesystems in their children1214* mountpoints. The value of the nthr argument will be the number of worker1215* threads for the thread pool.1216*/1217void1218zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,1219size_t num_handles, zfs_iter_f func, void *data, uint_t nthr)1220{1221zoneid_t zoneid = getzoneid();12221223/*1224* The ZFS_SERIAL_MOUNT environment variable is an undocumented1225* variable that can be used as a convenience to do a/b comparison1226* of serial vs. parallel mounting.1227*/1228boolean_t serial_mount = nthr <= 1 ||1229(getenv("ZFS_SERIAL_MOUNT") != NULL);12301231/*1232* Sort the datasets by mountpoint. See mountpoint_cmp for details1233* of how these are sorted.1234*/1235qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);12361237if (serial_mount) {1238for (int i = 0; i < num_handles; i++) {1239func(handles[i], data);1240}1241return;1242}12431244/*1245* Issue the callback function for each dataset using a parallel1246* algorithm that uses a thread pool to manage threads.1247*/1248tpool_t *tp = tpool_create(1, nthr, 0, NULL);12491250/*1251* There may be multiple "top level" mountpoints outside of the pool's1252* root mountpoint, e.g.: /foo /bar. 
Dispatch a mount task for each of1253* these.1254*/1255for (int i = 0; i < num_handles;1256i = non_descendant_idx(handles, num_handles, i)) {1257/*1258* Since the mountpoints have been sorted so that the zoned1259* filesystems are at the end, a zoned filesystem seen from1260* the global zone means that we're done.1261*/1262if (zoneid == GLOBAL_ZONEID &&1263zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))1264break;1265zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,1266tp);1267}12681269tpool_wait(tp); /* wait for all scheduled mounts to complete */1270tpool_destroy(tp);1271}12721273/*1274* Mount and share all datasets within the given pool. This assumes that no1275* datasets within the pool are currently mounted. nthr will be number of1276* worker threads to use while mounting datasets.1277*/1278int1279zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags,1280uint_t nthr)1281{1282get_all_cb_t cb = { 0 };1283mount_state_t ms = { 0 };1284zfs_handle_t *zfsp;1285int ret = 0;12861287if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,1288ZFS_TYPE_DATASET)) == NULL)1289goto out;12901291/*1292* Gather all non-snapshot datasets within the pool. Start by adding1293* the root filesystem for this pool to the list, and then iterate1294* over all child filesystems.1295*/1296libzfs_add_handle(&cb, zfsp);1297if (zfs_iter_filesystems_v2(zfsp, 0, zfs_iter_cb, &cb) != 0)1298goto out;12991300/*1301* Mount all filesystems1302*/1303ms.ms_mntopts = mntopts;1304ms.ms_mntflags = flags;1305zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,1306zfs_mount_one, &ms, nthr);1307if (ms.ms_mntstatus != 0)1308ret = EZFS_MOUNTFAILED;13091310/*1311* Share all filesystems that need to be shared. 
This needs to be1312* a separate pass because libshare is not mt-safe, and so we need1313* to share serially.1314*/1315ms.ms_mntstatus = 0;1316zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,1317zfs_share_one, &ms, 1);1318if (ms.ms_mntstatus != 0)1319ret = EZFS_SHAREFAILED;1320else1321zfs_commit_shares(NULL);13221323out:1324for (int i = 0; i < cb.cb_used; i++)1325zfs_close(cb.cb_handles[i]);1326free(cb.cb_handles);13271328return (ret);1329}13301331struct sets_s {1332char *mountpoint;1333zfs_handle_t *dataset;1334};13351336static int1337mountpoint_compare(const void *a, const void *b)1338{1339const struct sets_s *mounta = (struct sets_s *)a;1340const struct sets_s *mountb = (struct sets_s *)b;13411342return (strcmp(mountb->mountpoint, mounta->mountpoint));1343}13441345/*1346* Unshare and unmount all datasets within the given pool. We don't want to1347* rely on traversing the DSL to discover the filesystems within the pool,1348* because this may be expensive (if not all of them are mounted), and can fail1349* arbitrarily (on I/O error, for example). Instead, we walk /proc/self/mounts1350* and gather all the filesystems that are currently mounted.1351*/1352int1353zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)1354{1355int used, alloc;1356FILE *mnttab;1357struct mnttab entry;1358size_t namelen;1359struct sets_s *sets = NULL;1360libzfs_handle_t *hdl = zhp->zpool_hdl;1361int i;1362int ret = -1;1363int flags = (force ? 
MS_FORCE : 0);13641365namelen = strlen(zhp->zpool_name);13661367if ((mnttab = fopen(MNTTAB, "re")) == NULL)1368return (ENOENT);13691370used = alloc = 0;1371while (getmntent(mnttab, &entry) == 0) {1372/*1373* Ignore non-ZFS entries.1374*/1375if (entry.mnt_fstype == NULL ||1376strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)1377continue;13781379/*1380* Ignore filesystems not within this pool.1381*/1382if (entry.mnt_mountp == NULL ||1383strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||1384(entry.mnt_special[namelen] != '/' &&1385entry.mnt_special[namelen] != '\0'))1386continue;13871388/*1389* At this point we've found a filesystem within our pool. Add1390* it to our growing list.1391*/1392if (used == alloc) {1393if (alloc == 0) {1394sets = zfs_alloc(hdl,13958 * sizeof (struct sets_s));1396alloc = 8;1397} else {1398sets = zfs_realloc(hdl, sets,1399alloc * sizeof (struct sets_s),1400alloc * 2 * sizeof (struct sets_s));14011402alloc *= 2;1403}1404}14051406sets[used].mountpoint = zfs_strdup(hdl, entry.mnt_mountp);14071408/*1409* This is allowed to fail, in case there is some I/O error. 
It1410* is only used to determine if we need to remove the underlying1411* mountpoint, so failure is not fatal.1412*/1413sets[used].dataset = make_dataset_handle(hdl,1414entry.mnt_special);14151416used++;1417}14181419/*1420* At this point, we have the entire list of filesystems, so sort it by1421* mountpoint.1422*/1423if (used != 0)1424qsort(sets, used, sizeof (struct sets_s), mountpoint_compare);14251426/*1427* Walk through and first unshare everything.1428*/1429for (i = 0; i < used; i++) {1430for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {1431if (sa_is_shared(sets[i].mountpoint, p) &&1432unshare_one(hdl, sets[i].mountpoint,1433sets[i].mountpoint, p) != 0)1434goto out;1435}1436}1437zfs_commit_shares(NULL);14381439/*1440* Now unmount everything, removing the underlying directories as1441* appropriate.1442*/1443for (i = 0; i < used; i++) {1444if (unmount_one(sets[i].dataset, sets[i].mountpoint,1445flags) != 0)1446goto out;1447}14481449for (i = 0; i < used; i++) {1450if (sets[i].dataset)1451remove_mountpoint(sets[i].dataset);1452}14531454zpool_disable_datasets_os(zhp, force);14551456ret = 0;1457out:1458(void) fclose(mnttab);1459for (i = 0; i < used; i++) {1460if (sets[i].dataset)1461zfs_close(sets[i].dataset);1462free(sets[i].mountpoint);1463}1464free(sets);14651466return (ret);1467}146814691470