Path: blob/main/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c
107074 views
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2022 by Delphix. All rights reserved.
 * Copyright 2016 Igor Kozhukhov <[email protected]>
 * Copyright 2017 RackTop Systems.
 * Copyright (c) 2018 Datto Inc.
 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * Routines to manage ZFS mounts. We separate all the nasty routines that have
 * to deal with the OS.
The following functions are the main entry points --35* they are used by mount and unmount and when changing a filesystem's36* mountpoint.37*38* zfs_is_mounted()39* zfs_mount()40* zfs_mount_at()41* zfs_unmount()42* zfs_unmountall()43*44* This file also contains the functions used to manage sharing filesystems:45*46* zfs_is_shared()47* zfs_share()48* zfs_unshare()49* zfs_unshareall()50* zfs_commit_shares()51*52* The following functions are available for pool consumers, and will53* mount/unmount and share/unshare all datasets within pool:54*55* zpool_enable_datasets()56* zpool_disable_datasets()57*/5859#include <dirent.h>60#include <dlfcn.h>61#include <errno.h>62#include <fcntl.h>63#include <libgen.h>64#include <libintl.h>65#include <stdio.h>66#include <stdlib.h>67#include <string.h>68#include <unistd.h>69#include <zone.h>70#include <sys/mntent.h>71#include <sys/mount.h>72#include <sys/stat.h>73#include <sys/vfs.h>74#include <sys/dsl_crypt.h>7576#include <libzfs.h>77#include <libzutil.h>7879#include "libzfs_impl.h"8081#include <libshare.h>82#include <sys/systeminfo.h>83#define MAXISALEN 257 /* based on sysinfo(2) man page */8485static void zfs_mount_task(void *);8687static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {88[SA_PROTOCOL_NFS] =89{ZFS_PROP_SHARENFS, EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},90[SA_PROTOCOL_SMB] =91{ZFS_PROP_SHARESMB, EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},92};9394static const enum sa_protocol share_all_proto[SA_PROTOCOL_COUNT + 1] = {95SA_PROTOCOL_NFS,96SA_PROTOCOL_SMB,97SA_NO_PROTOCOL98};99100101102static boolean_t103dir_is_empty_stat(const char *dirname)104{105struct stat st;106107/*108* We only want to return false if the given path is a non empty109* directory, all other errors are handled elsewhere.110*/111if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {112return (B_TRUE);113}114115/*116* An empty directory will still have two entries in it, one117* entry for each of "." 
and "..".118*/119if (st.st_size > 2) {120return (B_FALSE);121}122123return (B_TRUE);124}125126static boolean_t127dir_is_empty_readdir(const char *dirname)128{129DIR *dirp;130struct dirent64 *dp;131int dirfd;132133if ((dirfd = openat(AT_FDCWD, dirname,134O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {135return (B_TRUE);136}137138if ((dirp = fdopendir(dirfd)) == NULL) {139(void) close(dirfd);140return (B_TRUE);141}142143while ((dp = readdir64(dirp)) != NULL) {144145if (strcmp(dp->d_name, ".") == 0 ||146strcmp(dp->d_name, "..") == 0)147continue;148149(void) closedir(dirp);150return (B_FALSE);151}152153(void) closedir(dirp);154return (B_TRUE);155}156157/*158* Returns true if the specified directory is empty. If we can't open the159* directory at all, return true so that the mount can fail with a more160* informative error message.161*/162static boolean_t163dir_is_empty(const char *dirname)164{165struct statfs64 st;166167/*168* If the statvfs call fails or the filesystem is not a ZFS169* filesystem, fall back to the slow path which uses readdir.170*/171if ((statfs64(dirname, &st) != 0) ||172(st.f_type != ZFS_SUPER_MAGIC)) {173return (dir_is_empty_readdir(dirname));174}175176/*177* At this point, we know the provided path is on a ZFS178* filesystem, so we can use stat instead of readdir to179* determine if the directory is empty or not. We try to avoid180* using readdir because that requires opening "dirname"; this181* open file descriptor can potentially end up in a child182* process if there's a concurrent fork, thus preventing the183* zfs_mount() from otherwise succeeding (the open file184* descriptor inherited by the child process will cause the185* parent's mount to fail with EBUSY). The performance186* implications of replacing the open, read, and close with a187* single stat is nice; but is not the main motivation for the188* added complexity.189*/190return (dir_is_empty_stat(dirname));191}192193/*194* Checks to see if the mount is active. 
If the filesystem is mounted, we fill195* in 'where' with the current mountpoint, and return 1. Otherwise, we return196* 0.197*/198boolean_t199is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)200{201struct mnttab entry;202203if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)204return (B_FALSE);205206if (where != NULL)207*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);208209return (B_TRUE);210}211212boolean_t213zfs_is_mounted(zfs_handle_t *zhp, char **where)214{215return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));216}217218/*219* Checks any higher order concerns about whether the given dataset is220* mountable, false otherwise. zfs_is_mountable_internal specifically assumes221* that the caller has verified the sanity of mounting the dataset at222* its mountpoint to the extent the caller wants.223*/224static boolean_t225zfs_is_mountable_internal(zfs_handle_t *zhp)226{227if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&228getzoneid() == GLOBAL_ZONEID)229return (B_FALSE);230231return (B_TRUE);232}233234/*235* Returns true if the given dataset is mountable, false otherwise. 
Returns the236* mountpoint in 'buf'.237*/238static boolean_t239zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,240zprop_source_t *source, int flags)241{242char sourceloc[MAXNAMELEN];243zprop_source_t sourcetype;244245if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,246B_FALSE))247return (B_FALSE);248249verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,250&sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);251252if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||253strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)254return (B_FALSE);255256if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)257return (B_FALSE);258259if (!zfs_is_mountable_internal(zhp))260return (B_FALSE);261262if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))263return (B_FALSE);264265if (source)266*source = sourcetype;267268return (B_TRUE);269}270271/*272* The filesystem is mounted by invoking the system mount utility rather273* than by the system call mount(2). This ensures that the /etc/mtab274* file is correctly locked for the update. Performing our own locking275* and /etc/mtab update requires making an unsafe assumption about how276* the mount utility performs its locking. Unfortunately, this also means277* in the case of a mount failure we do not have the exact errno. 
We must278* make due with return value from the mount process.279*280* In the long term a shared library called libmount is under development281* which provides a common API to address the locking and errno issues.282* Once the standard mount utility has been updated to use this library283* we can add an autoconf check to conditionally use it.284*285* http://www.kernel.org/pub/linux/utils/util-linux/libmount-docs/index.html286*/287288static int289zfs_add_option(zfs_handle_t *zhp, char *options, int len,290zfs_prop_t prop, const char *on, const char *off)291{292const char *source;293uint64_t value;294295/* Skip adding duplicate default options */296if ((strstr(options, on) != NULL) || (strstr(options, off) != NULL))297return (0);298299/*300* zfs_prop_get_int() is not used to ensure our mount options301* are not influenced by the current /proc/self/mounts contents.302*/303value = getprop_uint64(zhp, prop, &source);304305(void) strlcat(options, ",", len);306(void) strlcat(options, value ? on : off, len);307308return (0);309}310311static int312zfs_add_options(zfs_handle_t *zhp, char *options, int len)313{314int error = 0;315316error = zfs_add_option(zhp, options, len,317ZFS_PROP_ATIME, MNTOPT_ATIME, MNTOPT_NOATIME);318/*319* don't add relatime/strictatime when atime=off, otherwise strictatime320* will force atime=on321*/322if (strstr(options, MNTOPT_NOATIME) == NULL) {323error = zfs_add_option(zhp, options, len,324ZFS_PROP_RELATIME, MNTOPT_RELATIME, MNTOPT_STRICTATIME);325}326error = error ? error : zfs_add_option(zhp, options, len,327ZFS_PROP_DEVICES, MNTOPT_DEVICES, MNTOPT_NODEVICES);328error = error ? error : zfs_add_option(zhp, options, len,329ZFS_PROP_EXEC, MNTOPT_EXEC, MNTOPT_NOEXEC);330error = error ? error : zfs_add_option(zhp, options, len,331ZFS_PROP_READONLY, MNTOPT_RO, MNTOPT_RW);332error = error ? error : zfs_add_option(zhp, options, len,333ZFS_PROP_SETUID, MNTOPT_SETUID, MNTOPT_NOSETUID);334error = error ? 
error : zfs_add_option(zhp, options, len,335ZFS_PROP_NBMAND, MNTOPT_NBMAND, MNTOPT_NONBMAND);336337return (error);338}339340int341zfs_mount(zfs_handle_t *zhp, const char *options, int flags)342{343char mountpoint[ZFS_MAXPROPLEN];344345if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,346flags))347return (0);348349return (zfs_mount_at(zhp, options, flags, mountpoint));350}351352/*353* Mount the given filesystem.354*/355int356zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags,357const char *mountpoint)358{359struct stat buf;360char mntopts[MNT_LINE_MAX];361char overlay[ZFS_MAXPROPLEN];362char prop_encroot[MAXNAMELEN];363boolean_t is_encroot;364zfs_handle_t *encroot_hp = zhp;365libzfs_handle_t *hdl = zhp->zfs_hdl;366uint64_t keystatus;367int remount = 0, rc;368369if (options == NULL) {370(void) strlcpy(mntopts, MNTOPT_DEFAULTS, sizeof (mntopts));371} else {372(void) strlcpy(mntopts, options, sizeof (mntopts));373}374375if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)376remount = 1;377378/* Potentially duplicates some checks if invoked by zfs_mount(). */379if (!zfs_is_mountable_internal(zhp))380return (0);381382/*383* If the pool is imported read-only then all mounts must be read-only384*/385if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))386(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));387388/*389* Append default mount options which apply to the mount point.390* This is done because under Linux (unlike Solaris) multiple mount391* points may reference a single super block. 
This means that just392* given a super block there is no back reference to update the per393* mount point options.394*/395rc = zfs_add_options(zhp, mntopts, sizeof (mntopts));396if (rc) {397zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,398"default options unavailable"));399return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,400dgettext(TEXT_DOMAIN, "cannot mount '%s'"),401mountpoint));402}403404/*405* If the filesystem is encrypted the key must be loaded in order to406* mount. If the key isn't loaded, the MS_CRYPT flag decides whether407* or not we attempt to load the keys. Note: we must call408* zfs_refresh_properties() here since some callers of this function409* (most notably zpool_enable_datasets()) may implicitly load our key410* by loading the parent's key first.411*/412if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {413zfs_refresh_properties(zhp);414keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);415416/*417* If the key is unavailable and MS_CRYPT is set give the418* user a chance to enter the key. 
Otherwise just fail419* immediately.420*/421if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {422if (flags & MS_CRYPT) {423rc = zfs_crypto_get_encryption_root(zhp,424&is_encroot, prop_encroot);425if (rc) {426zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,427"Failed to get encryption root for "428"'%s'."), zfs_get_name(zhp));429return (rc);430}431432if (!is_encroot) {433encroot_hp = zfs_open(hdl, prop_encroot,434ZFS_TYPE_DATASET);435if (encroot_hp == NULL)436return (hdl->libzfs_error);437}438439rc = zfs_crypto_load_key(encroot_hp,440B_FALSE, NULL);441442if (!is_encroot)443zfs_close(encroot_hp);444if (rc)445return (rc);446} else {447zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,448"encryption key not loaded"));449return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,450dgettext(TEXT_DOMAIN, "cannot mount '%s'"),451mountpoint));452}453}454455}456457/*458* Append zfsutil option so the mount helper allow the mount459*/460strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));461462/* Create the directory if it doesn't already exist */463if (lstat(mountpoint, &buf) != 0) {464if (mkdirp(mountpoint, 0755) != 0) {465zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,466"failed to create mountpoint: %s"),467zfs_strerror(errno));468return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,469dgettext(TEXT_DOMAIN, "cannot mount '%s'"),470mountpoint));471}472}473474/*475* Overlay mounts are enabled by default but may be disabled476* via the 'overlay' property. The -O flag remains for compatibility.477*/478if (!(flags & MS_OVERLAY)) {479if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,480sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {481if (strcmp(overlay, "on") == 0) {482flags |= MS_OVERLAY;483}484}485}486487/*488* Determine if the mountpoint is empty. If so, refuse to perform the489* mount. 
We don't perform this check if 'remount' is490* specified or if overlay option (-O) is given491*/492if ((flags & MS_OVERLAY) == 0 && !remount &&493!dir_is_empty(mountpoint)) {494zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,495"directory is not empty"));496return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,497dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));498}499500/* perform the mount */501rc = do_mount(zhp, mountpoint, mntopts, flags);502if (rc) {503/*504* Generic errors are nasty, but there are just way too many505* from mount(), and they're well-understood. We pick a few506* common ones to improve upon.507*/508if (rc == EBUSY) {509zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,510"mountpoint or dataset is busy"));511} else if (rc == EPERM) {512zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,513"Insufficient privileges"));514} else if (rc == ENOTSUP) {515int spa_version;516517VERIFY0(zfs_spa_version(zhp, &spa_version));518zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,519"Can't mount a version %llu "520"file system on a version %d pool. 
Pool must be"521" upgraded to mount this file system."),522(u_longlong_t)zfs_prop_get_int(zhp,523ZFS_PROP_VERSION), spa_version);524} else {525zfs_error_aux(hdl, "%s", zfs_strerror(rc));526}527return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,528dgettext(TEXT_DOMAIN, "cannot mount '%s'"),529zhp->zfs_name));530}531532/* remove the mounted entry before re-adding on remount */533if (remount)534libzfs_mnttab_remove(hdl, zhp->zfs_name);535536/* add the mounted entry into our cache */537libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts);538return (0);539}540541/*542* Unmount a single filesystem.543*/544static int545unmount_one(zfs_handle_t *zhp, const char *mountpoint, int flags)546{547int error;548549error = do_unmount(zhp, mountpoint, flags);550if (error != 0) {551int libzfs_err;552553switch (error) {554case EBUSY:555libzfs_err = EZFS_BUSY;556break;557case EIO:558libzfs_err = EZFS_IO;559break;560case ENOENT:561libzfs_err = EZFS_NOENT;562break;563case ENOMEM:564libzfs_err = EZFS_NOMEM;565break;566case EPERM:567libzfs_err = EZFS_PERM;568break;569default:570libzfs_err = EZFS_UMOUNTFAILED;571}572if (zhp) {573return (zfs_error_fmt(zhp->zfs_hdl, libzfs_err,574dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),575mountpoint));576} else {577return (-1);578}579}580581return (0);582}583584/*585* Unmount the given filesystem.586*/587int588zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)589{590libzfs_handle_t *hdl = zhp->zfs_hdl;591struct mnttab entry;592char *mntpt = NULL;593boolean_t encroot, unmounted = B_FALSE;594595/* check to see if we need to unmount the filesystem */596if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&597libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {598/*599* mountpoint may have come from a call to600* getmnt/getmntany if it isn't NULL. If it is NULL,601* we know it comes from libzfs_mnttab_find which can602* then get freed later. 
We strdup it to play it safe.603*/604if (mountpoint == NULL)605mntpt = zfs_strdup(hdl, entry.mnt_mountp);606else607mntpt = zfs_strdup(hdl, mountpoint);608609/*610* Unshare and unmount the filesystem611*/612if (zfs_unshare(zhp, mntpt, share_all_proto) != 0) {613free(mntpt);614return (-1);615}616zfs_commit_shares(NULL);617618if (unmount_one(zhp, mntpt, flags) != 0) {619free(mntpt);620(void) zfs_share(zhp, NULL);621zfs_commit_shares(NULL);622return (-1);623}624625libzfs_mnttab_remove(hdl, zhp->zfs_name);626free(mntpt);627unmounted = B_TRUE;628}629630/*631* If the MS_CRYPT flag is provided we must ensure we attempt to632* unload the dataset's key regardless of whether we did any work633* to unmount it. We only do this for encryption roots.634*/635if ((flags & MS_CRYPT) != 0 &&636zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {637zfs_refresh_properties(zhp);638639if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&640unmounted) {641(void) zfs_mount(zhp, NULL, 0);642return (-1);643}644645if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==646ZFS_KEYSTATUS_AVAILABLE &&647zfs_crypto_unload_key(zhp) != 0) {648(void) zfs_mount(zhp, NULL, 0);649return (-1);650}651}652653zpool_disable_volume_os(zhp->zfs_name);654655return (0);656}657658/*659* Unmount this filesystem and any children inheriting the mountpoint property.660* To do this, just act like we're changing the mountpoint property, but don't661* remount the filesystems afterwards.662*/663int664zfs_unmountall(zfs_handle_t *zhp, int flags)665{666prop_changelist_t *clp;667int ret;668669clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,670CL_GATHER_ITER_MOUNTED, flags);671if (clp == NULL)672return (-1);673674ret = changelist_prefix(clp);675changelist_free(clp);676677return (ret);678}679680/*681* Unshare a filesystem by mountpoint.682*/683static int684unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,685enum sa_protocol proto)686{687int err = sa_disable_share(mountpoint, 
proto);688if (err != SA_OK)689return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,690dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),691name, sa_errorstr(err)));692693return (0);694}695696/*697* Share the given filesystem according to the options in the specified698* protocol specific properties (sharenfs, sharesmb). We rely699* on "libshare" to do the dirty work for us.700*/701int702zfs_share(zfs_handle_t *zhp, const enum sa_protocol *proto)703{704char mountpoint[ZFS_MAXPROPLEN];705char shareopts[ZFS_MAXPROPLEN];706char sourcestr[ZFS_MAXPROPLEN];707const enum sa_protocol *curr_proto;708zprop_source_t sourcetype;709int err = 0;710711if (proto == NULL)712proto = share_all_proto;713714if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))715return (0);716717for (curr_proto = proto; *curr_proto != SA_NO_PROTOCOL; curr_proto++) {718/*719* Return success if there are no share options.720*/721if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,722shareopts, sizeof (shareopts), &sourcetype, sourcestr,723ZFS_MAXPROPLEN, B_FALSE) != 0 ||724strcmp(shareopts, "off") == 0)725continue;726727/*728* If the 'zoned' property is set, then zfs_is_mountable()729* will have already bailed out if we are in the global zone.730* But local zones cannot be NFS servers, so we ignore it for731* local zones as well.732*/733if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))734continue;735736err = sa_enable_share(zfs_get_name(zhp), mountpoint, shareopts,737*curr_proto);738if (err != SA_OK) {739return (zfs_error_fmt(zhp->zfs_hdl,740proto_table[*curr_proto].p_share_err,741dgettext(TEXT_DOMAIN, "cannot share '%s: %s'"),742zfs_get_name(zhp), sa_errorstr(err)));743}744745}746return (0);747}748749/*750* Check to see if the filesystem is currently shared.751*/752boolean_t753zfs_is_shared(zfs_handle_t *zhp, char **where,754const enum sa_protocol *proto)755{756char *mountpoint;757if (proto == NULL)758proto = share_all_proto;759760if (ZFS_IS_VOLUME(zhp))761return (B_FALSE);762763if 
(!zfs_is_mounted(zhp, &mountpoint))764return (B_FALSE);765766for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)767if (sa_is_shared(mountpoint, *p)) {768if (where != NULL)769*where = mountpoint;770else771free(mountpoint);772return (B_TRUE);773}774775free(mountpoint);776return (B_FALSE);777}778779void780zfs_commit_shares(const enum sa_protocol *proto)781{782if (proto == NULL)783proto = share_all_proto;784785for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)786sa_commit_shares(*p);787}788789void790zfs_truncate_shares(const enum sa_protocol *proto)791{792if (proto == NULL)793proto = share_all_proto;794795for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)796sa_truncate_shares(*p);797}798799/*800* Unshare the given filesystem.801*/802int803zfs_unshare(zfs_handle_t *zhp, const char *mountpoint,804const enum sa_protocol *proto)805{806libzfs_handle_t *hdl = zhp->zfs_hdl;807struct mnttab entry;808809if (proto == NULL)810proto = share_all_proto;811812if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&813libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {814815/* check to see if need to unmount the filesystem */816const char *mntpt = mountpoint ?: entry.mnt_mountp;817818for (const enum sa_protocol *curr_proto = proto;819*curr_proto != SA_NO_PROTOCOL; curr_proto++)820if (sa_is_shared(mntpt, *curr_proto) &&821unshare_one(hdl, zhp->zfs_name,822mntpt, *curr_proto) != 0)823return (-1);824}825826return (0);827}828829/*830* Same as zfs_unmountall(), but for NFS and SMB unshares.831*/832int833zfs_unshareall(zfs_handle_t *zhp, const enum sa_protocol *proto)834{835prop_changelist_t *clp;836int ret;837838if (proto == NULL)839proto = share_all_proto;840841clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);842if (clp == NULL)843return (-1);844845ret = changelist_unshare(clp, proto);846changelist_free(clp);847848return (ret);849}850851/*852* Remove the mountpoint associated with the current dataset, if 
necessary.853* We only remove the underlying directory if:854*855* - The mountpoint is not 'none' or 'legacy'856* - The mountpoint is non-empty857* - The mountpoint is the default or inherited858* - The 'zoned' property is set, or we're in a local zone859*860* Any other directories we leave alone.861*/862void863remove_mountpoint(zfs_handle_t *zhp)864{865char mountpoint[ZFS_MAXPROPLEN];866zprop_source_t source;867868if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),869&source, 0))870return;871872if (source == ZPROP_SRC_DEFAULT ||873source == ZPROP_SRC_INHERITED) {874/*875* Try to remove the directory, silently ignoring any errors.876* The filesystem may have since been removed or moved around,877* and this error isn't really useful to the administrator in878* any way.879*/880(void) rmdir(mountpoint);881}882}883884/*885* Add the given zfs handle to the cb_handles array, dynamically reallocating886* the array if it is out of space.887*/888void889libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)890{891if (cbp->cb_alloc == cbp->cb_used) {892size_t newsz;893zfs_handle_t **newhandles;894895newsz = cbp->cb_alloc != 0 ? 
cbp->cb_alloc * 2 : 64;896newhandles = zfs_realloc(zhp->zfs_hdl,897cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),898newsz * sizeof (zfs_handle_t *));899cbp->cb_handles = newhandles;900cbp->cb_alloc = newsz;901}902cbp->cb_handles[cbp->cb_used++] = zhp;903}904905/*906* Recursive helper function used during file system enumeration907*/908static int909zfs_iter_cb(zfs_handle_t *zhp, void *data)910{911get_all_cb_t *cbp = data;912913if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {914zfs_close(zhp);915return (0);916}917918if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {919zfs_close(zhp);920return (0);921}922923if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==924ZFS_KEYSTATUS_UNAVAILABLE) {925zfs_close(zhp);926return (0);927}928929/*930* If this filesystem is inconsistent and has a receive resume931* token, we can not mount it.932*/933if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&934zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,935NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {936zfs_close(zhp);937return (0);938}939940libzfs_add_handle(cbp, zhp);941if (zfs_iter_filesystems_v2(zhp, 0, zfs_iter_cb, cbp) != 0) {942zfs_close(zhp);943return (-1);944}945return (0);946}947948/*949* Sort comparator that compares two mountpoint paths. We sort these paths so950* that subdirectories immediately follow their parents. This means that we951* effectively treat the '/' character as the lowest value non-nul char.952* Since filesystems from non-global zones can have the same mountpoint953* as other filesystems, the comparator sorts global zone filesystems to954* the top of the list. This means that the global zone will traverse the955* filesystem list in the correct order and can stop when it sees the956* first zoned filesystem. 
In a non-global zone, only the delegated957* filesystems are seen.958*959* An example sorted list using this comparator would look like:960*961* /foo962* /foo/bar963* /foo/bar/baz964* /foo/baz965* /foo.bar966* /foo (NGZ1)967* /foo (NGZ2)968*969* The mounting code depends on this ordering to deterministically iterate970* over filesystems in order to spawn parallel mount tasks.971*/972static int973mountpoint_cmp(const void *arga, const void *argb)974{975zfs_handle_t *const *zap = arga;976zfs_handle_t *za = *zap;977zfs_handle_t *const *zbp = argb;978zfs_handle_t *zb = *zbp;979char mounta[MAXPATHLEN];980char mountb[MAXPATHLEN];981const char *a = mounta;982const char *b = mountb;983boolean_t gota, gotb;984uint64_t zoneda, zonedb;985986zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);987zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);988if (zoneda && !zonedb)989return (1);990if (!zoneda && zonedb)991return (-1);992993gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);994if (gota) {995verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,996sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);997}998gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);999if (gotb) {1000verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,1001sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);1002}10031004if (gota && gotb) {1005while (*a != '\0' && (*a == *b)) {1006a++;1007b++;1008}1009if (*a == *b)1010return (0);1011if (*a == '\0')1012return (-1);1013if (*b == '\0')1014return (1);1015if (*a == '/')1016return (-1);1017if (*b == '/')1018return (1);1019return (*a < *b ? 
-1 : *a > *b);1020}10211022if (gota)1023return (-1);1024if (gotb)1025return (1);10261027/*1028* If neither filesystem has a mountpoint, revert to sorting by1029* dataset name.1030*/1031return (strcmp(zfs_get_name(za), zfs_get_name(zb)));1032}10331034/*1035* Return true if path2 is a child of path1 or path2 equals path1 or1036* path1 is "/" (path2 is always a child of "/").1037*/1038static boolean_t1039libzfs_path_contains(const char *path1, const char *path2)1040{1041return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 ||1042(strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'));1043}10441045/*1046* Given a mountpoint specified by idx in the handles array, find the first1047* non-descendent of that mountpoint and return its index. Descendant paths1048* start with the parent's path. This function relies on the ordering1049* enforced by mountpoint_cmp().1050*/1051static int1052non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)1053{1054char parent[ZFS_MAXPROPLEN];1055char child[ZFS_MAXPROPLEN];1056int i;10571058verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,1059sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);10601061for (i = idx + 1; i < num_handles; i++) {1062verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,1063sizeof (child), NULL, NULL, 0, B_FALSE) == 0);1064if (!libzfs_path_contains(parent, child))1065break;1066}1067return (i);1068}10691070typedef struct mnt_param {1071libzfs_handle_t *mnt_hdl;1072taskq_t *mnt_tq;1073zfs_handle_t **mnt_zhps; /* filesystems to mount */1074size_t mnt_num_handles;1075int mnt_idx; /* Index of selected entry to mount */1076zfs_iter_f mnt_func;1077void *mnt_data;1078} mnt_param_t;10791080/*1081* Allocate and populate the parameter struct for mount function, and1082* schedule mounting of the entry selected by idx.1083*/1084static void1085zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,1086size_t num_handles, int idx, zfs_iter_f func, void *data, taskq_t 
*tq)1087{1088mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));10891090mnt_param->mnt_hdl = hdl;1091mnt_param->mnt_tq = tq;1092mnt_param->mnt_zhps = handles;1093mnt_param->mnt_num_handles = num_handles;1094mnt_param->mnt_idx = idx;1095mnt_param->mnt_func = func;1096mnt_param->mnt_data = data;10971098if (taskq_dispatch(tq, zfs_mount_task, (void*)mnt_param,1099TQ_SLEEP) == TASKQID_INVALID) {1100/* Could not dispatch to thread pool; execute directly */1101zfs_mount_task((void*)mnt_param);1102}1103}11041105/*1106* This is the structure used to keep state of mounting or sharing operations1107* during a call to zpool_enable_datasets().1108*/1109typedef struct mount_state {1110/*1111* ms_mntstatus is set to -1 if any mount fails. While multiple threads1112* could update this variable concurrently, no synchronization is1113* needed as it's only ever set to -1.1114*/1115int ms_mntstatus;1116int ms_mntflags;1117const char *ms_mntopts;1118} mount_state_t;11191120static int1121zfs_mount_one(zfs_handle_t *zhp, void *arg)1122{1123mount_state_t *ms = arg;1124int ret = 0;11251126/*1127* don't attempt to mount encrypted datasets with1128* unloaded keys1129*/1130if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==1131ZFS_KEYSTATUS_UNAVAILABLE)1132return (0);11331134if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)1135ret = ms->ms_mntstatus = -1;1136return (ret);1137}11381139static int1140zfs_share_one(zfs_handle_t *zhp, void *arg)1141{1142mount_state_t *ms = arg;1143int ret = 0;11441145if (zfs_share(zhp, NULL) != 0)1146ret = ms->ms_mntstatus = -1;1147return (ret);1148}11491150/*1151* Thread pool function to mount one file system. On completion, it finds and1152* schedules its children to be mounted. This depends on the sorting done in1153* zfs_foreach_mountpoint(). 
Note that the degenerate case (chain of entries1154* each descending from the previous) will have no parallelism since we always1155* have to wait for the parent to finish mounting before we can schedule1156* its children.1157*/1158static void1159zfs_mount_task(void *arg)1160{1161mnt_param_t *mp = arg;1162int idx = mp->mnt_idx;1163zfs_handle_t **handles = mp->mnt_zhps;1164size_t num_handles = mp->mnt_num_handles;1165char mountpoint[ZFS_MAXPROPLEN];11661167verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,1168sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);11691170if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)1171goto out;11721173/*1174* We dispatch tasks to mount filesystems with mountpoints underneath1175* this one. We do this by dispatching the next filesystem with a1176* descendant mountpoint of the one we just mounted, then skip all of1177* its descendants, dispatch the next descendant mountpoint, and so on.1178* The non_descendant_idx() function skips over filesystems that are1179* descendants of the filesystem we just dispatched.1180*/1181for (int i = idx + 1; i < num_handles;1182i = non_descendant_idx(handles, num_handles, i)) {1183char child[ZFS_MAXPROPLEN];1184verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,1185child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);11861187if (!libzfs_path_contains(mountpoint, child))1188break; /* not a descendant, return */1189zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,1190mp->mnt_func, mp->mnt_data, mp->mnt_tq);1191}11921193out:1194free(mp);1195}11961197/*1198* Issue the func callback for each ZFS handle contained in the handles1199* array. This function is used to mount all datasets, and so this function1200* guarantees that filesystems for parent mountpoints are called before their1201* children. As such, before issuing any callbacks, we first sort the array1202* of handles by mountpoint.1203*1204* Callbacks are issued in one of two ways:1205*1206* 1. 
Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT1207* environment variable is set, then we issue callbacks sequentially.1208*1209* 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT1210* environment variable is not set, then we use a tpool to dispatch threads1211* to mount filesystems in parallel. This function dispatches tasks to mount1212* the filesystems at the top-level mountpoints, and these tasks in turn1213* are responsible for recursively mounting filesystems in their children1214* mountpoints. The value of the nthr argument will be the number of worker1215* threads for the thread pool.1216*/1217void1218zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,1219size_t num_handles, zfs_iter_f func, void *data, uint_t nthr)1220{1221zoneid_t zoneid = getzoneid();12221223/*1224* The ZFS_SERIAL_MOUNT environment variable is an undocumented1225* variable that can be used as a convenience to do a/b comparison1226* of serial vs. parallel mounting.1227*/1228boolean_t serial_mount = nthr <= 1 ||1229(getenv("ZFS_SERIAL_MOUNT") != NULL);12301231/*1232* Sort the datasets by mountpoint. See mountpoint_cmp for details1233* of how these are sorted.1234*/1235qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);12361237if (serial_mount) {1238for (int i = 0; i < num_handles; i++) {1239func(handles[i], data);1240}1241return;1242}12431244/*1245* Issue the callback function for each dataset using a parallel1246* algorithm that uses a thread pool to manage threads.1247*/1248taskq_t *tq = taskq_create("zfs_foreach_mountpoint", nthr, minclsyspri,12491, INT_MAX, TASKQ_DYNAMIC);12501251/*1252* There may be multiple "top level" mountpoints outside of the pool's1253* root mountpoint, e.g.: /foo /bar. 
Dispatch a mount task for each of1254* these.1255*/1256for (int i = 0; i < num_handles;1257i = non_descendant_idx(handles, num_handles, i)) {1258/*1259* Since the mountpoints have been sorted so that the zoned1260* filesystems are at the end, a zoned filesystem seen from1261* the global zone means that we're done.1262*/1263if (zoneid == GLOBAL_ZONEID &&1264zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))1265break;1266zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,1267tq);1268}12691270taskq_wait(tq); /* wait for all scheduled mounts to complete */1271taskq_destroy(tq);1272}12731274/*1275* Mount and share all datasets within the given pool. This assumes that no1276* datasets within the pool are currently mounted. nthr will be number of1277* worker threads to use while mounting datasets.1278*/1279int1280zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags,1281uint_t nthr)1282{1283get_all_cb_t cb = { 0 };1284mount_state_t ms = { 0 };1285zfs_handle_t *zfsp;1286int ret = 0;12871288if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,1289ZFS_TYPE_DATASET)) == NULL)1290goto out;12911292/*1293* Gather all non-snapshot datasets within the pool. Start by adding1294* the root filesystem for this pool to the list, and then iterate1295* over all child filesystems.1296*/1297libzfs_add_handle(&cb, zfsp);1298if (zfs_iter_filesystems_v2(zfsp, 0, zfs_iter_cb, &cb) != 0)1299goto out;13001301/*1302* Mount all filesystems1303*/1304ms.ms_mntopts = mntopts;1305ms.ms_mntflags = flags;1306zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,1307zfs_mount_one, &ms, nthr);1308if (ms.ms_mntstatus != 0)1309ret = EZFS_MOUNTFAILED;13101311/*1312* Share all filesystems that need to be shared. 
This needs to be1313* a separate pass because libshare is not mt-safe, and so we need1314* to share serially.1315*/1316ms.ms_mntstatus = 0;1317zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,1318zfs_share_one, &ms, 1);1319if (ms.ms_mntstatus != 0)1320ret = EZFS_SHAREFAILED;1321else1322zfs_commit_shares(NULL);13231324out:1325for (int i = 0; i < cb.cb_used; i++)1326zfs_close(cb.cb_handles[i]);1327free(cb.cb_handles);13281329return (ret);1330}13311332struct sets_s {1333char *mountpoint;1334zfs_handle_t *dataset;1335};13361337static int1338mountpoint_compare(const void *a, const void *b)1339{1340const struct sets_s *mounta = (struct sets_s *)a;1341const struct sets_s *mountb = (struct sets_s *)b;13421343return (strcmp(mountb->mountpoint, mounta->mountpoint));1344}13451346/*1347* Unshare and unmount all datasets within the given pool. We don't want to1348* rely on traversing the DSL to discover the filesystems within the pool,1349* because this may be expensive (if not all of them are mounted), and can fail1350* arbitrarily (on I/O error, for example). Instead, we walk /proc/self/mounts1351* and gather all the filesystems that are currently mounted.1352*/1353int1354zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)1355{1356int used, alloc;1357FILE *mnttab;1358struct mnttab entry;1359size_t namelen;1360struct sets_s *sets = NULL;1361libzfs_handle_t *hdl = zhp->zpool_hdl;1362int i;1363int ret = -1;1364int flags = (force ? 
MS_FORCE : 0);13651366namelen = strlen(zhp->zpool_name);13671368if ((mnttab = fopen(MNTTAB, "re")) == NULL)1369return (ENOENT);13701371used = alloc = 0;1372while (getmntent(mnttab, &entry) == 0) {1373/*1374* Ignore non-ZFS entries.1375*/1376if (entry.mnt_fstype == NULL ||1377strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)1378continue;13791380/*1381* Ignore filesystems not within this pool.1382*/1383if (entry.mnt_mountp == NULL ||1384strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||1385(entry.mnt_special[namelen] != '/' &&1386entry.mnt_special[namelen] != '\0'))1387continue;13881389/*1390* At this point we've found a filesystem within our pool. Add1391* it to our growing list.1392*/1393if (used == alloc) {1394if (alloc == 0) {1395sets = zfs_alloc(hdl,13968 * sizeof (struct sets_s));1397alloc = 8;1398} else {1399sets = zfs_realloc(hdl, sets,1400alloc * sizeof (struct sets_s),1401alloc * 2 * sizeof (struct sets_s));14021403alloc *= 2;1404}1405}14061407sets[used].mountpoint = zfs_strdup(hdl, entry.mnt_mountp);14081409/*1410* This is allowed to fail, in case there is some I/O error. 
It1411* is only used to determine if we need to remove the underlying1412* mountpoint, so failure is not fatal.1413*/1414sets[used].dataset = make_dataset_handle(hdl,1415entry.mnt_special);14161417used++;1418}14191420/*1421* At this point, we have the entire list of filesystems, so sort it by1422* mountpoint.1423*/1424if (used != 0)1425qsort(sets, used, sizeof (struct sets_s), mountpoint_compare);14261427/*1428* Walk through and first unshare everything.1429*/1430for (i = 0; i < used; i++) {1431for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {1432if (sa_is_shared(sets[i].mountpoint, p) &&1433unshare_one(hdl, sets[i].mountpoint,1434sets[i].mountpoint, p) != 0)1435goto out;1436}1437}1438zfs_commit_shares(NULL);14391440/*1441* Now unmount everything, removing the underlying directories as1442* appropriate.1443*/1444for (i = 0; i < used; i++) {1445if (unmount_one(sets[i].dataset, sets[i].mountpoint,1446flags) != 0)1447goto out;1448}14491450for (i = 0; i < used; i++) {1451if (sets[i].dataset)1452remove_mountpoint(sets[i].dataset);1453}14541455zpool_disable_datasets_os(zhp, force);14561457ret = 0;1458out:1459(void) fclose(mnttab);1460for (i = 0; i < used; i++) {1461if (sets[i].dataset)1462zfs_close(sets[i].dataset);1463free(sets[i].mountpoint);1464}1465free(sets);14661467return (ret);1468}146914701471