Path: blob/main/sys/contrib/openzfs/module/zfs/dsl_dataset.c
48383 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/2122/*23* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.24* Copyright (c) 2011, 2020 by Delphix. All rights reserved.25* Copyright (c) 2014, Joyent, Inc. All rights reserved.26* Copyright (c) 2014 RackTop Systems.27* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.28* Copyright (c) 2016 Actifio, Inc. All rights reserved.29* Copyright 2016, OmniTI Computer Consulting, Inc. 
All rights reserved.30* Copyright 2017 Nexenta Systems, Inc.31* Copyright (c) 2019, Klara Inc.32* Copyright (c) 2019, Allan Jude33* Copyright (c) 2020 The FreeBSD Foundation [1]34* Copyright (c) 2025, Rob Norris <[email protected]>35*36* [1] Portions of this software were developed by Allan Jude37* under sponsorship from the FreeBSD Foundation.38*/3940#include <sys/dmu_objset.h>41#include <sys/dsl_dataset.h>42#include <sys/dsl_dir.h>43#include <sys/dsl_prop.h>44#include <sys/dsl_synctask.h>45#include <sys/dmu_traverse.h>46#include <sys/dmu_impl.h>47#include <sys/dmu_tx.h>48#include <sys/arc.h>49#include <sys/zio.h>50#include <sys/zap.h>51#include <sys/zfeature.h>52#include <sys/unique.h>53#include <sys/zfs_context.h>54#include <sys/zfs_ioctl.h>55#include <sys/spa.h>56#include <sys/spa_impl.h>57#include <sys/vdev.h>58#include <sys/zfs_znode.h>59#include <sys/zfs_onexit.h>60#include <sys/zvol.h>61#include <sys/dsl_scan.h>62#include <sys/dsl_deadlist.h>63#include <sys/dsl_destroy.h>64#include <sys/dsl_userhold.h>65#include <sys/dsl_bookmark.h>66#include <sys/policy.h>67#include <sys/dmu_send.h>68#include <sys/dmu_recv.h>69#include <sys/zio_compress.h>70#include <zfs_fletcher.h>71#include <sys/zio_checksum.h>72#include <sys/brt.h>7374/*75* The SPA supports block sizes up to 16MB. However, very large blocks76* can have an impact on i/o latency (e.g. tying up a spinning disk for77* ~300ms), and also potentially on the memory allocator. Therefore,78* we did not allow the recordsize to be set larger than zfs_max_recordsize79* (former default: 1MB). 
Larger blocks could be created by changing this80* tunable, and pools with larger blocks could always be imported and used,81* regardless of this setting.82*83* We do, however, still limit it by default to 1M on x86_32, because Linux's84* 3/1 memory split doesn't leave much room for 16M chunks.85*/86#ifdef _ILP3287uint_t zfs_max_recordsize = 1 * 1024 * 1024;88#else89uint_t zfs_max_recordsize = 16 * 1024 * 1024;90#endif91static int zfs_allow_redacted_dataset_mount = 0;9293int zfs_snapshot_history_enabled = 1;9495#define SWITCH64(x, y) \96{ \97uint64_t __tmp = (x); \98(x) = (y); \99(y) = __tmp; \100}101102#define DS_REF_MAX (1ULL << 62)103104static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds,105uint64_t obj, dmu_tx_t *tx);106static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds,107dmu_tx_t *tx);108109static void unload_zfeature(dsl_dataset_t *ds, spa_feature_t f);110111extern uint_t spa_asize_inflation;112113static zil_header_t zero_zil;114115/*116* Figure out how much of this delta should be propagated to the dsl_dir117* layer. 
If there's a refreservation, that space has already been118* partially accounted for in our ancestors.119*/120static int64_t121parent_delta(dsl_dataset_t *ds, int64_t delta)122{123dsl_dataset_phys_t *ds_phys;124uint64_t old_bytes, new_bytes;125126if (ds->ds_reserved == 0)127return (delta);128129ds_phys = dsl_dataset_phys(ds);130old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);131new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);132133ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));134return (new_bytes - old_bytes);135}136137void138dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)139{140spa_t *spa = dmu_tx_pool(tx)->dp_spa;141int used = bp_get_dsize_sync(spa, bp);142int compressed = BP_GET_PSIZE(bp);143int uncompressed = BP_GET_UCSIZE(bp);144int64_t delta;145spa_feature_t f;146147dprintf_bp(bp, "ds=%p", ds);148149ASSERT(dmu_tx_is_syncing(tx));150/* It could have been compressed away to nothing */151if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))152return;153ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);154ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));155if (ds == NULL) {156dsl_pool_mos_diduse_space(tx->tx_pool,157used, compressed, uncompressed);158return;159}160161ASSERT3U(BP_GET_BIRTH(bp), >,162dsl_dataset_phys(ds)->ds_prev_snap_txg);163dmu_buf_will_dirty(ds->ds_dbuf, tx);164mutex_enter(&ds->ds_lock);165delta = parent_delta(ds, used);166dsl_dataset_phys(ds)->ds_referenced_bytes += used;167dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;168dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;169dsl_dataset_phys(ds)->ds_unique_bytes += used;170171if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {172ds->ds_feature_activation[SPA_FEATURE_LARGE_BLOCKS] =173(void *)B_TRUE;174}175176177f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));178if (f != SPA_FEATURE_NONE) {179ASSERT3S(spa_feature_table[f].fi_type, ==,180ZFEATURE_TYPE_BOOLEAN);181ds->ds_feature_activation[f] = (void *)B_TRUE;182}183184f = 
zio_compress_to_feature(BP_GET_COMPRESS(bp));185if (f != SPA_FEATURE_NONE) {186ASSERT3S(spa_feature_table[f].fi_type, ==,187ZFEATURE_TYPE_BOOLEAN);188ds->ds_feature_activation[f] = (void *)B_TRUE;189}190191/*192* Track block for livelist, but ignore embedded blocks because193* they do not need to be freed.194*/195if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&196BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&197!(BP_IS_EMBEDDED(bp))) {198ASSERT(dsl_dir_is_clone(ds->ds_dir));199ASSERT(spa_feature_is_enabled(spa,200SPA_FEATURE_LIVELIST));201bplist_append(&ds->ds_dir->dd_pending_allocs, bp);202}203204mutex_exit(&ds->ds_lock);205dsl_dir_diduse_transfer_space(ds->ds_dir, delta,206compressed, uncompressed, used,207DD_USED_REFRSRV, DD_USED_HEAD, tx);208}209210/*211* Called when the specified segment has been remapped, and is thus no212* longer referenced in the head dataset. The vdev must be indirect.213*214* If the segment is referenced by a snapshot, put it on the remap deadlist.215* Otherwise, add this segment to the obsolete spacemap.216*/217void218dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev, uint64_t offset,219uint64_t size, uint64_t birth, dmu_tx_t *tx)220{221spa_t *spa = ds->ds_dir->dd_pool->dp_spa;222223ASSERT(dmu_tx_is_syncing(tx));224ASSERT(birth <= tx->tx_txg);225ASSERT(!ds->ds_is_snapshot);226227if (birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {228spa_vdev_indirect_mark_obsolete(spa, vdev, offset, size, tx);229} else {230blkptr_t fakebp;231dva_t *dva = &fakebp.blk_dva[0];232233ASSERT(ds != NULL);234235mutex_enter(&ds->ds_remap_deadlist_lock);236if (!dsl_dataset_remap_deadlist_exists(ds)) {237dsl_dataset_create_remap_deadlist(ds, tx);238}239mutex_exit(&ds->ds_remap_deadlist_lock);240241BP_ZERO(&fakebp);242BP_SET_LOGICAL_BIRTH(&fakebp, birth);243DVA_SET_VDEV(dva, vdev);244DVA_SET_OFFSET(dva, offset);245DVA_SET_ASIZE(dva, size);246dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, 
B_FALSE,247tx);248}249}250251int252dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,253boolean_t async)254{255spa_t *spa = dmu_tx_pool(tx)->dp_spa;256257int used = bp_get_dsize_sync(spa, bp);258int compressed = BP_GET_PSIZE(bp);259int uncompressed = BP_GET_UCSIZE(bp);260261if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))262return (0);263264ASSERT(dmu_tx_is_syncing(tx));265ASSERT(BP_GET_BIRTH(bp) <= tx->tx_txg);266267if (ds == NULL) {268dsl_free(tx->tx_pool, tx->tx_txg, bp);269dsl_pool_mos_diduse_space(tx->tx_pool,270-used, -compressed, -uncompressed);271return (used);272}273ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);274275ASSERT(!ds->ds_is_snapshot);276dmu_buf_will_dirty(ds->ds_dbuf, tx);277278/*279* Track block for livelist, but ignore embedded blocks because280* they do not need to be freed.281*/282if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&283BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&284!(BP_IS_EMBEDDED(bp))) {285ASSERT(dsl_dir_is_clone(ds->ds_dir));286ASSERT(spa_feature_is_enabled(spa,287SPA_FEATURE_LIVELIST));288bplist_append(&ds->ds_dir->dd_pending_frees, bp);289}290291if (BP_GET_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {292int64_t delta;293294/*295* Put blocks that would create IO on the pool's deadlist for296* dsl_process_async_destroys() to find. 
This is to prevent297* zio_free() from creating a ZIO_TYPE_FREE IO for them, which298* are very heavy and can lead to out-of-memory conditions if299* something tries to free millions of blocks on the same txg.300*/301boolean_t defer = spa_version(spa) >= SPA_VERSION_DEADLISTS &&302(BP_IS_GANG(bp) || BP_GET_DEDUP(bp) ||303brt_maybe_exists(spa, bp));304305if (defer) {306dprintf_bp(bp, "putting on free list: %s", "");307bpobj_enqueue(&ds->ds_dir->dd_pool->dp_free_bpobj,308bp, B_FALSE, tx);309} else {310dprintf_bp(bp, "freeing ds=%llu",311(u_longlong_t)ds->ds_object);312dsl_free(tx->tx_pool, tx->tx_txg, bp);313}314315mutex_enter(&ds->ds_lock);316ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||317!DS_UNIQUE_IS_ACCURATE(ds));318delta = parent_delta(ds, -used);319dsl_dataset_phys(ds)->ds_unique_bytes -= used;320mutex_exit(&ds->ds_lock);321322dsl_dir_diduse_transfer_space(ds->ds_dir,323delta, -compressed, -uncompressed, -used,324DD_USED_REFRSRV, DD_USED_HEAD, tx);325326if (defer)327dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,328DD_USED_HEAD, used, compressed, uncompressed, tx);329} else {330dprintf_bp(bp, "putting on dead list: %s", "");331if (async) {332/*333* We are here as part of zio's write done callback,334* which means we're a zio interrupt thread. We can't335* call dsl_deadlist_insert() now because it may block336* waiting for I/O. 
Instead, put bp on the deferred337* queue and let dsl_pool_sync() finish the job.338*/339bplist_append(&ds->ds_pending_deadlist, bp);340} else {341dsl_deadlist_insert(&ds->ds_deadlist, bp, B_FALSE, tx);342}343ASSERT3U(ds->ds_prev->ds_object, ==,344dsl_dataset_phys(ds)->ds_prev_snap_obj);345ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0);346/* if (logical birth > prev prev snap txg) prev unique += bs */347if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==348ds->ds_object && BP_GET_BIRTH(bp) >349dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) {350dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);351mutex_enter(&ds->ds_prev->ds_lock);352dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used;353mutex_exit(&ds->ds_prev->ds_lock);354}355if (BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {356dsl_dir_transfer_space(ds->ds_dir, used,357DD_USED_HEAD, DD_USED_SNAP, tx);358}359}360361dsl_bookmark_block_killed(ds, bp, tx);362363mutex_enter(&ds->ds_lock);364ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used);365dsl_dataset_phys(ds)->ds_referenced_bytes -= used;366ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed);367dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed;368ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed);369dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed;370mutex_exit(&ds->ds_lock);371372return (used);373}374375struct feature_type_uint64_array_arg {376uint64_t length;377uint64_t *array;378};379380static void381unload_zfeature(dsl_dataset_t *ds, spa_feature_t f)382{383switch (spa_feature_table[f].fi_type) {384case ZFEATURE_TYPE_BOOLEAN:385break;386case ZFEATURE_TYPE_UINT64_ARRAY:387{388struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f];389kmem_free(ftuaa->array, ftuaa->length * sizeof (uint64_t));390kmem_free(ftuaa, sizeof (*ftuaa));391break;392}393default:394panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);395}396}397398static int399load_zfeature(objset_t *mos, 
dsl_dataset_t *ds, spa_feature_t f)400{401int err = 0;402switch (spa_feature_table[f].fi_type) {403case ZFEATURE_TYPE_BOOLEAN:404err = zap_contains(mos, ds->ds_object,405spa_feature_table[f].fi_guid);406if (err == 0) {407ds->ds_feature[f] = (void *)B_TRUE;408} else {409ASSERT3U(err, ==, ENOENT);410err = 0;411}412break;413case ZFEATURE_TYPE_UINT64_ARRAY:414{415uint64_t int_size, num_int;416uint64_t *data;417err = zap_length(mos, ds->ds_object,418spa_feature_table[f].fi_guid, &int_size, &num_int);419if (err != 0) {420ASSERT3U(err, ==, ENOENT);421err = 0;422break;423}424ASSERT3U(int_size, ==, sizeof (uint64_t));425data = kmem_alloc(int_size * num_int, KM_SLEEP);426VERIFY0(zap_lookup(mos, ds->ds_object,427spa_feature_table[f].fi_guid, int_size, num_int, data));428struct feature_type_uint64_array_arg *ftuaa =429kmem_alloc(sizeof (*ftuaa), KM_SLEEP);430ftuaa->length = num_int;431ftuaa->array = data;432ds->ds_feature[f] = ftuaa;433break;434}435default:436panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);437}438return (err);439}440441/*442* We have to release the fsid synchronously or we risk that a subsequent443* mount of the same dataset will fail to unique_insert the fsid. 
This444* failure would manifest itself as the fsid of this dataset changing445* between mounts which makes NFS clients quite unhappy.446*/447static void448dsl_dataset_evict_sync(void *dbu)449{450dsl_dataset_t *ds = dbu;451452ASSERT0P(ds->ds_owner);453454unique_remove(ds->ds_fsid_guid);455}456457static void458dsl_dataset_evict_async(void *dbu)459{460dsl_dataset_t *ds = dbu;461462ASSERT0P(ds->ds_owner);463464ds->ds_dbuf = NULL;465466if (ds->ds_objset != NULL)467dmu_objset_evict(ds->ds_objset);468469if (ds->ds_prev) {470dsl_dataset_rele(ds->ds_prev, ds);471ds->ds_prev = NULL;472}473474dsl_bookmark_fini_ds(ds);475476bplist_destroy(&ds->ds_pending_deadlist);477if (dsl_deadlist_is_open(&ds->ds_deadlist))478dsl_deadlist_close(&ds->ds_deadlist);479if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))480dsl_deadlist_close(&ds->ds_remap_deadlist);481if (ds->ds_dir)482dsl_dir_async_rele(ds->ds_dir, ds);483484ASSERT(!list_link_active(&ds->ds_synced_link));485486for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {487if (dsl_dataset_feature_is_active(ds, f))488unload_zfeature(ds, f);489}490491list_destroy(&ds->ds_prop_cbs);492mutex_destroy(&ds->ds_lock);493mutex_destroy(&ds->ds_opening_lock);494mutex_destroy(&ds->ds_sendstream_lock);495mutex_destroy(&ds->ds_remap_deadlist_lock);496zfs_refcount_destroy(&ds->ds_longholds);497rrw_destroy(&ds->ds_bp_rwlock);498499kmem_free(ds, sizeof (dsl_dataset_t));500}501502int503dsl_dataset_get_snapname(dsl_dataset_t *ds)504{505dsl_dataset_phys_t *headphys;506int err;507dmu_buf_t *headdbuf;508dsl_pool_t *dp = ds->ds_dir->dd_pool;509objset_t *mos = dp->dp_meta_objset;510511if (ds->ds_snapname[0])512return (0);513if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0)514return (0);515516err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,517FTAG, &headdbuf);518if (err != 0)519return (err);520headphys = headdbuf->db_data;521err = zap_value_search(dp->dp_meta_objset,522headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname,523sizeof 
(ds->ds_snapname));524if (err != 0 && zfs_recover == B_TRUE) {525err = 0;526(void) snprintf(ds->ds_snapname, sizeof (ds->ds_snapname),527"SNAPOBJ=%llu-ERR=%d",528(unsigned long long)ds->ds_object, err);529}530dmu_buf_rele(headdbuf, FTAG);531return (err);532}533534int535dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)536{537objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;538uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;539matchtype_t mt = 0;540int err;541542if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)543mt = MT_NORMALIZE;544545err = zap_lookup_norm(mos, snapobj, name, 8, 1,546value, mt, NULL, 0, NULL);547if (err == ENOTSUP && (mt & MT_NORMALIZE))548err = zap_lookup(mos, snapobj, name, 8, 1, value);549return (err);550}551552int553dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,554boolean_t adj_cnt)555{556objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;557uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;558matchtype_t mt = 0;559int err;560561dsl_dir_snap_cmtime_update(ds->ds_dir, tx);562563if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)564mt = MT_NORMALIZE;565566err = zap_remove_norm(mos, snapobj, name, mt, tx);567if (err == ENOTSUP && (mt & MT_NORMALIZE))568err = zap_remove(mos, snapobj, name, tx);569570if (err == 0 && adj_cnt)571dsl_fs_ss_count_adjust(ds->ds_dir, -1,572DD_FIELD_SNAPSHOT_COUNT, tx);573574return (err);575}576577boolean_t578dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, const void *tag)579{580dmu_buf_t *dbuf = ds->ds_dbuf;581boolean_t result = B_FALSE;582583if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset,584ds->ds_object, DMU_BONUS_BLKID, tag)) {585586if (ds == dmu_buf_get_user(dbuf))587result = B_TRUE;588else589dmu_buf_rele(dbuf, tag);590}591592return (result);593}594595int596dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, const void *tag,597dsl_dataset_t **dsp)598{599objset_t *mos = 
dp->dp_meta_objset;600dmu_buf_t *dbuf;601dsl_dataset_t *ds;602int err;603dmu_object_info_t doi;604605ASSERT(dsl_pool_config_held(dp));606607err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);608if (err != 0)609return (err);610611/* Make sure dsobj has the correct object type. */612dmu_object_info_from_db(dbuf, &doi);613if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {614dmu_buf_rele(dbuf, tag);615return (SET_ERROR(EINVAL));616}617618ds = dmu_buf_get_user(dbuf);619if (ds == NULL) {620dsl_dataset_t *winner = NULL;621622ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);623ds->ds_dbuf = dbuf;624ds->ds_object = dsobj;625ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;626list_link_init(&ds->ds_synced_link);627628err = dsl_dir_hold_obj(dp, dsl_dataset_phys(ds)->ds_dir_obj,629NULL, ds, &ds->ds_dir);630if (err != 0) {631kmem_free(ds, sizeof (dsl_dataset_t));632dmu_buf_rele(dbuf, tag);633return (err);634}635636mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);637mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);638mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);639mutex_init(&ds->ds_remap_deadlist_lock,640NULL, MUTEX_DEFAULT, NULL);641rrw_init(&ds->ds_bp_rwlock, B_FALSE);642zfs_refcount_create(&ds->ds_longholds);643644bplist_create(&ds->ds_pending_deadlist);645646list_create(&ds->ds_sendstreams, sizeof (dmu_sendstatus_t),647offsetof(dmu_sendstatus_t, dss_link));648649list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t),650offsetof(dsl_prop_cb_record_t, cbr_ds_node));651652if (doi.doi_type == DMU_OTN_ZAP_METADATA) {653spa_feature_t f;654655for (f = 0; f < SPA_FEATURES; f++) {656if (!(spa_feature_table[f].fi_flags &657ZFEATURE_FLAG_PER_DATASET))658continue;659err = load_zfeature(mos, ds, f);660}661}662663if (!ds->ds_is_snapshot) {664ds->ds_snapname[0] = '\0';665if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {666err = dsl_dataset_hold_obj(dp,667dsl_dataset_phys(ds)->ds_prev_snap_obj,668ds, &ds->ds_prev);669}670if (err != 0)671goto 
after_dsl_bookmark_fini;672err = dsl_bookmark_init_ds(ds);673} else {674if (zfs_flags & ZFS_DEBUG_SNAPNAMES)675err = dsl_dataset_get_snapname(ds);676if (err == 0 &&677dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {678err = zap_count(679ds->ds_dir->dd_pool->dp_meta_objset,680dsl_dataset_phys(ds)->ds_userrefs_obj,681&ds->ds_userrefs);682}683}684685if (err == 0 && !ds->ds_is_snapshot) {686err = dsl_prop_get_int_ds(ds,687zfs_prop_to_name(ZFS_PROP_REFRESERVATION),688&ds->ds_reserved);689if (err == 0) {690err = dsl_prop_get_int_ds(ds,691zfs_prop_to_name(ZFS_PROP_REFQUOTA),692&ds->ds_quota);693}694} else {695ds->ds_reserved = ds->ds_quota = 0;696}697698if (err == 0 && ds->ds_dir->dd_crypto_obj != 0 &&699ds->ds_is_snapshot &&700zap_contains(mos, dsobj, DS_FIELD_IVSET_GUID) != 0) {701dp->dp_spa->spa_errata =702ZPOOL_ERRATA_ZOL_8308_ENCRYPTION;703}704705if (err == 0) {706err = dsl_deadlist_open(&ds->ds_deadlist,707mos, dsl_dataset_phys(ds)->ds_deadlist_obj);708}709if (err == 0) {710uint64_t remap_deadlist_obj =711dsl_dataset_get_remap_deadlist_object(ds);712if (remap_deadlist_obj != 0) {713err = dsl_deadlist_open(&ds->ds_remap_deadlist,714mos, remap_deadlist_obj);715}716}717718dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,719dsl_dataset_evict_async, &ds->ds_dbuf);720if (err == 0)721winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);722723if (err != 0 || winner != NULL) {724if (dsl_deadlist_is_open(&ds->ds_deadlist))725dsl_deadlist_close(&ds->ds_deadlist);726if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))727dsl_deadlist_close(&ds->ds_remap_deadlist);728dsl_bookmark_fini_ds(ds);729after_dsl_bookmark_fini:730if (ds->ds_prev)731dsl_dataset_rele(ds->ds_prev, ds);732dsl_dir_rele(ds->ds_dir, ds);733for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {734if (dsl_dataset_feature_is_active(ds, f))735unload_zfeature(ds, 
f);736}737738list_destroy(&ds->ds_prop_cbs);739list_destroy(&ds->ds_sendstreams);740bplist_destroy(&ds->ds_pending_deadlist);741mutex_destroy(&ds->ds_lock);742mutex_destroy(&ds->ds_opening_lock);743mutex_destroy(&ds->ds_sendstream_lock);744mutex_destroy(&ds->ds_remap_deadlist_lock);745zfs_refcount_destroy(&ds->ds_longholds);746rrw_destroy(&ds->ds_bp_rwlock);747kmem_free(ds, sizeof (dsl_dataset_t));748if (err != 0) {749dmu_buf_rele(dbuf, tag);750return (err);751}752ds = winner;753} else {754ds->ds_fsid_guid =755unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);756if (ds->ds_fsid_guid !=757dsl_dataset_phys(ds)->ds_fsid_guid) {758zfs_dbgmsg("ds_fsid_guid changed from "759"%llx to %llx for pool %s dataset id %llu",760(long long)761dsl_dataset_phys(ds)->ds_fsid_guid,762(long long)ds->ds_fsid_guid,763spa_name(dp->dp_spa),764(u_longlong_t)dsobj);765}766}767}768769ASSERT3P(ds->ds_dbuf, ==, dbuf);770ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);771ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||772spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||773dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);774*dsp = ds;775776return (0);777}778779int780dsl_dataset_create_key_mapping(dsl_dataset_t *ds)781{782dsl_dir_t *dd = ds->ds_dir;783784if (dd->dd_crypto_obj == 0)785return (0);786787return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,788ds, ds, &ds->ds_key_mapping));789}790791int792dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,793ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp)794{795int err;796797err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);798if (err != 0)799return (err);800801ASSERT3P(*dsp, !=, NULL);802803if (flags & DS_HOLD_FLAG_DECRYPT) {804err = dsl_dataset_create_key_mapping(*dsp);805if (err != 0)806dsl_dataset_rele(*dsp, tag);807}808809return (err);810}811812int813dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,814const void *tag, dsl_dataset_t **dsp)815{816dsl_dir_t *dd;817const char 
*snapname;818uint64_t obj;819int err = 0;820dsl_dataset_t *ds;821822err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);823if (err != 0)824return (err);825826ASSERT(dsl_pool_config_held(dp));827obj = dsl_dir_phys(dd)->dd_head_dataset_obj;828if (obj != 0)829err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds);830else831err = SET_ERROR(ENOENT);832833/* we may be looking for a snapshot */834if (err == 0 && snapname != NULL) {835dsl_dataset_t *snap_ds;836837if (*snapname++ != '@') {838dsl_dataset_rele_flags(ds, flags, tag);839dsl_dir_rele(dd, FTAG);840return (SET_ERROR(ENOENT));841}842843dprintf("looking for snapshot '%s'\n", snapname);844err = dsl_dataset_snap_lookup(ds, snapname, &obj);845if (err == 0) {846err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag,847&snap_ds);848}849dsl_dataset_rele_flags(ds, flags, tag);850851if (err == 0) {852mutex_enter(&snap_ds->ds_lock);853if (snap_ds->ds_snapname[0] == 0)854(void) strlcpy(snap_ds->ds_snapname, snapname,855sizeof (snap_ds->ds_snapname));856mutex_exit(&snap_ds->ds_lock);857ds = snap_ds;858}859}860if (err == 0)861*dsp = ds;862dsl_dir_rele(dd, FTAG);863return (err);864}865866int867dsl_dataset_hold(dsl_pool_t *dp, const char *name, const void *tag,868dsl_dataset_t **dsp)869{870return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));871}872873static int874dsl_dataset_own_obj_impl(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,875const void *tag, boolean_t override, dsl_dataset_t **dsp)876{877int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);878if (err != 0)879return (err);880if (!dsl_dataset_tryown(*dsp, tag, override)) {881dsl_dataset_rele_flags(*dsp, flags, tag);882*dsp = NULL;883return (SET_ERROR(EBUSY));884}885return (0);886}887888889int890dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,891const void *tag, dsl_dataset_t **dsp)892{893return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_FALSE, dsp));894}895896int897dsl_dataset_own_obj_force(dsl_pool_t *dp, 
uint64_t dsobj,898ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp)899{900return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_TRUE, dsp));901}902903static int904dsl_dataset_own_impl(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,905const void *tag, boolean_t override, dsl_dataset_t **dsp)906{907int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);908if (err != 0)909return (err);910if (!dsl_dataset_tryown(*dsp, tag, override)) {911dsl_dataset_rele_flags(*dsp, flags, tag);912return (SET_ERROR(EBUSY));913}914return (0);915}916917int918dsl_dataset_own_force(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,919const void *tag, dsl_dataset_t **dsp)920{921return (dsl_dataset_own_impl(dp, name, flags, tag, B_TRUE, dsp));922}923924int925dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,926const void *tag, dsl_dataset_t **dsp)927{928return (dsl_dataset_own_impl(dp, name, flags, tag, B_FALSE, dsp));929}930931/*932* See the comment above dsl_pool_hold() for details. In summary, a long933* hold is used to prevent destruction of a dataset while the pool hold934* is dropped, allowing other concurrent operations (e.g. spa_sync()).935*936* The dataset and pool must be held when this function is called. After it937* is called, the pool hold may be released while the dataset is still held938* and accessed.939*/940void941dsl_dataset_long_hold(dsl_dataset_t *ds, const void *tag)942{943ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));944(void) zfs_refcount_add(&ds->ds_longholds, tag);945}946947void948dsl_dataset_long_rele(dsl_dataset_t *ds, const void *tag)949{950(void) zfs_refcount_remove(&ds->ds_longholds, tag);951}952953/* Return B_TRUE if there are any long holds on this dataset. 
*/954boolean_t955dsl_dataset_long_held(dsl_dataset_t *ds)956{957return (!zfs_refcount_is_zero(&ds->ds_longholds));958}959960void961dsl_dataset_name(dsl_dataset_t *ds, char *name)962{963if (ds == NULL) {964(void) strlcpy(name, "mos", ZFS_MAX_DATASET_NAME_LEN);965} else {966dsl_dir_name(ds->ds_dir, name);967VERIFY0(dsl_dataset_get_snapname(ds));968if (ds->ds_snapname[0]) {969VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN),970<, ZFS_MAX_DATASET_NAME_LEN);971/*972* We use a "recursive" mutex so that we973* can call dprintf_ds() with ds_lock held.974*/975if (!MUTEX_HELD(&ds->ds_lock)) {976mutex_enter(&ds->ds_lock);977VERIFY3U(strlcat(name, ds->ds_snapname,978ZFS_MAX_DATASET_NAME_LEN), <,979ZFS_MAX_DATASET_NAME_LEN);980mutex_exit(&ds->ds_lock);981} else {982VERIFY3U(strlcat(name, ds->ds_snapname,983ZFS_MAX_DATASET_NAME_LEN), <,984ZFS_MAX_DATASET_NAME_LEN);985}986}987}988}989990int991dsl_dataset_namelen(dsl_dataset_t *ds)992{993VERIFY0(dsl_dataset_get_snapname(ds));994mutex_enter(&ds->ds_lock);995int len = strlen(ds->ds_snapname);996mutex_exit(&ds->ds_lock);997/* add '@' if ds is a snap */998if (len > 0)999len++;1000len += dsl_dir_namelen(ds->ds_dir);1001return (len);1002}10031004void1005dsl_dataset_rele(dsl_dataset_t *ds, const void *tag)1006{1007dmu_buf_rele(ds->ds_dbuf, tag);1008}10091010void1011dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)1012{1013dsl_dir_t *dd = ds->ds_dir;10141015if (dd == NULL || dd->dd_crypto_obj == 0)1016return;10171018(void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,1019ds->ds_object, ds);1020}10211022void1023dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,1024const void *tag)1025{1026if (flags & DS_HOLD_FLAG_DECRYPT)1027dsl_dataset_remove_key_mapping(ds);10281029dsl_dataset_rele(ds, tag);1030}10311032void1033dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, const void *tag)1034{1035ASSERT3P(ds->ds_owner, ==, tag);1036ASSERT(ds->ds_dbuf != NULL);10371038mutex_enter(&ds->ds_lock);1039ds->ds_owner = 
NULL;1040mutex_exit(&ds->ds_lock);1041dsl_dataset_long_rele(ds, tag);1042dsl_dataset_rele_flags(ds, flags, tag);1043}10441045boolean_t1046dsl_dataset_tryown(dsl_dataset_t *ds, const void *tag, boolean_t override)1047{1048boolean_t gotit = FALSE;10491050ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));1051mutex_enter(&ds->ds_lock);1052if (ds->ds_owner == NULL && (override || !(DS_IS_INCONSISTENT(ds) ||1053(dsl_dataset_feature_is_active(ds,1054SPA_FEATURE_REDACTED_DATASETS) &&1055!zfs_allow_redacted_dataset_mount)))) {1056ds->ds_owner = tag;1057dsl_dataset_long_hold(ds, tag);1058gotit = TRUE;1059}1060mutex_exit(&ds->ds_lock);1061return (gotit);1062}10631064boolean_t1065dsl_dataset_has_owner(dsl_dataset_t *ds)1066{1067boolean_t rv;1068mutex_enter(&ds->ds_lock);1069rv = (ds->ds_owner != NULL);1070mutex_exit(&ds->ds_lock);1071return (rv);1072}10731074static boolean_t1075zfeature_active(spa_feature_t f, void *arg)1076{1077switch (spa_feature_table[f].fi_type) {1078case ZFEATURE_TYPE_BOOLEAN: {1079boolean_t val = (boolean_t)(uintptr_t)arg;1080ASSERT(val == B_FALSE || val == B_TRUE);1081return (val);1082}1083case ZFEATURE_TYPE_UINT64_ARRAY:1084/*1085* In this case, arg is a uint64_t array. 
The feature is active1086* if the array is non-null.1087*/1088return (arg != NULL);1089default:1090panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);1091return (B_FALSE);1092}1093}10941095boolean_t1096dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f)1097{1098return (zfeature_active(f, ds->ds_feature[f]));1099}11001101/*1102* The buffers passed out by this function are references to internal buffers;1103* they should not be freed by callers of this function, and they should not be1104* used after the dataset has been released.1105*/1106boolean_t1107dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds, spa_feature_t f,1108uint64_t *outlength, uint64_t **outp)1109{1110VERIFY(spa_feature_table[f].fi_type & ZFEATURE_TYPE_UINT64_ARRAY);1111if (!dsl_dataset_feature_is_active(ds, f)) {1112return (B_FALSE);1113}1114struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f];1115*outp = ftuaa->array;1116*outlength = ftuaa->length;1117return (B_TRUE);1118}11191120void1121dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, void *arg,1122dmu_tx_t *tx)1123{1124spa_t *spa = dmu_tx_pool(tx)->dp_spa;1125objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;1126uint64_t zero = 0;11271128VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);11291130spa_feature_incr(spa, f, tx);1131dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);11321133switch (spa_feature_table[f].fi_type) {1134case ZFEATURE_TYPE_BOOLEAN:1135ASSERT3S((boolean_t)(uintptr_t)arg, ==, B_TRUE);1136VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,1137sizeof (zero), 1, &zero, tx));1138break;1139case ZFEATURE_TYPE_UINT64_ARRAY:1140{1141struct feature_type_uint64_array_arg *ftuaa = arg;1142VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,1143sizeof (uint64_t), ftuaa->length, ftuaa->array, tx));1144break;1145}1146default:1147panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);1148}1149}11501151static 
void1152dsl_dataset_deactivate_feature_impl(dsl_dataset_t *ds, spa_feature_t f,1153dmu_tx_t *tx)1154{1155spa_t *spa = dmu_tx_pool(tx)->dp_spa;1156objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;1157uint64_t dsobj = ds->ds_object;11581159VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);11601161VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx));1162spa_feature_decr(spa, f, tx);1163ds->ds_feature[f] = NULL;1164}11651166void1167dsl_dataset_deactivate_feature(dsl_dataset_t *ds, spa_feature_t f, dmu_tx_t *tx)1168{1169unload_zfeature(ds, f);1170dsl_dataset_deactivate_feature_impl(ds, f, tx);1171}11721173uint64_t1174dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,1175dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx)1176{1177dsl_pool_t *dp = dd->dd_pool;1178dmu_buf_t *dbuf;1179dsl_dataset_phys_t *dsphys;1180uint64_t dsobj;1181objset_t *mos = dp->dp_meta_objset;11821183if (origin == NULL)1184origin = dp->dp_origin_snap;11851186ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);1187ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0);1188ASSERT(dmu_tx_is_syncing(tx));1189ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);11901191dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,1192DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);1193VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));1194dmu_buf_will_dirty(dbuf, tx);1195dsphys = dbuf->db_data;1196memset(dsphys, 0, sizeof (dsl_dataset_phys_t));1197dsphys->ds_dir_obj = dd->dd_object;1198dsphys->ds_flags = flags;1199dsphys->ds_fsid_guid = unique_create();1200(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,1201sizeof (dsphys->ds_guid));1202dsphys->ds_snapnames_zapobj =1203zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,1204DMU_OT_NONE, 0, tx);1205dsphys->ds_creation_time = gethrestime_sec();1206dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg;12071208if (origin == NULL) {1209dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);1210} else {1211dsl_dataset_t *ohds; /* head of the origin snapshot */12121213dsphys->ds_prev_snap_obj = origin->ds_object;1214dsphys->ds_prev_snap_txg =1215dsl_dataset_phys(origin)->ds_creation_txg;1216dsphys->ds_referenced_bytes =1217dsl_dataset_phys(origin)->ds_referenced_bytes;1218dsphys->ds_compressed_bytes =1219dsl_dataset_phys(origin)->ds_compressed_bytes;1220dsphys->ds_uncompressed_bytes =1221dsl_dataset_phys(origin)->ds_uncompressed_bytes;1222rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG);1223dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp;1224rrw_exit(&origin->ds_bp_rwlock, FTAG);12251226/*1227* Inherit flags that describe the dataset's contents1228* (INCONSISTENT) or properties (Case Insensitive).1229*/1230dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags &1231(DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);12321233for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {1234if (zfeature_active(f, origin->ds_feature[f])) {1235dsl_dataset_activate_feature(dsobj, f,1236origin->ds_feature[f], tx);1237}1238}12391240dmu_buf_will_dirty(origin->ds_dbuf, tx);1241dsl_dataset_phys(origin)->ds_num_children++;12421243VERIFY0(dsl_dataset_hold_obj(dp,1244dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj,1245FTAG, &ohds));1246dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,1247dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);1248dsl_dataset_rele(ohds, FTAG);12491250if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {1251if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) {1252dsl_dataset_phys(origin)->ds_next_clones_obj =1253zap_create(mos,1254DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);1255}1256VERIFY0(zap_add_int(mos,1257dsl_dataset_phys(origin)->ds_next_clones_obj,1258dsobj, tx));1259}12601261dmu_buf_will_dirty(dd->dd_dbuf, tx);1262dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object;1263if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) 
{1264if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) {1265dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);1266dsl_dir_phys(origin->ds_dir)->dd_clones =1267zap_create(mos,1268DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);1269}1270VERIFY0(zap_add_int(mos,1271dsl_dir_phys(origin->ds_dir)->dd_clones,1272dsobj, tx));1273}1274}12751276/* handle encryption */1277dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx);12781279if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)1280dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;12811282dmu_buf_rele(dbuf, FTAG);12831284dmu_buf_will_dirty(dd->dd_dbuf, tx);1285dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj;12861287return (dsobj);1288}12891290static void1291dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)1292{1293objset_t *os;12941295VERIFY0(dmu_objset_from_ds(ds, &os));1296if (memcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) {1297dsl_pool_t *dp = ds->ds_dir->dd_pool;1298zio_t *zio;12991300memset(&os->os_zil_header, 0, sizeof (os->os_zil_header));1301if (os->os_encrypted)1302os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;13031304zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);1305dsl_dataset_sync(ds, zio, tx);1306VERIFY0(zio_wait(zio));1307dsl_dataset_sync_done(ds, tx);1308}1309}13101311uint64_t1312dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,1313dsl_dataset_t *origin, uint64_t flags, cred_t *cr,1314dsl_crypto_params_t *dcp, dmu_tx_t *tx)1315{1316dsl_pool_t *dp = pdd->dd_pool;1317uint64_t dsobj, ddobj;1318dsl_dir_t *dd;13191320ASSERT(dmu_tx_is_syncing(tx));1321ASSERT(lastname[0] != '@');1322/*1323* Filesystems will eventually have their origin set to dp_origin_snap,1324* but that's taken care of in dsl_dataset_create_sync_dd. 
When1325* creating a filesystem, this function is called with origin equal to1326* NULL.1327*/1328if (origin != NULL)1329ASSERT3P(origin, !=, dp->dp_origin_snap);13301331ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);1332VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));13331334dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp,1335flags & ~DS_CREATE_FLAG_NODIRTY, tx);13361337dsl_deleg_set_create_perms(dd, tx, cr);13381339/*1340* If we are creating a clone and the livelist feature is enabled,1341* add the entry DD_FIELD_LIVELIST to ZAP.1342*/1343if (origin != NULL &&1344spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LIVELIST)) {1345objset_t *mos = dd->dd_pool->dp_meta_objset;1346dsl_dir_zapify(dd, tx);1347uint64_t obj = dsl_deadlist_alloc(mos, tx);1348VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_LIVELIST,1349sizeof (uint64_t), 1, &obj, tx));1350spa_feature_incr(dp->dp_spa, SPA_FEATURE_LIVELIST, tx);1351}13521353/*1354* Since we're creating a new node we know it's a leaf, so we can1355* initialize the counts if the limit feature is active.1356*/1357if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {1358uint64_t cnt = 0;1359objset_t *os = dd->dd_pool->dp_meta_objset;13601361dsl_dir_zapify(dd, tx);1362VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,1363sizeof (cnt), 1, &cnt, tx));1364VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,1365sizeof (cnt), 1, &cnt, tx));1366}13671368dsl_dir_rele(dd, FTAG);13691370/*1371* If we are creating a clone, make sure we zero out any stale1372* data from the origin snapshots zil header.1373*/1374if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {1375dsl_dataset_t *ds;13761377VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));1378dsl_dataset_zero_zil(ds, tx);1379dsl_dataset_rele(ds, FTAG);1380}13811382return (dsobj);1383}13841385/*1386* The unique space in the head dataset can be calculated by subtracting1387* the space used in the most recent snapshot, that is still being 
used1388* in this file system, from the space currently in use. To figure out1389* the space in the most recent snapshot still in use, we need to take1390* the total space used in the snapshot and subtract out the space that1391* has been freed up since the snapshot was taken.1392*/1393void1394dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)1395{1396uint64_t mrs_used;1397uint64_t dlused, dlcomp, dluncomp;13981399ASSERT(!ds->ds_is_snapshot);14001401if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)1402mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;1403else1404mrs_used = 0;14051406dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);14071408ASSERT3U(dlused, <=, mrs_used);1409dsl_dataset_phys(ds)->ds_unique_bytes =1410dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);14111412if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=1413SPA_VERSION_UNIQUE_ACCURATE)1414dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;1415}14161417void1418dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,1419dmu_tx_t *tx)1420{1421objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;1422uint64_t count __maybe_unused;1423int err;14241425ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2);1426err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,1427obj, tx);1428/*1429* The err should not be ENOENT, but a bug in a previous version1430* of the code could cause upgrade_clones_cb() to not set1431* ds_next_snap_obj when it should, leading to a missing entry.1432* If we knew that the pool was created after1433* SPA_VERSION_NEXT_CLONES, we could assert that it isn't1434* ENOENT. 
However, at least we can check that we don't have1435* too many entries in the next_clones_obj even after failing to1436* remove this one.1437*/1438if (err != ENOENT)1439VERIFY0(err);1440ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,1441&count));1442ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2);1443}144414451446blkptr_t *1447dsl_dataset_get_blkptr(dsl_dataset_t *ds)1448{1449return (&dsl_dataset_phys(ds)->ds_bp);1450}14511452spa_t *1453dsl_dataset_get_spa(dsl_dataset_t *ds)1454{1455return (ds->ds_dir->dd_pool->dp_spa);1456}14571458void1459dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)1460{1461dsl_pool_t *dp;14621463if (ds == NULL) /* this is the meta-objset */1464return;14651466ASSERT(ds->ds_objset != NULL);14671468if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0)1469panic("dirtying snapshot!");14701471/* Must not dirty a dataset in the same txg where it got snapshotted. */1472ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg);14731474dp = ds->ds_dir->dd_pool;1475if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {1476objset_t *os = ds->ds_objset;14771478/* up the hold count until we can be written out */1479dmu_buf_add_ref(ds->ds_dbuf, ds);14801481/* if this dataset is encrypted, grab a reference to the DCK */1482if (ds->ds_dir->dd_crypto_obj != 0 &&1483!os->os_raw_receive &&1484!os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {1485ASSERT3P(ds->ds_key_mapping, !=, NULL);1486key_mapping_add_ref(ds->ds_key_mapping, ds);1487}1488}1489}14901491static int1492dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)1493{1494uint64_t asize;14951496if (!dmu_tx_is_syncing(tx))1497return (0);14981499/*1500* If there's an fs-only reservation, any blocks that might become1501* owned by the snapshot dataset must be accommodated by space1502* outside of the reservation.1503*/1504ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));1505asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved);1506if 
(asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))1507return (SET_ERROR(ENOSPC));15081509/*1510* Propagate any reserved space for this snapshot to other1511* snapshot checks in this sync group.1512*/1513if (asize > 0)1514dsl_dir_willuse_space(ds->ds_dir, asize, tx);15151516return (0);1517}15181519int1520dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,1521dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)1522{1523int error;1524uint64_t value;15251526ds->ds_trysnap_txg = tx->tx_txg;15271528if (!dmu_tx_is_syncing(tx))1529return (0);15301531/*1532* We don't allow multiple snapshots of the same txg. If there1533* is already one, try again.1534*/1535if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg)1536return (SET_ERROR(EAGAIN));15371538/*1539* Check for conflicting snapshot name.1540*/1541error = dsl_dataset_snap_lookup(ds, snapname, &value);1542if (error == 0)1543return (SET_ERROR(EEXIST));1544if (error != ENOENT)1545return (error);15461547/*1548* We don't allow taking snapshots of inconsistent datasets, such as1549* those into which we are currently receiving. However, if we are1550* creating this snapshot as part of a receive, this check will be1551* executed atomically with respect to the completion of the receive1552* itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this1553* case we ignore this, knowing it will be fixed up for us shortly in1554* dmu_recv_end_sync().1555*/1556if (!recv && DS_IS_INCONSISTENT(ds))1557return (SET_ERROR(EBUSY));15581559/*1560* Skip the check for temporary snapshots or if we have already checked1561* the counts in dsl_dataset_snapshot_check. 
This means we really only1562* check the count here when we're receiving a stream.1563*/1564if (cnt != 0 && cr != NULL) {1565error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,1566ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);1567if (error != 0)1568return (error);1569}15701571error = dsl_dataset_snapshot_reserve_space(ds, tx);1572if (error != 0)1573return (error);15741575return (0);1576}15771578int1579dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)1580{1581dsl_dataset_snapshot_arg_t *ddsa = arg;1582dsl_pool_t *dp = dmu_tx_pool(tx);1583nvpair_t *pair;1584int rv = 0;15851586/*1587* Pre-compute how many total new snapshots will be created for each1588* level in the tree and below. This is needed for validating the1589* snapshot limit when either taking a recursive snapshot or when1590* taking multiple snapshots.1591*1592* The problem is that the counts are not actually adjusted when1593* we are checking, only when we finally sync. For a single snapshot,1594* this is easy, the count will increase by 1 at each node up the tree,1595* but its more complicated for the recursive/multiple snapshot case.1596*1597* The dsl_fs_ss_limit_check function does recursively check the count1598* at each level up the tree but since it is validating each snapshot1599* independently we need to be sure that we are validating the complete1600* count for the entire set of snapshots. We do this by rolling up the1601* counts for each component of the name into an nvlist and then1602* checking each of those cases with the aggregated count.1603*1604* This approach properly handles not only the recursive snapshot1605* case (where we get all of those on the ddsa_snaps list) but also1606* the sibling case (e.g. 
snapshot a/b and a/c so that we will also1607* validate the limit on 'a' using a count of 2).1608*1609* We validate the snapshot names in the third loop and only report1610* name errors once.1611*/1612if (dmu_tx_is_syncing(tx)) {1613char *nm;1614nvlist_t *cnt_track = NULL;1615cnt_track = fnvlist_alloc();16161617nm = kmem_alloc(MAXPATHLEN, KM_SLEEP);16181619/* Rollup aggregated counts into the cnt_track list */1620for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);1621pair != NULL;1622pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {1623char *pdelim;1624uint64_t val;16251626(void) strlcpy(nm, nvpair_name(pair), MAXPATHLEN);1627pdelim = strchr(nm, '@');1628if (pdelim == NULL)1629continue;1630*pdelim = '\0';16311632do {1633if (nvlist_lookup_uint64(cnt_track, nm,1634&val) == 0) {1635/* update existing entry */1636fnvlist_add_uint64(cnt_track, nm,1637val + 1);1638} else {1639/* add to list */1640fnvlist_add_uint64(cnt_track, nm, 1);1641}16421643pdelim = strrchr(nm, '/');1644if (pdelim != NULL)1645*pdelim = '\0';1646} while (pdelim != NULL);1647}16481649kmem_free(nm, MAXPATHLEN);16501651/* Check aggregated counts at each level */1652for (pair = nvlist_next_nvpair(cnt_track, NULL);1653pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {1654int error = 0;1655const char *name;1656uint64_t cnt = 0;1657dsl_dataset_t *ds;16581659name = nvpair_name(pair);1660cnt = fnvpair_value_uint64(pair);1661ASSERT(cnt > 0);16621663error = dsl_dataset_hold(dp, name, FTAG, &ds);1664if (error == 0) {1665error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,1666ZFS_PROP_SNAPSHOT_LIMIT, NULL,1667ddsa->ddsa_cr);1668dsl_dataset_rele(ds, FTAG);1669}16701671if (error != 0) {1672if (ddsa->ddsa_errors != NULL)1673fnvlist_add_int32(ddsa->ddsa_errors,1674name, error);1675rv = error;1676/* only report one error for this check */1677break;1678}1679}1680nvlist_free(cnt_track);1681}16821683for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);1684pair != NULL; pair = 
nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {1685int error = 0;1686dsl_dataset_t *ds;1687const char *name, *atp = NULL;1688char dsname[ZFS_MAX_DATASET_NAME_LEN];16891690name = nvpair_name(pair);1691if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN)1692error = SET_ERROR(ENAMETOOLONG);1693if (error == 0) {1694atp = strchr(name, '@');1695if (atp == NULL)1696error = SET_ERROR(EINVAL);1697if (error == 0)1698(void) strlcpy(dsname, name, atp - name + 1);1699}1700if (error == 0)1701error = dsl_dataset_hold(dp, dsname, FTAG, &ds);1702if (error == 0) {1703/* passing 0/NULL skips dsl_fs_ss_limit_check */1704error = dsl_dataset_snapshot_check_impl(ds,1705atp + 1, tx, B_FALSE, 0, NULL);1706dsl_dataset_rele(ds, FTAG);1707}17081709if (error != 0) {1710if (ddsa->ddsa_errors != NULL) {1711fnvlist_add_int32(ddsa->ddsa_errors,1712name, error);1713}1714rv = error;1715}1716}17171718return (rv);1719}17201721void1722dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,1723dmu_tx_t *tx)1724{1725dsl_pool_t *dp = ds->ds_dir->dd_pool;1726dmu_buf_t *dbuf;1727dsl_dataset_phys_t *dsphys;1728uint64_t dsobj, crtxg;1729objset_t *mos = dp->dp_meta_objset;1730objset_t *os __maybe_unused;17311732ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));17331734/*1735* If we are on an old pool, the zil must not be active, in which1736* case it will be zeroed. Usually zil_suspend() accomplishes this.1737*/1738ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||1739dmu_objset_from_ds(ds, &os) != 0 ||1740memcmp(&os->os_phys->os_zil_header, &zero_zil,1741sizeof (zero_zil)) == 0);17421743/* Should not snapshot a dirty dataset. 
*/1744ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,1745ds, tx->tx_txg));17461747dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);17481749/*1750* The origin's ds_creation_txg has to be < TXG_INITIAL1751*/1752if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)1753crtxg = 1;1754else1755crtxg = tx->tx_txg;17561757dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,1758DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);1759VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));1760dmu_buf_will_dirty(dbuf, tx);1761dsphys = dbuf->db_data;1762memset(dsphys, 0, sizeof (dsl_dataset_phys_t));1763dsphys->ds_dir_obj = ds->ds_dir->dd_object;1764dsphys->ds_fsid_guid = unique_create();1765(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,1766sizeof (dsphys->ds_guid));1767dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;1768dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;1769dsphys->ds_next_snap_obj = ds->ds_object;1770dsphys->ds_num_children = 1;1771dsphys->ds_creation_time = gethrestime_sec();1772dsphys->ds_creation_txg = crtxg;1773dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;1774dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes;1775dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes;1776dsphys->ds_uncompressed_bytes =1777dsl_dataset_phys(ds)->ds_uncompressed_bytes;1778dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags;1779rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);1780dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;1781rrw_exit(&ds->ds_bp_rwlock, FTAG);1782dmu_buf_rele(dbuf, FTAG);17831784for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {1785if (zfeature_active(f, ds->ds_feature[f])) {1786dsl_dataset_activate_feature(dsobj, f,1787ds->ds_feature[f], tx);1788}1789}17901791ASSERT3U(ds->ds_prev != 0, ==,1792dsl_dataset_phys(ds)->ds_prev_snap_obj != 0);1793if (ds->ds_prev) {1794uint64_t next_clones_obj 
=1795dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj;1796ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==1797ds->ds_object ||1798dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1);1799if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==1800ds->ds_object) {1801dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);1802ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,1803dsl_dataset_phys(ds->ds_prev)->ds_creation_txg);1804dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj;1805} else if (next_clones_obj != 0) {1806dsl_dataset_remove_from_next_clones(ds->ds_prev,1807dsphys->ds_next_snap_obj, tx);1808VERIFY0(zap_add_int(mos,1809next_clones_obj, dsobj, tx));1810}1811}18121813/*1814* If we have a reference-reservation on this dataset, we will1815* need to increase the amount of refreservation being charged1816* since our unique space is going to zero.1817*/1818if (ds->ds_reserved) {1819int64_t delta;1820ASSERT(DS_UNIQUE_IS_ACCURATE(ds));1821delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes,1822ds->ds_reserved);1823dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,1824delta, 0, 0, tx);1825}18261827dmu_buf_will_dirty(ds->ds_dbuf, tx);1828dsl_dataset_phys(ds)->ds_deadlist_obj =1829dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX,1830dsl_dataset_phys(ds)->ds_prev_snap_obj, tx);1831dsl_deadlist_close(&ds->ds_deadlist);1832VERIFY0(dsl_deadlist_open(&ds->ds_deadlist, mos,1833dsl_dataset_phys(ds)->ds_deadlist_obj));1834dsl_deadlist_add_key(&ds->ds_deadlist,1835dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);1836dsl_bookmark_snapshotted(ds, tx);18371838if (dsl_dataset_remap_deadlist_exists(ds)) {1839uint64_t remap_deadlist_obj =1840dsl_dataset_get_remap_deadlist_object(ds);1841/*1842* Move the remap_deadlist to the snapshot. 
The head1843* will create a new remap deadlist on demand, from1844* dsl_dataset_block_remapped().1845*/1846dsl_dataset_unset_remap_deadlist_object(ds, tx);1847dsl_deadlist_close(&ds->ds_remap_deadlist);18481849dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);1850VERIFY0(zap_add(mos, dsobj, DS_FIELD_REMAP_DEADLIST,1851sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx));1852}18531854/*1855* Create a ivset guid for this snapshot if the dataset is1856* encrypted. This may be overridden by a raw receive. A1857* previous implementation of this code did not have this1858* field as part of the on-disk format for ZFS encryption1859* (see errata #4). As part of the remediation for this1860* issue, we ask the user to enable the bookmark_v2 feature1861* which is now a dependency of the encryption feature. We1862* use this as a heuristic to determine when the user has1863* elected to correct any datasets created with the old code.1864* As a result, we only do this step if the bookmark_v21865* feature is enabled, which limits the number of states a1866* given pool / dataset can be in with regards to terms of1867* correcting the issue.1868*/1869if (ds->ds_dir->dd_crypto_obj != 0 &&1870spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2)) {1871uint64_t ivset_guid = unique_create();18721873dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);1874VERIFY0(zap_add(mos, dsobj, DS_FIELD_IVSET_GUID,1875sizeof (ivset_guid), 1, &ivset_guid, tx));1876}18771878ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg);1879dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj;1880dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg;1881dsl_dataset_phys(ds)->ds_unique_bytes = 0;18821883if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)1884dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;18851886VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj,1887snapname, 8, 1, &dsobj, tx));18881889if (ds->ds_prev)1890dsl_dataset_rele(ds->ds_prev, 
ds);1891VERIFY0(dsl_dataset_hold_obj(dp,1892dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev));18931894dsl_scan_ds_snapshotted(ds, tx);18951896dsl_dir_snap_cmtime_update(ds->ds_dir, tx);18971898if (zfs_snapshot_history_enabled)1899spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");1900}19011902void1903dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)1904{1905dsl_dataset_snapshot_arg_t *ddsa = arg;1906dsl_pool_t *dp = dmu_tx_pool(tx);1907nvpair_t *pair;19081909for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);1910pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {1911dsl_dataset_t *ds;1912const char *name, *atp;1913char dsname[ZFS_MAX_DATASET_NAME_LEN];19141915name = nvpair_name(pair);1916atp = strchr(name, '@');1917(void) strlcpy(dsname, name, atp - name + 1);1918VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));19191920dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);1921if (ddsa->ddsa_props != NULL) {1922dsl_props_set_sync_impl(ds->ds_prev,1923ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);1924}1925dsl_dataset_rele(ds, FTAG);1926}1927}19281929/*1930* The snapshots must all be in the same pool.1931* All-or-nothing: if there are any failures, nothing will be modified.1932*/1933int1934dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)1935{1936dsl_dataset_snapshot_arg_t ddsa;1937nvpair_t *pair;1938boolean_t needsuspend;1939int error;1940spa_t *spa;1941const char *firstname;1942nvlist_t *suspended = NULL;19431944pair = nvlist_next_nvpair(snaps, NULL);1945if (pair == NULL)1946return (0);1947firstname = nvpair_name(pair);19481949error = spa_open(firstname, &spa, FTAG);1950if (error != 0)1951return (error);1952needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);1953spa_close(spa, FTAG);19541955if (needsuspend) {1956suspended = fnvlist_alloc();1957for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;1958pair = nvlist_next_nvpair(snaps, pair)) {1959char fsname[ZFS_MAX_DATASET_NAME_LEN];1960const char *snapname = 
nvpair_name(pair);1961const char *atp;1962void *cookie;19631964atp = strchr(snapname, '@');1965if (atp == NULL) {1966error = SET_ERROR(EINVAL);1967break;1968}1969(void) strlcpy(fsname, snapname, atp - snapname + 1);19701971error = zil_suspend(fsname, &cookie);1972if (error != 0)1973break;1974fnvlist_add_uint64(suspended, fsname,1975(uintptr_t)cookie);1976}1977}19781979cred_t *cr = CRED();1980crhold(cr);19811982ddsa.ddsa_snaps = snaps;1983ddsa.ddsa_props = props;1984ddsa.ddsa_errors = errors;1985ddsa.ddsa_cr = cr;19861987if (error == 0) {1988error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,1989dsl_dataset_snapshot_sync, &ddsa,1990fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL);1991}19921993crfree(cr);19941995if (suspended != NULL) {1996for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;1997pair = nvlist_next_nvpair(suspended, pair)) {1998zil_resume((void *)(uintptr_t)1999fnvpair_value_uint64(pair));2000}2001fnvlist_free(suspended);2002}20032004if (error == 0) {2005for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;2006pair = nvlist_next_nvpair(snaps, pair)) {2007zvol_create_minors(nvpair_name(pair));2008}2009}20102011return (error);2012}20132014typedef struct dsl_dataset_snapshot_tmp_arg {2015const char *ddsta_fsname;2016const char *ddsta_snapname;2017minor_t ddsta_cleanup_minor;2018const char *ddsta_htag;2019} dsl_dataset_snapshot_tmp_arg_t;20202021static int2022dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)2023{2024dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;2025dsl_pool_t *dp = dmu_tx_pool(tx);2026dsl_dataset_t *ds;2027int error;20282029error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);2030if (error != 0)2031return (error);20322033/* NULL cred means no limit check for tmp snapshot */2034error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,2035tx, B_FALSE, 0, NULL);2036if (error != 0) {2037dsl_dataset_rele(ds, FTAG);2038return (error);2039}20402041if (spa_version(dp->dp_spa) < 
SPA_VERSION_USERREFS) {2042dsl_dataset_rele(ds, FTAG);2043return (SET_ERROR(ENOTSUP));2044}2045error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,2046B_TRUE, tx);2047if (error != 0) {2048dsl_dataset_rele(ds, FTAG);2049return (error);2050}20512052dsl_dataset_rele(ds, FTAG);2053return (0);2054}20552056static void2057dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)2058{2059dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;2060dsl_pool_t *dp = dmu_tx_pool(tx);2061dsl_dataset_t *ds = NULL;20622063VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));20642065dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);2066dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,2067ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);2068dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);20692070dsl_dataset_rele(ds, FTAG);2071}20722073int2074dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,2075minor_t cleanup_minor, const char *htag)2076{2077dsl_dataset_snapshot_tmp_arg_t ddsta;2078int error;2079spa_t *spa;2080boolean_t needsuspend;2081void *cookie;20822083ddsta.ddsta_fsname = fsname;2084ddsta.ddsta_snapname = snapname;2085ddsta.ddsta_cleanup_minor = cleanup_minor;2086ddsta.ddsta_htag = htag;20872088error = spa_open(fsname, &spa, FTAG);2089if (error != 0)2090return (error);2091needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);2092spa_close(spa, FTAG);20932094if (needsuspend) {2095error = zil_suspend(fsname, &cookie);2096if (error != 0)2097return (error);2098}20992100error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,2101dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED);21022103if (needsuspend)2104zil_resume(cookie);2105return (error);2106}21072108/* Nonblocking dataset sync. 
Assumes dataset:objset is always 1:1 */2109void2110dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx)2111{2112ASSERT(dmu_tx_is_syncing(tx));2113ASSERT(ds->ds_objset != NULL);2114ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);21152116/*2117* in case we had to change ds_fsid_guid when we opened it,2118* sync it out now.2119*/2120dmu_buf_will_dirty(ds->ds_dbuf, tx);2121dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid;21222123if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) {2124VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,2125ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1,2126&ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx));2127VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,2128ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1,2129&ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx));2130VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,2131ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1,2132&ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx));2133ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0;2134ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0;2135ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;2136}21372138dmu_objset_sync(ds->ds_objset, rio, tx);2139}21402141/*2142* Check if the percentage of blocks shared between the clone and the2143* snapshot (as opposed to those that are clone only) is below a certain2144* threshold2145*/2146static boolean_t2147dsl_livelist_should_disable(dsl_dataset_t *ds)2148{2149uint64_t used, referenced;2150int percent_shared;21512152used = dsl_dir_get_usedds(ds->ds_dir);2153referenced = dsl_get_referenced(ds);2154if (referenced == 0)2155return (B_FALSE);2156percent_shared = (100 * (referenced - used)) / referenced;2157if (percent_shared <= zfs_livelist_min_percent_shared)2158return (B_TRUE);2159return (B_FALSE);2160}21612162/*2163* Check if it is possible to combine two livelist entries into one.2164* This is the case if the combined number of 'live' blkptrs (ALLOCs that2165* don't have a matching FREE) is under the maximum sublist 
size.2166* We check this by subtracting twice the total number of frees from the total2167* number of blkptrs. FREEs are counted twice because each FREE blkptr2168* will cancel out an ALLOC blkptr when the livelist is processed.2169*/2170static boolean_t2171dsl_livelist_should_condense(dsl_deadlist_entry_t *first,2172dsl_deadlist_entry_t *next)2173{2174uint64_t total_free = first->dle_bpobj.bpo_phys->bpo_num_freed +2175next->dle_bpobj.bpo_phys->bpo_num_freed;2176uint64_t total_entries = first->dle_bpobj.bpo_phys->bpo_num_blkptrs +2177next->dle_bpobj.bpo_phys->bpo_num_blkptrs;2178if ((total_entries - (2 * total_free)) < zfs_livelist_max_entries)2179return (B_TRUE);2180return (B_FALSE);2181}21822183typedef struct try_condense_arg {2184spa_t *spa;2185dsl_dataset_t *ds;2186} try_condense_arg_t;21872188/*2189* Iterate over the livelist entries, searching for a pair to condense.2190* A nonzero return value means stop, 0 means keep looking.2191*/2192static int2193dsl_livelist_try_condense(void *arg, dsl_deadlist_entry_t *first)2194{2195try_condense_arg_t *tca = arg;2196spa_t *spa = tca->spa;2197dsl_dataset_t *ds = tca->ds;2198dsl_deadlist_t *ll = &ds->ds_dir->dd_livelist;2199dsl_deadlist_entry_t *next;22002201/* The condense thread has not yet been created at import */2202if (spa->spa_livelist_condense_zthr == NULL)2203return (1);22042205/* A condense is already in progress */2206if (spa->spa_to_condense.ds != NULL)2207return (1);22082209next = AVL_NEXT(&ll->dl_tree, &first->dle_node);2210/* The livelist has only one entry - don't condense it */2211if (next == NULL)2212return (1);22132214/* Next is the newest entry - don't condense it */2215if (AVL_NEXT(&ll->dl_tree, &next->dle_node) == NULL)2216return (1);22172218/* This pair is not ready to condense but keep looking */2219if (!dsl_livelist_should_condense(first, next))2220return (0);22212222/*2223* Add a ref to prevent the dataset from being evicted while2224* the condense zthr or synctask are running. 
Ref will be2225* released at the end of the condense synctask2226*/2227dmu_buf_add_ref(ds->ds_dbuf, spa);22282229spa->spa_to_condense.ds = ds;2230spa->spa_to_condense.first = first;2231spa->spa_to_condense.next = next;2232spa->spa_to_condense.syncing = B_FALSE;2233spa->spa_to_condense.cancelled = B_FALSE;22342235zthr_wakeup(spa->spa_livelist_condense_zthr);2236return (1);2237}22382239static void2240dsl_flush_pending_livelist(dsl_dataset_t *ds, dmu_tx_t *tx)2241{2242dsl_dir_t *dd = ds->ds_dir;2243spa_t *spa = ds->ds_dir->dd_pool->dp_spa;2244dsl_deadlist_entry_t *last = dsl_deadlist_last(&dd->dd_livelist);22452246/* Check if we need to add a new sub-livelist */2247if (last == NULL) {2248/* The livelist is empty */2249dsl_deadlist_add_key(&dd->dd_livelist,2250tx->tx_txg - 1, tx);2251} else if (spa_sync_pass(spa) == 1) {2252/*2253* Check if the newest entry is full. If it is, make a new one.2254* We only do this once per sync because we could overfill a2255* sublist in one sync pass and don't want to add another entry2256* for a txg that is already represented. 
This ensures that2257* blkptrs born in the same txg are stored in the same sublist.2258*/2259bpobj_t bpobj = last->dle_bpobj;2260uint64_t all = bpobj.bpo_phys->bpo_num_blkptrs;2261uint64_t free = bpobj.bpo_phys->bpo_num_freed;2262uint64_t alloc = all - free;2263if (alloc > zfs_livelist_max_entries) {2264dsl_deadlist_add_key(&dd->dd_livelist,2265tx->tx_txg - 1, tx);2266}2267}22682269/* Insert each entry into the on-disk livelist */2270bplist_iterate(&dd->dd_pending_allocs,2271dsl_deadlist_insert_alloc_cb, &dd->dd_livelist, tx);2272bplist_iterate(&dd->dd_pending_frees,2273dsl_deadlist_insert_free_cb, &dd->dd_livelist, tx);22742275/* Attempt to condense every pair of adjacent entries */2276try_condense_arg_t arg = {2277.spa = spa,2278.ds = ds2279};2280dsl_deadlist_iterate(&dd->dd_livelist, dsl_livelist_try_condense,2281&arg);2282}22832284void2285dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)2286{2287objset_t *os = ds->ds_objset;22882289bplist_iterate(&ds->ds_pending_deadlist,2290dsl_deadlist_insert_alloc_cb, &ds->ds_deadlist, tx);22912292if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) {2293dsl_flush_pending_livelist(ds, tx);2294if (dsl_livelist_should_disable(ds)) {2295dsl_dir_remove_livelist(ds->ds_dir, tx, B_TRUE);2296}2297}22982299dsl_bookmark_sync_done(ds, tx);23002301multilist_destroy(&os->os_synced_dnodes);23022303if (os->os_encrypted)2304os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;2305else2306ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);23072308for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {2309if (zfeature_active(f,2310ds->ds_feature_activation[f])) {2311if (zfeature_active(f, ds->ds_feature[f]))2312continue;2313dsl_dataset_activate_feature(ds->ds_object, f,2314ds->ds_feature_activation[f], tx);2315ds->ds_feature[f] = ds->ds_feature_activation[f];2316}2317}23182319ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));2320}23212322int2323get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val)2324{2325uint64_t count = 
0;2326objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;2327zap_cursor_t zc;2328zap_attribute_t *za;23292330ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));23312332/*2333* There may be missing entries in ds_next_clones_obj2334* due to a bug in a previous version of the code.2335* Only trust it if it has the right number of entries.2336*/2337if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {2338VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,2339&count));2340}2341if (count != dsl_dataset_phys(ds)->ds_num_children - 1) {2342return (SET_ERROR(ENOENT));2343}23442345za = zap_attribute_alloc();2346for (zap_cursor_init(&zc, mos,2347dsl_dataset_phys(ds)->ds_next_clones_obj);2348zap_cursor_retrieve(&zc, za) == 0;2349zap_cursor_advance(&zc)) {2350dsl_dataset_t *clone;2351char buf[ZFS_MAX_DATASET_NAME_LEN];2352VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,2353za->za_first_integer, FTAG, &clone));2354dsl_dir_name(clone->ds_dir, buf);2355fnvlist_add_boolean(val, buf);2356dsl_dataset_rele(clone, FTAG);2357}2358zap_cursor_fini(&zc);2359zap_attribute_free(za);2360return (0);2361}23622363void2364get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)2365{2366nvlist_t *propval = fnvlist_alloc();2367nvlist_t *val = fnvlist_alloc();23682369if (get_clones_stat_impl(ds, val) == 0) {2370fnvlist_add_nvlist(propval, ZPROP_VALUE, val);2371fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),2372propval);2373}23742375nvlist_free(val);2376nvlist_free(propval);2377}23782379static char *2380get_receive_resume_token_impl(dsl_dataset_t *ds)2381{2382if (!dsl_dataset_has_resume_receive_state(ds))2383return (NULL);23842385dsl_pool_t *dp = ds->ds_dir->dd_pool;2386char *str;2387void *packed;2388uint8_t *compressed;2389uint64_t val;2390nvlist_t *token_nv = fnvlist_alloc();2391size_t packed_size, compressed_size;23922393if (zap_lookup(dp->dp_meta_objset, ds->ds_object,2394DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) {2395fnvlist_add_uint64(token_nv, "fromguid", 
val);2396}2397if (zap_lookup(dp->dp_meta_objset, ds->ds_object,2398DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) {2399fnvlist_add_uint64(token_nv, "object", val);2400}2401if (zap_lookup(dp->dp_meta_objset, ds->ds_object,2402DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) {2403fnvlist_add_uint64(token_nv, "offset", val);2404}2405if (zap_lookup(dp->dp_meta_objset, ds->ds_object,2406DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) {2407fnvlist_add_uint64(token_nv, "bytes", val);2408}2409if (zap_lookup(dp->dp_meta_objset, ds->ds_object,2410DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) {2411fnvlist_add_uint64(token_nv, "toguid", val);2412}2413char buf[MAXNAMELEN];2414if (zap_lookup(dp->dp_meta_objset, ds->ds_object,2415DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {2416fnvlist_add_string(token_nv, "toname", buf);2417}2418if (zap_contains(dp->dp_meta_objset, ds->ds_object,2419DS_FIELD_RESUME_LARGEBLOCK) == 0) {2420fnvlist_add_boolean(token_nv, "largeblockok");2421}2422if (zap_contains(dp->dp_meta_objset, ds->ds_object,2423DS_FIELD_RESUME_EMBEDOK) == 0) {2424fnvlist_add_boolean(token_nv, "embedok");2425}2426if (zap_contains(dp->dp_meta_objset, ds->ds_object,2427DS_FIELD_RESUME_COMPRESSOK) == 0) {2428fnvlist_add_boolean(token_nv, "compressok");2429}2430if (zap_contains(dp->dp_meta_objset, ds->ds_object,2431DS_FIELD_RESUME_RAWOK) == 0) {2432fnvlist_add_boolean(token_nv, "rawok");2433}2434if (dsl_dataset_feature_is_active(ds,2435SPA_FEATURE_REDACTED_DATASETS)) {2436uint64_t num_redact_snaps = 0;2437uint64_t *redact_snaps = NULL;2438VERIFY3B(dsl_dataset_get_uint64_array_feature(ds,2439SPA_FEATURE_REDACTED_DATASETS, &num_redact_snaps,2440&redact_snaps), ==, B_TRUE);2441fnvlist_add_uint64_array(token_nv, "redact_snaps",2442redact_snaps, num_redact_snaps);2443}2444if (zap_contains(dp->dp_meta_objset, ds->ds_object,2445DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS) == 0) {2446uint64_t num_redact_snaps = 0, int_size = 0;2447uint64_t *redact_snaps = 
NULL;2448VERIFY0(zap_length(dp->dp_meta_objset, ds->ds_object,2449DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, &int_size,2450&num_redact_snaps));2451ASSERT3U(int_size, ==, sizeof (uint64_t));24522453redact_snaps = kmem_alloc(int_size * num_redact_snaps,2454KM_SLEEP);2455VERIFY0(zap_lookup(dp->dp_meta_objset, ds->ds_object,2456DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, int_size,2457num_redact_snaps, redact_snaps));2458fnvlist_add_uint64_array(token_nv, "book_redact_snaps",2459redact_snaps, num_redact_snaps);2460kmem_free(redact_snaps, int_size * num_redact_snaps);2461}2462packed = fnvlist_pack(token_nv, &packed_size);2463fnvlist_free(token_nv);2464compressed = kmem_alloc(packed_size, KM_SLEEP);24652466/* Call compress function directly to avoid hole detection. */2467abd_t pabd, cabd;2468abd_get_from_buf_struct(&pabd, packed, packed_size);2469abd_get_from_buf_struct(&cabd, compressed, packed_size);2470compressed_size = zfs_gzip_compress(&pabd, &cabd,2471packed_size, packed_size, 6);2472abd_free(&cabd);2473abd_free(&pabd);24742475zio_cksum_t cksum;2476fletcher_4_native_varsize(compressed, compressed_size, &cksum);24772478size_t alloc_size = compressed_size * 2 + 1;2479str = kmem_alloc(alloc_size, KM_SLEEP);2480for (int i = 0; i < compressed_size; i++) {2481size_t offset = i * 2;2482(void) snprintf(str + offset, alloc_size - offset,2483"%02x", compressed[i]);2484}2485str[compressed_size * 2] = '\0';2486char *propval = kmem_asprintf("%u-%llx-%llx-%s",2487ZFS_SEND_RESUME_TOKEN_VERSION,2488(longlong_t)cksum.zc_word[0],2489(longlong_t)packed_size, str);2490kmem_free(packed, packed_size);2491kmem_free(str, alloc_size);2492kmem_free(compressed, packed_size);2493return (propval);2494}24952496/*2497* Returns a string that represents the receive resume state token. It should2498* be freed with strfree(). NULL is returned if no resume state is present.2499*/2500char *2501get_receive_resume_token(dsl_dataset_t *ds)2502{2503/*2504* A failed "newfs" (e.g. 
full) resumable receive leaves2505* the stats set on this dataset. Check here for the prop.2506*/2507char *token = get_receive_resume_token_impl(ds);2508if (token != NULL)2509return (token);2510/*2511* A failed incremental resumable receive leaves the2512* stats set on our child named "%recv". Check the child2513* for the prop.2514*/2515/* 6 extra bytes for /%recv */2516char name[ZFS_MAX_DATASET_NAME_LEN + 6];2517dsl_dataset_t *recv_ds;2518dsl_dataset_name(ds, name);2519if (strlcat(name, "/", sizeof (name)) < sizeof (name) &&2520strlcat(name, recv_clone_name, sizeof (name)) < sizeof (name) &&2521dsl_dataset_hold(ds->ds_dir->dd_pool, name, FTAG, &recv_ds) == 0) {2522token = get_receive_resume_token_impl(recv_ds);2523dsl_dataset_rele(recv_ds, FTAG);2524}2525return (token);2526}25272528uint64_t2529dsl_get_refratio(dsl_dataset_t *ds)2530{2531uint64_t ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :2532(dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /2533dsl_dataset_phys(ds)->ds_compressed_bytes);2534return (ratio);2535}25362537uint64_t2538dsl_get_logicalreferenced(dsl_dataset_t *ds)2539{2540return (dsl_dataset_phys(ds)->ds_uncompressed_bytes);2541}25422543uint64_t2544dsl_get_compressratio(dsl_dataset_t *ds)2545{2546if (ds->ds_is_snapshot) {2547return (dsl_get_refratio(ds));2548} else {2549dsl_dir_t *dd = ds->ds_dir;2550mutex_enter(&dd->dd_lock);2551uint64_t val = dsl_dir_get_compressratio(dd);2552mutex_exit(&dd->dd_lock);2553return (val);2554}2555}25562557uint64_t2558dsl_get_used(dsl_dataset_t *ds)2559{2560if (ds->ds_is_snapshot) {2561return (dsl_dataset_phys(ds)->ds_unique_bytes);2562} else {2563dsl_dir_t *dd = ds->ds_dir;2564mutex_enter(&dd->dd_lock);2565uint64_t val = dsl_dir_get_used(dd);2566mutex_exit(&dd->dd_lock);2567return (val);2568}2569}25702571uint64_t2572dsl_get_creation(dsl_dataset_t *ds)2573{2574return (dsl_dataset_phys(ds)->ds_creation_time);2575}25762577uint64_t2578dsl_get_creationtxg(dsl_dataset_t *ds)2579{2580return 
(dsl_dataset_phys(ds)->ds_creation_txg);2581}25822583uint64_t2584dsl_get_refquota(dsl_dataset_t *ds)2585{2586return (ds->ds_quota);2587}25882589uint64_t2590dsl_get_refreservation(dsl_dataset_t *ds)2591{2592return (ds->ds_reserved);2593}25942595uint64_t2596dsl_get_guid(dsl_dataset_t *ds)2597{2598return (dsl_dataset_phys(ds)->ds_guid);2599}26002601uint64_t2602dsl_get_unique(dsl_dataset_t *ds)2603{2604return (dsl_dataset_phys(ds)->ds_unique_bytes);2605}26062607uint64_t2608dsl_get_objsetid(dsl_dataset_t *ds)2609{2610return (ds->ds_object);2611}26122613uint64_t2614dsl_get_userrefs(dsl_dataset_t *ds)2615{2616return (ds->ds_userrefs);2617}26182619uint64_t2620dsl_get_defer_destroy(dsl_dataset_t *ds)2621{2622return (DS_IS_DEFER_DESTROY(ds) ? 1 : 0);2623}26242625uint64_t2626dsl_get_referenced(dsl_dataset_t *ds)2627{2628return (dsl_dataset_phys(ds)->ds_referenced_bytes);2629}26302631uint64_t2632dsl_get_numclones(dsl_dataset_t *ds)2633{2634ASSERT(ds->ds_is_snapshot);2635return (dsl_dataset_phys(ds)->ds_num_children - 1);2636}26372638uint64_t2639dsl_get_inconsistent(dsl_dataset_t *ds)2640{2641return ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) ?26421 : 0);2643}26442645uint64_t2646dsl_get_redacted(dsl_dataset_t *ds)2647{2648return (dsl_dataset_feature_is_active(ds,2649SPA_FEATURE_REDACTED_DATASETS));2650}26512652uint64_t2653dsl_get_available(dsl_dataset_t *ds)2654{2655uint64_t refdbytes = dsl_get_referenced(ds);2656uint64_t availbytes = dsl_dir_space_available(ds->ds_dir,2657NULL, 0, TRUE);2658if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) {2659availbytes +=2660ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes;2661}2662if (ds->ds_quota != 0) {2663/*2664* Adjust available bytes according to refquota2665*/2666if (refdbytes < ds->ds_quota) {2667availbytes = MIN(availbytes,2668ds->ds_quota - refdbytes);2669} else {2670availbytes = 0;2671}2672}2673return (availbytes);2674}26752676int2677dsl_get_written(dsl_dataset_t *ds, uint64_t 
*written)2678{2679dsl_pool_t *dp = ds->ds_dir->dd_pool;2680dsl_dataset_t *prev;2681int err = dsl_dataset_hold_obj(dp,2682dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);2683if (err == 0) {2684uint64_t comp, uncomp;2685err = dsl_dataset_space_written(prev, ds, written,2686&comp, &uncomp);2687dsl_dataset_rele(prev, FTAG);2688}2689return (err);2690}26912692/*2693* 'snap' should be a buffer of size ZFS_MAX_DATASET_NAME_LEN.2694*/2695int2696dsl_get_prev_snap(dsl_dataset_t *ds, char *snap)2697{2698dsl_pool_t *dp = ds->ds_dir->dd_pool;2699if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {2700dsl_dataset_name(ds->ds_prev, snap);2701return (0);2702} else {2703return (SET_ERROR(ENOENT));2704}2705}27062707void2708dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval)2709{2710uint64_t nsnaps;2711uint64_t *snaps;2712if (dsl_dataset_get_uint64_array_feature(ds,2713SPA_FEATURE_REDACTED_DATASETS, &nsnaps, &snaps)) {2714fnvlist_add_uint64_array(propval, ZPROP_VALUE, snaps,2715nsnaps);2716}2717}27182719/*2720* Returns the mountpoint property and source for the given dataset in the value2721* and source buffers. 
The value buffer must be at least as large as MAXPATHLEN2722* and the source buffer as least as large a ZFS_MAX_DATASET_NAME_LEN.2723* Returns 0 on success and an error on failure.2724*/2725int2726dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,2727char *source)2728{2729int error;2730dsl_pool_t *dp = ds->ds_dir->dd_pool;27312732/* Retrieve the mountpoint value stored in the zap object */2733error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1,2734ZAP_MAXVALUELEN, value, source);2735if (error != 0) {2736return (error);2737}27382739/*2740* Process the dsname and source to find the full mountpoint string.2741* Can be skipped for 'legacy' or 'none'.2742*/2743if (value[0] == '/') {2744char *buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);2745char *root = buf;2746const char *relpath;27472748/*2749* If we inherit the mountpoint, even from a dataset2750* with a received value, the source will be the path of2751* the dataset we inherit from. If source is2752* ZPROP_SOURCE_VAL_RECVD, the received value is not2753* inherited.2754*/2755if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) {2756relpath = "";2757} else {2758ASSERT0(strncmp(dsname, source, strlen(source)));2759relpath = dsname + strlen(source);2760if (relpath[0] == '/')2761relpath++;2762}27632764spa_altroot(dp->dp_spa, root, ZAP_MAXVALUELEN);27652766/*2767* Special case an alternate root of '/'. 
/*
 * Add the dataset-level statistics and properties of 'ds' to 'nv'.
 * The pool config lock must be held by the caller (asserted below).
 * Values are obtained through the dsl_get_*() accessors; a few entries
 * are only added for snapshots or only for non-snapshots, as noted.
 */
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	/* Only referenced by the ASSERT, hence __maybe_unused. */
	dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;

	ASSERT(dsl_pool_config_held(dp));

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO,
	    dsl_get_refratio(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
	    dsl_get_logicalreferenced(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
	    dsl_get_compressratio(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
	    dsl_get_used(ds));

	if (ds->ds_is_snapshot) {
		/* Snapshots report the list of their clones. */
		get_clones_stat(ds, nv);
	} else {
		/*
		 * Non-snapshots report the previous snapshot's name (when
		 * dsl_get_prev_snap() finds one) and the dsl_dir stats.
		 */
		char buf[ZFS_MAX_DATASET_NAME_LEN];
		if (dsl_get_prev_snap(ds, buf) == 0)
			dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP,
			    buf);
		dsl_dir_stats(ds->ds_dir, nv);
	}

	/*
	 * The redact_snaps entry is always added; propval stays empty when
	 * dsl_get_redact_snaps() has nothing to report.
	 */
	nvlist_t *propval = fnvlist_alloc();
	dsl_get_redact_snaps(ds, propval);
	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
	    propval);
	nvlist_free(propval);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
	    dsl_get_available(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
	    dsl_get_referenced(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    dsl_get_creation(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    dsl_get_creationtxg(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    dsl_get_refquota(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    dsl_get_refreservation(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    dsl_get_guid(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
	    dsl_get_unique(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
	    dsl_get_objsetid(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
	    dsl_get_userrefs(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
	    dsl_get_defer_destroy(ds));
	/* Seconds component of the dsl_dir's snapshot change time. */
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOTS_CHANGED,
	    dsl_dir_snap_cmtime(ds->ds_dir).tv_sec);
	dsl_dataset_crypt_stats(ds, nv);

	/* "written" is only reported when a previous snapshot exists. */
	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		uint64_t written;
		if (dsl_get_written(ds, &written) == 0) {
			dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
			    written);
		}
	}

	/*
	 * Non-snapshots may carry a receive resume token.  The token
	 * string is released here once it has been added to 'nv'.
	 */
	if (!dsl_dataset_is_snapshot(ds)) {
		char *token = get_receive_resume_token(ds);
		if (token != NULL) {
			dsl_prop_nvlist_add_string(nv,
			    ZFS_PROP_RECEIVE_RESUME_TOKEN, token);
			kmem_strfree(token);
		}
	}
}
stat->dds_origin);2900}2901}2902}29032904uint64_t2905dsl_dataset_fsid_guid(dsl_dataset_t *ds)2906{2907return (ds->ds_fsid_guid);2908}29092910void2911dsl_dataset_space(dsl_dataset_t *ds,2912uint64_t *refdbytesp, uint64_t *availbytesp,2913uint64_t *usedobjsp, uint64_t *availobjsp)2914{2915*refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes;2916*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);2917if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes)2918*availbytesp +=2919ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes;2920if (ds->ds_quota != 0) {2921/*2922* Adjust available bytes according to refquota2923*/2924if (*refdbytesp < ds->ds_quota)2925*availbytesp = MIN(*availbytesp,2926ds->ds_quota - *refdbytesp);2927else2928*availbytesp = 0;2929}2930rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);2931*usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp);2932rrw_exit(&ds->ds_bp_rwlock, FTAG);2933*availobjsp = DN_MAX_OBJECT - *usedobjsp;2934}29352936boolean_t2937dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)2938{2939dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;2940uint64_t birth;29412942ASSERT(dsl_pool_config_held(dp));2943if (snap == NULL)2944return (B_FALSE);2945rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);2946birth = BP_GET_BIRTH(dsl_dataset_get_blkptr(ds));2947rrw_exit(&ds->ds_bp_rwlock, FTAG);2948if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {2949objset_t *os, *os_snap;2950/*2951* It may be that only the ZIL differs, because it was2952* reset in the head. 
Don't count that as being2953* modified.2954*/2955if (dmu_objset_from_ds(ds, &os) != 0)2956return (B_TRUE);2957if (dmu_objset_from_ds(snap, &os_snap) != 0)2958return (B_TRUE);2959return (memcmp(&os->os_phys->os_meta_dnode,2960&os_snap->os_phys->os_meta_dnode,2961sizeof (os->os_phys->os_meta_dnode)) != 0);2962}2963return (B_FALSE);2964}29652966static int2967dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,2968dsl_dataset_t *hds, void *arg)2969{2970(void) dp;2971dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;2972int error;2973uint64_t val;29742975error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);2976if (error != 0) {2977/* ignore nonexistent snapshots */2978return (error == ENOENT ? 0 : error);2979}29802981/* new name should not exist */2982error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);2983if (error == 0)2984error = SET_ERROR(EEXIST);2985else if (error == ENOENT)2986error = 0;29872988/* dataset name + 1 for the "@" + the new snapshot name must fit */2989if (dsl_dir_namelen(hds->ds_dir) + 1 +2990strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN)2991error = SET_ERROR(ENAMETOOLONG);29922993return (error);2994}29952996int2997dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)2998{2999dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;3000dsl_pool_t *dp = dmu_tx_pool(tx);3001dsl_dataset_t *hds;3002int error;30033004error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);3005if (error != 0)3006return (error);30073008if (ddrsa->ddrsa_recursive) {3009error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,3010dsl_dataset_rename_snapshot_check_impl, ddrsa,3011DS_FIND_CHILDREN);3012} else {3013error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);3014}3015dsl_dataset_rele(hds, FTAG);3016return (error);3017}30183019static int3020dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,3021dsl_dataset_t *hds, void *arg)3022{3023dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;3024dsl_dataset_t *ds;3025uint64_t 
val;3026dmu_tx_t *tx = ddrsa->ddrsa_tx;3027char *oldname, *newname;3028int error;30293030error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);3031ASSERT(error == 0 || error == ENOENT);3032if (error == ENOENT) {3033/* ignore nonexistent snapshots */3034return (0);3035}30363037VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));30383039/* log before we change the name */3040spa_history_log_internal_ds(ds, "rename", tx,3041"-> @%s", ddrsa->ddrsa_newsnapname);30423043VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,3044B_FALSE));3045mutex_enter(&ds->ds_lock);3046(void) strlcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname,3047sizeof (ds->ds_snapname));3048mutex_exit(&ds->ds_lock);3049VERIFY0(zap_add(dp->dp_meta_objset,3050dsl_dataset_phys(hds)->ds_snapnames_zapobj,3051ds->ds_snapname, 8, 1, &ds->ds_object, tx));30523053oldname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,3054ddrsa->ddrsa_oldsnapname);3055newname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,3056ddrsa->ddrsa_newsnapname);3057zvol_rename_minors(dp->dp_spa, oldname, newname, B_TRUE);3058kmem_strfree(oldname);3059kmem_strfree(newname);30603061dsl_dataset_rele(ds, FTAG);3062return (0);3063}30643065void3066dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)3067{3068dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;3069dsl_pool_t *dp = dmu_tx_pool(tx);3070dsl_dataset_t *hds = NULL;30713072VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));3073ddrsa->ddrsa_tx = tx;3074if (ddrsa->ddrsa_recursive) {3075VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,3076dsl_dataset_rename_snapshot_sync_impl, ddrsa,3077DS_FIND_CHILDREN));3078} else {3079VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));3080}3081dsl_dataset_rele(hds, FTAG);3082}30833084int3085dsl_dataset_rename_snapshot(const char *fsname,3086const char *oldsnapname, const char *newsnapname, boolean_t recursive)3087{3088dsl_dataset_rename_snapshot_arg_t ddrsa;30893090ddrsa.ddrsa_fsname = 
fsname;3091ddrsa.ddrsa_oldsnapname = oldsnapname;3092ddrsa.ddrsa_newsnapname = newsnapname;3093ddrsa.ddrsa_recursive = recursive;30943095return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,3096dsl_dataset_rename_snapshot_sync, &ddrsa,30971, ZFS_SPACE_CHECK_RESERVED));3098}30993100/*3101* If we're doing an ownership handoff, we need to make sure that there is3102* only one long hold on the dataset. We're not allowed to change anything here3103* so we don't permanently release the long hold or regular hold here. We want3104* to do this only when syncing to avoid the dataset unexpectedly going away3105* when we release the long hold.3106*/3107static int3108dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)3109{3110boolean_t held = B_FALSE;31113112if (!dmu_tx_is_syncing(tx))3113return (0);31143115dsl_dir_t *dd = ds->ds_dir;3116mutex_enter(&dd->dd_activity_lock);3117uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -3118(owner != NULL ? 1 : 0);3119/*3120* The value of dd_activity_waiters can chance as soon as we drop the3121* lock, but we're fine with that; new waiters coming in or old3122* waiters leaving doesn't cause problems, since we're going to cancel3123* waiters later anyway. 
The goal of this check is to verify that no3124* non-waiters have long-holds, and all new long-holds will be3125* prevented because we're holding the pool config as writer.3126*/3127if (holds != dd->dd_activity_waiters)3128held = B_TRUE;3129mutex_exit(&dd->dd_activity_lock);31303131if (held)3132return (SET_ERROR(EBUSY));31333134return (0);3135}31363137int3138dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)3139{3140dsl_dataset_rollback_arg_t *ddra = arg;3141dsl_pool_t *dp = dmu_tx_pool(tx);3142dsl_dataset_t *ds;3143int64_t unused_refres_delta;3144int error;31453146error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);3147if (error != 0)3148return (error);31493150/* must not be a snapshot */3151if (ds->ds_is_snapshot) {3152dsl_dataset_rele(ds, FTAG);3153return (SET_ERROR(EINVAL));3154}31553156/* must have a most recent snapshot */3157if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {3158dsl_dataset_rele(ds, FTAG);3159return (SET_ERROR(ESRCH));3160}31613162/*3163* No rollback to a snapshot created in the current txg, because3164* the rollback may dirty the dataset and create blocks that are3165* not reachable from the rootbp while having a birth txg that3166* falls into the snapshot's range.3167*/3168if (dmu_tx_is_syncing(tx) &&3169dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) {3170dsl_dataset_rele(ds, FTAG);3171return (SET_ERROR(EAGAIN));3172}31733174/*3175* If the expected target snapshot is specified, then check that3176* the latest snapshot is it.3177*/3178if (ddra->ddra_tosnap != NULL) {3179dsl_dataset_t *snapds;31803181/* Check if the target snapshot exists at all. 
*/3182error = dsl_dataset_hold(dp, ddra->ddra_tosnap, FTAG, &snapds);3183if (error != 0) {3184/*3185* ESRCH is used to signal that the target snapshot does3186* not exist, while ENOENT is used to report that3187* the rolled back dataset does not exist.3188* ESRCH is also used to cover other cases where the3189* target snapshot is not related to the dataset being3190* rolled back such as being in a different pool.3191*/3192if (error == ENOENT || error == EXDEV)3193error = SET_ERROR(ESRCH);3194dsl_dataset_rele(ds, FTAG);3195return (error);3196}3197ASSERT(snapds->ds_is_snapshot);31983199/* Check if the snapshot is the latest snapshot indeed. */3200if (snapds != ds->ds_prev) {3201/*3202* Distinguish between the case where the only problem3203* is intervening snapshots (EEXIST) vs the snapshot3204* not being a valid target for rollback (ESRCH).3205*/3206if (snapds->ds_dir == ds->ds_dir ||3207(dsl_dir_is_clone(ds->ds_dir) &&3208dsl_dir_phys(ds->ds_dir)->dd_origin_obj ==3209snapds->ds_object)) {3210error = SET_ERROR(EEXIST);3211} else {3212error = SET_ERROR(ESRCH);3213}3214dsl_dataset_rele(snapds, FTAG);3215dsl_dataset_rele(ds, FTAG);3216return (error);3217}3218dsl_dataset_rele(snapds, FTAG);3219}32203221/* must not have any bookmarks after the most recent snapshot */3222if (dsl_bookmark_latest_txg(ds) >3223dsl_dataset_phys(ds)->ds_prev_snap_txg) {3224dsl_dataset_rele(ds, FTAG);3225return (SET_ERROR(EEXIST));3226}32273228error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);3229if (error != 0) {3230dsl_dataset_rele(ds, FTAG);3231return (error);3232}32333234/*3235* Check if the snap we are rolling back to uses more than3236* the refquota.3237*/3238if (ds->ds_quota != 0 &&3239dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) {3240dsl_dataset_rele(ds, FTAG);3241return (SET_ERROR(EDQUOT));3242}32433244/*3245* When we do the clone swap, we will temporarily use more space3246* due to the refreservation (the head will no longer have any3247* unique space, 
/*
 * Sync-task callback for dsl_dataset_rollback().  'arg' is the
 * dsl_dataset_rollback_arg_t.  The dataset named by ddra_fsname is rolled
 * back to ds->ds_prev by creating a temporary "%rollback" dataset from
 * that snapshot, swapping it with the head, and destroying the temporary
 * dataset.  The name of ds->ds_prev is recorded in ddra_result under the
 * key "target".
 */
void
dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rollback_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds, *clone;
	uint64_t cloneobj;
	char namebuf[ZFS_MAX_DATASET_NAME_LEN];

	VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));

	/* Report which snapshot is being rolled back to. */
	dsl_dataset_name(ds->ds_prev, namebuf);
	fnvlist_add_string(ddra->ddra_result, "target", namebuf);

	/* Create the temporary dataset from the previous snapshot. */
	cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx);

	VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));

	/* Swap the temporary dataset with the head, then zero the ZIL. */
	dsl_dataset_clone_swap_sync_impl(clone, ds, tx);
	dsl_dataset_zero_zil(ds, tx);

	/* The temporary dataset is no longer needed after the swap. */
	dsl_destroy_head_sync_impl(clone, tx);

	dsl_dataset_rele(clone, FTAG);
	dsl_dataset_rele(ds, FTAG);
}
See3304* notes above zfs_suspend_fs() for further details.3305*/3306int3307dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,3308nvlist_t *result)3309{3310dsl_dataset_rollback_arg_t ddra;33113312ddra.ddra_fsname = fsname;3313ddra.ddra_tosnap = tosnap;3314ddra.ddra_owner = owner;3315ddra.ddra_result = result;33163317return (dsl_sync_task(fsname, dsl_dataset_rollback_check,3318dsl_dataset_rollback_sync, &ddra,33191, ZFS_SPACE_CHECK_RESERVED));3320}33213322int3323dsl_dataset_clone_check(void *arg, dmu_tx_t *tx)3324{3325dsl_dataset_clone_arg_t *ddca = arg;3326dsl_dir_t *pdd;3327const char *tail;3328int error;3329dsl_dataset_t *origin;3330dsl_pool_t *dp = dmu_tx_pool(tx);33313332if (strchr(ddca->ddca_clone, '@') != NULL)3333return (SET_ERROR(EINVAL));33343335if (strlen(ddca->ddca_clone) >= ZFS_MAX_DATASET_NAME_LEN)3336return (SET_ERROR(ENAMETOOLONG));33373338error = dsl_dir_hold(dp, ddca->ddca_clone, FTAG, &pdd, &tail);3339if (error != 0)3340return (error);3341if (tail == NULL) {3342dsl_dir_rele(pdd, FTAG);3343return (SET_ERROR(EEXIST));3344}33453346error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,3347ddca->ddca_cred);3348if (error != 0) {3349dsl_dir_rele(pdd, FTAG);3350return (SET_ERROR(EDQUOT));3351}33523353error = dsl_dataset_hold(dp, ddca->ddca_origin, FTAG, &origin);3354if (error != 0) {3355dsl_dir_rele(pdd, FTAG);3356return (error);3357}33583359/* You can only clone snapshots, not the head datasets. 
*/3360if (!origin->ds_is_snapshot) {3361dsl_dataset_rele(origin, FTAG);3362dsl_dir_rele(pdd, FTAG);3363return (SET_ERROR(EINVAL));3364}33653366dsl_dataset_rele(origin, FTAG);3367dsl_dir_rele(pdd, FTAG);33683369return (0);3370}33713372void3373dsl_dataset_clone_sync(void *arg, dmu_tx_t *tx)3374{3375dsl_dataset_clone_arg_t *ddca = arg;3376dsl_pool_t *dp = dmu_tx_pool(tx);3377dsl_dir_t *pdd;3378const char *tail;3379dsl_dataset_t *origin, *ds;3380uint64_t obj;3381char namebuf[ZFS_MAX_DATASET_NAME_LEN];33823383VERIFY0(dsl_dir_hold(dp, ddca->ddca_clone, FTAG, &pdd, &tail));3384VERIFY0(dsl_dataset_hold(dp, ddca->ddca_origin, FTAG, &origin));33853386obj = dsl_dataset_create_sync(pdd, tail, origin, 0,3387ddca->ddca_cred, NULL, tx);33883389VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));3390dsl_dataset_name(origin, namebuf);3391spa_history_log_internal_ds(ds, "clone", tx,3392"origin=%s (%llu)", namebuf, (u_longlong_t)origin->ds_object);3393dsl_dataset_rele(ds, FTAG);3394dsl_dataset_rele(origin, FTAG);3395dsl_dir_rele(pdd, FTAG);3396}33973398int3399dsl_dataset_clone(const char *clone, const char *origin)3400{3401dsl_dataset_clone_arg_t ddca;34023403cred_t *cr = CRED();3404crhold(cr);34053406ddca.ddca_clone = clone;3407ddca.ddca_origin = origin;3408ddca.ddca_cred = cr;34093410int rv = dsl_sync_task(clone,3411dsl_dataset_clone_check, dsl_dataset_clone_sync, &ddca,34126, ZFS_SPACE_CHECK_NORMAL);34133414if (rv == 0)3415zvol_create_minors(clone);34163417crfree(cr);34183419return (rv);3420}34213422struct promotenode {3423list_node_t link;3424dsl_dataset_t *ds;3425};34263427static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);3428static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,3429const void *tag);3430static void promote_rele(dsl_dataset_promote_arg_t *ddpa, const void *tag);34313432int3433dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)3434{3435dsl_dataset_promote_arg_t *ddpa = arg;3436dsl_pool_t *dp = 
dmu_tx_pool(tx);3437dsl_dataset_t *hds;3438struct promotenode *snap;3439int err;3440uint64_t unused;3441uint64_t ss_mv_cnt;3442size_t max_snap_len;3443boolean_t conflicting_snaps;34443445err = promote_hold(ddpa, dp, FTAG);3446if (err != 0)3447return (err);34483449hds = ddpa->ddpa_clone;3450max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1;34513452if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) {3453promote_rele(ddpa, FTAG);3454return (SET_ERROR(EXDEV));3455}34563457snap = list_head(&ddpa->shared_snaps);3458if (snap == NULL) {3459err = SET_ERROR(ENOENT);3460goto out;3461}3462dsl_dataset_t *const origin_ds = snap->ds;34633464/*3465* Encrypted clones share a DSL Crypto Key with their origin's dsl dir.3466* When doing a promote we must make sure the encryption root for3467* both the target and the target's origin does not change to avoid3468* needing to rewrap encryption keys3469*/3470err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir);3471if (err != 0)3472goto out;34733474/*3475* Compute and check the amount of space to transfer. Since this is3476* so expensive, don't do the preliminary check.3477*/3478if (!dmu_tx_is_syncing(tx)) {3479promote_rele(ddpa, FTAG);3480return (0);3481}34823483/* compute origin's new unique space */3484snap = list_tail(&ddpa->clone_snaps);3485ASSERT(snap != NULL);3486ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,3487origin_ds->ds_object);3488dsl_deadlist_space_range(&snap->ds->ds_deadlist,3489dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX,3490&ddpa->unique, &unused, &unused);34913492/*3493* Walk the snapshots that we are moving3494*3495* Compute space to transfer. Consider the incremental changes3496* to used by each snapshot:3497* (my used) = (prev's used) + (blocks born) - (blocks killed)3498* So each snapshot gave birth to:3499* (blocks born) = (my used) - (prev's used) + (blocks killed)3500* So a sequence would look like:3501* (uN - u(N-1) + kN) + ... 
+ (u1 - u0 + k1) + (u0 - 0 + k0)3502* Which simplifies to:3503* uN + kN + kN-1 + ... + k1 + k03504* Note however, if we stop before we reach the ORIGIN we get:3505* uN + kN + kN-1 + ... + kM - uM-13506*/3507conflicting_snaps = B_FALSE;3508ss_mv_cnt = 0;3509ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes;3510ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes;3511ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes;3512for (snap = list_head(&ddpa->shared_snaps); snap;3513snap = list_next(&ddpa->shared_snaps, snap)) {3514uint64_t val, dlused, dlcomp, dluncomp;3515dsl_dataset_t *ds = snap->ds;35163517ss_mv_cnt++;35183519/*3520* If there are long holds, we won't be able to evict3521* the objset.3522*/3523if (dsl_dataset_long_held(ds)) {3524err = SET_ERROR(EBUSY);3525goto out;3526}35273528/* Check that the snapshot name does not conflict */3529VERIFY0(dsl_dataset_get_snapname(ds));3530if (strlen(ds->ds_snapname) >= max_snap_len) {3531err = SET_ERROR(ENAMETOOLONG);3532goto out;3533}3534err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);3535if (err == 0) {3536fnvlist_add_boolean(ddpa->err_ds,3537snap->ds->ds_snapname);3538conflicting_snaps = B_TRUE;3539} else if (err != ENOENT) {3540goto out;3541}35423543/* The very first snapshot does not have a deadlist */3544if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0)3545continue;35463547dsl_deadlist_space(&ds->ds_deadlist,3548&dlused, &dlcomp, &dluncomp);3549ddpa->used += dlused;3550ddpa->comp += dlcomp;3551ddpa->uncomp += dluncomp;3552}35533554/*3555* Check that bookmarks that are being transferred don't have3556* name conflicts.3557*/3558for (dsl_bookmark_node_t *dbn = avl_first(&origin_ds->ds_bookmarks);3559dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=3560dsl_dataset_phys(origin_ds)->ds_creation_txg;3561dbn = AVL_NEXT(&origin_ds->ds_bookmarks, dbn)) {3562if (strlen(dbn->dbn_name) >= max_snap_len) {3563err = SET_ERROR(ENAMETOOLONG);3564goto out;3565}3566zfs_bookmark_phys_t 
bm;3567err = dsl_bookmark_lookup_impl(ddpa->ddpa_clone,3568dbn->dbn_name, &bm);35693570if (err == 0) {3571fnvlist_add_boolean(ddpa->err_ds, dbn->dbn_name);3572conflicting_snaps = B_TRUE;3573} else if (err == ESRCH) {3574err = 0;3575}3576if (err != 0) {3577goto out;3578}3579}35803581/*3582* In order to return the full list of conflicting snapshots, we check3583* whether there was a conflict after traversing all of them.3584*/3585if (conflicting_snaps) {3586err = SET_ERROR(EEXIST);3587goto out;3588}35893590/*3591* If we are a clone of a clone then we never reached ORIGIN,3592* so we need to subtract out the clone origin's used space.3593*/3594if (ddpa->origin_origin) {3595ddpa->used -=3596dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes;3597ddpa->comp -=3598dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes;3599ddpa->uncomp -=3600dsl_dataset_phys(ddpa->origin_origin)->3601ds_uncompressed_bytes;3602}36033604/* Check that there is enough space and limit headroom here */3605err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,36060, ss_mv_cnt, ddpa->used, ddpa->cr);3607if (err != 0)3608goto out;36093610/*3611* Compute the amounts of space that will be used by snapshots3612* after the promotion (for both origin and clone). 
For each,3613* it is the amount of space that will be on all of their3614* deadlists (that was not born before their new origin).3615*/3616if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) {3617uint64_t space;36183619/*3620* Note, typically this will not be a clone of a clone,3621* so dd_origin_txg will be < TXG_INITIAL, so3622* these snaplist_space() -> dsl_deadlist_space_range()3623* calls will be fast because they do not have to3624* iterate over all bps.3625*/3626snap = list_head(&ddpa->origin_snaps);3627if (snap == NULL) {3628err = SET_ERROR(ENOENT);3629goto out;3630}3631err = snaplist_space(&ddpa->shared_snaps,3632snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);3633if (err != 0)3634goto out;36353636err = snaplist_space(&ddpa->clone_snaps,3637snap->ds->ds_dir->dd_origin_txg, &space);3638if (err != 0)3639goto out;3640ddpa->cloneusedsnap += space;3641}3642if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags &3643DD_FLAG_USED_BREAKDOWN) {3644err = snaplist_space(&ddpa->origin_snaps,3645dsl_dataset_phys(origin_ds)->ds_creation_txg,3646&ddpa->originusedsnap);3647if (err != 0)3648goto out;3649}36503651out:3652promote_rele(ddpa, FTAG);3653return (err);3654}36553656void3657dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)3658{3659dsl_dataset_promote_arg_t *ddpa = arg;3660dsl_pool_t *dp = dmu_tx_pool(tx);3661dsl_dataset_t *hds;3662struct promotenode *snap;3663dsl_dataset_t *origin_ds;3664dsl_dataset_t *origin_head;3665dsl_dir_t *dd;3666dsl_dir_t *odd = NULL;3667uint64_t oldnext_obj;3668int64_t delta;36693670ASSERT(nvlist_empty(ddpa->err_ds));36713672VERIFY0(promote_hold(ddpa, dp, FTAG));3673hds = ddpa->ddpa_clone;36743675ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE);36763677snap = list_head(&ddpa->shared_snaps);3678origin_ds = snap->ds;3679dd = hds->ds_dir;36803681snap = list_head(&ddpa->origin_snaps);3682origin_head = snap->ds;36833684/*3685* We need to explicitly open odd, since origin_ds's dd will be3686* 
changing.3687*/3688VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,3689NULL, FTAG, &odd));36903691dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx);36923693/* change origin's next snap */3694dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);3695oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj;3696snap = list_tail(&ddpa->clone_snaps);3697ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,3698origin_ds->ds_object);3699dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object;37003701/* change the origin's next clone */3702if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) {3703dsl_dataset_remove_from_next_clones(origin_ds,3704snap->ds->ds_object, tx);3705VERIFY0(zap_add_int(dp->dp_meta_objset,3706dsl_dataset_phys(origin_ds)->ds_next_clones_obj,3707oldnext_obj, tx));3708}37093710/* change origin */3711dmu_buf_will_dirty(dd->dd_dbuf, tx);3712ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object);3713dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj;3714dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;3715dmu_buf_will_dirty(odd->dd_dbuf, tx);3716dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object;3717origin_head->ds_dir->dd_origin_txg =3718dsl_dataset_phys(origin_ds)->ds_creation_txg;37193720/* change dd_clone entries */3721if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {3722VERIFY0(zap_remove_int(dp->dp_meta_objset,3723dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx));3724VERIFY0(zap_add_int(dp->dp_meta_objset,3725dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones,3726hds->ds_object, tx));37273728VERIFY0(zap_remove_int(dp->dp_meta_objset,3729dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones,3730origin_head->ds_object, tx));3731if (dsl_dir_phys(dd)->dd_clones == 0) {3732dsl_dir_phys(dd)->dd_clones =3733zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES,3734DMU_OT_NONE, 0, tx);3735}3736VERIFY0(zap_add_int(dp->dp_meta_objset,3737dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, 
tx));3738}37393740/*3741* Move bookmarks to this dir.3742*/3743dsl_bookmark_node_t *dbn_next;3744for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks);3745dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=3746dsl_dataset_phys(origin_ds)->ds_creation_txg;3747dbn = dbn_next) {3748dbn_next = AVL_NEXT(&origin_head->ds_bookmarks, dbn);37493750avl_remove(&origin_head->ds_bookmarks, dbn);3751VERIFY0(zap_remove(dp->dp_meta_objset,3752origin_head->ds_bookmarks_obj, dbn->dbn_name, tx));37533754dsl_bookmark_node_add(hds, dbn, tx);3755}37563757dsl_bookmark_next_changed(hds, origin_ds, tx);37583759/* move snapshots to this dir */3760for (snap = list_head(&ddpa->shared_snaps); snap;3761snap = list_next(&ddpa->shared_snaps, snap)) {3762dsl_dataset_t *ds = snap->ds;37633764/*3765* Property callbacks are registered to a particular3766* dsl_dir. Since ours is changing, evict the objset3767* so that they will be unregistered from the old dsl_dir.3768*/3769if (ds->ds_objset) {3770dmu_objset_evict(ds->ds_objset);3771ds->ds_objset = NULL;3772}37733774/* move snap name entry */3775VERIFY0(dsl_dataset_get_snapname(ds));3776VERIFY0(dsl_dataset_snap_remove(origin_head,3777ds->ds_snapname, tx, B_TRUE));3778VERIFY0(zap_add(dp->dp_meta_objset,3779dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname,37808, 1, &ds->ds_object, tx));3781dsl_fs_ss_count_adjust(hds->ds_dir, 1,3782DD_FIELD_SNAPSHOT_COUNT, tx);37833784/* change containing dsl_dir */3785dmu_buf_will_dirty(ds->ds_dbuf, tx);3786ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object);3787dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object;3788ASSERT3P(ds->ds_dir, ==, odd);3789dsl_dir_rele(ds->ds_dir, ds);3790VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,3791NULL, ds, &ds->ds_dir));37923793/* move any clone references */3794if (dsl_dataset_phys(ds)->ds_next_clones_obj &&3795spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {3796zap_cursor_t zc;3797zap_attribute_t *za = zap_attribute_alloc();37983799for 
(zap_cursor_init(&zc, dp->dp_meta_objset,3800dsl_dataset_phys(ds)->ds_next_clones_obj);3801zap_cursor_retrieve(&zc, za) == 0;3802zap_cursor_advance(&zc)) {3803dsl_dataset_t *cnds;3804uint64_t o;38053806if (za->za_first_integer == oldnext_obj) {3807/*3808* We've already moved the3809* origin's reference.3810*/3811continue;3812}38133814VERIFY0(dsl_dataset_hold_obj(dp,3815za->za_first_integer, FTAG, &cnds));3816o = dsl_dir_phys(cnds->ds_dir)->3817dd_head_dataset_obj;38183819VERIFY0(zap_remove_int(dp->dp_meta_objset,3820dsl_dir_phys(odd)->dd_clones, o, tx));3821VERIFY0(zap_add_int(dp->dp_meta_objset,3822dsl_dir_phys(dd)->dd_clones, o, tx));3823dsl_dataset_rele(cnds, FTAG);3824}3825zap_cursor_fini(&zc);3826zap_attribute_free(za);3827}38283829ASSERT(!dsl_prop_hascb(ds));3830}38313832/*3833* Change space accounting.3834* Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either3835* both be valid, or both be 0 (resulting in delta == 0). This3836* is true for each of {clone,origin} independently.3837*/38383839delta = ddpa->cloneusedsnap -3840dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP];3841ASSERT3S(delta, >=, 0);3842ASSERT3U(ddpa->used, >=, delta);3843dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);3844dsl_dir_diduse_space(dd, DD_USED_HEAD,3845ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);38463847delta = ddpa->originusedsnap -3848dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP];3849ASSERT3S(delta, <=, 0);3850ASSERT3U(ddpa->used, >=, -delta);3851dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);3852dsl_dir_diduse_space(odd, DD_USED_HEAD,3853-ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);38543855dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique;38563857/*3858* Since livelists are specific to a clone's origin txg, they3859* are no longer accurate. Destroy the livelist from the clone being3860* promoted. 
If the origin dataset is a clone, destroy its livelist3861* as well.3862*/3863dsl_dir_remove_livelist(dd, tx, B_TRUE);3864dsl_dir_remove_livelist(odd, tx, B_TRUE);38653866/* log history record */3867spa_history_log_internal_ds(hds, "promote", tx, " ");38683869dsl_dir_rele(odd, FTAG);38703871/*3872* Transfer common error blocks from old head to new head, before3873* calling promote_rele() on ddpa since we need to dereference3874* origin_head and hds.3875*/3876if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG)) {3877uint64_t old_head = origin_head->ds_object;3878uint64_t new_head = hds->ds_object;3879spa_swap_errlog(dp->dp_spa, new_head, old_head, tx);3880}38813882promote_rele(ddpa, FTAG);3883}38843885/*3886* Make a list of dsl_dataset_t's for the snapshots between first_obj3887* (exclusive) and last_obj (inclusive). The list will be in reverse3888* order (last_obj will be the list_head()). If first_obj == 0, do all3889* snapshots back to this dataset's origin.3890*/3891static int3892snaplist_make(dsl_pool_t *dp,3893uint64_t first_obj, uint64_t last_obj, list_t *l, const void *tag)3894{3895uint64_t obj = last_obj;38963897list_create(l, sizeof (struct promotenode),3898offsetof(struct promotenode, link));38993900while (obj != first_obj) {3901dsl_dataset_t *ds;3902struct promotenode *snap;3903int err;39043905err = dsl_dataset_hold_obj(dp, obj, tag, &ds);3906ASSERT(err != ENOENT);3907if (err != 0)3908return (err);39093910if (first_obj == 0)3911first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj;39123913snap = kmem_alloc(sizeof (*snap), KM_SLEEP);3914snap->ds = ds;3915list_insert_tail(l, snap);3916obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;3917}39183919return (0);3920}39213922static int3923snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)3924{3925struct promotenode *snap;39263927*spacep = 0;3928for (snap = list_head(l); snap; snap = list_next(l, snap)) {3929uint64_t used, comp, 
uncomp;3930dsl_deadlist_space_range(&snap->ds->ds_deadlist,3931mintxg, UINT64_MAX, &used, &comp, &uncomp);3932*spacep += used;3933}3934return (0);3935}39363937static void3938snaplist_destroy(list_t *l, const void *tag)3939{3940struct promotenode *snap;39413942if (l == NULL || !list_link_active(&l->list_head))3943return;39443945while ((snap = list_remove_tail(l)) != NULL) {3946dsl_dataset_rele(snap->ds, tag);3947kmem_free(snap, sizeof (*snap));3948}3949list_destroy(l);3950}39513952static int3953promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, const void *tag)3954{3955int error;3956dsl_dir_t *dd;3957struct promotenode *snap;39583959error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,3960&ddpa->ddpa_clone);3961if (error != 0)3962return (error);3963dd = ddpa->ddpa_clone->ds_dir;39643965if (ddpa->ddpa_clone->ds_is_snapshot ||3966!dsl_dir_is_clone(dd)) {3967dsl_dataset_rele(ddpa->ddpa_clone, tag);3968return (SET_ERROR(EINVAL));3969}39703971error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,3972&ddpa->shared_snaps, tag);3973if (error != 0)3974goto out;39753976error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,3977&ddpa->clone_snaps, tag);3978if (error != 0)3979goto out;39803981snap = list_head(&ddpa->shared_snaps);3982ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);3983error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,3984dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,3985&ddpa->origin_snaps, tag);3986if (error != 0)3987goto out;39883989if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) {3990error = dsl_dataset_hold_obj(dp,3991dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj,3992tag, &ddpa->origin_origin);3993if (error != 0)3994goto out;3995}3996out:3997if (error != 0)3998promote_rele(ddpa, tag);3999return (error);4000}40014002static void4003promote_rele(dsl_dataset_promote_arg_t *ddpa, const void *tag)4004{4005snaplist_destroy(&ddpa->shared_snaps, tag);4006snaplist_destroy(&ddpa->clone_snaps, 
tag);4007snaplist_destroy(&ddpa->origin_snaps, tag);4008if (ddpa->origin_origin != NULL)4009dsl_dataset_rele(ddpa->origin_origin, tag);4010dsl_dataset_rele(ddpa->ddpa_clone, tag);4011}40124013/*4014* Promote a clone.4015*4016* If it fails due to a conflicting snapshot name, "conflsnap" will be filled4017* in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.)4018*/4019int4020dsl_dataset_promote(const char *name, char *conflsnap)4021{4022dsl_dataset_promote_arg_t ddpa = { 0 };4023uint64_t numsnaps;4024int error;4025nvpair_t *snap_pair;4026objset_t *os;40274028/*4029* We will modify space proportional to the number of4030* snapshots. Compute numsnaps.4031*/4032error = dmu_objset_hold(name, FTAG, &os);4033if (error != 0)4034return (error);4035error = zap_count(dmu_objset_pool(os)->dp_meta_objset,4036dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj,4037&numsnaps);4038dmu_objset_rele(os, FTAG);4039if (error != 0)4040return (error);40414042cred_t *cr = CRED();4043crhold(cr);40444045ddpa.ddpa_clonename = name;4046ddpa.err_ds = fnvlist_alloc();4047ddpa.cr = cr;40484049error = dsl_sync_task(name, dsl_dataset_promote_check,4050dsl_dataset_promote_sync, &ddpa,40512 + numsnaps, ZFS_SPACE_CHECK_RESERVED);40524053crfree(cr);40544055/*4056* Return the first conflicting snapshot found.4057*/4058snap_pair = nvlist_next_nvpair(ddpa.err_ds, NULL);4059if (snap_pair != NULL && conflsnap != NULL)4060(void) strlcpy(conflsnap, nvpair_name(snap_pair),4061ZFS_MAX_DATASET_NAME_LEN);40624063fnvlist_free(ddpa.err_ds);4064return (error);4065}40664067int4068dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,4069dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)4070{4071/*4072* "slack" factor for received datasets with refquota set on them.4073* See the bottom of this function for details on its use.4074*/4075uint64_t refquota_slack = (uint64_t)DMU_MAX_ACCESS *4076spa_asize_inflation;4077int64_t unused_refres_delta;40784079/* they should both 
be heads */4080if (clone->ds_is_snapshot ||4081origin_head->ds_is_snapshot)4082return (SET_ERROR(EINVAL));40834084/* if we are not forcing, the branch point should be just before them */4085if (!force && clone->ds_prev != origin_head->ds_prev)4086return (SET_ERROR(EINVAL));40874088/* clone should be the clone (unless they are unrelated) */4089if (clone->ds_prev != NULL &&4090clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&4091origin_head->ds_dir != clone->ds_prev->ds_dir)4092return (SET_ERROR(EINVAL));40934094/* the clone should be a child of the origin */4095if (clone->ds_dir->dd_parent != origin_head->ds_dir)4096return (SET_ERROR(EINVAL));40974098/* origin_head shouldn't be modified unless 'force' */4099if (!force &&4100dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))4101return (SET_ERROR(ETXTBSY));41024103/* origin_head should have no long holds (e.g. is not mounted) */4104if (dsl_dataset_handoff_check(origin_head, owner, tx))4105return (SET_ERROR(EBUSY));41064107/* check amount of any unconsumed refreservation */4108unused_refres_delta =4109(int64_t)MIN(origin_head->ds_reserved,4110dsl_dataset_phys(origin_head)->ds_unique_bytes) -4111(int64_t)MIN(origin_head->ds_reserved,4112dsl_dataset_phys(clone)->ds_unique_bytes);41134114if (unused_refres_delta > 0 &&4115unused_refres_delta >4116dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))4117return (SET_ERROR(ENOSPC));41184119/*4120* The clone can't be too much over the head's refquota.4121*4122* To ensure that the entire refquota can be used, we allow one4123* transaction to exceed the refquota. Therefore, this check4124* needs to also allow for the space referenced to be more than the4125* refquota. The maximum amount of space that one transaction can use4126* on disk is DMU_MAX_ACCESS * spa_asize_inflation. 
Allowing this4127* overage ensures that we are able to receive a filesystem that4128* exceeds the refquota on the source system.4129*4130* So that overage is the refquota_slack we use below.4131*/4132if (origin_head->ds_quota != 0 &&4133dsl_dataset_phys(clone)->ds_referenced_bytes >4134origin_head->ds_quota + refquota_slack)4135return (SET_ERROR(EDQUOT));41364137return (0);4138}41394140static void4141dsl_dataset_swap_remap_deadlists(dsl_dataset_t *clone,4142dsl_dataset_t *origin, dmu_tx_t *tx)4143{4144uint64_t clone_remap_dl_obj, origin_remap_dl_obj;4145dsl_pool_t *dp = dmu_tx_pool(tx);41464147ASSERT(dsl_pool_sync_context(dp));41484149clone_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(clone);4150origin_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(origin);41514152if (clone_remap_dl_obj != 0) {4153dsl_deadlist_close(&clone->ds_remap_deadlist);4154dsl_dataset_unset_remap_deadlist_object(clone, tx);4155}4156if (origin_remap_dl_obj != 0) {4157dsl_deadlist_close(&origin->ds_remap_deadlist);4158dsl_dataset_unset_remap_deadlist_object(origin, tx);4159}41604161if (clone_remap_dl_obj != 0) {4162dsl_dataset_set_remap_deadlist_object(origin,4163clone_remap_dl_obj, tx);4164VERIFY0(dsl_deadlist_open(&origin->ds_remap_deadlist,4165dp->dp_meta_objset, clone_remap_dl_obj));4166}4167if (origin_remap_dl_obj != 0) {4168dsl_dataset_set_remap_deadlist_object(clone,4169origin_remap_dl_obj, tx);4170VERIFY0(dsl_deadlist_open(&clone->ds_remap_deadlist,4171dp->dp_meta_objset, origin_remap_dl_obj));4172}4173}41744175void4176dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,4177dsl_dataset_t *origin_head, dmu_tx_t *tx)4178{4179dsl_pool_t *dp = dmu_tx_pool(tx);4180int64_t unused_refres_delta;41814182ASSERT0(clone->ds_reserved);4183/*4184* NOTE: On DEBUG kernels there could be a race between this and4185* the check function if spa_asize_inflation is adjusted...4186*/4187ASSERT(origin_head->ds_quota == 0 ||4188dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota 
+4189DMU_MAX_ACCESS * spa_asize_inflation);4190ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);41914192dsl_dir_cancel_waiters(origin_head->ds_dir);41934194/*4195* Swap per-dataset feature flags.4196*/4197for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {4198if (!(spa_feature_table[f].fi_flags &4199ZFEATURE_FLAG_PER_DATASET)) {4200ASSERT(!dsl_dataset_feature_is_active(clone, f));4201ASSERT(!dsl_dataset_feature_is_active(origin_head, f));4202continue;4203}42044205boolean_t clone_inuse = dsl_dataset_feature_is_active(clone, f);4206void *clone_feature = clone->ds_feature[f];4207boolean_t origin_head_inuse =4208dsl_dataset_feature_is_active(origin_head, f);4209void *origin_head_feature = origin_head->ds_feature[f];42104211if (clone_inuse)4212dsl_dataset_deactivate_feature_impl(clone, f, tx);4213if (origin_head_inuse)4214dsl_dataset_deactivate_feature_impl(origin_head, f, tx);42154216if (clone_inuse) {4217dsl_dataset_activate_feature(origin_head->ds_object, f,4218clone_feature, tx);4219origin_head->ds_feature[f] = clone_feature;4220}4221if (origin_head_inuse) {4222dsl_dataset_activate_feature(clone->ds_object, f,4223origin_head_feature, tx);4224clone->ds_feature[f] = origin_head_feature;4225}4226}42274228dmu_buf_will_dirty(clone->ds_dbuf, tx);4229dmu_buf_will_dirty(origin_head->ds_dbuf, tx);42304231if (clone->ds_objset != NULL) {4232dmu_objset_evict(clone->ds_objset);4233clone->ds_objset = NULL;4234}42354236if (origin_head->ds_objset != NULL) {4237dmu_objset_evict(origin_head->ds_objset);4238origin_head->ds_objset = NULL;4239}42404241unused_refres_delta =4242(int64_t)MIN(origin_head->ds_reserved,4243dsl_dataset_phys(origin_head)->ds_unique_bytes) -4244(int64_t)MIN(origin_head->ds_reserved,4245dsl_dataset_phys(clone)->ds_unique_bytes);42464247/*4248* Reset origin's unique bytes.4249*/4250{4251dsl_dataset_t *origin = clone->ds_prev;4252uint64_t comp, uncomp;42534254dmu_buf_will_dirty(origin->ds_dbuf, 
tx);4255dsl_deadlist_space_range(&clone->ds_deadlist,4256dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX,4257&dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp);4258}42594260/* swap blkptrs */4261{4262rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG);4263rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG);4264blkptr_t tmp;4265tmp = dsl_dataset_phys(origin_head)->ds_bp;4266dsl_dataset_phys(origin_head)->ds_bp =4267dsl_dataset_phys(clone)->ds_bp;4268dsl_dataset_phys(clone)->ds_bp = tmp;4269rrw_exit(&origin_head->ds_bp_rwlock, FTAG);4270rrw_exit(&clone->ds_bp_rwlock, FTAG);4271}42724273/* set dd_*_bytes */4274{4275int64_t dused, dcomp, duncomp;4276uint64_t cdl_used, cdl_comp, cdl_uncomp;4277uint64_t odl_used, odl_comp, odl_uncomp;42784279ASSERT3U(dsl_dir_phys(clone->ds_dir)->4280dd_used_breakdown[DD_USED_SNAP], ==, 0);42814282dsl_deadlist_space(&clone->ds_deadlist,4283&cdl_used, &cdl_comp, &cdl_uncomp);4284dsl_deadlist_space(&origin_head->ds_deadlist,4285&odl_used, &odl_comp, &odl_uncomp);42864287dused = dsl_dataset_phys(clone)->ds_referenced_bytes +4288cdl_used -4289(dsl_dataset_phys(origin_head)->ds_referenced_bytes +4290odl_used);4291dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes +4292cdl_comp -4293(dsl_dataset_phys(origin_head)->ds_compressed_bytes +4294odl_comp);4295duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes +4296cdl_uncomp -4297(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes +4298odl_uncomp);42994300dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,4301dused, dcomp, duncomp, tx);4302dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,4303-dused, -dcomp, -duncomp, tx);43044305/*4306* The difference in the space used by snapshots is the4307* difference in snapshot space due to the head's4308* deadlist (since that's the only thing that's4309* changing that affects the snapused).4310*/4311dsl_deadlist_space_range(&clone->ds_deadlist,4312origin_head->ds_dir->dd_origin_txg, UINT64_MAX,4313&cdl_used, &cdl_comp, 
&cdl_uncomp);4314dsl_deadlist_space_range(&origin_head->ds_deadlist,4315origin_head->ds_dir->dd_origin_txg, UINT64_MAX,4316&odl_used, &odl_comp, &odl_uncomp);4317dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,4318DD_USED_HEAD, DD_USED_SNAP, tx);4319}43204321/* swap ds_*_bytes */4322SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes,4323dsl_dataset_phys(clone)->ds_referenced_bytes);4324SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes,4325dsl_dataset_phys(clone)->ds_compressed_bytes);4326SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes,4327dsl_dataset_phys(clone)->ds_uncompressed_bytes);4328SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes,4329dsl_dataset_phys(clone)->ds_unique_bytes);43304331/* apply any parent delta for change in unconsumed refreservation */4332dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,4333unused_refres_delta, 0, 0, tx);43344335/*4336* Swap deadlists.4337*/4338dsl_deadlist_close(&clone->ds_deadlist);4339dsl_deadlist_close(&origin_head->ds_deadlist);4340SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj,4341dsl_dataset_phys(clone)->ds_deadlist_obj);4342VERIFY0(dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,4343dsl_dataset_phys(clone)->ds_deadlist_obj));4344VERIFY0(dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,4345dsl_dataset_phys(origin_head)->ds_deadlist_obj));4346dsl_dataset_swap_remap_deadlists(clone, origin_head, tx);43474348/*4349* If there is a bookmark at the origin, its "next dataset" is4350* changing, so we need to reset its FBN.4351*/4352dsl_bookmark_next_changed(origin_head, origin_head->ds_prev, tx);43534354dsl_scan_ds_clone_swapped(origin_head, clone, tx);43554356/*4357* Destroy any livelists associated with the clone or the origin,4358* since after the swap the corresponding livelists are no longer4359* valid.4360*/4361dsl_dir_remove_livelist(clone->ds_dir, tx, B_TRUE);4362dsl_dir_remove_livelist(origin_head->ds_dir, tx, 
B_TRUE);43634364spa_history_log_internal_ds(clone, "clone swap", tx,4365"parent=%s", origin_head->ds_dir->dd_myname);4366}43674368/*4369* Given a pool name and a dataset object number in that pool,4370* return the name of that dataset.4371*/4372int4373dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)4374{4375dsl_pool_t *dp;4376dsl_dataset_t *ds;4377int error;43784379error = dsl_pool_hold(pname, FTAG, &dp);4380if (error != 0)4381return (error);43824383error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);4384if (error == 0) {4385dsl_dataset_name(ds, buf);4386dsl_dataset_rele(ds, FTAG);4387}4388dsl_pool_rele(dp, FTAG);43894390return (error);4391}43924393int4394dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,4395uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)4396{4397int error = 0;43984399ASSERT3S(asize, >, 0);44004401/*4402* *ref_rsrv is the portion of asize that will come from any4403* unconsumed refreservation space.4404*/4405*ref_rsrv = 0;44064407mutex_enter(&ds->ds_lock);4408/*4409* Make a space adjustment for reserved bytes.4410*/4411if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) {4412ASSERT3U(*used, >=,4413ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes);4414*used -=4415(ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes);4416*ref_rsrv =4417asize - MIN(asize, parent_delta(ds, asize + inflight));4418}44194420if (!check_quota || ds->ds_quota == 0) {4421mutex_exit(&ds->ds_lock);4422return (0);4423}4424/*4425* If they are requesting more space, and our current estimate4426* is over quota, they get to try again unless the actual4427* on-disk is over quota and there are no pending changes (which4428* may free up space for us).4429*/4430if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >=4431ds->ds_quota) {4432if (inflight > 0 ||4433dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota)4434error = SET_ERROR(ERESTART);4435else4436error = 
SET_ERROR(EDQUOT);4437}4438mutex_exit(&ds->ds_lock);44394440return (error);4441}44424443typedef struct dsl_dataset_set_qr_arg {4444const char *ddsqra_name;4445zprop_source_t ddsqra_source;4446uint64_t ddsqra_value;4447} dsl_dataset_set_qr_arg_t;444844494450static int4451dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)4452{4453dsl_dataset_set_qr_arg_t *ddsqra = arg;4454dsl_pool_t *dp = dmu_tx_pool(tx);4455dsl_dataset_t *ds;4456int error;4457uint64_t newval;44584459if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)4460return (SET_ERROR(ENOTSUP));44614462error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);4463if (error != 0)4464return (error);44654466if (ds->ds_is_snapshot) {4467dsl_dataset_rele(ds, FTAG);4468return (SET_ERROR(EINVAL));4469}44704471error = dsl_prop_predict(ds->ds_dir,4472zfs_prop_to_name(ZFS_PROP_REFQUOTA),4473ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);4474if (error != 0) {4475dsl_dataset_rele(ds, FTAG);4476return (error);4477}44784479if (newval == 0) {4480dsl_dataset_rele(ds, FTAG);4481return (0);4482}44834484if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||4485newval < ds->ds_reserved) {4486dsl_dataset_rele(ds, FTAG);4487return (SET_ERROR(ENOSPC));4488}44894490dsl_dataset_rele(ds, FTAG);4491return (0);4492}44934494static void4495dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)4496{4497dsl_dataset_set_qr_arg_t *ddsqra = arg;4498dsl_pool_t *dp = dmu_tx_pool(tx);4499dsl_dataset_t *ds = NULL;4500uint64_t newval;45014502VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));45034504dsl_prop_set_sync_impl(ds,4505zfs_prop_to_name(ZFS_PROP_REFQUOTA),4506ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,4507&ddsqra->ddsqra_value, tx);45084509VERIFY0(dsl_prop_get_int_ds(ds,4510zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval));45114512if (ds->ds_quota != newval) {4513dmu_buf_will_dirty(ds->ds_dbuf, tx);4514ds->ds_quota = newval;4515}4516dsl_dataset_rele(ds, 
FTAG);4517}45184519int4520dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,4521uint64_t refquota)4522{4523dsl_dataset_set_qr_arg_t ddsqra;45244525ddsqra.ddsqra_name = dsname;4526ddsqra.ddsqra_source = source;4527ddsqra.ddsqra_value = refquota;45284529return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,4530dsl_dataset_set_refquota_sync, &ddsqra, 0,4531ZFS_SPACE_CHECK_EXTRA_RESERVED));4532}45334534static int4535dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)4536{4537dsl_dataset_set_qr_arg_t *ddsqra = arg;4538dsl_pool_t *dp = dmu_tx_pool(tx);4539dsl_dataset_t *ds;4540int error;4541uint64_t newval, unique;45424543if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)4544return (SET_ERROR(ENOTSUP));45454546error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);4547if (error != 0)4548return (error);45494550if (ds->ds_is_snapshot) {4551dsl_dataset_rele(ds, FTAG);4552return (SET_ERROR(EINVAL));4553}45544555error = dsl_prop_predict(ds->ds_dir,4556zfs_prop_to_name(ZFS_PROP_REFRESERVATION),4557ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);4558if (error != 0) {4559dsl_dataset_rele(ds, FTAG);4560return (error);4561}45624563/*4564* If we are doing the preliminary check in open context, the4565* space estimates may be inaccurate.4566*/4567if (!dmu_tx_is_syncing(tx)) {4568dsl_dataset_rele(ds, FTAG);4569return (0);4570}45714572mutex_enter(&ds->ds_lock);4573if (!DS_UNIQUE_IS_ACCURATE(ds))4574dsl_dataset_recalc_head_uniq(ds);4575unique = dsl_dataset_phys(ds)->ds_unique_bytes;4576mutex_exit(&ds->ds_lock);45774578if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) {4579uint64_t delta = MAX(unique, newval) -4580MAX(unique, ds->ds_reserved);45814582if (delta >4583dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) ||4584(ds->ds_quota > 0 && newval > ds->ds_quota)) {4585dsl_dataset_rele(ds, FTAG);4586return (SET_ERROR(ENOSPC));4587}4588}45894590dsl_dataset_rele(ds, FTAG);4591return 
(0);4592}45934594void4595dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,4596zprop_source_t source, uint64_t value, dmu_tx_t *tx)4597{4598uint64_t newval;4599uint64_t unique;4600int64_t delta;46014602dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION),4603source, sizeof (value), 1, &value, tx);46044605VERIFY0(dsl_prop_get_int_ds(ds,4606zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval));46074608dmu_buf_will_dirty(ds->ds_dbuf, tx);4609mutex_enter(&ds->ds_dir->dd_lock);4610mutex_enter(&ds->ds_lock);4611ASSERT(DS_UNIQUE_IS_ACCURATE(ds));4612unique = dsl_dataset_phys(ds)->ds_unique_bytes;4613delta = MAX(0, (int64_t)(newval - unique)) -4614MAX(0, (int64_t)(ds->ds_reserved - unique));4615ds->ds_reserved = newval;4616mutex_exit(&ds->ds_lock);46174618dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);4619mutex_exit(&ds->ds_dir->dd_lock);4620}46214622static void4623dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)4624{4625dsl_dataset_set_qr_arg_t *ddsqra = arg;4626dsl_pool_t *dp = dmu_tx_pool(tx);4627dsl_dataset_t *ds = NULL;46284629VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));4630dsl_dataset_set_refreservation_sync_impl(ds,4631ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx);4632dsl_dataset_rele(ds, FTAG);4633}46344635int4636dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,4637uint64_t refreservation)4638{4639dsl_dataset_set_qr_arg_t ddsqra;46404641ddsqra.ddsqra_name = dsname;4642ddsqra.ddsqra_source = source;4643ddsqra.ddsqra_value = refreservation;46444645return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,4646dsl_dataset_set_refreservation_sync, &ddsqra, 0,4647ZFS_SPACE_CHECK_EXTRA_RESERVED));4648}46494650typedef struct dsl_dataset_set_compression_arg {4651const char *ddsca_name;4652zprop_source_t ddsca_source;4653uint64_t ddsca_value;4654} dsl_dataset_set_compression_arg_t;46554656static int4657dsl_dataset_set_compression_check(void *arg, dmu_tx_t 
*tx)4658{4659dsl_dataset_set_compression_arg_t *ddsca = arg;4660dsl_pool_t *dp = dmu_tx_pool(tx);46614662uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);4663spa_feature_t f = zio_compress_to_feature(compval);46644665if (f == SPA_FEATURE_NONE)4666return (SET_ERROR(EINVAL));46674668if (!spa_feature_is_enabled(dp->dp_spa, f))4669return (SET_ERROR(ENOTSUP));46704671return (0);4672}46734674static void4675dsl_dataset_set_compression_sync(void *arg, dmu_tx_t *tx)4676{4677dsl_dataset_set_compression_arg_t *ddsca = arg;4678dsl_pool_t *dp = dmu_tx_pool(tx);4679dsl_dataset_t *ds = NULL;46804681uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);4682spa_feature_t f = zio_compress_to_feature(compval);4683ASSERT3S(f, !=, SPA_FEATURE_NONE);4684ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN);46854686VERIFY0(dsl_dataset_hold(dp, ddsca->ddsca_name, FTAG, &ds));4687if (zfeature_active(f, ds->ds_feature[f]) != B_TRUE) {4688ds->ds_feature_activation[f] = (void *)B_TRUE;4689dsl_dataset_activate_feature(ds->ds_object, f,4690ds->ds_feature_activation[f], tx);4691ds->ds_feature[f] = ds->ds_feature_activation[f];4692}4693dsl_dataset_rele(ds, FTAG);4694}46954696int4697dsl_dataset_set_compression(const char *dsname, zprop_source_t source,4698uint64_t compression)4699{4700dsl_dataset_set_compression_arg_t ddsca;47014702/*4703* The sync task is only required for zstd in order to activate4704* the feature flag when the property is first set.4705*/4706if (ZIO_COMPRESS_ALGO(compression) != ZIO_COMPRESS_ZSTD)4707return (0);47084709ddsca.ddsca_name = dsname;4710ddsca.ddsca_source = source;4711ddsca.ddsca_value = compression;47124713return (dsl_sync_task(dsname, dsl_dataset_set_compression_check,4714dsl_dataset_set_compression_sync, &ddsca, 0,4715ZFS_SPACE_CHECK_EXTRA_RESERVED));4716}47174718/*4719* Return (in *usedp) the amount of space referenced by "new" that was not4720* referenced at the time the bookmark corresponds to. "New" may be a4721* snapshot or a head. 
The bookmark must be before new, in4722* new's filesystem (or its origin) -- caller verifies this.4723*4724* The written space is calculated by considering two components: First, we4725* ignore any freed space, and calculate the written as new's used space4726* minus old's used space. Next, we add in the amount of space that was freed4727* between the two time points, thus reducing new's used space relative to4728* old's. Specifically, this is the space that was born before4729* zbm_creation_txg, and freed before new (ie. on new's deadlist or a4730* previous deadlist).4731*4732* space freed [---------------------]4733* snapshots ---O-------O--------O-------O------4734* bookmark new4735*4736* Note, the bookmark's zbm_*_bytes_refd must be valid, but if the HAS_FBN4737* flag is not set, we will calculate the freed_before_next based on the4738* next snapshot's deadlist, rather than using zbm_*_freed_before_next_snap.4739*/4740static int4741dsl_dataset_space_written_impl(zfs_bookmark_phys_t *bmp,4742dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)4743{4744int err = 0;4745dsl_pool_t *dp = new->ds_dir->dd_pool;47464747ASSERT(dsl_pool_config_held(dp));4748if (dsl_dataset_is_snapshot(new)) {4749ASSERT3U(bmp->zbm_creation_txg, <,4750dsl_dataset_phys(new)->ds_creation_txg);4751}47524753*usedp = 0;4754*usedp += dsl_dataset_phys(new)->ds_referenced_bytes;4755*usedp -= bmp->zbm_referenced_bytes_refd;47564757*compp = 0;4758*compp += dsl_dataset_phys(new)->ds_compressed_bytes;4759*compp -= bmp->zbm_compressed_bytes_refd;47604761*uncompp = 0;4762*uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes;4763*uncompp -= bmp->zbm_uncompressed_bytes_refd;47644765dsl_dataset_t *snap = new;47664767while (dsl_dataset_phys(snap)->ds_prev_snap_txg >4768bmp->zbm_creation_txg) {4769uint64_t used, comp, uncomp;47704771dsl_deadlist_space_range(&snap->ds_deadlist,47720, bmp->zbm_creation_txg,4773&used, &comp, &uncomp);4774*usedp += used;4775*compp += comp;4776*uncompp += 
uncomp;47774778uint64_t snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;4779if (snap != new)4780dsl_dataset_rele(snap, FTAG);4781err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);4782if (err != 0)4783break;4784}47854786/*4787* We might not have the FBN if we are calculating written from4788* a snapshot (because we didn't know the correct "next" snapshot4789* until now).4790*/4791if (bmp->zbm_flags & ZBM_FLAG_HAS_FBN) {4792*usedp += bmp->zbm_referenced_freed_before_next_snap;4793*compp += bmp->zbm_compressed_freed_before_next_snap;4794*uncompp += bmp->zbm_uncompressed_freed_before_next_snap;4795} else {4796ASSERT3U(dsl_dataset_phys(snap)->ds_prev_snap_txg, ==,4797bmp->zbm_creation_txg);4798uint64_t used, comp, uncomp;4799dsl_deadlist_space(&snap->ds_deadlist, &used, &comp, &uncomp);4800*usedp += used;4801*compp += comp;4802*uncompp += uncomp;4803}4804if (snap != new)4805dsl_dataset_rele(snap, FTAG);4806return (err);4807}48084809/*4810* Return (in *usedp) the amount of space written in new that was not4811* present at the time the bookmark corresponds to. New may be a4812* snapshot or the head. Old must be a bookmark before new, in4813* new's filesystem (or its origin) -- caller verifies this.4814*/4815int4816dsl_dataset_space_written_bookmark(zfs_bookmark_phys_t *bmp,4817dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)4818{4819if (!(bmp->zbm_flags & ZBM_FLAG_HAS_FBN))4820return (SET_ERROR(ENOTSUP));4821return (dsl_dataset_space_written_impl(bmp, new,4822usedp, compp, uncompp));4823}48244825/*4826* Return (in *usedp) the amount of space written in new that is not4827* present in oldsnap. New may be a snapshot or the head. Old must be4828* a snapshot before new, in new's filesystem (or its origin). 
If not then4829* fail and return EINVAL.4830*/4831int4832dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,4833uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)4834{4835if (!dsl_dataset_is_before(new, oldsnap, 0))4836return (SET_ERROR(EINVAL));48374838zfs_bookmark_phys_t zbm = { 0 };4839dsl_dataset_phys_t *dsp = dsl_dataset_phys(oldsnap);4840zbm.zbm_guid = dsp->ds_guid;4841zbm.zbm_creation_txg = dsp->ds_creation_txg;4842zbm.zbm_creation_time = dsp->ds_creation_time;4843zbm.zbm_referenced_bytes_refd = dsp->ds_referenced_bytes;4844zbm.zbm_compressed_bytes_refd = dsp->ds_compressed_bytes;4845zbm.zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes;48464847/*4848* If oldsnap is the origin (or origin's origin, ...) of new,4849* we can't easily calculate the effective FBN. Therefore,4850* we do not set ZBM_FLAG_HAS_FBN, so that the _impl will calculate4851* it relative to the correct "next": the next snapshot towards "new",4852* rather than the next snapshot in oldsnap's dsl_dir.4853*/4854return (dsl_dataset_space_written_impl(&zbm, new,4855usedp, compp, uncompp));4856}48574858/*4859* Return (in *usedp) the amount of space that will be reclaimed if firstsnap,4860* lastsnap, and all snapshots in between are deleted.4861*4862* blocks that would be freed [---------------------------]4863* snapshots ---O-------O--------O-------O--------O4864* firstsnap lastsnap4865*4866* This is the set of blocks that were born after the snap before firstsnap,4867* (birth > firstsnap->prev_snap_txg) and died before the snap after the4868* last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).4869* We calculate this by iterating over the relevant deadlists (from the snap4870* after lastsnap, backward to the snap after firstsnap), summing up the4871* space on the deadlist that was born after the snap before firstsnap.4872*/4873int4874dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,4875dsl_dataset_t *lastsnap,4876uint64_t *usedp, uint64_t 
*compp, uint64_t *uncompp)4877{4878int err = 0;4879uint64_t snapobj;4880dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;48814882ASSERT(firstsnap->ds_is_snapshot);4883ASSERT(lastsnap->ds_is_snapshot);48844885/*4886* Check that the snapshots are in the same dsl_dir, and firstsnap4887* is before lastsnap.4888*/4889if (firstsnap->ds_dir != lastsnap->ds_dir ||4890dsl_dataset_phys(firstsnap)->ds_creation_txg >4891dsl_dataset_phys(lastsnap)->ds_creation_txg)4892return (SET_ERROR(EINVAL));48934894*usedp = *compp = *uncompp = 0;48954896snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;4897while (snapobj != firstsnap->ds_object) {4898dsl_dataset_t *ds;4899uint64_t used, comp, uncomp;49004901err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);4902if (err != 0)4903break;49044905dsl_deadlist_space_range(&ds->ds_deadlist,4906dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX,4907&used, &comp, &uncomp);4908*usedp += used;4909*compp += comp;4910*uncompp += uncomp;49114912snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj;4913ASSERT3U(snapobj, !=, 0);4914dsl_dataset_rele(ds, FTAG);4915}4916return (err);4917}49184919/*4920* Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.4921* For example, they could both be snapshots of the same filesystem, and4922* 'earlier' is before 'later'. Or 'earlier' could be the origin of4923* 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's4924* filesystem. 
Or 'earlier' could be the origin's origin.4925*4926* If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.4927*/4928boolean_t4929dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,4930uint64_t earlier_txg)4931{4932dsl_pool_t *dp = later->ds_dir->dd_pool;4933int error;4934boolean_t ret;49354936ASSERT(dsl_pool_config_held(dp));4937ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);49384939if (earlier_txg == 0)4940earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;49414942if (later->ds_is_snapshot &&4943earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)4944return (B_FALSE);49454946if (later->ds_dir == earlier->ds_dir)4947return (B_TRUE);49484949/*4950* We check dd_origin_obj explicitly here rather than using4951* dsl_dir_is_clone() so that we will return TRUE if "earlier"4952* is $ORIGIN@$ORIGIN. dsl_dataset_space_written() depends on4953* this behavior.4954*/4955if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == 0)4956return (B_FALSE);49574958dsl_dataset_t *origin;4959error = dsl_dataset_hold_obj(dp,4960dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);4961if (error != 0)4962return (B_FALSE);4963if (dsl_dataset_phys(origin)->ds_creation_txg == earlier_txg &&4964origin->ds_dir == earlier->ds_dir) {4965dsl_dataset_rele(origin, FTAG);4966return (B_TRUE);4967}4968ret = dsl_dataset_is_before(origin, earlier, earlier_txg);4969dsl_dataset_rele(origin, FTAG);4970return (ret);4971}49724973void4974dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)4975{4976objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;4977dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);4978}49794980boolean_t4981dsl_dataset_is_zapified(dsl_dataset_t *ds)4982{4983dmu_object_info_t doi;49844985dmu_object_info_from_db(ds->ds_dbuf, &doi);4986return (doi.doi_type == DMU_OTN_ZAP_METADATA);4987}49884989boolean_t4990dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)4991{4992return (dsl_dataset_is_zapified(ds) 
&&4993zap_contains(ds->ds_dir->dd_pool->dp_meta_objset,4994ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);4995}49964997uint64_t4998dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds)4999{5000uint64_t remap_deadlist_obj;5001int err;50025003if (!dsl_dataset_is_zapified(ds))5004return (0);50055006err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object,5007DS_FIELD_REMAP_DEADLIST, sizeof (remap_deadlist_obj), 1,5008&remap_deadlist_obj);50095010if (err != 0) {5011VERIFY3S(err, ==, ENOENT);5012return (0);5013}50145015ASSERT(remap_deadlist_obj != 0);5016return (remap_deadlist_obj);5017}50185019boolean_t5020dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds)5021{5022EQUIV(dsl_deadlist_is_open(&ds->ds_remap_deadlist),5023dsl_dataset_get_remap_deadlist_object(ds) != 0);5024return (dsl_deadlist_is_open(&ds->ds_remap_deadlist));5025}50265027static void5028dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, uint64_t obj,5029dmu_tx_t *tx)5030{5031ASSERT(obj != 0);5032dsl_dataset_zapify(ds, tx);5033VERIFY0(zap_add(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object,5034DS_FIELD_REMAP_DEADLIST, sizeof (obj), 1, &obj, tx));5035}50365037static void5038dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, dmu_tx_t *tx)5039{5040VERIFY0(zap_remove(ds->ds_dir->dd_pool->dp_meta_objset,5041ds->ds_object, DS_FIELD_REMAP_DEADLIST, tx));5042}50435044void5045dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx)5046{5047uint64_t remap_deadlist_object;5048spa_t *spa = ds->ds_dir->dd_pool->dp_spa;50495050ASSERT(dmu_tx_is_syncing(tx));5051ASSERT(dsl_dataset_remap_deadlist_exists(ds));50525053remap_deadlist_object = ds->ds_remap_deadlist.dl_object;5054dsl_deadlist_close(&ds->ds_remap_deadlist);5055dsl_deadlist_free(spa_meta_objset(spa), remap_deadlist_object, tx);5056dsl_dataset_unset_remap_deadlist_object(ds, tx);5057spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);5058}50595060void5061dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t 
*tx)5062{5063uint64_t remap_deadlist_obj;5064spa_t *spa = ds->ds_dir->dd_pool->dp_spa;50655066ASSERT(dmu_tx_is_syncing(tx));5067ASSERT(MUTEX_HELD(&ds->ds_remap_deadlist_lock));5068/*5069* Currently we only create remap deadlists when there are indirect5070* vdevs with referenced mappings.5071*/5072ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL));50735074remap_deadlist_obj = dsl_deadlist_clone(5075&ds->ds_deadlist, UINT64_MAX,5076dsl_dataset_phys(ds)->ds_prev_snap_obj, tx);5077dsl_dataset_set_remap_deadlist_object(ds,5078remap_deadlist_obj, tx);5079VERIFY0(dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa),5080remap_deadlist_obj));5081spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);5082}50835084void5085dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,5086uint64_t num_redact_snaps, dmu_tx_t *tx)5087{5088uint64_t dsobj = ds->ds_object;5089struct feature_type_uint64_array_arg *ftuaa =5090kmem_zalloc(sizeof (*ftuaa), KM_SLEEP);5091ftuaa->length = (int64_t)num_redact_snaps;5092if (num_redact_snaps > 0) {5093ftuaa->array = kmem_alloc(num_redact_snaps * sizeof (uint64_t),5094KM_SLEEP);5095memcpy(ftuaa->array, redact_snaps, num_redact_snaps *5096sizeof (uint64_t));5097}5098dsl_dataset_activate_feature(dsobj, SPA_FEATURE_REDACTED_DATASETS,5099ftuaa, tx);5100ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = ftuaa;5101}51025103/*5104* Find and return (in *oldest_dsobj) the oldest snapshot of the dsobj5105* dataset whose birth time is >= min_txg.5106*/5107int5108dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,5109uint64_t *oldest_dsobj)5110{5111dsl_dataset_t *ds;5112dsl_pool_t *dp = spa->spa_dsl_pool;51135114int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds);5115if (error != 0)5116return (error);51175118uint64_t prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;5119uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;51205121while (prev_obj != 0 && min_txg < prev_obj_txg) 
{5122dsl_dataset_rele(ds, FTAG);5123if ((error = dsl_dataset_hold_obj(dp, prev_obj,5124FTAG, &ds)) != 0)5125return (error);5126prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;5127prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;5128}5129*oldest_dsobj = ds->ds_object;5130dsl_dataset_rele(ds, FTAG);5131return (0);5132}51335134ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, UINT, ZMOD_RW,5135"Max allowed record size");51365137ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,5138"Allow mounting of redacted datasets");51395140ZFS_MODULE_PARAM(zfs, zfs_, snapshot_history_enabled, INT, ZMOD_RW,5141"Include snapshot events in pool history/events");51425143EXPORT_SYMBOL(dsl_dataset_hold);5144EXPORT_SYMBOL(dsl_dataset_hold_flags);5145EXPORT_SYMBOL(dsl_dataset_hold_obj);5146EXPORT_SYMBOL(dsl_dataset_hold_obj_flags);5147EXPORT_SYMBOL(dsl_dataset_own);5148EXPORT_SYMBOL(dsl_dataset_own_obj);5149EXPORT_SYMBOL(dsl_dataset_name);5150EXPORT_SYMBOL(dsl_dataset_rele);5151EXPORT_SYMBOL(dsl_dataset_rele_flags);5152EXPORT_SYMBOL(dsl_dataset_disown);5153EXPORT_SYMBOL(dsl_dataset_tryown);5154EXPORT_SYMBOL(dsl_dataset_create_sync);5155EXPORT_SYMBOL(dsl_dataset_create_sync_dd);5156EXPORT_SYMBOL(dsl_dataset_snapshot_check);5157EXPORT_SYMBOL(dsl_dataset_snapshot_sync);5158EXPORT_SYMBOL(dsl_dataset_promote);5159EXPORT_SYMBOL(dsl_dataset_user_hold);5160EXPORT_SYMBOL(dsl_dataset_user_release);5161EXPORT_SYMBOL(dsl_dataset_get_holds);5162EXPORT_SYMBOL(dsl_dataset_get_blkptr);5163EXPORT_SYMBOL(dsl_dataset_get_spa);5164EXPORT_SYMBOL(dsl_dataset_modified_since_snap);5165EXPORT_SYMBOL(dsl_dataset_space_written);5166EXPORT_SYMBOL(dsl_dataset_space_wouldfree);5167EXPORT_SYMBOL(dsl_dataset_sync);5168EXPORT_SYMBOL(dsl_dataset_block_born);5169EXPORT_SYMBOL(dsl_dataset_block_kill);5170EXPORT_SYMBOL(dsl_dataset_dirty);5171EXPORT_SYMBOL(dsl_dataset_stats);5172EXPORT_SYMBOL(dsl_dataset_fast_stat);5173EXPORT_SYMBOL(dsl_dataset_space);5174EXPORT_SYMBOL(dsl_dataset_fsid_guid);5175EXPORT_SY
MBOL(dsl_dsobj_to_dsname);5176EXPORT_SYMBOL(dsl_dataset_check_quota);5177EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl);5178EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl);517951805181