GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/module/zfs/dsl_dataset.c
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 RackTop Systems.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
 * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright (c) 2019, Klara Inc.
 * Copyright (c) 2019, Allan Jude
 * Copyright (c) 2020 The FreeBSD Foundation [1]
 * Copyright (c) 2025, Rob Norris <[email protected]>
 *
 * [1] Portions of this software were developed by Allan Jude
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>
#include <sys/policy.h>
#include <sys/dmu_send.h>
#include <sys/dmu_recv.h>
#include <sys/zio_compress.h>
#include <zfs_fletcher.h>
#include <sys/zio_checksum.h>
#include <sys/brt.h>

/*
 * The SPA supports block sizes up to 16MB. However, very large blocks
 * can have an impact on i/o latency (e.g. tying up a spinning disk for
 * ~300ms), and also potentially on the memory allocator. Therefore,
 * we did not allow the recordsize to be set larger than zfs_max_recordsize
 * (former default: 1MB). Larger blocks could be created by changing this
 * tunable, and pools with larger blocks could always be imported and used,
 * regardless of this setting.
 *
 * We do, however, still limit it by default to 1M on x86_32, because Linux's
 * 3/1 memory split doesn't leave much room for 16M chunks.
 */
#ifdef _ILP32
uint_t zfs_max_recordsize = 1 * 1024 * 1024;
#else
uint_t zfs_max_recordsize = 16 * 1024 * 1024;
#endif
static int zfs_allow_redacted_dataset_mount = 0;

int zfs_snapshot_history_enabled = 1;
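
/* Swap the values of two uint64_t lvalues in place. */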
#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}

#define	DS_REF_MAX	(1ULL << 62)

static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds,
    uint64_t obj, dmu_tx_t *tx);
static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds,
    dmu_tx_t *tx);

static void unload_zfeature(dsl_dataset_t *ds, spa_feature_t f);

extern uint_t spa_asize_inflation;

static zil_header_t zero_zil;

/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer. If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.
 */
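/*
 * For example (hypothetical numbers): with ds_reserved = 10M and
 * ds_unique_bytes = 4M, a delta of +2M leaves both MAX() terms below
 * at the 10M reservation, so 0 is propagated; once unique space
 * exceeds the reservation, deltas pass through unchanged.
 */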
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	dsl_dataset_phys_t *ds_phys;
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	ds_phys = dsl_dataset_phys(ds);
	old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}
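
/*
 * Account for a newly born block: charge its on-disk, physical, and
 * logical sizes to this dataset (or to the MOS when ds is NULL) and
 * activate any per-dataset features (large blocks, new checksum or
 * compression algorithms) that the block pointer uses.
 */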
void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	int used = bp_get_dsize_sync(spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;
	spa_feature_t f;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
	if (ds == NULL) {
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    used, compressed, uncompressed);
		return;
	}

	ASSERT3U(BP_GET_BIRTH(bp), >,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	dsl_dataset_phys(ds)->ds_referenced_bytes += used;
	dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;
	dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
	dsl_dataset_phys(ds)->ds_unique_bytes += used;

	if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
		ds->ds_feature_activation[SPA_FEATURE_LARGE_BLOCKS] =
		    (void *)B_TRUE;
	}

	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
	if (f != SPA_FEATURE_NONE) {
		ASSERT3S(spa_feature_table[f].fi_type, ==,
		    ZFEATURE_TYPE_BOOLEAN);
		ds->ds_feature_activation[f] = (void *)B_TRUE;
	}

	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
	if (f != SPA_FEATURE_NONE) {
		ASSERT3S(spa_feature_table[f].fi_type, ==,
		    ZFEATURE_TYPE_BOOLEAN);
		ds->ds_feature_activation[f] = (void *)B_TRUE;
	}

	/*
	 * Track block for livelist, but ignore embedded blocks because
	 * they do not need to be freed.
	 */
	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
	    BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
	    !(BP_IS_EMBEDDED(bp))) {
		ASSERT(dsl_dir_is_clone(ds->ds_dir));
		ASSERT(spa_feature_is_enabled(spa,
		    SPA_FEATURE_LIVELIST));
		bplist_append(&ds->ds_dir->dd_pending_allocs, bp);
	}

	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_transfer_space(ds->ds_dir, delta,
	    compressed, uncompressed, used,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
}

/*
 * Called when the specified segment has been remapped, and is thus no
 * longer referenced in the head dataset. The vdev must be indirect.
 *
 * If the segment is referenced by a snapshot, put it on the remap deadlist.
 * Otherwise, add this segment to the obsolete spacemap.
 */
void
dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev, uint64_t offset,
    uint64_t size, uint64_t birth, dmu_tx_t *tx)
{
	spa_t *spa = ds->ds_dir->dd_pool->dp_spa;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(birth <= tx->tx_txg);
	ASSERT(!ds->ds_is_snapshot);

	if (birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
		spa_vdev_indirect_mark_obsolete(spa, vdev, offset, size, tx);
	} else {
		blkptr_t fakebp;
		dva_t *dva = &fakebp.blk_dva[0];

		ASSERT(ds != NULL);

		mutex_enter(&ds->ds_remap_deadlist_lock);
		if (!dsl_dataset_remap_deadlist_exists(ds)) {
			dsl_dataset_create_remap_deadlist(ds, tx);
		}
		mutex_exit(&ds->ds_remap_deadlist_lock);

		BP_ZERO(&fakebp);
		BP_SET_LOGICAL_BIRTH(&fakebp, birth);
		DVA_SET_VDEV(dva, vdev);
		DVA_SET_OFFSET(dva, offset);
		DVA_SET_ASIZE(dva, size);
		dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, B_FALSE,
		    tx);
	}
}
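
/*
 * Stop accounting for a block that this dataset no longer references.
 * Blocks born after the most recent snapshot are freed right away (or
 * deferred to the pool's free bpobj when freeing them would itself
 * require I/O); blocks still visible to a snapshot go on a deadlist
 * instead.  Returns the number of bytes freed from this dataset.
 */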
int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	int used = bp_get_dsize_sync(spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(BP_GET_BIRTH(bp) <= tx->tx_txg);

	if (ds == NULL) {
		dsl_free(tx->tx_pool, tx->tx_txg, bp);
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    -used, -compressed, -uncompressed);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!ds->ds_is_snapshot);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/*
	 * Track block for livelist, but ignore embedded blocks because
	 * they do not need to be freed.
	 */
	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
	    BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
	    !(BP_IS_EMBEDDED(bp))) {
		ASSERT(dsl_dir_is_clone(ds->ds_dir));
		ASSERT(spa_feature_is_enabled(spa,
		    SPA_FEATURE_LIVELIST));
		bplist_append(&ds->ds_dir->dd_pending_frees, bp);
	}

	if (BP_GET_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
		int64_t delta;

		/*
		 * Put blocks that would create IO on the pool's deadlist for
		 * dsl_process_async_destroys() to find. This is to prevent
		 * zio_free() from creating ZIO_TYPE_FREE IOs for them, which
		 * are very heavy and can lead to out-of-memory conditions if
		 * something tries to free millions of blocks in the same txg.
		 */
		boolean_t defer = spa_version(spa) >= SPA_VERSION_DEADLISTS &&
		    (BP_IS_GANG(bp) || BP_GET_DEDUP(bp) ||
		    brt_maybe_exists(spa, bp));

		if (defer) {
			dprintf_bp(bp, "putting on free list: %s", "");
			bpobj_enqueue(&ds->ds_dir->dd_pool->dp_free_bpobj,
			    bp, B_FALSE, tx);
		} else {
			dprintf_bp(bp, "freeing ds=%llu",
			    (u_longlong_t)ds->ds_object);
			dsl_free(tx->tx_pool, tx->tx_txg, bp);
		}

		mutex_enter(&ds->ds_lock);
		ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		dsl_dataset_phys(ds)->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);

		dsl_dir_diduse_transfer_space(ds->ds_dir,
		    delta, -compressed, -uncompressed, -used,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);

		if (defer)
			dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
			    DD_USED_HEAD, used, compressed, uncompressed, tx);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		if (async) {
			/*
			 * We are here as part of zio's write done callback,
			 * which means we're a zio interrupt thread. We can't
			 * call dsl_deadlist_insert() now because it may block
			 * waiting for I/O. Instead, put bp on the deferred
			 * queue and let dsl_pool_sync() finish the job.
			 */
			bplist_append(&ds->ds_pending_deadlist, bp);
		} else {
			dsl_deadlist_insert(&ds->ds_deadlist, bp, B_FALSE, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    dsl_dataset_phys(ds)->ds_prev_snap_obj);
		ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0);
		/* if (logical birth > prev prev snap txg) prev unique += bs */
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
		    ds->ds_object && BP_GET_BIRTH(bp) >
		    dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}

	dsl_bookmark_block_killed(ds, bp, tx);

	mutex_enter(&ds->ds_lock);
	ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used);
	dsl_dataset_phys(ds)->ds_referenced_bytes -= used;
	ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed);
	dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed;
	ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed);
	dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

struct feature_type_uint64_array_arg {
	uint64_t length;
	uint64_t *array;
};
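
/*
 * Release the in-core state held for a per-dataset feature.  Boolean
 * features carry no allocated state; UINT64_ARRAY features free their
 * array and its bookkeeping struct.
 */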
static void
unload_zfeature(dsl_dataset_t *ds, spa_feature_t f)
{
	switch (spa_feature_table[f].fi_type) {
	case ZFEATURE_TYPE_BOOLEAN:
		break;
	case ZFEATURE_TYPE_UINT64_ARRAY:
	{
		struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f];
		kmem_free(ftuaa->array, ftuaa->length * sizeof (uint64_t));
		kmem_free(ftuaa, sizeof (*ftuaa));
		break;
	}
	default:
		panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
	}
}

static int
load_zfeature(objset_t *mos, dsl_dataset_t *ds, spa_feature_t f)
{
	int err = 0;
	switch (spa_feature_table[f].fi_type) {
	case ZFEATURE_TYPE_BOOLEAN:
		err = zap_contains(mos, ds->ds_object,
		    spa_feature_table[f].fi_guid);
		if (err == 0) {
			ds->ds_feature[f] = (void *)B_TRUE;
		} else {
			ASSERT3U(err, ==, ENOENT);
			err = 0;
		}
		break;
	case ZFEATURE_TYPE_UINT64_ARRAY:
	{
		uint64_t int_size, num_int;
		uint64_t *data;
		err = zap_length(mos, ds->ds_object,
		    spa_feature_table[f].fi_guid, &int_size, &num_int);
		if (err != 0) {
			ASSERT3U(err, ==, ENOENT);
			err = 0;
			break;
		}
		ASSERT3U(int_size, ==, sizeof (uint64_t));
		data = kmem_alloc(int_size * num_int, KM_SLEEP);
		VERIFY0(zap_lookup(mos, ds->ds_object,
		    spa_feature_table[f].fi_guid, int_size, num_int, data));
		struct feature_type_uint64_array_arg *ftuaa =
		    kmem_alloc(sizeof (*ftuaa), KM_SLEEP);
		ftuaa->length = num_int;
		ftuaa->array = data;
		ds->ds_feature[f] = ftuaa;
		break;
	}
	default:
		panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
	}
	return (err);
}

/*
 * We have to release the fsid synchronously or we risk that a subsequent
 * mount of the same dataset will fail to unique_insert the fsid. This
 * failure would manifest itself as the fsid of this dataset changing
 * between mounts which makes NFS clients quite unhappy.
 */
static void
dsl_dataset_evict_sync(void *dbu)
{
	dsl_dataset_t *ds = dbu;

	ASSERT0P(ds->ds_owner);

	unique_remove(ds->ds_fsid_guid);
}

static void
dsl_dataset_evict_async(void *dbu)
{
	dsl_dataset_t *ds = dbu;

	ASSERT0P(ds->ds_owner);

	ds->ds_dbuf = NULL;

	if (ds->ds_objset != NULL)
		dmu_objset_evict(ds->ds_objset);

	if (ds->ds_prev) {
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	dsl_bookmark_fini_ds(ds);

	bplist_destroy(&ds->ds_pending_deadlist);
	if (dsl_deadlist_is_open(&ds->ds_deadlist))
		dsl_deadlist_close(&ds->ds_deadlist);
	if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))
		dsl_deadlist_close(&ds->ds_remap_deadlist);
	if (ds->ds_dir)
		dsl_dir_async_rele(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (dsl_dataset_feature_is_active(ds, f))
			unload_zfeature(ds, f);
	}

	list_destroy(&ds->ds_prop_cbs);
	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_sendstream_lock);
	mutex_destroy(&ds->ds_remap_deadlist_lock);
	zfs_refcount_destroy(&ds->ds_longholds);
	rrw_destroy(&ds->ds_bp_rwlock);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err != 0)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname,
	    sizeof (ds->ds_snapname));
	if (err != 0 && zfs_recover == B_TRUE) {
		err = 0;
		(void) snprintf(ds->ds_snapname, sizeof (ds->ds_snapname),
		    "SNAPOBJ=%llu-ERR=%d",
		    (unsigned long long)ds->ds_object, err);
	}
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
	matchtype_t mt = 0;
	int err;

	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_NORMALIZE;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && (mt & MT_NORMALIZE))
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

int
dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
    boolean_t adj_cnt)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
	matchtype_t mt = 0;
	int err;

	dsl_dir_snap_cmtime_update(ds->ds_dir, tx);

	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_NORMALIZE;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && (mt & MT_NORMALIZE))
		err = zap_remove(mos, snapobj, name, tx);

	if (err == 0 && adj_cnt)
		dsl_fs_ss_count_adjust(ds->ds_dir, -1,
		    DD_FIELD_SNAPSHOT_COUNT, tx);

	return (err);
}
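
/*
 * Try to take a speculative hold on the dataset by grabbing a
 * reference on its bonus dbuf.  Succeeds (B_TRUE) only if the dbuf is
 * still alive and this dataset is still attached as the dbuf's user;
 * otherwise the reference is dropped and B_FALSE is returned.
 */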
boolean_t
dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, const void *tag)
{
	dmu_buf_t *dbuf = ds->ds_dbuf;
	boolean_t result = B_FALSE;

	if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset,
	    ds->ds_object, DMU_BONUS_BLKID, tag)) {

		if (ds == dmu_buf_get_user(dbuf))
			result = B_TRUE;
		else
			dmu_buf_rele(dbuf, tag);
	}

	return (result);
}

int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, const void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err != 0)
		return (err);

	/* Make sure dsobj has the correct object type. */
	dmu_object_info_from_db(dbuf, &doi);
	if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
		dmu_buf_rele(dbuf, tag);
		return (SET_ERROR(EINVAL));
	}

	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner = NULL;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
		list_link_init(&ds->ds_synced_link);

		err = dsl_dir_hold_obj(dp, dsl_dataset_phys(ds)->ds_dir_obj,
		    NULL, ds, &ds->ds_dir);
		if (err != 0) {
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_remap_deadlist_lock,
		    NULL, MUTEX_DEFAULT, NULL);
		rrw_init(&ds->ds_bp_rwlock, B_FALSE);
		zfs_refcount_create(&ds->ds_longholds);

		bplist_create(&ds->ds_pending_deadlist);

		list_create(&ds->ds_sendstreams, sizeof (dmu_sendstatus_t),
		    offsetof(dmu_sendstatus_t, dss_link));

		list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_ds_node));

		if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
			spa_feature_t f;

			for (f = 0; f < SPA_FEATURES; f++) {
				if (!(spa_feature_table[f].fi_flags &
				    ZFEATURE_FLAG_PER_DATASET))
					continue;
				err = load_zfeature(mos, ds, f);
			}
		}

		if (!ds->ds_is_snapshot) {
			ds->ds_snapname[0] = '\0';
			if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
				err = dsl_dataset_hold_obj(dp,
				    dsl_dataset_phys(ds)->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}
			if (err != 0)
				goto after_dsl_bookmark_fini;
			err = dsl_bookmark_init_ds(ds);
		} else {
			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
				err = dsl_dataset_get_snapname(ds);
			if (err == 0 &&
			    dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {
				err = zap_count(
				    ds->ds_dir->dd_pool->dp_meta_objset,
				    dsl_dataset_phys(ds)->ds_userrefs_obj,
				    &ds->ds_userrefs);
			}
		}

		if (err == 0 && !ds->ds_is_snapshot) {
			err = dsl_prop_get_int_ds(ds,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
			    &ds->ds_reserved);
			if (err == 0) {
				err = dsl_prop_get_int_ds(ds,
				    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
				    &ds->ds_quota);
			}
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0 && ds->ds_dir->dd_crypto_obj != 0 &&
		    ds->ds_is_snapshot &&
		    zap_contains(mos, dsobj, DS_FIELD_IVSET_GUID) != 0) {
			dp->dp_spa->spa_errata =
			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION;
		}

		if (err == 0) {
			err = dsl_deadlist_open(&ds->ds_deadlist,
			    mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
		}
		if (err == 0) {
			uint64_t remap_deadlist_obj =
			    dsl_dataset_get_remap_deadlist_object(ds);
			if (remap_deadlist_obj != 0) {
				err = dsl_deadlist_open(&ds->ds_remap_deadlist,
				    mos, remap_deadlist_obj);
			}
		}

		dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,
		    dsl_dataset_evict_async, &ds->ds_dbuf);
		if (err == 0)
			winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);

		if (err != 0 || winner != NULL) {
			if (dsl_deadlist_is_open(&ds->ds_deadlist))
				dsl_deadlist_close(&ds->ds_deadlist);
			if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))
				dsl_deadlist_close(&ds->ds_remap_deadlist);
			dsl_bookmark_fini_ds(ds);
after_dsl_bookmark_fini:
			if (ds->ds_prev)
				dsl_dataset_rele(ds->ds_prev, ds);
			dsl_dir_rele(ds->ds_dir, ds);
			for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
				if (dsl_dataset_feature_is_active(ds, f))
					unload_zfeature(ds, f);
			}

			list_destroy(&ds->ds_prop_cbs);
			list_destroy(&ds->ds_sendstreams);
			bplist_destroy(&ds->ds_pending_deadlist);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_sendstream_lock);
			mutex_destroy(&ds->ds_remap_deadlist_lock);
			zfs_refcount_destroy(&ds->ds_longholds);
			rrw_destroy(&ds->ds_bp_rwlock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err != 0) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
			if (ds->ds_fsid_guid !=
			    dsl_dataset_phys(ds)->ds_fsid_guid) {
				zfs_dbgmsg("ds_fsid_guid changed from "
				    "%llx to %llx for pool %s dataset id %llu",
				    (long long)
				    dsl_dataset_phys(ds)->ds_fsid_guid,
				    (long long)ds->ds_fsid_guid,
				    spa_name(dp->dp_spa),
				    (u_longlong_t)dsobj);
			}
		}
	}

	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
	ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	*dsp = ds;

	return (0);
}

int
dsl_dataset_create_key_mapping(dsl_dataset_t *ds)
{
	dsl_dir_t *dd = ds->ds_dir;

	if (dd->dd_crypto_obj == 0)
		return (0);

	return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,
	    ds, ds, &ds->ds_key_mapping));
}

int
dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
    ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp)
{
	int err;

	err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
	if (err != 0)
		return (err);

	ASSERT3P(*dsp, !=, NULL);

	if (flags & DS_HOLD_FLAG_DECRYPT) {
		err = dsl_dataset_create_key_mapping(*dsp);
		if (err != 0)
			dsl_dataset_rele(*dsp, tag);
	}

	return (err);
}

int
dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
    const void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	const char *snapname;
	uint64_t obj;
	int err = 0;
	dsl_dataset_t *ds;

	err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
	if (err != 0)
		return (err);

	ASSERT(dsl_pool_config_held(dp));
	obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
	if (obj != 0)
		err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds);
	else
		err = SET_ERROR(ENOENT);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *snap_ds;

		if (*snapname++ != '@') {
			dsl_dataset_rele_flags(ds, flags, tag);
			dsl_dir_rele(dd, FTAG);
			return (SET_ERROR(ENOENT));
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(ds, snapname, &obj);
		if (err == 0) {
			err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag,
			    &snap_ds);
		}
		dsl_dataset_rele_flags(ds, flags, tag);

		if (err == 0) {
			mutex_enter(&snap_ds->ds_lock);
			if (snap_ds->ds_snapname[0] == 0)
				(void) strlcpy(snap_ds->ds_snapname, snapname,
				    sizeof (snap_ds->ds_snapname));
			mutex_exit(&snap_ds->ds_lock);
			ds = snap_ds;
		}
	}
	if (err == 0)
		*dsp = ds;
	dsl_dir_rele(dd, FTAG);
	return (err);
}

int
dsl_dataset_hold(dsl_pool_t *dp, const char *name, const void *tag,
    dsl_dataset_t **dsp)
{
	return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));
}

static int
dsl_dataset_own_obj_impl(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
    const void *tag, boolean_t override, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);
	if (err != 0)
		return (err);
	if (!dsl_dataset_tryown(*dsp, tag, override)) {
		dsl_dataset_rele_flags(*dsp, flags, tag);
		*dsp = NULL;
		return (SET_ERROR(EBUSY));
	}
	return (0);
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
    const void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_FALSE, dsp));
}

int
dsl_dataset_own_obj_force(dsl_pool_t *dp, uint64_t dsobj,
    ds_hold_flags_t flags, const void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_TRUE, dsp));
}

static int
dsl_dataset_own_impl(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
    const void *tag, boolean_t override, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);
	if (err != 0)
		return (err);
	if (!dsl_dataset_tryown(*dsp, tag, override)) {
		dsl_dataset_rele_flags(*dsp, flags, tag);
		return (SET_ERROR(EBUSY));
	}
	return (0);
}

int
dsl_dataset_own_force(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
    const void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_own_impl(dp, name, flags, tag, B_TRUE, dsp));
}

int
dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
    const void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_own_impl(dp, name, flags, tag, B_FALSE, dsp));
}

/*
 * See the comment above dsl_pool_hold() for details. In summary, a long
 * hold is used to prevent destruction of a dataset while the pool hold
 * is dropped, allowing other concurrent operations (e.g. spa_sync()).
 *
 * The dataset and pool must be held when this function is called. After it
 * is called, the pool hold may be released while the dataset is still held
 * and accessed.
 */
void
dsl_dataset_long_hold(dsl_dataset_t *ds, const void *tag)
{
	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
	(void) zfs_refcount_add(&ds->ds_longholds, tag);
}

void
dsl_dataset_long_rele(dsl_dataset_t *ds, const void *tag)
{
	(void) zfs_refcount_remove(&ds->ds_longholds, tag);
}

/* Return B_TRUE if there are any long holds on this dataset. */
boolean_t
dsl_dataset_long_held(dsl_dataset_t *ds)
{
	return (!zfs_refcount_is_zero(&ds->ds_longholds));
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strlcpy(name, "mos", ZFS_MAX_DATASET_NAME_LEN);
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY0(dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN),
			    <, ZFS_MAX_DATASET_NAME_LEN);
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				VERIFY3U(strlcat(name, ds->ds_snapname,
				    ZFS_MAX_DATASET_NAME_LEN), <,
				    ZFS_MAX_DATASET_NAME_LEN);
				mutex_exit(&ds->ds_lock);
			} else {
				VERIFY3U(strlcat(name, ds->ds_snapname,
				    ZFS_MAX_DATASET_NAME_LEN), <,
				    ZFS_MAX_DATASET_NAME_LEN);
			}
		}
	}
}

int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	VERIFY0(dsl_dataset_get_snapname(ds));
	mutex_enter(&ds->ds_lock);
	int len = strlen(ds->ds_snapname);
	mutex_exit(&ds->ds_lock);
	/* add '@' if ds is a snap */
	if (len > 0)
		len++;
	len += dsl_dir_namelen(ds->ds_dir);
	return (len);
}

void
dsl_dataset_rele(dsl_dataset_t *ds, const void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)
{
	dsl_dir_t *dd = ds->ds_dir;

	if (dd == NULL || dd->dd_crypto_obj == 0)
		return;

	(void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,
	    ds->ds_object, ds);
}

void
dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
    const void *tag)
{
	if (flags & DS_HOLD_FLAG_DECRYPT)
		dsl_dataset_remove_key_mapping(ds);

	dsl_dataset_rele(ds, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, const void *tag)
{
	ASSERT3P(ds->ds_owner, ==, tag);
	ASSERT(ds->ds_dbuf != NULL);

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	mutex_exit(&ds->ds_lock);
	dsl_dataset_long_rele(ds, tag);
	dsl_dataset_rele_flags(ds, flags, tag);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, const void *tag, boolean_t override)
{
	boolean_t gotit = FALSE;

	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL && (override || !(DS_IS_INCONSISTENT(ds) ||
	    (dsl_dataset_feature_is_active(ds,
	    SPA_FEATURE_REDACTED_DATASETS) &&
	    !zfs_allow_redacted_dataset_mount)))) {
		ds->ds_owner = tag;
		dsl_dataset_long_hold(ds, tag);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

boolean_t
dsl_dataset_has_owner(dsl_dataset_t *ds)
{
	boolean_t rv;
	mutex_enter(&ds->ds_lock);
	rv = (ds->ds_owner != NULL);
	mutex_exit(&ds->ds_lock);
	return (rv);
}
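
/*
 * Decode the in-core representation used by ds_feature[]: boolean
 * features store B_TRUE/B_FALSE in the pointer value itself, while
 * UINT64_ARRAY features store a pointer to their state and are active
 * iff that pointer is non-NULL.
 */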
static boolean_t
zfeature_active(spa_feature_t f, void *arg)
{
	switch (spa_feature_table[f].fi_type) {
	case ZFEATURE_TYPE_BOOLEAN: {
		boolean_t val = (boolean_t)(uintptr_t)arg;
		ASSERT(val == B_FALSE || val == B_TRUE);
		return (val);
	}
	case ZFEATURE_TYPE_UINT64_ARRAY:
		/*
		 * In this case, arg is a uint64_t array. The feature is active
		 * if the array is non-null.
		 */
		return (arg != NULL);
	default:
		panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
		return (B_FALSE);
	}
}

boolean_t
dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f)
{
	return (zfeature_active(f, ds->ds_feature[f]));
}

/*
 * The buffers passed out by this function are references to internal buffers;
 * they should not be freed by callers of this function, and they should not be
 * used after the dataset has been released.
 */
boolean_t
dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds, spa_feature_t f,
    uint64_t *outlength, uint64_t **outp)
{
	VERIFY(spa_feature_table[f].fi_type & ZFEATURE_TYPE_UINT64_ARRAY);
	if (!dsl_dataset_feature_is_active(ds, f)) {
		return (B_FALSE);
	}
	struct feature_type_uint64_array_arg *ftuaa = ds->ds_feature[f];
	*outp = ftuaa->array;
	*outlength = ftuaa->length;
	return (B_TRUE);
}

void
dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, void *arg,
    dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
	uint64_t zero = 0;

	VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);

	spa_feature_incr(spa, f, tx);
	dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);

	switch (spa_feature_table[f].fi_type) {
	case ZFEATURE_TYPE_BOOLEAN:
		ASSERT3S((boolean_t)(uintptr_t)arg, ==, B_TRUE);
		VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
		    sizeof (zero), 1, &zero, tx));
		break;
	case ZFEATURE_TYPE_UINT64_ARRAY:
	{
		struct feature_type_uint64_array_arg *ftuaa = arg;
		VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
		    sizeof (uint64_t), ftuaa->length, ftuaa->array, tx));
		break;
	}
	default:
		panic("Invalid zfeature type %d", spa_feature_table[f].fi_type);
	}
}

static void
dsl_dataset_deactivate_feature_impl(dsl_dataset_t *ds, spa_feature_t f,
    dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
	uint64_t dsobj = ds->ds_object;

	VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);

	VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx));
	spa_feature_decr(spa, f, tx);
	ds->ds_feature[f] = NULL;
}

void
dsl_dataset_deactivate_feature(dsl_dataset_t *ds, spa_feature_t f, dmu_tx_t *tx)
{
	unload_zfeature(ds, f);
	dsl_dataset_deactivate_feature_impl(ds, f, tx);
}

uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	memset(dsphys, 0, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

	if (origin == NULL) {
		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
	} else {
		dsl_dataset_t *ohds; /* head of the origin snapshot */

		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    dsl_dataset_phys(origin)->ds_creation_txg;
		dsphys->ds_referenced_bytes =
		    dsl_dataset_phys(origin)->ds_referenced_bytes;
		dsphys->ds_compressed_bytes =
		    dsl_dataset_phys(origin)->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    dsl_dataset_phys(origin)->ds_uncompressed_bytes;
		rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG);
		dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp;
		rrw_exit(&origin->ds_bp_rwlock, FTAG);

		/*
		 * Inherit flags that describe the dataset's contents
		 * (INCONSISTENT) or properties (Case Insensitive).
		 */
		dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags &
		    (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);

		for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
			if (zfeature_active(f, origin->ds_feature[f])) {
				dsl_dataset_activate_feature(dsobj, f,
				    origin->ds_feature[f], tx);
			}
		}

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		dsl_dataset_phys(origin)->ds_num_children++;

		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj,
		    FTAG, &ohds));
		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
		dsl_dataset_rele(ohds, FTAG);

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) {
				dsl_dataset_phys(origin)->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    dsl_dataset_phys(origin)->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object;
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) {
				dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
				dsl_dir_phys(origin->ds_dir)->dd_clones =
				    zap_create(mos,
				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    dsl_dir_phys(origin->ds_dir)->dd_clones,
			    dsobj, tx));
		}
	}

	/* handle encryption */
	dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx);

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj;

	return (dsobj);
}

static void
dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	objset_t *os;

	VERIFY0(dmu_objset_from_ds(ds, &os));
	if (memcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) {
		dsl_pool_t *dp = ds->ds_dir->dd_pool;
		zio_t *zio;

		memset(&os->os_zil_header, 0, sizeof (os->os_zil_header));
		if (os->os_encrypted)
			os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;

		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
		dsl_dataset_sync(ds, zio, tx);
		VERIFY0(zio_wait(zio));
		dsl_dataset_sync_done(ds, tx);
	}
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr,
    dsl_crypto_params_t *dcp, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(lastname[0] != '@');
	/*
	 * Filesystems will eventually have their origin set to dp_origin_snap,
	 * but that's taken care of in dsl_dataset_create_sync_dd. When
	 * creating a filesystem, this function is called with origin equal to
	 * NULL.
	 */
	if (origin != NULL)
		ASSERT3P(origin, !=, dp->dp_origin_snap);

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp,
	    flags & ~DS_CREATE_FLAG_NODIRTY, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	/*
	 * If we are creating a clone and the livelist feature is enabled,
	 * add the entry DD_FIELD_LIVELIST to ZAP.
	 */
	if (origin != NULL &&
	    spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LIVELIST)) {
		objset_t *mos = dd->dd_pool->dp_meta_objset;
		dsl_dir_zapify(dd, tx);
		uint64_t obj = dsl_deadlist_alloc(mos, tx);
		VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_LIVELIST,
		    sizeof (uint64_t), 1, &obj, tx));
		spa_feature_incr(dp->dp_spa, SPA_FEATURE_LIVELIST, tx);
	}

	/*
	 * Since we're creating a new node we know it's a leaf, so we can
	 * initialize the counts if the limit feature is active.
	 */
	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
		uint64_t cnt = 0;
		objset_t *os = dd->dd_pool->dp_meta_objset;

		dsl_dir_zapify(dd, tx);
		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
		    sizeof (cnt), 1, &cnt, tx));
		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
		    sizeof (cnt), 1, &cnt, tx));
	}

	dsl_dir_rele(dd, FTAG);

	/*
	 * If we are creating a clone, make sure we zero out any stale
	 * data from the origin snapshot's zil header.
	 */
	if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
		dsl_dataset_t *ds;

		VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
		dsl_dataset_zero_zil(ds, tx);
		dsl_dataset_rele(ds, FTAG);
	}

	return (dsobj);
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use. To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!ds->ds_is_snapshot);

	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
		mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	dsl_dataset_phys(ds)->ds_unique_bytes =
	    dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

void
dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count __maybe_unused;
	int err;

	ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2);
	err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
	    obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT. However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT)
		VERIFY0(err);
	ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2);
}

blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&dsl_dataset_phys(ds)->ds_bp);
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}
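
/*
 * Mark the dataset dirty in the current txg.  On the first dirtying
 * in a txg the dataset joins dp_dirty_datasets and takes an extra
 * hold on its dbuf (and, for encrypted datasets, on its crypto key
 * mapping) until it has been synced out.
 */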
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	/* Must not dirty a dataset in the same txg where it got snapshotted. */
	ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg);

	dp = ds->ds_dir->dd_pool;
	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
		objset_t *os = ds->ds_objset;

		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);

		/* if this dataset is encrypted, grab a reference to the DCK */
		if (ds->ds_dir->dd_crypto_obj != 0 &&
		    !os->os_raw_receive &&
		    !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
			ASSERT3P(ds->ds_key_mapping, !=, NULL);
			key_mapping_add_ref(ds->ds_key_mapping, ds);
		}
	}
}

static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
	asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
		return (SET_ERROR(ENOSPC));

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
{
	int error;
	uint64_t value;

	ds->ds_trysnap_txg = tx->tx_txg;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * We don't allow multiple snapshots of the same txg. If there
	 * is already one, try again.
	 */
	if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg)
		return (SET_ERROR(EAGAIN));

	/*
	 * Check for conflicting snapshot name.
	 */
	error = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (error == 0)
		return (SET_ERROR(EEXIST));
	if (error != ENOENT)
		return (error);

	/*
	 * We don't allow taking snapshots of inconsistent datasets, such as
	 * those into which we are currently receiving. However, if we are
	 * creating this snapshot as part of a receive, this check will be
	 * executed atomically with respect to the completion of the receive
	 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
	 * case we ignore this, knowing it will be fixed up for us shortly in
	 * dmu_recv_end_sync().
	 */
	if (!recv && DS_IS_INCONSISTENT(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Skip the check for temporary snapshots or if we have already checked
	 * the counts in dsl_dataset_snapshot_check. This means we really only
	 * check the count here when we're receiving a stream.
	 */
	if (cnt != 0 && cr != NULL) {
		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
		if (error != 0)
			return (error);
	}

	error = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (error != 0)
		return (error);

	return (0);
}

int
dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	/*
	 * Pre-compute how many total new snapshots will be created for each
	 * level in the tree and below. This is needed for validating the
	 * snapshot limit when either taking a recursive snapshot or when
	 * taking multiple snapshots.
	 *
	 * The problem is that the counts are not actually adjusted when
	 * we are checking, only when we finally sync. For a single snapshot,
	 * this is easy, the count will increase by 1 at each node up the tree,
	 * but it's more complicated for the recursive/multiple snapshot case.
	 *
	 * The dsl_fs_ss_limit_check function does recursively check the count
	 * at each level up the tree but since it is validating each snapshot
	 * independently we need to be sure that we are validating the complete
	 * count for the entire set of snapshots. We do this by rolling up the
	 * counts for each component of the name into an nvlist and then
	 * checking each of those cases with the aggregated count.
	 *
	 * This approach properly handles not only the recursive snapshot
	 * case (where we get all of those on the ddsa_snaps list) but also
	 * the sibling case (e.g. snapshot a/b and a/c so that we will also
	 * validate the limit on 'a' using a count of 2).
	 *
	 * We validate the snapshot names in the third loop and only report
	 * name errors once.
	 */
	if (dmu_tx_is_syncing(tx)) {
		char *nm;
		nvlist_t *cnt_track = NULL;
		cnt_track = fnvlist_alloc();

		nm = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/* Rollup aggregated counts into the cnt_track list */
		for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
		    pair != NULL;
		    pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
			char *pdelim;
			uint64_t val;

			(void) strlcpy(nm, nvpair_name(pair), MAXPATHLEN);
			pdelim = strchr(nm, '@');
			if (pdelim == NULL)
				continue;
			*pdelim = '\0';

			do {
				if (nvlist_lookup_uint64(cnt_track, nm,
				    &val) == 0) {
					/* update existing entry */
					fnvlist_add_uint64(cnt_track, nm,
					    val + 1);
				} else {
					/* add to list */
					fnvlist_add_uint64(cnt_track, nm, 1);
				}

				pdelim = strrchr(nm, '/');
				if (pdelim != NULL)
					*pdelim = '\0';
			} while (pdelim != NULL);
		}

		kmem_free(nm, MAXPATHLEN);

		/* Check aggregated counts at each level */
		for (pair = nvlist_next_nvpair(cnt_track, NULL);
		    pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {
			int error = 0;
			const char *name;
			uint64_t cnt = 0;
			dsl_dataset_t *ds;

			name = nvpair_name(pair);
			cnt = fnvpair_value_uint64(pair);
			ASSERT(cnt > 0);

			error = dsl_dataset_hold(dp, name, FTAG, &ds);
			if (error == 0) {
				error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
				    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
				    ddsa->ddsa_cr);
				dsl_dataset_rele(ds, FTAG);
			}

			if (error != 0) {
				if (ddsa->ddsa_errors != NULL)
					fnvlist_add_int32(ddsa->ddsa_errors,
					    name, error);
				rv = error;
				/* only report one error for this check */
				break;
			}
		}
		nvlist_free(cnt_track);
	}

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		const char *name, *atp = NULL;
		char dsname[ZFS_MAX_DATASET_NAME_LEN];

		name = nvpair_name(pair);
		if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN)
			error = SET_ERROR(ENAMETOOLONG);
		if (error == 0) {
			atp = strchr(name, '@');
			if (atp == NULL)
				error = SET_ERROR(EINVAL);
			if (error == 0)
				(void) strlcpy(dsname, name, atp - name + 1);
		}
		if (error == 0)
			error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
		if (error == 0) {
			/* passing 0/NULL skips dsl_fs_ss_limit_check */
			error = dsl_dataset_snapshot_check_impl(ds,
			    atp + 1, tx, B_FALSE, 0, NULL);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			if (ddsa->ddsa_errors != NULL) {
				fnvlist_add_int32(ddsa->ddsa_errors,
				    name, error);
			}
			rv = error;
		}
	}

	return (rv);
}
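
/*
 * Perform the snapshot in syncing context: allocate and fill in the
 * new snapshot's dsl_dataset_phys_t, splice it into the snapshot
 * chain ahead of the head dataset, hand the head's deadlist over to
 * it, and reset the head's unique space to zero.
 */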

void
dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	objset_t *os __maybe_unused;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * If we are on an old pool, the zil must not be active, in which
	 * case it will be zeroed. Usually zil_suspend() accomplishes this.
	 */
	ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
	    dmu_objset_from_ds(ds, &os) != 0 ||
	    memcmp(&os->os_phys->os_zil_header, &zero_zil,
	    sizeof (zero_zil)) == 0);

	/* Should not snapshot a dirty dataset. */
	ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
	    ds, tx->tx_txg));

	dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	memset(dsphys, 0, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
	dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes;
	dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes =
	    dsl_dataset_phys(ds)->ds_uncompressed_bytes;
	dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags;
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	dmu_buf_rele(dbuf, FTAG);

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (zfeature_active(f, ds->ds_feature[f])) {
			dsl_dataset_activate_feature(dsobj, f,
			    ds->ds_feature[f], tx);
		}
	}

	ASSERT3U(ds->ds_prev != 0, ==,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj;
		ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
		    ds->ds_object ||
		    dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1);
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
		    ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
			    dsl_dataset_phys(ds->ds_prev)->ds_creation_txg);
			dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    dsphys->ds_next_snap_obj, tx);
			VERIFY0(zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t delta;
		ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
		delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes,
		    ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    delta, 0, 0, tx);
	}

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj =
	    dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj, tx);
	dsl_deadlist_close(&ds->ds_deadlist);
	VERIFY0(dsl_deadlist_open(&ds->ds_deadlist, mos,
	    dsl_dataset_phys(ds)->ds_deadlist_obj));
	dsl_deadlist_add_key(&ds->ds_deadlist,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
	dsl_bookmark_snapshotted(ds, tx);

	if (dsl_dataset_remap_deadlist_exists(ds)) {
		uint64_t remap_deadlist_obj =
		    dsl_dataset_get_remap_deadlist_object(ds);
		/*
		 * Move the remap_deadlist to the snapshot. The head
		 * will create a new remap deadlist on demand, from
		 * dsl_dataset_block_remapped().
		 */
		dsl_dataset_unset_remap_deadlist_object(ds, tx);
		dsl_deadlist_close(&ds->ds_remap_deadlist);

		dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
		VERIFY0(zap_add(mos, dsobj, DS_FIELD_REMAP_DEADLIST,
		    sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx));
	}

	/*
	 * Create an ivset guid for this snapshot if the dataset is
	 * encrypted. This may be overridden by a raw receive. A
	 * previous implementation of this code did not have this
	 * field as part of the on-disk format for ZFS encryption
	 * (see errata #4). As part of the remediation for this
	 * issue, we ask the user to enable the bookmark_v2 feature
	 * which is now a dependency of the encryption feature. We
	 * use this as a heuristic to determine when the user has
	 * elected to correct any datasets created with the old code.
	 * As a result, we only do this step if the bookmark_v2
	 * feature is enabled, which limits the number of states a
	 * given pool / dataset can be in with regard to correcting
	 * the issue.
	 */
	if (ds->ds_dir->dd_crypto_obj != 0 &&
	    spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2)) {
		uint64_t ivset_guid = unique_create();

		dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
		VERIFY0(zap_add(mos, dsobj, DS_FIELD_IVSET_GUID,
		    sizeof (ivset_guid), 1, &ivset_guid, tx));
	}

	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg);
	dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj;
	dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg;
	dsl_dataset_phys(ds)->ds_unique_bytes = 0;

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx));

	if (ds->ds_prev)
		dsl_dataset_rele(ds->ds_prev, ds);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_scan_ds_snapshotted(ds, tx);

	dsl_dir_snap_cmtime_update(ds->ds_dir, tx);

	if (zfs_snapshot_history_enabled)
		spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");
}

void
dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		dsl_dataset_t *ds;
		const char *name, *atp;
		char dsname[ZFS_MAX_DATASET_NAME_LEN];

		name = nvpair_name(pair);
		atp = strchr(name, '@');
		(void) strlcpy(dsname, name, atp - name + 1);
		VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));

		dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
		if (ddsa->ddsa_props != NULL) {
			dsl_props_set_sync_impl(ds->ds_prev,
			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
		}
		dsl_dataset_rele(ds, FTAG);
	}
}

/*
 * The snapshots must all be in the same pool.
 * All-or-nothing: if there are any failures, nothing will be modified.
 */
int
dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
{
	dsl_dataset_snapshot_arg_t ddsa;
	nvpair_t *pair;
	boolean_t needsuspend;
	int error;
	spa_t *spa;
	const char *firstname;
	nvlist_t *suspended = NULL;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);
	firstname = nvpair_name(pair);

	error = spa_open(firstname, &spa, FTAG);
	if (error != 0)
		return (error);
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		suspended = fnvlist_alloc();
		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(snaps, pair)) {
			char fsname[ZFS_MAX_DATASET_NAME_LEN];
			const char *snapname = nvpair_name(pair);
			const char *atp;
			void *cookie;

			atp = strchr(snapname, '@');
			if (atp == NULL) {
				error = SET_ERROR(EINVAL);
				break;
			}
			(void) strlcpy(fsname, snapname, atp - snapname + 1);

			error = zil_suspend(fsname, &cookie);
			if (error != 0)
				break;
			fnvlist_add_uint64(suspended, fsname,
			    (uintptr_t)cookie);
		}
	}

	cred_t *cr = CRED();
	crhold(cr);

	ddsa.ddsa_snaps = snaps;
	ddsa.ddsa_props = props;
	ddsa.ddsa_errors = errors;
	ddsa.ddsa_cr = cr;

	if (error == 0) {
		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
		    dsl_dataset_snapshot_sync, &ddsa,
		    fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL);
	}

	crfree(cr);

	if (suspended != NULL) {
		for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(suspended, pair)) {
			zil_resume((void *)(uintptr_t)
			    fnvpair_value_uint64(pair));
		}
		fnvlist_free(suspended);
	}

	if (error == 0) {
		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(snaps, pair)) {
			zvol_create_minors(nvpair_name(pair));
		}
	}

	return (error);
}
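
/*
 * Illustrative (non-authoritative) sketch of how a caller might drive
 * dsl_dataset_snapshot(). The snapshot names and pool below are
 * hypothetical, and error/props handling is reduced to the minimum:
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "tank/fs@backup");	// full snap names
 *	fnvlist_add_boolean(snaps, "tank/vol@backup");	// same pool only
 *	int err = dsl_dataset_snapshot(snaps, NULL, NULL);
 *	fnvlist_free(snaps);
 *
 * On failure nothing is created; per the comment above, the operation
 * is all-or-nothing across every name in the list.
 */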

typedef struct dsl_dataset_snapshot_tmp_arg {
	const char *ddsta_fsname;
	const char *ddsta_snapname;
	minor_t ddsta_cleanup_minor;
	const char *ddsta_htag;
} dsl_dataset_snapshot_tmp_arg_t;

static int
dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
	if (error != 0)
		return (error);

	/* NULL cred means no limit check for tmp snapshot */
	error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
	    tx, B_FALSE, 0, NULL);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
	    B_TRUE, tx);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds = NULL;

	VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));

	dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
	dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
	    ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
	dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);

	dsl_dataset_rele(ds, FTAG);
}

int
dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
    minor_t cleanup_minor, const char *htag)
{
	dsl_dataset_snapshot_tmp_arg_t ddsta;
	int error;
	spa_t *spa;
	boolean_t needsuspend;
	void *cookie;

	ddsta.ddsta_fsname = fsname;
	ddsta.ddsta_snapname = snapname;
	ddsta.ddsta_cleanup_minor = cleanup_minor;
	ddsta.ddsta_htag = htag;

	error = spa_open(fsname, &spa, FTAG);
	if (error != 0)
		return (error);
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		error = zil_suspend(fsname, &cookie);
		if (error != 0)
			return (error);
	}

	error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
	    dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED);

	if (needsuspend)
		zil_resume(cookie);
	return (error);
}

/* Nonblocking dataset sync. Assumes dataset:objset is always 1:1 */
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_objset != NULL);
	ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid;

	if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) {
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1,
		    &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx));
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1,
		    &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx));
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1,
		    &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx));
		ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0;
		ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0;
		ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
	}

	dmu_objset_sync(ds->ds_objset, rio, tx);
}

/*
 * Check if the percentage of blocks shared between the clone and the
 * snapshot (as opposed to those that are clone only) is below a certain
 * threshold.
 */
static boolean_t
dsl_livelist_should_disable(dsl_dataset_t *ds)
{
	uint64_t used, referenced;
	int percent_shared;

	used = dsl_dir_get_usedds(ds->ds_dir);
	referenced = dsl_get_referenced(ds);
	if (referenced == 0)
		return (B_FALSE);
	percent_shared = (100 * (referenced - used)) / referenced;
	if (percent_shared <= zfs_livelist_min_percent_shared)
		return (B_TRUE);
	return (B_FALSE);
}
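
/*
 * Worked example (illustrative numbers only): with usedds = 800 and
 * referenced = 1000, percent_shared = 100 * (1000 - 800) / 1000 = 20.
 * Assuming zfs_livelist_min_percent_shared is at its usual default of
 * 75, 20 <= 75 holds, so the livelist would be disabled: the clone has
 * diverged from its origin enough that tracking it no longer pays off.
 */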

/*
 * Check if it is possible to combine two livelist entries into one.
 * This is the case if the combined number of 'live' blkptrs (ALLOCs that
 * don't have a matching FREE) is under the maximum sublist size.
 * We check this by subtracting twice the total number of frees from the total
 * number of blkptrs. FREEs are counted twice because each FREE blkptr
 * will cancel out an ALLOC blkptr when the livelist is processed.
 */
static boolean_t
dsl_livelist_should_condense(dsl_deadlist_entry_t *first,
    dsl_deadlist_entry_t *next)
{
	uint64_t total_free = first->dle_bpobj.bpo_phys->bpo_num_freed +
	    next->dle_bpobj.bpo_phys->bpo_num_freed;
	uint64_t total_entries = first->dle_bpobj.bpo_phys->bpo_num_blkptrs +
	    next->dle_bpobj.bpo_phys->bpo_num_blkptrs;
	if ((total_entries - (2 * total_free)) < zfs_livelist_max_entries)
		return (B_TRUE);
	return (B_FALSE);
}
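
/*
 * Worked example (illustrative numbers only): if 'first' holds 600
 * blkptrs of which 200 are FREEs, and 'next' holds 400 of which 100
 * are FREEs, then total_entries = 1000 and total_free = 300, so the
 * number of live blkptrs after processing is 1000 - 2 * 300 = 400
 * (each FREE removes itself and cancels one ALLOC). The pair is
 * condensable when 400 < zfs_livelist_max_entries.
 */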

typedef struct try_condense_arg {
	spa_t *spa;
	dsl_dataset_t *ds;
} try_condense_arg_t;

/*
 * Iterate over the livelist entries, searching for a pair to condense.
 * A nonzero return value means stop, 0 means keep looking.
 */
static int
dsl_livelist_try_condense(void *arg, dsl_deadlist_entry_t *first)
{
	try_condense_arg_t *tca = arg;
	spa_t *spa = tca->spa;
	dsl_dataset_t *ds = tca->ds;
	dsl_deadlist_t *ll = &ds->ds_dir->dd_livelist;
	dsl_deadlist_entry_t *next;

	/* The condense thread has not yet been created at import */
	if (spa->spa_livelist_condense_zthr == NULL)
		return (1);

	/* A condense is already in progress */
	if (spa->spa_to_condense.ds != NULL)
		return (1);

	next = AVL_NEXT(&ll->dl_tree, &first->dle_node);
	/* The livelist has only one entry - don't condense it */
	if (next == NULL)
		return (1);

	/* Next is the newest entry - don't condense it */
	if (AVL_NEXT(&ll->dl_tree, &next->dle_node) == NULL)
		return (1);

	/* This pair is not ready to condense but keep looking */
	if (!dsl_livelist_should_condense(first, next))
		return (0);

	/*
	 * Add a ref to prevent the dataset from being evicted while
	 * the condense zthr or synctask are running. Ref will be
	 * released at the end of the condense synctask
	 */
	dmu_buf_add_ref(ds->ds_dbuf, spa);

	spa->spa_to_condense.ds = ds;
	spa->spa_to_condense.first = first;
	spa->spa_to_condense.next = next;
	spa->spa_to_condense.syncing = B_FALSE;
	spa->spa_to_condense.cancelled = B_FALSE;

	zthr_wakeup(spa->spa_livelist_condense_zthr);
	return (1);
}

static void
dsl_flush_pending_livelist(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_dir_t *dd = ds->ds_dir;
	spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
	dsl_deadlist_entry_t *last = dsl_deadlist_last(&dd->dd_livelist);

	/* Check if we need to add a new sub-livelist */
	if (last == NULL) {
		/* The livelist is empty */
		dsl_deadlist_add_key(&dd->dd_livelist,
		    tx->tx_txg - 1, tx);
	} else if (spa_sync_pass(spa) == 1) {
		/*
		 * Check if the newest entry is full. If it is, make a new one.
		 * We only do this once per sync because we could overfill a
		 * sublist in one sync pass and don't want to add another entry
		 * for a txg that is already represented. This ensures that
		 * blkptrs born in the same txg are stored in the same sublist.
		 */
		bpobj_t bpobj = last->dle_bpobj;
		uint64_t all = bpobj.bpo_phys->bpo_num_blkptrs;
		uint64_t free = bpobj.bpo_phys->bpo_num_freed;
		uint64_t alloc = all - free;
		if (alloc > zfs_livelist_max_entries) {
			dsl_deadlist_add_key(&dd->dd_livelist,
			    tx->tx_txg - 1, tx);
		}
	}

	/* Insert each entry into the on-disk livelist */
	bplist_iterate(&dd->dd_pending_allocs,
	    dsl_deadlist_insert_alloc_cb, &dd->dd_livelist, tx);
	bplist_iterate(&dd->dd_pending_frees,
	    dsl_deadlist_insert_free_cb, &dd->dd_livelist, tx);

	/* Attempt to condense every pair of adjacent entries */
	try_condense_arg_t arg = {
	    .spa = spa,
	    .ds = ds
	};
	dsl_deadlist_iterate(&dd->dd_livelist, dsl_livelist_try_condense,
	    &arg);
}

void
dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	objset_t *os = ds->ds_objset;

	bplist_iterate(&ds->ds_pending_deadlist,
	    dsl_deadlist_insert_alloc_cb, &ds->ds_deadlist, tx);

	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) {
		dsl_flush_pending_livelist(ds, tx);
		if (dsl_livelist_should_disable(ds)) {
			dsl_dir_remove_livelist(ds->ds_dir, tx, B_TRUE);
		}
	}

	dsl_bookmark_sync_done(ds, tx);

	multilist_destroy(&os->os_synced_dnodes);

	if (os->os_encrypted)
		os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
	else
		ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (zfeature_active(f,
		    ds->ds_feature_activation[f])) {
			if (zfeature_active(f, ds->ds_feature[f]))
				continue;
			dsl_dataset_activate_feature(ds->ds_object, f,
			    ds->ds_feature_activation[f], tx);
			ds->ds_feature[f] = ds->ds_feature_activation[f];
		}
	}

	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
}

int
get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val)
{
	uint64_t count = 0;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t *za;

	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));

	/*
	 * There may be missing entries in ds_next_clones_obj
	 * due to a bug in a previous version of the code.
	 * Only trust it if it has the right number of entries.
	 */
	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
		VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
		    &count));
	}
	if (count != dsl_dataset_phys(ds)->ds_num_children - 1) {
		return (SET_ERROR(ENOENT));
	}

	za = zap_attribute_alloc();
	for (zap_cursor_init(&zc, mos,
	    dsl_dataset_phys(ds)->ds_next_clones_obj);
	    zap_cursor_retrieve(&zc, za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;
		char buf[ZFS_MAX_DATASET_NAME_LEN];
		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za->za_first_integer, FTAG, &clone));
		dsl_dir_name(clone->ds_dir, buf);
		fnvlist_add_boolean(val, buf);
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
	zap_attribute_free(za);
	return (0);
}

void
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
{
	nvlist_t *propval = fnvlist_alloc();
	nvlist_t *val = fnvlist_alloc();

	if (get_clones_stat_impl(ds, val) == 0) {
		fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
		fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
		    propval);
	}

	nvlist_free(val);
	nvlist_free(propval);
}

static char *
get_receive_resume_token_impl(dsl_dataset_t *ds)
{
	if (!dsl_dataset_has_resume_receive_state(ds))
		return (NULL);

	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	char *str;
	void *packed;
	uint8_t *compressed;
	uint64_t val;
	nvlist_t *token_nv = fnvlist_alloc();
	size_t packed_size, compressed_size;

	if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) {
		fnvlist_add_uint64(token_nv, "fromguid", val);
	}
	if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) {
		fnvlist_add_uint64(token_nv, "object", val);
	}
	if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) {
		fnvlist_add_uint64(token_nv, "offset", val);
	}
	if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) {
		fnvlist_add_uint64(token_nv, "bytes", val);
	}
	if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) {
		fnvlist_add_uint64(token_nv, "toguid", val);
	}
	char buf[MAXNAMELEN];
	if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {
		fnvlist_add_string(token_nv, "toname", buf);
	}
	if (zap_contains(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_LARGEBLOCK) == 0) {
		fnvlist_add_boolean(token_nv, "largeblockok");
	}
	if (zap_contains(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_EMBEDOK) == 0) {
		fnvlist_add_boolean(token_nv, "embedok");
	}
	if (zap_contains(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_COMPRESSOK) == 0) {
		fnvlist_add_boolean(token_nv, "compressok");
	}
	if (zap_contains(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_RAWOK) == 0) {
		fnvlist_add_boolean(token_nv, "rawok");
	}
	if (dsl_dataset_feature_is_active(ds,
	    SPA_FEATURE_REDACTED_DATASETS)) {
		uint64_t num_redact_snaps = 0;
		uint64_t *redact_snaps = NULL;
		VERIFY3B(dsl_dataset_get_uint64_array_feature(ds,
		    SPA_FEATURE_REDACTED_DATASETS, &num_redact_snaps,
		    &redact_snaps), ==, B_TRUE);
		fnvlist_add_uint64_array(token_nv, "redact_snaps",
		    redact_snaps, num_redact_snaps);
	}
	if (zap_contains(dp->dp_meta_objset, ds->ds_object,
	    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS) == 0) {
		uint64_t num_redact_snaps = 0, int_size = 0;
		uint64_t *redact_snaps = NULL;
		VERIFY0(zap_length(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, &int_size,
		    &num_redact_snaps));
		ASSERT3U(int_size, ==, sizeof (uint64_t));

		redact_snaps = kmem_alloc(int_size * num_redact_snaps,
		    KM_SLEEP);
		VERIFY0(zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, int_size,
		    num_redact_snaps, redact_snaps));
		fnvlist_add_uint64_array(token_nv, "book_redact_snaps",
		    redact_snaps, num_redact_snaps);
		kmem_free(redact_snaps, int_size * num_redact_snaps);
	}
	packed = fnvlist_pack(token_nv, &packed_size);
	fnvlist_free(token_nv);
	compressed = kmem_alloc(packed_size, KM_SLEEP);

	/* Call compress function directly to avoid hole detection. */
	abd_t pabd, cabd;
	abd_get_from_buf_struct(&pabd, packed, packed_size);
	abd_get_from_buf_struct(&cabd, compressed, packed_size);
	compressed_size = zfs_gzip_compress(&pabd, &cabd,
	    packed_size, packed_size, 6);
	abd_free(&cabd);
	abd_free(&pabd);

	zio_cksum_t cksum;
	fletcher_4_native_varsize(compressed, compressed_size, &cksum);

	size_t alloc_size = compressed_size * 2 + 1;
	str = kmem_alloc(alloc_size, KM_SLEEP);
	for (int i = 0; i < compressed_size; i++) {
		size_t offset = i * 2;
		(void) snprintf(str + offset, alloc_size - offset,
		    "%02x", compressed[i]);
	}
	str[compressed_size * 2] = '\0';
	char *propval = kmem_asprintf("%u-%llx-%llx-%s",
	    ZFS_SEND_RESUME_TOKEN_VERSION,
	    (longlong_t)cksum.zc_word[0],
	    (longlong_t)packed_size, str);
	kmem_free(packed, packed_size);
	kmem_free(str, alloc_size);
	kmem_free(compressed, packed_size);
	return (propval);
}
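
/*
 * Illustrative sketch of the token layout assembled above (the field
 * values here are made up):
 *
 *	1-deadbeef12345678-2a0-<hex of gzip'd packed nvlist>
 *	^version ^fletcher-4 word 0 of the compressed blob, in hex,
 *	         then the *uncompressed* packed size in hex.
 *
 * A consumer reverses the steps: split on the dashes, hex-decode the
 * tail, gzip-decompress back to packed_size bytes, then nvlist-unpack.
 */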

/*
 * Returns a string that represents the receive resume state token. It should
 * be freed with strfree(). NULL is returned if no resume state is present.
 */
char *
get_receive_resume_token(dsl_dataset_t *ds)
{
	/*
	 * A failed "newfs" (e.g. full) resumable receive leaves
	 * the stats set on this dataset. Check here for the prop.
	 */
	char *token = get_receive_resume_token_impl(ds);
	if (token != NULL)
		return (token);
	/*
	 * A failed incremental resumable receive leaves the
	 * stats set on our child named "%recv". Check the child
	 * for the prop.
	 */
	/* 6 extra bytes for /%recv */
	char name[ZFS_MAX_DATASET_NAME_LEN + 6];
	dsl_dataset_t *recv_ds;
	dsl_dataset_name(ds, name);
	if (strlcat(name, "/", sizeof (name)) < sizeof (name) &&
	    strlcat(name, recv_clone_name, sizeof (name)) < sizeof (name) &&
	    dsl_dataset_hold(ds->ds_dir->dd_pool, name, FTAG, &recv_ds) == 0) {
		token = get_receive_resume_token_impl(recv_ds);
		dsl_dataset_rele(recv_ds, FTAG);
	}
	return (token);
}

uint64_t
dsl_get_refratio(dsl_dataset_t *ds)
{
	uint64_t ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
	    (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
	    dsl_dataset_phys(ds)->ds_compressed_bytes);
	return (ratio);
}
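
/*
 * Worked example (illustrative numbers): with 300 MiB logically
 * referenced and 100 MiB after compression, the ratio is
 * 300 * 100 / 100 = 300, which userland renders as "3.00x". A dataset
 * with no compressed bytes reports 100, i.e. "1.00x".
 */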

uint64_t
dsl_get_logicalreferenced(dsl_dataset_t *ds)
{
	return (dsl_dataset_phys(ds)->ds_uncompressed_bytes);
}

uint64_t
dsl_get_compressratio(dsl_dataset_t *ds)
{
	if (ds->ds_is_snapshot) {
		return (dsl_get_refratio(ds));
	} else {
		dsl_dir_t *dd = ds->ds_dir;
		mutex_enter(&dd->dd_lock);
		uint64_t val = dsl_dir_get_compressratio(dd);
		mutex_exit(&dd->dd_lock);
		return (val);
	}
}

uint64_t
dsl_get_used(dsl_dataset_t *ds)
{
	if (ds->ds_is_snapshot) {
		return (dsl_dataset_phys(ds)->ds_unique_bytes);
	} else {
		dsl_dir_t *dd = ds->ds_dir;
		mutex_enter(&dd->dd_lock);
		uint64_t val = dsl_dir_get_used(dd);
		mutex_exit(&dd->dd_lock);
		return (val);
	}
}

uint64_t
dsl_get_creation(dsl_dataset_t *ds)
{
	return (dsl_dataset_phys(ds)->ds_creation_time);
}

uint64_t
dsl_get_creationtxg(dsl_dataset_t *ds)
{
	return (dsl_dataset_phys(ds)->ds_creation_txg);
}

uint64_t
dsl_get_refquota(dsl_dataset_t *ds)
{
	return (ds->ds_quota);
}

uint64_t
dsl_get_refreservation(dsl_dataset_t *ds)
{
	return (ds->ds_reserved);
}

uint64_t
dsl_get_guid(dsl_dataset_t *ds)
{
	return (dsl_dataset_phys(ds)->ds_guid);
}

uint64_t
dsl_get_unique(dsl_dataset_t *ds)
{
	return (dsl_dataset_phys(ds)->ds_unique_bytes);
}

uint64_t
dsl_get_objsetid(dsl_dataset_t *ds)
{
	return (ds->ds_object);
}

uint64_t
dsl_get_userrefs(dsl_dataset_t *ds)
{
	return (ds->ds_userrefs);
}

uint64_t
dsl_get_defer_destroy(dsl_dataset_t *ds)
{
	return (DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
}

uint64_t
dsl_get_referenced(dsl_dataset_t *ds)
{
	return (dsl_dataset_phys(ds)->ds_referenced_bytes);
}

uint64_t
dsl_get_numclones(dsl_dataset_t *ds)
{
	ASSERT(ds->ds_is_snapshot);
	return (dsl_dataset_phys(ds)->ds_num_children - 1);
}

uint64_t
dsl_get_inconsistent(dsl_dataset_t *ds)
{
	return ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) ?
	    1 : 0);
}

uint64_t
dsl_get_redacted(dsl_dataset_t *ds)
{
	return (dsl_dataset_feature_is_active(ds,
	    SPA_FEATURE_REDACTED_DATASETS));
}

uint64_t
dsl_get_available(dsl_dataset_t *ds)
{
	uint64_t refdbytes = dsl_get_referenced(ds);
	uint64_t availbytes = dsl_dir_space_available(ds->ds_dir,
	    NULL, 0, TRUE);
	if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) {
		availbytes +=
		    ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes;
	}
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (refdbytes < ds->ds_quota) {
			availbytes = MIN(availbytes,
			    ds->ds_quota - refdbytes);
		} else {
			availbytes = 0;
		}
	}
	return (availbytes);
}

int
dsl_get_written(dsl_dataset_t *ds, uint64_t *written)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dsl_dataset_t *prev;
	int err = dsl_dataset_hold_obj(dp,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
	if (err == 0) {
		uint64_t comp, uncomp;
		err = dsl_dataset_space_written(prev, ds, written,
		    &comp, &uncomp);
		dsl_dataset_rele(prev, FTAG);
	}
	return (err);
}

/*
 * 'snap' should be a buffer of size ZFS_MAX_DATASET_NAME_LEN.
 */
int
dsl_get_prev_snap(dsl_dataset_t *ds, char *snap)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
		dsl_dataset_name(ds->ds_prev, snap);
		return (0);
	} else {
		return (SET_ERROR(ENOENT));
	}
}

void
dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval)
{
	uint64_t nsnaps;
	uint64_t *snaps;
	if (dsl_dataset_get_uint64_array_feature(ds,
	    SPA_FEATURE_REDACTED_DATASETS, &nsnaps, &snaps)) {
		fnvlist_add_uint64_array(propval, ZPROP_VALUE, snaps,
		    nsnaps);
	}
}

/*
 * Returns the mountpoint property and source for the given dataset in the
 * value and source buffers. The value buffer must be at least as large as
 * MAXPATHLEN and the source buffer at least as large as
 * ZFS_MAX_DATASET_NAME_LEN. Returns 0 on success and an error on failure.
 */
int
dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
    char *source)
{
	int error;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Retrieve the mountpoint value stored in the zap object */
	error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1,
	    ZAP_MAXVALUELEN, value, source);
	if (error != 0) {
		return (error);
	}

	/*
	 * Process the dsname and source to find the full mountpoint string.
	 * Can be skipped for 'legacy' or 'none'.
	 */
	if (value[0] == '/') {
		char *buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
		char *root = buf;
		const char *relpath;

		/*
		 * If we inherit the mountpoint, even from a dataset
		 * with a received value, the source will be the path of
		 * the dataset we inherit from. If source is
		 * ZPROP_SOURCE_VAL_RECVD, the received value is not
		 * inherited.
		 */
		if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) {
			relpath = "";
		} else {
			ASSERT0(strncmp(dsname, source, strlen(source)));
			relpath = dsname + strlen(source);
			if (relpath[0] == '/')
				relpath++;
		}

		spa_altroot(dp->dp_spa, root, ZAP_MAXVALUELEN);

		/*
		 * Special case an alternate root of '/'. This will
		 * avoid having multiple leading slashes in the
		 * mountpoint path.
		 */
		if (strcmp(root, "/") == 0)
			root++;

		/*
		 * If the mountpoint is '/' then skip over this
		 * if we are obtaining either an alternate root or
		 * an inherited mountpoint.
		 */
		char *mnt = value;
		if (value[1] == '\0' && (root[0] != '\0' ||
		    relpath[0] != '\0'))
			mnt = value + 1;

		mnt = kmem_strdup(mnt);

		if (relpath[0] == '\0') {
			(void) snprintf(value, ZAP_MAXVALUELEN, "%s%s",
			    root, mnt);
		} else {
			(void) snprintf(value, ZAP_MAXVALUELEN, "%s%s%s%s",
			    root, mnt, relpath[0] == '@' ? "" : "/",
			    relpath);
		}
		kmem_free(buf, ZAP_MAXVALUELEN);
		kmem_strfree(mnt);
	}

	return (0);
}
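
/*
 * Worked example (hypothetical names): for dsname "tank/home/user"
 * with a mountpoint of "/export" inherited from source "tank/home",
 * and an altroot of "/mnt", relpath becomes "user" and the final value
 * is composed as "/mnt" + "/export" + "/" + "user" = "/mnt/export/user".
 */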

void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;

	ASSERT(dsl_pool_config_held(dp));

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO,
	    dsl_get_refratio(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
	    dsl_get_logicalreferenced(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
	    dsl_get_compressratio(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
	    dsl_get_used(ds));

	if (ds->ds_is_snapshot) {
		get_clones_stat(ds, nv);
	} else {
		char buf[ZFS_MAX_DATASET_NAME_LEN];
		if (dsl_get_prev_snap(ds, buf) == 0)
			dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP,
			    buf);
		dsl_dir_stats(ds->ds_dir, nv);
	}

	nvlist_t *propval = fnvlist_alloc();
	dsl_get_redact_snaps(ds, propval);
	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
	    propval);
	nvlist_free(propval);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
	    dsl_get_available(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
	    dsl_get_referenced(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    dsl_get_creation(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    dsl_get_creationtxg(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    dsl_get_refquota(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    dsl_get_refreservation(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    dsl_get_guid(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
	    dsl_get_unique(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
	    dsl_get_objsetid(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
	    dsl_get_userrefs(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
	    dsl_get_defer_destroy(ds));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOTS_CHANGED,
	    dsl_dir_snap_cmtime(ds->ds_dir).tv_sec);
	dsl_dataset_crypt_stats(ds, nv);

	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		uint64_t written;
		if (dsl_get_written(ds, &written) == 0) {
			dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
			    written);
		}
	}

	if (!dsl_dataset_is_snapshot(ds)) {
		char *token = get_receive_resume_token(ds);
		if (token != NULL) {
			dsl_prop_nvlist_add_string(nv,
			    ZFS_PROP_RECEIVE_RESUME_TOKEN, token);
			kmem_strfree(token);
		}
	}
}

void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;
	ASSERT(dsl_pool_config_held(dp));

	stat->dds_creation_txg = dsl_get_creationtxg(ds);
	stat->dds_inconsistent = dsl_get_inconsistent(ds);
	stat->dds_guid = dsl_get_guid(ds);
	stat->dds_redacted = dsl_get_redacted(ds);
	stat->dds_origin[0] = '\0';
	stat->dds_flags = DDS_FLAG_HAS_ENCRYPTED;
	if (ds->ds_dir->dd_crypto_obj != 0)
		stat->dds_flags |= DDS_FLAG_ENCRYPTED;
	if (ds->ds_is_snapshot) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = dsl_get_numclones(ds);
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;

		if (dsl_dir_is_clone(ds->ds_dir)) {
			dsl_dir_get_origin(ds->ds_dir, stat->dds_origin);
		}
	}
}

uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes)
		*availbytesp +=
		    ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	*usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

boolean_t
dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
{
	dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;
	uint64_t birth;

	ASSERT(dsl_pool_config_held(dp));
	if (snap == NULL)
		return (B_FALSE);
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	birth = BP_GET_BIRTH(dsl_dataset_get_blkptr(ds));
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
		objset_t *os, *os_snap;
		/*
		 * It may be that only the ZIL differs, because it was
		 * reset in the head. Don't count that as being
		 * modified.
		 */
		if (dmu_objset_from_ds(ds, &os) != 0)
			return (B_TRUE);
		if (dmu_objset_from_ds(snap, &os_snap) != 0)
			return (B_TRUE);
		return (memcmp(&os->os_phys->os_meta_dnode,
		    &os_snap->os_phys->os_meta_dnode,
		    sizeof (os->os_phys->os_meta_dnode)) != 0);
	}
	return (B_FALSE);
}

static int
dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	(void) dp;
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	int error;
	uint64_t val;

	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	if (error != 0) {
		/* ignore nonexistent snapshots */
		return (error == ENOENT ? 0 : error);
	}

	/* new name should not exist */
	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
	if (error == 0)
		error = SET_ERROR(EEXIST);
	else if (error == ENOENT)
		error = 0;

	/* dataset name + 1 for the "@" + the new snapshot name must fit */
	if (dsl_dir_namelen(hds->ds_dir) + 1 +
	    strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN)
		error = SET_ERROR(ENAMETOOLONG);

	return (error);
}

int
dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	int error;

	error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
	if (error != 0)
		return (error);

	if (ddrsa->ddrsa_recursive) {
		error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
		    dsl_dataset_rename_snapshot_check_impl, ddrsa,
		    DS_FIND_CHILDREN);
	} else {
		error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
	}
	dsl_dataset_rele(hds, FTAG);
	return (error);
}

static int
dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_dataset_t *ds;
	uint64_t val;
	dmu_tx_t *tx = ddrsa->ddrsa_tx;
	char *oldname, *newname;
	int error;

	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	ASSERT(error == 0 || error == ENOENT);
	if (error == ENOENT) {
		/* ignore nonexistent snapshots */
		return (0);
	}

	VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));

	/* log before we change the name */
	spa_history_log_internal_ds(ds, "rename", tx,
	    "-> @%s", ddrsa->ddrsa_newsnapname);

	VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
	    B_FALSE));
	mutex_enter(&ds->ds_lock);
	(void) strlcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname,
	    sizeof (ds->ds_snapname));
	mutex_exit(&ds->ds_lock);
	VERIFY0(zap_add(dp->dp_meta_objset,
	    dsl_dataset_phys(hds)->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx));

	oldname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,
	    ddrsa->ddrsa_oldsnapname);
	newname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,
	    ddrsa->ddrsa_newsnapname);
	zvol_rename_minors(dp->dp_spa, oldname, newname, B_TRUE);
	kmem_strfree(oldname);
	kmem_strfree(newname);

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

void
dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds = NULL;

	VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
	ddrsa->ddrsa_tx = tx;
	if (ddrsa->ddrsa_recursive) {
		VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
		    dsl_dataset_rename_snapshot_sync_impl, ddrsa,
		    DS_FIND_CHILDREN));
	} else {
		VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
	}
	dsl_dataset_rele(hds, FTAG);
}

int
dsl_dataset_rename_snapshot(const char *fsname,
    const char *oldsnapname, const char *newsnapname, boolean_t recursive)
{
	dsl_dataset_rename_snapshot_arg_t ddrsa;

	ddrsa.ddrsa_fsname = fsname;
	ddrsa.ddrsa_oldsnapname = oldsnapname;
	ddrsa.ddrsa_newsnapname = newsnapname;
	ddrsa.ddrsa_recursive = recursive;

	return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
	    dsl_dataset_rename_snapshot_sync, &ddrsa,
	    1, ZFS_SPACE_CHECK_RESERVED));
}

/*
 * If we're doing an ownership handoff, we need to make sure that there is
 * only one long hold on the dataset. We're not allowed to change anything here
 * so we don't permanently release the long hold or regular hold here. We want
 * to do this only when syncing to avoid the dataset unexpectedly going away
 * when we release the long hold.
 */
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
	boolean_t held = B_FALSE;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	dsl_dir_t *dd = ds->ds_dir;
	mutex_enter(&dd->dd_activity_lock);
	uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
	    (owner != NULL ? 1 : 0);
	/*
	 * The value of dd_activity_waiters can change as soon as we drop the
	 * lock, but we're fine with that; new waiters coming in or old
	 * waiters leaving doesn't cause problems, since we're going to cancel
	 * waiters later anyway. The goal of this check is to verify that no
	 * non-waiters have long-holds, and all new long-holds will be
	 * prevented because we're holding the pool config as writer.
	 */
	if (holds != dd->dd_activity_waiters)
		held = B_TRUE;
	mutex_exit(&dd->dd_activity_lock);

	if (held)
		return (SET_ERROR(EBUSY));

	return (0);
}
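
/*
 * Worked example (illustrative): with 3 long holds on the dataset, an
 * owner passed in (so holds = 3 - 1 = 2), and 1 activity waiter, the
 * check sees 2 != 1 and returns EBUSY: some non-waiter other than the
 * owner still long-holds the dataset, so the handoff cannot proceed.
 */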

int
dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rollback_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int64_t unused_refres_delta;
	int error;

	error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
	if (error != 0)
		return (error);

	/* must not be a snapshot */
	if (ds->ds_is_snapshot) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/* must have a most recent snapshot */
	if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ESRCH));
	}

	/*
	 * No rollback to a snapshot created in the current txg, because
	 * the rollback may dirty the dataset and create blocks that are
	 * not reachable from the rootbp while having a birth txg that
	 * falls into the snapshot's range.
	 */
	if (dmu_tx_is_syncing(tx) &&
	    dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EAGAIN));
	}

	/*
	 * If the expected target snapshot is specified, then check that
	 * the latest snapshot is it.
	 */
	if (ddra->ddra_tosnap != NULL) {
		dsl_dataset_t *snapds;

		/* Check if the target snapshot exists at all. */
		error = dsl_dataset_hold(dp, ddra->ddra_tosnap, FTAG, &snapds);
		if (error != 0) {
			/*
			 * ESRCH is used to signal that the target snapshot does
			 * not exist, while ENOENT is used to report that
			 * the rolled back dataset does not exist.
			 * ESRCH is also used to cover other cases where the
			 * target snapshot is not related to the dataset being
			 * rolled back such as being in a different pool.
			 */
			if (error == ENOENT || error == EXDEV)
				error = SET_ERROR(ESRCH);
			dsl_dataset_rele(ds, FTAG);
			return (error);
		}
		ASSERT(snapds->ds_is_snapshot);

		/* Check if the snapshot is the latest snapshot indeed. */
		if (snapds != ds->ds_prev) {
			/*
			 * Distinguish between the case where the only problem
			 * is intervening snapshots (EEXIST) vs the snapshot
			 * not being a valid target for rollback (ESRCH).
			 */
			if (snapds->ds_dir == ds->ds_dir ||
			    (dsl_dir_is_clone(ds->ds_dir) &&
			    dsl_dir_phys(ds->ds_dir)->dd_origin_obj ==
			    snapds->ds_object)) {
				error = SET_ERROR(EEXIST);
			} else {
				error = SET_ERROR(ESRCH);
			}
			dsl_dataset_rele(snapds, FTAG);
			dsl_dataset_rele(ds, FTAG);
			return (error);
		}
		dsl_dataset_rele(snapds, FTAG);
	}

	/* must not have any bookmarks after the most recent snapshot */
	if (dsl_bookmark_latest_txg(ds) >
	    dsl_dataset_phys(ds)->ds_prev_snap_txg) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EEXIST));
	}

	error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	/*
	 * Check if the snap we are rolling back to uses more than
	 * the refquota.
	 */
	if (ds->ds_quota != 0 &&
	    dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * When we do the clone swap, we will temporarily use more space
	 * due to the refreservation (the head will no longer have any
	 * unique space, so the entire amount of the refreservation will need
	 * to be free). We will immediately destroy the clone, freeing
	 * this space, but the freeing happens over many txg's.
	 */
	unused_refres_delta = (int64_t)MIN(ds->ds_reserved,
	    dsl_dataset_phys(ds)->ds_unique_bytes);

	if (unused_refres_delta > 0 &&
	    unused_refres_delta >
	    dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ENOSPC));
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

void
dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rollback_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds, *clone;
	uint64_t cloneobj;
	char namebuf[ZFS_MAX_DATASET_NAME_LEN];

	VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));

	dsl_dataset_name(ds->ds_prev, namebuf);
	fnvlist_add_string(ddra->ddra_result, "target", namebuf);

	cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx);

	VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));

	dsl_dataset_clone_swap_sync_impl(clone, ds, tx);
	dsl_dataset_zero_zil(ds, tx);

	dsl_destroy_head_sync_impl(clone, tx);

	dsl_dataset_rele(clone, FTAG);
	dsl_dataset_rele(ds, FTAG);
}

/*
 * Rolls back the given filesystem or volume to the most recent snapshot.
 * The name of the most recent snapshot will be returned under key "target"
 * in the result nvlist.
 *
 * If owner != NULL:
 * - The existing dataset MUST be owned by the specified owner at entry
 * - Upon return, dataset will still be held by the same owner, whether we
 *   succeed or not.
 *
 * This mode is required any time the existing filesystem is mounted. See
 * notes above zfs_suspend_fs() for further details.
 */
int
dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
    nvlist_t *result)
{
	dsl_dataset_rollback_arg_t ddra;

	ddra.ddra_fsname = fsname;
	ddra.ddra_tosnap = tosnap;
	ddra.ddra_owner = owner;
	ddra.ddra_result = result;

	return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
	    dsl_dataset_rollback_sync, &ddra,
	    1, ZFS_SPACE_CHECK_RESERVED));
}
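
/*
 * Illustrative (non-authoritative) caller sketch; the dataset name is
 * hypothetical and error handling is elided:
 *
 *	nvlist_t *result = fnvlist_alloc();
 *	int err = dsl_dataset_rollback("tank/fs", NULL, NULL, result);
 *	if (err == 0) {
 *		// "target" names the snapshot we rolled back to,
 *		// e.g. "tank/fs@yesterday".
 *		const char *snap = fnvlist_lookup_string(result, "target");
 *	}
 *	fnvlist_free(result);
 */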

int
dsl_dataset_clone_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_clone_arg_t *ddca = arg;
	dsl_dir_t *pdd;
	const char *tail;
	int error;
	dsl_dataset_t *origin;
	dsl_pool_t *dp = dmu_tx_pool(tx);

	if (strchr(ddca->ddca_clone, '@') != NULL)
		return (SET_ERROR(EINVAL));

	if (strlen(ddca->ddca_clone) >= ZFS_MAX_DATASET_NAME_LEN)
		return (SET_ERROR(ENAMETOOLONG));

	error = dsl_dir_hold(dp, ddca->ddca_clone, FTAG, &pdd, &tail);
	if (error != 0)
		return (error);
	if (tail == NULL) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EEXIST));
	}

	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
	    ddca->ddca_cred);
	if (error != 0) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EDQUOT));
	}

	error = dsl_dataset_hold(dp, ddca->ddca_origin, FTAG, &origin);
	if (error != 0) {
		dsl_dir_rele(pdd, FTAG);
		return (error);
	}

	/* You can only clone snapshots, not the head datasets. */
	if (!origin->ds_is_snapshot) {
		dsl_dataset_rele(origin, FTAG);
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EINVAL));
	}

	dsl_dataset_rele(origin, FTAG);
	dsl_dir_rele(pdd, FTAG);

	return (0);
}

void
dsl_dataset_clone_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_clone_arg_t *ddca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	dsl_dataset_t *origin, *ds;
	uint64_t obj;
	char namebuf[ZFS_MAX_DATASET_NAME_LEN];

	VERIFY0(dsl_dir_hold(dp, ddca->ddca_clone, FTAG, &pdd, &tail));
	VERIFY0(dsl_dataset_hold(dp, ddca->ddca_origin, FTAG, &origin));

	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
	    ddca->ddca_cred, NULL, tx);

	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
	dsl_dataset_name(origin, namebuf);
	spa_history_log_internal_ds(ds, "clone", tx,
	    "origin=%s (%llu)", namebuf, (u_longlong_t)origin->ds_object);
	dsl_dataset_rele(ds, FTAG);
	dsl_dataset_rele(origin, FTAG);
	dsl_dir_rele(pdd, FTAG);
}

int
dsl_dataset_clone(const char *clone, const char *origin)
{
	dsl_dataset_clone_arg_t ddca;

	cred_t *cr = CRED();
	crhold(cr);

	ddca.ddca_clone = clone;
	ddca.ddca_origin = origin;
	ddca.ddca_cred = cr;

	int rv = dsl_sync_task(clone,
	    dsl_dataset_clone_check, dsl_dataset_clone_sync, &ddca,
	    6, ZFS_SPACE_CHECK_NORMAL);

	if (rv == 0)
		zvol_create_minors(clone);

	crfree(cr);

	return (rv);
}
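
/*
 * Illustrative usage (hypothetical names): cloning a snapshot into a
 * new head dataset in the same pool. The origin must be a snapshot and
 * the clone name must not already exist:
 *
 *	int err = dsl_dataset_clone("tank/fs-clone", "tank/fs@snap");
 */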

struct promotenode {
	list_node_t link;
	dsl_dataset_t *ds;
};

static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
    const void *tag);
static void promote_rele(dsl_dataset_promote_arg_t *ddpa, const void *tag);

int
dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_promote_arg_t *ddpa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	struct promotenode *snap;
	int err;
	uint64_t unused;
	uint64_t ss_mv_cnt;
	size_t max_snap_len;
	boolean_t conflicting_snaps;

	err = promote_hold(ddpa, dp, FTAG);
	if (err != 0)
		return (err);

	hds = ddpa->ddpa_clone;
	max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1;

	if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) {
		promote_rele(ddpa, FTAG);
		return (SET_ERROR(EXDEV));
	}

	snap = list_head(&ddpa->shared_snaps);
	if (snap == NULL) {
		err = SET_ERROR(ENOENT);
		goto out;
	}
	dsl_dataset_t *const origin_ds = snap->ds;

	/*
	 * Encrypted clones share a DSL Crypto Key with their origin's dsl dir.
	 * When doing a promote we must make sure the encryption root for
	 * both the target and the target's origin does not change to avoid
	 * needing to rewrap encryption keys
	 */
	err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir);
	if (err != 0)
		goto out;

	/*
	 * Compute and check the amount of space to transfer. Since this is
	 * so expensive, don't do the preliminary check.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		promote_rele(ddpa, FTAG);
		return (0);
	}

	/* compute origin's new unique space */
	snap = list_tail(&ddpa->clone_snaps);
	ASSERT(snap != NULL);
	ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	dsl_deadlist_space_range(&snap->ds->ds_deadlist,
	    dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX,
	    &ddpa->unique, &unused, &unused);

	/*
	 * Walk the snapshots that we are moving
	 *
	 * Compute space to transfer. Consider the incremental changes
	 * to used by each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
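	/*
	 * Worked example (illustrative numbers): for three snapshots with
	 * used u0=10, u1=15, u2=20 and killed k0=2, k1=3, k2=5, the blocks
	 * born across the whole chain telescope to
	 * u2 + k2 + k1 + k0 = 20 + 5 + 3 + 2 = 30; the intermediate u's
	 * cancel pairwise, which is why the loop below only sums the
	 * per-snapshot deadlists on top of the origin's referenced bytes.
	 */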
	conflicting_snaps = B_FALSE;
	ss_mv_cnt = 0;
	ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes;
	ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes;
	ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes;
	for (snap = list_head(&ddpa->shared_snaps); snap;
	    snap = list_next(&ddpa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		ss_mv_cnt++;

		/*
		 * If there are long holds, we won't be able to evict
		 * the objset.
		 */
		if (dsl_dataset_long_held(ds)) {
			err = SET_ERROR(EBUSY);
			goto out;
		}

		/* Check that the snapshot name does not conflict */
		VERIFY0(dsl_dataset_get_snapname(ds));
		if (strlen(ds->ds_snapname) >= max_snap_len) {
			err = SET_ERROR(ENAMETOOLONG);
			goto out;
		}
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0) {
			fnvlist_add_boolean(ddpa->err_ds,
			    snap->ds->ds_snapname);
			conflicting_snaps = B_TRUE;
		} else if (err != ENOENT) {
			goto out;
		}

		/* The very first snapshot does not have a deadlist */
		if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0)
			continue;

		dsl_deadlist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp);
		ddpa->used += dlused;
		ddpa->comp += dlcomp;
		ddpa->uncomp += dluncomp;
	}

	/*
	 * Check that bookmarks that are being transferred don't have
	 * name conflicts.
	 */
	for (dsl_bookmark_node_t *dbn = avl_first(&origin_ds->ds_bookmarks);
	    dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=
	    dsl_dataset_phys(origin_ds)->ds_creation_txg;
	    dbn = AVL_NEXT(&origin_ds->ds_bookmarks, dbn)) {
		if (strlen(dbn->dbn_name) >= max_snap_len) {
			err = SET_ERROR(ENAMETOOLONG);
			goto out;
		}
		zfs_bookmark_phys_t bm;
		err = dsl_bookmark_lookup_impl(ddpa->ddpa_clone,
		    dbn->dbn_name, &bm);

		if (err == 0) {
			fnvlist_add_boolean(ddpa->err_ds, dbn->dbn_name);
			conflicting_snaps = B_TRUE;
		} else if (err == ESRCH) {
			err = 0;
		}
		if (err != 0) {
			goto out;
		}
	}

	/*
	 * In order to return the full list of conflicting snapshots, we check
	 * whether there was a conflict after traversing all of them.
	 */
	if (conflicting_snaps) {
		err = SET_ERROR(EEXIST);
		goto out;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (ddpa->origin_origin) {
		ddpa->used -=
		    dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes;
		ddpa->comp -=
		    dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes;
		ddpa->uncomp -=
		    dsl_dataset_phys(ddpa->origin_origin)->
		    ds_uncompressed_bytes;
	}

	/* Check that there is enough space and limit headroom here */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    0, ss_mv_cnt, ddpa->used, ddpa->cr);
	if (err != 0)
		goto out;

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone). For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		 * Note, typically this will not be a clone of a clone,
		 * so dd_origin_txg will be < TXG_INITIAL, so
		 * these snaplist_space() -> dsl_deadlist_space_range()
		 * calls will be fast because they do not have to
		 * iterate over all bps.
		 */
		snap = list_head(&ddpa->origin_snaps);
		if (snap == NULL) {
			err = SET_ERROR(ENOENT);
			goto out;
		}
		err = snaplist_space(&ddpa->shared_snaps,
		    snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
		if (err != 0)
			goto out;

		err = snaplist_space(&ddpa->clone_snaps,
		    snap->ds->ds_dir->dd_origin_txg, &space);
		if (err != 0)
			goto out;
		ddpa->cloneusedsnap += space;
	}
	if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags &
	    DD_FLAG_USED_BREAKDOWN) {
		err = snaplist_space(&ddpa->origin_snaps,
		    dsl_dataset_phys(origin_ds)->ds_creation_txg,
		    &ddpa->originusedsnap);
		if (err != 0)
			goto out;
	}

out:
	promote_rele(ddpa, FTAG);
	return (err);
}

void
dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_promote_arg_t *ddpa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	struct promotenode *snap;
	dsl_dataset_t *origin_ds;
	dsl_dataset_t *origin_head;
	dsl_dir_t *dd;
	dsl_dir_t *odd = NULL;
	uint64_t oldnext_obj;
	int64_t delta;

	ASSERT(nvlist_empty(ddpa->err_ds));

	VERIFY0(promote_hold(ddpa, dp, FTAG));
	hds = ddpa->ddpa_clone;

	ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE);

	snap = list_head(&ddpa->shared_snaps);
	origin_ds = snap->ds;
	dd = hds->ds_dir;

	snap = list_head(&ddpa->origin_snaps);
	origin_head = snap->ds;

	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
	 */
	VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
	    NULL, FTAG, &odd));

	dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx);

	/* change origin's next snap */
	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
	oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj;
	snap = list_tail(&ddpa->clone_snaps);
	ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object;

	/* change the origin's next clone */
	if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) {
		dsl_dataset_remove_from_next_clones(origin_ds,
		    snap->ds->ds_object, tx);
		VERIFY0(zap_add_int(dp->dp_meta_objset,
		    dsl_dataset_phys(origin_ds)->ds_next_clones_obj,
		    oldnext_obj, tx));
	}

	/* change origin */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object);
	dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj;
	dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
	dmu_buf_will_dirty(odd->dd_dbuf, tx);
	dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object;
	origin_head->ds_dir->dd_origin_txg =
	    dsl_dataset_phys(origin_ds)->ds_creation_txg;

	/* change dd_clone entries */
	if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
		VERIFY0(zap_remove_int(dp->dp_meta_objset,
		    dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx));
		VERIFY0(zap_add_int(dp->dp_meta_objset,
		    dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones,
		    hds->ds_object, tx));

		VERIFY0(zap_remove_int(dp->dp_meta_objset,
		    dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones,
		    origin_head->ds_object, tx));
		if (dsl_dir_phys(dd)->dd_clones == 0) {
			dsl_dir_phys(dd)->dd_clones =
			    zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES,
3735
DMU_OT_NONE, 0, tx);
3736
}
3737
VERIFY0(zap_add_int(dp->dp_meta_objset,
3738
dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx));
3739
}
3740
3741
/*
3742
* Move bookmarks to this dir.
3743
*/
3744
dsl_bookmark_node_t *dbn_next;
3745
for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks);
3746
dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=
3747
dsl_dataset_phys(origin_ds)->ds_creation_txg;
3748
dbn = dbn_next) {
3749
dbn_next = AVL_NEXT(&origin_head->ds_bookmarks, dbn);
3750
3751
avl_remove(&origin_head->ds_bookmarks, dbn);
3752
VERIFY0(zap_remove(dp->dp_meta_objset,
3753
origin_head->ds_bookmarks_obj, dbn->dbn_name, tx));
3754
3755
dsl_bookmark_node_add(hds, dbn, tx);
3756
}
3757
3758
dsl_bookmark_next_changed(hds, origin_ds, tx);
3759
3760
/* move snapshots to this dir */
3761
for (snap = list_head(&ddpa->shared_snaps); snap;
3762
snap = list_next(&ddpa->shared_snaps, snap)) {
3763
dsl_dataset_t *ds = snap->ds;
3764
3765
/*
3766
* Property callbacks are registered to a particular
3767
* dsl_dir. Since ours is changing, evict the objset
3768
* so that they will be unregistered from the old dsl_dir.
3769
*/
3770
if (ds->ds_objset) {
3771
dmu_objset_evict(ds->ds_objset);
3772
ds->ds_objset = NULL;
3773
}
3774
3775
/* move snap name entry */
3776
VERIFY0(dsl_dataset_get_snapname(ds));
3777
VERIFY0(dsl_dataset_snap_remove(origin_head,
3778
ds->ds_snapname, tx, B_TRUE));
3779
VERIFY0(zap_add(dp->dp_meta_objset,
3780
dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname,
3781
8, 1, &ds->ds_object, tx));
3782
dsl_fs_ss_count_adjust(hds->ds_dir, 1,
3783
DD_FIELD_SNAPSHOT_COUNT, tx);
3784
3785
/* change containing dsl_dir */
3786
dmu_buf_will_dirty(ds->ds_dbuf, tx);
3787
ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object);
3788
dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object;
3789
ASSERT3P(ds->ds_dir, ==, odd);
3790
dsl_dir_rele(ds->ds_dir, ds);
3791
VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,
3792
NULL, ds, &ds->ds_dir));
3793
3794
/* move any clone references */
3795
if (dsl_dataset_phys(ds)->ds_next_clones_obj &&
3796
spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
3797
zap_cursor_t zc;
3798
zap_attribute_t *za = zap_attribute_alloc();
3799
3800
for (zap_cursor_init(&zc, dp->dp_meta_objset,
3801
dsl_dataset_phys(ds)->ds_next_clones_obj);
3802
zap_cursor_retrieve(&zc, za) == 0;
3803
zap_cursor_advance(&zc)) {
3804
dsl_dataset_t *cnds;
3805
uint64_t o;
3806
3807
if (za->za_first_integer == oldnext_obj) {
3808
/*
3809
* We've already moved the
3810
* origin's reference.
3811
*/
3812
continue;
3813
}
3814
3815
VERIFY0(dsl_dataset_hold_obj(dp,
3816
za->za_first_integer, FTAG, &cnds));
3817
o = dsl_dir_phys(cnds->ds_dir)->
3818
dd_head_dataset_obj;
3819
3820
VERIFY0(zap_remove_int(dp->dp_meta_objset,
3821
dsl_dir_phys(odd)->dd_clones, o, tx));
3822
VERIFY0(zap_add_int(dp->dp_meta_objset,
3823
dsl_dir_phys(dd)->dd_clones, o, tx));
3824
dsl_dataset_rele(cnds, FTAG);
3825
}
3826
zap_cursor_fini(&zc);
3827
zap_attribute_free(za);
3828
}
3829
3830
ASSERT(!dsl_prop_hascb(ds));
3831
}
3832
3833
/*
3834
* Change space accounting.
3835
* Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
3836
* both be valid, or both be 0 (resulting in delta == 0). This
3837
* is true for each of {clone,origin} independently.
3838
*/
3839
3840
delta = ddpa->cloneusedsnap -
3841
dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP];
3842
ASSERT3S(delta, >=, 0);
3843
ASSERT3U(ddpa->used, >=, delta);
3844
dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
3845
dsl_dir_diduse_space(dd, DD_USED_HEAD,
3846
ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);
3847
3848
delta = ddpa->originusedsnap -
3849
dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP];
3850
ASSERT3S(delta, <=, 0);
3851
ASSERT3U(ddpa->used, >=, -delta);
3852
dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
3853
dsl_dir_diduse_space(odd, DD_USED_HEAD,
3854
-ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);
3855
3856
dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique;
3857
3858
/*
3859
* Since livelists are specific to a clone's origin txg, they
3860
* are no longer accurate. Destroy the livelist from the clone being
3861
* promoted. If the origin dataset is a clone, destroy its livelist
3862
* as well.
3863
*/
3864
dsl_dir_remove_livelist(dd, tx, B_TRUE);
3865
dsl_dir_remove_livelist(odd, tx, B_TRUE);
3866
3867
/* log history record */
3868
spa_history_log_internal_ds(hds, "promote", tx, " ");
3869
3870
dsl_dir_rele(odd, FTAG);
3871
3872
/*
3873
* Transfer common error blocks from old head to new head, before
3874
* calling promote_rele() on ddpa since we need to dereference
3875
* origin_head and hds.
3876
*/
3877
if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG)) {
3878
uint64_t old_head = origin_head->ds_object;
3879
uint64_t new_head = hds->ds_object;
3880
spa_swap_errlog(dp->dp_spa, new_head, old_head, tx);
3881
}
3882
3883
promote_rele(ddpa, FTAG);
3884
}
3885
3886
/*
3887
* Make a list of dsl_dataset_t's for the snapshots between first_obj
3888
* (exclusive) and last_obj (inclusive). The list will be in reverse
3889
* order (last_obj will be the list_head()). If first_obj == 0, do all
3890
* snapshots back to this dataset's origin.
3891
*/
3892
static int
3893
snaplist_make(dsl_pool_t *dp,
3894
uint64_t first_obj, uint64_t last_obj, list_t *l, const void *tag)
3895
{
3896
uint64_t obj = last_obj;
3897
3898
list_create(l, sizeof (struct promotenode),
3899
offsetof(struct promotenode, link));
3900
3901
while (obj != first_obj) {
3902
dsl_dataset_t *ds;
3903
struct promotenode *snap;
3904
int err;
3905
3906
err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
3907
ASSERT(err != ENOENT);
3908
if (err != 0)
3909
return (err);
3910
3911
if (first_obj == 0)
3912
first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj;
3913
3914
snap = kmem_alloc(sizeof (*snap), KM_SLEEP);
3915
snap->ds = ds;
3916
list_insert_tail(l, snap);
3917
obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
3918
}
3919
3920
return (0);
3921
}
3922
3923
static int
3924
snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
3925
{
3926
struct promotenode *snap;
3927
3928
*spacep = 0;
3929
for (snap = list_head(l); snap; snap = list_next(l, snap)) {
3930
uint64_t used, comp, uncomp;
3931
dsl_deadlist_space_range(&snap->ds->ds_deadlist,
3932
mintxg, UINT64_MAX, &used, &comp, &uncomp);
3933
*spacep += used;
3934
}
3935
return (0);
3936
}
3937
3938
static void
3939
snaplist_destroy(list_t *l, const void *tag)
3940
{
3941
struct promotenode *snap;
3942
3943
if (l == NULL || !list_link_active(&l->list_head))
3944
return;
3945
3946
while ((snap = list_remove_tail(l)) != NULL) {
3947
dsl_dataset_rele(snap->ds, tag);
3948
kmem_free(snap, sizeof (*snap));
3949
}
3950
list_destroy(l);
3951
}
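
/*
 * Illustrative sketch of how these three static helpers fit together
 * (promote_hold() and dsl_dataset_promote_check() pair them the same
 * way); "last_obj" and "mintxg" here are hypothetical inputs:
 *
 *	list_t snaps;
 *	uint64_t space;
 *
 *	if (snaplist_make(dp, 0, last_obj, &snaps, FTAG) == 0) {
 *		(void) snaplist_space(&snaps, mintxg, &space);
 *		snaplist_destroy(&snaps, FTAG);
 *	}
 */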

static int
promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, const void *tag)
{
        int error;
        dsl_dir_t *dd;
        struct promotenode *snap;

        error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
            &ddpa->ddpa_clone);
        if (error != 0)
                return (error);
        dd = ddpa->ddpa_clone->ds_dir;

        if (ddpa->ddpa_clone->ds_is_snapshot ||
            !dsl_dir_is_clone(dd)) {
                dsl_dataset_rele(ddpa->ddpa_clone, tag);
                return (SET_ERROR(EINVAL));
        }

        error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,
            &ddpa->shared_snaps, tag);
        if (error != 0)
                goto out;

        error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
            &ddpa->clone_snaps, tag);
        if (error != 0)
                goto out;

        snap = list_head(&ddpa->shared_snaps);
        ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);
        error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,
            dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,
            &ddpa->origin_snaps, tag);
        if (error != 0)
                goto out;

        if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) {
                error = dsl_dataset_hold_obj(dp,
                    dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj,
                    tag, &ddpa->origin_origin);
                if (error != 0)
                        goto out;
        }
out:
        if (error != 0)
                promote_rele(ddpa, tag);
        return (error);
}

static void
promote_rele(dsl_dataset_promote_arg_t *ddpa, const void *tag)
{
        snaplist_destroy(&ddpa->shared_snaps, tag);
        snaplist_destroy(&ddpa->clone_snaps, tag);
        snaplist_destroy(&ddpa->origin_snaps, tag);
        if (ddpa->origin_origin != NULL)
                dsl_dataset_rele(ddpa->origin_origin, tag);
        dsl_dataset_rele(ddpa->ddpa_clone, tag);
}

/*
 * Promote a clone.
 *
 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled
 * in with the name.  (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.)
 */
int
dsl_dataset_promote(const char *name, char *conflsnap)
{
        dsl_dataset_promote_arg_t ddpa = { 0 };
        uint64_t numsnaps;
        int error;
        nvpair_t *snap_pair;
        objset_t *os;

        /*
         * We will modify space proportional to the number of
         * snapshots.  Compute numsnaps.
         */
        error = dmu_objset_hold(name, FTAG, &os);
        if (error != 0)
                return (error);
        error = zap_count(dmu_objset_pool(os)->dp_meta_objset,
            dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj,
            &numsnaps);
        dmu_objset_rele(os, FTAG);
        if (error != 0)
                return (error);

        cred_t *cr = CRED();
        crhold(cr);

        ddpa.ddpa_clonename = name;
        ddpa.err_ds = fnvlist_alloc();
        ddpa.cr = cr;

        error = dsl_sync_task(name, dsl_dataset_promote_check,
            dsl_dataset_promote_sync, &ddpa,
            2 + numsnaps, ZFS_SPACE_CHECK_RESERVED);

        crfree(cr);

        /*
         * Return the first conflicting snapshot found.
         */
        snap_pair = nvlist_next_nvpair(ddpa.err_ds, NULL);
        if (snap_pair != NULL && conflsnap != NULL)
                (void) strlcpy(conflsnap, nvpair_name(snap_pair),
                    ZFS_MAX_DATASET_NAME_LEN);

        fnvlist_free(ddpa.err_ds);
        return (error);
}
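
/*
 * Illustrative example of calling dsl_dataset_promote(); the dataset
 * name is hypothetical.  On EEXIST, the first conflicting snapshot name
 * has been copied into the caller's buffer:
 *
 *	char conflsnap[ZFS_MAX_DATASET_NAME_LEN];
 *	int err = dsl_dataset_promote("tank/clone", conflsnap);
 *	if (err == EEXIST)
 *		zfs_dbgmsg("conflicting snapshot name: %s", conflsnap);
 */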

int
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
    dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
{
        /*
         * "slack" factor for received datasets with refquota set on them.
         * See the bottom of this function for details on its use.
         */
        uint64_t refquota_slack = (uint64_t)DMU_MAX_ACCESS *
            spa_asize_inflation;
        int64_t unused_refres_delta;

        /* they should both be heads */
        if (clone->ds_is_snapshot ||
            origin_head->ds_is_snapshot)
                return (SET_ERROR(EINVAL));

        /* if we are not forcing, the branch point should be just before them */
        if (!force && clone->ds_prev != origin_head->ds_prev)
                return (SET_ERROR(EINVAL));

        /* clone should be the clone (unless they are unrelated) */
        if (clone->ds_prev != NULL &&
            clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
            origin_head->ds_dir != clone->ds_prev->ds_dir)
                return (SET_ERROR(EINVAL));

        /* the clone should be a child of the origin */
        if (clone->ds_dir->dd_parent != origin_head->ds_dir)
                return (SET_ERROR(EINVAL));

        /* origin_head shouldn't be modified unless 'force' */
        if (!force &&
            dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
                return (SET_ERROR(ETXTBSY));

        /* origin_head should have no long holds (e.g. is not mounted) */
        if (dsl_dataset_handoff_check(origin_head, owner, tx))
                return (SET_ERROR(EBUSY));

        /* check amount of any unconsumed refreservation */
        unused_refres_delta =
            (int64_t)MIN(origin_head->ds_reserved,
            dsl_dataset_phys(origin_head)->ds_unique_bytes) -
            (int64_t)MIN(origin_head->ds_reserved,
            dsl_dataset_phys(clone)->ds_unique_bytes);

        if (unused_refres_delta > 0 &&
            unused_refres_delta >
            dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
                return (SET_ERROR(ENOSPC));

        /*
         * The clone can't be too much over the head's refquota.
         *
         * To ensure that the entire refquota can be used, we allow one
         * transaction to exceed the refquota.  Therefore, this check
         * needs to also allow for the space referenced to be more than the
         * refquota.  The maximum amount of space that one transaction can use
         * on disk is DMU_MAX_ACCESS * spa_asize_inflation.  Allowing this
         * overage ensures that we are able to receive a filesystem that
         * exceeds the refquota on the source system.
         *
         * So that overage is the refquota_slack we use below.
         */
        if (origin_head->ds_quota != 0 &&
            dsl_dataset_phys(clone)->ds_referenced_bytes >
            origin_head->ds_quota + refquota_slack)
                return (SET_ERROR(EDQUOT));

        return (0);
}
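
/*
 * Worked example of the refquota_slack headroom above: if DMU_MAX_ACCESS
 * were 64MB and spa_asize_inflation were 24 (both are platform- and
 * tunable-dependent), the slack would be 1.5GB, so a received clone
 * referencing up to refquota + 1.5GB would still pass the EDQUOT check.
 */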

static void
dsl_dataset_swap_remap_deadlists(dsl_dataset_t *clone,
    dsl_dataset_t *origin, dmu_tx_t *tx)
{
        uint64_t clone_remap_dl_obj, origin_remap_dl_obj;
        dsl_pool_t *dp = dmu_tx_pool(tx);

        ASSERT(dsl_pool_sync_context(dp));

        clone_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(clone);
        origin_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(origin);

        if (clone_remap_dl_obj != 0) {
                dsl_deadlist_close(&clone->ds_remap_deadlist);
                dsl_dataset_unset_remap_deadlist_object(clone, tx);
        }
        if (origin_remap_dl_obj != 0) {
                dsl_deadlist_close(&origin->ds_remap_deadlist);
                dsl_dataset_unset_remap_deadlist_object(origin, tx);
        }

        if (clone_remap_dl_obj != 0) {
                dsl_dataset_set_remap_deadlist_object(origin,
                    clone_remap_dl_obj, tx);
                VERIFY0(dsl_deadlist_open(&origin->ds_remap_deadlist,
                    dp->dp_meta_objset, clone_remap_dl_obj));
        }
        if (origin_remap_dl_obj != 0) {
                dsl_dataset_set_remap_deadlist_object(clone,
                    origin_remap_dl_obj, tx);
                VERIFY0(dsl_deadlist_open(&clone->ds_remap_deadlist,
                    dp->dp_meta_objset, origin_remap_dl_obj));
        }
}

void
dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
    dsl_dataset_t *origin_head, dmu_tx_t *tx)
{
        dsl_pool_t *dp = dmu_tx_pool(tx);
        int64_t unused_refres_delta;

        ASSERT0(clone->ds_reserved);
        /*
         * NOTE: On DEBUG kernels there could be a race between this and
         * the check function if spa_asize_inflation is adjusted...
         */
        ASSERT(origin_head->ds_quota == 0 ||
            dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota +
            DMU_MAX_ACCESS * spa_asize_inflation);
        ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);

        dsl_dir_cancel_waiters(origin_head->ds_dir);

        /*
         * Swap per-dataset feature flags.
         */
        for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
                if (!(spa_feature_table[f].fi_flags &
                    ZFEATURE_FLAG_PER_DATASET)) {
                        ASSERT(!dsl_dataset_feature_is_active(clone, f));
                        ASSERT(!dsl_dataset_feature_is_active(origin_head, f));
                        continue;
                }

                boolean_t clone_inuse = dsl_dataset_feature_is_active(clone, f);
                void *clone_feature = clone->ds_feature[f];
                boolean_t origin_head_inuse =
                    dsl_dataset_feature_is_active(origin_head, f);
                void *origin_head_feature = origin_head->ds_feature[f];

                if (clone_inuse)
                        dsl_dataset_deactivate_feature_impl(clone, f, tx);
                if (origin_head_inuse)
                        dsl_dataset_deactivate_feature_impl(origin_head, f, tx);

                if (clone_inuse) {
                        dsl_dataset_activate_feature(origin_head->ds_object, f,
                            clone_feature, tx);
                        origin_head->ds_feature[f] = clone_feature;
                }
                if (origin_head_inuse) {
                        dsl_dataset_activate_feature(clone->ds_object, f,
                            origin_head_feature, tx);
                        clone->ds_feature[f] = origin_head_feature;
                }
        }

        dmu_buf_will_dirty(clone->ds_dbuf, tx);
        dmu_buf_will_dirty(origin_head->ds_dbuf, tx);

        if (clone->ds_objset != NULL) {
                dmu_objset_evict(clone->ds_objset);
                clone->ds_objset = NULL;
        }

        if (origin_head->ds_objset != NULL) {
                dmu_objset_evict(origin_head->ds_objset);
                origin_head->ds_objset = NULL;
        }

        unused_refres_delta =
            (int64_t)MIN(origin_head->ds_reserved,
            dsl_dataset_phys(origin_head)->ds_unique_bytes) -
            (int64_t)MIN(origin_head->ds_reserved,
            dsl_dataset_phys(clone)->ds_unique_bytes);

        /*
         * Reset origin's unique bytes.
         */
        {
                dsl_dataset_t *origin = clone->ds_prev;
                uint64_t comp, uncomp;

                dmu_buf_will_dirty(origin->ds_dbuf, tx);
                dsl_deadlist_space_range(&clone->ds_deadlist,
                    dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX,
                    &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp);
        }

        /* swap blkptrs */
        {
                rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG);
                rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG);
                blkptr_t tmp;
                tmp = dsl_dataset_phys(origin_head)->ds_bp;
                dsl_dataset_phys(origin_head)->ds_bp =
                    dsl_dataset_phys(clone)->ds_bp;
                dsl_dataset_phys(clone)->ds_bp = tmp;
                rrw_exit(&origin_head->ds_bp_rwlock, FTAG);
                rrw_exit(&clone->ds_bp_rwlock, FTAG);
        }

        /* set dd_*_bytes */
        {
                int64_t dused, dcomp, duncomp;
                uint64_t cdl_used, cdl_comp, cdl_uncomp;
                uint64_t odl_used, odl_comp, odl_uncomp;

                ASSERT3U(dsl_dir_phys(clone->ds_dir)->
                    dd_used_breakdown[DD_USED_SNAP], ==, 0);

                dsl_deadlist_space(&clone->ds_deadlist,
                    &cdl_used, &cdl_comp, &cdl_uncomp);
                dsl_deadlist_space(&origin_head->ds_deadlist,
                    &odl_used, &odl_comp, &odl_uncomp);

                dused = dsl_dataset_phys(clone)->ds_referenced_bytes +
                    cdl_used -
                    (dsl_dataset_phys(origin_head)->ds_referenced_bytes +
                    odl_used);
                dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes +
                    cdl_comp -
                    (dsl_dataset_phys(origin_head)->ds_compressed_bytes +
                    odl_comp);
                duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes +
                    cdl_uncomp -
                    (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes +
                    odl_uncomp);

                dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,
                    dused, dcomp, duncomp, tx);
                dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,
                    -dused, -dcomp, -duncomp, tx);

                /*
                 * The difference in the space used by snapshots is the
                 * difference in snapshot space due to the head's
                 * deadlist (since that's the only thing that's
                 * changing that affects the snapused).
                 */
                dsl_deadlist_space_range(&clone->ds_deadlist,
                    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
                    &cdl_used, &cdl_comp, &cdl_uncomp);
                dsl_deadlist_space_range(&origin_head->ds_deadlist,
                    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
                    &odl_used, &odl_comp, &odl_uncomp);
                dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,
                    DD_USED_HEAD, DD_USED_SNAP, tx);
        }

        /* swap ds_*_bytes */
        SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes,
            dsl_dataset_phys(clone)->ds_referenced_bytes);
        SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes,
            dsl_dataset_phys(clone)->ds_compressed_bytes);
        SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes,
            dsl_dataset_phys(clone)->ds_uncompressed_bytes);
        SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes,
            dsl_dataset_phys(clone)->ds_unique_bytes);

        /* apply any parent delta for change in unconsumed refreservation */
        dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,
            unused_refres_delta, 0, 0, tx);

        /*
         * Swap deadlists.
         */
        dsl_deadlist_close(&clone->ds_deadlist);
        dsl_deadlist_close(&origin_head->ds_deadlist);
        SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj,
            dsl_dataset_phys(clone)->ds_deadlist_obj);
        VERIFY0(dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,
            dsl_dataset_phys(clone)->ds_deadlist_obj));
        VERIFY0(dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
            dsl_dataset_phys(origin_head)->ds_deadlist_obj));
        dsl_dataset_swap_remap_deadlists(clone, origin_head, tx);

        /*
         * If there is a bookmark at the origin, its "next dataset" is
         * changing, so we need to reset its FBN.
         */
        dsl_bookmark_next_changed(origin_head, origin_head->ds_prev, tx);

        dsl_scan_ds_clone_swapped(origin_head, clone, tx);

        /*
         * Destroy any livelists associated with the clone or the origin,
         * since after the swap the corresponding livelists are no longer
         * valid.
         */
        dsl_dir_remove_livelist(clone->ds_dir, tx, B_TRUE);
        dsl_dir_remove_livelist(origin_head->ds_dir, tx, B_TRUE);

        spa_history_log_internal_ds(clone, "clone swap", tx,
            "parent=%s", origin_head->ds_dir->dd_myname);
}

/*
 * Given a pool name and a dataset object number in that pool,
 * return the name of that dataset.
 */
int
dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
{
        dsl_pool_t *dp;
        dsl_dataset_t *ds;
        int error;

        error = dsl_pool_hold(pname, FTAG, &dp);
        if (error != 0)
                return (error);

        error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
        if (error == 0) {
                dsl_dataset_name(ds, buf);
                dsl_dataset_rele(ds, FTAG);
        }
        dsl_pool_rele(dp, FTAG);

        return (error);
}
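
/*
 * Illustrative example: resolving a dataset object number to its name;
 * the pool name and object number are hypothetical, and "buf" must be
 * at least ZFS_MAX_DATASET_NAME_LEN bytes:
 *
 *	char buf[ZFS_MAX_DATASET_NAME_LEN];
 *	if (dsl_dsobj_to_dsname("tank", dsobj, buf) == 0)
 *		zfs_dbgmsg("object %llu is %s", (u_longlong_t)dsobj, buf);
 */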

int
dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
    uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
{
        int error = 0;

        ASSERT3S(asize, >, 0);

        /*
         * *ref_rsrv is the portion of asize that will come from any
         * unconsumed refreservation space.
         */
        *ref_rsrv = 0;

        mutex_enter(&ds->ds_lock);
        /*
         * Make a space adjustment for reserved bytes.
         */
        if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) {
                ASSERT3U(*used, >=,
                    ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes);
                *used -=
                    (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes);
                *ref_rsrv =
                    asize - MIN(asize, parent_delta(ds, asize + inflight));
        }

        if (!check_quota || ds->ds_quota == 0) {
                mutex_exit(&ds->ds_lock);
                return (0);
        }
        /*
         * If they are requesting more space, and our current estimate
         * is over quota, they get to try again unless the actual
         * on-disk is over quota and there are no pending changes (which
         * may free up space for us).
         */
        if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >=
            ds->ds_quota) {
                if (inflight > 0 ||
                    dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota)
                        error = SET_ERROR(ERESTART);
                else
                        error = SET_ERROR(EDQUOT);
        }
        mutex_exit(&ds->ds_lock);

        return (error);
}
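
/*
 * Illustrative numbers for the quota logic above: with ds_quota of 10G,
 * 9.9G referenced on disk, and 200M of dirty data inflight, the estimate
 * (10.1G) is over quota but pending syncs may still free space, so the
 * caller gets ERESTART and retries; EDQUOT is only returned once the
 * on-disk referenced space alone exceeds the quota with nothing inflight.
 */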

typedef struct dsl_dataset_set_qr_arg {
        const char *ddsqra_name;
        zprop_source_t ddsqra_source;
        uint64_t ddsqra_value;
} dsl_dataset_set_qr_arg_t;


static int
dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
{
        dsl_dataset_set_qr_arg_t *ddsqra = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds;
        int error;
        uint64_t newval;

        if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
                return (SET_ERROR(ENOTSUP));

        error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
        if (error != 0)
                return (error);

        if (ds->ds_is_snapshot) {
                dsl_dataset_rele(ds, FTAG);
                return (SET_ERROR(EINVAL));
        }

        error = dsl_prop_predict(ds->ds_dir,
            zfs_prop_to_name(ZFS_PROP_REFQUOTA),
            ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
        if (error != 0) {
                dsl_dataset_rele(ds, FTAG);
                return (error);
        }

        if (newval == 0) {
                dsl_dataset_rele(ds, FTAG);
                return (0);
        }

        if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||
            newval < ds->ds_reserved) {
                dsl_dataset_rele(ds, FTAG);
                return (SET_ERROR(ENOSPC));
        }

        dsl_dataset_rele(ds, FTAG);
        return (0);
}

static void
dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)
{
        dsl_dataset_set_qr_arg_t *ddsqra = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds = NULL;
        uint64_t newval;

        VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));

        dsl_prop_set_sync_impl(ds,
            zfs_prop_to_name(ZFS_PROP_REFQUOTA),
            ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
            &ddsqra->ddsqra_value, tx);

        VERIFY0(dsl_prop_get_int_ds(ds,
            zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval));

        if (ds->ds_quota != newval) {
                dmu_buf_will_dirty(ds->ds_dbuf, tx);
                ds->ds_quota = newval;
        }
        dsl_dataset_rele(ds, FTAG);
}

int
dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
    uint64_t refquota)
{
        dsl_dataset_set_qr_arg_t ddsqra;

        ddsqra.ddsqra_name = dsname;
        ddsqra.ddsqra_source = source;
        ddsqra.ddsqra_value = refquota;

        return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
            dsl_dataset_set_refquota_sync, &ddsqra, 0,
            ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
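
/*
 * Illustrative example: setting a 10GB refquota as a locally-set
 * property value (the dataset name is hypothetical):
 *
 *	int err = dsl_dataset_set_refquota("tank/home", ZPROP_SRC_LOCAL,
 *	    10ULL << 30);
 */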

static int
dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
{
        dsl_dataset_set_qr_arg_t *ddsqra = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds;
        int error;
        uint64_t newval, unique;

        if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
                return (SET_ERROR(ENOTSUP));

        error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
        if (error != 0)
                return (error);

        if (ds->ds_is_snapshot) {
                dsl_dataset_rele(ds, FTAG);
                return (SET_ERROR(EINVAL));
        }

        error = dsl_prop_predict(ds->ds_dir,
            zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
            ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
        if (error != 0) {
                dsl_dataset_rele(ds, FTAG);
                return (error);
        }

        /*
         * If we are doing the preliminary check in open context, the
         * space estimates may be inaccurate.
         */
        if (!dmu_tx_is_syncing(tx)) {
                dsl_dataset_rele(ds, FTAG);
                return (0);
        }

        mutex_enter(&ds->ds_lock);
        if (!DS_UNIQUE_IS_ACCURATE(ds))
                dsl_dataset_recalc_head_uniq(ds);
        unique = dsl_dataset_phys(ds)->ds_unique_bytes;
        mutex_exit(&ds->ds_lock);

        if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) {
                uint64_t delta = MAX(unique, newval) -
                    MAX(unique, ds->ds_reserved);

                if (delta >
                    dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) ||
                    (ds->ds_quota > 0 && newval > ds->ds_quota)) {
                        dsl_dataset_rele(ds, FTAG);
                        return (SET_ERROR(ENOSPC));
                }
        }

        dsl_dataset_rele(ds, FTAG);
        return (0);
}

void
dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
    zprop_source_t source, uint64_t value, dmu_tx_t *tx)
{
        uint64_t newval;
        uint64_t unique;
        int64_t delta;

        dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
            source, sizeof (value), 1, &value, tx);

        VERIFY0(dsl_prop_get_int_ds(ds,
            zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval));

        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        mutex_enter(&ds->ds_dir->dd_lock);
        mutex_enter(&ds->ds_lock);
        ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
        unique = dsl_dataset_phys(ds)->ds_unique_bytes;
        delta = MAX(0, (int64_t)(newval - unique)) -
            MAX(0, (int64_t)(ds->ds_reserved - unique));
        ds->ds_reserved = newval;
        mutex_exit(&ds->ds_lock);

        dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
        mutex_exit(&ds->ds_dir->dd_lock);
}

static void
dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)
{
        dsl_dataset_set_qr_arg_t *ddsqra = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds = NULL;

        VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
        dsl_dataset_set_refreservation_sync_impl(ds,
            ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx);
        dsl_dataset_rele(ds, FTAG);
}

int
dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
    uint64_t refreservation)
{
        dsl_dataset_set_qr_arg_t ddsqra;

        ddsqra.ddsqra_name = dsname;
        ddsqra.ddsqra_source = source;
        ddsqra.ddsqra_value = refreservation;

        return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,
            dsl_dataset_set_refreservation_sync, &ddsqra, 0,
            ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
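
/*
 * Illustrative example: reserving 1GB for a dataset (name hypothetical);
 * the check/sync pair above validates available space in syncing context:
 *
 *	int err = dsl_dataset_set_refreservation("tank/vol",
 *	    ZPROP_SRC_LOCAL, 1ULL << 30);
 */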

typedef struct dsl_dataset_set_compression_arg {
        const char *ddsca_name;
        zprop_source_t ddsca_source;
        uint64_t ddsca_value;
} dsl_dataset_set_compression_arg_t;

static int
dsl_dataset_set_compression_check(void *arg, dmu_tx_t *tx)
{
        dsl_dataset_set_compression_arg_t *ddsca = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);

        uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);
        spa_feature_t f = zio_compress_to_feature(compval);

        if (f == SPA_FEATURE_NONE)
                return (SET_ERROR(EINVAL));

        if (!spa_feature_is_enabled(dp->dp_spa, f))
                return (SET_ERROR(ENOTSUP));

        return (0);
}

static void
dsl_dataset_set_compression_sync(void *arg, dmu_tx_t *tx)
{
        dsl_dataset_set_compression_arg_t *ddsca = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds = NULL;

        uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);
        spa_feature_t f = zio_compress_to_feature(compval);
        ASSERT3S(f, !=, SPA_FEATURE_NONE);
        ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN);

        VERIFY0(dsl_dataset_hold(dp, ddsca->ddsca_name, FTAG, &ds));
        if (zfeature_active(f, ds->ds_feature[f]) != B_TRUE) {
                ds->ds_feature_activation[f] = (void *)B_TRUE;
                dsl_dataset_activate_feature(ds->ds_object, f,
                    ds->ds_feature_activation[f], tx);
                ds->ds_feature[f] = ds->ds_feature_activation[f];
        }
        dsl_dataset_rele(ds, FTAG);
}

int
dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
    uint64_t compression)
{
        dsl_dataset_set_compression_arg_t ddsca;

        /*
         * The sync task is only required for zstd in order to activate
         * the feature flag when the property is first set.
         */
        if (ZIO_COMPRESS_ALGO(compression) != ZIO_COMPRESS_ZSTD)
                return (0);

        ddsca.ddsca_name = dsname;
        ddsca.ddsca_source = source;
        ddsca.ddsca_value = compression;

        return (dsl_sync_task(dsname, dsl_dataset_set_compression_check,
            dsl_dataset_set_compression_sync, &ddsca, 0,
            ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
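
/*
 * Illustrative example: only a zstd setting reaches the sync task above;
 * any other algorithm returns 0 immediately (dataset name hypothetical):
 *
 *	int err = dsl_dataset_set_compression("tank/fs", ZPROP_SRC_LOCAL,
 *	    ZIO_COMPRESS_ZSTD);
 */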

/*
 * Return (in *usedp) the amount of space referenced by "new" that was not
 * referenced at the time the bookmark corresponds to.  "New" may be a
 * snapshot or a head.  The bookmark must be before new, in
 * new's filesystem (or its origin) -- caller verifies this.
 *
 * The written space is calculated by considering two components: First, we
 * ignore any freed space, and calculate the written as new's used space
 * minus old's used space.  Next, we add in the amount of space that was freed
 * between the two time points, thus reducing new's used space relative to
 * old's.  Specifically, this is the space that was born before
 * zbm_creation_txg, and freed before new (ie. on new's deadlist or a
 * previous deadlist).
 *
 * space freed                  [---------------------]
 * snapshots       ---O-------O--------O-------O------
 *                    bookmark                 new
 *
 * Note, the bookmark's zbm_*_bytes_refd must be valid, but if the HAS_FBN
 * flag is not set, we will calculate the freed_before_next based on the
 * next snapshot's deadlist, rather than using zbm_*_freed_before_next_snap.
 */
static int
dsl_dataset_space_written_impl(zfs_bookmark_phys_t *bmp,
    dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
        int err = 0;
        dsl_pool_t *dp = new->ds_dir->dd_pool;

        ASSERT(dsl_pool_config_held(dp));
        if (dsl_dataset_is_snapshot(new)) {
                ASSERT3U(bmp->zbm_creation_txg, <,
                    dsl_dataset_phys(new)->ds_creation_txg);
        }

        *usedp = 0;
        *usedp += dsl_dataset_phys(new)->ds_referenced_bytes;
        *usedp -= bmp->zbm_referenced_bytes_refd;

        *compp = 0;
        *compp += dsl_dataset_phys(new)->ds_compressed_bytes;
        *compp -= bmp->zbm_compressed_bytes_refd;

        *uncompp = 0;
        *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes;
        *uncompp -= bmp->zbm_uncompressed_bytes_refd;

        dsl_dataset_t *snap = new;

        while (dsl_dataset_phys(snap)->ds_prev_snap_txg >
            bmp->zbm_creation_txg) {
                uint64_t used, comp, uncomp;

                dsl_deadlist_space_range(&snap->ds_deadlist,
                    0, bmp->zbm_creation_txg,
                    &used, &comp, &uncomp);
                *usedp += used;
                *compp += comp;
                *uncompp += uncomp;

                uint64_t snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
                if (snap != new)
                        dsl_dataset_rele(snap, FTAG);
                err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
                if (err != 0)
                        break;
        }

        /*
         * We might not have the FBN if we are calculating written from
         * a snapshot (because we didn't know the correct "next" snapshot
         * until now).
         */
        if (bmp->zbm_flags & ZBM_FLAG_HAS_FBN) {
                *usedp += bmp->zbm_referenced_freed_before_next_snap;
                *compp += bmp->zbm_compressed_freed_before_next_snap;
                *uncompp += bmp->zbm_uncompressed_freed_before_next_snap;
        } else {
                ASSERT3U(dsl_dataset_phys(snap)->ds_prev_snap_txg, ==,
                    bmp->zbm_creation_txg);
                uint64_t used, comp, uncomp;
                dsl_deadlist_space(&snap->ds_deadlist, &used, &comp, &uncomp);
                *usedp += used;
                *compp += comp;
                *uncompp += uncomp;
        }
        if (snap != new)
                dsl_dataset_rele(snap, FTAG);
        return (err);
}

/*
 * Return (in *usedp) the amount of space written in new that was not
 * present at the time the bookmark corresponds to.  New may be a
 * snapshot or the head.  Old must be a bookmark before new, in
 * new's filesystem (or its origin) -- caller verifies this.
 */
int
dsl_dataset_space_written_bookmark(zfs_bookmark_phys_t *bmp,
    dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
        if (!(bmp->zbm_flags & ZBM_FLAG_HAS_FBN))
                return (SET_ERROR(ENOTSUP));
        return (dsl_dataset_space_written_impl(bmp, new,
            usedp, compp, uncompp));
}

/*
 * Return (in *usedp) the amount of space written in new that is not
 * present in oldsnap.  New may be a snapshot or the head.  Old must be
 * a snapshot before new, in new's filesystem (or its origin).  If not then
 * fail and return EINVAL.
 */
int
dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
        if (!dsl_dataset_is_before(new, oldsnap, 0))
                return (SET_ERROR(EINVAL));

        zfs_bookmark_phys_t zbm = { 0 };
        dsl_dataset_phys_t *dsp = dsl_dataset_phys(oldsnap);
        zbm.zbm_guid = dsp->ds_guid;
        zbm.zbm_creation_txg = dsp->ds_creation_txg;
        zbm.zbm_creation_time = dsp->ds_creation_time;
        zbm.zbm_referenced_bytes_refd = dsp->ds_referenced_bytes;
        zbm.zbm_compressed_bytes_refd = dsp->ds_compressed_bytes;
        zbm.zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes;

        /*
         * If oldsnap is the origin (or origin's origin, ...) of new,
         * we can't easily calculate the effective FBN.  Therefore,
         * we do not set ZBM_FLAG_HAS_FBN, so that the _impl will calculate
         * it relative to the correct "next": the next snapshot towards "new",
         * rather than the next snapshot in oldsnap's dsl_dir.
         */
        return (dsl_dataset_space_written_impl(&zbm, new,
            usedp, compp, uncompp));
}
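
/*
 * Illustrative example: computing the "written" space between a held
 * snapshot and a held head dataset; assumes the caller already holds
 * the pool config lock, as this function asserts:
 *
 *	uint64_t written, comp, uncomp;
 *	int err = dsl_dataset_space_written(oldsnap, head,
 *	    &written, &comp, &uncomp);
 */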

/*
 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
 * lastsnap, and all snapshots in between are deleted.
 *
 * blocks that would be freed    [---------------------------]
 * snapshots       ---O-------O--------O-------O--------O
 *                     firstsnap                lastsnap
 *
 * This is the set of blocks that were born after the snap before firstsnap,
 * (birth > firstsnap->prev_snap_txg) and died before the snap after the
 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
 * We calculate this by iterating over the relevant deadlists (from the snap
 * after lastsnap, backward to the snap after firstsnap), summing up the
 * space on the deadlist that was born after the snap before firstsnap.
 */
int
dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
    dsl_dataset_t *lastsnap,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
        int err = 0;
        uint64_t snapobj;
        dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;

        ASSERT(firstsnap->ds_is_snapshot);
        ASSERT(lastsnap->ds_is_snapshot);

        /*
         * Check that the snapshots are in the same dsl_dir, and firstsnap
         * is before lastsnap.
         */
        if (firstsnap->ds_dir != lastsnap->ds_dir ||
            dsl_dataset_phys(firstsnap)->ds_creation_txg >
            dsl_dataset_phys(lastsnap)->ds_creation_txg)
                return (SET_ERROR(EINVAL));

        *usedp = *compp = *uncompp = 0;

        snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;
        while (snapobj != firstsnap->ds_object) {
                dsl_dataset_t *ds;
                uint64_t used, comp, uncomp;

                err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
                if (err != 0)
                        break;

                dsl_deadlist_space_range(&ds->ds_deadlist,
                    dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX,
                    &used, &comp, &uncomp);
                *usedp += used;
                *compp += comp;
                *uncompp += uncomp;

                snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
                ASSERT3U(snapobj, !=, 0);
                dsl_dataset_rele(ds, FTAG);
        }
        return (err);
}
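
/*
 * Illustrative example: estimating what destroying an inclusive snapshot
 * range (e.g. "zfs destroy tank/fs@first%last") would reclaim, given
 * held dsl_dataset_t's for the endpoints:
 *
 *	uint64_t used, comp, uncomp;
 *	int err = dsl_dataset_space_wouldfree(firstsnap, lastsnap,
 *	    &used, &comp, &uncomp);
 */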

/*
 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
 * For example, they could both be snapshots of the same filesystem, and
 * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
 * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
 * filesystem.  Or 'earlier' could be the origin's origin.
 *
 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
 */
boolean_t
dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
    uint64_t earlier_txg)
{
        dsl_pool_t *dp = later->ds_dir->dd_pool;
        int error;
        boolean_t ret;

        ASSERT(dsl_pool_config_held(dp));
        ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);

        if (earlier_txg == 0)
                earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;

        if (later->ds_is_snapshot &&
            earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
                return (B_FALSE);

        if (later->ds_dir == earlier->ds_dir)
                return (B_TRUE);

        /*
         * We check dd_origin_obj explicitly here rather than using
         * dsl_dir_is_clone() so that we will return TRUE if "earlier"
         * is $ORIGIN@$ORIGIN.  dsl_dataset_space_written() depends on
         * this behavior.
         */
        if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == 0)
                return (B_FALSE);

        dsl_dataset_t *origin;
        error = dsl_dataset_hold_obj(dp,
            dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
        if (error != 0)
                return (B_FALSE);
        if (dsl_dataset_phys(origin)->ds_creation_txg == earlier_txg &&
            origin->ds_dir == earlier->ds_dir) {
                dsl_dataset_rele(origin, FTAG);
                return (B_TRUE);
        }
        ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
        dsl_dataset_rele(origin, FTAG);
        return (ret);
}
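
/*
 * Illustrative example: a send/receive-style relatedness check; an
 * incremental from "from_ds" to "to_ds" only makes sense if "from_ds"
 * is in "to_ds"'s timeline (passing 0 uses from_ds's creation txg):
 *
 *	if (!dsl_dataset_is_before(to_ds, from_ds, 0))
 *		return (SET_ERROR(EXDEV));
 */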

void
dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
        dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);
}

boolean_t
dsl_dataset_is_zapified(dsl_dataset_t *ds)
{
        dmu_object_info_t doi;

        dmu_object_info_from_db(ds->ds_dbuf, &doi);
        return (doi.doi_type == DMU_OTN_ZAP_METADATA);
}

boolean_t
dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)
{
        return (dsl_dataset_is_zapified(ds) &&
            zap_contains(ds->ds_dir->dd_pool->dp_meta_objset,
            ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);
}

uint64_t
dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds)
{
        uint64_t remap_deadlist_obj;
        int err;

        if (!dsl_dataset_is_zapified(ds))
                return (0);

        err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object,
            DS_FIELD_REMAP_DEADLIST, sizeof (remap_deadlist_obj), 1,
            &remap_deadlist_obj);

        if (err != 0) {
                VERIFY3S(err, ==, ENOENT);
                return (0);
        }

        ASSERT(remap_deadlist_obj != 0);
        return (remap_deadlist_obj);
}

boolean_t
dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds)
{
        EQUIV(dsl_deadlist_is_open(&ds->ds_remap_deadlist),
            dsl_dataset_get_remap_deadlist_object(ds) != 0);
        return (dsl_deadlist_is_open(&ds->ds_remap_deadlist));
}

static void
dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
        ASSERT(obj != 0);
        dsl_dataset_zapify(ds, tx);
        VERIFY0(zap_add(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object,
            DS_FIELD_REMAP_DEADLIST, sizeof (obj), 1, &obj, tx));
}

static void
dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        VERIFY0(zap_remove(ds->ds_dir->dd_pool->dp_meta_objset,
            ds->ds_object, DS_FIELD_REMAP_DEADLIST, tx));
}

void
dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        uint64_t remap_deadlist_object;
        spa_t *spa = ds->ds_dir->dd_pool->dp_spa;

        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(dsl_dataset_remap_deadlist_exists(ds));

        remap_deadlist_object = ds->ds_remap_deadlist.dl_object;
        dsl_deadlist_close(&ds->ds_remap_deadlist);
        dsl_deadlist_free(spa_meta_objset(spa), remap_deadlist_object, tx);
        dsl_dataset_unset_remap_deadlist_object(ds, tx);
        spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
}

void
dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        uint64_t remap_deadlist_obj;
        spa_t *spa = ds->ds_dir->dd_pool->dp_spa;

        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(MUTEX_HELD(&ds->ds_remap_deadlist_lock));
        /*
         * Currently we only create remap deadlists when there are indirect
         * vdevs with referenced mappings.
         */
        ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL));

        remap_deadlist_obj = dsl_deadlist_clone(
            &ds->ds_deadlist, UINT64_MAX,
            dsl_dataset_phys(ds)->ds_prev_snap_obj, tx);
        dsl_dataset_set_remap_deadlist_object(ds,
            remap_deadlist_obj, tx);
        VERIFY0(dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa),
            remap_deadlist_obj));
        spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
}

void
dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
    uint64_t num_redact_snaps, dmu_tx_t *tx)
{
        uint64_t dsobj = ds->ds_object;
        struct feature_type_uint64_array_arg *ftuaa =
            kmem_zalloc(sizeof (*ftuaa), KM_SLEEP);
        ftuaa->length = (int64_t)num_redact_snaps;
        if (num_redact_snaps > 0) {
                ftuaa->array = kmem_alloc(num_redact_snaps * sizeof (uint64_t),
                    KM_SLEEP);
                memcpy(ftuaa->array, redact_snaps, num_redact_snaps *
                    sizeof (uint64_t));
        }
        dsl_dataset_activate_feature(dsobj, SPA_FEATURE_REDACTED_DATASETS,
            ftuaa, tx);
        ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = ftuaa;
}

/*
 * Find and return (in *oldest_dsobj) the oldest snapshot of the dsobj
 * dataset whose birth time is >= min_txg.
 */
int
dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,
    uint64_t *oldest_dsobj)
{
        dsl_dataset_t *ds;
        dsl_pool_t *dp = spa->spa_dsl_pool;

        int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds);
        if (error != 0)
                return (error);

        uint64_t prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
        uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;

        while (prev_obj != 0 && min_txg < prev_obj_txg) {
                dsl_dataset_rele(ds, FTAG);
                if ((error = dsl_dataset_hold_obj(dp, prev_obj,
                    FTAG, &ds)) != 0)
                        return (error);
                prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
                prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
        }
        *oldest_dsobj = ds->ds_object;
        dsl_dataset_rele(ds, FTAG);
        return (0);
}
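
/*
 * Illustrative example: anchoring work at the oldest snapshot born at or
 * after a given txg ("head_dsobj" and "min_txg" are hypothetical inputs):
 *
 *	uint64_t oldest;
 *	int err = dsl_dataset_oldest_snapshot(spa, head_dsobj, min_txg,
 *	    &oldest);
 */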

ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, UINT, ZMOD_RW,
        "Max allowed record size");

ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
        "Allow mounting of redacted datasets");

ZFS_MODULE_PARAM(zfs, zfs_, snapshot_history_enabled, INT, ZMOD_RW,
        "Include snapshot events in pool history/events");

EXPORT_SYMBOL(dsl_dataset_hold);
EXPORT_SYMBOL(dsl_dataset_hold_flags);
EXPORT_SYMBOL(dsl_dataset_hold_obj);
EXPORT_SYMBOL(dsl_dataset_hold_obj_flags);
EXPORT_SYMBOL(dsl_dataset_own);
EXPORT_SYMBOL(dsl_dataset_own_obj);
EXPORT_SYMBOL(dsl_dataset_name);
EXPORT_SYMBOL(dsl_dataset_rele);
EXPORT_SYMBOL(dsl_dataset_rele_flags);
EXPORT_SYMBOL(dsl_dataset_disown);
EXPORT_SYMBOL(dsl_dataset_tryown);
EXPORT_SYMBOL(dsl_dataset_create_sync);
EXPORT_SYMBOL(dsl_dataset_create_sync_dd);
EXPORT_SYMBOL(dsl_dataset_snapshot_check);
EXPORT_SYMBOL(dsl_dataset_snapshot_sync);
EXPORT_SYMBOL(dsl_dataset_promote);
EXPORT_SYMBOL(dsl_dataset_user_hold);
EXPORT_SYMBOL(dsl_dataset_user_release);
EXPORT_SYMBOL(dsl_dataset_get_holds);
EXPORT_SYMBOL(dsl_dataset_get_blkptr);
EXPORT_SYMBOL(dsl_dataset_get_spa);
EXPORT_SYMBOL(dsl_dataset_modified_since_snap);
EXPORT_SYMBOL(dsl_dataset_space_written);
EXPORT_SYMBOL(dsl_dataset_space_wouldfree);
EXPORT_SYMBOL(dsl_dataset_sync);
EXPORT_SYMBOL(dsl_dataset_block_born);
EXPORT_SYMBOL(dsl_dataset_block_kill);
EXPORT_SYMBOL(dsl_dataset_dirty);
EXPORT_SYMBOL(dsl_dataset_stats);
EXPORT_SYMBOL(dsl_dataset_fast_stat);
EXPORT_SYMBOL(dsl_dataset_space);
EXPORT_SYMBOL(dsl_dataset_fsid_guid);
EXPORT_SYMBOL(dsl_dsobj_to_dsname);
EXPORT_SYMBOL(dsl_dataset_check_quota);
EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl);
EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl);