Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/cmd/zhack.c
48259 views
1
// SPDX-License-Identifier: CDDL-1.0
2
/*
3
* CDDL HEADER START
4
*
5
* The contents of this file are subject to the terms of the
6
* Common Development and Distribution License (the "License").
7
* You may not use this file except in compliance with the License.
8
*
9
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10
* or https://opensource.org/licenses/CDDL-1.0.
11
* See the License for the specific language governing permissions
12
* and limitations under the License.
13
*
14
* When distributing Covered Code, include this CDDL HEADER in each
15
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16
* If applicable, add the following below this CDDL HEADER, with the
17
* fields enclosed by brackets "[]" replaced with your own identifying
18
* information: Portions Copyright [yyyy] [name of copyright owner]
19
*
20
* CDDL HEADER END
21
*/
22
23
/*
24
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25
* Copyright (c) 2013 Steven Hartland. All rights reserved.
26
*/
27
28
/*
29
* zhack is a debugging tool that can write changes to a ZFS pool using libzpool
30
* for testing purposes. Altering pools with zhack is unsupported and may
31
* result in corrupted pools.
32
*/
33
34
#include <zfs_prop.h>
35
#include <stdio.h>
36
#include <stdlib.h>
37
#include <ctype.h>
38
#include <sys/stat.h>
39
#include <sys/zfs_context.h>
40
#include <sys/spa.h>
41
#include <sys/spa_impl.h>
42
#include <sys/dmu.h>
43
#include <sys/zap.h>
44
#include <sys/zfs_znode.h>
45
#include <sys/dsl_synctask.h>
46
#include <sys/vdev.h>
47
#include <sys/vdev_impl.h>
48
#include <sys/fs/zfs.h>
49
#include <sys/dmu_objset.h>
50
#include <sys/dsl_pool.h>
51
#include <sys/zio_checksum.h>
52
#include <sys/zio_compress.h>
53
#include <sys/zfeature.h>
54
#include <sys/dmu_tx.h>
55
#include <zfeature_common.h>
56
#include <libzutil.h>
57
#include <sys/metaslab_impl.h>
58
59
static importargs_t g_importargs;
60
static char *g_pool;
61
static boolean_t g_readonly;
62
63
/*
 * Bit flags selecting what "label repair" should do.  The non-zero values
 * are distinct bits and may be OR-ed together.
 */
typedef enum {
	ZHACK_REPAIR_OP_UNKNOWN = 0x0,
	ZHACK_REPAIR_OP_CKSUM = 0x1,
	ZHACK_REPAIR_OP_UNDETACH = 0x2
} zhack_repair_op_t;
68
69
/*
 * Print the command synopsis and the list of subcommands on stderr, then
 * exit with status 1.  Never returns.
 */
static __attribute__((noreturn)) void
usage(void)
{
	static const char synopsis[] =
	    "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
	    "<args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n";
	static const char subcommands[] =
	    " feature stat <pool>\n"
	    " print information about enabled features\n"
	    " feature enable [-r] [-d desc] <pool> <feature>\n"
	    " add a new enabled feature to the pool\n"
	    " -d <desc> sets the feature's description\n"
	    " -r set read-only compatible flag for feature\n"
	    " feature ref [-md] <pool> <feature>\n"
	    " change the refcount on the given feature\n"
	    " -d decrease instead of increase the refcount\n"
	    " -m add the feature to the label if increasing refcount\n"
	    "\n"
	    " <feature> : should be a feature guid\n"
	    "\n"
	    " label repair <device>\n"
	    " repair labels of a specified device according to options\n"
	    " which may be combined to do their functions in one call\n"
	    " -c repair corrupted label checksums\n"
	    " -u restore the label on a detached device\n"
	    "\n"
	    " <device> : path to vdev\n"
	    "\n"
	    " metaslab leak <pool>\n"
	    " apply allocation map from zdb to specified pool\n";

	/* Neither text contains a conversion, so fputs() emits it verbatim. */
	(void) fputs(synopsis, stderr);
	(void) fputs(subcommands, stderr);
	exit(1);
}
104
105
106
static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void
107
fatal(spa_t *spa, const void *tag, const char *fmt, ...)
108
{
109
va_list ap;
110
111
if (spa != NULL) {
112
spa_close(spa, tag);
113
(void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
114
}
115
116
va_start(ap, fmt);
117
(void) fputs("zhack: ", stderr);
118
(void) vfprintf(stderr, fmt, ap);
119
va_end(ap);
120
(void) fputc('\n', stderr);
121
122
exit(1);
123
}
124
125
static int
126
space_delta_cb(dmu_object_type_t bonustype, const void *data,
127
zfs_file_info_t *zoi)
128
{
129
(void) data, (void) zoi;
130
131
/*
132
* Is it a valid type of object to track?
133
*/
134
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
135
return (ENOENT);
136
(void) fprintf(stderr, "modifying object that needs user accounting");
137
abort();
138
}
139
140
/*
141
* Target is the dataset whose pool we want to open.
142
*/
143
static void
144
zhack_import(char *target, boolean_t readonly)
145
{
146
nvlist_t *config;
147
nvlist_t *props;
148
int error;
149
150
kernel_init(readonly ? SPA_MODE_READ :
151
(SPA_MODE_READ | SPA_MODE_WRITE));
152
153
dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
154
155
g_readonly = readonly;
156
g_importargs.can_be_active = readonly;
157
g_pool = strdup(target);
158
159
libpc_handle_t lpch = {
160
.lpc_lib_handle = NULL,
161
.lpc_ops = &libzpool_config_ops,
162
.lpc_printerr = B_TRUE
163
};
164
error = zpool_find_config(&lpch, target, &config, &g_importargs);
165
if (error)
166
fatal(NULL, FTAG, "cannot import '%s'", target);
167
168
props = NULL;
169
if (readonly) {
170
VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
171
VERIFY0(nvlist_add_uint64(props,
172
zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
173
}
174
175
zfeature_checks_disable = B_TRUE;
176
error = spa_import(target, config, props,
177
(readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
178
fnvlist_free(config);
179
zfeature_checks_disable = B_FALSE;
180
if (error == EEXIST)
181
error = 0;
182
183
if (error)
184
fatal(NULL, FTAG, "can't import '%s': %s", target,
185
strerror(error));
186
}
187
188
static void
189
zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa)
190
{
191
int err;
192
193
zhack_import(target, readonly);
194
195
zfeature_checks_disable = B_TRUE;
196
err = spa_open(target, spa, tag);
197
zfeature_checks_disable = B_FALSE;
198
199
if (err != 0)
200
fatal(*spa, FTAG, "cannot open '%s': %s", target,
201
strerror(err));
202
if (spa_version(*spa) < SPA_VERSION_FEATURES) {
203
fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
204
target, (int)spa_version(*spa));
205
}
206
}
207
208
static void
209
dump_obj(objset_t *os, uint64_t obj, const char *name)
210
{
211
zap_cursor_t zc;
212
zap_attribute_t *za = zap_attribute_long_alloc();
213
214
(void) printf("%s_obj:\n", name);
215
216
for (zap_cursor_init(&zc, os, obj);
217
zap_cursor_retrieve(&zc, za) == 0;
218
zap_cursor_advance(&zc)) {
219
if (za->za_integer_length == 8) {
220
ASSERT(za->za_num_integers == 1);
221
(void) printf("\t%s = %llu\n",
222
za->za_name, (u_longlong_t)za->za_first_integer);
223
} else {
224
ASSERT(za->za_integer_length == 1);
225
char val[1024];
226
VERIFY0(zap_lookup(os, obj, za->za_name,
227
1, sizeof (val), val));
228
(void) printf("\t%s = %s\n", za->za_name, val);
229
}
230
}
231
zap_cursor_fini(&zc);
232
zap_attribute_free(za);
233
}
234
235
static void
236
dump_mos(spa_t *spa)
237
{
238
nvlist_t *nv = spa->spa_label_features;
239
nvpair_t *pair;
240
241
(void) printf("label config:\n");
242
for (pair = nvlist_next_nvpair(nv, NULL);
243
pair != NULL;
244
pair = nvlist_next_nvpair(nv, pair)) {
245
(void) printf("\t%s\n", nvpair_name(pair));
246
}
247
}
248
249
static void
250
zhack_do_feature_stat(int argc, char **argv)
251
{
252
spa_t *spa;
253
objset_t *os;
254
char *target;
255
256
argc--;
257
argv++;
258
259
if (argc < 1) {
260
(void) fprintf(stderr, "error: missing pool name\n");
261
usage();
262
}
263
target = argv[0];
264
265
zhack_spa_open(target, B_TRUE, FTAG, &spa);
266
os = spa->spa_meta_objset;
267
268
dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
269
dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
270
dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
271
if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
272
dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
273
}
274
dump_mos(spa);
275
276
spa_close(spa, FTAG);
277
}
278
279
static void
280
zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
281
{
282
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
283
zfeature_info_t *feature = arg;
284
285
feature_enable_sync(spa, feature, tx);
286
287
spa_history_log_internal(spa, "zhack enable feature", tx,
288
"name=%s flags=%u",
289
feature->fi_guid, feature->fi_flags);
290
}
291
292
static void
293
zhack_do_feature_enable(int argc, char **argv)
294
{
295
int c;
296
char *desc, *target;
297
spa_t *spa;
298
objset_t *mos;
299
zfeature_info_t feature;
300
const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
301
302
/*
303
* Features are not added to the pool's label until their refcounts
304
* are incremented, so fi_mos can just be left as false for now.
305
*/
306
desc = NULL;
307
feature.fi_uname = "zhack";
308
feature.fi_flags = 0;
309
feature.fi_depends = nodeps;
310
feature.fi_feature = SPA_FEATURE_NONE;
311
312
optind = 1;
313
while ((c = getopt(argc, argv, "+rd:")) != -1) {
314
switch (c) {
315
case 'r':
316
feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
317
break;
318
case 'd':
319
if (desc != NULL)
320
free(desc);
321
desc = strdup(optarg);
322
break;
323
default:
324
usage();
325
break;
326
}
327
}
328
329
if (desc == NULL)
330
desc = strdup("zhack injected");
331
feature.fi_desc = desc;
332
333
argc -= optind;
334
argv += optind;
335
336
if (argc < 2) {
337
(void) fprintf(stderr, "error: missing feature or pool name\n");
338
usage();
339
}
340
target = argv[0];
341
feature.fi_guid = argv[1];
342
343
if (!zfeature_is_valid_guid(feature.fi_guid))
344
fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
345
346
zhack_spa_open(target, B_FALSE, FTAG, &spa);
347
mos = spa->spa_meta_objset;
348
349
if (zfeature_is_supported(feature.fi_guid))
350
fatal(spa, FTAG, "'%s' is a real feature, will not enable",
351
feature.fi_guid);
352
if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
353
fatal(spa, FTAG, "feature already enabled: %s",
354
feature.fi_guid);
355
356
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
357
zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
358
359
spa_close(spa, FTAG);
360
361
free(desc);
362
}
363
364
static void
365
feature_incr_sync(void *arg, dmu_tx_t *tx)
366
{
367
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
368
zfeature_info_t *feature = arg;
369
uint64_t refcount;
370
371
mutex_enter(&spa->spa_feat_stats_lock);
372
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
373
feature_sync(spa, feature, refcount + 1, tx);
374
spa_history_log_internal(spa, "zhack feature incr", tx,
375
"name=%s", feature->fi_guid);
376
mutex_exit(&spa->spa_feat_stats_lock);
377
}
378
379
static void
380
feature_decr_sync(void *arg, dmu_tx_t *tx)
381
{
382
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
383
zfeature_info_t *feature = arg;
384
uint64_t refcount;
385
386
mutex_enter(&spa->spa_feat_stats_lock);
387
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
388
feature_sync(spa, feature, refcount - 1, tx);
389
spa_history_log_internal(spa, "zhack feature decr", tx,
390
"name=%s", feature->fi_guid);
391
mutex_exit(&spa->spa_feat_stats_lock);
392
}
393
394
static void
395
zhack_do_feature_ref(int argc, char **argv)
396
{
397
int c;
398
char *target;
399
boolean_t decr = B_FALSE;
400
spa_t *spa;
401
objset_t *mos;
402
zfeature_info_t feature;
403
const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
404
405
/*
406
* fi_desc does not matter here because it was written to disk
407
* when the feature was enabled, but we need to properly set the
408
* feature for read or write based on the information we read off
409
* disk later.
410
*/
411
feature.fi_uname = "zhack";
412
feature.fi_flags = 0;
413
feature.fi_desc = NULL;
414
feature.fi_depends = nodeps;
415
feature.fi_feature = SPA_FEATURE_NONE;
416
417
optind = 1;
418
while ((c = getopt(argc, argv, "+md")) != -1) {
419
switch (c) {
420
case 'm':
421
feature.fi_flags |= ZFEATURE_FLAG_MOS;
422
break;
423
case 'd':
424
decr = B_TRUE;
425
break;
426
default:
427
usage();
428
break;
429
}
430
}
431
argc -= optind;
432
argv += optind;
433
434
if (argc < 2) {
435
(void) fprintf(stderr, "error: missing feature or pool name\n");
436
usage();
437
}
438
target = argv[0];
439
feature.fi_guid = argv[1];
440
441
if (!zfeature_is_valid_guid(feature.fi_guid))
442
fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
443
444
zhack_spa_open(target, B_FALSE, FTAG, &spa);
445
mos = spa->spa_meta_objset;
446
447
if (zfeature_is_supported(feature.fi_guid)) {
448
fatal(spa, FTAG,
449
"'%s' is a real feature, will not change refcount",
450
feature.fi_guid);
451
}
452
453
if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
454
feature.fi_guid)) {
455
feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
456
} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
457
feature.fi_guid)) {
458
feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
459
} else {
460
fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
461
}
462
463
if (decr) {
464
uint64_t count;
465
if (feature_get_refcount_from_disk(spa, &feature,
466
&count) == 0 && count == 0) {
467
fatal(spa, FTAG, "feature refcount already 0: %s",
468
feature.fi_guid);
469
}
470
}
471
472
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
473
decr ? feature_decr_sync : feature_incr_sync, &feature,
474
5, ZFS_SPACE_CHECK_NORMAL));
475
476
spa_close(spa, FTAG);
477
}
478
479
static int
480
zhack_do_feature(int argc, char **argv)
481
{
482
char *subcommand;
483
484
argc--;
485
argv++;
486
if (argc == 0) {
487
(void) fprintf(stderr,
488
"error: no feature operation specified\n");
489
usage();
490
}
491
492
subcommand = argv[0];
493
if (strcmp(subcommand, "stat") == 0) {
494
zhack_do_feature_stat(argc, argv);
495
} else if (strcmp(subcommand, "enable") == 0) {
496
zhack_do_feature_enable(argc, argv);
497
} else if (strcmp(subcommand, "ref") == 0) {
498
zhack_do_feature_ref(argc, argv);
499
} else {
500
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
501
subcommand);
502
usage();
503
}
504
505
return (0);
506
}
507
508
static boolean_t
509
strstarts(const char *a, const char *b)
510
{
511
return (strncmp(a, b, strlen(b)) == 0);
512
}
513
514
static void
515
metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
516
dmu_tx_t *tx)
517
{
518
ASSERT(msp->ms_disabled);
519
ASSERT(MUTEX_HELD(&msp->ms_lock));
520
uint64_t txg = dmu_tx_get_txg(tx);
521
522
uint64_t off = start;
523
while (off < start + size) {
524
uint64_t ostart, osize;
525
boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
526
off, start + size - off, &ostart, &osize);
527
if (!found)
528
break;
529
zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
530
531
if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
532
vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
533
txg);
534
535
zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
536
osize);
537
msp->ms_allocating_total += osize;
538
off = ostart + osize;
539
}
540
}
541
542
static void
543
zhack_do_metaslab_leak(int argc, char **argv)
544
{
545
int c;
546
char *target;
547
spa_t *spa;
548
549
optind = 1;
550
boolean_t force = B_FALSE;
551
while ((c = getopt(argc, argv, "f")) != -1) {
552
switch (c) {
553
case 'f':
554
force = B_TRUE;
555
break;
556
default:
557
usage();
558
break;
559
}
560
}
561
562
argc -= optind;
563
argv += optind;
564
565
if (argc < 1) {
566
(void) fprintf(stderr, "error: missing pool name\n");
567
usage();
568
}
569
target = argv[0];
570
571
zhack_spa_open(target, B_FALSE, FTAG, &spa);
572
spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
573
574
char *line = NULL;
575
size_t cap = 0;
576
577
vdev_t *vd = NULL;
578
metaslab_t *prev = NULL;
579
dmu_tx_t *tx = NULL;
580
while (getline(&line, &cap, stdin) > 0) {
581
if (strstarts(line, "\tvdev ")) {
582
uint64_t vdev_id, ms_shift;
583
if (sscanf(line,
584
"\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
585
&vdev_id, &ms_shift) == 1) {
586
VERIFY3U(sscanf(line, "\tvdev %"PRIu64
587
"\t metaslab shift %4"PRIu64,
588
&vdev_id, &ms_shift), ==, 2);
589
}
590
vd = vdev_lookup_top(spa, vdev_id);
591
if (vd == NULL) {
592
fprintf(stderr, "error: no such vdev with "
593
"id %"PRIu64"\n", vdev_id);
594
break;
595
}
596
if (tx) {
597
dmu_tx_commit(tx);
598
mutex_exit(&prev->ms_lock);
599
metaslab_enable(prev, B_FALSE, B_FALSE);
600
tx = NULL;
601
prev = NULL;
602
}
603
if (vd->vdev_ms_shift != ms_shift) {
604
fprintf(stderr, "error: ms_shift mismatch: %"
605
PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
606
ms_shift);
607
break;
608
}
609
} else if (strstarts(line, "\tmetaslabs ")) {
610
uint64_t ms_count;
611
VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
612
==, 1);
613
ASSERT(vd);
614
if (!force && vd->vdev_ms_count != ms_count) {
615
fprintf(stderr, "error: ms_count mismatch: %"
616
PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
617
ms_count);
618
break;
619
}
620
} else if (strstarts(line, "ALLOC:")) {
621
uint64_t start, size;
622
VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
623
&start, &size), ==, 2);
624
625
ASSERT(vd);
626
metaslab_t *cur =
627
vd->vdev_ms[start >> vd->vdev_ms_shift];
628
if (prev != cur) {
629
if (prev) {
630
dmu_tx_commit(tx);
631
mutex_exit(&prev->ms_lock);
632
metaslab_enable(prev, B_FALSE, B_FALSE);
633
}
634
ASSERT(cur);
635
metaslab_disable(cur);
636
mutex_enter(&cur->ms_lock);
637
metaslab_load(cur);
638
prev = cur;
639
tx = dmu_tx_create_dd(
640
spa_get_dsl(vd->vdev_spa)->dp_root_dir);
641
dmu_tx_assign(tx, DMU_TX_WAIT);
642
}
643
644
metaslab_force_alloc(cur, start, size, tx);
645
} else {
646
continue;
647
}
648
}
649
if (tx) {
650
dmu_tx_commit(tx);
651
mutex_exit(&prev->ms_lock);
652
metaslab_enable(prev, B_FALSE, B_FALSE);
653
tx = NULL;
654
prev = NULL;
655
}
656
if (line)
657
free(line);
658
659
spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
660
spa_close(spa, FTAG);
661
}
662
663
/*
 * Dispatch "metaslab <operation> ...".  "leak" is the only operation; it
 * receives argc/argv shifted so that its argv[0] is "leak".  Always
 * returns 0; errors exit through usage().
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	const char *subcommand;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}

	subcommand = argv[0];
	if (strcmp(subcommand, "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    subcommand);
		usage();
	}

	zhack_do_metaslab_leak(argc, argv);
	return (0);
}
687
688
/*
 * Shift and byte size used for one uberblock given a vdev ashift: the
 * ashift clamped to the range [UBERBLOCK_SHIFT, MAX_UBERBLOCK_SHIFT].
 */
#define	ASHIFT_UBERBLOCK_SHIFT(ashift)	\
	MIN(MAX(ashift, UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
#define	ASHIFT_UBERBLOCK_SIZE(ashift)	\
	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))

/* Per-label result bits recorded in the labels_repaired[] array. */
#define	REPAIR_LABEL_STATUS_CKSUM	0x1
#define	REPAIR_LABEL_STATUS_UB		0x2
696
697
static int
698
zhack_repair_read_label(const int fd, vdev_label_t *vl,
699
const uint64_t label_offset, const int l)
700
{
701
const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
702
703
if (err == -1) {
704
(void) fprintf(stderr,
705
"error: cannot read label %d: %s\n",
706
l, strerror(errno));
707
return (err);
708
} else if (err != sizeof (vdev_label_t)) {
709
(void) fprintf(stderr,
710
"error: bad label %d read size\n", l);
711
return (err);
712
}
713
714
return (0);
715
}
716
717
static int
718
zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
719
{
720
if (vdev_eck->zec_magic == ZEC_MAGIC) {
721
*byteswap = B_FALSE;
722
} else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
723
*byteswap = B_TRUE;
724
} else {
725
(void) fprintf(stderr, "error: label %d: "
726
"Expected the nvlist checksum magic number but instead got "
727
"0x%" PRIx64 "\n",
728
l, vdev_eck->zec_magic);
729
return (1);
730
}
731
return (0);
732
}
733
734
static void
735
zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
736
const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
737
{
738
zio_cksum_t verifier;
739
zio_cksum_t current_cksum;
740
zio_checksum_info_t *ci;
741
abd_t *abd;
742
743
ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
744
745
if (byteswap)
746
byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
747
748
current_cksum = eck->zec_cksum;
749
eck->zec_cksum = verifier;
750
751
ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
752
abd = abd_get_from_buf(data, abdsize);
753
ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
754
abd_free(abd);
755
756
eck->zec_cksum = current_cksum;
757
}
758
759
static int
760
zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
761
{
762
int err;
763
nvlist_t *vdev_tree_cfg;
764
765
err = nvlist_lookup_nvlist(cfg,
766
ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
767
if (err) {
768
(void) fprintf(stderr,
769
"error: label %d: cannot find nvlist key %s\n",
770
l, ZPOOL_CONFIG_VDEV_TREE);
771
return (err);
772
}
773
774
err = nvlist_lookup_uint64(vdev_tree_cfg,
775
ZPOOL_CONFIG_ASHIFT, ashift);
776
if (err) {
777
(void) fprintf(stderr,
778
"error: label %d: cannot find nvlist key %s\n",
779
l, ZPOOL_CONFIG_ASHIFT);
780
return (err);
781
}
782
783
if (*ashift == 0) {
784
(void) fprintf(stderr,
785
"error: label %d: nvlist key %s is zero\n",
786
l, ZPOOL_CONFIG_ASHIFT);
787
return (1);
788
}
789
790
return (0);
791
}
792
793
static int
794
zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
795
{
796
/*
797
* Uberblock root block pointer has valid birth TXG.
798
* Copying it to the label NVlist
799
*/
800
if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
801
const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
802
int err;
803
804
ub->ub_txg = txg;
805
806
err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
807
if (err) {
808
(void) fprintf(stderr,
809
"error: label %d: "
810
"Failed to remove pool creation TXG\n",
811
l);
812
return (err);
813
}
814
815
err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
816
if (err) {
817
(void) fprintf(stderr,
818
"error: label %d: Failed to remove pool TXG to "
819
"be replaced.\n",
820
l);
821
return (err);
822
}
823
824
err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
825
if (err) {
826
(void) fprintf(stderr,
827
"error: label %d: "
828
"Failed to add pool TXG of %" PRIu64 "\n",
829
l, txg);
830
return (err);
831
}
832
}
833
834
return (0);
835
}
836
837
static boolean_t
838
zhack_repair_write_label(const int l, const int fd, const int byteswap,
839
void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
840
{
841
zio_cksum_t actual_cksum;
842
zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
843
&actual_cksum);
844
zio_cksum_t expected_cksum = eck->zec_cksum;
845
ssize_t err;
846
847
if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
848
return (B_FALSE);
849
850
eck->zec_cksum = actual_cksum;
851
852
err = pwrite64(fd, data, abdsize, offset);
853
if (err == -1) {
854
(void) fprintf(stderr, "error: cannot write label %d: %s\n",
855
l, strerror(errno));
856
return (B_FALSE);
857
} else if (err != abdsize) {
858
(void) fprintf(stderr, "error: bad write size label %d\n", l);
859
return (B_FALSE);
860
} else {
861
(void) fprintf(stderr,
862
"label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
863
l, abdsize, offset);
864
}
865
866
return (B_TRUE);
867
}
868
869
static void
870
zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
871
const uint64_t ashift, const int fd, const int byteswap,
872
const uint64_t label_offset, uint32_t *labels_repaired)
873
{
874
void *ub_data =
875
(char *)vl + offsetof(vdev_label_t, vl_uberblock);
876
zio_eck_t *ub_eck =
877
(zio_eck_t *)
878
((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
879
880
if (ub_eck->zec_magic != 0) {
881
(void) fprintf(stderr,
882
"error: label %d: "
883
"Expected Uberblock checksum magic number to "
884
"be 0, but got %" PRIu64 "\n",
885
l, ub_eck->zec_magic);
886
(void) fprintf(stderr, "It would appear there's already "
887
"a checksum for the uberblock.\n");
888
return;
889
}
890
891
892
ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
893
894
if (zhack_repair_write_label(l, fd, byteswap,
895
ub_data, ub_eck,
896
label_offset + offsetof(vdev_label_t, vl_uberblock),
897
ASHIFT_UBERBLOCK_SIZE(ashift)))
898
labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
899
}
900
901
static void
902
zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
903
{
904
(void) fprintf(stream,
905
"%016llx:%016llx:%016llx:%016llx",
906
(u_longlong_t)cksum->zc_word[0],
907
(u_longlong_t)cksum->zc_word[1],
908
(u_longlong_t)cksum->zc_word[2],
909
(u_longlong_t)cksum->zc_word[3]);
910
}
911
912
static int
913
zhack_repair_test_cksum(const int byteswap, void *vdev_data,
914
zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
915
{
916
const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
917
zio_cksum_t actual_cksum;
918
zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
919
VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
920
const uint64_t expected_magic = byteswap ?
921
BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
922
const uint64_t actual_magic = vdev_eck->zec_magic;
923
int err = 0;
924
925
if (actual_magic != expected_magic) {
926
(void) fprintf(stderr, "error: label %d: "
927
"Expected "
928
"the nvlist checksum magic number to not be %"
929
PRIu64 " not %" PRIu64 "\n",
930
l, expected_magic, actual_magic);
931
err = ECKSUM;
932
}
933
if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
934
(void) fprintf(stderr, "error: label %d: "
935
"Expected the nvlist checksum to be ", l);
936
(void) zhack_repair_print_cksum(stderr,
937
&expected_cksum);
938
(void) fprintf(stderr, " not ");
939
zhack_repair_print_cksum(stderr, &actual_cksum);
940
(void) fprintf(stderr, "\n");
941
err = ECKSUM;
942
}
943
return (err);
944
}
945
946
static int
947
zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
948
{
949
const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
950
ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
951
int err;
952
953
err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
954
VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
955
if (err) {
956
(void) fprintf(stderr,
957
"error: cannot unpack nvlist label %d\n", l);
958
return (err);
959
}
960
961
for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
962
uint64_t val;
963
err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
964
if (err) {
965
(void) fprintf(stderr,
966
"error: label %d, %d: "
967
"cannot find nvlist key %s\n",
968
l, i, cfg_keys[i]);
969
return (err);
970
}
971
}
972
973
return (0);
974
}
975
976
static void
977
zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
978
vdev_label_t *vl, const uint64_t label_offset, const int l,
979
uint32_t *labels_repaired)
980
{
981
ssize_t err;
982
uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
983
void *vdev_data =
984
(char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
985
zio_eck_t *vdev_eck =
986
(zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
987
const uint64_t vdev_phys_offset =
988
label_offset + offsetof(vdev_label_t, vl_vdev_phys);
989
nvlist_t *cfg;
990
uint64_t ashift;
991
int byteswap;
992
993
err = zhack_repair_read_label(fd, vl, label_offset, l);
994
if (err)
995
return;
996
997
err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
998
if (err)
999
return;
1000
1001
if (byteswap) {
1002
byteswap_uint64_array(&vdev_eck->zec_cksum,
1003
sizeof (zio_cksum_t));
1004
vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
1005
}
1006
1007
if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
1008
zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
1009
vdev_phys_offset, l) != 0) {
1010
(void) fprintf(stderr, "It would appear checksums are "
1011
"corrupted. Try zhack repair label -c <device>\n");
1012
return;
1013
}
1014
1015
err = zhack_repair_unpack_cfg(vl, l, &cfg);
1016
if (err)
1017
return;
1018
1019
if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
1020
char *buf;
1021
size_t buflen;
1022
1023
if (ub->ub_txg != 0) {
1024
(void) fprintf(stderr,
1025
"error: label %d: UB TXG of 0 expected, but got %"
1026
PRIu64 "\n", l, ub->ub_txg);
1027
(void) fprintf(stderr, "It would appear the device was "
1028
"not properly detached.\n");
1029
return;
1030
}
1031
1032
err = zhack_repair_get_ashift(cfg, l, &ashift);
1033
if (err)
1034
return;
1035
1036
err = zhack_repair_undetach(ub, cfg, l);
1037
if (err)
1038
return;
1039
1040
buf = vl->vl_vdev_phys.vp_nvlist;
1041
buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
1042
if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
1043
(void) fprintf(stderr,
1044
"error: label %d: Failed to pack nvlist\n", l);
1045
return;
1046
}
1047
1048
zhack_repair_write_uberblock(vl,
1049
l, ashift, fd, byteswap, label_offset, labels_repaired);
1050
}
1051
1052
if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
1053
vdev_phys_offset, VDEV_PHYS_SIZE))
1054
labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
1055
1056
fsync(fd);
1057
}
1058
1059
/*
 * Return "repaired" if any bit of to_check is set in label_status,
 * "skipped" otherwise.
 */
static const char *
zhack_repair_label_status(const uint32_t label_status,
    const uint32_t to_check)
{
	if ((label_status & to_check) == 0)
		return ("skipped");

	return ("repaired");
}
1065
1066
static int
1067
zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
1068
{
1069
uint32_t labels_repaired[VDEV_LABELS] = {0};
1070
vdev_label_t labels[VDEV_LABELS] = {{{0}}};
1071
struct stat64 st;
1072
int fd;
1073
off_t filesize;
1074
uint32_t repaired = 0;
1075
1076
abd_init();
1077
1078
if (argc < 1) {
1079
(void) fprintf(stderr, "error: missing device\n");
1080
usage();
1081
}
1082
1083
if ((fd = open(argv[0], O_RDWR)) == -1)
1084
fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
1085
strerror(errno));
1086
1087
if (fstat64_blk(fd, &st) != 0)
1088
fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
1089
strerror(errno));
1090
1091
filesize = st.st_size;
1092
(void) fprintf(stderr, "Calculated filesize to be %jd\n",
1093
(intmax_t)filesize);
1094
1095
if (filesize % sizeof (vdev_label_t) != 0)
1096
filesize =
1097
(filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
1098
1099
for (int l = 0; l < VDEV_LABELS; l++) {
1100
zhack_repair_one_label(op, fd, &labels[l],
1101
vdev_label_offset(filesize, l, 0), l, labels_repaired);
1102
}
1103
1104
close(fd);
1105
1106
abd_fini();
1107
1108
for (int l = 0; l < VDEV_LABELS; l++) {
1109
const uint32_t lr = labels_repaired[l];
1110
(void) printf("label %d: ", l);
1111
(void) printf("uberblock: %s ",
1112
zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
1113
(void) printf("checksum: %s\n",
1114
zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
1115
repaired |= lr;
1116
}
1117
1118
if (repaired > 0)
1119
return (0);
1120
1121
return (1);
1122
}
1123
1124
static int
1125
zhack_do_label_repair(int argc, char **argv)
1126
{
1127
zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
1128
int c;
1129
1130
optind = 1;
1131
while ((c = getopt(argc, argv, "+cu")) != -1) {
1132
switch (c) {
1133
case 'c':
1134
op |= ZHACK_REPAIR_OP_CKSUM;
1135
break;
1136
case 'u':
1137
op |= ZHACK_REPAIR_OP_UNDETACH;
1138
break;
1139
default:
1140
usage();
1141
break;
1142
}
1143
}
1144
1145
argc -= optind;
1146
argv += optind;
1147
1148
if (op == ZHACK_REPAIR_OP_UNKNOWN)
1149
op = ZHACK_REPAIR_OP_CKSUM;
1150
1151
return (zhack_label_repair(op, argc, argv));
1152
}
1153
1154
/*
 * Dispatch "label <operation> ...".  "repair" is the only operation; it
 * receives argc/argv shifted so that its argv[0] is "repair".  Returns
 * the result of the repair; a missing or unknown operation exits through
 * usage().
 */
static int
zhack_do_label(int argc, char **argv)
{
	const char *subcommand;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
	}

	subcommand = argv[0];
	if (strcmp(subcommand, "repair") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    subcommand);
		usage();
	}

	return (zhack_do_label_repair(argc, argv));
}
1179
1180
#define MAX_NUM_PATHS 1024
1181
1182
int
1183
main(int argc, char **argv)
1184
{
1185
char *path[MAX_NUM_PATHS];
1186
const char *subcommand;
1187
int rv = 0;
1188
int c;
1189
1190
g_importargs.path = path;
1191
1192
dprintf_setup(&argc, argv);
1193
zfs_prop_init();
1194
1195
while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
1196
switch (c) {
1197
case 'c':
1198
g_importargs.cachefile = optarg;
1199
break;
1200
case 'd':
1201
assert(g_importargs.paths < MAX_NUM_PATHS);
1202
g_importargs.path[g_importargs.paths++] = optarg;
1203
break;
1204
case 'o':
1205
if (handle_tunable_option(optarg, B_FALSE) != 0)
1206
exit(1);
1207
break;
1208
default:
1209
usage();
1210
break;
1211
}
1212
}
1213
1214
argc -= optind;
1215
argv += optind;
1216
optind = 1;
1217
1218
if (argc == 0) {
1219
(void) fprintf(stderr, "error: no command specified\n");
1220
usage();
1221
}
1222
1223
subcommand = argv[0];
1224
1225
if (strcmp(subcommand, "feature") == 0) {
1226
rv = zhack_do_feature(argc, argv);
1227
} else if (strcmp(subcommand, "label") == 0) {
1228
return (zhack_do_label(argc, argv));
1229
} else if (strcmp(subcommand, "metaslab") == 0) {
1230
rv = zhack_do_metaslab(argc, argv);
1231
} else {
1232
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
1233
subcommand);
1234
usage();
1235
}
1236
1237
if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
1238
fatal(NULL, FTAG, "pool export failed; "
1239
"changes may not be committed to disk\n");
1240
}
1241
1242
kernel_fini();
1243
1244
return (rv);
1245
}
1246
1247