Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/cmd/zhack.c
106463 views
1
// SPDX-License-Identifier: CDDL-1.0
2
/*
3
* CDDL HEADER START
4
*
5
* The contents of this file are subject to the terms of the
6
* Common Development and Distribution License (the "License").
7
* You may not use this file except in compliance with the License.
8
*
9
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10
* or https://opensource.org/licenses/CDDL-1.0.
11
* See the License for the specific language governing permissions
12
* and limitations under the License.
13
*
14
* When distributing Covered Code, include this CDDL HEADER in each
15
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16
* If applicable, add the following below this CDDL HEADER, with the
17
* fields enclosed by brackets "[]" replaced with your own identifying
18
* information: Portions Copyright [yyyy] [name of copyright owner]
19
*
20
* CDDL HEADER END
21
*/
22
23
/*
24
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25
* Copyright (c) 2013 Steven Hartland. All rights reserved.
26
*/
27
28
/*
 * zhack is a debugging tool that can write changes to a ZFS pool using
 * libzpool for testing purposes. Altering pools with zhack is unsupported
 * and may result in corrupted pools.
 */
33
34
#include <zfs_prop.h>
35
#include <stdio.h>
36
#include <stdlib.h>
37
#include <ctype.h>
38
#include <sys/stat.h>
39
#include <sys/zfs_context.h>
40
#include <sys/spa.h>
41
#include <sys/spa_impl.h>
42
#include <sys/dmu.h>
43
#include <sys/zap.h>
44
#include <sys/zfs_znode.h>
45
#include <sys/dsl_synctask.h>
46
#include <sys/vdev.h>
47
#include <sys/vdev_impl.h>
48
#include <sys/fs/zfs.h>
49
#include <sys/dmu_objset.h>
50
#include <sys/dsl_pool.h>
51
#include <sys/zio_checksum.h>
52
#include <sys/zio_compress.h>
53
#include <sys/zfeature.h>
54
#include <sys/dmu_tx.h>
55
#include <zfeature_common.h>
56
#include <libzutil.h>
57
#include <sys/metaslab_impl.h>
58
#include <libzpool.h>
59
60
static importargs_t g_importargs;
61
static char *g_pool;
62
static boolean_t g_readonly;
63
64
/*
 * Bit flags selecting which repairs "label repair" performs; they may be
 * OR-ed together.
 */
typedef enum {
	ZHACK_REPAIR_OP_UNKNOWN = 0x0,	/* no operation selected */
	ZHACK_REPAIR_OP_CKSUM = 0x1,	/* selected by -c */
	ZHACK_REPAIR_OP_UNDETACH = 0x2	/* selected by -u */
} zhack_repair_op_t;
69
70
/*
 * Print the command synopsis to stderr and exit with status 1.
 * Never returns.
 */
static __attribute__((noreturn)) void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
	    "<args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n");

	(void) fprintf(stderr,
	    " feature stat <pool>\n"
	    " print information about enabled features\n"
	    " feature enable [-r] [-d desc] <pool> <feature>\n"
	    " add a new enabled feature to the pool\n"
	    " -d <desc> sets the feature's description\n"
	    " -r set read-only compatible flag for feature\n"
	    " feature ref [-md] <pool> <feature>\n"
	    " change the refcount on the given feature\n"
	    " -d decrease instead of increase the refcount\n"
	    " -m add the feature to the label if increasing refcount\n"
	    "\n"
	    " <feature> : should be a feature guid\n"
	    "\n"
	    " label repair [-cu] <device>\n"
	    " repair labels of a specified device according to options\n"
	    " which may be combined to do their functions in one call\n"
	    " -c repair corrupted label checksums\n"
	    " -u restore the label on a detached device\n"
	    "\n"
	    " <device> : path to vdev\n"
	    "\n"
	    " metaslab leak [-f] <pool>\n"
	    " apply allocation map from zdb to specified pool\n"
	    " -f apply the map even if the metaslab count differs\n");
	exit(1);
}
105
106
107
static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void
108
fatal(spa_t *spa, const void *tag, const char *fmt, ...)
109
{
110
va_list ap;
111
112
if (spa != NULL) {
113
spa_close(spa, tag);
114
(void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
115
}
116
117
va_start(ap, fmt);
118
(void) fputs("zhack: ", stderr);
119
(void) vfprintf(stderr, fmt, ap);
120
va_end(ap);
121
(void) fputc('\n', stderr);
122
123
exit(1);
124
}
125
126
static int
127
space_delta_cb(dmu_object_type_t bonustype, const void *data,
128
zfs_file_info_t *zoi)
129
{
130
(void) data, (void) zoi;
131
132
/*
133
* Is it a valid type of object to track?
134
*/
135
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
136
return (ENOENT);
137
(void) fprintf(stderr, "modifying object that needs user accounting");
138
abort();
139
}
140
141
/*
142
* Target is the dataset whose pool we want to open.
143
*/
144
static void
145
zhack_import(char *target, boolean_t readonly)
146
{
147
nvlist_t *config;
148
nvlist_t *props;
149
int error;
150
151
kernel_init(readonly ? SPA_MODE_READ :
152
(SPA_MODE_READ | SPA_MODE_WRITE));
153
154
dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
155
156
g_readonly = readonly;
157
g_importargs.can_be_active = readonly;
158
g_pool = strdup(target);
159
160
libpc_handle_t lpch = {
161
.lpc_lib_handle = NULL,
162
.lpc_ops = &libzpool_config_ops,
163
.lpc_printerr = B_TRUE
164
};
165
error = zpool_find_config(&lpch, target, &config, &g_importargs);
166
if (error)
167
fatal(NULL, FTAG, "cannot import '%s'", target);
168
169
props = NULL;
170
if (readonly) {
171
VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
172
VERIFY0(nvlist_add_uint64(props,
173
zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
174
}
175
176
zfeature_checks_disable = B_TRUE;
177
error = spa_import(target, config, props,
178
(readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
179
fnvlist_free(config);
180
zfeature_checks_disable = B_FALSE;
181
if (error == EEXIST)
182
error = 0;
183
184
if (error)
185
fatal(NULL, FTAG, "can't import '%s': %s", target,
186
strerror(error));
187
}
188
189
static void
190
zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa)
191
{
192
int err;
193
194
zhack_import(target, readonly);
195
196
zfeature_checks_disable = B_TRUE;
197
err = spa_open(target, spa, tag);
198
zfeature_checks_disable = B_FALSE;
199
200
if (err != 0)
201
fatal(*spa, FTAG, "cannot open '%s': %s", target,
202
strerror(err));
203
if (spa_version(*spa) < SPA_VERSION_FEATURES) {
204
fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
205
target, (int)spa_version(*spa));
206
}
207
}
208
209
static void
210
dump_obj(objset_t *os, uint64_t obj, const char *name)
211
{
212
zap_cursor_t zc;
213
zap_attribute_t *za = zap_attribute_long_alloc();
214
215
(void) printf("%s_obj:\n", name);
216
217
for (zap_cursor_init(&zc, os, obj);
218
zap_cursor_retrieve(&zc, za) == 0;
219
zap_cursor_advance(&zc)) {
220
if (za->za_integer_length == 8) {
221
ASSERT(za->za_num_integers == 1);
222
(void) printf("\t%s = %llu\n",
223
za->za_name, (u_longlong_t)za->za_first_integer);
224
} else {
225
ASSERT(za->za_integer_length == 1);
226
char val[1024];
227
VERIFY0(zap_lookup(os, obj, za->za_name,
228
1, sizeof (val), val));
229
(void) printf("\t%s = %s\n", za->za_name, val);
230
}
231
}
232
zap_cursor_fini(&zc);
233
zap_attribute_free(za);
234
}
235
236
static void
237
dump_mos(spa_t *spa)
238
{
239
nvlist_t *nv = spa->spa_label_features;
240
nvpair_t *pair;
241
242
(void) printf("label config:\n");
243
for (pair = nvlist_next_nvpair(nv, NULL);
244
pair != NULL;
245
pair = nvlist_next_nvpair(nv, pair)) {
246
(void) printf("\t%s\n", nvpair_name(pair));
247
}
248
}
249
250
static void
251
zhack_do_feature_stat(int argc, char **argv)
252
{
253
spa_t *spa;
254
objset_t *os;
255
char *target;
256
257
argc--;
258
argv++;
259
260
if (argc < 1) {
261
(void) fprintf(stderr, "error: missing pool name\n");
262
usage();
263
}
264
target = argv[0];
265
266
zhack_spa_open(target, B_TRUE, FTAG, &spa);
267
os = spa->spa_meta_objset;
268
269
dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
270
dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
271
dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
272
if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
273
dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
274
}
275
dump_mos(spa);
276
277
spa_close(spa, FTAG);
278
}
279
280
static void
281
zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
282
{
283
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
284
zfeature_info_t *feature = arg;
285
286
feature_enable_sync(spa, feature, tx);
287
288
spa_history_log_internal(spa, "zhack enable feature", tx,
289
"name=%s flags=%u",
290
feature->fi_guid, feature->fi_flags);
291
}
292
293
static void
294
zhack_do_feature_enable(int argc, char **argv)
295
{
296
int c;
297
char *desc, *target;
298
spa_t *spa;
299
objset_t *mos;
300
zfeature_info_t feature;
301
const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
302
303
/*
304
* Features are not added to the pool's label until their refcounts
305
* are incremented, so fi_mos can just be left as false for now.
306
*/
307
desc = NULL;
308
feature.fi_uname = "zhack";
309
feature.fi_flags = 0;
310
feature.fi_depends = nodeps;
311
feature.fi_feature = SPA_FEATURE_NONE;
312
313
optind = 1;
314
while ((c = getopt(argc, argv, "+rd:")) != -1) {
315
switch (c) {
316
case 'r':
317
feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
318
break;
319
case 'd':
320
if (desc != NULL)
321
free(desc);
322
desc = strdup(optarg);
323
break;
324
default:
325
usage();
326
break;
327
}
328
}
329
330
if (desc == NULL)
331
desc = strdup("zhack injected");
332
feature.fi_desc = desc;
333
334
argc -= optind;
335
argv += optind;
336
337
if (argc < 2) {
338
(void) fprintf(stderr, "error: missing feature or pool name\n");
339
usage();
340
}
341
target = argv[0];
342
feature.fi_guid = argv[1];
343
344
if (!zfeature_is_valid_guid(feature.fi_guid))
345
fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
346
347
zhack_spa_open(target, B_FALSE, FTAG, &spa);
348
mos = spa->spa_meta_objset;
349
350
if (zfeature_is_supported(feature.fi_guid))
351
fatal(spa, FTAG, "'%s' is a real feature, will not enable",
352
feature.fi_guid);
353
if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
354
fatal(spa, FTAG, "feature already enabled: %s",
355
feature.fi_guid);
356
357
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
358
zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
359
360
spa_close(spa, FTAG);
361
362
free(desc);
363
}
364
365
static void
366
feature_incr_sync(void *arg, dmu_tx_t *tx)
367
{
368
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
369
zfeature_info_t *feature = arg;
370
uint64_t refcount;
371
372
mutex_enter(&spa->spa_feat_stats_lock);
373
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
374
feature_sync(spa, feature, refcount + 1, tx);
375
spa_history_log_internal(spa, "zhack feature incr", tx,
376
"name=%s", feature->fi_guid);
377
mutex_exit(&spa->spa_feat_stats_lock);
378
}
379
380
static void
381
feature_decr_sync(void *arg, dmu_tx_t *tx)
382
{
383
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
384
zfeature_info_t *feature = arg;
385
uint64_t refcount;
386
387
mutex_enter(&spa->spa_feat_stats_lock);
388
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
389
feature_sync(spa, feature, refcount - 1, tx);
390
spa_history_log_internal(spa, "zhack feature decr", tx,
391
"name=%s", feature->fi_guid);
392
mutex_exit(&spa->spa_feat_stats_lock);
393
}
394
395
static void
396
zhack_do_feature_ref(int argc, char **argv)
397
{
398
int c;
399
char *target;
400
boolean_t decr = B_FALSE;
401
spa_t *spa;
402
objset_t *mos;
403
zfeature_info_t feature;
404
const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
405
406
/*
407
* fi_desc does not matter here because it was written to disk
408
* when the feature was enabled, but we need to properly set the
409
* feature for read or write based on the information we read off
410
* disk later.
411
*/
412
feature.fi_uname = "zhack";
413
feature.fi_flags = 0;
414
feature.fi_desc = NULL;
415
feature.fi_depends = nodeps;
416
feature.fi_feature = SPA_FEATURE_NONE;
417
418
optind = 1;
419
while ((c = getopt(argc, argv, "+md")) != -1) {
420
switch (c) {
421
case 'm':
422
feature.fi_flags |= ZFEATURE_FLAG_MOS;
423
break;
424
case 'd':
425
decr = B_TRUE;
426
break;
427
default:
428
usage();
429
break;
430
}
431
}
432
argc -= optind;
433
argv += optind;
434
435
if (argc < 2) {
436
(void) fprintf(stderr, "error: missing feature or pool name\n");
437
usage();
438
}
439
target = argv[0];
440
feature.fi_guid = argv[1];
441
442
if (!zfeature_is_valid_guid(feature.fi_guid))
443
fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
444
445
zhack_spa_open(target, B_FALSE, FTAG, &spa);
446
mos = spa->spa_meta_objset;
447
448
if (zfeature_is_supported(feature.fi_guid)) {
449
fatal(spa, FTAG,
450
"'%s' is a real feature, will not change refcount",
451
feature.fi_guid);
452
}
453
454
if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
455
feature.fi_guid)) {
456
feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
457
} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
458
feature.fi_guid)) {
459
feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
460
} else {
461
fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
462
}
463
464
if (decr) {
465
uint64_t count;
466
if (feature_get_refcount_from_disk(spa, &feature,
467
&count) == 0 && count == 0) {
468
fatal(spa, FTAG, "feature refcount already 0: %s",
469
feature.fi_guid);
470
}
471
}
472
473
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
474
decr ? feature_decr_sync : feature_incr_sync, &feature,
475
5, ZFS_SPACE_CHECK_NORMAL));
476
477
spa_close(spa, FTAG);
478
}
479
480
static int
481
zhack_do_feature(int argc, char **argv)
482
{
483
char *subcommand;
484
485
argc--;
486
argv++;
487
if (argc == 0) {
488
(void) fprintf(stderr,
489
"error: no feature operation specified\n");
490
usage();
491
}
492
493
subcommand = argv[0];
494
if (strcmp(subcommand, "stat") == 0) {
495
zhack_do_feature_stat(argc, argv);
496
} else if (strcmp(subcommand, "enable") == 0) {
497
zhack_do_feature_enable(argc, argv);
498
} else if (strcmp(subcommand, "ref") == 0) {
499
zhack_do_feature_ref(argc, argv);
500
} else {
501
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
502
subcommand);
503
usage();
504
}
505
506
return (0);
507
}
508
509
static boolean_t
510
strstarts(const char *a, const char *b)
511
{
512
return (strncmp(a, b, strlen(b)) == 0);
513
}
514
515
static void
516
metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
517
dmu_tx_t *tx)
518
{
519
ASSERT(msp->ms_disabled);
520
ASSERT(MUTEX_HELD(&msp->ms_lock));
521
uint64_t txg = dmu_tx_get_txg(tx);
522
523
uint64_t off = start;
524
while (off < start + size) {
525
uint64_t ostart, osize;
526
boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
527
off, start + size - off, &ostart, &osize);
528
if (!found)
529
break;
530
zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
531
532
if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
533
vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
534
txg);
535
536
zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
537
osize);
538
msp->ms_allocating_total += osize;
539
off = ostart + osize;
540
}
541
}
542
543
static void
544
zhack_do_metaslab_leak(int argc, char **argv)
545
{
546
int c;
547
char *target;
548
spa_t *spa;
549
550
optind = 1;
551
boolean_t force = B_FALSE;
552
while ((c = getopt(argc, argv, "f")) != -1) {
553
switch (c) {
554
case 'f':
555
force = B_TRUE;
556
break;
557
default:
558
usage();
559
break;
560
}
561
}
562
563
argc -= optind;
564
argv += optind;
565
566
if (argc < 1) {
567
(void) fprintf(stderr, "error: missing pool name\n");
568
usage();
569
}
570
target = argv[0];
571
572
zhack_spa_open(target, B_FALSE, FTAG, &spa);
573
spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
574
575
char *line = NULL;
576
size_t cap = 0;
577
578
vdev_t *vd = NULL;
579
metaslab_t *prev = NULL;
580
dmu_tx_t *tx = NULL;
581
while (getline(&line, &cap, stdin) > 0) {
582
if (strstarts(line, "\tvdev ")) {
583
uint64_t vdev_id, ms_shift;
584
if (sscanf(line,
585
"\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
586
&vdev_id, &ms_shift) == 1) {
587
VERIFY3U(sscanf(line, "\tvdev %"PRIu64
588
"\t metaslab shift %4"PRIu64,
589
&vdev_id, &ms_shift), ==, 2);
590
}
591
vd = vdev_lookup_top(spa, vdev_id);
592
if (vd == NULL) {
593
fprintf(stderr, "error: no such vdev with "
594
"id %"PRIu64"\n", vdev_id);
595
break;
596
}
597
if (tx) {
598
dmu_tx_commit(tx);
599
mutex_exit(&prev->ms_lock);
600
metaslab_enable(prev, B_FALSE, B_FALSE);
601
tx = NULL;
602
prev = NULL;
603
}
604
if (vd->vdev_ms_shift != ms_shift) {
605
fprintf(stderr, "error: ms_shift mismatch: %"
606
PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
607
ms_shift);
608
break;
609
}
610
} else if (strstarts(line, "\tmetaslabs ")) {
611
uint64_t ms_count;
612
VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
613
==, 1);
614
ASSERT(vd);
615
if (!force && vd->vdev_ms_count != ms_count) {
616
fprintf(stderr, "error: ms_count mismatch: %"
617
PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
618
ms_count);
619
break;
620
}
621
} else if (strstarts(line, "ALLOC:")) {
622
uint64_t start, size;
623
VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
624
&start, &size), ==, 2);
625
626
ASSERT(vd);
627
metaslab_t *cur =
628
vd->vdev_ms[start >> vd->vdev_ms_shift];
629
if (prev != cur) {
630
if (prev) {
631
dmu_tx_commit(tx);
632
mutex_exit(&prev->ms_lock);
633
metaslab_enable(prev, B_FALSE, B_FALSE);
634
}
635
ASSERT(cur);
636
metaslab_disable(cur);
637
mutex_enter(&cur->ms_lock);
638
metaslab_load(cur);
639
prev = cur;
640
tx = dmu_tx_create_dd(
641
spa_get_dsl(vd->vdev_spa)->dp_root_dir);
642
dmu_tx_assign(tx, DMU_TX_WAIT);
643
}
644
645
metaslab_force_alloc(cur, start, size, tx);
646
} else {
647
continue;
648
}
649
}
650
if (tx) {
651
dmu_tx_commit(tx);
652
mutex_exit(&prev->ms_lock);
653
metaslab_enable(prev, B_FALSE, B_FALSE);
654
tx = NULL;
655
prev = NULL;
656
}
657
if (line)
658
free(line);
659
660
spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
661
spa_close(spa, FTAG);
662
}
663
664
/*
 * Dispatch "metaslab <op> ..."; "leak" is the only operation.
 *
 * The handler receives argc/argv with the leading "metaslab" word removed.
 * Always returns 0; an unknown or missing operation ends in usage(), which
 * does not return.
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}

	const char *verb = argv[0];

	if (strcmp(verb, "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    verb);
		usage();
	}

	zhack_do_metaslab_leak(argc, argv);
	return (0);
}
688
689
/* Uberblock shift for an ashift, clamped to the allowed range. */
#define	ASHIFT_UBERBLOCK_SHIFT(ashift)	\
	MIN(MAX((ashift), UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
/* Uberblock size in bytes for an ashift. */
#define	ASHIFT_UBERBLOCK_SIZE(ashift)	\
	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))

/* Per-label result bits recorded in the labels_repaired[] array. */
#define	REPAIR_LABEL_STATUS_CKSUM	(1 << 0)
#define	REPAIR_LABEL_STATUS_UB		(1 << 1)
697
698
static int
699
zhack_repair_read_label(const int fd, vdev_label_t *vl,
700
const uint64_t label_offset, const int l)
701
{
702
const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
703
704
if (err == -1) {
705
(void) fprintf(stderr,
706
"error: cannot read label %d: %s\n",
707
l, strerror(errno));
708
return (err);
709
} else if (err != sizeof (vdev_label_t)) {
710
(void) fprintf(stderr,
711
"error: bad label %d read size\n", l);
712
return (err);
713
}
714
715
return (0);
716
}
717
718
static int
719
zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
720
{
721
if (vdev_eck->zec_magic == ZEC_MAGIC) {
722
*byteswap = B_FALSE;
723
} else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
724
*byteswap = B_TRUE;
725
} else {
726
(void) fprintf(stderr, "error: label %d: "
727
"Expected the nvlist checksum magic number but instead got "
728
"0x%" PRIx64 "\n",
729
l, vdev_eck->zec_magic);
730
return (1);
731
}
732
return (0);
733
}
734
735
static void
736
zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
737
const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
738
{
739
zio_cksum_t verifier;
740
zio_cksum_t current_cksum;
741
zio_checksum_info_t *ci;
742
abd_t *abd;
743
744
ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
745
746
if (byteswap)
747
byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
748
749
current_cksum = eck->zec_cksum;
750
eck->zec_cksum = verifier;
751
752
ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
753
abd = abd_get_from_buf(data, abdsize);
754
ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
755
abd_free(abd);
756
757
eck->zec_cksum = current_cksum;
758
}
759
760
static int
761
zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
762
{
763
int err;
764
nvlist_t *vdev_tree_cfg;
765
766
err = nvlist_lookup_nvlist(cfg,
767
ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
768
if (err) {
769
(void) fprintf(stderr,
770
"error: label %d: cannot find nvlist key %s\n",
771
l, ZPOOL_CONFIG_VDEV_TREE);
772
return (err);
773
}
774
775
err = nvlist_lookup_uint64(vdev_tree_cfg,
776
ZPOOL_CONFIG_ASHIFT, ashift);
777
if (err) {
778
(void) fprintf(stderr,
779
"error: label %d: cannot find nvlist key %s\n",
780
l, ZPOOL_CONFIG_ASHIFT);
781
return (err);
782
}
783
784
if (*ashift == 0) {
785
(void) fprintf(stderr,
786
"error: label %d: nvlist key %s is zero\n",
787
l, ZPOOL_CONFIG_ASHIFT);
788
return (1);
789
}
790
791
return (0);
792
}
793
794
static int
795
zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
796
{
797
/*
798
* Uberblock root block pointer has valid birth TXG.
799
* Copying it to the label NVlist
800
*/
801
if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
802
const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
803
int err;
804
805
ub->ub_txg = txg;
806
807
err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
808
if (err) {
809
(void) fprintf(stderr,
810
"error: label %d: "
811
"Failed to remove pool creation TXG\n",
812
l);
813
return (err);
814
}
815
816
err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
817
if (err) {
818
(void) fprintf(stderr,
819
"error: label %d: Failed to remove pool TXG to "
820
"be replaced.\n",
821
l);
822
return (err);
823
}
824
825
err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
826
if (err) {
827
(void) fprintf(stderr,
828
"error: label %d: "
829
"Failed to add pool TXG of %" PRIu64 "\n",
830
l, txg);
831
return (err);
832
}
833
}
834
835
return (0);
836
}
837
838
static boolean_t
839
zhack_repair_write_label(const int l, const int fd, const int byteswap,
840
void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
841
{
842
zio_cksum_t actual_cksum;
843
zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
844
&actual_cksum);
845
zio_cksum_t expected_cksum = eck->zec_cksum;
846
ssize_t err;
847
848
if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
849
return (B_FALSE);
850
851
eck->zec_cksum = actual_cksum;
852
853
err = pwrite64(fd, data, abdsize, offset);
854
if (err == -1) {
855
(void) fprintf(stderr, "error: cannot write label %d: %s\n",
856
l, strerror(errno));
857
return (B_FALSE);
858
} else if (err != abdsize) {
859
(void) fprintf(stderr, "error: bad write size label %d\n", l);
860
return (B_FALSE);
861
} else {
862
(void) fprintf(stderr,
863
"label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
864
l, abdsize, offset);
865
}
866
867
return (B_TRUE);
868
}
869
870
static void
871
zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
872
const uint64_t ashift, const int fd, const int byteswap,
873
const uint64_t label_offset, uint32_t *labels_repaired)
874
{
875
void *ub_data =
876
(char *)vl + offsetof(vdev_label_t, vl_uberblock);
877
zio_eck_t *ub_eck =
878
(zio_eck_t *)
879
((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
880
881
if (ub_eck->zec_magic != 0) {
882
(void) fprintf(stderr,
883
"error: label %d: "
884
"Expected Uberblock checksum magic number to "
885
"be 0, but got %" PRIu64 "\n",
886
l, ub_eck->zec_magic);
887
(void) fprintf(stderr, "It would appear there's already "
888
"a checksum for the uberblock.\n");
889
return;
890
}
891
892
893
ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
894
895
if (zhack_repair_write_label(l, fd, byteswap,
896
ub_data, ub_eck,
897
label_offset + offsetof(vdev_label_t, vl_uberblock),
898
ASHIFT_UBERBLOCK_SIZE(ashift)))
899
labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
900
}
901
902
static void
903
zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
904
{
905
(void) fprintf(stream,
906
"%016llx:%016llx:%016llx:%016llx",
907
(u_longlong_t)cksum->zc_word[0],
908
(u_longlong_t)cksum->zc_word[1],
909
(u_longlong_t)cksum->zc_word[2],
910
(u_longlong_t)cksum->zc_word[3]);
911
}
912
913
static int
914
zhack_repair_test_cksum(const int byteswap, void *vdev_data,
915
zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
916
{
917
const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
918
zio_cksum_t actual_cksum;
919
zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
920
VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
921
const uint64_t expected_magic = byteswap ?
922
BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
923
const uint64_t actual_magic = vdev_eck->zec_magic;
924
int err = 0;
925
926
if (actual_magic != expected_magic) {
927
(void) fprintf(stderr, "error: label %d: "
928
"Expected "
929
"the nvlist checksum magic number to not be %"
930
PRIu64 " not %" PRIu64 "\n",
931
l, expected_magic, actual_magic);
932
err = ECKSUM;
933
}
934
if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
935
(void) fprintf(stderr, "error: label %d: "
936
"Expected the nvlist checksum to be ", l);
937
(void) zhack_repair_print_cksum(stderr,
938
&expected_cksum);
939
(void) fprintf(stderr, " not ");
940
zhack_repair_print_cksum(stderr, &actual_cksum);
941
(void) fprintf(stderr, "\n");
942
err = ECKSUM;
943
}
944
return (err);
945
}
946
947
static int
948
zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
949
{
950
const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
951
ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
952
int err;
953
954
err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
955
VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
956
if (err) {
957
(void) fprintf(stderr,
958
"error: cannot unpack nvlist label %d\n", l);
959
return (err);
960
}
961
962
for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
963
uint64_t val;
964
err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
965
if (err) {
966
(void) fprintf(stderr,
967
"error: label %d, %d: "
968
"cannot find nvlist key %s\n",
969
l, i, cfg_keys[i]);
970
return (err);
971
}
972
}
973
974
return (0);
975
}
976
977
static void
978
zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
979
vdev_label_t *vl, const uint64_t label_offset, const int l,
980
uint32_t *labels_repaired)
981
{
982
ssize_t err;
983
uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
984
void *vdev_data =
985
(char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
986
zio_eck_t *vdev_eck =
987
(zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
988
const uint64_t vdev_phys_offset =
989
label_offset + offsetof(vdev_label_t, vl_vdev_phys);
990
nvlist_t *cfg;
991
uint64_t ashift;
992
int byteswap;
993
994
err = zhack_repair_read_label(fd, vl, label_offset, l);
995
if (err)
996
return;
997
998
err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
999
if (err)
1000
return;
1001
1002
if (byteswap) {
1003
byteswap_uint64_array(&vdev_eck->zec_cksum,
1004
sizeof (zio_cksum_t));
1005
vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
1006
}
1007
1008
if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
1009
zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
1010
vdev_phys_offset, l) != 0) {
1011
(void) fprintf(stderr, "It would appear checksums are "
1012
"corrupted. Try zhack repair label -c <device>\n");
1013
return;
1014
}
1015
1016
err = zhack_repair_unpack_cfg(vl, l, &cfg);
1017
if (err)
1018
return;
1019
1020
if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
1021
char *buf;
1022
size_t buflen;
1023
1024
if (ub->ub_txg != 0) {
1025
(void) fprintf(stderr,
1026
"error: label %d: UB TXG of 0 expected, but got %"
1027
PRIu64 "\n", l, ub->ub_txg);
1028
(void) fprintf(stderr, "It would appear the device was "
1029
"not properly detached.\n");
1030
return;
1031
}
1032
1033
err = zhack_repair_get_ashift(cfg, l, &ashift);
1034
if (err)
1035
return;
1036
1037
err = zhack_repair_undetach(ub, cfg, l);
1038
if (err)
1039
return;
1040
1041
buf = vl->vl_vdev_phys.vp_nvlist;
1042
buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
1043
if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
1044
(void) fprintf(stderr,
1045
"error: label %d: Failed to pack nvlist\n", l);
1046
return;
1047
}
1048
1049
zhack_repair_write_uberblock(vl,
1050
l, ashift, fd, byteswap, label_offset, labels_repaired);
1051
}
1052
1053
if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
1054
vdev_phys_offset, VDEV_PHYS_SIZE))
1055
labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
1056
1057
fsync(fd);
1058
}
1059
1060
/*
 * Map a label's status bits to a display word: "repaired" if any bit of
 * to_check is set in label_status, "skipped" otherwise.
 */
static const char *
zhack_repair_label_status(const uint32_t label_status,
    const uint32_t to_check)
{
	if ((label_status & to_check) == 0)
		return ("skipped");

	return ("repaired");
}
1066
1067
static int
1068
zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
1069
{
1070
uint32_t labels_repaired[VDEV_LABELS] = {0};
1071
vdev_label_t labels[VDEV_LABELS] = {{{0}}};
1072
struct stat64 st;
1073
int fd;
1074
off_t filesize;
1075
uint32_t repaired = 0;
1076
1077
abd_init();
1078
1079
if (argc < 1) {
1080
(void) fprintf(stderr, "error: missing device\n");
1081
usage();
1082
}
1083
1084
if ((fd = open(argv[0], O_RDWR)) == -1)
1085
fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
1086
strerror(errno));
1087
1088
if (fstat64_blk(fd, &st) != 0)
1089
fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
1090
strerror(errno));
1091
1092
filesize = st.st_size;
1093
(void) fprintf(stderr, "Calculated filesize to be %jd\n",
1094
(intmax_t)filesize);
1095
1096
if (filesize % sizeof (vdev_label_t) != 0)
1097
filesize =
1098
(filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
1099
1100
for (int l = 0; l < VDEV_LABELS; l++) {
1101
zhack_repair_one_label(op, fd, &labels[l],
1102
vdev_label_offset(filesize, l, 0), l, labels_repaired);
1103
}
1104
1105
close(fd);
1106
1107
abd_fini();
1108
1109
for (int l = 0; l < VDEV_LABELS; l++) {
1110
const uint32_t lr = labels_repaired[l];
1111
(void) printf("label %d: ", l);
1112
(void) printf("uberblock: %s ",
1113
zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
1114
(void) printf("checksum: %s\n",
1115
zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
1116
repaired |= lr;
1117
}
1118
1119
if (repaired > 0)
1120
return (0);
1121
1122
return (1);
1123
}
1124
1125
static int
1126
zhack_do_label_repair(int argc, char **argv)
1127
{
1128
zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
1129
int c;
1130
1131
optind = 1;
1132
while ((c = getopt(argc, argv, "+cu")) != -1) {
1133
switch (c) {
1134
case 'c':
1135
op |= ZHACK_REPAIR_OP_CKSUM;
1136
break;
1137
case 'u':
1138
op |= ZHACK_REPAIR_OP_UNDETACH;
1139
break;
1140
default:
1141
usage();
1142
break;
1143
}
1144
}
1145
1146
argc -= optind;
1147
argv += optind;
1148
1149
if (op == ZHACK_REPAIR_OP_UNKNOWN)
1150
op = ZHACK_REPAIR_OP_CKSUM;
1151
1152
return (zhack_label_repair(op, argc, argv));
1153
}
1154
1155
/*
 * Dispatch "label <op> ..."; "repair" is the only operation.
 *
 * The handler receives argc/argv with the leading "label" word removed.
 * Returns the handler's result; an unknown or missing operation ends in
 * usage(), which does not return.
 */
static int
zhack_do_label(int argc, char **argv)
{
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
	}

	const char *verb = argv[0];

	if (strcmp(verb, "repair") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    verb);
		usage();
	}

	return (zhack_do_label_repair(argc, argv));
}
1180
1181
#define MAX_NUM_PATHS 1024
1182
1183
int
1184
main(int argc, char **argv)
1185
{
1186
char *path[MAX_NUM_PATHS];
1187
const char *subcommand;
1188
int rv = 0;
1189
int c;
1190
1191
g_importargs.path = path;
1192
1193
dprintf_setup(&argc, argv);
1194
zfs_prop_init();
1195
1196
while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
1197
switch (c) {
1198
case 'c':
1199
g_importargs.cachefile = optarg;
1200
break;
1201
case 'd':
1202
assert(g_importargs.paths < MAX_NUM_PATHS);
1203
g_importargs.path[g_importargs.paths++] = optarg;
1204
break;
1205
case 'o':
1206
if (handle_tunable_option(optarg, B_FALSE) != 0)
1207
exit(1);
1208
break;
1209
default:
1210
usage();
1211
break;
1212
}
1213
}
1214
1215
argc -= optind;
1216
argv += optind;
1217
optind = 1;
1218
1219
if (argc == 0) {
1220
(void) fprintf(stderr, "error: no command specified\n");
1221
usage();
1222
}
1223
1224
subcommand = argv[0];
1225
1226
if (strcmp(subcommand, "feature") == 0) {
1227
rv = zhack_do_feature(argc, argv);
1228
} else if (strcmp(subcommand, "label") == 0) {
1229
return (zhack_do_label(argc, argv));
1230
} else if (strcmp(subcommand, "metaslab") == 0) {
1231
rv = zhack_do_metaslab(argc, argv);
1232
} else {
1233
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
1234
subcommand);
1235
usage();
1236
}
1237
1238
if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
1239
fatal(NULL, FTAG, "pool export failed; "
1240
"changes may not be committed to disk\n");
1241
}
1242
1243
kernel_fini();
1244
1245
return (rv);
1246
}
1247
1248