Skip to content

Commit

Permalink
zhack: Add repair label option
Browse files Browse the repository at this point in the history
If all label checksums on any vdev are invalid, the pool becomes
unimportable. zhack, with the newly added CLI options, can be used
to restore the label checksums and make the pool importable again.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Fedor Uporov <fuporov.vstack@gmail.com>
Closes openzfs#2510
Closes openzfs#12686
  • Loading branch information
fuporovvStack committed Nov 11, 2021
1 parent 637771a commit d04b5c9
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 1 deletion.
171 changes: 170 additions & 1 deletion cmd/zhack/zhack.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
Expand All @@ -41,6 +42,7 @@
#include <sys/zfs_znode.h>
#include <sys/dsl_synctask.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_pool.h>
Expand Down Expand Up @@ -76,7 +78,12 @@ usage(void)
" -d decrease instead of increase the refcount\n"
" -m add the feature to the label if increasing refcount\n"
"\n"
" <feature> : should be a feature guid\n");
" <feature> : should be a feature guid\n"
"\n"
" label repair <device>\n"
" repair corrupted label checksums\n"
"\n"
" <device> : path to vdev\n");
exit(1);
}

Expand Down Expand Up @@ -471,6 +478,166 @@ zhack_do_feature(int argc, char **argv)
return (0);
}

static int
zhack_repair_label_cksum(int argc, char **argv)
{
zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
boolean_t labels_repaired[VDEV_LABELS];
boolean_t repaired = B_FALSE;
vdev_label_t labels[VDEV_LABELS];
struct stat st;
int fd;

bzero(labels_repaired, sizeof (labels_repaired));
bzero(labels, sizeof (labels));

abd_init();

argc -= 1;
argv += 1;

if (argc < 1) {
(void) fprintf(stderr, "error: missing device\n");
usage();
}

if ((fd = open(argv[0], O_RDWR)) == -1)
fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
strerror(errno));

if (stat(argv[0], &st) != 0)
fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
strerror(errno));

for (int l = 0; l < VDEV_LABELS; l++) {
uint64_t label_offset, offset;
zio_cksum_t expected_cksum;
zio_cksum_t actual_cksum;
zio_cksum_t verifier;
zio_eck_t *eck;
nvlist_t *cfg;
int byteswap;
uint64_t val;
ssize_t err;

vdev_label_t *vl = &labels[l];

label_offset = vdev_label_offset(st.st_size, l, 0);
err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
if (err == -1) {
(void) fprintf(stderr, "error: cannot read "
"label %d: %s\n", l, strerror(errno));
continue;
} else if (err != sizeof (vdev_label_t)) {
(void) fprintf(stderr, "error: bad label %d read size "
"\n", l);
continue;
}

err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
if (err) {
(void) fprintf(stderr, "error: cannot unpack nvlist "
"label %d\n", l);
continue;
}

for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
if (err) {
(void) fprintf(stderr, "error: label %d: "
"cannot find nvlist key %s\n",
l, cfg_keys[i]);
continue;
}
}

void *data = (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1;

offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys);
ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);

byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
if (byteswap)
byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));

expected_cksum = eck->zec_cksum;
eck->zec_cksum = verifier;

abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE);
ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum);
abd_free(abd);

if (byteswap)
byteswap_uint64_array(&expected_cksum,
sizeof (zio_cksum_t));

if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
continue;

eck->zec_cksum = actual_cksum;

err = pwrite64(fd, data, VDEV_PHYS_SIZE, offset);
if (err == -1) {
(void) fprintf(stderr, "error: cannot write "
"label %d: %s\n", l, strerror(errno));
continue;
} else if (err != VDEV_PHYS_SIZE) {
(void) fprintf(stderr, "error: bad write size "
"label %d\n", l);
continue;
}

fsync(fd);

labels_repaired[l] = B_TRUE;
}

close(fd);

abd_fini();

for (int l = 0; l < VDEV_LABELS; l++) {
(void) printf("label %d: %s\n", l,
labels_repaired[l] ? "repaired" : "skipped");
repaired |= labels_repaired[l];
}

if (repaired)
return (0);

return (1);
}

/*
 * Dispatch a "label" subcommand.
 *
 * On entry argv[0] is the "label" word itself; the operation name follows.
 * "repair" is the only supported operation.  A missing or unknown operation
 * prints an error followed by the usage text, which exits the program.
 *
 * Returns the status of the selected operation.
 */
static int
zhack_do_label(int argc, char **argv)
{
	const char *op;

	/* Drop the "label" word so argv[0] names the operation. */
	argc--;
	argv++;

	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
	}

	op = argv[0];
	if (strcmp(op, "repair") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    op);
		usage();
	}

	return (zhack_repair_label_cksum(argc, argv));
}

#define MAX_NUM_PATHS 1024

int
Expand Down Expand Up @@ -516,6 +683,8 @@ main(int argc, char **argv)

if (strcmp(subcommand, "feature") == 0) {
rv = zhack_do_feature(argc, argv);
} else if (strcmp(subcommand, "label") == 0) {
return (zhack_do_label(argc, argv));
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile
tests/zfs-tests/tests/functional/cli_root/zhack/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile
Expand Down
7 changes: 7 additions & 0 deletions man/man1/zhack.1
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ The
flag indicates that the
.Ar guid
feature is now required to read the pool MOS.
.
.It Xo
.Nm zhack
.Cm label repair
.Ar device
.Xc
Repair corrupted labels by rewriting the checksum using the presumed valid contents of the label.
.El
.
.Sh GLOBAL OPTIONS
Expand Down
6 changes: 6 additions & 0 deletions tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ tags = ['functional', 'cli_root', 'zfs_upgrade']
tests = ['zfs_wait_deleteq']
tags = ['functional', 'cli_root', 'zfs_wait']

[tests/functional/cli_root/zhack]
tests = ['zhack_label_checksum']
pre =
post =
tags = ['functional', 'cli_root', 'zhack']

[tests/functional/cli_root/zpool]
tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors']
tags = ['functional', 'cli_root', 'zpool']
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/cli_root/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ SUBDIRS = \
zfs_unshare \
zfs_upgrade \
zfs_wait \
zhack \
zpool \
zpool_add \
zpool_attach \
Expand Down
3 changes: 3 additions & 0 deletions tests/zfs-tests/tests/functional/cli_root/zhack/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zhack
dist_pkgdata_SCRIPTS = \
zhack_label_checksum.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/ksh

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2021 by vStack. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/blkdev.shlib

#
# Description:
# zhack label repair <vdev> recalculates and rewrites invalid label checksums
#
# Strategy:
# 1. Create a pool on a single file vdev and export it
# 2. Check that zhack reports nothing to repair while the labels are intact
# 3. Corrupt the checksums of all labels
# 4. Check that the pool cannot be imported
# 5. Use zhack to repair the label checksums
# 6. Check that the pool can be imported
#

# State what the test verifies and hand the cleanup function (defined below)
# to log_onexit.
log_assert "Verify zhack label repair <vdev> will repair labels checksums"
log_onexit cleanup

# Path of the file that backs the pool's only vdev.
VIRTUAL_DISK=$TEST_BASE_DIR/disk

#
# Tear down whatever the test left behind: destroy the test pool if it is
# currently imported and delete the backing file if it still exists.
# Safe to call more than once.
#
function cleanup
{
	if poolexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi

	[[ -f $VIRTUAL_DISK ]] && log_must rm $VIRTUAL_DISK
}

# Create the backing file, build a pool on it and export the pool so the
# labels can be modified while it is not in use.
log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK

log_must zpool create $TESTPOOL $VIRTUAL_DISK
log_must zpool export $TESTPOOL

# With intact labels there is nothing to repair, so zhack must fail.
log_mustnot zhack label repair $VIRTUAL_DISK

# Damage the checksum of every label on the vdev.
for label_idx in 0 1 2 3; do
	corrupt_label_checksum $label_idx $VIRTUAL_DISK
done

# The pool must be unimportable until zhack has rewritten the checksums.
log_mustnot zpool import $TESTPOOL -d $TEST_BASE_DIR

log_must zhack label repair $VIRTUAL_DISK

log_must zpool import $TESTPOOL -d $TEST_BASE_DIR

cleanup

log_pass "zhack label repair works correctly."

0 comments on commit d04b5c9

Please sign in to comment.