aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeorge Amanakis <[email protected]>2022-08-23 02:42:01 +0200
committerGitHub <[email protected]>2022-08-22 17:42:01 -0700
commit0c4064d9a08ca2cf601ad1010e7cfd3f917cb991 (patch)
treee810a24532dc58a3933c3d54d0737ada633dab15
parent17e212652db34d2942d6aa42ba14b443bcd73bea (diff)
Fix zpool status in case of unloaded keys
When scrubbing an encrypted filesystem with unloaded key still report an error in zpool status. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Alek Pinchuk <[email protected]> Signed-off-by: George Amanakis <[email protected]> Closes #13675 Closes #13717
-rw-r--r--include/sys/dsl_crypt.h1
-rw-r--r--man/man7/zpool-features.73
-rw-r--r--module/zfs/dsl_crypt.c2
-rw-r--r--module/zfs/spa_errlog.c135
-rw-r--r--tests/runfiles/common.run2
-rw-r--r--tests/zfs-tests/tests/Makefile.am1
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh1
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh1
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh78
9 files changed, 190 insertions, 34 deletions
diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h
index db594eece..72716e296 100644
--- a/include/sys/dsl_crypt.h
+++ b/include/sys/dsl_crypt.h
@@ -222,5 +222,6 @@ int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
boolean_t *no_crypt);
+zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd);
#endif
diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7
index 09f1e50de..0ff1a9a52 100644
--- a/man/man7/zpool-features.7
+++ b/man/man7/zpool-features.7
@@ -545,6 +545,9 @@ This feature enables the upgraded version of errlog, which required an on-disk
error log format change.
Now the error log of each head dataset is stored separately in the zap object
and keyed by the head id.
+In case of encrypted filesystems with unloaded keys or unmounted encrypted
+filesystems we are unable to check their snapshots or clones for errors and
+these will not be reported.
With this feature enabled, every dataset affected by an error block is listed
in the output of
.Nm zpool Cm status .
diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c
index 25cb4f6ab..ce2e6ce74 100644
--- a/module/zfs/dsl_crypt.c
+++ b/module/zfs/dsl_crypt.c
@@ -1137,7 +1137,7 @@ dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
return (0);
}
-static zfs_keystatus_t
+zfs_keystatus_t
dsl_dataset_get_keystatus(dsl_dir_t *dd)
{
/* check if this dd has a has a dsl key */
diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c
index 4572a6e56..e682f6c69 100644
--- a/module/zfs/spa_errlog.c
+++ b/module/zfs/spa_errlog.c
@@ -21,8 +21,8 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2014, Delphix. All rights reserved.
- * Copyright (c) 2021, George Amanakis. All rights reserved.
* Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2021, 2022, George Amanakis. All rights reserved.
*/
/*
@@ -68,6 +68,7 @@
#include <sys/dsl_dir.h>
#include <sys/dmu_objset.h>
#include <sys/dbuf.h>
+#include <sys/zfs_znode.h>
#define NAME_MAX_LEN 64
@@ -175,6 +176,23 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
return (error);
}
+ /*
+ * If the key is not loaded dbuf_dnode_findbp() will error out with
+ * EACCES. However in that case dnode_hold() will eventually call
+ * dbuf_read()->zio_wait() which may call spa_log_error(). This will
+ * lead to a deadlock due to us holding the mutex spa_errlist_lock.
+ * Avoid this by checking here if the keys are loaded, if not return.
+ * If the keys are not loaded the head_errlog feature is meaningless
+ * as we cannot figure out the birth txg of the block pointer.
+ */
+ if (dsl_dataset_get_keystatus(ds->ds_dir) ==
+ ZFS_KEYSTATUS_UNAVAILABLE) {
+ zep->zb_birth = 0;
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_config_exit(dp, FTAG);
+ return (0);
+ }
+
dnode_t *dn;
blkptr_t bp;
@@ -188,11 +206,22 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
NULL);
-
if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT);
- zep->zb_birth = bp.blk_birth;
+ /*
+ * If the key is loaded but the encrypted filesystem is unmounted when
+ * a scrub is run, then dbuf_dnode_findbp() will still error out with
+ * EACCES (possibly due to the key mapping being removed upon
+ * unmounting). In that case the head_errlog feature is also
+ * meaningless as we cannot figure out the birth txg of the block
+ * pointer.
+ */
+ if (error == EACCES)
+ error = 0;
+ else if (!error)
+ zep->zb_birth = bp.blk_birth;
+
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
dsl_dataset_rele(ds, FTAG);
@@ -264,7 +293,6 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
NULL);
-
if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT);
@@ -298,27 +326,40 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t txg_to_consider = spa->spa_syncing_txg;
boolean_t check_snapshot = B_TRUE;
error = find_birth_txg(ds, zep, &latest_txg);
- if (error == 0) {
- if (zep->zb_birth == latest_txg) {
- /* Block neither free nor rewritten. */
- if (!only_count) {
- zbookmark_phys_t zb;
- zep_to_zb(head_ds, zep, &zb);
- if (copyout(&zb, (char *)uaddr + (*count - 1)
- * sizeof (zbookmark_phys_t),
- sizeof (zbookmark_phys_t)) != 0) {
- dsl_dataset_rele(ds, FTAG);
- return (SET_ERROR(EFAULT));
- }
- (*count)--;
- } else {
- (*count)++;
+
+ /*
+ * If we cannot figure out the current birth txg of the block pointer
+ * error out. If the filesystem is encrypted and the key is not loaded
+ * or the encrypted filesystem is not mounted the error will be EACCES.
+ * In that case do not return an error.
+ */
+ if (error == EACCES) {
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+ }
+ if (error) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
+ if (zep->zb_birth == latest_txg) {
+ /* Block neither free nor rewritten. */
+ if (!only_count) {
+ zbookmark_phys_t zb;
+ zep_to_zb(head_ds, zep, &zb);
+ if (copyout(&zb, (char *)uaddr + (*count - 1)
+ * sizeof (zbookmark_phys_t),
+ sizeof (zbookmark_phys_t)) != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EFAULT));
}
- check_snapshot = B_FALSE;
+ (*count)--;
} else {
- ASSERT3U(zep->zb_birth, <, latest_txg);
- txg_to_consider = latest_txg;
+ (*count)++;
}
+ check_snapshot = B_FALSE;
+ } else {
+ ASSERT3U(zep->zb_birth, <, latest_txg);
+ txg_to_consider = latest_txg;
}
/* How many snapshots reference this block. */
@@ -439,9 +480,31 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t *count, void *uaddr, boolean_t only_count)
{
dsl_pool_t *dp = spa->spa_dsl_pool;
- dsl_pool_config_enter(dp, FTAG);
uint64_t top_affected_fs;
+ /*
+ * If the zb_birth is 0 it means we failed to retrieve the birth txg
+ * of the block pointer. This happens when an encrypted filesystem is
+ * not mounted or when the key is not loaded. Do not proceed to
+ * check_filesystem(), instead do the accounting here.
+ */
+ if (zep->zb_birth == 0) {
+ if (!only_count) {
+ zbookmark_phys_t zb;
+ zep_to_zb(head_ds, zep, &zb);
+ if (copyout(&zb, (char *)uaddr + (*count - 1)
+ * sizeof (zbookmark_phys_t),
+ sizeof (zbookmark_phys_t)) != 0) {
+ return (SET_ERROR(EFAULT));
+ }
+ (*count)--;
+ } else {
+ (*count)++;
+ }
+ return (0);
+ }
+
+ dsl_pool_config_enter(dp, FTAG);
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
if (error == 0)
error = check_filesystem(spa, top_affected_fs, zep, count,
@@ -497,6 +560,7 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid;
+ zep.zb_birth = 0;
/*
* If we cannot find out the head dataset and birth txg of
@@ -505,8 +569,10 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
* sync_error_list() and written to the on-disk error log.
*/
uint64_t head_ds_obj;
- if (get_head_and_birth_txg(spa, &zep,
- se->se_bookmark.zb_objset, &head_ds_obj) == 0)
+ int error = get_head_and_birth_txg(spa, &zep,
+ se->se_bookmark.zb_objset, &head_ds_obj);
+
+ if (!error)
(void) process_error_block(spa, head_ds_obj, &zep,
&total, NULL, B_TRUE);
}
@@ -695,6 +761,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
zep.zb_object = zb.zb_object;
zep.zb_level = zb.zb_level;
zep.zb_blkid = zb.zb_blkid;
+ zep.zb_birth = 0;
/*
* We cannot use get_head_and_birth_txg() because it will
@@ -737,8 +804,11 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp,
NULL, NULL);
+ if (error == EACCES)
+ error = 0;
+ else if (!error)
+ zep.zb_birth = bp.blk_birth;
- zep.zb_birth = bp.blk_birth;
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
dsl_dataset_rele(ds, FTAG);
@@ -885,16 +955,16 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid;
+ zep.zb_birth = 0;
uint64_t head_ds_obj;
int error = get_head_and_birth_txg(spa, &zep,
se->se_bookmark.zb_objset, &head_ds_obj);
- if (error != 0)
- return (error);
- error = process_error_block(spa, head_ds_obj, &zep, count,
- uaddr, B_FALSE);
- if (error != 0)
+ if (!error)
+ error = process_error_block(spa, head_ds_obj, &zep,
+ count, uaddr, B_FALSE);
+ if (error)
return (error);
}
return (0);
@@ -1013,6 +1083,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid;
+ zep.zb_birth = 0;
/*
* If we cannot find out the head dataset and birth txg
@@ -1024,7 +1095,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
uint64_t head_dataset_obj;
int error = get_head_and_birth_txg(spa, &zep,
se->se_bookmark.zb_objset, &head_dataset_obj);
- if (error != 0)
+ if (error)
continue;
uint64_t err_obj;
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 414316eb6..b9a9e0efc 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -494,7 +494,7 @@ tags = ['functional', 'cli_root', 'zpool_split']
[tests/functional/cli_root/zpool_status]
tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
'zpool_status_003_pos', 'zpool_status_004_pos',
- 'zpool_status_features_001_pos']
+ 'zpool_status_005_pos', 'zpool_status_features_001_pos']
tags = ['functional', 'cli_root', 'zpool_status']
[tests/functional/cli_root/zpool_sync]
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index d65788d08..4a815db8a 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1161,6 +1161,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_status/zpool_status_002_pos.ksh \
functional/cli_root/zpool_status/zpool_status_003_pos.ksh \
functional/cli_root/zpool_status/zpool_status_004_pos.ksh \
+ functional/cli_root/zpool_status/zpool_status_005_pos.ksh \
functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \
functional/cli_root/zpool_sync/cleanup.ksh \
functional/cli_root/zpool_sync/setup.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh
index 231c46d4f..a243944e4 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh
@@ -63,6 +63,7 @@ log_must zfs snapshot $TESTPOOL2@snap
log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone
# Look to see that snapshot, clone and filesystem our files report errors
+log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh
index 1ac3e03b7..111d598df 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_004_pos.ksh
@@ -74,6 +74,7 @@ log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
# Check that enabling the feature reports the error properly.
log_must zpool set feature@head_errlog=enabled $TESTPOOL2
+log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh
new file mode 100755
index 000000000..3eb7825ca
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+#
+# DESCRIPTION:
+# Verify correct output with 'zpool status -v' after corrupting a file
+#
+# STRATEGY:
+# 1. Create a pool, an ancrypted filesystem and a file
+# 2. zinject checksum errors
+# 3. Unmount the filesystem and unload the key
+# 4. Scrub the pool
+# 5. Verify we report errors in the pool in 'zpool status -v'
+
+verify_runnable "both"
+
+DISK=${DISKS%% *}
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL2
+ rm -f $TESTDIR/vdev_a
+}
+
+log_assert "Verify reporting errors with unloaded keys works"
+log_onexit cleanup
+
+typeset passphrase="password"
+typeset file="/$TESTPOOL2/$TESTFS1/$TESTFILE0"
+
+truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
+log_must zpool create -f -o feature@head_errlog=enabled $TESTPOOL2 $TESTDIR/vdev_a
+
+log_must eval "echo $passphrase > /$TESTPOOL2/pwd"
+
+log_must zfs create -o encryption=aes-256-ccm -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL2/pwd -o primarycache=none \
+ $TESTPOOL2/$TESTFS1
+
+log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
+log_must eval "echo 'aaaaaaaa' >> "$file
+
+corrupt_blocks_at_level $file 0
+log_must zfs unmount $TESTPOOL2/$TESTFS1
+log_must zfs unload-key $TESTPOOL2/$TESTFS1
+log_must zpool sync $TESTPOOL2
+log_must zpool scrub $TESTPOOL2
+log_must zpool wait -t scrub $TESTPOOL2
+log_must zpool status -v $TESTPOOL2
+log_must eval "zpool status -v $TESTPOOL2 | \
+ grep \"Permanent errors have been detected\""
+
+log_pass "Verify reporting errors with unloaded keys works"