aboutsummaryrefslogtreecommitdiffstats
path: root/module/os
diff options
context:
space:
mode:
authorChristian Schwarz <[email protected]>2021-03-07 18:49:58 +0100
committerTony Hutter <[email protected]>2021-06-23 13:22:14 -0700
commitabb485a34a1ac9e1c0c8f7598bc03cdaac8736d8 (patch)
tree62a5ed42d2fe30642067160388b6320d05d0ee0a /module/os
parent0ccffb2634fe4053187223691162ddd378b792b5 (diff)
zvol: call zil_replaying() during replay
zil_replaying(zil, tx) has the side-effect of informing the ZIL that an entry has been replayed in the (still open) tx. The ZIL uses that information to record the replay progress in the ZIL header when that tx's txg syncs. ZPL log entries are not idempotent and logically dependent and thus calling zil_replaying() is necessary for correctness. For ZVOLs the question of correctness is more nuanced: ZVOL logs only TX_WRITE and TX_TRUNCATE, both of which are idempotent. Logical dependencies between two records exist only if the write or discard request had sync semantics or if the ranges affected by the records overlap. Thus, at a first glance, it would be correct to restart replay from the beginning if we crash before replay completes. But this does not address the following scenario: Assume one log record per LWB. The chain on disk is HDR -> 1:W(1, "A") -> 2:W(1, "B") -> 3:W(2, "X") -> 4:W(3, "Z") where N:W(O, C) represents log entry number N which is a TX_WRITE of C to offset A. We replay 1, 2 and 3 in one txg, sync that txg, then crash. Bit flips corrupt 2, 3, and 4. We come up again and restart replay from the beginning because we did not call zil_replaying() during replay. We replay 1 again, then interpret 2's invalid checksum as the end of the ZIL chain and call replay done. The replayed zvol content is "AX". If we had called zil_replaying() the HDR would have pointed to 3 and our resumed replay would not have replayed anything because 3 was corrupted, resulting in zvol content "BX". If 3 logically depends on 2 then the replay corrupted the ZVOL_OBJ's contents. This patch adds the zil_replaying() calls to the replay functions. Since the callbacks in the replay function need the zilog_t* pointer so that they can call zil_replaying() we open the ZIL while replaying in zvol_create_minor(). We also verify that replay has been done when on-demand-opening the ZIL on the first modifying bio. Reviewed-by: Matthew Ahrens <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Christian Schwarz <[email protected]> Closes #11667
Diffstat (limited to 'module/os')
-rw-r--r--module/os/freebsd/zfs/zvol_os.c9
-rw-r--r--module/os/linux/zfs/zvol_os.c9
2 files changed, 16 insertions, 2 deletions
diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c
index 348c24631..695253ac6 100644
--- a/module/os/freebsd/zfs/zvol_os.c
+++ b/module/os/freebsd/zfs/zvol_os.c
@@ -1157,6 +1157,9 @@ zvol_ensure_zilog(zvol_state_t *zv)
zv->zv_zilog = zil_open(zv->zv_objset,
zvol_get_data);
zv->zv_flags |= ZVOL_WRITTEN_TO;
+ /* replay / destroy done in zvol_create_minor_impl() */
+ VERIFY0((zv->zv_zilog->zl_header->zh_flags &
+ ZIL_REPLAY_NEEDED));
}
rw_downgrade(&zv->zv_suspend_lock);
}
@@ -1381,12 +1384,16 @@ zvol_create_minor_impl(const char *name)
zv->zv_volsize = volsize;
zv->zv_objset = os;
+ ASSERT3P(zv->zv_zilog, ==, NULL);
+ zv->zv_zilog = zil_open(os, zvol_get_data);
if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable)
- zil_destroy(dmu_objset_zil(os), B_FALSE);
+ zil_destroy(zv->zv_zilog, B_FALSE);
else
zil_replay(os, zv, zvol_replay_vector);
}
+ zil_close(zv->zv_zilog);
+ zv->zv_zilog = NULL;
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index 6158d58dd..836af9199 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -357,6 +357,9 @@ zvol_request(struct request_queue *q, struct bio *bio)
zv->zv_zilog = zil_open(zv->zv_objset,
zvol_get_data);
zv->zv_flags |= ZVOL_WRITTEN_TO;
+ /* replay / destroy done in zvol_create_minor */
+ VERIFY0((zv->zv_zilog->zl_header->zh_flags &
+ ZIL_REPLAY_NEEDED));
}
rw_downgrade(&zv->zv_suspend_lock);
}
@@ -947,12 +950,16 @@ zvol_os_create_minor(const char *name)
blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue);
#endif
+ ASSERT3P(zv->zv_zilog, ==, NULL);
+ zv->zv_zilog = zil_open(os, zvol_get_data);
if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable)
- zil_destroy(dmu_objset_zil(os), B_FALSE);
+ zil_destroy(zv->zv_zilog, B_FALSE);
else
zil_replay(os, zv, zvol_replay_vector);
}
+ zil_close(zv->zv_zilog);
+ zv->zv_zilog = NULL;
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);