From 7bcb7f0840d1857370dd1f9ee0ad48f9b7939dfd Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 9 Jun 2020 10:41:01 -0700 Subject: File incorrectly zeroed when receiving incremental stream that toggles -L Background: By increasing the recordsize property above the default of 128KB, a filesystem may have "large" blocks. By default, a send stream of such a filesystem does not contain large WRITE records, instead it decreases objects' block sizes to 128KB and splits the large blocks into 128KB blocks, allowing the large-block filesystem to be received by a system that does not support the `large_blocks` feature. A send stream generated by `zfs send -L` (or `--large-block`) preserves the large block size on the receiving system, by using large WRITE records. When receiving an incremental send stream for a filesystem with large blocks, if the send stream's -L flag was toggled, a bug is encountered in which the file's contents are incorrectly zeroed out. The contents of any blocks that were not modified by this send stream will be lost. "Toggled" means that the previous send used `-L`, but this incremental does not use `-L` (-L to no-L); or that the previous send did not use `-L`, but this incremental does use `-L` (no-L to -L). Changes: This commit addresses the problem with several changes to the semantics of zfs send/receive: 1. "-L to no-L" incrementals are rejected. If the previous send used `-L`, but this incremental does not use `-L`, the `zfs receive` will fail with this error message: incremental send stream requires -L (--large-block), to match previous receive. 2. "no-L to -L" incrementals are handled correctly, preserving the smaller (128KB) block size of any already-received files that used large blocks on the sending system but were split by `zfs send` without the `-L` flag. 3. A new send stream format flag is added, `SWITCH_TO_LARGE_BLOCKS`. This feature indicates that we can correctly handle "no-L to -L" incrementals. This flag is currently not set on any send streams. In the future, we intend for incremental send streams of snapshots that have large blocks to use `-L` by default, and these streams will also have the `SWITCH_TO_LARGE_BLOCKS` feature set. This ensures that streams from the default use of `zfs send` won't encounter the bug mentioned above, because they can't be received by software with the bug. Implementation notes: To facilitate accessing the ZPL's generation number, `zfs_space_delta_cb()` has been renamed to `zpl_get_file_info()` and restructured to fill in a struct with ZPL-specific info including owner and generation. In the "no-L to -L" case, if this is a compressed send stream (from `zfs send -cL`), large WRITE records that are being written to small (128KB) blocksize files need to be decompressed so that they can be written split up into multiple blocks. The zio pipeline will recompress each smaller block individually. A new test case, `send-L_toggle`, is added, which tests the "no-L to -L" case and verifies that we get an error for the "-L to no-L" case. Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #6224 Closes #10383 --- include/sys/zfs_ioctl.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/sys/zfs_ioctl.h') diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index d4ffe70bb..78d33deda 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -107,6 +107,22 @@ typedef enum drr_headertype { #define DMU_BACKUP_FEATURE_RAW (1 << 24) /* flag #25 is reserved for the ZSTD compression feature */ #define DMU_BACKUP_FEATURE_HOLDS (1 << 26) +/* + * The SWITCH_TO_LARGE_BLOCKS feature indicates that we can receive + * incremental LARGE_BLOCKS streams (those with WRITE records of >128KB) even + * if the previous send did not use LARGE_BLOCKS, and thus its large blocks + * were split into multiple 128KB WRITE records. (See + * flush_write_batch_impl() and receive_object()). Older software that does + * not support this flag may encounter a bug when switching to large blocks, + * which causes files to incorrectly be zeroed. + * + * This flag is currently not set on any send streams. In the future, we + * intend for incremental send streams of snapshots that have large blocks to + * use LARGE_BLOCKS by default, and these streams will also have the + * SWITCH_TO_LARGE_BLOCKS feature set. This ensures that streams from the + * default use of "zfs send" won't encounter the bug mentioned above. + */ +#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27) /* * Mask of all supported backup features @@ -116,7 +132,7 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \ DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \ - DMU_BACKUP_FEATURE_REDACTED) + DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) -- cgit v1.2.3