summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Dagnelie <[email protected]>2016-08-09 23:06:39 +0200
committerBrian Behlendorf <[email protected]>2016-08-18 09:26:44 -0700
commit32d41fb73a0d04a1d864e7602595b4c6e0d16c10 (patch)
treebb9f2da2d43f37a1f1a4e850198ff6e6b252af50
parent8658115c19f84b30d68402c32a33a2157c97e4f1 (diff)
OpenZFS 7176 - Yet another hole birth issue
This is another bug in the long line of hole-birth related issues. In this particular case, it was discovered that a previous hole-birth fix (illumos bug 6513, commit bc77ba73) did not cover as many cases as we thought it did. While the issue worked in the case of hole-punching (writing zeroes to a large part of a file), it did not deal with truncation, and then writing beyond the new end of the file. The problem is that dbuf_findbp will return ENOENT if the block it's trying to find is beyond the end of the file. If that happens, we assume there is no birth time, and so we lose that information when we write out new blkptrs. We should teach dbuf_findbp to look for things that are beyond the current end, but not beyond the absolute end of the file. Authored by: Paul Dagnelie <[email protected]> Reviewed by: Matthew Ahrens [email protected] Reviewed by: George Wilson [email protected] Ported-by: kernelOfTruth <[email protected]> Signed-off-by: Boris Protopopov <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> OpenZFS-issue: https://www.illumos.org/issues/7176 OpenZFS-commit: https://github.com/openzfs/openzfs/pull/173/commits/8b9f3ad Upstream-bugs: DLPX-46009 Porting notes: - Fix ISO C90 mixed declaration error in dbuf.c ( int nlevels, epbs; ) ; keep previous position of the initialization
-rw-r--r--include/sys/dnode.h6
-rw-r--r--module/zfs/dbuf.c32
2 files changed, 32 insertions, 6 deletions
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index cee4ea783..3f560a7fe 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -58,6 +58,12 @@ extern "C" {
*/
#define DNODE_SHIFT 9 /* 512 bytes */
#define DN_MIN_INDBLKSHIFT 12 /* 4k */
+/*
+ * If we ever increase this value beyond 20, we need to revisit all logic that
+ * does x << level * ebps to handle overflow. With a 1M indirect block size,
+ * 4 levels of indirect blocks would not be able to guarantee addressing an
+ * entire object, so 5 levels will be used, but 5 * (20 - 7) = 65.
+ */
#define DN_MAX_INDBLKSHIFT 14 /* 16k */
#define DNODE_BLOCK_SHIFT 14 /* 16k */
#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 1728694ac..266f31250 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1946,17 +1946,35 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
return (0);
}
- if (dn->dn_phys->dn_nlevels == 0)
- nlevels = 1;
- else
- nlevels = dn->dn_phys->dn_nlevels;
-
+ nlevels =
+ (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels;
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
ASSERT3U(level * epbs, <, 64);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ /*
+ * This assertion shouldn't trip as long as the max indirect block size
+ * is less than 1M. The reason for this is that up to that point,
+ * the number of levels required to address an entire object with blocks
+ * of size SPA_MINBLOCKSIZE satisfies nlevels * epbs + 1 <= 64. In
+ * other words, if N * epbs + 1 > 64, then if (N-1) * epbs + 1 > 55
+ * (i.e. we can address the entire object), objects will all use at most
+ * N-1 levels and the assertion won't overflow. However, once epbs is
+ * 13, 4 * 13 + 1 = 53, but 5 * 13 + 1 = 66. Then, 4 levels will not be
+ * enough to address an entire object, so objects will have 5 levels,
+ * but then this assertion will overflow.
+ *
+ * All this is to say that if we ever increase DN_MAX_INDBLKSHIFT, we
+ * need to redo this logic to handle overflows.
+ */
+ ASSERT(level >= nlevels ||
+ ((nlevels - level - 1) * epbs) +
+ highbit64(dn->dn_phys->dn_nblkptr) <= 64);
if (level >= nlevels ||
- (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
+ blkid >= ((uint64_t)dn->dn_phys->dn_nblkptr <<
+ ((nlevels - level - 1) * epbs)) ||
+ (fail_sparse &&
+ blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
/* the buffer has no parent yet */
return (SET_ERROR(ENOENT));
} else if (level < nlevels-1) {
@@ -1982,6 +2000,8 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
}
*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
(blkid & ((1ULL << epbs) - 1));
+ if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
+ ASSERT(BP_IS_HOLE(*bpp));
return (0);
} else {
/* the block is referenced from the dnode */