diff options
author | Paul Dagnelie <[email protected]> | 2016-08-09 23:06:39 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-08-18 09:26:44 -0700 |
commit | 32d41fb73a0d04a1d864e7602595b4c6e0d16c10 (patch) | |
tree | bb9f2da2d43f37a1f1a4e850198ff6e6b252af50 | |
parent | 8658115c19f84b30d68402c32a33a2157c97e4f1 (diff) |
OpenZFS 7176 - Yet another hole birth issue
This is another bug in the long line of hole-birth related issues. In
this particular case, it was discovered that a previous hole-birth fix
(illumos bug 6513, commit bc77ba73) did not cover as many cases as we
thought it did. While the issue worked in the case of hole-punching
(writing zeroes to a large part of a file), it did not deal with
truncation, and then writing beyond the new end of the file.
The problem is that dbuf_findbp will return ENOENT if the block it's
trying to find is beyond the end of the file. If that happens, we assume
there is no birth time, and so we lose that information when we write
out new blkptrs. We should teach dbuf_findbp to look for things that are
beyond the current end, but not beyond the absolute end of the file.
Authored by: Paul Dagnelie <[email protected]>
Reviewed by: Matthew Ahrens [email protected]
Reviewed by: George Wilson [email protected]
Ported-by: kernelOfTruth <[email protected]>
Signed-off-by: Boris Protopopov <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7176
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/173/commits/8b9f3ad
Upstream-bugs: DLPX-46009
Porting notes:
- Fix ISO C90 mixed declaration error in dbuf.c ( int nlevels, epbs; ) ;
keep previous position of the initialization
-rw-r--r-- | include/sys/dnode.h | 6 | ||||
-rw-r--r-- | module/zfs/dbuf.c | 32 |
2 files changed, 32 insertions, 6 deletions
diff --git a/include/sys/dnode.h b/include/sys/dnode.h index cee4ea783..3f560a7fe 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -58,6 +58,12 @@ extern "C" { */ #define DNODE_SHIFT 9 /* 512 bytes */ #define DN_MIN_INDBLKSHIFT 12 /* 4k */ +/* + * If we ever increase this value beyond 20, we need to revisit all logic that + * does x << level * ebps to handle overflow. With a 1M indirect block size, + * 4 levels of indirect blocks would not be able to guarantee addressing an + * entire object, so 5 levels will be used, but 5 * (20 - 7) = 65. + */ #define DN_MAX_INDBLKSHIFT 14 /* 16k */ #define DNODE_BLOCK_SHIFT 14 /* 16k */ #define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 1728694ac..266f31250 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1946,17 +1946,35 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, return (0); } - if (dn->dn_phys->dn_nlevels == 0) - nlevels = 1; - else - nlevels = dn->dn_phys->dn_nlevels; - + nlevels = + (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels; epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ASSERT3U(level * epbs, <, 64); ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + /* + * This assertion shouldn't trip as long as the max indirect block size + * is less than 1M. The reason for this is that up to that point, + * the number of levels required to address an entire object with blocks + * of size SPA_MINBLOCKSIZE satisfies nlevels * epbs + 1 <= 64. In + * other words, if N * epbs + 1 > 64, then if (N-1) * epbs + 1 > 55 + * (i.e. we can address the entire object), objects will all use at most + * N-1 levels and the assertion won't overflow. However, once epbs is + * 13, 4 * 13 + 1 = 53, but 5 * 13 + 1 = 66. Then, 4 levels will not be + * enough to address an entire object, so objects will have 5 levels, + * but then this assertion will overflow. + * + * All this is to say that if we ever increase DN_MAX_INDBLKSHIFT, we + * need to redo this logic to handle overflows. + */ + ASSERT(level >= nlevels || + ((nlevels - level - 1) * epbs) + + highbit64(dn->dn_phys->dn_nblkptr) <= 64); if (level >= nlevels || - (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { + blkid >= ((uint64_t)dn->dn_phys->dn_nblkptr << + ((nlevels - level - 1) * epbs)) || + (fail_sparse && + blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { /* the buffer has no parent yet */ return (SET_ERROR(ENOENT)); } else if (level < nlevels-1) { @@ -1982,6 +2000,8 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, } *bpp = ((blkptr_t *)(*parentp)->db.db_data) + (blkid & ((1ULL << epbs) - 1)); + if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs))) + ASSERT(BP_IS_HOLE(*bpp)); return (0); } else { /* the block is referenced from the dnode */ |