aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys/spa.h
diff options
context:
space:
mode:
author Matthew Ahrens <[email protected]> 2014-06-05 13:19:08 -0800
committer Brian Behlendorf <[email protected]> 2014-08-01 14:28:05 -0700
commit 9b67f605601c77c814037613d8129562db642a29 (patch)
tree 21a3270ed7eda24858e56a9584f64f6359f4b28f /include/sys/spa.h
parent faf0f58c69607a15e2d1563567afb815842805de (diff)
Illumos 4757, 4913
4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks

Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Max Grossman <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Dan McDonald <[email protected]>

References:
  https://www.illumos.org/issues/4757
  https://www.illumos.org/issues/4913
  https://github.com/illumos/illumos-gate/commit/5d7b4d4

Porting notes:
For compatibility with the fastpath code the zio_done() function needed to be updated. Because embedded-data block pointers do not require DVAs to be allocated the associated vdevs will not be marked and therefore should not be unmarked.

Ported by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2544
Diffstat (limited to 'include/sys/spa.h')
-rw-r--r-- include/sys/spa.h 179
1 files changed, 155 insertions, 24 deletions
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 5c754b0af..707b1987a 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -156,7 +156,7 @@ typedef struct zio_cksum {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -190,7 +190,8 @@ typedef struct zio_cksum {
* G gang block indicator
* B byteorder (endianness)
* D dedup
- * X unused
+ * X encryption (on version 30, which is not supported)
+ * E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
@@ -198,6 +199,100 @@ typedef struct zio_cksum {
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
+
+/*
+ * "Embedded" blkptr_t's don't actually point to a block, instead they
+ * have a data payload embedded in the blkptr_t itself. See the comment
+ * in blkptr.c for more details.
+ *
+ * The blkptr_t is laid out as follows:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 0 | payload |
+ * 1 | payload |
+ * 2 | payload |
+ * 3 | payload |
+ * 4 | payload |
+ * 5 | payload |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 7 | payload |
+ * 8 | payload |
+ * 9 | payload |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * a | logical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * b | payload |
+ * c | payload |
+ * d | payload |
+ * e | payload |
+ * f | payload |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Legend:
+ *
+ * payload contains the embedded data
+ * B (byteorder) byteorder (endianness)
+ * D (dedup) padding (set to zero)
+ * X encryption (set to zero; see above)
+ * E (embedded) set to one
+ * lvl indirection level
+ * type DMU object type
+ * etype how to interpret embedded data (BP_EMBEDDED_TYPE_*)
+ * comp compression function of payload
+ * PSIZE size of payload after compression, in bytes
+ * LSIZE logical size of payload, in bytes
+ * note that 25 bits is enough to store the largest
+ * "normal" BP's LSIZE (2^16 * 2^9) in bytes
+ * log. birth transaction group in which the block was logically born
+ *
+ * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded
+ * bp's they are stored in units of SPA_MINBLOCKSHIFT.
+ * Generally, the generic BP_GET_*() macros can be used on embedded BP's.
+ * The B, D, X, lvl, type, and comp fields are stored the same as with normal
+ * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
+ * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
+ * other macros, as they assert that they are only used on BP's of the correct
+ * "embedded-ness".
+ */
+
+#define BPE_GET_ETYPE(bp) \
+ (ASSERT(BP_IS_EMBEDDED(bp)), \
+ BF64_GET((bp)->blk_prop, 40, 8))
+#define BPE_SET_ETYPE(bp, t) do { \
+ ASSERT(BP_IS_EMBEDDED(bp)); \
+ BF64_SET((bp)->blk_prop, 40, 8, t); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BPE_GET_LSIZE(bp) \
+ (ASSERT(BP_IS_EMBEDDED(bp)), \
+ BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1))
+#define BPE_SET_LSIZE(bp, x) do { \
+ ASSERT(BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BPE_GET_PSIZE(bp) \
+ (ASSERT(BP_IS_EMBEDDED(bp)), \
+ BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1))
+#define BPE_SET_PSIZE(bp, x) do { \
+ ASSERT(BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \
+_NOTE(CONSTCOND) } while (0)
+
+typedef enum bp_embedded_type {
+ BP_EMBEDDED_TYPE_DATA,
+ BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */
+ NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED
+} bp_embedded_type_t;
+
+#define BPE_NUM_WORDS 14
+#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
+#define BPE_IS_PAYLOADWORD(bp, wp) \
+ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
+
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
@@ -244,20 +339,37 @@ typedef struct blkptr {
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
- BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
-#define BP_SET_LSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
+ (BP_IS_EMBEDDED(bp) ? \
+ (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \
+ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1))
+#define BP_SET_LSIZE(bp, x) do { \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, \
+ 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
+_NOTE(CONSTCOND) } while (0)
#define BP_GET_PSIZE(bp) \
- BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
-#define BP_SET_PSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1))
+#define BP_SET_PSIZE(bp, x) do { \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, \
+ 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7)
+#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x)
-#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
-#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
+#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1)
+#define BP_SET_EMBEDDED(bp, x) BF64_SET((bp)->blk_prop, 39, 1, x)
-#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
-#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
+#define BP_GET_CHECKSUM(bp) \
+ (BP_IS_EMBEDDED(bp) ? ZIO_CHECKSUM_OFF : \
+ BF64_GET((bp)->blk_prop, 40, 8))
+#define BP_SET_CHECKSUM(bp, x) do { \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ BF64_SET((bp)->blk_prop, 40, 8, x); \
+_NOTE(CONSTCOND) } while (0)
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
@@ -265,9 +377,6 @@ typedef struct blkptr {
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
-#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1)
-#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
-
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
@@ -275,31 +384,39 @@ typedef struct blkptr {
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_PHYSICAL_BIRTH(bp) \
- ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ (bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
#define BP_SET_BIRTH(bp, logical, physical) \
{ \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
(bp)->blk_birth = (logical); \
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}
+#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
+
#define BP_GET_ASIZE(bp) \
- (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
+ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
+ DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_GET_UCSIZE(bp) \
((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
- (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_COUNT_GANG(bp) \
+ (BP_IS_EMBEDDED(bp) ? 0 : \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
- DVA_GET_GANG(&(bp)->blk_dva[2]))
+ DVA_GET_GANG(&(bp)->blk_dva[2])))
#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
@@ -307,6 +424,7 @@ typedef struct blkptr {
#define BP_EQUAL(bp1, bp2) \
(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
+ (bp1)->blk_birth == (bp2)->blk_birth && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
@@ -327,11 +445,13 @@ typedef struct blkptr {
(zcp)->zc_word[3] = w3; \
}
-#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
-#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
+#define BP_IDENTITY(bp) (ASSERT(!BP_IS_EMBEDDED(bp)), &(bp)->blk_dva[0])
+#define BP_IS_GANG(bp) \
+ (BP_IS_EMBEDDED(bp) ? B_FALSE : DVA_GET_GANG(BP_IDENTITY(bp)))
#define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \
(dva)->dva_word[1] == 0ULL)
-#define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp))
+#define BP_IS_HOLE(bp) \
+ (!BP_IS_EMBEDDED(bp) && DVA_IS_EMPTY(BP_IDENTITY(bp)))
/* BP_IS_RAIDZ(bp) assumes no block compression */
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
@@ -386,6 +506,17 @@ typedef struct blkptr {
" birth=%lluL", \
(u_longlong_t)bp->blk_birth); \
} \
+ } else if (BP_IS_EMBEDDED(bp)) { \
+ len = func(buf + len, size - len, \
+ "EMBEDDED [L%llu %s] et=%u %s " \
+ "size=%llxL/%llxP birth=%lluL", \
+ (u_longlong_t)BP_GET_LEVEL(bp), \
+ type, \
+ (int)BPE_GET_ETYPE(bp), \
+ compress, \
+ (u_longlong_t)BPE_GET_LSIZE(bp), \
+ (u_longlong_t)BPE_GET_PSIZE(bp), \
+ (u_longlong_t)bp->blk_birth); \
} else { \
for (d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \
@@ -419,7 +550,7 @@ typedef struct blkptr {
(u_longlong_t)BP_GET_PSIZE(bp), \
(u_longlong_t)bp->blk_birth, \
(u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
- (u_longlong_t)bp->blk_fill, \
+ (u_longlong_t)BP_GET_FILL(bp), \
ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \
(u_longlong_t)bp->blk_cksum.zc_word[1], \