summaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2014-09-12 05:28:35 +0200
committerBrian Behlendorf <[email protected]>2014-10-21 15:26:50 -0700
commit6c59307a3c52535513e1ea3b612dac5a594c5b5d (patch)
tree354e056c4e1bfca9feafc741cb6d5e3b775cf77b /module/zfs
parent356d9ed4c81dbb1c52627d1af242f4d9f66b67af (diff)
Illumos 3693 - restore_object uses at least two transactions to restore an object
Restore_object should not use two transactions to restore an object: * one transaction is used for dmu_object_claim * another transaction is used to set compression, checksum and most importantly bonus data * furthermore dmu_object_reclaim internally uses multiple transactions * dmu_free_long_range frees chunks in separate transactions * dnode_reallocate is executed in a distinct transaction The fact the dnode_allocate/dnode_reallocate are executed in one transaction and bonus (re-)population is executed in a different transaction may lead to violation of ZFS consistency assertions if the transactions are assigned to different transaction groups. Also, if the first transaction group is successfully written to a permanent storage, but the second transaction is lost, then an invalid dnode may be created on the stable storage. 3693 restore_object uses at least two transactions to restore an object Reviewed by: Christopher Siden <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Andriy Gapon <[email protected]> Approved by: Robert Mustacchi <[email protected]> Original authors: Matthew Ahrens and Andriy Gapon References: https://www.illumos.org/issues/3693 https://github.com/illumos/illumos-gate/commit/e77d42e Ported by: Turbo Fredriksson <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #2689
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/dmu.c1
-rw-r--r--module/zfs/dmu_object.c42
-rw-r--r--module/zfs/dmu_send.c74
3 files changed, 56 insertions, 61 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index a8e6ae1fd..81432b9ba 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1894,6 +1894,7 @@ __dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
doi->doi_indirection = dn->dn_nlevels;
doi->doi_checksum = dn->dn_checksum;
doi->doi_compress = dn->dn_compress;
+ doi->doi_nblkptr = dn->dn_nblkptr;
doi->doi_physical_blocks_512 = (DN_USED_BYTES(dnp) + 256) >> 9;
doi->doi_max_offset = (dn->dn_maxblkid + 1) * dn->dn_datablksz;
doi->doi_fill_count = 0;
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index 375439ed7..c0e8d7b7f 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright 2014 HybridCluster. All rights reserved.
*/
#include <sys/dmu.h>
@@ -107,11 +108,9 @@ dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int
dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
- int blocksize, dmu_object_type_t bonustype, int bonuslen)
+ int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
dnode_t *dn;
- dmu_tx_t *tx;
- int nblkptr;
int err;
if (object == DMU_META_DNODE_OBJECT)
@@ -122,44 +121,9 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
if (err)
return (err);
- if (dn->dn_type == ot && dn->dn_datablksz == blocksize &&
- dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) {
- /* nothing is changing, this is a noop */
- dnode_rele(dn, FTAG);
- return (0);
- }
-
- if (bonustype == DMU_OT_SA) {
- nblkptr = 1;
- } else {
- nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
- }
-
- /*
- * If we are losing blkptrs or changing the block size this must
- * be a new file instance. We must clear out the previous file
- * contents before we can change this type of metadata in the dnode.
- */
- if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) {
- err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
- if (err)
- goto out;
- }
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, object);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- goto out;
- }
-
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
- dmu_tx_commit(tx);
-out:
dnode_rele(dn, FTAG);
-
return (err);
}
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 6f7910362..feb089fd5 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -1328,12 +1328,25 @@ backup_byteswap(dmu_replay_record_t *drr)
#undef DO32
}
+static inline uint8_t
+deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
+{
+ if (bonus_type == DMU_OT_SA) {
+ return (1);
+ } else {
+ return (1 +
+ ((DN_MAX_BONUSLEN - bonus_size) >> SPA_BLKPTRSHIFT));
+ }
+}
+
noinline static int
restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
{
- int err;
+ dmu_object_info_t doi;
dmu_tx_t *tx;
void *data = NULL;
+ uint64_t object;
+ int err;
if (drro->drr_type == DMU_OT_NONE ||
!DMU_OT_IS_VALID(drro->drr_type) ||
@@ -1347,10 +1360,11 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
return (SET_ERROR(EINVAL));
}
- err = dmu_object_info(os, drro->drr_object, NULL);
+ err = dmu_object_info(os, drro->drr_object, &doi);
if (err != 0 && err != ENOENT)
return (SET_ERROR(EINVAL));
+ object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
if (drro->drr_bonuslen) {
data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
@@ -1358,37 +1372,53 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
return (ra->err);
}
- if (err == ENOENT) {
- /* currently free, want to be allocated */
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err != 0) {
- dmu_tx_abort(tx);
- return (err);
+ /*
+ * If we are losing blkptrs or changing the block size this must
+ * be a new file instance. We must clear out the previous file
+ * contents before we can change this type of metadata in the dnode.
+ */
+ if (err == 0) {
+ int nblkptr;
+
+ nblkptr = deduce_nblkptr(drro->drr_bonustype,
+ drro->drr_bonuslen);
+
+ if (drro->drr_blksz != doi.doi_data_block_size ||
+ nblkptr < doi.doi_nblkptr) {
+ err = dmu_free_long_range(os, drro->drr_object,
+ 0, DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
}
+ }
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, object);
+ err = dmu_tx_assign(tx, TXG_WAIT);
+ if (err != 0) {
+ dmu_tx_abort(tx);
+ return (err);
+ }
+
+ if (object == DMU_NEW_OBJECT) {
+ /* currently free, want to be allocated */
err = dmu_object_claim(os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen, tx);
- dmu_tx_commit(tx);
- } else {
- /* currently allocated, want to be allocated */
+ } else if (drro->drr_type != doi.doi_type ||
+ drro->drr_blksz != doi.doi_data_block_size ||
+ drro->drr_bonustype != doi.doi_bonus_type ||
+ drro->drr_bonuslen != doi.doi_bonus_size) {
+ /* currently allocated, but with different properties */
err = dmu_object_reclaim(os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
- drro->drr_bonustype, drro->drr_bonuslen);
+ drro->drr_bonustype, drro->drr_bonuslen, tx);
}
if (err != 0) {
+ dmu_tx_commit(tx);
return (SET_ERROR(EINVAL));
}
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, drro->drr_object);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err != 0) {
- dmu_tx_abort(tx);
- return (err);
- }
-
dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
tx);
dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);