aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/dsl_dataset.c
diff options
context:
space:
mode:
authorChristopher Siden <[email protected]>2012-12-13 15:24:15 -0800
committerBrian Behlendorf <[email protected]>2013-01-08 10:35:35 -0800
commit9ae529ec5dbdc828ff8326beae58062971d74b2e (patch)
treed65c2d8913391cd03a3e8e06ad77721c5e9cdadc /module/zfs/dsl_dataset.c
parent15313c5e1866e81e2f4a30d2c50b43b5435e547a (diff)
Illumos #2619 and #2747
2619 asynchronous destruction of ZFS file systems 2747 SPA versioning with zfs feature flags Reviewed by: Matt Ahrens <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Richard Lowe <[email protected]> Reviewed by: Dan Kruchinin <[email protected]> Approved by: Eric Schrock <[email protected]> References: illumos/illumos-gate@53089ab7c84db6fb76c16ca50076c147cda11757 illumos/illumos-gate@ad135b5d644628e791c3188a6ecbd9c257961ef8 illumos changeset: 13700:2889e2596bd6 https://www.illumos.org/issues/2619 https://www.illumos.org/issues/2747 NOTE: The grub specific changes were not ported. This change must be made to the Linux grub packages. Ported-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dsl_dataset.c')
-rw-r--r--module/zfs/dsl_dataset.c185
1 files changed, 126 insertions, 59 deletions
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 21fdd081c..872d44afb 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@@ -35,6 +35,7 @@
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
+#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
@@ -102,7 +103,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
if (BP_IS_HOLE(bp))
return;
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
- ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
if (ds == NULL) {
/*
* Account for the meta-objset space in its placeholder
@@ -119,7 +120,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
- ds->ds_phys->ds_used_bytes += used;
+ ds->ds_phys->ds_referenced_bytes += used;
ds->ds_phys->ds_compressed_bytes += compressed;
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
ds->ds_phys->ds_unique_bytes += used;
@@ -215,8 +216,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
}
}
mutex_enter(&ds->ds_lock);
- ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
- ds->ds_phys->ds_used_bytes -= used;
+ ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
+ ds->ds_phys->ds_referenced_bytes -= used;
ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
ds->ds_phys->ds_compressed_bytes -= compressed;
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
@@ -823,8 +824,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsphys->ds_prev_snap_obj = origin->ds_object;
dsphys->ds_prev_snap_txg =
origin->ds_phys->ds_creation_txg;
- dsphys->ds_used_bytes =
- origin->ds_phys->ds_used_bytes;
+ dsphys->ds_referenced_bytes =
+ origin->ds_phys->ds_referenced_bytes;
dsphys->ds_compressed_bytes =
origin->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes =
@@ -938,7 +939,6 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
dsl_dataset_t *ds;
- int err;
err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
if (err == 0) {
@@ -1088,19 +1088,23 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
goto out_free;
/*
- * remove the objects in open context, so that we won't
- * have too much to do in syncing context.
+ * If async destruction is not enabled try to remove all objects
+ * while in the open context so that there is less work to do in
+ * the syncing context.
*/
- for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
- ds->ds_phys->ds_prev_snap_txg)) {
- /*
- * Ignore errors, if there is not enough disk space
- * we will deal with it in dsl_dataset_destroy_sync().
- */
- (void) dmu_free_object(os, obj);
+ if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
+ ds->ds_phys->ds_prev_snap_txg)) {
+ /*
+ * Ignore errors, if there is not enough disk space
+ * we will deal with it in dsl_dataset_destroy_sync().
+ */
+ (void) dmu_free_object(os, obj);
+ }
+ if (err != ESRCH)
+ goto out_free;
}
- if (err != ESRCH)
- goto out_free;
/*
* Only the ZIL knows how to free log blocks.
@@ -1261,7 +1265,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT(!dsl_dataset_is_snapshot(ds));
if (ds->ds_phys->ds_prev_snap_obj != 0)
- mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
+ mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
else
mrs_used = 0;
@@ -1269,7 +1273,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT3U(dlused, <=, mrs_used);
ds->ds_phys->ds_unique_bytes =
- ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
+ ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
SPA_VERSION_UNIQUE_ACCURATE)
@@ -1627,12 +1631,36 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
ds_next->ds_phys->ds_deadlist_obj);
}
+static int
+old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+ int err;
+ struct killarg ka;
+
+ /*
+ * Free everything that we point to (that's born after
+ * the previous snapshot, if we are a clone)
+ *
+ * NB: this should be very quick, because we already
+ * freed all the objects in open context.
+ */
+ ka.ds = ds;
+ ka.tx = tx;
+ err = traverse_dataset(ds,
+ ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
+ kill_blkptr, &ka);
+ ASSERT3U(err, ==, 0);
+ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
+
+ return (err);
+}
+
void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
struct dsl_ds_destroyarg *dsda = arg1;
dsl_dataset_t *ds = dsda->ds;
- int err;
+ int err = 0;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
@@ -1773,7 +1801,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
tx);
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
DD_USED_HEAD, used, comp, uncomp, tx);
- dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
@@ -1849,32 +1876,54 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
}
dsl_dataset_rele(ds_next, FTAG);
} else {
+ zfeature_info_t *async_destroy =
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
+
/*
* There's no next snapshot, so this is a head dataset.
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
- struct killarg ka;
-
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
ds->ds_phys->ds_deadlist_obj = 0;
- /*
- * Free everything that we point to (that's born after
- * the previous snapshot, if we are a clone)
- *
- * NB: this should be very quick, because we already
- * freed all the objects in open context.
- */
- ka.ds = ds;
- ka.tx = tx;
- err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- TRAVERSE_POST, kill_blkptr, &ka);
- ASSERT3U(err, ==, 0);
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
- ds->ds_phys->ds_unique_bytes == 0);
+ if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
+ err = old_synchronous_dataset_destroy(ds, tx);
+ } else {
+ /*
+ * Move the bptree into the pool's list of trees to
+ * clean up and update space accounting information.
+ */
+ uint64_t used, comp, uncomp;
+
+ ASSERT(err == 0 || err == EBUSY);
+ if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
+ spa_feature_incr(dp->dp_spa, async_destroy, tx);
+ dp->dp_bptree_obj = bptree_alloc(
+ dp->dp_meta_objset, tx);
+ VERIFY(zap_add(dp->dp_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+ &dp->dp_bptree_obj, tx) == 0);
+ }
+
+ used = ds->ds_dir->dd_phys->dd_used_bytes;
+ comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
+ uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
+
+ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
+ ds->ds_phys->ds_unique_bytes == used);
+
+ bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
+ &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
+ used, comp, uncomp, tx);
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
+ -used, -comp, -uncomp, tx);
+ dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+ used, comp, uncomp, tx);
+ }
if (ds->ds_prev != NULL) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
@@ -2065,7 +2114,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsphys->ds_creation_time = gethrestime_sec();
dsphys->ds_creation_txg = crtxg;
dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
- dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
+ dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
dsphys->ds_flags = ds->ds_phys->ds_flags;
@@ -2189,10 +2238,22 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
char buf[ZFS_MAXNAMELEN];
+ /*
+ * Even though we hold the dp_config_rwlock, the dataset
+ * may fail to open, returning ENOENT. If there is a
+ * thread concurrently attempting to destroy this
+ * dataset, it will have the ds_rwlock held for
+ * RW_WRITER. Our call to dsl_dataset_hold_obj() ->
+ * dsl_dataset_hold_ref() will fail its
+ * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
+ * dp_config_rwlock, and wait for the destroy progress
+ * and signal ds_exclusive_cv. If the destroy was
+ * successful, we will see that
+ * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
+ */
if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- za.za_first_integer, FTAG, &clone) != 0) {
- goto fail;
- }
+ za.za_first_integer, FTAG, &clone) != 0)
+ continue;
dsl_dir_name(clone->ds_dir, buf);
VERIFY(nvlist_add_boolean(val, buf) == 0);
dsl_dataset_rele(clone, FTAG);
@@ -2316,7 +2377,7 @@ dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
{
- *refdbytesp = ds->ds_phys->ds_used_bytes;
+ *refdbytesp = ds->ds_phys->ds_referenced_bytes;
*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
@@ -2652,7 +2713,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
- pa->used = origin_ds->ds_phys->ds_used_bytes;
+ pa->used = origin_ds->ds_phys->ds_referenced_bytes;
pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
for (snap = list_head(&pa->shared_snaps); snap;
@@ -2686,7 +2747,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* so we need to subtract out the clone origin's used space.
*/
if (pa->origin_origin) {
- pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
+ pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
@@ -3203,8 +3264,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deadlist_space(&csa->ohds->ds_deadlist,
&odl_used, &odl_comp, &odl_uncomp);
- dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
- (csa->ohds->ds_phys->ds_used_bytes + odl_used);
+ dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
+ (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
(csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
@@ -3233,8 +3294,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
/* swap ds_*_bytes */
- SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
- csa->cds->ds_phys->ds_used_bytes);
+ SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
+ csa->cds->ds_phys->ds_referenced_bytes);
SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
csa->cds->ds_phys->ds_compressed_bytes);
SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
@@ -3363,8 +3424,9 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
- if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
- if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
+ if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
+ if (inflight > 0 ||
+ ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
error = ERESTART;
else
error = EDQUOT;
@@ -3393,7 +3455,7 @@ dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (psa->psa_effective_value == 0)
return (0);
- if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
+ if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
psa->psa_effective_value < ds->ds_reserved)
return (ENOSPC);
@@ -4141,8 +4203,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
dsl_pool_t *dp = new->ds_dir->dd_pool;
*usedp = 0;
- *usedp += new->ds_phys->ds_used_bytes;
- *usedp -= oldsnap->ds_phys->ds_used_bytes;
+ *usedp += new->ds_phys->ds_referenced_bytes;
+ *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
*compp = 0;
*compp += new->ds_phys->ds_compressed_bytes;
@@ -4158,9 +4220,13 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
dsl_dataset_t *snap;
uint64_t used, comp, uncomp;
- err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
- if (err != 0)
- break;
+ if (snapobj == new->ds_object) {
+ snap = new;
+ } else {
+ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
+ if (err != 0)
+ break;
+ }
if (snap->ds_phys->ds_prev_snap_txg ==
oldsnap->ds_phys->ds_creation_txg) {
@@ -4189,7 +4255,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
* was not a snapshot of/before new.
*/
snapobj = snap->ds_phys->ds_prev_snap_obj;
- dsl_dataset_rele(snap, FTAG);
+ if (snap != new)
+ dsl_dataset_rele(snap, FTAG);
if (snapobj == 0) {
err = EINVAL;
break;