Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/bpobj.c        |   7
-rw-r--r--  module/zfs/dmu_send.c     | 107
-rw-r--r--  module/zfs/dsl_dataset.c  | 320
-rw-r--r--  module/zfs/dsl_deadlist.c |  30
-rw-r--r--  module/zfs/dsl_deleg.c    |  11
-rw-r--r--  module/zfs/dsl_pool.c     |   4
-rw-r--r--  module/zfs/spa_history.c  |   7
-rw-r--r--  module/zfs/zap_micro.c    |   3
-rw-r--r--  module/zfs/zfs_ioctl.c    | 214
9 files changed, 577 insertions(+), 126 deletions(-)
diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c
index 72be31235..022921c66 100644
--- a/module/zfs/bpobj.c
+++ b/module/zfs/bpobj.c
@@ -20,11 +20,13 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/bpobj.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
+#include <sys/dsl_pool.h>
uint64_t
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
@@ -440,7 +442,10 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
struct space_range_arg *sra = arg;
if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
- sra->used += bp_get_dsize_sync(sra->spa, bp);
+ if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
+ sra->used += bp_get_dsize_sync(sra->spa, bp);
+ else
+ sra->used += bp_get_dsize(sra->spa, bp);
sra->comp += BP_GET_PSIZE(bp);
sra->uncomp += BP_GET_UCSIZE(bp);
}
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index fad770e27..2f0613211 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -20,9 +20,9 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/*
+ * Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -47,6 +47,9 @@
#include <sys/ddt.h>
#include <sys/zfs_onexit.h>
+/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
+int zfs_send_corrupt_data = B_FALSE;
+
static char *dmu_recv_tag = "dmu_recv_tag";
/*
@@ -368,8 +371,20 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
if (dsl_read(NULL, spa, bp, pbuf,
arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
- return (EIO);
+ ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
+ if (zfs_send_corrupt_data) {
+ uint64_t *ptr;
+ /* Send a block filled with 0x"zfs badd bloc" */
+ abuf = arc_buf_alloc(spa, blksz, &abuf,
+ ARC_BUFC_DATA);
+ for (ptr = abuf->b_data;
+ (char *)ptr < (char *)abuf->b_data + blksz;
+ ptr++)
+ *ptr = 0x2f5baddb10c;
+ } else {
+ return (EIO);
+ }
+ }
err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz,
blksz, bp, abuf->b_data);
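
The magic number in the hunk above is hex-speak: 0x2f5 ~ "zfs", 0xbadd ~ "badd", 0xb10c ~ "bloc". A throwaway user-space sketch (not part of the commit) of the same fill pattern, with the buffer size chosen arbitrarily:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t pattern = 0x2f5baddb10cULL;	/* "zfs badd bloc" */
	uint64_t block[512];			/* stands in for abuf->b_data */
	size_t i;

	/* Same loop shape as backup_cb(): fill the block word by word. */
	for (i = 0; i < sizeof (block) / sizeof (block[0]); i++)
		block[i] = pattern;
	printf("first word: 0x%llx\n", (unsigned long long)block[0]);
	return (0);
}
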
@@ -498,6 +513,85 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
return (0);
}
+int
+dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+ uint64_t *sizep)
+{
+ dsl_dataset_t *ds = tosnap->os_dsl_dataset;
+ dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ int err;
+ uint64_t size, recordsize;
+
+ /* tosnap must be a snapshot */
+ if (ds->ds_phys->ds_next_snap_obj == 0)
+ return (EINVAL);
+
+ /* fromsnap must be an earlier snapshot from the same fs as tosnap */
+ if (fromds && (ds->ds_dir != fromds->ds_dir ||
+ fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
+ return (EXDEV);
+
+ if (fromorigin) {
+ if (fromsnap)
+ return (EINVAL);
+
+ if (dsl_dir_is_clone(ds->ds_dir)) {
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
+ rw_exit(&dp->dp_config_rwlock);
+ if (err)
+ return (err);
+ } else {
+ fromorigin = B_FALSE;
+ }
+ }
+
+ /* Get uncompressed size estimate of changed data. */
+ if (fromds == NULL) {
+ size = ds->ds_phys->ds_uncompressed_bytes;
+ } else {
+ uint64_t used, comp;
+ err = dsl_dataset_space_written(fromds, ds,
+ &used, &comp, &size);
+ if (fromorigin)
+ dsl_dataset_rele(fromds, FTAG);
+ if (err)
+ return (err);
+ }
+
+ /*
+ * Assume that space (both on-disk and in-stream) is dominated by
+ * data. We will adjust for indirect blocks and the copies property,
+ * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
+ */
+
+ /*
+ * Subtract out approximate space used by indirect blocks.
+ * Assume most space is used by data blocks (non-indirect, non-dnode).
+ * Assume all blocks are recordsize. Assume ditto blocks and
+ * internal fragmentation counter out compression.
+ *
+ * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
+ * block, which we observe in practice.
+ */
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ err = dsl_prop_get_ds(ds, "recordsize",
+ sizeof (recordsize), 1, &recordsize, NULL);
+ rw_exit(&dp->dp_config_rwlock);
+ if (err)
+ return (err);
+ size -= size / recordsize * sizeof (blkptr_t);
+
+ /* Add in the space for the record associated with each block. */
+ size += size / recordsize * sizeof (dmu_replay_record_t);
+
+ *sizep = size;
+
+ return (0);
+}
+
struct recvbeginsyncarg {
const char *tofs;
const char *tosnap;
@@ -1500,7 +1594,7 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
struct recvendsyncarg resa;
dsl_dataset_t *ds = drc->drc_logical_ds;
- int err;
+ int err, myerr;
/*
* XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
@@ -1538,7 +1632,8 @@ out:
if (err == 0 && drc->drc_guid_to_ds_map != NULL)
(void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
dsl_dataset_disown(ds, dmu_recv_tag);
- (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
+ myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
+ ASSERT3U(myerr, ==, 0);
return (err);
}
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 2deec8cf1..25c8ac6b1 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -907,69 +907,56 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
return (dsobj);
}
-struct destroyarg {
- dsl_sync_task_group_t *dstg;
- char *snapname;
- char *failed;
- boolean_t defer;
-};
-
-static int
-dsl_snapshot_destroy_one(const char *name, void *arg)
-{
- struct destroyarg *da = arg;
- dsl_dataset_t *ds;
- int err;
- char *dsname;
-
- dsname = kmem_asprintf("%s@%s", name, da->snapname);
- err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
- strfree(dsname);
- if (err == 0) {
- struct dsl_ds_destroyarg *dsda;
-
- dsl_dataset_make_exclusive(ds, da->dstg);
- dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
- dsda->ds = ds;
- dsda->defer = da->defer;
- dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, dsda, da->dstg, 0);
- } else if (err == ENOENT) {
- err = 0;
- } else {
- (void) strcpy(da->failed, name);
- }
- return (err);
-}
-
/*
- * Destroy 'snapname' in all descendants of 'fsname'.
+ * The snapshots must all be in the same pool.
*/
-#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
-dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
+dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
{
int err;
- struct destroyarg da;
dsl_sync_task_t *dst;
spa_t *spa;
+ nvpair_t *pair;
+ dsl_sync_task_group_t *dstg;
- err = spa_open(fsname, &spa, FTAG);
+ pair = nvlist_next_nvpair(snaps, NULL);
+ if (pair == NULL)
+ return (0);
+
+ err = spa_open(nvpair_name(pair), &spa, FTAG);
if (err)
return (err);
- da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- da.snapname = snapname;
- da.failed = fsname;
- da.defer = defer;
+ dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- err = dmu_objset_find(fsname,
- dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
+ for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(snaps, pair)) {
+ dsl_dataset_t *ds;
+ int err;
+
+ err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
+ if (err == 0) {
+ struct dsl_ds_destroyarg *dsda;
+
+ dsl_dataset_make_exclusive(ds, dstg);
+ dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
+ KM_SLEEP);
+ dsda->ds = ds;
+ dsda->defer = defer;
+ dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
+ dsl_dataset_destroy_sync, dsda, dstg, 0);
+ } else if (err == ENOENT) {
+ err = 0;
+ } else {
+ (void) strcpy(failed, nvpair_name(pair));
+ break;
+ }
+ }
if (err == 0)
- err = dsl_sync_task_group_wait(da.dstg);
+ err = dsl_sync_task_group_wait(dstg);
- for (dst = list_head(&da.dstg->dstg_tasks); dst;
- dst = list_next(&da.dstg->dstg_tasks, dst)) {
+ for (dst = list_head(&dstg->dstg_tasks); dst;
+ dst = list_next(&dstg->dstg_tasks, dst)) {
struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
dsl_dataset_t *ds = dsda->ds;
@@ -977,17 +964,17 @@ dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
* Return the file system name that triggered the error
*/
if (dst->dst_err) {
- dsl_dataset_name(ds, fsname);
- *strchr(fsname, '@') = '\0';
+ dsl_dataset_name(ds, failed);
}
ASSERT3P(dsda->rm_origin, ==, NULL);
- dsl_dataset_disown(ds, da.dstg);
+ dsl_dataset_disown(ds, dstg);
kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
}
- dsl_sync_task_group_destroy(da.dstg);
+ dsl_sync_task_group_destroy(dstg);
spa_close(spa, FTAG);
return (err);
}
static boolean_t
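
For reference, a hedged sketch of how a kernel-side caller might feed dmu_snapshots_destroy_nvl(); the snapshot names are hypothetical and error paths are compressed:

/*
 * Sketch only: the nvlist carries full snapshot names as boolean
 * nvpairs, all from the same pool, matching what the function expects.
 */
static int
destroy_snaps_sketch(void)
{
	nvlist_t *snaps;
	char failed[MAXNAMELEN] = "";
	int err;

	VERIFY(nvlist_alloc(&snaps, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_boolean(snaps, "tank/fs@snap1") == 0);
	VERIFY(nvlist_add_boolean(snaps, "tank/fs/child@snap1") == 0);

	err = dmu_snapshots_destroy_nvl(snaps, B_FALSE, failed);
	if (err != 0)
		cmn_err(CE_NOTE, "destroy stopped at %s (%d)", failed, err);
	nvlist_free(snaps);
	return (err);
}
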
@@ -2151,6 +2138,55 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dmu_objset_sync(ds->ds_objset, zio, tx);
}
+static void
+get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
+{
+ uint64_t count = 0;
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ nvlist_t *propval;
+ nvlist_t *val;
+
+ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
+ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ /*
+ * There may be missing entries in ds_next_clones_obj
+ * due to a bug in a previous version of the code.
+ * Only trust it if it has the right number of entries.
+ */
+ if (ds->ds_phys->ds_next_clones_obj != 0) {
+ ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
+ &count));
+ }
+ if (count != ds->ds_phys->ds_num_children - 1) {
+ goto fail;
+ }
+ for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ dsl_dataset_t *clone;
+ char buf[ZFS_MAXNAMELEN];
+ if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
+ za.za_first_integer, FTAG, &clone) != 0) {
+ goto fail;
+ }
+ dsl_dir_name(clone->ds_dir, buf);
+ VERIFY(nvlist_add_boolean(val, buf) == 0);
+ dsl_dataset_rele(clone, FTAG);
+ }
+ zap_cursor_fini(&zc);
+ VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
+ VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
+ propval) == 0);
+fail:
+ nvlist_free(val);
+ nvlist_free(propval);
+ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+}
+
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
@@ -2181,6 +2217,27 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
+ if (ds->ds_phys->ds_prev_snap_obj != 0) {
+ uint64_t written, comp, uncomp;
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ dsl_dataset_t *prev;
+ int err;
+
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
+ rw_exit(&dp->dp_config_rwlock);
+ if (err == 0) {
+ err = dsl_dataset_space_written(prev, ds, &written,
+ &comp, &uncomp);
+ dsl_dataset_rele(prev, FTAG);
+ if (err == 0) {
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
+ written);
+ }
+ }
+ }
+
ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
(ds->ds_phys->ds_uncompressed_bytes * 100 /
ds->ds_phys->ds_compressed_bytes);
@@ -2194,6 +2251,8 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
ds->ds_phys->ds_unique_bytes);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
+
+ get_clones_stat(ds, nv);
}
}
@@ -4022,7 +4081,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
}
/*
- * Note, this fuction is used as the callback for dmu_objset_find(). We
+ * Note, this function is used as the callback for dmu_objset_find(). We
* always return 0 so that we will continue to find and process
* inconsistent datasets, even if we encounter an error trying to
* process one of them.
@@ -4042,7 +4101,157 @@ dsl_destroy_inconsistent(const char *dsname, void *arg)
return (0);
}
+
+/*
+ * Return (in *usedp) the amount of space written in new that is not
+ * present in oldsnap. New may be a snapshot or the head. Old must be
+ * a snapshot before new, in new's filesystem (or its origin). If not then
+ * fail and return EINVAL.
+ *
+ * The written space is calculated by considering two components: First, we
+ * ignore any freed space, and calculate the written as new's used space
+ * minus old's used space. Next, we add in the amount of space that was freed
+ * between the two snapshots, thus reducing new's used space relative to old's.
+ * Specifically, this is the space that was born before old->ds_creation_txg,
+ * and freed before new (ie. on new's deadlist or a previous deadlist).
+ *
+ * space freed [---------------------]
+ * snapshots ---O-------O--------O-------O------
+ * oldsnap new
+ */
+int
+dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
+ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
+{
+ int err = 0;
+ uint64_t snapobj;
+ dsl_pool_t *dp = new->ds_dir->dd_pool;
+
+ *usedp = 0;
+ *usedp += new->ds_phys->ds_used_bytes;
+ *usedp -= oldsnap->ds_phys->ds_used_bytes;
+
+ *compp = 0;
+ *compp += new->ds_phys->ds_compressed_bytes;
+ *compp -= oldsnap->ds_phys->ds_compressed_bytes;
+
+ *uncompp = 0;
+ *uncompp += new->ds_phys->ds_uncompressed_bytes;
+ *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
+
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ snapobj = new->ds_object;
+ while (snapobj != oldsnap->ds_object) {
+ dsl_dataset_t *snap;
+ uint64_t used, comp, uncomp;
+
+ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
+ if (err != 0)
+ break;
+
+ if (snap->ds_phys->ds_prev_snap_txg ==
+ oldsnap->ds_phys->ds_creation_txg) {
+ /*
+ * The blocks in the deadlist can not be born after
+ * ds_prev_snap_txg, so get the whole deadlist space,
+ * which is more efficient (especially for old-format
+ * deadlists). Unfortunately the deadlist code
+ * doesn't have enough information to make this
+ * optimization itself.
+ */
+ dsl_deadlist_space(&snap->ds_deadlist,
+ &used, &comp, &uncomp);
+ } else {
+ dsl_deadlist_space_range(&snap->ds_deadlist,
+ 0, oldsnap->ds_phys->ds_creation_txg,
+ &used, &comp, &uncomp);
+ }
+ *usedp += used;
+ *compp += comp;
+ *uncompp += uncomp;
+
+ /*
+ * If we get to the beginning of the chain of snapshots
+ * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
+ * was not a snapshot of/before new.
+ */
+ snapobj = snap->ds_phys->ds_prev_snap_obj;
+ dsl_dataset_rele(snap, FTAG);
+ if (snapobj == 0) {
+ err = EINVAL;
+ break;
+ }
+ }
+ rw_exit(&dp->dp_config_rwlock);
+ return (err);
+}
+
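
A worked instance of the two-component calculation described above, with invented figures:

/*
 * Illustration only.  Suppose oldsnap used 10 GB, new uses 12 GB, and
 * 3 GB of blocks born before oldsnap's ds_creation_txg were freed
 * between the two (i.e. they sit on deadlists in (oldsnap, new]):
 *
 *	written  = 12 GB - 10 GB  (used-space delta)           = 2 GB
 *	written += 3 GB           (old blocks freed since old)  = 5 GB
 *
 * The deadlist walk in the loop above supplies the 3 GB term.
 */
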
+/*
+ * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
+ * lastsnap, and all snapshots in between are deleted.
+ *
+ * blocks that would be freed [---------------------------]
+ * snapshots ---O-------O--------O-------O--------O
+ * firstsnap lastsnap
+ *
+ * This is the set of blocks that were born after the snap before firstsnap,
+ * (birth > firstsnap->prev_snap_txg) and died before the snap after the
+ * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
+ * We calculate this by iterating over the relevant deadlists (from the snap
+ * after lastsnap, backward to the snap after firstsnap), summing up the
+ * space on the deadlist that was born after the snap before firstsnap.
+ */
+int
+dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *lastsnap,
+ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
+{
+ int err = 0;
+ uint64_t snapobj;
+ dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
+
+ ASSERT(dsl_dataset_is_snapshot(firstsnap));
+ ASSERT(dsl_dataset_is_snapshot(lastsnap));
+
+ /*
+ * Check that the snapshots are in the same dsl_dir, and firstsnap
+ * is before lastsnap.
+ */
+ if (firstsnap->ds_dir != lastsnap->ds_dir ||
+ firstsnap->ds_phys->ds_creation_txg >
+ lastsnap->ds_phys->ds_creation_txg)
+ return (EINVAL);
+
+ *usedp = *compp = *uncompp = 0;
+
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ snapobj = lastsnap->ds_phys->ds_next_snap_obj;
+ while (snapobj != firstsnap->ds_object) {
+ dsl_dataset_t *ds;
+ uint64_t used, comp, uncomp;
+
+ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
+ if (err != 0)
+ break;
+
+ dsl_deadlist_space_range(&ds->ds_deadlist,
+ firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX,
+ &used, &comp, &uncomp);
+ *usedp += used;
+ *compp += comp;
+ *uncompp += uncomp;
+
+ snapobj = ds->ds_phys->ds_prev_snap_obj;
+ ASSERT3U(snapobj, !=, 0);
+ dsl_dataset_rele(ds, FTAG);
+ }
+ rw_exit(&dp->dp_config_rwlock);
+ return (err);
+}
+
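
And a matching illustration for the would-free walk:

/*
 * Illustration only.  With snapshots A - B - C - D and
 * dsl_dataset_space_wouldfree(B, C), the loop starts at C's next
 * snapshot (D) and walks back to B, summing from the deadlists of D
 * and C only the space born after A (birth > B->ds_prev_snap_txg).
 * Those blocks live solely inside B..C, so destroying both snapshots
 * would reclaim exactly that space.
 */
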
#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_snapshots_destroy_nvl);
EXPORT_SYMBOL(dsl_dataset_hold);
EXPORT_SYMBOL(dsl_dataset_hold_obj);
EXPORT_SYMBOL(dsl_dataset_own);
@@ -4056,7 +4265,6 @@ EXPORT_SYMBOL(dsl_dataset_make_exclusive);
EXPORT_SYMBOL(dsl_dataset_create_sync);
EXPORT_SYMBOL(dsl_dataset_create_sync_dd);
EXPORT_SYMBOL(dsl_dataset_destroy);
-EXPORT_SYMBOL(dsl_snapshots_destroy);
EXPORT_SYMBOL(dsl_dataset_destroy_check);
EXPORT_SYMBOL(dsl_dataset_destroy_sync);
EXPORT_SYMBOL(dsl_dataset_snapshot_check);
@@ -4072,6 +4280,8 @@ EXPORT_SYMBOL(dsl_dataset_get_blkptr);
EXPORT_SYMBOL(dsl_dataset_set_blkptr);
EXPORT_SYMBOL(dsl_dataset_get_spa);
EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap);
+EXPORT_SYMBOL(dsl_dataset_space_written);
+EXPORT_SYMBOL(dsl_dataset_space_wouldfree);
EXPORT_SYMBOL(dsl_dataset_sync);
EXPORT_SYMBOL(dsl_dataset_block_born);
EXPORT_SYMBOL(dsl_dataset_block_kill);
diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c
index 064f8aceb..dd6db2120 100644
--- a/module/zfs/dsl_deadlist.c
+++ b/module/zfs/dsl_deadlist.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/dsl_dataset.h>
@@ -29,6 +30,26 @@
#include <sys/zfs_context.h>
#include <sys/dsl_pool.h>
+/*
+ * Deadlist concurrency:
+ *
+ * Deadlists can only be modified from the syncing thread.
+ *
+ * Except for dsl_deadlist_insert(), it can only be modified with the
+ * dp_config_rwlock held with RW_WRITER.
+ *
+ * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can
+ * be called concurrently, from open context, with the dp_config_rwlock held
+ * with RW_READER.
+ *
+ * Therefore, we only need to provide locking between dsl_deadlist_insert() and
+ * the accessors, protecting:
+ * dl_phys->dl_used,comp,uncomp
+ * and protecting the dl_tree from being loaded.
+ * The locking is provided by dl_lock.  Note that the bpobj_t
+ * provides its own locking, and dl_oldfmt is immutable.
+ */
+
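
A minimal sketch of the writer side of the pairing described above; dsl_deadlist_insert() itself is not shown in this hunk, and the field update here is illustrative rather than the function's actual body:

static void
deadlist_insert_lock_sketch(dsl_deadlist_t *dl, const blkptr_t *bp)
{
	/* Serialize with the open-context accessors on dl_lock. */
	mutex_enter(&dl->dl_lock);
	dl->dl_phys->dl_used += BP_GET_ASIZE(bp);	/* protected field */
	mutex_exit(&dl->dl_lock);
}
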
static int
dsl_deadlist_compare(const void *arg1, const void *arg2)
{
@@ -309,14 +330,14 @@ dsl_deadlist_space(dsl_deadlist_t *dl,
* return space used in the range (mintxg, maxtxg].
* Includes maxtxg, does not include mintxg.
* mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
- * UINT64_MAX).
+ * larger than any bp in the deadlist (eg. UINT64_MAX)).
*/
void
dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
- dsl_deadlist_entry_t dle_tofind;
dsl_deadlist_entry_t *dle;
+ dsl_deadlist_entry_t dle_tofind;
avl_index_t where;
if (dl->dl_oldfmt) {
@@ -325,9 +346,10 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
return;
}
- dsl_deadlist_load_tree(dl);
*usedp = *compp = *uncompp = 0;
+ mutex_enter(&dl->dl_lock);
+ dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
/*
@@ -336,6 +358,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
*/
ASSERT(dle != NULL ||
avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
+
for (; dle && dle->dle_mintxg < maxtxg;
dle = AVL_NEXT(&dl->dl_tree, dle)) {
uint64_t used, comp, uncomp;
@@ -347,6 +370,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
*compp += comp;
*uncompp += uncomp;
}
+ mutex_exit(&dl->dl_lock);
}
static void
diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c
index 6b5c8424a..a4d4e42da 100644
--- a/module/zfs/dsl_deleg.c
+++ b/module/zfs/dsl_deleg.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
/*
@@ -534,10 +535,12 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl,
}
/*
- * Check if user has requested permission.
+ * Check if user has requested permission. If descendent is set, must have
+ * descendent perms.
*/
int
-dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
+dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
+ cred_t *cr)
{
dsl_dir_t *dd;
dsl_pool_t *dp;
@@ -558,7 +561,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
SPA_VERSION_DELEGATED_PERMS)
return (EPERM);
- if (dsl_dataset_is_snapshot(ds)) {
+ if (dsl_dataset_is_snapshot(ds) || descendent) {
/*
* Snapshots are treated as descendents only,
* local permissions do not apply.
@@ -651,7 +654,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
if (error)
return (error);
- error = dsl_deleg_access_impl(ds, perm, cr);
+ error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
dsl_dataset_rele(ds, FTAG);
return (error);
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index d428b7ad7..3b285df65 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/dsl_pool.h>
@@ -291,7 +292,10 @@ static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
+ dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
+ rw_exit(&dp->dp_config_rwlock);
return (0);
}
diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c
index 243f2b4ab..7a2537875 100644
--- a/module/zfs/spa_history.c
+++ b/module/zfs/spa_history.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@@ -101,11 +102,11 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
/*
* Figure out maximum size of history log. We set it at
- * 1% of pool size, with a max of 32MB and min of 128KB.
+ * 0.1% of pool size, with a max of 1G and min of 128KB.
*/
shpp->sh_phys_max_off =
- metaslab_class_get_dspace(spa_normal_class(spa)) / 100;
- shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20);
+ metaslab_class_get_dspace(spa_normal_class(spa)) / 1000;
+ shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30);
shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
dmu_buf_rele(dbp, FTAG);
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
index 49aad2a3b..bd3d4a8d8 100644
--- a/module/zfs/zap_micro.c
+++ b/module/zfs/zap_micro.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/zio.h>
@@ -1404,7 +1405,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
}
/*
- * We lock the zap with adding ==  FALSE. Because, if we pass
+ * We lock the zap with adding == FALSE. Because, if we pass
* the actual value of add, it could trigger a mzap_upgrade().
* At present we are just evaluating the possibility of this operation
* and hence we do not want to trigger an upgrade.
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 65b0a1975..94c91e876 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -23,8 +23,6 @@
* Portions Copyright 2011 Martin Matuska
* Portions Copyright 2012 Pawel Jakub Dawidek <[email protected]>
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- */
-/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
@@ -319,17 +317,37 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
return (zfs_dozonecheck_impl(dataset, zoned, cr));
}
+/*
+ * If name ends in a '@', then require recursive permissions.
+ */
int
zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
{
int error;
+ boolean_t descendent = B_FALSE;
+ dsl_dataset_t *ds;
+ char *at;
+
+ at = strchr(name, '@');
+ if (at != NULL && at[1] == '\0') {
+ *at = '\0';
+ descendent = B_TRUE;
+ }
+
+ error = dsl_dataset_hold(name, FTAG, &ds);
+ if (at != NULL)
+ *at = '@';
+ if (error != 0)
+ return (error);
- error = zfs_dozonecheck(name, cr);
+ error = zfs_dozonecheck_ds(name, ds, cr);
if (error == 0) {
error = secpolicy_zfs(cr);
if (error)
- error = dsl_deleg_access(name, perm, cr);
+ error = dsl_deleg_access_impl(ds, descendent, perm, cr);
}
+
+ dsl_dataset_rele(ds, FTAG);
return (error);
}
@@ -343,7 +361,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
if (error == 0) {
error = secpolicy_zfs(cr);
if (error)
- error = dsl_deleg_access_impl(ds, perm, cr);
+ error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
}
return (error);
}
@@ -666,24 +684,14 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
/*
* Destroying snapshots with delegated permissions requires
* descendent mount and destroy permissions.
- * Reassemble the full filesystem@snap name so dsl_deleg_access()
- * can do the correct permission check.
- *
- * Since this routine is used when doing a recursive destroy of snapshots
- * and destroying snapshots requires descendent permissions, a successfull
- * check of the top level snapshot applies to snapshots of all descendent
- * datasets as well.
- *
- * The target snapshot may not exist when doing a recursive destroy.
- * In this case fallback to permissions of the parent dataset.
*/
static int
-zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
{
int error;
char *dsname;
- dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
+ dsname = kmem_asprintf("%s@", zc->zc_name);
error = zfs_secpolicy_destroy_perms(dsname, cr);
if (error == ENOENT)
@@ -1742,9 +1750,12 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
* inconsistent. So this is a bit of a workaround...
* XXX reading with out owning
*/
- if (!zc->zc_objset_stats.dds_inconsistent) {
- if (dmu_objset_type(os) == DMU_OST_ZVOL)
- error = zvol_get_stats(os, nv);
+ if (!zc->zc_objset_stats.dds_inconsistent &&
+ dmu_objset_type(os) == DMU_OST_ZVOL) {
+ error = zvol_get_stats(os, nv);
+ if (error == EIO)
+ return (error);
+ VERIFY3S(error, ==, 0);
}
if (error == 0)
error = put_nvlist(zc, nv);
@@ -1954,8 +1965,7 @@ top:
NULL, &zc->zc_cookie);
if (error == ENOENT)
error = ESRCH;
- } while (error == 0 && dataset_name_hidden(zc->zc_name) &&
- !(zc->zc_iflags & FKIOCTL));
+ } while (error == 0 && dataset_name_hidden(zc->zc_name));
dmu_objset_rele(os, FTAG);
/*
@@ -2233,6 +2243,8 @@ retry:
if (nvpair_type(propval) !=
DATA_TYPE_UINT64_ARRAY)
err = EINVAL;
+ } else {
+ err = EINVAL;
}
} else if (err == 0) {
if (nvpair_type(propval) == DATA_TYPE_STRING) {
@@ -3118,25 +3130,45 @@ zfs_unmount_snap(const char *name, void *arg)
/*
* inputs:
- * zc_name name of filesystem
- * zc_value short name of snapshot
+ * zc_name name of filesystem, snaps must be under it
+ * zc_nvlist_src[_size] full names of snapshots to destroy
* zc_defer_destroy mark for deferred destroy
*
- * outputs: none
+ * outputs:
+ * zc_name on failure, name of failed snapshot
*/
static int
-zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
+zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
{
- int err;
+ int err, len;
+ nvlist_t *nvl;
+ nvpair_t *pair;
- if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
- return (EINVAL);
- err = dmu_objset_find(zc->zc_name,
- zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
- if (err)
+ if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvl)) != 0)
return (err);
- return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
- zc->zc_defer_destroy));
+
+ len = strlen(zc->zc_name);
+ for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(nvl, pair)) {
+ const char *name = nvpair_name(pair);
+ /*
+ * The snap name must be underneath the zc_name. This ensures
+ * that our permission checks were legitimate.
+ */
+ if (strncmp(zc->zc_name, name, len) != 0 ||
+ (name[len] != '@' && name[len] != '/')) {
+ nvlist_free(nvl);
+ return (EINVAL);
+ }
+
+ (void) zfs_unmount_snap(name, NULL);
+ }
+
+ err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
+ zc->zc_name);
+ nvlist_free(nvl);
+ return (err);
}
/*
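
From user space, the new ioctl is driven by packing the snapshot list into zc_nvlist_src. A hedged sketch, assuming libnvpair and a zfs_cmd_t layout matching the kernel's, with error handling elided:

static int
destroy_snaps_ioctl_sketch(int zfs_fd)
{
	zfs_cmd_t zc = { 0 };
	nvlist_t *nvl;
	char *packed = NULL;
	size_t len = 0;

	verify(nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) == 0);
	verify(nvlist_add_boolean(nvl, "tank/fs@snap1") == 0);
	verify(nvlist_add_boolean(nvl, "tank/fs@snap2") == 0);
	verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);

	/* zc_name must be an ancestor of every snapshot in the list. */
	(void) strlcpy(zc.zc_name, "tank/fs", sizeof (zc.zc_name));
	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
	zc.zc_nvlist_src_size = len;
	zc.zc_defer_destroy = B_FALSE;

	return (ioctl(zfs_fd, ZFS_IOC_DESTROY_SNAPS, &zc));
}
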
@@ -3787,6 +3819,8 @@ out:
* zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
* zc_sendobj objsetid of snapshot to send
* zc_fromobj objsetid of incremental fromsnap (may be zero)
+ * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
+ * output size in zc_objset_type.
*
* outputs: none
*/
@@ -3795,13 +3829,13 @@ zfs_ioc_send(zfs_cmd_t *zc)
{
objset_t *fromsnap = NULL;
objset_t *tosnap;
- file_t *fp;
int error;
offset_t off;
dsl_dataset_t *ds;
dsl_dataset_t *dsfrom = NULL;
spa_t *spa;
dsl_pool_t *dp;
+ boolean_t estimate = (zc->zc_guid != 0);
error = spa_open(zc->zc_name, &spa, FTAG);
if (error)
@@ -3842,20 +3876,26 @@ zfs_ioc_send(zfs_cmd_t *zc)
spa_close(spa, FTAG);
}
- fp = getf(zc->zc_cookie);
- if (fp == NULL) {
- dsl_dataset_rele(ds, FTAG);
- if (dsfrom)
- dsl_dataset_rele(dsfrom, FTAG);
- return (EBADF);
- }
+ if (estimate) {
+ error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
+ &zc->zc_objset_type);
+ } else {
+ file_t *fp = getf(zc->zc_cookie);
+ if (fp == NULL) {
+ dsl_dataset_rele(ds, FTAG);
+ if (dsfrom)
+ dsl_dataset_rele(dsfrom, FTAG);
+ return (EBADF);
+ }
- off = fp->f_offset;
- error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
+ off = fp->f_offset;
+ error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj,
+ fp->f_vnode, &off);
- if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
- fp->f_offset = off;
- releasef(zc->zc_cookie);
+ if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
+ fp->f_offset = off;
+ releasef(zc->zc_cookie);
+ }
if (dsfrom)
dsl_dataset_rele(dsfrom, FTAG);
dsl_dataset_rele(ds, FTAG);
@@ -4591,6 +4631,70 @@ zfs_ioc_events_clear(zfs_cmd_t *zc)
}
/*
+ * inputs:
+ * zc_name name of new filesystem or snapshot
+ * zc_value full name of old snapshot
+ *
+ * outputs:
+ * zc_cookie space in bytes
+ * zc_objset_type compressed space in bytes
+ * zc_perm_action uncompressed space in bytes
+ */
+static int
+zfs_ioc_space_written(zfs_cmd_t *zc)
+{
+ int error;
+ dsl_dataset_t *new, *old;
+
+ error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
+ if (error != 0)
+ return (error);
+ error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
+ if (error != 0) {
+ dsl_dataset_rele(new, FTAG);
+ return (error);
+ }
+
+ error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
+ &zc->zc_objset_type, &zc->zc_perm_action);
+ dsl_dataset_rele(old, FTAG);
+ dsl_dataset_rele(new, FTAG);
+ return (error);
+}
+
+/*
+ * inputs:
+ * zc_name full name of last snapshot
+ * zc_value full name of first snapshot
+ *
+ * outputs:
+ * zc_cookie space in bytes
+ * zc_objset_type compressed space in bytes
+ * zc_perm_action uncompressed space in bytes
+ */
+static int
+zfs_ioc_space_snaps(zfs_cmd_t *zc)
+{
+ int error;
+ dsl_dataset_t *new, *old;
+
+ error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
+ if (error != 0)
+ return (error);
+ error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
+ if (error != 0) {
+ dsl_dataset_rele(new, FTAG);
+ return (error);
+ }
+
+ error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
+ &zc->zc_objset_type, &zc->zc_perm_action);
+ dsl_dataset_rele(old, FTAG);
+ dsl_dataset_rele(new, FTAG);
+ return (error);
+}
+
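
And similarly for the space-accounting ioctls above; the snapshot names are placeholders, and <errno.h> is assumed:

static int
space_written_ioctl_sketch(int zfs_fd, uint64_t *written)
{
	zfs_cmd_t zc = { 0 };

	(void) strlcpy(zc.zc_name, "tank/fs@today", sizeof (zc.zc_name));
	(void) strlcpy(zc.zc_value, "tank/fs@yesterday", sizeof (zc.zc_value));

	if (ioctl(zfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc) != 0)
		return (errno);
	*written = zc.zc_cookie;	/* comp/uncomp land in the other fields */
	return (0);
}
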
+/*
* pool create, destroy, and export don't log the history as part of
* zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
* do the logging of those commands.
@@ -4656,7 +4760,7 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
- { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE,
+ { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
POOL_CHECK_NONE },
{ zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
POOL_CHECK_NONE },
@@ -4670,8 +4774,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
POOL_CHECK_NONE },
{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
- { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
- B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
+ DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
@@ -4716,12 +4820,16 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
POOL_CHECK_SUSPENDED },
- { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE,
POOL_CHECK_NONE },
{ zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE,
- POOL_CHECK_NONE }
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
};
int