aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/dsl_dataset.c
diff options
context:
space:
mode:
author Jerry Jelinek <[email protected]> 2015-04-02 00:07:48 +1100
committer Brian Behlendorf <[email protected]> 2015-04-28 16:22:51 -0700
commit 788eb90c4ca64beaf85ef542bb22c64fcb50993a (patch)
tree c759bb7636e66d33713490aa8170ec4c03a1be62 /module/zfs/dsl_dataset.c
parent 308a451f7f5ecded4f194a50abab5ca123b6d397 (diff)
Illumos 3897 - zfs filesystem and snapshot limits
3897 zfs filesystem and snapshot limits
Author: Jerry Jelinek <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Approved by: Christopher Siden <[email protected]>
References:
  https://www.illumos.org/issues/3897
  https://github.com/illumos/illumos-gate/commit/a2afb61
Porting Notes:
  dsl_dataset_snapshot_check(): reduce stack usage using kmem_alloc().
Ported-by: Chris Dunlop <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dsl_dataset.c')
-rw-r--r-- module/zfs/dsl_dataset.c 163
1 files changed, 154 insertions, 9 deletions
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 79cb6a3a2..34c473006 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
*/
@@ -318,7 +318,8 @@ dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
}
int
-dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
+dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
+ boolean_t adj_cnt)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
@@ -335,6 +336,11 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
err = zap_remove_norm(mos, snapobj, name, mt, tx);
if (err == ENOTSUP && mt == MT_FIRST)
err = zap_remove(mos, snapobj, name, tx);
+
+ if (err == 0 && adj_cnt)
+ dsl_fs_ss_count_adjust(ds->ds_dir, -1,
+ DD_FIELD_SNAPSHOT_COUNT, tx);
+
return (err);
}
@@ -767,6 +773,21 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
dsl_deleg_set_create_perms(dd, tx, cr);
+ /*
+ * Since we're creating a new node we know it's a leaf, so we can
+ * initialize the counts if the limit feature is active.
+ */
+ if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
+ uint64_t cnt = 0;
+ objset_t *os = dd->dd_pool->dp_meta_objset;
+
+ dsl_dir_zapify(dd, tx);
+ VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
+ sizeof (cnt), 1, &cnt, tx));
+ VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
+ sizeof (cnt), 1, &cnt, tx));
+ }
+
dsl_dir_rele(dd, FTAG);
/*
@@ -935,11 +956,12 @@ typedef struct dsl_dataset_snapshot_arg {
nvlist_t *ddsa_snaps;
nvlist_t *ddsa_props;
nvlist_t *ddsa_errors;
+ cred_t *ddsa_cr;
} dsl_dataset_snapshot_arg_t;
int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
- dmu_tx_t *tx, boolean_t recv)
+ dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
{
int error;
uint64_t value;
@@ -977,6 +999,18 @@ dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
if (!recv && DS_IS_INCONSISTENT(ds))
return (SET_ERROR(EBUSY));
+ /*
+ * Skip the check for temporary snapshots or if we have already checked
+ * the counts in dsl_dataset_snapshot_check. This means we really only
+ * check the count here when we're receiving a stream.
+ */
+ if (cnt != 0 && cr != NULL) {
+ error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
+ ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
+ if (error != 0)
+ return (error);
+ }
+
error = dsl_dataset_snapshot_reserve_space(ds, tx);
if (error != 0)
return (error);
@@ -992,6 +1026,103 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
nvpair_t *pair;
int rv = 0;
+ /*
+ * Pre-compute how many total new snapshots will be created for each
+ * level in the tree and below. This is needed for validating the
+ * snapshot limit when either taking a recursive snapshot or when
+ * taking multiple snapshots.
+ *
+ * The problem is that the counts are not actually adjusted when
+ * we are checking, only when we finally sync. For a single snapshot,
+ * this is easy, the count will increase by 1 at each node up the tree,
+ * but it's more complicated for the recursive/multiple snapshot case.
+ *
+ * The dsl_fs_ss_limit_check function does recursively check the count
+ * at each level up the tree but since it is validating each snapshot
+ * independently we need to be sure that we are validating the complete
+ * count for the entire set of snapshots. We do this by rolling up the
+ * counts for each component of the name into an nvlist and then
+ * checking each of those cases with the aggregated count.
+ *
+ * This approach properly handles not only the recursive snapshot
+ * case (where we get all of those on the ddsa_snaps list) but also
+ * the sibling case (e.g. snapshot a/b and a/c so that we will also
+ * validate the limit on 'a' using a count of 2).
+ *
+ * We validate the snapshot names in the third loop and only report
+ * name errors once.
+ */
+ if (dmu_tx_is_syncing(tx)) {
+ char *nm;
+ nvlist_t *cnt_track = NULL;
+ cnt_track = fnvlist_alloc();
+
+ nm = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /* Rollup aggregated counts into the cnt_track list */
+ for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
+ pair != NULL;
+ pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
+ char *pdelim;
+ uint64_t val;
+
+ (void) strlcpy(nm, nvpair_name(pair), MAXPATHLEN);
+ pdelim = strchr(nm, '@');
+ if (pdelim == NULL)
+ continue;
+ *pdelim = '\0';
+
+ do {
+ if (nvlist_lookup_uint64(cnt_track, nm,
+ &val) == 0) {
+ /* update existing entry */
+ fnvlist_add_uint64(cnt_track, nm,
+ val + 1);
+ } else {
+ /* add to list */
+ fnvlist_add_uint64(cnt_track, nm, 1);
+ }
+
+ pdelim = strrchr(nm, '/');
+ if (pdelim != NULL)
+ *pdelim = '\0';
+ } while (pdelim != NULL);
+ }
+
+ kmem_free(nm, MAXPATHLEN);
+
+ /* Check aggregated counts at each level */
+ for (pair = nvlist_next_nvpair(cnt_track, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {
+ int error = 0;
+ char *name;
+ uint64_t cnt = 0;
+ dsl_dataset_t *ds;
+
+ name = nvpair_name(pair);
+ cnt = fnvpair_value_uint64(pair);
+ ASSERT(cnt > 0);
+
+ error = dsl_dataset_hold(dp, name, FTAG, &ds);
+ if (error == 0) {
+ error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
+ ZFS_PROP_SNAPSHOT_LIMIT, NULL,
+ ddsa->ddsa_cr);
+ dsl_dataset_rele(ds, FTAG);
+ }
+
+ if (error != 0) {
+ if (ddsa->ddsa_errors != NULL)
+ fnvlist_add_int32(ddsa->ddsa_errors,
+ name, error);
+ rv = error;
+ /* only report one error for this check */
+ break;
+ }
+ }
+ nvlist_free(cnt_track);
+ }
+
for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
int error = 0;
@@ -1012,8 +1143,9 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
if (error == 0)
error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (error == 0) {
+ /* passing 0/NULL skips dsl_fs_ss_limit_check */
error = dsl_dataset_snapshot_check_impl(ds,
- atp + 1, tx, B_FALSE);
+ atp + 1, tx, B_FALSE, 0, NULL);
dsl_dataset_rele(ds, FTAG);
}
@@ -1025,6 +1157,7 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
rv = error;
}
}
+
return (rv);
}
@@ -1051,6 +1184,7 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
bcmp(&os->os_phys->os_zil_header, &zero_zil,
sizeof (zero_zil)) == 0);
+ dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);
/*
* The origin's ds_creation_txg has to be < TXG_INITIAL
@@ -1227,6 +1361,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
ddsa.ddsa_snaps = snaps;
ddsa.ddsa_props = props;
ddsa.ddsa_errors = errors;
+ ddsa.ddsa_cr = CRED();
if (error == 0) {
error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
@@ -1275,8 +1410,9 @@ dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
if (error != 0)
return (error);
+ /* NULL cred means no limit check for tmp snapshot */
error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
- tx, B_FALSE);
+ tx, B_FALSE, 0, NULL);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
@@ -1644,7 +1780,8 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
spa_history_log_internal_ds(ds, "rename", tx,
"-> @%s", ddrsa->ddrsa_newsnapname);
- VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx));
+ VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
+ B_FALSE));
mutex_enter(&ds->ds_lock);
(void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
mutex_exit(&ds->ds_lock);
@@ -1896,6 +2033,7 @@ typedef struct dsl_dataset_promote_arg {
dsl_dataset_t *origin_origin; /* origin of the origin */
uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
char *err_ds;
+ cred_t *cr;
} dsl_dataset_promote_arg_t;
static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
@@ -1913,6 +2051,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
dsl_dataset_t *origin_ds;
int err;
uint64_t unused;
+ uint64_t ss_mv_cnt;
err = promote_hold(ddpa, dp, FTAG);
if (err != 0)
@@ -1959,6 +2098,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
+ ss_mv_cnt = 0;
ddpa->used = origin_ds->ds_phys->ds_referenced_bytes;
ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes;
ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
@@ -1967,6 +2107,8 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
uint64_t val, dlused, dlcomp, dluncomp;
dsl_dataset_t *ds = snap->ds;
+ ss_mv_cnt++;
+
/*
* If there are long holds, we won't be able to evict
* the objset.
@@ -2009,9 +2151,9 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
ddpa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
- /* Check that there is enough space here */
+ /* Check that there is enough space and limit headroom here */
err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
- ddpa->used);
+ 0, ss_mv_cnt, ddpa->used, ddpa->cr);
if (err != 0)
goto out;
@@ -2151,10 +2293,12 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
/* move snap name entry */
VERIFY0(dsl_dataset_get_snapname(ds));
VERIFY0(dsl_dataset_snap_remove(origin_head,
- ds->ds_snapname, tx));
+ ds->ds_snapname, tx, B_TRUE));
VERIFY0(zap_add(dp->dp_meta_objset,
hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
8, 1, &ds->ds_object, tx));
+ dsl_fs_ss_count_adjust(hds->ds_dir, 1,
+ DD_FIELD_SNAPSHOT_COUNT, tx);
/* change containing dsl_dir */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
@@ -2392,6 +2536,7 @@ dsl_dataset_promote(const char *name, char *conflsnap)
ddpa.ddpa_clonename = name;
ddpa.err_ds = conflsnap;
+ ddpa.cr = CRED();
return (dsl_sync_task(name, dsl_dataset_promote_check,
dsl_dataset_promote_sync, &ddpa, 2 + numsnaps));