diff options
author | Jerry Jelinek <[email protected]> | 2015-04-02 00:07:48 +1100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-04-28 16:22:51 -0700 |
commit | 788eb90c4ca64beaf85ef542bb22c64fcb50993a (patch) | |
tree | c759bb7636e66d33713490aa8170ec4c03a1be62 /module/zfs/dsl_dir.c | |
parent | 308a451f7f5ecded4f194a50abab5ca123b6d397 (diff) |
Illumos 3897 - zfs filesystem and snapshot limits
3897 zfs filesystem and snapshot limits
Author: Jerry Jelinek <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Approved by: Christopher Siden <[email protected]>
References:
https://www.illumos.org/issues/3897
https://github.com/illumos/illumos-gate/commit/a2afb61
Porting Notes:
dsl_dataset_snapshot_check(): reduce stack usage using kmem_alloc().
Ported-by: Chris Dunlop <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dsl_dir.c')
-rw-r--r-- | module/zfs/dsl_dir.c | 591 |
1 files changed, 588 insertions, 3 deletions
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index b94b68e15..86b40657c 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. + * Copyright (c) 2014 Joyent, Inc. All rights reserved. */ #include <sys/dmu.h> @@ -39,8 +40,87 @@ #include <sys/zio.h> #include <sys/arc.h> #include <sys/sunddi.h> +#include <sys/zfeature.h> +#include <sys/policy.h> +#include <sys/zfs_znode.h> #include <sys/zvol.h> #include "zfs_namecheck.h" +#include "zfs_prop.h" + +/* + * Filesystem and Snapshot Limits + * ------------------------------ + * + * These limits are used to restrict the number of filesystems and/or snapshots + * that can be created at a given level in the tree or below. A typical + * use-case is with a delegated dataset where the administrator wants to ensure + * that a user within the zone is not creating too many additional filesystems + * or snapshots, even though they're not exceeding their space quota. + * + * The filesystem and snapshot counts are stored as extensible properties. This + * capability is controlled by a feature flag and must be enabled to be used. + * Once enabled, the feature is not active until the first limit is set. At + * that point, future operations to create/destroy filesystems or snapshots + * will validate and update the counts. + * + * Because the count properties will not exist before the feature is active, + * the counts are updated when a limit is first set on an uninitialized + * dsl_dir node in the tree (The filesystem/snapshot count on a node includes + * all of the nested filesystems/snapshots. Thus, a new leaf node has a + * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and + * snapshot count properties on a node indicate uninitialized counts on that + * node.) When first setting a limit on an uninitialized node, the code starts + * at the filesystem with the new limit and descends into all sub-filesystems + * to add the count properties. + * + * In practice this is lightweight since a limit is typically set when the + * filesystem is created and thus has no children. Once valid, changing the + * limit value won't require a re-traversal since the counts are already valid. + * When recursively fixing the counts, if a node with a limit is encountered + * during the descent, the counts are known to be valid and there is no need to + * descend into that filesystem's children. The counts on filesystems above the + * one with the new limit will still be uninitialized, unless a limit is + * eventually set on one of those filesystems. The counts are always recursively + * updated when a limit is set on a dataset, unless there is already a limit. + * When a new limit value is set on a filesystem with an existing limit, it is + * possible for the new limit to be less than the current count at that level + * since a user who can change the limit is also allowed to exceed the limit. + * + * Once the feature is active, then whenever a filesystem or snapshot is + * created, the code recurses up the tree, validating the new count against the + * limit at each initialized level. In practice, most levels will not have a + * limit set. If there is a limit at any initialized level up the tree, the + * check must pass or the creation will fail. Likewise, when a filesystem or + * snapshot is destroyed, the counts are recursively adjusted all the way up + * the initizized nodes in the tree. Renaming a filesystem into different point + * in the tree will first validate, then update the counts on each branch up to + * the common ancestor. A receive will also validate the counts and then update + * them. + * + * An exception to the above behavior is that the limit is not enforced if the + * user has permission to modify the limit. This is primarily so that + * recursive snapshots in the global zone always work. We want to prevent a + * denial-of-service in which a lower level delegated dataset could max out its + * limit and thus block recursive snapshots from being taken in the global zone. + * Because of this, it is possible for the snapshot count to be over the limit + * and snapshots taken in the global zone could cause a lower level dataset to + * hit or exceed its limit. The administrator taking the global zone recursive + * snapshot should be aware of this side-effect and behave accordingly. + * For consistency, the filesystem limit is also not enforced if the user can + * modify the limit. + * + * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check() + * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in + * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by + * dsl_dir_init_fs_ss_count(). + * + * There is a special case when we receive a filesystem that already exists. In + * this case a temporary clone name of %X is created (see dmu_recv_begin). We + * never update the filesystem counts for temporary clones. + * + * Likewise, we do not update the snapshot counts for temporary snapshots, + * such as those created by zfs diff. + */ static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); @@ -384,6 +464,398 @@ error: return (err); } +/* + * If the counts are already initialized for this filesystem and its + * descendants then do nothing, otherwise initialize the counts. + * + * The counts on this filesystem, and those below, may be uninitialized due to + * either the use of a pre-existing pool which did not support the + * filesystem/snapshot limit feature, or one in which the feature had not yet + * been enabled. + * + * Recursively descend the filesystem tree and update the filesystem/snapshot + * counts on each filesystem below, then update the cumulative count on the + * current filesystem. If the filesystem already has a count set on it, + * then we know that its counts, and the counts on the filesystems below it, + * are already correct, so we don't have to update this filesystem. + */ +static void +dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) +{ + uint64_t my_fs_cnt = 0; + uint64_t my_ss_cnt = 0; + dsl_pool_t *dp = dd->dd_pool; + objset_t *os = dp->dp_meta_objset; + zap_cursor_t *zc; + zap_attribute_t *za; + dsl_dataset_t *ds; + + ASSERT(spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)); + ASSERT(dsl_pool_config_held(dp)); + ASSERT(dmu_tx_is_syncing(tx)); + + dsl_dir_zapify(dd, tx); + + /* + * If the filesystem count has already been initialized then we + * don't need to recurse down any further. + */ + if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) + return; + + zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + + /* Iterate my child dirs */ + for (zap_cursor_init(zc, os, dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { + dsl_dir_t *chld_dd; + uint64_t count; + + VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG, + &chld_dd)); + + /* + * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and + * temporary datasets. + */ + if (chld_dd->dd_myname[0] == '$' || + chld_dd->dd_myname[0] == '%') { + dsl_dir_rele(chld_dd, FTAG); + continue; + } + + my_fs_cnt++; /* count this child */ + + dsl_dir_init_fs_ss_count(chld_dd, tx); + + VERIFY0(zap_lookup(os, chld_dd->dd_object, + DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count)); + my_fs_cnt += count; + VERIFY0(zap_lookup(os, chld_dd->dd_object, + DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count)); + my_ss_cnt += count; + + dsl_dir_rele(chld_dd, FTAG); + } + zap_cursor_fini(zc); + /* Count my snapshots (we counted children's snapshots above) */ + VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, FTAG, &ds)); + + for (zap_cursor_init(zc, os, ds->ds_phys->ds_snapnames_zapobj); + zap_cursor_retrieve(zc, za) == 0; + zap_cursor_advance(zc)) { + /* Don't count temporary snapshots */ + if (za->za_name[0] != '%') + my_ss_cnt++; + } + + dsl_dataset_rele(ds, FTAG); + + kmem_free(zc, sizeof (zap_cursor_t)); + kmem_free(za, sizeof (zap_attribute_t)); + + /* we're in a sync task, update counts */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, + sizeof (my_fs_cnt), 1, &my_fs_cnt, tx)); + VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, + sizeof (my_ss_cnt), 1, &my_ss_cnt, tx)); +} + +static int +dsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx) +{ + char *ddname = (char *)arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + dsl_dir_t *dd; + int error; + + error = dsl_dataset_hold(dp, ddname, FTAG, &ds); + if (error != 0) + return (error); + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(ENOTSUP)); + } + + dd = ds->ds_dir; + if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) && + dsl_dir_is_zapified(dd) && + zap_contains(dp->dp_meta_objset, dd->dd_object, + DD_FIELD_FILESYSTEM_COUNT) == 0) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EALREADY)); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + +static void +dsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx) +{ + char *ddname = (char *)arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + spa_t *spa; + + VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds)); + + spa = dsl_dataset_get_spa(ds); + + if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) { + /* + * Since the feature was not active and we're now setting a + * limit, increment the feature-active counter so that the + * feature becomes active for the first time. + * + * We are already in a sync task so we can update the MOS. + */ + spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx); + } + + /* + * Since we are now setting a non-UINT64_MAX limit on the filesystem, + * we need to ensure the counts are correct. Descend down the tree from + * this point and update all of the counts to be accurate. + */ + dsl_dir_init_fs_ss_count(ds->ds_dir, tx); + + dsl_dataset_rele(ds, FTAG); +} + +/* + * Make sure the feature is enabled and activate it if necessary. + * Since we're setting a limit, ensure the on-disk counts are valid. + * This is only called by the ioctl path when setting a limit value. + * + * We do not need to validate the new limit, since users who can change the + * limit are also allowed to exceed the limit. + */ +int +dsl_dir_activate_fs_ss_limit(const char *ddname) +{ + int error; + + error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check, + dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0); + + if (error == EALREADY) + error = 0; + + return (error); +} + +/* + * Used to determine if the filesystem_limit or snapshot_limit should be + * enforced. We allow the limit to be exceeded if the user has permission to + * write the property value. We pass in the creds that we got in the open + * context since we will always be the GZ root in syncing context. We also have + * to handle the case where we are allowed to change the limit on the current + * dataset, but there may be another limit in the tree above. + * + * We can never modify these two properties within a non-global zone. In + * addition, the other checks are modeled on zfs_secpolicy_write_perms. We + * can't use that function since we are already holding the dp_config_rwlock. + * In addition, we already have the dd and dealing with snapshots is simplified + * in this code. + */ + +typedef enum { + ENFORCE_ALWAYS, + ENFORCE_NEVER, + ENFORCE_ABOVE +} enforce_res_t; + +static enforce_res_t +dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr) +{ + enforce_res_t enforce = ENFORCE_ALWAYS; + uint64_t obj; + dsl_dataset_t *ds; + uint64_t zoned; + + ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || + prop == ZFS_PROP_SNAPSHOT_LIMIT); + +#ifdef _KERNEL + if (crgetzoneid(cr) != GLOBAL_ZONEID) + return (ENFORCE_ALWAYS); + + if (secpolicy_zfs(cr) == 0) + return (ENFORCE_NEVER); +#endif + + if ((obj = dd->dd_phys->dd_head_dataset_obj) == 0) + return (ENFORCE_ALWAYS); + + ASSERT(dsl_pool_config_held(dd->dd_pool)); + + if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0) + return (ENFORCE_ALWAYS); + + if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) { + /* Only root can access zoned fs's from the GZ */ + enforce = ENFORCE_ALWAYS; + } else { + if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0) + enforce = ENFORCE_ABOVE; + } + + dsl_dataset_rele(ds, FTAG); + return (enforce); +} + +/* + * Check if adding additional child filesystem(s) would exceed any filesystem + * limits or adding additional snapshot(s) would exceed any snapshot limits. + * The prop argument indicates which limit to check. + * + * Note that all filesystem limits up to the root (or the highest + * initialized) filesystem or the given ancestor must be satisfied. + */ +int +dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, + dsl_dir_t *ancestor, cred_t *cr) +{ + objset_t *os = dd->dd_pool->dp_meta_objset; + uint64_t limit, count; + char *count_prop; + enforce_res_t enforce; + int err = 0; + + ASSERT(dsl_pool_config_held(dd->dd_pool)); + ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || + prop == ZFS_PROP_SNAPSHOT_LIMIT); + + /* + * If we're allowed to change the limit, don't enforce the limit + * e.g. this can happen if a snapshot is taken by an administrative + * user in the global zone (i.e. a recursive snapshot by root). + * However, we must handle the case of delegated permissions where we + * are allowed to change the limit on the current dataset, but there + * is another limit in the tree above. + */ + enforce = dsl_enforce_ds_ss_limits(dd, prop, cr); + if (enforce == ENFORCE_NEVER) + return (0); + + /* + * e.g. if renaming a dataset with no snapshots, count adjustment + * is 0. + */ + if (delta == 0) + return (0); + + if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { + /* + * We don't enforce the limit for temporary snapshots. This is + * indicated by a NULL cred_t argument. + */ + if (cr == NULL) + return (0); + + count_prop = DD_FIELD_SNAPSHOT_COUNT; + } else { + count_prop = DD_FIELD_FILESYSTEM_COUNT; + } + + /* + * If an ancestor has been provided, stop checking the limit once we + * hit that dir. We need this during rename so that we don't overcount + * the check once we recurse up to the common ancestor. + */ + if (ancestor == dd) + return (0); + + /* + * If we hit an uninitialized node while recursing up the tree, we can + * stop since we know there is no limit here (or above). The counts are + * not valid on this node and we know we won't touch this node's counts. + */ + if (!dsl_dir_is_zapified(dd) || zap_lookup(os, dd->dd_object, + count_prop, sizeof (count), 1, &count) == ENOENT) + return (0); + + err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL, + B_FALSE); + if (err != 0) + return (err); + + /* Is there a limit which we've hit? */ + if (enforce == ENFORCE_ALWAYS && (count + delta) > limit) + return (SET_ERROR(EDQUOT)); + + if (dd->dd_parent != NULL) + err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop, + ancestor, cr); + + return (err); +} + +/* + * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all + * parents. When a new filesystem/snapshot is created, increment the count on + * all parents, and when a filesystem/snapshot is destroyed, decrement the + * count. + */ +void +dsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop, + dmu_tx_t *tx) +{ + int err; + objset_t *os = dd->dd_pool->dp_meta_objset; + uint64_t count; + + ASSERT(dsl_pool_config_held(dd->dd_pool)); + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 || + strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0); + + /* + * When we receive an incremental stream into a filesystem that already + * exists, a temporary clone is created. We don't count this temporary + * clone, whose name begins with a '%'. We also ignore hidden ($FREE, + * $MOS & $ORIGIN) objsets. + */ + if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') && + strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0) + return; + + /* + * e.g. if renaming a dataset with no snapshots, count adjustment is 0 + */ + if (delta == 0) + return; + + /* + * If we hit an uninitialized node while recursing up the tree, we can + * stop since we know the counts are not valid on this node and we + * know we shouldn't touch this node's counts. An uninitialized count + * on the node indicates that either the feature has not yet been + * activated or there are no limits on this part of the tree. + */ + if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object, + prop, sizeof (count), 1, &count)) == ENOENT) + return; + VERIFY0(err); + + count += delta; + /* Use a signed verify to make sure we're not neg. */ + VERIFY3S(count, >=, 0); + + VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count, + tx)); + + /* Roll up this additional count into our ancestors */ + if (dd->dd_parent != NULL) + dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx); +} + uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) @@ -408,8 +880,12 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, ddphys = dbuf->db_data; ddphys->dd_creation_time = gethrestime_sec(); - if (pds) + if (pds) { ddphys->dd_parent_obj = pds->dd_object; + + /* update the filesystem counts */ + dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx); + } ddphys->dd_props_zapobj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ddphys->dd_child_dir_zapobj = zap_create(mos, @@ -458,6 +934,22 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) } mutex_exit(&dd->dd_lock); + if (dsl_dir_is_zapified(dd)) { + uint64_t count; + objset_t *os = dd->dd_pool->dp_meta_objset; + + if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, + sizeof (count), 1, &count) == 0) { + dsl_prop_nvlist_add_uint64(nv, + ZFS_PROP_FILESYSTEM_COUNT, count); + } + if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, + sizeof (count), 1, &count) == 0) { + dsl_prop_nvlist_add_uint64(nv, + ZFS_PROP_SNAPSHOT_COUNT, count); + } + } + if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; char buf[MAXNAMELEN]; @@ -1166,6 +1658,7 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) typedef struct dsl_dir_rename_arg { const char *ddra_oldname; const char *ddra_newname; + cred_t *ddra_cred; } dsl_dir_rename_arg_t; /* ARGSUSED */ @@ -1230,10 +1723,50 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx) } } + if (dmu_tx_is_syncing(tx)) { + if (spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_FS_SS_LIMIT)) { + /* + * Although this is the check function and we don't + * normally make on-disk changes in check functions, + * we need to do that here. + * + * Ensure this portion of the tree's counts have been + * initialized in case the new parent has limits set. + */ + dsl_dir_init_fs_ss_count(dd, tx); + } + } + if (newparent != dd->dd_parent) { /* is there enough space? */ uint64_t myspace = MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); + objset_t *os = dd->dd_pool->dp_meta_objset; + uint64_t fs_cnt = 0; + uint64_t ss_cnt = 0; + + if (dsl_dir_is_zapified(dd)) { + int err; + + err = zap_lookup(os, dd->dd_object, + DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, + &fs_cnt); + if (err != ENOENT && err != 0) + return (err); + + /* + * have to add 1 for the filesystem itself that we're + * moving + */ + fs_cnt++; + + err = zap_lookup(os, dd->dd_object, + DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, + &ss_cnt); + if (err != ENOENT && err != 0) + return (err); + } /* no rename into our descendant */ if (closest_common_ancestor(dd, newparent) == dd) { @@ -1243,7 +1776,7 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx) } error = dsl_dir_transfer_possible(dd->dd_parent, - newparent, myspace); + newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); @@ -1275,6 +1808,37 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) "-> %s", ddra->ddra_newname); if (newparent != dd->dd_parent) { + objset_t *os = dd->dd_pool->dp_meta_objset; + uint64_t fs_cnt = 0; + uint64_t ss_cnt = 0; + + /* + * We already made sure the dd counts were initialized in the + * check function. + */ + if (spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_FS_SS_LIMIT)) { + VERIFY0(zap_lookup(os, dd->dd_object, + DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, + &fs_cnt)); + /* add 1 for the filesystem itself that we're moving */ + fs_cnt++; + + VERIFY0(zap_lookup(os, dd->dd_object, + DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, + &ss_cnt)); + } + + dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt, + DD_FIELD_FILESYSTEM_COUNT, tx); + dsl_fs_ss_count_adjust(newparent, fs_cnt, + DD_FIELD_FILESYSTEM_COUNT, tx); + + dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt, + DD_FIELD_SNAPSHOT_COUNT, tx); + dsl_fs_ss_count_adjust(newparent, ss_cnt, + DD_FIELD_SNAPSHOT_COUNT, tx); + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, -dd->dd_phys->dd_compressed_bytes, @@ -1329,17 +1893,20 @@ dsl_dir_rename(const char *oldname, const char *newname) ddra.ddra_oldname = oldname; ddra.ddra_newname = newname; + ddra.ddra_cred = CRED(); return (dsl_sync_task(oldname, dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3)); } int -dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) +dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, + uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr) { dsl_dir_t *ancestor; int64_t adelta; uint64_t avail; + int err; ancestor = closest_common_ancestor(sdd, tdd); adelta = would_change(sdd, -space, ancestor); @@ -1347,6 +1914,15 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) if (avail < space) return (SET_ERROR(ENOSPC)); + err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT, + ancestor, cr); + if (err != 0) + return (err); + err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT, + ancestor, cr); + if (err != 0) + return (err); + return (0); } @@ -1380,6 +1956,15 @@ dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx) dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx); } +boolean_t +dsl_dir_is_zapified(dsl_dir_t *dd) +{ + dmu_object_info_t doi; + + dmu_object_info_from_db(dd->dd_dbuf, &doi); + return (doi.doi_type == DMU_OTN_ZAP_METADATA); +} + #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); |