diff options
author | Matthew Ahrens <[email protected]> | 2013-08-28 06:45:09 -0500 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2013-09-04 15:49:00 -0700 |
commit | 6f1ffb06655008c9b519108ed29fbf03acd6e5de (patch) | |
tree | 0fe1d5278370b37ab45a565c0ce7d9301bebde30 /module/zfs | |
parent | 0c28fb480836ab7bb1bbf8de6e572d2443273396 (diff) |
Illumos #2882, #2883, #2900
2882 implement libzfs_core
2883 changing "canmount" property to "on" should not always remount dataset
2900 "zfs snapshot" should be able to create multiple, arbitrary snapshots at once
Reviewed by: George Wilson <[email protected]>
Reviewed by: Chris Siden <[email protected]>
Reviewed by: Garrett D'Amore <[email protected]>
Reviewed by: Bill Pijewski <[email protected]>
Reviewed by: Dan Kruchinin <[email protected]>
Approved by: Eric Schrock <[email protected]>
References:
https://www.illumos.org/issues/2882
https://www.illumos.org/issues/2883
https://www.illumos.org/issues/2900
illumos/illumos-gate@4445fffbbb1ea25fd0e9ea68b9380dd7a6709025
Ported-by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #1293
Porting notes:
WARNING: This patch changes the user/kernel ABI. That means that
the zfs/zpool utilities built from master are NOT compatible with
the 0.6.2 kernel modules. Ensure you load the matching kernel
modules from master after updating the utilities. Otherwise the
zfs/zpool commands will be unable to interact with your pool and
you will see errors similar to the following:
$ zpool list
failed to read pool configuration: bad address
no pools available
$ zfs list
no datasets available
Add zvol minor device creation to the new zfs_snapshot_nvl function.
Remove the logging of the "release" operation in
dsl_dataset_user_release_sync(). The logging caused a null dereference
because ds->ds_dir is zeroed in dsl_dataset_destroy_sync() and the
logging functions try to get the ds name via the dsl_dataset_name()
function. I've got no idea why this particular code would have worked
in Illumos. This code has subsequently been completely reworked in
Illumos commit 3b2aab1 (3464 zfs synctask code needs restructuring).
Squash some "may be used uninitialized" warning/erorrs.
Fix some printf format warnings for %lld and %llu.
Apply a few spa_writeable() changes that were made to Illumos in
illumos/illumos-gate.git@cd1c8b8 as part of the 3112, 3113, 3114 and
3115 fixes.
Add a missing call to fnvlist_free(nvl) in log_internal() that was added
in Illumos to fix issue 3085 but couldn't be ported to ZoL at the time
(zfsonlinux/zfs@9e11c73) because it depended on future work.
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dmu_objset.c | 310 | ||||
-rw-r--r-- | module/zfs/dmu_send.c | 110 | ||||
-rw-r--r-- | module/zfs/dmu_tx.c | 2 | ||||
-rw-r--r-- | module/zfs/dsl_dataset.c | 145 | ||||
-rw-r--r-- | module/zfs/dsl_deleg.c | 28 | ||||
-rw-r--r-- | module/zfs/dsl_dir.c | 67 | ||||
-rw-r--r-- | module/zfs/dsl_prop.c | 27 | ||||
-rw-r--r-- | module/zfs/dsl_scan.c | 4 | ||||
-rw-r--r-- | module/zfs/dsl_synctask.c | 23 | ||||
-rw-r--r-- | module/zfs/rrwlock.c | 13 | ||||
-rw-r--r-- | module/zfs/spa.c | 54 | ||||
-rw-r--r-- | module/zfs/spa_history.c | 228 | ||||
-rw-r--r-- | module/zfs/zfs_ctldir.c | 3 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 1622 | ||||
-rw-r--r-- | module/zfs/zfs_vfsops.c | 6 |
15 files changed, 1667 insertions, 975 deletions
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 52d55d566..0f07a4cc9 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -525,9 +526,10 @@ dmu_objset_evict_dbufs(objset_t *os) void dmu_objset_evict(objset_t *os) { - dsl_dataset_t *ds = os->os_dsl_dataset; int t; + dsl_dataset_t *ds = os->os_dsl_dataset; + for (t = 0; t < TXG_SIZE; t++) ASSERT(!dmu_objset_is_dirty(os, t)); @@ -698,30 +700,33 @@ dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) spa_t *spa = dd->dd_pool->dp_spa; struct oscarg *oa = arg2; uint64_t obj; + dsl_dataset_t *ds; + blkptr_t *bp; ASSERT(dmu_tx_is_syncing(tx)); obj = dsl_dataset_create_sync(dd, oa->lastname, oa->clone_origin, oa->flags, oa->cr, tx); - if (oa->clone_origin == NULL) { - dsl_pool_t *dp = dd->dd_pool; - dsl_dataset_t *ds; - blkptr_t *bp; - objset_t *os; - - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); - bp = dsl_dataset_get_blkptr(ds); - ASSERT(BP_IS_HOLE(bp)); - - os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + if (BP_IS_HOLE(bp)) { + objset_t *os = + dmu_objset_create_impl(spa, ds, bp, oa->type, tx); if (oa->userfunc) oa->userfunc(os, oa->userarg, oa->cr, tx); - dsl_dataset_rele(ds, FTAG); } - spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); + if (oa->clone_origin == NULL) { + spa_history_log_internal_ds(ds, "create", tx, ""); + } else { + char namebuf[MAXNAMELEN]; + dsl_dataset_name(oa->clone_origin, namebuf); + spa_history_log_internal_ds(ds, "clone", tx, + "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object); + } + dsl_dataset_rele(ds, FTAG); } int @@ -798,34 +803,40 @@ dmu_objset_destroy(const char *name, boolean_t defer) return (error); } -struct snaparg { - dsl_sync_task_group_t *dstg; - char *snapname; - char *htag; - char failed[MAXPATHLEN]; - boolean_t recursive; - boolean_t needsuspend; - boolean_t temporary; - nvlist_t *props; - struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ - dsl_dataset_t *newds; -}; +typedef struct snapallarg { + dsl_sync_task_group_t *saa_dstg; + boolean_t saa_needsuspend; + nvlist_t *saa_props; + + /* the following are used only if 'temporary' is set: */ + boolean_t saa_temporary; + const char *saa_htag; + struct dsl_ds_holdarg *saa_ha; + dsl_dataset_t *saa_newds; +} snapallarg_t; + +typedef struct snaponearg { + const char *soa_longname; /* long snap name */ + const char *soa_snapname; /* short snap name */ + snapallarg_t *soa_saa; +} snaponearg_t; static int snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; - struct snaparg *sn = arg2; + snaponearg_t *soa = arg2; + snapallarg_t *saa = soa->soa_saa; int error; /* The props have already been checked by zfs_check_userprops(). */ error = dsl_dataset_snapshot_check(os->os_dsl_dataset, - sn->snapname, tx); + soa->soa_snapname, tx); if (error) return (error); - if (sn->temporary) { + if (saa->saa_temporary) { /* * Ideally we would just call * dsl_dataset_user_hold_check() and @@ -843,12 +854,13 @@ snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) * Not checking number of tags because the tag will be * unique, as it will be the only tag. */ - if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) return (E2BIG); - sn->ha = kmem_alloc(sizeof(struct dsl_ds_holdarg), KM_PUSHPAGE); - sn->ha->temphold = B_TRUE; - sn->ha->htag = sn->htag; + saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), + KM_PUSHPAGE); + saa->saa_ha->temphold = B_TRUE; + saa->saa_ha->htag = saa->saa_htag; } return (error); } @@ -858,24 +870,25 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; dsl_dataset_t *ds = os->os_dsl_dataset; - struct snaparg *sn = arg2; + snaponearg_t *soa = arg2; + snapallarg_t *saa = soa->soa_saa; - dsl_dataset_snapshot_sync(ds, sn->snapname, tx); + dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx); - if (sn->props) { + if (saa->saa_props != NULL) { dsl_props_arg_t pa; - pa.pa_props = sn->props; + pa.pa_props = saa->saa_props; pa.pa_source = ZPROP_SRC_LOCAL; dsl_props_set_sync(ds->ds_prev, &pa, tx); } - if (sn->temporary) { + if (saa->saa_temporary) { struct dsl_ds_destroyarg da; - dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); - kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); - sn->ha = NULL; - sn->newds = ds->ds_prev; + dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx); + kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg)); + saa->saa_ha = NULL; + saa->saa_newds = ds->ds_prev; da.ds = ds->ds_prev; da.defer = B_TRUE; @@ -884,135 +897,182 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) } static int -dmu_objset_snapshot_one(const char *name, void *arg) +snapshot_one_impl(const char *snapname, void *arg) { - struct snaparg *sn = arg; + char *fsname; + snapallarg_t *saa = arg; + snaponearg_t *soa; objset_t *os; int err; - char *cp; - /* - * If the objset starts with a '%', then ignore it unless it was - * explicitly named (ie, not recursive). These hidden datasets - * are always inconsistent, and by not opening them here, we can - * avoid a race with dsl_dir_destroy_check(). - */ - cp = strrchr(name, '/'); - if (cp && cp[1] == '%' && sn->recursive) - return (0); + fsname = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); + (void) strlcpy(fsname, snapname, MAXPATHLEN); + strchr(fsname, '@')[0] = '\0'; - (void) strcpy(sn->failed, name); - - /* - * Check permissions if we are doing a recursive snapshot. The - * permission checks for the starting dataset have already been - * performed in zfs_secpolicy_snapshot() - */ - if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) - return (err); - - err = dmu_objset_hold(name, sn, &os); + err = dmu_objset_hold(fsname, saa, &os); + kmem_free(fsname, MAXPATHLEN); if (err != 0) return (err); /* * If the objset is in an inconsistent state (eg, in the process - * of being destroyed), don't snapshot it. As with %hidden - * datasets, we return EBUSY if this name was explicitly - * requested (ie, not recursive), and otherwise ignore it. + * of being destroyed), don't snapshot it. */ if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_rele(os, sn); - return (sn->recursive ? 0 : EBUSY); + dmu_objset_rele(os, saa); + return (EBUSY); } - if (sn->needsuspend) { + if (saa->saa_needsuspend) { err = zil_suspend(dmu_objset_zil(os)); if (err) { - dmu_objset_rele(os, sn); + dmu_objset_rele(os, saa); return (err); } } - dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, - os, sn, 3); + + soa = kmem_zalloc(sizeof (*soa), KM_PUSHPAGE); + soa->soa_saa = saa; + soa->soa_longname = snapname; + soa->soa_snapname = strchr(snapname, '@') + 1; + + dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync, + os, soa, 3); return (0); } +/* + * The snapshots must all be in the same pool. + */ int -dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) +dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) { dsl_sync_task_t *dst; - struct snaparg *sn; + snapallarg_t saa = { 0 }; spa_t *spa; - minor_t minor; + int rv = 0; int err; + nvpair_t *pair; - sn = kmem_alloc(sizeof (struct snaparg), KM_SLEEP); - (void) strcpy(sn->failed, fsname); + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); - err = spa_open(fsname, &spa, FTAG); - if (err) { - kmem_free(sn, sizeof (struct snaparg)); + err = spa_open(nvpair_name(pair), &spa, FTAG); + if (err) return (err); + saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + saa.saa_props = props; + saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + err = snapshot_one_impl(nvpair_name(pair), &saa); + if (err != 0) { + if (errors != NULL) { + fnvlist_add_int32(errors, + nvpair_name(pair), err); + } + rv = err; + } } - if (temporary) { - if (cleanup_fd < 0) { - spa_close(spa, FTAG); - return (EINVAL); - } - if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { - spa_close(spa, FTAG); - return (err); + /* + * If any call to snapshot_one_impl() failed, don't execute the + * sync task. The error handling code below will clean up the + * snaponearg_t from any successful calls to + * snapshot_one_impl(). + */ + if (rv == 0) + err = dsl_sync_task_group_wait(saa.saa_dstg); + if (err != 0) + rv = err; + + for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; + dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { + objset_t *os = dst->dst_arg1; + snaponearg_t *soa = dst->dst_arg2; + if (dst->dst_err != 0) { + if (errors != NULL) { + fnvlist_add_int32(errors, + soa->soa_longname, dst->dst_err); + } + rv = dst->dst_err; } + + if (saa.saa_needsuspend) + zil_resume(dmu_objset_zil(os)); + dmu_objset_rele(os, &saa); + kmem_free(soa, sizeof (*soa)); } - sn->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - sn->snapname = snapname; - sn->htag = tag; - sn->props = props; - sn->recursive = recursive; - sn->needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - sn->temporary = temporary; - sn->ha = NULL; - sn->newds = NULL; - - if (recursive) { - err = dmu_objset_find(fsname, - dmu_objset_snapshot_one, sn, DS_FIND_CHILDREN); - } else { - err = dmu_objset_snapshot_one(fsname, sn); + dsl_sync_task_group_destroy(saa.saa_dstg); + spa_close(spa, FTAG); + return (rv); +} + +int +dmu_objset_snapshot_one(const char *fsname, const char *snapname) +{ + int err; + char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); + nvlist_t *snaps = fnvlist_alloc(); + + fnvlist_add_boolean(snaps, longsnap); + err = dmu_objset_snapshot(snaps, NULL, NULL); + fnvlist_free(snaps); + strfree(longsnap); + return (err); +} + +int +dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd) +{ + dsl_sync_task_t *dst; + snapallarg_t saa = { 0 }; + spa_t *spa; + minor_t minor; + int err; + + err = spa_open(snapname, &spa, FTAG); + if (err) + return (err); + saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + saa.saa_htag = tag; + saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + saa.saa_temporary = B_TRUE; + + if (cleanup_fd < 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { + spa_close(spa, FTAG); + return (err); } + err = snapshot_one_impl(snapname, &saa); + if (err == 0) - err = dsl_sync_task_group_wait(sn->dstg); + err = dsl_sync_task_group_wait(saa.saa_dstg); - for (dst = list_head(&sn->dstg->dstg_tasks); dst; - dst = list_next(&sn->dstg->dstg_tasks, dst)) { + for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; + dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { objset_t *os = dst->dst_arg1; - dsl_dataset_t *ds = os->os_dsl_dataset; - if (dst->dst_err) { - dsl_dataset_name(ds, sn->failed); - } else if (temporary) { - dsl_register_onexit_hold_cleanup(sn->newds, tag, minor); - } - if (sn->needsuspend) + dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor); + if (saa.saa_needsuspend) zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, sn); + dmu_objset_rele(os, &saa); } - if (err) - (void) strcpy(fsname, sn->failed); - if (temporary) - zfs_onexit_fd_rele(cleanup_fd); - dsl_sync_task_group_destroy(sn->dstg); + zfs_onexit_fd_rele(cleanup_fd); + dsl_sync_task_group_destroy(saa.saa_dstg); spa_close(spa, FTAG); - kmem_free(sn, sizeof (struct snaparg)); return (err); } + static void dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) { @@ -1159,7 +1219,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; - while ((dr = list_head(list)) != NULL) { + while ((dr = list_head(list))) { ASSERT(dr->dr_dbuf->db_level == 0); list_remove(list, dr); if (dr->dr_zio) @@ -1219,7 +1279,7 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); - while ((dn = list_head(list)) != NULL) { + while ((dn = list_head(list))) { int flags; ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 54e75971d..6552e1d9d 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -416,9 +416,48 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (err); } +/* + * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. + * For example, they could both be snapshots of the same filesystem, and + * 'earlier' is before 'later'. Or 'earlier' could be the origin of + * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's + * filesystem. Or 'earlier' could be the origin's origin. + */ +static boolean_t +is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +{ + dsl_pool_t *dp = later->ds_dir->dd_pool; + int error; + boolean_t ret; + dsl_dataset_t *origin; + + if (earlier->ds_phys->ds_creation_txg >= + later->ds_phys->ds_creation_txg) + return (B_FALSE); + + if (later->ds_dir == earlier->ds_dir) + return (B_TRUE); + if (!dsl_dir_is_clone(later->ds_dir)) + return (B_FALSE); + + rw_enter(&dp->dp_config_rwlock, RW_READER); + if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) { + rw_exit(&dp->dp_config_rwlock); + return (B_TRUE); + } + error = dsl_dataset_hold_obj(dp, + later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); + rw_exit(&dp->dp_config_rwlock); + if (error != 0) + return (B_FALSE); + ret = is_before(origin, earlier); + dsl_dataset_rele(origin, FTAG); + return (ret); +} + int -dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - int outfd, vnode_t *vp, offset_t *off) +dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, + offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -431,30 +470,13 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. + */ + if (fromds != NULL && !is_before(ds, fromds)) return (EXDEV); - if (fromorigin) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } - - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -479,7 +501,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; - if (fromorigin) + if (fromds != NULL && ds->ds_dir != fromds->ds_dir) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) @@ -491,8 +513,6 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (fromds) fromtxg = fromds->ds_phys->ds_creation_txg; - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -550,8 +570,7 @@ out: } int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - uint64_t *sizep) +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -563,27 +582,13 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. + */ + if (fromds != NULL && !is_before(ds, fromds)) return (EXDEV); - if (fromorigin) { - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } - /* Get uncompressed size estimate of changed data. */ if (fromds == NULL) { size = ds->ds_phys->ds_uncompressed_bytes; @@ -591,8 +596,6 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, uint64_t used, comp; err = dsl_dataset_space_written(fromds, ds, &used, &comp, &size); - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); if (err) return (err); } @@ -690,8 +693,7 @@ recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); } - spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, - dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); + spa_history_log_internal_ds(rbsa->ds, "receive new", tx, ""); } /* ARGSUSED */ @@ -792,8 +794,7 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds = cds; - spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, - dp->dp_spa, tx, "dataset = %lld", dsobj); + spa_history_log_internal_ds(cds, "receive over existing", tx, ""); } static boolean_t @@ -1604,6 +1605,7 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + spa_history_log_internal_ds(ds, "finished receiving", tx, ""); } static int diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index fd714135a..30867f9d7 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -64,7 +64,7 @@ dmu_tx_create_dd(dsl_dir_t *dd) { dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE); tx->tx_dir = dd; - if (dd) + if (dd != NULL) tx->tx_pool = dd->dd_pool; list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), offsetof(dmu_tx_hold_t, txh_node)); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index d9e8bd30f..2eca2b204 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -907,7 +907,8 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, * The snapshots must all be in the same pool. */ int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) +dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, + nvlist_t *errlist) { int err; dsl_sync_task_t *dst; @@ -942,7 +943,7 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) } else if (err == ENOENT) { err = 0; } else { - (void) strcpy(failed, nvpair_name(pair)); + fnvlist_add_int32(errlist, nvpair_name(pair), err); break; } } @@ -956,10 +957,12 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) dsl_dataset_t *ds = dsda->ds; /* - * Return the file system name that triggered the error + * Return the snapshots that triggered the error. */ - if (dst->dst_err) { - dsl_dataset_name(ds, failed); + if (dst->dst_err != 0) { + char name[ZFS_MAXNAMELEN]; + dsl_dataset_name(ds, name); + fnvlist_add_int32(errlist, name, dst->dst_err); } ASSERT3P(dsda->rm_origin, ==, NULL); dsl_dataset_disown(ds, dstg); @@ -1038,7 +1041,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; - dsl_dataset_t *dummy_ds; dsda.ds = ds; @@ -1058,9 +1060,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) } dd = ds->ds_dir; - dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); - dummy_ds->ds_dir = dd; - dummy_ds->ds_object = ds->ds_object; if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { @@ -1072,11 +1071,11 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); if (err) - goto out_free; + goto out; err = dmu_objset_from_ds(ds, &os); if (err) - goto out_free; + goto out; /* * Remove all objects while in the open context so that @@ -1091,7 +1090,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) (void) dmu_free_object(os, obj); } if (err != ESRCH) - goto out_free; + goto out; /* * Sync out all in-flight IO. @@ -1118,7 +1117,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) rw_exit(&dd->dd_pool->dp_config_rwlock); if (err) - goto out_free; + goto out; /* * Blow away the dsl_dir + head dataset. @@ -1134,7 +1133,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) err = dsl_dataset_origin_rm_prep(&dsda, tag); if (err) { dsl_dir_close(dd, FTAG); - goto out_free; + goto out; } } @@ -1142,7 +1141,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, dummy_ds, FTAG, 0); + dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); @@ -1166,8 +1165,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) if (err) dsl_dir_close(dd, FTAG); -out_free: - kmem_free(dummy_ds, sizeof (dsl_dataset_t)); out: dsl_dataset_disown(ds, tag); return (err); @@ -1333,14 +1330,12 @@ static void dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; - dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); + spa_history_log_internal_ds(ds, "destroy begin", tx, ""); } static int @@ -1665,9 +1660,13 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } + /* We need to log before removing it from the namespace. */ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + /* signal any waiters that this dataset is going away */ mutex_enter(&ds->ds_lock); ds->ds_owner = dsl_reaper; @@ -1965,8 +1964,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); if (ds->ds_phys->ds_next_clones_obj != 0) { ASSERTV(uint64_t count); @@ -2014,7 +2011,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) return (ENOSPC); /* - * Propogate any reserved space for this snapshot to other + * Propagate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) @@ -2024,10 +2021,9 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) } int -dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; int err; uint64_t value; @@ -2039,7 +2035,7 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) return (EAGAIN); /* - * Check for conflicting name snapshot name. + * Check for conflicting snapshot name. */ err = dsl_dataset_snap_lookup(ds, snapname, &value); if (err == 0) @@ -2063,10 +2059,9 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) } void -dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; @@ -2172,8 +2167,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_snap_cmtime_update(ds->ds_dir); - spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, - "dataset = %llu", dsobj); + spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); } void @@ -2259,7 +2253,20 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t refd, avail, uobjs, aobjs, ratio; - dsl_dir_stats(ds->ds_dir, nv); + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + + if (dsl_dataset_is_snapshot(ds)) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + ds->ds_phys->ds_unique_bytes); + get_clones_stat(ds, nv); + } else { + dsl_dir_stats(ds->ds_dir, nv); + } dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); @@ -2305,22 +2312,6 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } - ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); - - if (ds->ds_phys->ds_next_snap_obj) { - /* - * This is a snapshot; override the dd's space used with - * our unique space and compression ratio. - */ - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - ds->ds_phys->ds_unique_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - - get_clones_stat(ds, nv); - } } void @@ -2329,27 +2320,25 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; - if (ds->ds_phys->ds_next_snap_obj) { + stat->dds_origin[0] = '\0'; + if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; - } - /* clone origin is really a dsl_dir thing... */ - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_dir_is_clone(ds->ds_dir)) { - dsl_dataset_t *ods; + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(ds->ds_dir)) { + dsl_dataset_t *ods; - VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_drop_ref(ods, FTAG); - } else { - stat->dds_origin[0] = '\0'; + VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dataset_name(ods, stat->dds_origin); + dsl_dataset_drop_ref(ods, FTAG); + } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } uint64_t @@ -2466,8 +2455,8 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ds->ds_snapname, 8, 1, &ds->ds_object, tx); ASSERT0(err); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, - "dataset = %llu", ds->ds_object); + spa_history_log_internal_ds(ds, "rename", tx, + "-> @%s", newsnapname); dsl_dataset_rele(hds, FTAG); } @@ -2946,8 +2935,7 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ - spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, - "dataset = %llu", hds->ds_object); + spa_history_log_internal_ds(hds, "promote", tx, ""); dsl_dir_close(odd, FTAG); } @@ -3306,6 +3294,9 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); + + spa_history_log_internal_ds(csa->cds, "clone swap", tx, + "parent=%s", csa->ohds->ds_dir->dd_myname); } /* @@ -3463,6 +3454,9 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (ds->ds_quota != effective_value) { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = effective_value; + + spa_history_log_internal_ds(ds, "set refquota", tx, + "refquota=%lld", (longlong_t)ds->ds_quota); } } @@ -3566,6 +3560,9 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); + + spa_history_log_internal_ds(ds, "set refreservation", tx, + "refreservation=%lld", (longlong_t)effective_value); } int @@ -3631,7 +3628,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; + const char *htag = ha->htag; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int error = 0; @@ -3665,7 +3662,7 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; + const char *htag = ha->htag; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t now = gethrestime_sec(); @@ -3693,9 +3690,9 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) htag, &now, tx)); } - spa_history_log_internal(LOG_DS_USER_HOLD, - dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, - (int)ha->temphold, ds->ds_object); + spa_history_log_internal_ds(ds, "hold", tx, + "tag = %s temp = %d holds now = %llu", + htag, (int)ha->temphold, ds->ds_userrefs); } static int @@ -3902,7 +3899,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; - uint64_t dsobj = ds->ds_object; uint64_t refs; int error; @@ -3914,11 +3910,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) VERIFY(error == 0 || error == ENOENT); zapobj = ds->ds_phys->ds_userrefs_obj; VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); - - spa_history_log_internal(LOG_DS_USER_RELEASE, - dp->dp_spa, tx, "<%s> %lld dataset = %llu", - ra->htag, (longlong_t)refs, dsobj); - if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) { struct dsl_ds_destroyarg dsda = {0}; diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 294932c45..48c261e63 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -181,10 +181,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, jumpobj, perm, 8, 1, &n, tx) == 0); - spa_history_log_internal(LOG_DS_PERM_UPDATE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission update", tx, + "%s %s", whokey, perm); } } } @@ -213,10 +211,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s dataset = %llu", whokey, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission who remove", + tx, "%s", whokey); continue; } @@ -234,10 +230,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission remove", tx, + "%s %s", whokey, perm); } } } @@ -533,12 +527,10 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl, } /* - * Check if user has requested permission. If descendent is set, must have - * descendent perms. + * Check if user has requested permission. */ int -dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, - cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) { dsl_dir_t *dd; dsl_pool_t *dp; @@ -559,7 +551,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, SPA_VERSION_DELEGATED_PERMS) return (EPERM); - if (dsl_dataset_is_snapshot(ds) || descendent) { + if (dsl_dataset_is_snapshot(ds)) { /* * Snapshots are treated as descendents only, * local permissions do not apply. @@ -652,7 +644,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) if (error) return (error); - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); dsl_dataset_rele(ds, FTAG); return (error); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 69f68c209..45c73c363 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/dmu.h> @@ -39,8 +40,8 @@ #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); - +static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, + uint64_t value, dmu_tx_t *tx); /* ARGSUSED */ static void @@ -449,8 +450,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, int dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *dd = arg1; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; @@ -481,24 +481,19 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) void dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *dd = arg1; objset_t *mos = dd->dd_pool->dp_meta_objset; - dsl_prop_setarg_t psa; - uint64_t value = 0; uint64_t obj; dd_used_t t; ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - /* Remove our reservation. */ - dsl_prop_setarg_init_uint64(&psa, "reservation", - (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), - &value); - psa.psa_effective_value = 0; /* predict default value */ - - dsl_dir_set_reservation_sync(ds, &psa, tx); + /* + * Remove our reservation. The impl() routine avoids setting the + * actual property, which would require the (already destroyed) ds. + */ + dsl_dir_set_reservation_sync_impl(dd, 0, tx); ASSERT0(dd->dd_phys->dd_used_bytes); ASSERT0(dd->dd_phys->dd_reserved); @@ -1062,6 +1057,9 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_enter(&dd->dd_lock); dd->dd_phys->dd_quota = effective_value; mutex_exit(&dd->dd_lock); + + spa_history_log_internal_dd(dd, "set quota", tx, + "quota=%lld", (longlong_t)effective_value); } int @@ -1148,25 +1146,17 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; uint64_t used; int64_t delta; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); - dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; - delta = MAX(used, effective_value) - - MAX(used, dd->dd_phys->dd_reserved); - dd->dd_phys->dd_reserved = effective_value; + delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); + dd->dd_phys->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ @@ -1176,6 +1166,23 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_exit(&dd->dd_lock); } +static void +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t value = psa->psa_effective_value; + + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(dd, psa); + + dsl_dir_set_reservation_sync_impl(dd, value, tx); + + spa_history_log_internal_dd(dd, "set reservation", tx, + "reservation=%lld", (longlong_t)value); + } + int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) @@ -1294,9 +1301,15 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; + char namebuf[MAXNAMELEN]; ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + /* Log this before we change the name. */ + dsl_dir_name(ra->newparent, namebuf); + spa_history_log_internal_dd(dd, "rename", tx, + "-> %s/%s", namebuf, ra->mynewname); + if (ra->newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, @@ -1336,8 +1349,6 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT0(err); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, - tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); } int diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index e44a94853..153420ccf 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -704,11 +705,9 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } - spa_history_log_internal((source == ZPROP_SRC_NONE || - source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : - LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, - "%s=%s dataset = %llu", propname, - (valstr == NULL ? "" : valstr), ds->ds_object); + spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || + source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx, + "%s=%s", propname, (valstr == NULL ? "" : valstr)); if (tbuf != NULL) kmem_free(tbuf, ZAP_MAXVALUELEN); @@ -757,24 +756,6 @@ dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } -void -dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, - dmu_tx_t *tx) -{ - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t zapobj = dd->dd_phys->dd_props_zapobj; - - ASSERT(dmu_tx_is_syncing(tx)); - - VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); - - dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); - - spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, - "%s=%llu dataset = %llu", name, (u_longlong_t)val, - dd->dd_phys->dd_head_dataset_obj); -} - int dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 34a4f0382..90ca7b256 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -224,7 +224,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_scan_sync_state(scn, tx); - spa_history_log_internal(LOG_POOL_SCAN, spa, tx, + spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } @@ -273,7 +273,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) else scn->scn_phys.scn_state = DSS_CANCELED; - spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, + spa_history_log_internal(spa, "scan done", tx, "complete=%u", complete); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index 75eb507b9..2ed47fe0c 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/dmu.h> @@ -85,17 +86,17 @@ top: /* Do a preliminary error check. */ dstg->dstg_err = 0; +#ifdef ZFS_DEBUG + /* + * Only check half the time, otherwise, the sync-context + * check will almost never fail. + */ + if (spa_get_random(2) == 0) + goto skip; +#endif rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); for (dst = list_head(&dstg->dstg_tasks); dst; dst = list_next(&dstg->dstg_tasks, dst)) { -#ifdef ZFS_DEBUG - /* - * Only check half the time, otherwise, the sync-context - * check will almost never fail. - */ - if (spa_get_random(2) == 0) - continue; -#endif dst->dst_err = dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); if (dst->dst_err) @@ -107,6 +108,9 @@ top: dmu_tx_commit(tx); return (dstg->dstg_err); } +#ifdef ZFS_DEBUG +skip: +#endif /* * We don't generally have many sync tasks, so pay the price of @@ -230,9 +234,6 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp, { dsl_sync_task_group_t *dstg; - if (!spa_writeable(dp->dp_spa)) - return; - dstg = dsl_sync_task_group_create(dp); dsl_sync_task_create(dstg, checkfunc, syncfunc, arg1, arg2, blocks_modified); diff --git a/module/zfs/rrwlock.c b/module/zfs/rrwlock.c index 4cef53f95..7f9290bd4 100644 --- a/module/zfs/rrwlock.c +++ b/module/zfs/rrwlock.c @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #include <sys/refcount.h> #include <sys/rrwlock.h> @@ -262,3 +265,13 @@ rrw_held(rrwlock_t *rrl, krw_t rw) return (held); } + +void +rrw_tsd_destroy(void *arg) +{ + rrw_node_t *rn = arg; + if (rn != NULL) { + panic("thread %p terminating with rrw lock %p held", + (void *)curthread, (void *)rn->rn_rrl); + } +} diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 65f78b784..7c37ca426 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -731,8 +731,8 @@ spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); - spa_history_log_internal(LOG_POOL_GUID_CHANGE, spa, tx, - "old=%lld new=%lld", oldguid, *newguid); + spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", + oldguid, *newguid); } /* @@ -2716,6 +2716,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_async_request(spa, SPA_ASYNC_RESILVER); /* + * Log the fact that we booted up (so that we can detect if + * we rebooted in the middle of an operation). + */ + spa_history_log_version(spa, "open"); + + /* * Delete any inconsistent datasets. */ (void) dmu_objset_find(spa_name(spa), @@ -3399,7 +3405,7 @@ spa_l2cache_drop(spa_t *spa) */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - const char *history_str, nvlist_t *zplprops) + nvlist_t *zplprops) { spa_t *spa; char *altroot = NULL; @@ -3620,9 +3626,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_sync(spa, B_FALSE, B_TRUE); - if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) - (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); - spa_history_log_version(spa, LOG_POOL_CREATE); + spa_history_log_version(spa, "create"); spa->spa_minref = refcount_count(&spa->spa_refcount); @@ -3824,7 +3828,6 @@ spa_import_rootpool(char *devpath, char *devid) } error = 0; - spa_history_log_version(spa, LOG_POOL_IMPORT); out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); @@ -3886,7 +3889,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_config_sync(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); return (0); } @@ -4017,7 +4020,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); #ifdef _KERNEL zvol_create_minors(pool); @@ -4560,7 +4563,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, + spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? "replace" : "attach", newvdpath, @@ -4778,7 +4781,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) error = spa_vdev_exit(spa, vd, txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, + spa_history_log_internal(spa, "detach", NULL, "vdev=%s", vdpath); spa_strfree(vdpath); @@ -5047,9 +5050,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) - spa_history_log_internal(LOG_POOL_VDEV_DETACH, - spa, tx, "vdev=%s", - vml[c]->vdev_path); + spa_history_log_internal(spa, "detach", tx, + "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } @@ -5064,8 +5066,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ - spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, - "split new pool %s from pool %s", newname, spa_name(spa)); + spa_history_log_internal(newspa, "split", NULL, + "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); @@ -5647,8 +5649,7 @@ spa_async_thread(spa_t *spa) * then log an internal history event. */ if (new_space != old_space) { - spa_history_log_internal(LOG_POOL_VDEV_ONLINE, - spa, NULL, + spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } @@ -5892,6 +5893,7 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, "version=%lld", version); } /* @@ -5927,6 +5929,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "set", tx, + "%s=enabled", nvpair_name(elem)); break; case ZPOOL_PROP_VERSION: @@ -5966,6 +5970,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); break; default: /* @@ -5988,7 +5994,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx) == 0); - + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { VERIFY(nvpair_value_uint64(elem, &intval) == 0); @@ -6000,6 +6007,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx) == 0); + spa_history_log_internal(spa, "set", tx, + "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } @@ -6028,13 +6037,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) } } - /* log internal history if this is not a zpool create */ - if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && - tx->tx_txg != TXG_INITIAL) { - spa_history_log_internal(LOG_POOL_PROPSET, - spa, tx, "%s %lld %s", - nvpair_name(elem), intval, spa_name(spa)); - } } mutex_exit(&spa->spa_props_lock); diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 9fb75f391..79d48620c 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/spa.h> @@ -30,9 +30,12 @@ #include <sys/dsl_synctask.h> #include <sys/dmu_tx.h> #include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> #include <sys/utsname.h> #include <sys/cmn_err.h> #include <sys/sunddi.h> +#include <sys/cred.h> #include "zfs_comutil.h" #ifdef _KERNEL #include <sys/zone.h> @@ -185,7 +188,7 @@ spa_history_zone(void) return (curproc->p_zone->zone_name); #endif #else - return ("global"); + return (NULL); #endif } @@ -197,14 +200,12 @@ static void spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; - history_arg_t *hap = arg2; - const char *history_str = hap->ha_history_str; + nvlist_t *nvl = arg2; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; size_t reclen; uint64_t le_len; - nvlist_t *nvrecord; char *record_packed = NULL; int ret; @@ -234,46 +235,36 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) } #endif - VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, - gethrestime_sec()) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); - if (hap->ha_zone != NULL) - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, - hap->ha_zone) == 0); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); #ifdef _KERNEL - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, - utsname.nodename) == 0); + fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename); #endif - if (hap->ha_log_type == LOG_CMD_POOL_CREATE || - hap->ha_log_type == LOG_CMD_NORMAL) { - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, - history_str) == 0); - - zfs_dbgmsg("command: %s", history_str); - } else { - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, - hap->ha_event) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, - tx->tx_txg) == 0); - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, - history_str) == 0); - - zfs_dbgmsg("internal %s pool:%s txg:%llu %s", - zfs_history_event_names[hap->ha_event], spa_name(spa), - (longlong_t)tx->tx_txg, history_str); - + if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + zfs_dbgmsg("command: %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { + if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { + zfs_dbgmsg("txg %lld %s %s (id %llu) %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } else { + zfs_dbgmsg("txg %lld %s %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } + } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { + zfs_dbgmsg("ioctl %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); } - VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); - record_packed = kmem_alloc(reclen, KM_PUSHPAGE); - - VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, - NV_ENCODE_XDR, KM_PUSHPAGE) == 0); + VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, + KM_PUSHPAGE), ==, 0); mutex_enter(&spa->spa_history_lock); - if (hap->ha_log_type == LOG_CMD_POOL_CREATE) - VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); /* write out the packed length as little endian */ le_len = LE_64((uint64_t)reclen); @@ -281,33 +272,44 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (!ret) ret = spa_history_write(spa, record_packed, reclen, shpp, tx); - if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { - shpp->sh_pool_create_len += sizeof (le_len) + reclen; - shpp->sh_bof = shpp->sh_pool_create_len; + /* The first command is the create, which we keep forever */ + if (ret == 0 && shpp->sh_pool_create_len == 0 && + nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; } mutex_exit(&spa->spa_history_lock); - nvlist_free(nvrecord); - kmem_free(record_packed, reclen); + fnvlist_pack_free(record_packed, reclen); dmu_buf_rele(dbp, FTAG); - - strfree(hap->ha_history_str); - if (hap->ha_zone != NULL) - strfree(hap->ha_zone); - kmem_free(hap, sizeof (history_arg_t)); + fnvlist_free(nvl); } /* * Write out a history event. */ int -spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) +spa_history_log(spa_t *spa, const char *msg) +{ + int err; + nvlist_t *nvl; + + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + + fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); + err = spa_history_log_nvl(spa, nvl); + fnvlist_free(nvl); + return (err); +} + +int +spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { - history_arg_t *ha; int err = 0; dmu_tx_t *tx; + nvlist_t *nvarg; - ASSERT(what != LOG_INTERNAL); + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) + return (EINVAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); @@ -316,19 +318,21 @@ spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) return (err); } - ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); - ha->ha_history_str = strdup(history_str); - ha->ha_zone = strdup(spa_history_zone()); - ha->ha_log_type = what; - ha->ha_uid = crgetuid(CRED()); + VERIFY0(nvlist_dup(nvl, &nvarg, KM_PUSHPAGE)); + if (spa_history_zone() != NULL) { + fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, + spa_history_zone()); + } + fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + spa_history_log_sync, spa, nvarg, 0, tx); dmu_tx_commit(tx); - /* spa_history_log_sync will free ha and strings */ + /* spa_history_log_sync will free nvl */ return (err); + } /* @@ -345,7 +349,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) int err; /* - * If the command history doesn't exist (older pool), + * If the command history doesn't exist (older pool), * that's ok, just return ENOENT. */ if (!spa->spa_history) @@ -428,44 +432,54 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) return (err); } +/* + * The nvlist will be consumed by this call. + */ static void -log_internal(history_internal_events_t event, spa_t *spa, +log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, dmu_tx_t *tx, const char *fmt, va_list adx) { - history_arg_t *ha; - va_list adx_copy; + char *msg; + va_list adx1; /* * If this is part of creating a pool, not everything is * initialized yet, so don't bother logging the internal events. + * Likewise if the pool is not writeable. */ - if (tx->tx_txg == TXG_INITIAL) + if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) { + fnvlist_free(nvl); return; + } - ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); - va_copy(adx_copy, adx); - ha->ha_history_str = kmem_vasprintf(fmt, adx_copy); - va_end(adx_copy); - ha->ha_log_type = LOG_INTERNAL; - ha->ha_event = event; - ha->ha_zone = NULL; - ha->ha_uid = 0; + va_copy(adx1, adx); + msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx1) + 1, KM_PUSHPAGE); + va_end(adx1); + va_copy(adx1, adx); + (void) vsprintf(msg, fmt, adx1); + va_end(adx1); + fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); + strfree(msg); + + fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); if (dmu_tx_is_syncing(tx)) { - spa_history_log_sync(spa, ha, tx); + spa_history_log_sync(spa, nvl, tx); } else { dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + spa_history_log_sync, spa, nvl, 0, tx); } - /* spa_history_log_sync() will free ha and strings */ + /* spa_history_log_sync() will free nvl */ } void -spa_history_log_internal(history_internal_events_t event, spa_t *spa, +spa_history_log_internal(spa_t *spa, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { dmu_tx_t *htx = tx; va_list adx; + nvlist_t *nvl; /* create a tx if we didn't get one */ if (tx == NULL) { @@ -477,7 +491,8 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } va_start(adx, fmt); - log_internal(event, spa, htx, fmt, adx); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + log_internal(nvl, operation, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ @@ -486,21 +501,58 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } void -spa_history_log_version(spa_t *spa, history_internal_events_t event) +spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl; + + ASSERT(tx != NULL); + + dsl_dataset_name(ds, namebuf); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); + + va_start(adx, fmt); + log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl; + + ASSERT(tx != NULL); + + dsl_dir_name(dd, namebuf); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, + dd->dd_phys->dd_head_dataset_obj); + + va_start(adx, fmt); + log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_version(spa_t *spa, const char *operation) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); - if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { - spa_history_log_internal(event, spa, NULL, - "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", - (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, - utsname.nodename, utsname.release, utsname.version, - utsname.machine); - } - cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", - event == LOG_POOL_IMPORT ? "imported" : - event == LOG_POOL_CREATE ? "created" : "accessed", + spa_history_log_internal(spa, operation, NULL, + "pool version %llu; software version %llu/%d; uts %s %s %s %s", + (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, + utsname.nodename, utsname.release, utsname.version, + utsname.machine); + cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation, (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif } diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index b35f27d19..a03e1c694 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -632,8 +632,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, goto out; if (error == 0) { - error = dmu_objset_snapshot(dsname, dirname, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(dsname, dirname); if (error) goto out; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index a9184a1b6..e64d6a1f0 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -28,6 +28,108 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * ZFS ioctls. + * + * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage + * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. + * + * There are two ways that we handle ioctls: the legacy way where almost + * all of the logic is in the ioctl callback, and the new way where most + * of the marshalling is handled in the common entry point, zfsdev_ioctl(). + * + * Non-legacy ioctls should be registered by calling + * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked + * from userland by lzc_ioctl(). + * + * The registration arguments are as follows: + * + * const char *name + * The name of the ioctl. This is used for history logging. If the + * ioctl returns successfully (the callback returns 0), and allow_log + * is true, then a history log entry will be recorded with the input & + * output nvlists. The log entry can be printed with "zpool history -i". + * + * zfs_ioc_t ioc + * The ioctl request number, which userland will pass to ioctl(2). + * The ioctl numbers can change from release to release, because + * the caller (libzfs) must be matched to the kernel. + * + * zfs_secpolicy_func_t *secpolicy + * This function will be called before the zfs_ioc_func_t, to + * determine if this operation is permitted. It should return EPERM + * on failure, and 0 on success. Checks include determining if the + * dataset is visible in this zone, and if the user has either all + * zfs privileges in the zone (SYS_MOUNT), or has been granted permission + * to do this operation on this dataset with "zfs allow". + * + * zfs_ioc_namecheck_t namecheck + * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool + * name, a dataset name, or nothing. If the name is not well-formed, + * the ioctl will fail and the callback will not be called. + * Therefore, the callback can assume that the name is well-formed + * (e.g. is null-terminated, doesn't have more than one '@' character, + * doesn't have invalid characters). + * + * zfs_ioc_poolcheck_t pool_check + * This specifies requirements on the pool state. If the pool does + * not meet them (is suspended or is readonly), the ioctl will fail + * and the callback will not be called. If any checks are specified + * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. + * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | + * POOL_CHECK_READONLY). + * + * boolean_t smush_outnvlist + * If smush_outnvlist is true, then the output is presumed to be a + * list of errors, and it will be "smushed" down to fit into the + * caller's buffer, by removing some entries and replacing them with a + * single "N_MORE_ERRORS" entry indicating how many were removed. See + * nvlist_smush() for details. If smush_outnvlist is false, and the + * outnvlist does not fit into the userland-provided buffer, then the + * ioctl will fail with ENOMEM. + * + * zfs_ioc_func_t *func + * The callback function that will perform the operation. + * + * The callback should return 0 on success, or an error number on + * failure. If the function fails, the userland ioctl will return -1, + * and errno will be set to the callback's return value. The callback + * will be called with the following arguments: + * + * const char *name + * The name of the pool or dataset to operate on, from + * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the + * expected type (pool, dataset, or none). + * + * nvlist_t *innvl + * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or + * NULL if no input nvlist was provided. Changes to this nvlist are + * ignored. If the input nvlist could not be deserialized, the + * ioctl will fail and the callback will not be called. + * + * nvlist_t *outnvl + * The output nvlist, initially empty. The callback can fill it in, + * and it will be returned to userland by serializing it into + * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization + * fails (e.g. because the caller didn't supply a large enough + * buffer), then the overall ioctl will fail. See the + * 'smush_nvlist' argument above for additional behaviors. + * + * There are two typical uses of the output nvlist: + * - To return state, e.g. property values. In this case, + * smush_outnvlist should be false. If the buffer was not large + * enough, the caller will reallocate a larger buffer and try + * the ioctl again. + * + * - To return multiple errors from an ioctl which makes on-disk + * changes. In this case, smush_outnvlist should be true. + * Ioctls which make on-disk modifications should generally not + * use the outnvl if they succeed, because the caller can not + * distinguish between the operation failing, and + * deserialization failing. */ #include <sys/types.h> @@ -91,8 +193,13 @@ list_t zfsdev_state_list; extern void zfs_init(void); extern void zfs_fini(void); -typedef int zfs_ioc_func_t(zfs_cmd_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; +static uint_t zfs_allow_log_key; + +typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); +typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *); typedef enum { NO_NAME, @@ -103,15 +210,18 @@ typedef enum { typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, - POOL_CHECK_READONLY = 1 << 2 + POOL_CHECK_READONLY = 1 << 2, } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { + zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; - boolean_t zvec_his_log; + boolean_t zvec_allow_log; zfs_ioc_poolcheck_t zvec_pool_check; + boolean_t zvec_smush_outnvlist; + const char *zvec_name; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ @@ -129,7 +239,8 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); +static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature); static int zfs_prop_activate_feature_check(void *arg1, void *arg2, @@ -237,7 +348,7 @@ zfs_log_history(zfs_cmd_t *zc) if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) - (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); + (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } history_str_free(buf); @@ -249,7 +360,7 @@ zfs_log_history(zfs_cmd_t *zc) */ /* ARGSUSED */ static int -zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (0); } @@ -260,7 +371,7 @@ zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) @@ -329,26 +440,13 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) return (zfs_dozonecheck_impl(dataset, zoned, cr)); } -/* - * If name ends in a '@', then require recursive permissions. - */ -int +static int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; - boolean_t descendent = B_FALSE; dsl_dataset_t *ds; - char *at; - - at = strchr(name, '@'); - if (at != NULL && at[1] == '\0') { - *at = '\0'; - descendent = B_TRUE; - } error = dsl_dataset_hold(name, FTAG, &ds); - if (at != NULL) - *at = '@'; if (error != 0) return (error); @@ -356,14 +454,14 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, descendent, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } dsl_dataset_rele(ds, FTAG); return (error); } -int +static int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { @@ -373,7 +471,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } return (error); } @@ -533,8 +631,9 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } -int -zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -549,15 +648,17 @@ zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) return (0); } -int -zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } -int -zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; @@ -593,9 +694,18 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ +static int +zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND, cr)); +} + #ifdef HAVE_SMB_SHARE +/* ARGSUSED */ static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { vnode_t *vp; int error; @@ -620,7 +730,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) #endif /* HAVE_SMB_SHARE */ int -zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) @@ -629,7 +739,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) if (secpolicy_nfs(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); @@ -637,7 +747,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) } int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) @@ -646,7 +756,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) if (secpolicy_smb(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); @@ -687,29 +797,55 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires - * descendent mount and destroy permissions. + * descendant mount and destroy permissions. */ +/* ARGSUSED */ static int -zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int error; - char *dsname; + nvlist_t *snaps; + nvpair_t *pair, *nextpair; + int error = 0; - dsname = kmem_asprintf("%s@", zc->zc_name); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nextpair) { + dsl_dataset_t *ds; - error = zfs_secpolicy_destroy_perms(dsname, cr); - if (error == ENOENT) - error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); + nextpair = nvlist_next_nvpair(snaps, pair); + error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); + if (error == 0) { + dsl_dataset_rele(ds, FTAG); + } else if (error == ENOENT) { + /* + * Ignore any snapshots that don't exist (we consider + * them "already destroyed"). Remove the name from the + * nvl here in case the snapshot is created between + * now and when we try to destroy it (in which case + * we don't want to destroy it since we haven't + * checked for permission). + */ + fnvlist_remove_nvpair(snaps, pair); + error = 0; + continue; + } else { + break; + } + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); + if (error != 0) + break; + } - strfree(dsname); return (error); } @@ -742,14 +878,16 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; @@ -789,8 +927,9 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -813,37 +952,72 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) ZFS_DELEG_PERM_SNAPSHOT, cr)); } +/* + * Check for permission to create each snapshot in the nvlist. + */ +/* ARGSUSED */ static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { + nvlist_t *snaps; + int error = 0; + nvpair_t *pair; + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *name = nvpair_name(pair); + char *atp = strchr(name, '@'); - return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); + if (atp == NULL) { + error = EINVAL; + break; + } + *atp = '\0'; + error = zfs_secpolicy_snapshot_perms(name, cr); + *atp = '@'; + if (error != 0) + break; + } + return (error); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + /* + * Even root must have a proper TSD so that we know what pool + * to log to. + */ + if (tsd_get(zfs_allow_log_key) == NULL) + return (EPERM); + return (0); } static int -zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; int error; + char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); - if (zc->zc_value[0] != '\0') { - if ((error = zfs_secpolicy_write_perms(zc->zc_value, - ZFS_DELEG_PERM_CLONE, cr)) != 0) - return (error); - } + if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && + (error = zfs_secpolicy_write_perms(origin, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); - error = zfs_secpolicy_write_perms(parentname, - ZFS_DELEG_PERM_MOUNT, cr); - - return (error); + return (zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr)); } /* @@ -852,7 +1026,7 @@ zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); @@ -865,7 +1039,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -881,13 +1055,14 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (secpolicy_zinject(cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); @@ -903,9 +1078,9 @@ zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); @@ -932,9 +1107,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); @@ -945,22 +1120,25 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) userquota_perms[zc->zc_objset_type], cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); @@ -970,7 +1148,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) * Policy for allowing temporary snapshots to be taken or released */ static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, @@ -983,13 +1161,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); - error = zfs_secpolicy_snapshot(zc, cr); + error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); if (!error) - error = zfs_secpolicy_hold(zc, cr); + error = zfs_secpolicy_hold(zc, innvl, cr); if (!error) - error = zfs_secpolicy_release(zc, cr); + error = zfs_secpolicy_release(zc, innvl, cr); if (!error) - error = zfs_secpolicy_destroy(zc, cr); + error = zfs_secpolicy_destroy(zc, innvl, cr); return (error); } @@ -1028,36 +1206,40 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) return (0); } +/* + * Reduce the size of this nvlist until it can be serialized in 'max' bytes. + * Entries will be removed from the end of the nvlist, and one int32 entry + * named "N_MORE_ERRORS" will be added indicating how many entries were + * removed. + */ static int -fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +nvlist_smush(nvlist_t *errors, size_t max) { size_t size; - VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(errors); - if (size > zc->zc_nvlist_dst_size) { + if (size > max) { nvpair_t *more_errors; int n = 0; - if (zc->zc_nvlist_dst_size < 1024) + if (max < 1024) return (ENOMEM); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); - more_errors = nvlist_prev_nvpair(*errors, NULL); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); + more_errors = nvlist_prev_nvpair(errors, NULL); do { - nvpair_t *pair = nvlist_prev_nvpair(*errors, + nvpair_t *pair = nvlist_prev_nvpair(errors, more_errors); - VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); + fnvlist_remove_nvpair(errors, pair); n++; - VERIFY(nvlist_size(*errors, &size, - NV_ENCODE_NATIVE) == 0); - } while (size > zc->zc_nvlist_dst_size); + size = fnvlist_size(errors); + } while (size > max); - VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); - ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); - ASSERT(size <= zc->zc_nvlist_dst_size); + fnvlist_remove_nvpair(errors, more_errors); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); + ASSERT3U(fnvlist_size(errors), <=, max); } return (0); @@ -1070,21 +1252,20 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) int error = 0; size_t size; - VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(nvl); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { - packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - KM_SLEEP) == 0); + packed = fnvlist_pack(nvl, &size); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; - kmem_free(packed, size); + fnvlist_pack_free(packed, size); } zc->zc_nvlist_dst_size = size; + zc->zc_nvlist_dst_filled = B_TRUE; return (error); } @@ -1163,7 +1344,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; - char *buf; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) @@ -1203,9 +1383,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; } - buf = history_str_get(zc); - - error = spa_create(zc->zc_name, config, props, buf, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops); /* * Set the remaining root properties @@ -1214,9 +1392,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); - if (buf != NULL) - history_str_free(buf); - pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); @@ -2249,32 +2424,26 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, /* * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the first error - * encountered. If the caller provides a non-NULL errlist, it also gives the - * complete list of names of all the properties it failed to set along with the - * corresponding error numbers. The caller is responsible for freeing the - * returned errlist. + * it continues to set as many as it can and returns the last error + * encountered. If the caller provides a non-NULL errlist, it will be filled in + * with the list of names of all the properties that failed along with the + * corresponding error numbers. * - * If every property is set successfully, zero is returned and the list pointed - * at by errlist is NULL. + * If every property is set successfully, zero is returned and errlist is not + * modified. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, - nvlist_t **errlist) + nvlist_t *errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; - nvlist_t *genericnvl; - nvlist_t *errors; - nvlist_t *retrynvl; - - VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + nvlist_t *genericnvl = fnvlist_alloc(); + nvlist_t *retrynvl = fnvlist_alloc(); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { @@ -2286,7 +2455,7 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + attrs = fnvpair_value_nvlist(pair); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; @@ -2311,8 +2480,7 @@ retry: } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: @@ -2356,8 +2524,11 @@ retry: } } - if (err != 0) - VERIFY(nvlist_add_int32(errors, propname, err) == 0); + if (err != 0) { + if (errlist != NULL) + fnvlist_add_int32(errlist, propname, err); + rv = err; + } } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2379,44 +2550,33 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &propval) == 0); + attrs = fnvpair_value_nvlist(pair); + propval = fnvlist_lookup_nvpair(attrs, + ZPROP_VALUE); } if (nvpair_type(propval) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(propval, - &strval) == 0); + strval = fnvpair_value_string(propval); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { - VERIFY(nvlist_add_int32(errors, propname, - err) == 0); + if (errlist != NULL) { + fnvlist_add_int32(errlist, propname, + err); + } + rv = err; } } } nvlist_free(genericnvl); nvlist_free(retrynvl); - if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { - nvlist_free(errors); - errors = NULL; - } else { - VERIFY(nvpair_value_int32(pair, &rv) == 0); - } - - if (errlist == NULL) - nvlist_free(errors); - else - *errlist = errors; - return (rv); } @@ -2424,7 +2584,7 @@ retry: * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(char *fsname, nvlist_t *nvl) +zfs_check_userprops(const char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; @@ -2504,7 +2664,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); - nvlist_t *errors = NULL; + nvlist_t *errors; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, @@ -2527,7 +2687,8 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) } } - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); + errors = fnvlist_alloc(); + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); @@ -2609,7 +2770,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) return (EINVAL); } - /* the property name has been validated by zfs_secpolicy_inherit() */ + /* property name has been validated by zfs_secpolicy_inherit_prop() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } @@ -2955,26 +3116,30 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, } /* - * inputs: - * zc_objset_type type of objset to create (fs vs zvol) - * zc_name name of new objset - * zc_value name of snapshot to clone from (may be empty) - * zc_nvlist_src{_size} nvlist of properties to apply + * innvl: { + * "type" -> dmu_objset_type_t (int32) + * (optional) "props" -> { prop -> value } + * } * - * outputs: none + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_create(zfs_cmd_t *zc) +zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *clone; int error = 0; - zfs_creat_t zct; + zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - dmu_objset_type_t type = zc->zc_objset_type; + int32_t type32; + dmu_objset_type_t type; + boolean_t is_insensitive = B_FALSE; - switch (type) { + if (nvlist_lookup_int32(innvl, "type", &type32) != 0) + return (EINVAL); + type = type32; + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; @@ -2987,154 +3152,221 @@ zfs_ioc_create(zfs_cmd_t *zc) cbfunc = NULL; break; } - if (strchr(zc->zc_name, '@') || - strchr(zc->zc_name, '%')) + if (strchr(fsname, '@') || + strchr(fsname, '%')) return (EINVAL); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) - return (error); - - zct.zct_zplprops = NULL; zct.zct_props = nvprops; - if (zc->zc_value[0] != '\0') { - /* - * We're creating a clone of an existing snapshot. - */ - zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - - error = dmu_objset_hold(zc->zc_value, FTAG, &clone); - if (error) { - nvlist_free(nvprops); - return (error); - } + if (cbfunc == NULL) + return (EINVAL); - error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); - if (error) { - nvlist_free(nvprops); - return (error); - } - } else { - boolean_t is_insensitive = B_FALSE; + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; - if (cbfunc == NULL) { - nvlist_free(nvprops); + if (nvprops == NULL) + return (EINVAL); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) return (EINVAL); - } - - if (type == DMU_OST_ZVOL) { - uint64_t volsize, volblocksize; - - if (nvprops == NULL || - nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &volsize) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - if ((error = nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize)) != 0 && error != ENOENT) { - nvlist_free(nvprops); - return (EINVAL); - } + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) + return (EINVAL); - if (error != 0) - volblocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); - if ((error = zvol_check_volblocksize( - volblocksize)) != 0 || - (error = zvol_check_volsize(volsize, - volblocksize)) != 0) { - nvlist_free(nvprops); - return (error); - } - } else if (type == DMU_OST_ZFS) { - int error; + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) + return (error); + } else if (type == DMU_OST_ZFS) { + int error; - /* - * We have to have normalization and - * case-folding flags correct when we do the - * file system creation, so go figure them out - * now. - */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - error = zfs_fill_zplprops(zc->zc_name, nvprops, - zct.zct_zplprops, &is_insensitive); - if (error != 0) { - nvlist_free(nvprops); - nvlist_free(zct.zct_zplprops); - return (error); - } + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(fsname, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); } - error = dmu_objset_create(zc->zc_name, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); - nvlist_free(zct.zct_zplprops); } + error = dmu_objset_create(fsname, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + /* * It would be nice to do this atomically. */ if (error == 0) { - error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, - nvprops, NULL); + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + (void) dmu_objset_destroy(fsname, B_FALSE); } - nvlist_free(nvprops); return (error); } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot - * zc_cookie recursive flag - * zc_nvlist_src[_size] property list + * innvl: { + * "origin" -> name of origin snapshot + * (optional) "props" -> { prop -> value } + * } * * outputs: - * zc_value short snapname (i.e. part after the '@') + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_snapshot(zfs_cmd_t *zc) +zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { + int error = 0; nvlist_t *nvprops = NULL; - int error; - boolean_t recursive = zc->zc_cookie; + char *origin_name; + dsl_dataset_t *origin; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) return (EINVAL); + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) + if (strchr(fsname, '@') || + strchr(fsname, '%')) + return (EINVAL); + + if (dataset_namecheck(origin_name, NULL, NULL) != 0) + return (EINVAL); + + error = dsl_dataset_hold(origin_name, FTAG, &origin); + if (error) return (error); - error = zfs_check_userprops(zc->zc_name, nvprops); + error = dmu_objset_clone(fsname, origin, 0); + dsl_dataset_rele(origin, FTAG); if (error) - goto out; + return (error); - if (!nvlist_empty(nvprops) && - zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { - error = ENOTSUP; - goto out; + /* + * It would be nice to do this atomically. + */ + if (error == 0) { + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); + if (error != 0) + (void) dmu_objset_destroy(fsname, B_FALSE); } + return (error); +} - error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, - nvprops, recursive, B_FALSE, -1); +/* + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional) "props" -> { prop -> value (string) } + * } + * + * outnvl: snapshot -> error code (int32) + * + */ +static int +zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + nvlist_t *snaps; + nvlist_t *props = NULL; + int error, poollen; + nvpair_t *pair, *pair2; -out: - nvlist_free(nvprops); + (void) nvlist_lookup_nvlist(innvl, "props", &props); + if ((error = zfs_check_userprops(poolname, props)) != 0) + return (error); + + if (!nvlist_empty(props) && + zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) + return (ENOTSUP); + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + const char *name = nvpair_name(pair); + const char *cp = strchr(name, '@'); + + /* + * The snap name must contain an @, and the part after it must + * contain only valid characters. + */ + if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) + return (EINVAL); + + /* + * The snap must be in the specified pool. + */ + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + + /* This must be the only snap of this fs. */ + for (pair2 = nvlist_next_nvpair(snaps, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { + if (strncmp(name, nvpair_name(pair2), cp - name + 1) + == 0) { + return (EXDEV); + } + } + } + + error = dmu_objset_snapshot(snaps, props, outnvl); + return (error); +} + +/* + * innvl: "message" -> string + */ +/* ARGSUSED */ +static int +zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ + char *message; + spa_t *spa; + int error; + char *poolname; + + /* + * The poolname in the ioctl is not set, we get it from the TSD, + * which was set at the end of the last successful ioctl that allows + * logging. The secpolicy func already checked that it is set. + * Only one log ioctl is allowed after each successful ioctl, so + * we clear the TSD here. + */ + poolname = tsd_get(zfs_allow_log_key); + (void) tsd_set(zfs_allow_log_key, NULL); + error = spa_open(poolname, &spa, FTAG); + strfree(poolname); + if (error != 0) + return (error); + + if (nvlist_lookup_string(innvl, "message", &message) != 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { + spa_close(spa, FTAG); + return (ENOTSUP); + } + + error = spa_history_log(spa, message); + spa_close(spa, FTAG); return (error); } @@ -3143,6 +3375,7 @@ out: * name dataset name, or when 'arg == NULL' the full snapshot name * arg short snapshot name (i.e. part after the '@') */ +/* ARGSUSED */ int zfs_unmount_snap(const char *name, void *arg) { @@ -3153,22 +3386,13 @@ zfs_unmount_snap(const char *name, void *arg) char *ptr; int error; - if (arg) { - dsname = strdup(name); - snapname = strdup(arg); - } else { - ptr = strchr(name, '@'); - if (ptr) { - dsname = strdup(name); - dsname[ptr - name] = '\0'; - snapname = strdup(ptr + 1); - } else { - return (0); - } - } + if ((ptr = strchr(name, '@')) == NULL) + return (0); + dsname = strdup(name); + dsname[ptr - name] = '\0'; + snapname = strdup(ptr + 1); fullname = kmem_asprintf("%s@%s", dsname, snapname); - error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (error == 0) { error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); @@ -3187,47 +3411,46 @@ zfs_unmount_snap(const char *name, void *arg) } /* - * inputs: - * zc_name name of filesystem, snaps must be under it - * zc_nvlist_src[_size] full names of snapshots to destroy - * zc_defer_destroy mark for deferred destroy + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional boolean) "defer" + * } * - * outputs: - * zc_name on failure, name of failed snapshot + * outnvl: snapshot -> error code (int32) */ static int -zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int err, len; - nvlist_t *nvl; + int poollen; + nvlist_t *snaps; nvpair_t *pair; + boolean_t defer; - if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvl)) != 0) - return (err); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + defer = nvlist_exists(innvl, "defer"); - len = strlen(zc->zc_name); - for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) { + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); + /* - * The snap name must be underneath the zc_name. This ensures - * that our permission checks were legitimate. + * The snap must be in the specified pool. */ - if (strncmp(zc->zc_name, name, len) != 0 || - (name[len] != '@' && name[len] != '/')) { - nvlist_free(nvl); - return (EINVAL); - } + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + /* + * Ignore failures to unmount; dmu_snapshots_destroy_nvl() + * will deal with this gracefully (by filling in outnvl). + */ (void) zfs_unmount_snap(name, NULL); (void) zvol_remove_minor(name); } - err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, - zc->zc_name); - nvlist_free(nvl); - return (err); + return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl)); } /* @@ -3605,7 +3828,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || - (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { + (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); @@ -3813,8 +4036,6 @@ zfs_ioc_recv(zfs_cmd_t *zc) * dmu_recv_begin() succeeds. */ if (props) { - nvlist_t *errlist; - if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= @@ -3833,12 +4054,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, &errlist); - (void) nvlist_merge(errors, errlist, 0); - nvlist_free(errlist); + props, errors); } - if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { + if (zc->zc_nvlist_dst_size != 0 && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. @@ -3970,15 +4191,13 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); - if (error) { - spa_close(spa, FTAG); + spa_close(spa, FTAG); + if (error) return (error); - } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); - spa_close(spa, FTAG); return (error); } @@ -3986,7 +4205,6 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); @@ -3997,12 +4215,37 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(ds, FTAG); return (error); } - } else { - spa_close(spa, FTAG); + } + + if (zc->zc_obj) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (fromsnap != NULL) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); + rw_exit(&dp->dp_config_rwlock); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + error = dmu_objset_from_ds(dsfrom, &fromsnap); + if (error) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); + } + } } if (estimate) { - error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + error = dmu_send_estimate(tosnap, fromsnap, &zc->zc_objset_type); } else { file_t *fp = getf(zc->zc_cookie); @@ -4014,7 +4257,7 @@ zfs_ioc_send(zfs_cmd_t *zc) } off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, zc->zc_obj, + error = dmu_send(tosnap, fromsnap, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) @@ -4413,6 +4656,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: + * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) @@ -4420,22 +4664,21 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) char *snap_name; int error; - snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, + snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); - if (strlen(snap_name) >= MAXNAMELEN) { + if (strlen(snap_name) >= MAXPATHLEN) { strfree(snap_name); return (E2BIG); } - error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, - NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); + error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } - (void) strcpy(zc->zc_value, snap_name); + (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1); strfree(snap_name); return (0); } @@ -4858,178 +5101,440 @@ zfs_ioc_space_written(zfs_cmd_t *zc) } /* - * inputs: - * zc_name full name of last snapshot - * zc_value full name of first snapshot + * innvl: { + * "firstsnap" -> snapshot name + * } * - * outputs: - * zc_cookie space in bytes - * zc_objset_type compressed space in bytes - * zc_perm_action uncompressed space in bytes + * outnvl: { + * "used" -> space in bytes + * "compressed" -> compressed space in bytes + * "uncompressed" -> uncompressed space in bytes + * } */ static int -zfs_ioc_space_snaps(zfs_cmd_t *zc) +zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; dsl_dataset_t *new, *old; + char *firstsnap; + uint64_t used, comp, uncomp; - error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) + return (EINVAL); + + error = dsl_dataset_hold(lastsnap, FTAG, &new); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + error = dsl_dataset_hold(firstsnap, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); return (error); } - error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, - &zc->zc_objset_type, &zc->zc_perm_action); + error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + fnvlist_add_uint64(outnvl, "used", used); + fnvlist_add_uint64(outnvl, "compressed", comp); + fnvlist_add_uint64(outnvl, "uncompressed", uncomp); return (error); } /* - * pool create, destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export - * do the logging of those commands. + * innvl: { + * "fd" -> file descriptor to write stream to (int32) + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl is unused */ -static zfs_ioc_vec_t zfs_ioc_vec[] = { - { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_READONLY }, - { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_create_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_remove_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, - DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, - DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, - B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE } -}; +/* ARGSUSED */ +static int +zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + int error; + offset_t off; + char *fromname; + int fd; + + error = nvlist_lookup_int32(innvl, "fd", &fd); + if (error != 0) + return (EINVAL); + + error = dmu_objset_hold(snapname, FTAG, &tosnap); + if (error) + return (error); + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dmu_objset_hold(fromname, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + } + + { + file_t *fp = getf(fd); + if (fp == NULL) { + dmu_objset_rele(tosnap, FTAG); + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + return (EBADF); + } + + off = fp->f_offset; + error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off); + + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + } + releasef(fd); + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + +/* + * Determine approximately how large a zfs send stream will be -- the number + * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). + * + * innvl: { + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl: { + * "space" -> bytes of space (uint64) + * } + */ +static int +zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + int error; + char *fromname; + uint64_t space; + + error = dmu_objset_hold(snapname, FTAG, &tosnap); + if (error) + return (error); + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dmu_objset_hold(fromname, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + } + + error = dmu_send_estimate(tosnap, fromsnap, &space); + fnvlist_add_uint64(outnvl, "space", space); + + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + + +static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; + +static void +zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + boolean_t log_history, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + vec->zvec_legacy_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_allow_log = log_history; + vec->zvec_pool_check = pool_check; +} + +/* + * See the block comment at the beginning of this file for details on + * each argument to this function. + */ +static void +zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, + boolean_t allow_log) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + /* if we are logging, the name must be valid */ + ASSERT(!allow_log || namecheck != NO_NAME); + + vec->zvec_name = name; + vec->zvec_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_pool_check = pool_check; + vec->zvec_smush_outnvlist = smush_outnvlist; + vec->zvec_allow_log = allow_log; +} + +static void +zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, boolean_t log_history, + zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + POOL_NAME, log_history, pool_check); +} + +static void +zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, pool_check); +} + +static void +zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, + POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + NO_NAME, B_FALSE, POOL_CHECK_NONE); +} + +static void +zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, + zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED); +} + +static void +zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_dataset_read_secpolicy(ioc, func, + zfs_secpolicy_read); +} + +static void +zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_init(void) +{ + zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, + zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, + zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); + + zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, + zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, + zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, + zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("create", ZFS_IOC_CREATE, + zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("clone", ZFS_IOC_CLONE, + zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, + zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + /* IOCTLS that use the legacy function signature */ + + zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, + zfs_ioc_pool_scan); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, + zfs_ioc_pool_upgrade); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, + zfs_ioc_vdev_add); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, + zfs_ioc_vdev_remove); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, + zfs_ioc_vdev_set_state); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, + zfs_ioc_vdev_attach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, + zfs_ioc_vdev_detach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, + zfs_ioc_vdev_setpath); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, + zfs_ioc_vdev_setfru); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, + zfs_ioc_pool_set_props); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, + zfs_ioc_vdev_split); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, + zfs_ioc_pool_reguid); + + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, + zfs_ioc_pool_configs, zfs_secpolicy_none); + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, + zfs_ioc_pool_tryimport, zfs_secpolicy_config); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, + zfs_ioc_inject_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, + zfs_ioc_clear_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, + zfs_ioc_inject_list_next, zfs_secpolicy_inject); + + /* + * pool destroy, and export don't log the history as part of + * zfsdev_ioctl, but rather zfs_ioc_pool_export + * does the logging of those commands. + */ + zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, + zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, + zfs_ioc_dsobj_to_dsname, + zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, + zfs_ioc_pool_get_history, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, + zfs_ioc_space_written); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS, + zfs_ioc_get_holds); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, + zfs_ioc_objset_recvd_props); + zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, + zfs_ioc_next_obj); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, + zfs_ioc_get_fsacl); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, + zfs_ioc_objset_stats); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, + zfs_ioc_objset_zplprops); + zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, + zfs_ioc_dataset_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, + zfs_ioc_snapshot_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, + zfs_ioc_send_progress); + + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, + zfs_ioc_diff, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, + zfs_ioc_obj_to_stats, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, + zfs_ioc_obj_to_path, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, + zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, + zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, + zfs_ioc_send, zfs_secpolicy_send); + + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, + zfs_secpolicy_none); + zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, + zfs_secpolicy_destroy); + zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback, + zfs_secpolicy_rollback); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, + zfs_secpolicy_rename); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, + zfs_secpolicy_recv); + zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, + zfs_secpolicy_promote); + zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold, + zfs_secpolicy_hold); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release, + zfs_secpolicy_release); + zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, + zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, + zfs_secpolicy_set_fsacl); + + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, + zfs_secpolicy_share, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, + zfs_secpolicy_smb_acl, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, + zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, + zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + + /* + * ZoL functions + */ + zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor, + zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor, + zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); +} int pool_status_check(const char *name, zfs_ioc_namecheck_t type, @@ -5192,60 +5697,135 @@ static long zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { zfs_cmd_t *zc; - uint_t vec; - int error, rc, flag = 0; - - vec = cmd - ZFS_IOC; - if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) + uint_t vecnum; + int error, rc, len, flag = 0; + const zfs_ioc_vec_t *vec; + char saved_poolname[MAXNAMELEN]; + nvlist_t *innvl = NULL; + + vecnum = cmd - ZFS_IOC_FIRST; + if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (-EINVAL); + vec = &zfs_ioc_vec[vecnum]; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); - if (error != 0) + if (error != 0) { error = EFAULT; + goto out; + } - if ((error == 0) && !(flag & FKIOCTL)) - error = zfs_ioc_vec[vec].zvec_secpolicy(zc, CRED()); + zc->zc_iflags = flag & FKIOCTL; + if (zc->zc_nvlist_src_size != 0) { + error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &innvl); + if (error != 0) + goto out; + } /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. */ - if (error == 0) { - zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; - zc->zc_iflags = flag & FKIOCTL; - switch (zfs_ioc_vec[vec].zvec_namecheck) { - case POOL_NAME: - if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; + zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; + switch (vec->zvec_namecheck) { + case POOL_NAME: + if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + else error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case DATASET_NAME: - if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; + case DATASET_NAME: + if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + else error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case NO_NAME: - break; - } + case NO_NAME: + break; } - if (error == 0) - error = zfs_ioc_vec[vec].zvec_func(zc); + if (error == 0 && !(flag & FKIOCTL)) + error = vec->zvec_secpolicy(zc, innvl, CRED()); + + if (error != 0) + goto out; + + /* legacy ioctls can modify zc_name */ + (void) strlcpy(saved_poolname, zc->zc_name, sizeof(saved_poolname)); + len = strcspn(saved_poolname, "/@") + 1; + saved_poolname[len] = '\0'; + + if (vec->zvec_func != NULL) { + nvlist_t *outnvl; + int puterror = 0; + spa_t *spa; + nvlist_t *lognv = NULL; + + ASSERT(vec->zvec_legacy_func == NULL); + + /* + * Add the innvl to the lognv before calling the func, + * in case the func changes the innvl. + */ + if (vec->zvec_allow_log) { + lognv = fnvlist_alloc(); + fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, + vec->zvec_name); + if (!nvlist_empty(innvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, + innvl); + } + } + + outnvl = fnvlist_alloc(); + error = vec->zvec_func(zc->zc_name, innvl, outnvl); + + if (error == 0 && vec->zvec_allow_log && + spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (!nvlist_empty(outnvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, + outnvl); + } + (void) spa_history_log_nvl(spa, lognv); + spa_close(spa, FTAG); + } + fnvlist_free(lognv); + + if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { + int smusherror = 0; + if (vec->zvec_smush_outnvlist) { + smusherror = nvlist_smush(outnvl, + zc->zc_nvlist_dst_size); + } + if (smusherror == 0) + puterror = put_nvlist(zc, outnvl); + } + + if (puterror != 0) + error = puterror; + + nvlist_free(outnvl); + } else { + error = vec->zvec_legacy_func(zc); + } + +out: + nvlist_free(innvl); rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); - if (error == 0) { - if (rc != 0) - error = EFAULT; - if (zfs_ioc_vec[vec].zvec_his_log) - zfs_log_history(zc); + if (error == 0 && rc != 0) + error = EFAULT; + if (error == 0 && vec->zvec_allow_log) { + char *s = tsd_get(zfs_allow_log_key); + if (s != NULL) + strfree(s); + (void) tsd_set(zfs_allow_log_key, strdup(saved_poolname)); } kmem_free(zc, sizeof (zfs_cmd_t)); @@ -5307,8 +5887,12 @@ zfs_detach(void) list_destroy(&zfsdev_state_list); } -uint_t zfs_fsyncer_key; -extern uint_t rrw_tsd_key; +static void +zfs_allow_log_destroy(void *arg) +{ + char *poolname = arg; + strfree(poolname); +} #ifdef DEBUG #define ZFS_DEBUG_STR " (DEBUG mode)" @@ -5327,11 +5911,14 @@ _init(void) if ((error = zvol_init()) != 0) goto out1; + zfs_ioctl_init(); + if ((error = zfs_attach()) != 0) goto out2; tsd_create(&zfs_fsyncer_key, NULL); - tsd_create(&rrw_tsd_key, NULL); + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); + tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, " "ZFS pool version %s, ZFS filesystem version %s\n", @@ -5362,6 +5949,7 @@ _fini(void) tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); + tsd_destroy(&zfs_allow_log_key); printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 9ae7ab500..8fee441b1 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -1550,9 +1551,8 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers) sa_register_update_callback(os, zfs_sa_upgrade); } - spa_history_log_internal(LOG_DS_UPGRADE, - dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", - zsb->z_version, newvers, dmu_objset_id(os)); + spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, + "from %llu to %llu", zsb->z_version, newvers); dmu_tx_commit(tx); |