Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dmu_objset.c | 310
-rw-r--r-- | module/zfs/dmu_send.c | 110
-rw-r--r-- | module/zfs/dmu_tx.c | 2
-rw-r--r-- | module/zfs/dsl_dataset.c | 145
-rw-r--r-- | module/zfs/dsl_deleg.c | 28
-rw-r--r-- | module/zfs/dsl_dir.c | 67
-rw-r--r-- | module/zfs/dsl_prop.c | 27
-rw-r--r-- | module/zfs/dsl_scan.c | 4
-rw-r--r-- | module/zfs/dsl_synctask.c | 23
-rw-r--r-- | module/zfs/rrwlock.c | 13
-rw-r--r-- | module/zfs/spa.c | 54
-rw-r--r-- | module/zfs/spa_history.c | 228
-rw-r--r-- | module/zfs/zfs_ctldir.c | 3
-rw-r--r-- | module/zfs/zfs_ioctl.c | 1622
-rw-r--r-- | module/zfs/zfs_vfsops.c | 6
15 files changed, 1667 insertions, 975 deletions
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 52d55d566..0f07a4cc9 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -525,9 +526,10 @@ dmu_objset_evict_dbufs(objset_t *os) void dmu_objset_evict(objset_t *os) { - dsl_dataset_t *ds = os->os_dsl_dataset; int t; + dsl_dataset_t *ds = os->os_dsl_dataset; + for (t = 0; t < TXG_SIZE; t++) ASSERT(!dmu_objset_is_dirty(os, t)); @@ -698,30 +700,33 @@ dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) spa_t *spa = dd->dd_pool->dp_spa; struct oscarg *oa = arg2; uint64_t obj; + dsl_dataset_t *ds; + blkptr_t *bp; ASSERT(dmu_tx_is_syncing(tx)); obj = dsl_dataset_create_sync(dd, oa->lastname, oa->clone_origin, oa->flags, oa->cr, tx); - if (oa->clone_origin == NULL) { - dsl_pool_t *dp = dd->dd_pool; - dsl_dataset_t *ds; - blkptr_t *bp; - objset_t *os; - - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); - bp = dsl_dataset_get_blkptr(ds); - ASSERT(BP_IS_HOLE(bp)); - - os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + if (BP_IS_HOLE(bp)) { + objset_t *os = + dmu_objset_create_impl(spa, ds, bp, oa->type, tx); if (oa->userfunc) oa->userfunc(os, oa->userarg, oa->cr, tx); - dsl_dataset_rele(ds, FTAG); } - spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); + if (oa->clone_origin == NULL) { + spa_history_log_internal_ds(ds, "create", tx, ""); + } else { + char namebuf[MAXNAMELEN]; + dsl_dataset_name(oa->clone_origin, namebuf); + spa_history_log_internal_ds(ds, "clone", tx, + "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object); + } + dsl_dataset_rele(ds, FTAG); } int @@ -798,34 +803,40 @@ dmu_objset_destroy(const char *name, boolean_t defer) return (error); } -struct snaparg { - dsl_sync_task_group_t *dstg; - char *snapname; - char *htag; - char failed[MAXPATHLEN]; - boolean_t recursive; - boolean_t needsuspend; - boolean_t temporary; - nvlist_t *props; - struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ - dsl_dataset_t *newds; -}; +typedef struct snapallarg { + dsl_sync_task_group_t *saa_dstg; + boolean_t saa_needsuspend; + nvlist_t *saa_props; + + /* the following are used only if 'temporary' is set: */ + boolean_t saa_temporary; + const char *saa_htag; + struct dsl_ds_holdarg *saa_ha; + dsl_dataset_t *saa_newds; +} snapallarg_t; + +typedef struct snaponearg { + const char *soa_longname; /* long snap name */ + const char *soa_snapname; /* short snap name */ + snapallarg_t *soa_saa; +} snaponearg_t; static int snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; - struct snaparg *sn = arg2; + snaponearg_t *soa = arg2; + snapallarg_t *saa = soa->soa_saa; int error; /* The props have already been checked by zfs_check_userprops(). */ error = dsl_dataset_snapshot_check(os->os_dsl_dataset, - sn->snapname, tx); + soa->soa_snapname, tx); if (error) return (error); - if (sn->temporary) { + if (saa->saa_temporary) { /* * Ideally we would just call * dsl_dataset_user_hold_check() and @@ -843,12 +854,13 @@ snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) * Not checking number of tags because the tag will be * unique, as it will be the only tag. 
*/ - if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) return (E2BIG); - sn->ha = kmem_alloc(sizeof(struct dsl_ds_holdarg), KM_PUSHPAGE); - sn->ha->temphold = B_TRUE; - sn->ha->htag = sn->htag; + saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), + KM_PUSHPAGE); + saa->saa_ha->temphold = B_TRUE; + saa->saa_ha->htag = saa->saa_htag; } return (error); } @@ -858,24 +870,25 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; dsl_dataset_t *ds = os->os_dsl_dataset; - struct snaparg *sn = arg2; + snaponearg_t *soa = arg2; + snapallarg_t *saa = soa->soa_saa; - dsl_dataset_snapshot_sync(ds, sn->snapname, tx); + dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx); - if (sn->props) { + if (saa->saa_props != NULL) { dsl_props_arg_t pa; - pa.pa_props = sn->props; + pa.pa_props = saa->saa_props; pa.pa_source = ZPROP_SRC_LOCAL; dsl_props_set_sync(ds->ds_prev, &pa, tx); } - if (sn->temporary) { + if (saa->saa_temporary) { struct dsl_ds_destroyarg da; - dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); - kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); - sn->ha = NULL; - sn->newds = ds->ds_prev; + dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx); + kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg)); + saa->saa_ha = NULL; + saa->saa_newds = ds->ds_prev; da.ds = ds->ds_prev; da.defer = B_TRUE; @@ -884,135 +897,182 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) } static int -dmu_objset_snapshot_one(const char *name, void *arg) +snapshot_one_impl(const char *snapname, void *arg) { - struct snaparg *sn = arg; + char *fsname; + snapallarg_t *saa = arg; + snaponearg_t *soa; objset_t *os; int err; - char *cp; - /* - * If the objset starts with a '%', then ignore it unless it was - * explicitly named (ie, not recursive). These hidden datasets - * are always inconsistent, and by not opening them here, we can - * avoid a race with dsl_dir_destroy_check(). - */ - cp = strrchr(name, '/'); - if (cp && cp[1] == '%' && sn->recursive) - return (0); + fsname = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); + (void) strlcpy(fsname, snapname, MAXPATHLEN); + strchr(fsname, '@')[0] = '\0'; - (void) strcpy(sn->failed, name); - - /* - * Check permissions if we are doing a recursive snapshot. The - * permission checks for the starting dataset have already been - * performed in zfs_secpolicy_snapshot() - */ - if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) - return (err); - - err = dmu_objset_hold(name, sn, &os); + err = dmu_objset_hold(fsname, saa, &os); + kmem_free(fsname, MAXPATHLEN); if (err != 0) return (err); /* * If the objset is in an inconsistent state (eg, in the process - * of being destroyed), don't snapshot it. As with %hidden - * datasets, we return EBUSY if this name was explicitly - * requested (ie, not recursive), and otherwise ignore it. + * of being destroyed), don't snapshot it. */ if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_rele(os, sn); - return (sn->recursive ? 
0 : EBUSY); + dmu_objset_rele(os, saa); + return (EBUSY); } - if (sn->needsuspend) { + if (saa->saa_needsuspend) { err = zil_suspend(dmu_objset_zil(os)); if (err) { - dmu_objset_rele(os, sn); + dmu_objset_rele(os, saa); return (err); } } - dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, - os, sn, 3); + + soa = kmem_zalloc(sizeof (*soa), KM_PUSHPAGE); + soa->soa_saa = saa; + soa->soa_longname = snapname; + soa->soa_snapname = strchr(snapname, '@') + 1; + + dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync, + os, soa, 3); return (0); } +/* + * The snapshots must all be in the same pool. + */ int -dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) +dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) { dsl_sync_task_t *dst; - struct snaparg *sn; + snapallarg_t saa = { 0 }; spa_t *spa; - minor_t minor; + int rv = 0; int err; + nvpair_t *pair; - sn = kmem_alloc(sizeof (struct snaparg), KM_SLEEP); - (void) strcpy(sn->failed, fsname); + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); - err = spa_open(fsname, &spa, FTAG); - if (err) { - kmem_free(sn, sizeof (struct snaparg)); + err = spa_open(nvpair_name(pair), &spa, FTAG); + if (err) return (err); + saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + saa.saa_props = props; + saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + err = snapshot_one_impl(nvpair_name(pair), &saa); + if (err != 0) { + if (errors != NULL) { + fnvlist_add_int32(errors, + nvpair_name(pair), err); + } + rv = err; + } } - if (temporary) { - if (cleanup_fd < 0) { - spa_close(spa, FTAG); - return (EINVAL); - } - if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { - spa_close(spa, FTAG); - return (err); + /* + * If any call to snapshot_one_impl() failed, don't execute the + * sync task. The error handling code below will clean up the + * snaponearg_t from any successful calls to + * snapshot_one_impl(). 
+ */ + if (rv == 0) + err = dsl_sync_task_group_wait(saa.saa_dstg); + if (err != 0) + rv = err; + + for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; + dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { + objset_t *os = dst->dst_arg1; + snaponearg_t *soa = dst->dst_arg2; + if (dst->dst_err != 0) { + if (errors != NULL) { + fnvlist_add_int32(errors, + soa->soa_longname, dst->dst_err); + } + rv = dst->dst_err; } + + if (saa.saa_needsuspend) + zil_resume(dmu_objset_zil(os)); + dmu_objset_rele(os, &saa); + kmem_free(soa, sizeof (*soa)); } - sn->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - sn->snapname = snapname; - sn->htag = tag; - sn->props = props; - sn->recursive = recursive; - sn->needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - sn->temporary = temporary; - sn->ha = NULL; - sn->newds = NULL; - - if (recursive) { - err = dmu_objset_find(fsname, - dmu_objset_snapshot_one, sn, DS_FIND_CHILDREN); - } else { - err = dmu_objset_snapshot_one(fsname, sn); + dsl_sync_task_group_destroy(saa.saa_dstg); + spa_close(spa, FTAG); + return (rv); +} + +int +dmu_objset_snapshot_one(const char *fsname, const char *snapname) +{ + int err; + char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); + nvlist_t *snaps = fnvlist_alloc(); + + fnvlist_add_boolean(snaps, longsnap); + err = dmu_objset_snapshot(snaps, NULL, NULL); + fnvlist_free(snaps); + strfree(longsnap); + return (err); +} + +int +dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd) +{ + dsl_sync_task_t *dst; + snapallarg_t saa = { 0 }; + spa_t *spa; + minor_t minor; + int err; + + err = spa_open(snapname, &spa, FTAG); + if (err) + return (err); + saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + saa.saa_htag = tag; + saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + saa.saa_temporary = B_TRUE; + + if (cleanup_fd < 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { + spa_close(spa, FTAG); + return (err); } + err = snapshot_one_impl(snapname, &saa); + if (err == 0) - err = dsl_sync_task_group_wait(sn->dstg); + err = dsl_sync_task_group_wait(saa.saa_dstg); - for (dst = list_head(&sn->dstg->dstg_tasks); dst; - dst = list_next(&sn->dstg->dstg_tasks, dst)) { + for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; + dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { objset_t *os = dst->dst_arg1; - dsl_dataset_t *ds = os->os_dsl_dataset; - if (dst->dst_err) { - dsl_dataset_name(ds, sn->failed); - } else if (temporary) { - dsl_register_onexit_hold_cleanup(sn->newds, tag, minor); - } - if (sn->needsuspend) + dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor); + if (saa.saa_needsuspend) zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, sn); + dmu_objset_rele(os, &saa); } - if (err) - (void) strcpy(fsname, sn->failed); - if (temporary) - zfs_onexit_fd_rele(cleanup_fd); - dsl_sync_task_group_destroy(sn->dstg); + zfs_onexit_fd_rele(cleanup_fd); + dsl_sync_task_group_destroy(saa.saa_dstg); spa_close(spa, FTAG); - kmem_free(sn, sizeof (struct snaparg)); return (err); } + static void dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) { @@ -1159,7 +1219,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; - while ((dr = list_head(list)) != NULL) { + while ((dr = list_head(list))) { ASSERT(dr->dr_dbuf->db_level == 0); list_remove(list, dr); if (dr->dr_zio) @@ -1219,7 
+1279,7 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); - while ((dn = list_head(list)) != NULL) { + while ((dn = list_head(list))) { int flags; ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 54e75971d..6552e1d9d 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -416,9 +416,48 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (err); } +/* + * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. + * For example, they could both be snapshots of the same filesystem, and + * 'earlier' is before 'later'. Or 'earlier' could be the origin of + * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's + * filesystem. Or 'earlier' could be the origin's origin. + */ +static boolean_t +is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +{ + dsl_pool_t *dp = later->ds_dir->dd_pool; + int error; + boolean_t ret; + dsl_dataset_t *origin; + + if (earlier->ds_phys->ds_creation_txg >= + later->ds_phys->ds_creation_txg) + return (B_FALSE); + + if (later->ds_dir == earlier->ds_dir) + return (B_TRUE); + if (!dsl_dir_is_clone(later->ds_dir)) + return (B_FALSE); + + rw_enter(&dp->dp_config_rwlock, RW_READER); + if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) { + rw_exit(&dp->dp_config_rwlock); + return (B_TRUE); + } + error = dsl_dataset_hold_obj(dp, + later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); + rw_exit(&dp->dp_config_rwlock); + if (error != 0) + return (B_FALSE); + ret = is_before(origin, earlier); + dsl_dataset_rele(origin, FTAG); + return (ret); +} + int -dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - int outfd, vnode_t *vp, offset_t *off) +dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, + offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -431,30 +470,13 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. 
+ */ + if (fromds != NULL && !is_before(ds, fromds)) return (EXDEV); - if (fromorigin) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } - - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -479,7 +501,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; - if (fromorigin) + if (fromds != NULL && ds->ds_dir != fromds->ds_dir) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) @@ -491,8 +513,6 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (fromds) fromtxg = fromds->ds_phys->ds_creation_txg; - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -550,8 +570,7 @@ out: } int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - uint64_t *sizep) +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -563,27 +582,13 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. + */ + if (fromds != NULL && !is_before(ds, fromds)) return (EXDEV); - if (fromorigin) { - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } - /* Get uncompressed size estimate of changed data. 
*/ if (fromds == NULL) { size = ds->ds_phys->ds_uncompressed_bytes; @@ -591,8 +596,6 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, uint64_t used, comp; err = dsl_dataset_space_written(fromds, ds, &used, &comp, &size); - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); if (err) return (err); } @@ -690,8 +693,7 @@ recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); } - spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, - dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); + spa_history_log_internal_ds(rbsa->ds, "receive new", tx, ""); } /* ARGSUSED */ @@ -792,8 +794,7 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds = cds; - spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, - dp->dp_spa, tx, "dataset = %lld", dsobj); + spa_history_log_internal_ds(cds, "receive over existing", tx, ""); } static boolean_t @@ -1604,6 +1605,7 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + spa_history_log_internal_ds(ds, "finished receiving", tx, ""); } static int diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index fd714135a..30867f9d7 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -64,7 +64,7 @@ dmu_tx_create_dd(dsl_dir_t *dd) { dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE); tx->tx_dir = dd; - if (dd) + if (dd != NULL) tx->tx_pool = dd->dd_pool; list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), offsetof(dmu_tx_hold_t, txh_node)); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index d9e8bd30f..2eca2b204 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -907,7 +907,8 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, * The snapshots must all be in the same pool. */ int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) +dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, + nvlist_t *errlist) { int err; dsl_sync_task_t *dst; @@ -942,7 +943,7 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) } else if (err == ENOENT) { err = 0; } else { - (void) strcpy(failed, nvpair_name(pair)); + fnvlist_add_int32(errlist, nvpair_name(pair), err); break; } } @@ -956,10 +957,12 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) dsl_dataset_t *ds = dsda->ds; /* - * Return the file system name that triggered the error + * Return the snapshots that triggered the error. 
*/ - if (dst->dst_err) { - dsl_dataset_name(ds, failed); + if (dst->dst_err != 0) { + char name[ZFS_MAXNAMELEN]; + dsl_dataset_name(ds, name); + fnvlist_add_int32(errlist, name, dst->dst_err); } ASSERT3P(dsda->rm_origin, ==, NULL); dsl_dataset_disown(ds, dstg); @@ -1038,7 +1041,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; - dsl_dataset_t *dummy_ds; dsda.ds = ds; @@ -1058,9 +1060,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) } dd = ds->ds_dir; - dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); - dummy_ds->ds_dir = dd; - dummy_ds->ds_object = ds->ds_object; if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { @@ -1072,11 +1071,11 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); if (err) - goto out_free; + goto out; err = dmu_objset_from_ds(ds, &os); if (err) - goto out_free; + goto out; /* * Remove all objects while in the open context so that @@ -1091,7 +1090,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) (void) dmu_free_object(os, obj); } if (err != ESRCH) - goto out_free; + goto out; /* * Sync out all in-flight IO. @@ -1118,7 +1117,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) rw_exit(&dd->dd_pool->dp_config_rwlock); if (err) - goto out_free; + goto out; /* * Blow away the dsl_dir + head dataset. @@ -1134,7 +1133,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) err = dsl_dataset_origin_rm_prep(&dsda, tag); if (err) { dsl_dir_close(dd, FTAG); - goto out_free; + goto out; } } @@ -1142,7 +1141,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, dummy_ds, FTAG, 0); + dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); @@ -1166,8 +1165,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) if (err) dsl_dir_close(dd, FTAG); -out_free: - kmem_free(dummy_ds, sizeof (dsl_dataset_t)); out: dsl_dataset_disown(ds, tag); return (err); @@ -1333,14 +1330,12 @@ static void dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; - dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); + spa_history_log_internal_ds(ds, "destroy begin", tx, ""); } static int @@ -1665,9 +1660,13 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } + /* We need to log before removing it from the namespace. 
*/ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + /* signal any waiters that this dataset is going away */ mutex_enter(&ds->ds_lock); ds->ds_owner = dsl_reaper; @@ -1965,8 +1964,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); if (ds->ds_phys->ds_next_clones_obj != 0) { ASSERTV(uint64_t count); @@ -2014,7 +2011,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) return (ENOSPC); /* - * Propogate any reserved space for this snapshot to other + * Propagate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) @@ -2024,10 +2021,9 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) } int -dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; int err; uint64_t value; @@ -2039,7 +2035,7 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) return (EAGAIN); /* - * Check for conflicting name snapshot name. + * Check for conflicting snapshot name. */ err = dsl_dataset_snap_lookup(ds, snapname, &value); if (err == 0) @@ -2063,10 +2059,9 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) } void -dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; @@ -2172,8 +2167,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_snap_cmtime_update(ds->ds_dir); - spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, - "dataset = %llu", dsobj); + spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); } void @@ -2259,7 +2253,20 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t refd, avail, uobjs, aobjs, ratio; - dsl_dir_stats(ds->ds_dir, nv); + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + + if (dsl_dataset_is_snapshot(ds)) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + ds->ds_phys->ds_unique_bytes); + get_clones_stat(ds, nv); + } else { + dsl_dir_stats(ds->ds_dir, nv); + } dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); @@ -2305,22 +2312,6 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } - ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); - - if (ds->ds_phys->ds_next_snap_obj) { - /* - * This is a snapshot; override the dd's space used with - * our unique space and compression ratio. 
- */ - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - ds->ds_phys->ds_unique_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - - get_clones_stat(ds, nv); - } } void @@ -2329,27 +2320,25 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; - if (ds->ds_phys->ds_next_snap_obj) { + stat->dds_origin[0] = '\0'; + if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; - } - /* clone origin is really a dsl_dir thing... */ - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_dir_is_clone(ds->ds_dir)) { - dsl_dataset_t *ods; + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(ds->ds_dir)) { + dsl_dataset_t *ods; - VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_drop_ref(ods, FTAG); - } else { - stat->dds_origin[0] = '\0'; + VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dataset_name(ods, stat->dds_origin); + dsl_dataset_drop_ref(ods, FTAG); + } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } uint64_t @@ -2466,8 +2455,8 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ds->ds_snapname, 8, 1, &ds->ds_object, tx); ASSERT0(err); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, - "dataset = %llu", ds->ds_object); + spa_history_log_internal_ds(ds, "rename", tx, + "-> @%s", newsnapname); dsl_dataset_rele(hds, FTAG); } @@ -2946,8 +2935,7 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ - spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, - "dataset = %llu", hds->ds_object); + spa_history_log_internal_ds(hds, "promote", tx, ""); dsl_dir_close(odd, FTAG); } @@ -3306,6 +3294,9 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); + + spa_history_log_internal_ds(csa->cds, "clone swap", tx, + "parent=%s", csa->ohds->ds_dir->dd_myname); } /* @@ -3463,6 +3454,9 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (ds->ds_quota != effective_value) { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = effective_value; + + spa_history_log_internal_ds(ds, "set refquota", tx, + "refquota=%lld", (longlong_t)ds->ds_quota); } } @@ -3566,6 +3560,9 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); + + spa_history_log_internal_ds(ds, "set refreservation", tx, + "refreservation=%lld", (longlong_t)effective_value); } int @@ -3631,7 +3628,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; + const char *htag = ha->htag; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int error = 0; @@ -3665,7 +3662,7 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct 
dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; + const char *htag = ha->htag; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t now = gethrestime_sec(); @@ -3693,9 +3690,9 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) htag, &now, tx)); } - spa_history_log_internal(LOG_DS_USER_HOLD, - dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, - (int)ha->temphold, ds->ds_object); + spa_history_log_internal_ds(ds, "hold", tx, + "tag = %s temp = %d holds now = %llu", + htag, (int)ha->temphold, ds->ds_userrefs); } static int @@ -3902,7 +3899,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; - uint64_t dsobj = ds->ds_object; uint64_t refs; int error; @@ -3914,11 +3910,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) VERIFY(error == 0 || error == ENOENT); zapobj = ds->ds_phys->ds_userrefs_obj; VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); - - spa_history_log_internal(LOG_DS_USER_RELEASE, - dp->dp_spa, tx, "<%s> %lld dataset = %llu", - ra->htag, (longlong_t)refs, dsobj); - if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) { struct dsl_ds_destroyarg dsda = {0}; diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 294932c45..48c261e63 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -181,10 +181,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, jumpobj, perm, 8, 1, &n, tx) == 0); - spa_history_log_internal(LOG_DS_PERM_UPDATE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission update", tx, + "%s %s", whokey, perm); } } } @@ -213,10 +211,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s dataset = %llu", whokey, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission who remove", + tx, "%s", whokey); continue; } @@ -234,10 +230,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission remove", tx, + "%s %s", whokey, perm); } } } @@ -533,12 +527,10 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl, } /* - * Check if user has requested permission. If descendent is set, must have - * descendent perms. + * Check if user has requested permission. */ int -dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, - cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) { dsl_dir_t *dd; dsl_pool_t *dp; @@ -559,7 +551,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, SPA_VERSION_DELEGATED_PERMS) return (EPERM); - if (dsl_dataset_is_snapshot(ds) || descendent) { + if (dsl_dataset_is_snapshot(ds)) { /* * Snapshots are treated as descendents only, * local permissions do not apply. 
@@ -652,7 +644,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) if (error) return (error); - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); dsl_dataset_rele(ds, FTAG); return (error); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 69f68c209..45c73c363 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/dmu.h> @@ -39,8 +40,8 @@ #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); - +static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, + uint64_t value, dmu_tx_t *tx); /* ARGSUSED */ static void @@ -449,8 +450,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, int dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *dd = arg1; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; @@ -481,24 +481,19 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) void dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *dd = arg1; objset_t *mos = dd->dd_pool->dp_meta_objset; - dsl_prop_setarg_t psa; - uint64_t value = 0; uint64_t obj; dd_used_t t; ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - /* Remove our reservation. */ - dsl_prop_setarg_init_uint64(&psa, "reservation", - (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), - &value); - psa.psa_effective_value = 0; /* predict default value */ - - dsl_dir_set_reservation_sync(ds, &psa, tx); + /* + * Remove our reservation. The impl() routine avoids setting the + * actual property, which would require the (already destroyed) ds. 
+ */ + dsl_dir_set_reservation_sync_impl(dd, 0, tx); ASSERT0(dd->dd_phys->dd_used_bytes); ASSERT0(dd->dd_phys->dd_reserved); @@ -1062,6 +1057,9 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_enter(&dd->dd_lock); dd->dd_phys->dd_quota = effective_value; mutex_exit(&dd->dd_lock); + + spa_history_log_internal_dd(dd, "set quota", tx, + "quota=%lld", (longlong_t)effective_value); } int @@ -1148,25 +1146,17 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; uint64_t used; int64_t delta; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); - dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; - delta = MAX(used, effective_value) - - MAX(used, dd->dd_phys->dd_reserved); - dd->dd_phys->dd_reserved = effective_value; + delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); + dd->dd_phys->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ @@ -1176,6 +1166,23 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_exit(&dd->dd_lock); } +static void +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t value = psa->psa_effective_value; + + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(dd, psa); + + dsl_dir_set_reservation_sync_impl(dd, value, tx); + + spa_history_log_internal_dd(dd, "set reservation", tx, + "reservation=%lld", (longlong_t)value); + } + int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) @@ -1294,9 +1301,15 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; + char namebuf[MAXNAMELEN]; ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + /* Log this before we change the name. */ + dsl_dir_name(ra->newparent, namebuf); + spa_history_log_internal_dd(dd, "rename", tx, + "-> %s/%s", namebuf, ra->mynewname); + if (ra->newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, @@ -1336,8 +1349,6 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT0(err); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, - tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); } int diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index e44a94853..153420ccf 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -704,11 +705,9 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } - spa_history_log_internal((source == ZPROP_SRC_NONE || - source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : - LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, - "%s=%s dataset = %llu", propname, - (valstr == NULL ? "" : valstr), ds->ds_object); + spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || + source == ZPROP_SRC_INHERITED) ? 
"inherit" : "set", tx, + "%s=%s", propname, (valstr == NULL ? "" : valstr)); if (tbuf != NULL) kmem_free(tbuf, ZAP_MAXVALUELEN); @@ -757,24 +756,6 @@ dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } -void -dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, - dmu_tx_t *tx) -{ - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t zapobj = dd->dd_phys->dd_props_zapobj; - - ASSERT(dmu_tx_is_syncing(tx)); - - VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); - - dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); - - spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, - "%s=%llu dataset = %llu", name, (u_longlong_t)val, - dd->dd_phys->dd_head_dataset_obj); -} - int dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 34a4f0382..90ca7b256 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -224,7 +224,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_scan_sync_state(scn, tx); - spa_history_log_internal(LOG_POOL_SCAN, spa, tx, + spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } @@ -273,7 +273,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) else scn->scn_phys.scn_state = DSS_CANCELED; - spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, + spa_history_log_internal(spa, "scan done", tx, "complete=%u", complete); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index 75eb507b9..2ed47fe0c 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/dmu.h> @@ -85,17 +86,17 @@ top: /* Do a preliminary error check. */ dstg->dstg_err = 0; +#ifdef ZFS_DEBUG + /* + * Only check half the time, otherwise, the sync-context + * check will almost never fail. + */ + if (spa_get_random(2) == 0) + goto skip; +#endif rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); for (dst = list_head(&dstg->dstg_tasks); dst; dst = list_next(&dstg->dstg_tasks, dst)) { -#ifdef ZFS_DEBUG - /* - * Only check half the time, otherwise, the sync-context - * check will almost never fail. - */ - if (spa_get_random(2) == 0) - continue; -#endif dst->dst_err = dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); if (dst->dst_err) @@ -107,6 +108,9 @@ top: dmu_tx_commit(tx); return (dstg->dstg_err); } +#ifdef ZFS_DEBUG +skip: +#endif /* * We don't generally have many sync tasks, so pay the price of @@ -230,9 +234,6 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp, { dsl_sync_task_group_t *dstg; - if (!spa_writeable(dp->dp_spa)) - return; - dstg = dsl_sync_task_group_create(dp); dsl_sync_task_create(dstg, checkfunc, syncfunc, arg1, arg2, blocks_modified); diff --git a/module/zfs/rrwlock.c b/module/zfs/rrwlock.c index 4cef53f95..7f9290bd4 100644 --- a/module/zfs/rrwlock.c +++ b/module/zfs/rrwlock.c @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ #include <sys/refcount.h> #include <sys/rrwlock.h> @@ -262,3 +265,13 @@ rrw_held(rrwlock_t *rrl, krw_t rw) return (held); } + +void +rrw_tsd_destroy(void *arg) +{ + rrw_node_t *rn = arg; + if (rn != NULL) { + panic("thread %p terminating with rrw lock %p held", + (void *)curthread, (void *)rn->rn_rrl); + } +} diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 65f78b784..7c37ca426 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -731,8 +731,8 @@ spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); - spa_history_log_internal(LOG_POOL_GUID_CHANGE, spa, tx, - "old=%lld new=%lld", oldguid, *newguid); + spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", + oldguid, *newguid); } /* @@ -2716,6 +2716,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_async_request(spa, SPA_ASYNC_RESILVER); /* + * Log the fact that we booted up (so that we can detect if + * we rebooted in the middle of an operation). + */ + spa_history_log_version(spa, "open"); + + /* * Delete any inconsistent datasets. */ (void) dmu_objset_find(spa_name(spa), @@ -3399,7 +3405,7 @@ spa_l2cache_drop(spa_t *spa) */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - const char *history_str, nvlist_t *zplprops) + nvlist_t *zplprops) { spa_t *spa; char *altroot = NULL; @@ -3620,9 +3626,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_sync(spa, B_FALSE, B_TRUE); - if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) - (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); - spa_history_log_version(spa, LOG_POOL_CREATE); + spa_history_log_version(spa, "create"); spa->spa_minref = refcount_count(&spa->spa_refcount); @@ -3824,7 +3828,6 @@ spa_import_rootpool(char *devpath, char *devid) } error = 0; - spa_history_log_version(spa, LOG_POOL_IMPORT); out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); @@ -3886,7 +3889,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_config_sync(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); return (0); } @@ -4017,7 +4020,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); #ifdef _KERNEL zvol_create_minors(pool); @@ -4560,7 +4563,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, + spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? 
"replace" : "attach", newvdpath, @@ -4778,7 +4781,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) error = spa_vdev_exit(spa, vd, txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, + spa_history_log_internal(spa, "detach", NULL, "vdev=%s", vdpath); spa_strfree(vdpath); @@ -5047,9 +5050,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) - spa_history_log_internal(LOG_POOL_VDEV_DETACH, - spa, tx, "vdev=%s", - vml[c]->vdev_path); + spa_history_log_internal(spa, "detach", tx, + "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } @@ -5064,8 +5066,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ - spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, - "split new pool %s from pool %s", newname, spa_name(spa)); + spa_history_log_internal(newspa, "split", NULL, + "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); @@ -5647,8 +5649,7 @@ spa_async_thread(spa_t *spa) * then log an internal history event. */ if (new_space != old_space) { - spa_history_log_internal(LOG_POOL_VDEV_ONLINE, - spa, NULL, + spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } @@ -5892,6 +5893,7 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, "version=%lld", version); } /* @@ -5927,6 +5929,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "set", tx, + "%s=enabled", nvpair_name(elem)); break; case ZPOOL_PROP_VERSION: @@ -5966,6 +5970,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); break; default: /* @@ -5988,7 +5994,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx) == 0); - + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { VERIFY(nvpair_value_uint64(elem, &intval) == 0); @@ -6000,6 +6007,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx) == 0); + spa_history_log_internal(spa, "set", tx, + "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } @@ -6028,13 +6037,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) } } - /* log internal history if this is not a zpool create */ - if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && - tx->tx_txg != TXG_INITIAL) { - spa_history_log_internal(LOG_POOL_PROPSET, - spa, tx, "%s %lld %s", - nvpair_name(elem), intval, spa_name(spa)); - } } mutex_exit(&spa->spa_props_lock); diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 9fb75f391..79d48620c 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. 
All rights reserved. */ #include <sys/spa.h> @@ -30,9 +30,12 @@ #include <sys/dsl_synctask.h> #include <sys/dmu_tx.h> #include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> #include <sys/utsname.h> #include <sys/cmn_err.h> #include <sys/sunddi.h> +#include <sys/cred.h> #include "zfs_comutil.h" #ifdef _KERNEL #include <sys/zone.h> @@ -185,7 +188,7 @@ spa_history_zone(void) return (curproc->p_zone->zone_name); #endif #else - return ("global"); + return (NULL); #endif } @@ -197,14 +200,12 @@ static void spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; - history_arg_t *hap = arg2; - const char *history_str = hap->ha_history_str; + nvlist_t *nvl = arg2; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; size_t reclen; uint64_t le_len; - nvlist_t *nvrecord; char *record_packed = NULL; int ret; @@ -234,46 +235,36 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) } #endif - VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, - gethrestime_sec()) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); - if (hap->ha_zone != NULL) - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, - hap->ha_zone) == 0); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); #ifdef _KERNEL - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, - utsname.nodename) == 0); + fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename); #endif - if (hap->ha_log_type == LOG_CMD_POOL_CREATE || - hap->ha_log_type == LOG_CMD_NORMAL) { - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, - history_str) == 0); - - zfs_dbgmsg("command: %s", history_str); - } else { - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, - hap->ha_event) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, - tx->tx_txg) == 0); - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, - history_str) == 0); - - zfs_dbgmsg("internal %s pool:%s txg:%llu %s", - zfs_history_event_names[hap->ha_event], spa_name(spa), - (longlong_t)tx->tx_txg, history_str); - + if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + zfs_dbgmsg("command: %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { + if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { + zfs_dbgmsg("txg %lld %s %s (id %llu) %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } else { + zfs_dbgmsg("txg %lld %s %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } + } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { + zfs_dbgmsg("ioctl %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); } - VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); - record_packed = kmem_alloc(reclen, KM_PUSHPAGE); - - VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, - NV_ENCODE_XDR, KM_PUSHPAGE) == 0); + VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, + KM_PUSHPAGE), ==, 0); mutex_enter(&spa->spa_history_lock); - if (hap->ha_log_type == LOG_CMD_POOL_CREATE) - VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); /* write out the packed length as little endian */ le_len = LE_64((uint64_t)reclen); @@ -281,33 +272,44 @@ spa_history_log_sync(void *arg1, 
void *arg2, dmu_tx_t *tx) if (!ret) ret = spa_history_write(spa, record_packed, reclen, shpp, tx); - if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { - shpp->sh_pool_create_len += sizeof (le_len) + reclen; - shpp->sh_bof = shpp->sh_pool_create_len; + /* The first command is the create, which we keep forever */ + if (ret == 0 && shpp->sh_pool_create_len == 0 && + nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; } mutex_exit(&spa->spa_history_lock); - nvlist_free(nvrecord); - kmem_free(record_packed, reclen); + fnvlist_pack_free(record_packed, reclen); dmu_buf_rele(dbp, FTAG); - - strfree(hap->ha_history_str); - if (hap->ha_zone != NULL) - strfree(hap->ha_zone); - kmem_free(hap, sizeof (history_arg_t)); + fnvlist_free(nvl); } /* * Write out a history event. */ int -spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) +spa_history_log(spa_t *spa, const char *msg) +{ + int err; + nvlist_t *nvl; + + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + + fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); + err = spa_history_log_nvl(spa, nvl); + fnvlist_free(nvl); + return (err); +} + +int +spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { - history_arg_t *ha; int err = 0; dmu_tx_t *tx; + nvlist_t *nvarg; - ASSERT(what != LOG_INTERNAL); + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) + return (EINVAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); @@ -316,19 +318,21 @@ spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) return (err); } - ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); - ha->ha_history_str = strdup(history_str); - ha->ha_zone = strdup(spa_history_zone()); - ha->ha_log_type = what; - ha->ha_uid = crgetuid(CRED()); + VERIFY0(nvlist_dup(nvl, &nvarg, KM_PUSHPAGE)); + if (spa_history_zone() != NULL) { + fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, + spa_history_zone()); + } + fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + spa_history_log_sync, spa, nvarg, 0, tx); dmu_tx_commit(tx); - /* spa_history_log_sync will free ha and strings */ + /* spa_history_log_sync will free nvl */ return (err); + } /* @@ -345,7 +349,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) int err; /* - * If the command history doesn't exist (older pool), + * If the command history doesn't exist (older pool), * that's ok, just return ENOENT. */ if (!spa->spa_history) @@ -428,44 +432,54 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) return (err); } +/* + * The nvlist will be consumed by this call. + */ static void -log_internal(history_internal_events_t event, spa_t *spa, +log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, dmu_tx_t *tx, const char *fmt, va_list adx) { - history_arg_t *ha; - va_list adx_copy; + char *msg; + va_list adx1; /* * If this is part of creating a pool, not everything is * initialized yet, so don't bother logging the internal events. + * Likewise if the pool is not writeable. 
*/ - if (tx->tx_txg == TXG_INITIAL) + if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) { + fnvlist_free(nvl); return; + } - ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); - va_copy(adx_copy, adx); - ha->ha_history_str = kmem_vasprintf(fmt, adx_copy); - va_end(adx_copy); - ha->ha_log_type = LOG_INTERNAL; - ha->ha_event = event; - ha->ha_zone = NULL; - ha->ha_uid = 0; + va_copy(adx1, adx); + msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx1) + 1, KM_PUSHPAGE); + va_end(adx1); + va_copy(adx1, adx); + (void) vsprintf(msg, fmt, adx1); + va_end(adx1); + fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); + strfree(msg); + + fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); if (dmu_tx_is_syncing(tx)) { - spa_history_log_sync(spa, ha, tx); + spa_history_log_sync(spa, nvl, tx); } else { dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + spa_history_log_sync, spa, nvl, 0, tx); } - /* spa_history_log_sync() will free ha and strings */ + /* spa_history_log_sync() will free nvl */ } void -spa_history_log_internal(history_internal_events_t event, spa_t *spa, +spa_history_log_internal(spa_t *spa, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { dmu_tx_t *htx = tx; va_list adx; + nvlist_t *nvl; /* create a tx if we didn't get one */ if (tx == NULL) { @@ -477,7 +491,8 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } va_start(adx, fmt); - log_internal(event, spa, htx, fmt, adx); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + log_internal(nvl, operation, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ @@ -486,21 +501,58 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } void -spa_history_log_version(spa_t *spa, history_internal_events_t event) +spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl; + + ASSERT(tx != NULL); + + dsl_dataset_name(ds, namebuf); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); + + va_start(adx, fmt); + log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl; + + ASSERT(tx != NULL); + + dsl_dir_name(dd, namebuf); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, + dd->dd_phys->dd_head_dataset_obj); + + va_start(adx, fmt); + log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_version(spa_t *spa, const char *operation) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); - if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { - spa_history_log_internal(event, spa, NULL, - "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", - (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, - utsname.nodename, utsname.release, utsname.version, - utsname.machine); - } - cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", - event == LOG_POOL_IMPORT ? "imported" : - event == LOG_POOL_CREATE ? 
"created" : "accessed", + spa_history_log_internal(spa, operation, NULL, + "pool version %llu; software version %llu/%d; uts %s %s %s %s", + (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, + utsname.nodename, utsname.release, utsname.version, + utsname.machine); + cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation, (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif } diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index b35f27d19..a03e1c694 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -632,8 +632,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, goto out; if (error == 0) { - error = dmu_objset_snapshot(dsname, dirname, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(dsname, dirname); if (error) goto out; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index a9184a1b6..e64d6a1f0 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -28,6 +28,108 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * ZFS ioctls. + * + * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage + * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. + * + * There are two ways that we handle ioctls: the legacy way where almost + * all of the logic is in the ioctl callback, and the new way where most + * of the marshalling is handled in the common entry point, zfsdev_ioctl(). + * + * Non-legacy ioctls should be registered by calling + * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked + * from userland by lzc_ioctl(). + * + * The registration arguments are as follows: + * + * const char *name + * The name of the ioctl. This is used for history logging. If the + * ioctl returns successfully (the callback returns 0), and allow_log + * is true, then a history log entry will be recorded with the input & + * output nvlists. The log entry can be printed with "zpool history -i". + * + * zfs_ioc_t ioc + * The ioctl request number, which userland will pass to ioctl(2). + * The ioctl numbers can change from release to release, because + * the caller (libzfs) must be matched to the kernel. + * + * zfs_secpolicy_func_t *secpolicy + * This function will be called before the zfs_ioc_func_t, to + * determine if this operation is permitted. It should return EPERM + * on failure, and 0 on success. Checks include determining if the + * dataset is visible in this zone, and if the user has either all + * zfs privileges in the zone (SYS_MOUNT), or has been granted permission + * to do this operation on this dataset with "zfs allow". + * + * zfs_ioc_namecheck_t namecheck + * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool + * name, a dataset name, or nothing. If the name is not well-formed, + * the ioctl will fail and the callback will not be called. + * Therefore, the callback can assume that the name is well-formed + * (e.g. is null-terminated, doesn't have more than one '@' character, + * doesn't have invalid characters). + * + * zfs_ioc_poolcheck_t pool_check + * This specifies requirements on the pool state. If the pool does + * not meet them (is suspended or is readonly), the ioctl will fail + * and the callback will not be called. If any checks are specified + * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. 
+ * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | + * POOL_CHECK_READONLY). + * + * boolean_t smush_outnvlist + * If smush_outnvlist is true, then the output is presumed to be a + * list of errors, and it will be "smushed" down to fit into the + * caller's buffer, by removing some entries and replacing them with a + * single "N_MORE_ERRORS" entry indicating how many were removed. See + * nvlist_smush() for details. If smush_outnvlist is false, and the + * outnvlist does not fit into the userland-provided buffer, then the + * ioctl will fail with ENOMEM. + * + * zfs_ioc_func_t *func + * The callback function that will perform the operation. + * + * The callback should return 0 on success, or an error number on + * failure. If the function fails, the userland ioctl will return -1, + * and errno will be set to the callback's return value. The callback + * will be called with the following arguments: + * + * const char *name + * The name of the pool or dataset to operate on, from + * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the + * expected type (pool, dataset, or none). + * + * nvlist_t *innvl + * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or + * NULL if no input nvlist was provided. Changes to this nvlist are + * ignored. If the input nvlist could not be deserialized, the + * ioctl will fail and the callback will not be called. + * + * nvlist_t *outnvl + * The output nvlist, initially empty. The callback can fill it in, + * and it will be returned to userland by serializing it into + * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization + * fails (e.g. because the caller didn't supply a large enough + * buffer), then the overall ioctl will fail. See the + * 'smush_nvlist' argument above for additional behaviors. + * + * There are two typical uses of the output nvlist: + * - To return state, e.g. property values. In this case, + * smush_outnvlist should be false. If the buffer was not large + * enough, the caller will reallocate a larger buffer and try + * the ioctl again. + * + * - To return multiple errors from an ioctl which makes on-disk + * changes. In this case, smush_outnvlist should be true. + * Ioctls which make on-disk modifications should generally not + * use the outnvl if they succeed, because the caller can not + * distinguish between the operation failing, and + * deserialization failing. 
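 *
 * For example (illustrative only; the actual registrations appear in
 * zfs_ioctl_init() below), the new-style snapshot ioctl is registered as
 *
 *     zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
 *         zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
 *         POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 *
 * and its callback matches the non-legacy zfs_ioc_func_t signature,
 * int (*)(const char *, nvlist_t *innvl, nvlist_t *outnvl).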
*/ #include <sys/types.h> @@ -91,8 +193,13 @@ list_t zfsdev_state_list; extern void zfs_init(void); extern void zfs_fini(void); -typedef int zfs_ioc_func_t(zfs_cmd_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; +static uint_t zfs_allow_log_key; + +typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); +typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *); typedef enum { NO_NAME, @@ -103,15 +210,18 @@ typedef enum { typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, - POOL_CHECK_READONLY = 1 << 2 + POOL_CHECK_READONLY = 1 << 2, } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { + zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; - boolean_t zvec_his_log; + boolean_t zvec_allow_log; zfs_ioc_poolcheck_t zvec_pool_check; + boolean_t zvec_smush_outnvlist; + const char *zvec_name; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ @@ -129,7 +239,8 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); +static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature); static int zfs_prop_activate_feature_check(void *arg1, void *arg2, @@ -237,7 +348,7 @@ zfs_log_history(zfs_cmd_t *zc) if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) - (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); + (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } history_str_free(buf); @@ -249,7 +360,7 @@ zfs_log_history(zfs_cmd_t *zc) */ /* ARGSUSED */ static int -zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (0); } @@ -260,7 +371,7 @@ zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) @@ -329,26 +440,13 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) return (zfs_dozonecheck_impl(dataset, zoned, cr)); } -/* - * If name ends in a '@', then require recursive permissions. 
- */ -int +static int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; - boolean_t descendent = B_FALSE; dsl_dataset_t *ds; - char *at; - - at = strchr(name, '@'); - if (at != NULL && at[1] == '\0') { - *at = '\0'; - descendent = B_TRUE; - } error = dsl_dataset_hold(name, FTAG, &ds); - if (at != NULL) - *at = '@'; if (error != 0) return (error); @@ -356,14 +454,14 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, descendent, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } dsl_dataset_rele(ds, FTAG); return (error); } -int +static int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { @@ -373,7 +471,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } return (error); } @@ -533,8 +631,9 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } -int -zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -549,15 +648,17 @@ zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) return (0); } -int -zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } -int -zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; @@ -593,9 +694,18 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ +static int +zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND, cr)); +} + #ifdef HAVE_SMB_SHARE +/* ARGSUSED */ static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { vnode_t *vp; int error; @@ -620,7 +730,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) #endif /* HAVE_SMB_SHARE */ int -zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) @@ -629,7 +739,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) if (secpolicy_nfs(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); @@ -637,7 +747,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) } int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) @@ -646,7 +756,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) if (secpolicy_smb(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); @@ -687,29 +797,55 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } +/* ARGSUSED */ static int 
-zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires - * descendent mount and destroy permissions. + * descendant mount and destroy permissions. */ +/* ARGSUSED */ static int -zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int error; - char *dsname; + nvlist_t *snaps; + nvpair_t *pair, *nextpair; + int error = 0; - dsname = kmem_asprintf("%s@", zc->zc_name); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nextpair) { + dsl_dataset_t *ds; - error = zfs_secpolicy_destroy_perms(dsname, cr); - if (error == ENOENT) - error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); + nextpair = nvlist_next_nvpair(snaps, pair); + error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); + if (error == 0) { + dsl_dataset_rele(ds, FTAG); + } else if (error == ENOENT) { + /* + * Ignore any snapshots that don't exist (we consider + * them "already destroyed"). Remove the name from the + * nvl here in case the snapshot is created between + * now and when we try to destroy it (in which case + * we don't want to destroy it since we haven't + * checked for permission). + */ + fnvlist_remove_nvpair(snaps, pair); + error = 0; + continue; + } else { + break; + } + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); + if (error != 0) + break; + } - strfree(dsname); return (error); } @@ -742,14 +878,16 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; @@ -789,8 +927,9 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -813,37 +952,72 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) ZFS_DELEG_PERM_SNAPSHOT, cr)); } +/* + * Check for permission to create each snapshot in the nvlist. + */ +/* ARGSUSED */ static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { + nvlist_t *snaps; + int error = 0; + nvpair_t *pair; + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *name = nvpair_name(pair); + char *atp = strchr(name, '@'); - return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); + if (atp == NULL) { + error = EINVAL; + break; + } + *atp = '\0'; + error = zfs_secpolicy_snapshot_perms(name, cr); + *atp = '@'; + if (error != 0) + break; + } + return (error); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + /* + * Even root must have a proper TSD so that we know what pool + * to log to. 
+ */ + if (tsd_get(zfs_allow_log_key) == NULL) + return (EPERM); + return (0); } static int -zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; int error; + char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); - if (zc->zc_value[0] != '\0') { - if ((error = zfs_secpolicy_write_perms(zc->zc_value, - ZFS_DELEG_PERM_CLONE, cr)) != 0) - return (error); - } + if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && + (error = zfs_secpolicy_write_perms(origin, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); - error = zfs_secpolicy_write_perms(parentname, - ZFS_DELEG_PERM_MOUNT, cr); - - return (error); + return (zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr)); } /* @@ -852,7 +1026,7 @@ zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); @@ -865,7 +1039,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -881,13 +1055,14 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (secpolicy_zinject(cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); @@ -903,9 +1078,9 @@ zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); @@ -932,9 +1107,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); @@ -945,22 +1120,25 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) userquota_perms[zc->zc_objset_type], cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); @@ -970,7 +1148,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) * Policy for allowing temporary snapshots to be taken or released */ static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, 
cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, @@ -983,13 +1161,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); - error = zfs_secpolicy_snapshot(zc, cr); + error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); if (!error) - error = zfs_secpolicy_hold(zc, cr); + error = zfs_secpolicy_hold(zc, innvl, cr); if (!error) - error = zfs_secpolicy_release(zc, cr); + error = zfs_secpolicy_release(zc, innvl, cr); if (!error) - error = zfs_secpolicy_destroy(zc, cr); + error = zfs_secpolicy_destroy(zc, innvl, cr); return (error); } @@ -1028,36 +1206,40 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) return (0); } +/* + * Reduce the size of this nvlist until it can be serialized in 'max' bytes. + * Entries will be removed from the end of the nvlist, and one int32 entry + * named "N_MORE_ERRORS" will be added indicating how many entries were + * removed. + */ static int -fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +nvlist_smush(nvlist_t *errors, size_t max) { size_t size; - VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(errors); - if (size > zc->zc_nvlist_dst_size) { + if (size > max) { nvpair_t *more_errors; int n = 0; - if (zc->zc_nvlist_dst_size < 1024) + if (max < 1024) return (ENOMEM); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); - more_errors = nvlist_prev_nvpair(*errors, NULL); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); + more_errors = nvlist_prev_nvpair(errors, NULL); do { - nvpair_t *pair = nvlist_prev_nvpair(*errors, + nvpair_t *pair = nvlist_prev_nvpair(errors, more_errors); - VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); + fnvlist_remove_nvpair(errors, pair); n++; - VERIFY(nvlist_size(*errors, &size, - NV_ENCODE_NATIVE) == 0); - } while (size > zc->zc_nvlist_dst_size); + size = fnvlist_size(errors); + } while (size > max); - VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); - ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); - ASSERT(size <= zc->zc_nvlist_dst_size); + fnvlist_remove_nvpair(errors, more_errors); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); + ASSERT3U(fnvlist_size(errors), <=, max); } return (0); @@ -1070,21 +1252,20 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) int error = 0; size_t size; - VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(nvl); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { - packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - KM_SLEEP) == 0); + packed = fnvlist_pack(nvl, &size); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; - kmem_free(packed, size); + fnvlist_pack_free(packed, size); } zc->zc_nvlist_dst_size = size; + zc->zc_nvlist_dst_filled = B_TRUE; return (error); } @@ -1163,7 +1344,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; - char *buf; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) @@ -1203,9 +1383,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; } - buf = history_str_get(zc); - - error = spa_create(zc->zc_name, config, props, buf, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops); /* * Set the 
remaining root properties @@ -1214,9 +1392,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); - if (buf != NULL) - history_str_free(buf); - pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); @@ -2249,32 +2424,26 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, /* * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the first error - * encountered. If the caller provides a non-NULL errlist, it also gives the - * complete list of names of all the properties it failed to set along with the - * corresponding error numbers. The caller is responsible for freeing the - * returned errlist. + * it continues to set as many as it can and returns the last error + * encountered. If the caller provides a non-NULL errlist, it will be filled in + * with the list of names of all the properties that failed along with the + * corresponding error numbers. * - * If every property is set successfully, zero is returned and the list pointed - * at by errlist is NULL. + * If every property is set successfully, zero is returned and errlist is not + * modified. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, - nvlist_t **errlist) + nvlist_t *errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; - nvlist_t *genericnvl; - nvlist_t *errors; - nvlist_t *retrynvl; - - VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + nvlist_t *genericnvl = fnvlist_alloc(); + nvlist_t *retrynvl = fnvlist_alloc(); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { @@ -2286,7 +2455,7 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + attrs = fnvpair_value_nvlist(pair); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; @@ -2311,8 +2480,7 @@ retry: } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: @@ -2356,8 +2524,11 @@ retry: } } - if (err != 0) - VERIFY(nvlist_add_int32(errors, propname, err) == 0); + if (err != 0) { + if (errlist != NULL) + fnvlist_add_int32(errlist, propname, err); + rv = err; + } } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2379,44 +2550,33 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &propval) == 0); + attrs = fnvpair_value_nvlist(pair); + propval = fnvlist_lookup_nvpair(attrs, + ZPROP_VALUE); } if (nvpair_type(propval) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(propval, - &strval) == 0); + strval = fnvpair_value_string(propval); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { - VERIFY(nvlist_add_int32(errors, propname, - err) == 0); + if (errlist != NULL) { + fnvlist_add_int32(errlist, propname, + err); + } + rv = err; } } } 
nvlist_free(genericnvl); nvlist_free(retrynvl); - if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { - nvlist_free(errors); - errors = NULL; - } else { - VERIFY(nvpair_value_int32(pair, &rv) == 0); - } - - if (errlist == NULL) - nvlist_free(errors); - else - *errlist = errors; - return (rv); } @@ -2424,7 +2584,7 @@ retry: * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(char *fsname, nvlist_t *nvl) +zfs_check_userprops(const char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; @@ -2504,7 +2664,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); - nvlist_t *errors = NULL; + nvlist_t *errors; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, @@ -2527,7 +2687,8 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) } } - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); + errors = fnvlist_alloc(); + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); @@ -2609,7 +2770,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) return (EINVAL); } - /* the property name has been validated by zfs_secpolicy_inherit() */ + /* property name has been validated by zfs_secpolicy_inherit_prop() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } @@ -2955,26 +3116,30 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, } /* - * inputs: - * zc_objset_type type of objset to create (fs vs zvol) - * zc_name name of new objset - * zc_value name of snapshot to clone from (may be empty) - * zc_nvlist_src{_size} nvlist of properties to apply + * innvl: { + * "type" -> dmu_objset_type_t (int32) + * (optional) "props" -> { prop -> value } + * } * - * outputs: none + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_create(zfs_cmd_t *zc) +zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *clone; int error = 0; - zfs_creat_t zct; + zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - dmu_objset_type_t type = zc->zc_objset_type; + int32_t type32; + dmu_objset_type_t type; + boolean_t is_insensitive = B_FALSE; - switch (type) { + if (nvlist_lookup_int32(innvl, "type", &type32) != 0) + return (EINVAL); + type = type32; + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; @@ -2987,154 +3152,221 @@ zfs_ioc_create(zfs_cmd_t *zc) cbfunc = NULL; break; } - if (strchr(zc->zc_name, '@') || - strchr(zc->zc_name, '%')) + if (strchr(fsname, '@') || + strchr(fsname, '%')) return (EINVAL); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) - return (error); - - zct.zct_zplprops = NULL; zct.zct_props = nvprops; - if (zc->zc_value[0] != '\0') { - /* - * We're creating a clone of an existing snapshot. 
- */ - zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - - error = dmu_objset_hold(zc->zc_value, FTAG, &clone); - if (error) { - nvlist_free(nvprops); - return (error); - } + if (cbfunc == NULL) + return (EINVAL); - error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); - if (error) { - nvlist_free(nvprops); - return (error); - } - } else { - boolean_t is_insensitive = B_FALSE; + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; - if (cbfunc == NULL) { - nvlist_free(nvprops); + if (nvprops == NULL) + return (EINVAL); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) return (EINVAL); - } - - if (type == DMU_OST_ZVOL) { - uint64_t volsize, volblocksize; - - if (nvprops == NULL || - nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &volsize) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - if ((error = nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize)) != 0 && error != ENOENT) { - nvlist_free(nvprops); - return (EINVAL); - } + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) + return (EINVAL); - if (error != 0) - volblocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); - if ((error = zvol_check_volblocksize( - volblocksize)) != 0 || - (error = zvol_check_volsize(volsize, - volblocksize)) != 0) { - nvlist_free(nvprops); - return (error); - } - } else if (type == DMU_OST_ZFS) { - int error; + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) + return (error); + } else if (type == DMU_OST_ZFS) { + int error; - /* - * We have to have normalization and - * case-folding flags correct when we do the - * file system creation, so go figure them out - * now. - */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - error = zfs_fill_zplprops(zc->zc_name, nvprops, - zct.zct_zplprops, &is_insensitive); - if (error != 0) { - nvlist_free(nvprops); - nvlist_free(zct.zct_zplprops); - return (error); - } + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(fsname, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); } - error = dmu_objset_create(zc->zc_name, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); - nvlist_free(zct.zct_zplprops); } + error = dmu_objset_create(fsname, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + /* * It would be nice to do this atomically. 
*/ if (error == 0) { - error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, - nvprops, NULL); + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + (void) dmu_objset_destroy(fsname, B_FALSE); } - nvlist_free(nvprops); return (error); } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot - * zc_cookie recursive flag - * zc_nvlist_src[_size] property list + * innvl: { + * "origin" -> name of origin snapshot + * (optional) "props" -> { prop -> value } + * } * * outputs: - * zc_value short snapname (i.e. part after the '@') + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_snapshot(zfs_cmd_t *zc) +zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { + int error = 0; nvlist_t *nvprops = NULL; - int error; - boolean_t recursive = zc->zc_cookie; + char *origin_name; + dsl_dataset_t *origin; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) return (EINVAL); + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) + if (strchr(fsname, '@') || + strchr(fsname, '%')) + return (EINVAL); + + if (dataset_namecheck(origin_name, NULL, NULL) != 0) + return (EINVAL); + + error = dsl_dataset_hold(origin_name, FTAG, &origin); + if (error) return (error); - error = zfs_check_userprops(zc->zc_name, nvprops); + error = dmu_objset_clone(fsname, origin, 0); + dsl_dataset_rele(origin, FTAG); if (error) - goto out; + return (error); - if (!nvlist_empty(nvprops) && - zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { - error = ENOTSUP; - goto out; + /* + * It would be nice to do this atomically. + */ + if (error == 0) { + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); + if (error != 0) + (void) dmu_objset_destroy(fsname, B_FALSE); } + return (error); +} - error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, - nvprops, recursive, B_FALSE, -1); +/* + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional) "props" -> { prop -> value (string) } + * } + * + * outnvl: snapshot -> error code (int32) + * + */ +static int +zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + nvlist_t *snaps; + nvlist_t *props = NULL; + int error, poollen; + nvpair_t *pair, *pair2; -out: - nvlist_free(nvprops); + (void) nvlist_lookup_nvlist(innvl, "props", &props); + if ((error = zfs_check_userprops(poolname, props)) != 0) + return (error); + + if (!nvlist_empty(props) && + zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) + return (ENOTSUP); + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + const char *name = nvpair_name(pair); + const char *cp = strchr(name, '@'); + + /* + * The snap name must contain an @, and the part after it must + * contain only valid characters. + */ + if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) + return (EINVAL); + + /* + * The snap must be in the specified pool. + */ + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + + /* This must be the only snap of this fs. 
*/ + for (pair2 = nvlist_next_nvpair(snaps, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { + if (strncmp(name, nvpair_name(pair2), cp - name + 1) + == 0) { + return (EXDEV); + } + } + } + + error = dmu_objset_snapshot(snaps, props, outnvl); + return (error); +} + +/* + * innvl: "message" -> string + */ +/* ARGSUSED */ +static int +zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ + char *message; + spa_t *spa; + int error; + char *poolname; + + /* + * The poolname in the ioctl is not set, we get it from the TSD, + * which was set at the end of the last successful ioctl that allows + * logging. The secpolicy func already checked that it is set. + * Only one log ioctl is allowed after each successful ioctl, so + * we clear the TSD here. + */ + poolname = tsd_get(zfs_allow_log_key); + (void) tsd_set(zfs_allow_log_key, NULL); + error = spa_open(poolname, &spa, FTAG); + strfree(poolname); + if (error != 0) + return (error); + + if (nvlist_lookup_string(innvl, "message", &message) != 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { + spa_close(spa, FTAG); + return (ENOTSUP); + } + + error = spa_history_log(spa, message); + spa_close(spa, FTAG); return (error); } @@ -3143,6 +3375,7 @@ out: * name dataset name, or when 'arg == NULL' the full snapshot name * arg short snapshot name (i.e. part after the '@') */ +/* ARGSUSED */ int zfs_unmount_snap(const char *name, void *arg) { @@ -3153,22 +3386,13 @@ zfs_unmount_snap(const char *name, void *arg) char *ptr; int error; - if (arg) { - dsname = strdup(name); - snapname = strdup(arg); - } else { - ptr = strchr(name, '@'); - if (ptr) { - dsname = strdup(name); - dsname[ptr - name] = '\0'; - snapname = strdup(ptr + 1); - } else { - return (0); - } - } + if ((ptr = strchr(name, '@')) == NULL) + return (0); + dsname = strdup(name); + dsname[ptr - name] = '\0'; + snapname = strdup(ptr + 1); fullname = kmem_asprintf("%s@%s", dsname, snapname); - error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (error == 0) { error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); @@ -3187,47 +3411,46 @@ zfs_unmount_snap(const char *name, void *arg) } /* - * inputs: - * zc_name name of filesystem, snaps must be under it - * zc_nvlist_src[_size] full names of snapshots to destroy - * zc_defer_destroy mark for deferred destroy + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional boolean) "defer" + * } * - * outputs: - * zc_name on failure, name of failed snapshot + * outnvl: snapshot -> error code (int32) */ static int -zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int err, len; - nvlist_t *nvl; + int poollen; + nvlist_t *snaps; nvpair_t *pair; + boolean_t defer; - if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvl)) != 0) - return (err); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + defer = nvlist_exists(innvl, "defer"); - len = strlen(zc->zc_name); - for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) { + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); + /* - * The snap name must be underneath the zc_name. This ensures - * that our permission checks were legitimate. + * The snap must be in the specified pool. 
*/ - if (strncmp(zc->zc_name, name, len) != 0 || - (name[len] != '@' && name[len] != '/')) { - nvlist_free(nvl); - return (EINVAL); - } + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + /* + * Ignore failures to unmount; dmu_snapshots_destroy_nvl() + * will deal with this gracefully (by filling in outnvl). + */ (void) zfs_unmount_snap(name, NULL); (void) zvol_remove_minor(name); } - err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, - zc->zc_name); - nvlist_free(nvl); - return (err); + return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl)); } /* @@ -3605,7 +3828,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || - (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { + (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); @@ -3813,8 +4036,6 @@ zfs_ioc_recv(zfs_cmd_t *zc) * dmu_recv_begin() succeeds. */ if (props) { - nvlist_t *errlist; - if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= @@ -3833,12 +4054,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, &errlist); - (void) nvlist_merge(errors, errlist, 0); - nvlist_free(errlist); + props, errors); } - if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { + if (zc->zc_nvlist_dst_size != 0 && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. 
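The nvlist-based ioctls above (e.g. zfs_ioc_snapshot(), zfs_ioc_destroy_snaps()) take their input in the shape described by their block comments. Below is a minimal sketch of building such an input with the fnvlist_* helpers used throughout this patch, assuming hypothetical snapshot names and a hypothetical user property; only the pair names under "snaps" are examined by the kernel loops, so boolean-valued pairs suffice.

/*
 * Sketch only: assemble the { "snaps", "props" } input nvlist that
 * zfs_ioc_snapshot() expects.  Names below are hypothetical.
 */
static nvlist_t *
example_snapshot_innvl(void)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *snaps = fnvlist_alloc();
	nvlist_t *props = fnvlist_alloc();

	/* "snaps": full snapshot names, all in the same pool */
	fnvlist_add_boolean(snaps, "tank/fs@backup1");
	fnvlist_add_boolean(snaps, "tank/fs@backup2");
	fnvlist_add_nvlist(innvl, "snaps", snaps);

	/* optional "props": user properties (string values) */
	fnvlist_add_string(props, "com.example:note", "nightly");
	fnvlist_add_nvlist(innvl, "props", props);

	/* fnvlist_add_nvlist() copies, so the temporaries can be freed */
	fnvlist_free(snaps);
	fnvlist_free(props);
	return (innvl);
}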
@@ -3970,15 +4191,13 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); - if (error) { - spa_close(spa, FTAG); + spa_close(spa, FTAG); + if (error) return (error); - } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); - spa_close(spa, FTAG); return (error); } @@ -3986,7 +4205,6 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); @@ -3997,12 +4215,37 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(ds, FTAG); return (error); } - } else { - spa_close(spa, FTAG); + } + + if (zc->zc_obj) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (fromsnap != NULL) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); + rw_exit(&dp->dp_config_rwlock); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + error = dmu_objset_from_ds(dsfrom, &fromsnap); + if (error) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); + } + } } if (estimate) { - error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + error = dmu_send_estimate(tosnap, fromsnap, &zc->zc_objset_type); } else { file_t *fp = getf(zc->zc_cookie); @@ -4014,7 +4257,7 @@ zfs_ioc_send(zfs_cmd_t *zc) } off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, zc->zc_obj, + error = dmu_send(tosnap, fromsnap, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) @@ -4413,6 +4656,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: + * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) @@ -4420,22 +4664,21 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) char *snap_name; int error; - snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, + snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); - if (strlen(snap_name) >= MAXNAMELEN) { + if (strlen(snap_name) >= MAXPATHLEN) { strfree(snap_name); return (E2BIG); } - error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, - NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); + error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } - (void) strcpy(zc->zc_value, snap_name); + (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1); strfree(snap_name); return (0); } @@ -4858,178 +5101,440 @@ zfs_ioc_space_written(zfs_cmd_t *zc) } /* - * inputs: - * zc_name full name of last snapshot - * zc_value full name of first snapshot + * innvl: { + * "firstsnap" -> snapshot name + * } * - * outputs: - * zc_cookie space in bytes - * zc_objset_type compressed space in bytes - * zc_perm_action uncompressed space in bytes + * outnvl: { + * "used" -> space in bytes + * "compressed" -> compressed space in bytes + * "uncompressed" -> uncompressed space in bytes + * } */ static int -zfs_ioc_space_snaps(zfs_cmd_t *zc) +zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; dsl_dataset_t *new, *old; + char *firstsnap; + uint64_t used, comp, uncomp; - error = 
dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) + return (EINVAL); + + error = dsl_dataset_hold(lastsnap, FTAG, &new); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + error = dsl_dataset_hold(firstsnap, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); return (error); } - error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, - &zc->zc_objset_type, &zc->zc_perm_action); + error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + fnvlist_add_uint64(outnvl, "used", used); + fnvlist_add_uint64(outnvl, "compressed", comp); + fnvlist_add_uint64(outnvl, "uncompressed", uncomp); return (error); } /* - * pool create, destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export - * do the logging of those commands. + * innvl: { + * "fd" -> file descriptor to write stream to (int32) + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl is unused */ -static zfs_ioc_vec_t zfs_ioc_vec[] = { - { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_READONLY }, - { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_create_minor, 
zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_remove_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, - DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, - DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_tmp_snapshot, 
zfs_secpolicy_tmp_snapshot, DATASET_NAME,
-        B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-    { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
-        POOL_CHECK_SUSPENDED },
-    { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE,
-        POOL_CHECK_NONE },
-    { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE,
-        POOL_CHECK_NONE },
-    { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-    { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-        POOL_CHECK_SUSPENDED },
-    { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-        POOL_CHECK_SUSPENDED },
-    { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-        POOL_CHECK_SUSPENDED },
-    { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-        POOL_CHECK_NONE }
-};
+/* ARGSUSED */
+static int
+zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+    objset_t *fromsnap = NULL;
+    objset_t *tosnap;
+    int error;
+    offset_t off;
+    char *fromname;
+    int fd;
+
+    error = nvlist_lookup_int32(innvl, "fd", &fd);
+    if (error != 0)
+        return (EINVAL);
+
+    error = dmu_objset_hold(snapname, FTAG, &tosnap);
+    if (error)
+        return (error);
+
+    error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+    if (error == 0) {
+        error = dmu_objset_hold(fromname, FTAG, &fromsnap);
+        if (error) {
+            dmu_objset_rele(tosnap, FTAG);
+            return (error);
+        }
+    }
+
+    {
+        file_t *fp = getf(fd);
+        if (fp == NULL) {
+            dmu_objset_rele(tosnap, FTAG);
+            if (fromsnap != NULL)
+                dmu_objset_rele(fromsnap, FTAG);
+            return (EBADF);
+        }
+
+        off = fp->f_offset;
+        error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off);
+
+        if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
+            fp->f_offset = off;
+    }
+    releasef(fd);
+    if (fromsnap != NULL)
+        dmu_objset_rele(fromsnap, FTAG);
+    dmu_objset_rele(tosnap, FTAG);
+    return (error);
+}
+
+/*
+ * Determine approximately how large a zfs send stream will be -- the number
+ * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
+ *
+ * innvl: {
+ *     (optional) "fromsnap" -> full snap name to send an incremental from
+ * }
+ *
+ * outnvl: {
+ *     "space" -> bytes of space (uint64)
+ * }
+ */
+static int
+zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+    objset_t *fromsnap = NULL;
+    objset_t *tosnap;
+    int error;
+    char *fromname;
+    uint64_t space;
+
+    error = dmu_objset_hold(snapname, FTAG, &tosnap);
+    if (error)
+        return (error);
+
+    error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+    if (error == 0) {
+        error = dmu_objset_hold(fromname, FTAG, &fromsnap);
+        if (error) {
+            dmu_objset_rele(tosnap, FTAG);
+            return (error);
+        }
+    }
+
+    error = dmu_send_estimate(tosnap, fromsnap, &space);
+    fnvlist_add_uint64(outnvl, "space", space);
+
+    if (fromsnap != NULL)
+        dmu_objset_rele(fromsnap, FTAG);
+    dmu_objset_rele(tosnap, FTAG);
+    return (error);
+}
+
+
+static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
+
+static void
+zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
+    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
+{
+    zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
+
+    ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
+    ASSERT3U(ioc, <, ZFS_IOC_LAST);
+    ASSERT3P(vec->zvec_legacy_func, ==, NULL);
+    ASSERT3P(vec->zvec_func, ==, NULL);
+
+    vec->zvec_legacy_func = func;
+    vec->zvec_secpolicy = secpolicy;
+    vec->zvec_namecheck = namecheck;
+    vec->zvec_allow_log = log_history;
+    vec->zvec_pool_check = pool_check;
+}
+
+/*
+ * See the block comment at the beginning of this file for details on
+ * each argument to this function.
+ */
+static void
+zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
+    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
+    boolean_t allow_log)
+{
+    zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
+
+    ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
+    ASSERT3U(ioc, <, ZFS_IOC_LAST);
+    ASSERT3P(vec->zvec_legacy_func, ==, NULL);
+    ASSERT3P(vec->zvec_func, ==, NULL);
+
+    /* if we are logging, the name must be valid */
+    ASSERT(!allow_log || namecheck != NO_NAME);
+
+    vec->zvec_name = name;
+    vec->zvec_func = func;
+    vec->zvec_secpolicy = secpolicy;
+    vec->zvec_namecheck = namecheck;
+    vec->zvec_pool_check = pool_check;
+    vec->zvec_smush_outnvlist = smush_outnvlist;
+    vec->zvec_allow_log = allow_log;
+}
+
+static void
+zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
+    zfs_ioc_poolcheck_t pool_check)
+{
+    zfs_ioctl_register_legacy(ioc, func, secpolicy,
+        POOL_NAME, log_history, pool_check);
+}
+
+static void
+zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
+{
+    zfs_ioctl_register_legacy(ioc, func, secpolicy,
+        DATASET_NAME, B_FALSE, pool_check);
+}
+
+static void
+zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
+{
+    zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
+        POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+}
+
+static void
+zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy)
+{
+    zfs_ioctl_register_legacy(ioc, func, secpolicy,
+        NO_NAME, B_FALSE, POOL_CHECK_NONE);
+}
+
+static void
+zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
+    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
+{
+    zfs_ioctl_register_legacy(ioc, func, secpolicy,
+        DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
+}
+
+static void
+zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
+{
+    zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
+        zfs_secpolicy_read);
+}
+
+static void
+zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy)
+{
+    zfs_ioctl_register_legacy(ioc, func, secpolicy,
+        DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+}
+
+static void
+zfs_ioctl_init(void)
+{
+    zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
+        zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+    zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
+        zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
+
+    zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
+        zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
+        POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+    zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
+        zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
+        POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+    zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
+        zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
+        POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+    zfs_ioctl_register("create", ZFS_IOC_CREATE,
+        zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+    zfs_ioctl_register("clone", ZFS_IOC_CLONE,
+        zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+    zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
+        zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+    /* IOCTLS that use the legacy function signature */
+
+    zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
+        zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
+
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
+        zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
+        zfs_ioc_pool_scan);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
+        zfs_ioc_pool_upgrade);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
+        zfs_ioc_vdev_add);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
+        zfs_ioc_vdev_remove);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
+        zfs_ioc_vdev_set_state);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
+        zfs_ioc_vdev_attach);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
+        zfs_ioc_vdev_detach);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
+        zfs_ioc_vdev_setpath);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
+        zfs_ioc_vdev_setfru);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
+        zfs_ioc_pool_set_props);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
+        zfs_ioc_vdev_split);
+    zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
+        zfs_ioc_pool_reguid);
+
+    zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
+        zfs_ioc_pool_configs, zfs_secpolicy_none);
+    zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
+        zfs_ioc_pool_tryimport, zfs_secpolicy_config);
+    zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
+        zfs_ioc_inject_fault, zfs_secpolicy_inject);
+    zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
+        zfs_ioc_clear_fault, zfs_secpolicy_inject);
+    zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
+        zfs_ioc_inject_list_next, zfs_secpolicy_inject);
+
+    /*
+     * pool destroy, and export don't log the history as part of
+     * zfsdev_ioctl, but rather zfs_ioc_pool_export
+     * does the logging of those commands.
+     */
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
+        zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
+        zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
+        zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
+        zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
+
+    zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
+        zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
+    zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
+        zfs_ioc_dsobj_to_dsname,
+        zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
+        zfs_ioc_pool_get_history,
+        zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
+
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
+        zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
+
+    zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
+        zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
+    zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
+        zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
+
+    zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
+        zfs_ioc_space_written);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS,
+        zfs_ioc_get_holds);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
+        zfs_ioc_objset_recvd_props);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
+        zfs_ioc_next_obj);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
+        zfs_ioc_get_fsacl);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
+        zfs_ioc_objset_stats);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
+        zfs_ioc_objset_zplprops);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
+        zfs_ioc_dataset_list_next);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
+        zfs_ioc_snapshot_list_next);
+    zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
+        zfs_ioc_send_progress);
+
+    zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
+        zfs_ioc_diff, zfs_secpolicy_diff);
+    zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
+        zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
+    zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
+        zfs_ioc_obj_to_path, zfs_secpolicy_diff);
+    zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
+        zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
+    zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
+        zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
+    zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
+        zfs_ioc_send, zfs_secpolicy_send);
+
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
+        zfs_secpolicy_none);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
+        zfs_secpolicy_destroy);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback,
+        zfs_secpolicy_rollback);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
+        zfs_secpolicy_rename);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
+        zfs_secpolicy_recv);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
+        zfs_secpolicy_promote);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold,
+        zfs_secpolicy_hold);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release,
+        zfs_secpolicy_release);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
+        zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
+    zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
+        zfs_secpolicy_set_fsacl);
+
+    zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
+        zfs_secpolicy_share, POOL_CHECK_NONE);
+    zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
+        zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
+    zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
+        zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+    zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
+        zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
+        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+
+    /*
+     * ZoL functions
+     */
+    zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor,
+        zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
+    zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor,
+        zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
+    zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
+        zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+    zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
+        zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+}
 int
 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
@@ -5192,60 +5697,135 @@ static long
 zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 {
     zfs_cmd_t *zc;
-    uint_t vec;
-    int error, rc, flag = 0;
-
-    vec = cmd - ZFS_IOC;
-    if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
+    uint_t vecnum;
+    int error, rc, len, flag = 0;
+    const zfs_ioc_vec_t *vec;
+    char saved_poolname[MAXNAMELEN];
+    nvlist_t *innvl = NULL;
+
+    vecnum = cmd - ZFS_IOC_FIRST;
+    if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
         return (-EINVAL);
+    vec = &zfs_ioc_vec[vecnum];
     zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
     error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
-    if (error != 0)
+    if (error != 0) {
         error = EFAULT;
+        goto out;
+    }
-    if ((error == 0) && !(flag & FKIOCTL))
-        error = zfs_ioc_vec[vec].zvec_secpolicy(zc, CRED());
+    zc->zc_iflags = flag & FKIOCTL;
+    if (zc->zc_nvlist_src_size != 0) {
+        error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+            zc->zc_iflags, &innvl);
+        if (error != 0)
+            goto out;
+    }
     /*
      * Ensure that all pool/dataset names are valid before we pass down to
      * the lower layers.
     */
-    if (error == 0) {
-        zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
-        zc->zc_iflags = flag & FKIOCTL;
-        switch (zfs_ioc_vec[vec].zvec_namecheck) {
-        case POOL_NAME:
-            if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
-                error = EINVAL;
+    zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
+    switch (vec->zvec_namecheck) {
+    case POOL_NAME:
+        if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
+            error = EINVAL;
+        else
             error = pool_status_check(zc->zc_name,
-                zfs_ioc_vec[vec].zvec_namecheck,
-                zfs_ioc_vec[vec].zvec_pool_check);
-            break;
+                vec->zvec_namecheck, vec->zvec_pool_check);
+        break;
-        case DATASET_NAME:
-            if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
-                error = EINVAL;
+    case DATASET_NAME:
+        if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
+            error = EINVAL;
+        else
            error = pool_status_check(zc->zc_name,
-                zfs_ioc_vec[vec].zvec_namecheck,
-                zfs_ioc_vec[vec].zvec_pool_check);
-            break;
+                vec->zvec_namecheck, vec->zvec_pool_check);
+        break;
-        case NO_NAME:
-            break;
-        }
+    case NO_NAME:
+        break;
     }
-    if (error == 0)
-        error = zfs_ioc_vec[vec].zvec_func(zc);
+    if (error == 0 && !(flag & FKIOCTL))
+        error = vec->zvec_secpolicy(zc, innvl, CRED());
+
+    if (error != 0)
+        goto out;
+
+    /* legacy ioctls can modify zc_name */
+    (void) strlcpy(saved_poolname, zc->zc_name, sizeof(saved_poolname));
+    len = strcspn(saved_poolname, "/@") + 1;
+    saved_poolname[len] = '\0';
+
+    if (vec->zvec_func != NULL) {
+        nvlist_t *outnvl;
+        int puterror = 0;
+        spa_t *spa;
+        nvlist_t *lognv = NULL;
+
+        ASSERT(vec->zvec_legacy_func == NULL);
+
+        /*
+         * Add the innvl to the lognv before calling the func,
+         * in case the func changes the innvl.
+         */
+        if (vec->zvec_allow_log) {
+            lognv = fnvlist_alloc();
+            fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
+                vec->zvec_name);
+            if (!nvlist_empty(innvl)) {
+                fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
+                    innvl);
+            }
+        }
+
+        outnvl = fnvlist_alloc();
+        error = vec->zvec_func(zc->zc_name, innvl, outnvl);
+
+        if (error == 0 && vec->zvec_allow_log &&
+            spa_open(zc->zc_name, &spa, FTAG) == 0) {
+            if (!nvlist_empty(outnvl)) {
+                fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
+                    outnvl);
+            }
+            (void) spa_history_log_nvl(spa, lognv);
+            spa_close(spa, FTAG);
+        }
+        fnvlist_free(lognv);
+
+        if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
+            int smusherror = 0;
+            if (vec->zvec_smush_outnvlist) {
+                smusherror = nvlist_smush(outnvl,
+                    zc->zc_nvlist_dst_size);
+            }
+            if (smusherror == 0)
+                puterror = put_nvlist(zc, outnvl);
+        }
+
+        if (puterror != 0)
+            error = puterror;
+
+        nvlist_free(outnvl);
+    } else {
+        error = vec->zvec_legacy_func(zc);
+    }
+
+out:
+    nvlist_free(innvl);
     rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
-    if (error == 0) {
-        if (rc != 0)
-            error = EFAULT;
-        if (zfs_ioc_vec[vec].zvec_his_log)
-            zfs_log_history(zc);
+    if (error == 0 && rc != 0)
+        error = EFAULT;
+    if (error == 0 && vec->zvec_allow_log) {
+        char *s = tsd_get(zfs_allow_log_key);
+        if (s != NULL)
+            strfree(s);
+        (void) tsd_set(zfs_allow_log_key, strdup(saved_poolname));
     }
     kmem_free(zc, sizeof (zfs_cmd_t));
@@ -5307,8 +5887,12 @@ zfs_detach(void)
     list_destroy(&zfsdev_state_list);
 }
-uint_t zfs_fsyncer_key;
-extern uint_t rrw_tsd_key;
+static void
+zfs_allow_log_destroy(void *arg)
+{
+    char *poolname = arg;
+    strfree(poolname);
+}
 #ifdef DEBUG
 #define ZFS_DEBUG_STR " (DEBUG mode)"
@@ -5327,11 +5911,14 @@ _init(void)
     if ((error = zvol_init()) != 0)
         goto out1;
+    zfs_ioctl_init();
+
     if ((error = zfs_attach()) != 0)
         goto out2;
     tsd_create(&zfs_fsyncer_key, NULL);
-    tsd_create(&rrw_tsd_key, NULL);
+    tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
+    tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
     printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
         "ZFS pool version %s, ZFS filesystem version %s\n",
@@ -5362,6 +5949,7 @@ _fini(void)
     tsd_destroy(&zfs_fsyncer_key);
     tsd_destroy(&rrw_tsd_key);
+    tsd_destroy(&zfs_allow_log_key);
     printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
         ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 9ae7ab500..8fee441b1 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 /* Portions Copyright 2010 Robert Milkowski */
@@ -1550,9 +1551,8 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
         sa_register_update_callback(os, zfs_sa_upgrade);
     }
-    spa_history_log_internal(LOG_DS_UPGRADE,
-        dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
-        zsb->z_version, newvers, dmu_objset_id(os));
+    spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
+        "from %llu to %llu", zsb->z_version, newvers);
     dmu_tx_commit(tx);
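
The new-style handlers added above all share one shape: they receive a dataset or pool name plus an input nvlist ("innvl") and return their results in an output nvlist ("outnvl"), which zfsdev_ioctl() then copies back to userland (optionally "smushing" it to fit the caller's buffer) and, when logging is allowed, records in the pool history. The standalone sketch below illustrates that contract using the "send_space" example whose innvl/outnvl layout is documented in the diff. It is not ZFS code: demo_send_space() and its fixed size estimate are invented stand-ins (the real handler calls dmu_send_estimate()); only the libnvpair calls are real. It should build with something like: cc demo_send_space.c -lnvpair

/*
 * Sketch only: mimics the (name, innvl, outnvl) handler contract of the
 * new-style ZFS ioctls.  All demo_* names are hypothetical.
 */
#include <stdio.h>
#include <libnvpair.h>

static int
demo_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
    char *fromname = NULL;
    uint64_t space = 1024 * 1024;       /* stand-in for dmu_send_estimate() */

    /* "fromsnap" is optional, as in the real ioctl */
    if (nvlist_lookup_string(innvl, "fromsnap", &fromname) == 0)
        space /= 2;                     /* pretend an incremental is smaller */

    printf("estimating %s (from %s)\n", snapname,
        fromname != NULL ? fromname : "full stream");

    /* results go back to the caller through the output nvlist */
    return (nvlist_add_uint64(outnvl, "space", space));
}

int
main(void)
{
    nvlist_t *innvl, *outnvl;
    uint64_t space;

    if (nvlist_alloc(&innvl, NV_UNIQUE_NAME, 0) != 0 ||
        nvlist_alloc(&outnvl, NV_UNIQUE_NAME, 0) != 0)
        return (1);

    /* build the input nvlist the way a ZFS_IOC_SEND_SPACE caller would */
    (void) nvlist_add_string(innvl, "fromsnap", "tank/fs@monday");

    if (demo_send_space("tank/fs@tuesday", innvl, outnvl) == 0 &&
        nvlist_lookup_uint64(outnvl, "space", &space) == 0)
        printf("estimated stream size: %llu bytes\n",
            (unsigned long long)space);

    nvlist_free(innvl);
    nvlist_free(outnvl);
    return (0);
}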
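On the registration side, the pattern is a fixed-size vector indexed by ioctl number: zfs_ioctl_register() and the zfs_ioctl_register_*() wrappers fill in a slot (handler, security policy, name-check rule, logging and pool-state flags), and zfsdev_ioctl() performs the name check and the secpolicy call once, in one place, before dispatching. The following self-contained sketch shows that table-driven dispatch in miniature; every demo_* identifier is invented for the example and nothing here is tied to ZFS.

/*
 * Sketch only: a minimal table-driven ioctl dispatcher in the spirit of
 * zfs_ioctl_register()/zfsdev_ioctl().  All demo_* names are hypothetical.
 */
#include <stdio.h>
#include <string.h>

enum { DEMO_IOC_FIRST = 0x100, DEMO_IOC_STATUS, DEMO_IOC_RENAME, DEMO_IOC_LAST };

typedef enum { DEMO_NO_NAME, DEMO_POOL_NAME } demo_namecheck_t;
typedef int (*demo_func_t)(const char *name);
typedef int (*demo_secpolicy_t)(const char *name);

typedef struct demo_vec {
    const char *vec_name;               /* used when logging the call */
    demo_func_t vec_func;
    demo_secpolicy_t vec_secpolicy;
    demo_namecheck_t vec_namecheck;
    int vec_allow_log;
} demo_vec_t;

static demo_vec_t demo_vec[DEMO_IOC_LAST - DEMO_IOC_FIRST];

static void
demo_register(const char *name, int ioc, demo_func_t func,
    demo_secpolicy_t secpolicy, demo_namecheck_t namecheck, int allow_log)
{
    demo_vec_t *vec = &demo_vec[ioc - DEMO_IOC_FIRST];

    vec->vec_name = name;
    vec->vec_func = func;
    vec->vec_secpolicy = secpolicy;
    vec->vec_namecheck = namecheck;
    vec->vec_allow_log = allow_log;
}

static int
demo_policy_any(const char *name)
{
    (void) name;
    return (0);                         /* everyone may call this ioctl */
}

static int
demo_status(const char *name)
{
    printf("status of %s\n", name);
    return (0);
}

static int
demo_rename(const char *name)
{
    printf("renaming %s\n", name);
    return (0);
}

static int
demo_dispatch(int ioc, const char *name)
{
    demo_vec_t *vec;
    int error;

    if (ioc < DEMO_IOC_FIRST || ioc >= DEMO_IOC_LAST)
        return (-1);
    vec = &demo_vec[ioc - DEMO_IOC_FIRST];
    if (vec->vec_func == NULL)
        return (-1);

    /* name validation and the security policy run once, here */
    if (vec->vec_namecheck == DEMO_POOL_NAME &&
        (name == NULL || strchr(name, '/') != NULL))
        return (-1);
    if ((error = vec->vec_secpolicy(name)) != 0)
        return (error);

    error = vec->vec_func(name);
    if (error == 0 && vec->vec_allow_log)
        printf("history: ioctl '%s' on %s\n", vec->vec_name, name);
    return (error);
}

int
main(void)
{
    demo_register("status", DEMO_IOC_STATUS, demo_status,
        demo_policy_any, DEMO_POOL_NAME, 0);
    demo_register("rename", DEMO_IOC_RENAME, demo_rename,
        demo_policy_any, DEMO_POOL_NAME, 1);

    (void) demo_dispatch(DEMO_IOC_STATUS, "tank");
    (void) demo_dispatch(DEMO_IOC_RENAME, "tank");
    return (0);
}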