diff options
author | Chris Williamson <[email protected]> | 2018-02-08 09:16:23 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-02-08 15:28:18 -0800 |
commit | d99a015343425a1c856c900aa8223016400ac2dc (patch) | |
tree | f6ab517b27b650c32127953b74567baa99951d08 /module/zfs | |
parent | 8824a7f133e4402f7176115cf8efd535c8cbdab2 (diff) |
OpenZFS 7431 - ZFS Channel Programs
Authored by: Chris Williamson <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: John Kennedy <[email protected]>
Reviewed by: Dan Kimmel <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
Ported-by: Don Brady <[email protected]>
Ported-by: John Kennedy <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7431
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/dfc11533
Porting Notes:
* The CLI long option arguments for '-t' and '-m' don't parse on Linux
* Switched from kmem_alloc to vmem_alloc in zcp_lua_alloc
* Lua implementation is built as its own module (zlua.ko)
* Lua headers consumed directly by zfs code moved to 'include/sys/lua/'
* There is no native setjmp/longjmp available in the stock Linux kernel.
Brought over implementations from illumos and FreeBSD.
* The get_temporary_prop() was adapted due to VFS platform differences
* Use of inline functions in the Lua parser to reduce stack usage per C call
* Skip some ZFS Test Suite ZCP tests on sparc64 to avoid stack overflow
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/Makefile.in | 11 | ||||
-rw-r--r-- | module/zfs/dsl_dataset.c | 503 | ||||
-rw-r--r-- | module/zfs/dsl_destroy.c | 194 | ||||
-rw-r--r-- | module/zfs/dsl_dir.c | 147 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math.c | 3 | ||||
-rw-r--r-- | module/zfs/zcp.c | 1357 | ||||
-rw-r--r-- | module/zfs/zcp_get.c | 876 | ||||
-rw-r--r-- | module/zfs/zcp_global.c | 84 | ||||
-rw-r--r-- | module/zfs/zcp_iter.c | 531 | ||||
-rw-r--r-- | module/zfs/zcp_synctask.c | 265 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 80 | ||||
-rw-r--r-- | module/zfs/zfs_vfsops.c | 16 |
12 files changed, 3834 insertions, 233 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index cb352bf91..084c1ac23 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -1,5 +1,6 @@ src = @abs_top_srcdir@/module/zfs obj = @abs_builddir@ +target_cpu = @target_cpu@ MODULE := zfs @@ -7,6 +8,11 @@ obj-$(CONFIG_ZFS) := $(MODULE).o ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) +# Suppress unused-value warnings in sparc64 architecture headers +ifeq ($(target_cpu),sparc64) +ccflags-y += -Wno-unused-value +endif + # Suppress unused but set variable warnings often due to ASSERTs ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE) @@ -86,6 +92,11 @@ $(MODULE)-objs += vdev_root.o $(MODULE)-objs += zap.o $(MODULE)-objs += zap_leaf.o $(MODULE)-objs += zap_micro.o +$(MODULE)-objs += zcp.o +$(MODULE)-objs += zcp_get.o +$(MODULE)-objs += zcp_global.o +$(MODULE)-objs += zcp_iter.o +$(MODULE)-objs += zcp_synctask.o $(MODULE)-objs += zfeature.o $(MODULE)-objs += zfs_acl.o $(MODULE)-objs += zfs_byteswap.o diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 3c329f207..af3dc230a 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -1681,7 +1681,6 @@ dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, return (error); } - void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) { @@ -1749,30 +1748,17 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) dmu_buf_rele(ds->ds_dbuf, ds); } -static void -get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) +int +get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val) { uint64_t count = 0; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; zap_attribute_t za; - nvlist_t *propval = fnvlist_alloc(); - nvlist_t *val; ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); /* - * We use nvlist_alloc() instead of fnvlist_alloc() because the - * latter would allocate the list with NV_UNIQUE_NAME flag. 
- * As a result, every time a clone name is appended to the list - * it would be (linearly) searched for for a duplicate name. - * We already know that all clone names must be unique and we - * want avoid the quadratic complexity of double-checking that - * because we can have a large number of clones. - */ - VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP)); - - /* * There may be missing entries in ds_next_clones_obj * due to a bug in a previous version of the code. * Only trust it if it has the right number of entries. @@ -1781,8 +1767,9 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, &count)); } - if (count != dsl_dataset_phys(ds)->ds_num_children - 1) - goto fail; + if (count != dsl_dataset_phys(ds)->ds_num_children - 1) { + return (ENOENT); + } for (zap_cursor_init(&zc, mos, dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; @@ -1796,15 +1783,42 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); - fnvlist_add_nvlist(propval, ZPROP_VALUE, val); - fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); -fail: + return (0); +} + +void +get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) +{ + nvlist_t *propval = fnvlist_alloc(); + nvlist_t *val; + + /* + * We use nvlist_alloc() instead of fnvlist_alloc() because the + * latter would allocate the list with NV_UNIQUE_NAME flag. + * As a result, every time a clone name is appended to the list + * it would be (linearly) searched for for a duplicate name. + * We already know that all clone names must be unique and we + * want avoid the quadratic complexity of double-checking that + * because we can have a large number of clones. 
+ */ + VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP)); + + if (get_clones_stat_impl(ds, val) == 0) { + fnvlist_add_nvlist(propval, ZPROP_VALUE, val); + fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), + propval); + } + nvlist_free(val); nvlist_free(propval); } -static void -get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) +/* + * Returns a string that represents the receive resume stats token. It should + * be freed with strfree(). + */ +char * +get_receive_resume_stats_impl(dsl_dataset_t *ds) { dsl_pool_t *dp = ds->ds_dir->dd_pool; @@ -1876,86 +1890,361 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) ZFS_SEND_RESUME_TOKEN_VERSION, (longlong_t)cksum.zc_word[0], (longlong_t)packed_size, str); - dsl_prop_nvlist_add_string(nv, - ZFS_PROP_RECEIVE_RESUME_TOKEN, propval); kmem_free(packed, packed_size); kmem_free(str, compressed_size * 2 + 1); kmem_free(compressed, packed_size); - strfree(propval); + return (propval); + } + return (strdup("")); +} + +/* + * Returns a string that represents the receive resume stats token of the + * dataset's child. It should be freed with strfree(). 
+ */ +char * +get_child_receive_stats(dsl_dataset_t *ds) +{ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + dsl_dataset_t *recv_ds; + dsl_dataset_name(ds, recvname); + if (strlcat(recvname, "/", sizeof (recvname)) < + sizeof (recvname) && + strlcat(recvname, recv_clone_name, sizeof (recvname)) < + sizeof (recvname) && + dsl_dataset_hold(ds->ds_dir->dd_pool, recvname, FTAG, + &recv_ds) == 0) { + char *propval = get_receive_resume_stats_impl(recv_ds); + dsl_dataset_rele(recv_ds, FTAG); + return (propval); + } + return (strdup("")); +} + +static void +get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) +{ + char *propval = get_receive_resume_stats_impl(ds); + if (strcmp(propval, "") != 0) { + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_RECEIVE_RESUME_TOKEN, propval); + } else { + char *childval = get_child_receive_stats(ds); + if (strcmp(childval, "") != 0) { + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_RECEIVE_RESUME_TOKEN, childval); + } + strfree(childval); + } + strfree(propval); +} + +uint64_t +dsl_get_refratio(dsl_dataset_t *ds) +{ + uint64_t ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 
100 : + (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / + dsl_dataset_phys(ds)->ds_compressed_bytes); + return (ratio); +} + +uint64_t +dsl_get_logicalreferenced(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_uncompressed_bytes); +} + +uint64_t +dsl_get_compressratio(dsl_dataset_t *ds) +{ + if (ds->ds_is_snapshot) { + return (dsl_get_refratio(ds)); + } else { + dsl_dir_t *dd = ds->ds_dir; + mutex_enter(&dd->dd_lock); + uint64_t val = dsl_dir_get_compressratio(dd); + mutex_exit(&dd->dd_lock); + return (val); } } +uint64_t +dsl_get_used(dsl_dataset_t *ds) +{ + if (ds->ds_is_snapshot) { + return (dsl_dataset_phys(ds)->ds_unique_bytes); + } else { + dsl_dir_t *dd = ds->ds_dir; + mutex_enter(&dd->dd_lock); + uint64_t val = dsl_dir_get_used(dd); + mutex_exit(&dd->dd_lock); + return (val); + } +} + +uint64_t +dsl_get_creation(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_creation_time); +} + +uint64_t +dsl_get_creationtxg(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_creation_txg); +} + +uint64_t +dsl_get_refquota(dsl_dataset_t *ds) +{ + return (ds->ds_quota); +} + +uint64_t +dsl_get_refreservation(dsl_dataset_t *ds) +{ + return (ds->ds_reserved); +} + +uint64_t +dsl_get_guid(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_guid); +} + +uint64_t +dsl_get_unique(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_unique_bytes); +} + +uint64_t +dsl_get_objsetid(dsl_dataset_t *ds) +{ + return (ds->ds_object); +} + +uint64_t +dsl_get_userrefs(dsl_dataset_t *ds) +{ + return (ds->ds_userrefs); +} + +uint64_t +dsl_get_defer_destroy(dsl_dataset_t *ds) +{ + return (DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); +} + +uint64_t +dsl_get_referenced(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_referenced_bytes); +} + +uint64_t +dsl_get_numclones(dsl_dataset_t *ds) +{ + ASSERT(ds->ds_is_snapshot); + return (dsl_dataset_phys(ds)->ds_num_children - 1); +} + +uint64_t +dsl_get_inconsistent(dsl_dataset_t *ds) +{ + return ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) ? + 1 : 0); +} + +uint64_t +dsl_get_available(dsl_dataset_t *ds) +{ + uint64_t refdbytes = dsl_get_referenced(ds); + uint64_t availbytes = dsl_dir_space_available(ds->ds_dir, + NULL, 0, TRUE); + if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { + availbytes += + ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; + } + if (ds->ds_quota != 0) { + /* + * Adjust available bytes according to refquota + */ + if (refdbytes < ds->ds_quota) { + availbytes = MIN(availbytes, + ds->ds_quota - refdbytes); + } else { + availbytes = 0; + } + } + return (availbytes); +} + +int +dsl_get_written(dsl_dataset_t *ds, uint64_t *written) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_dataset_t *prev; + int err = dsl_dataset_hold_obj(dp, + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); + if (err == 0) { + uint64_t comp, uncomp; + err = dsl_dataset_space_written(prev, ds, written, + &comp, &uncomp); + dsl_dataset_rele(prev, FTAG); + } + return (err); +} + +/* + * 'snap' should be a buffer of size ZFS_MAX_DATASET_NAME_LEN. + */ +int +dsl_get_prev_snap(dsl_dataset_t *ds, char *snap) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { + dsl_dataset_name(ds->ds_prev, snap); + return (0); + } else { + return (ENOENT); + } +} + +/* + * Returns the mountpoint property and source for the given dataset in the value + * and source buffers. The value buffer must be at least as large as MAXPATHLEN + * and the source buffer as least as large a ZFS_MAX_DATASET_NAME_LEN. + * Returns 0 on success and an error on failure. 
+ */ +int +dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, + char *source) +{ + int error; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + /* Retrieve the mountpoint value stored in the zap opbject */ + error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1, + ZAP_MAXVALUELEN, value, source); + if (error != 0) { + return (error); + } + + /* + * Process the dsname and source to find the full mountpoint string. + * Can be skipped for 'legacy' or 'none'. + */ + if (value[0] == '/') { + char *buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); + char *root = buf; + const char *relpath; + + /* + * If we inherit the mountpoint, even from a dataset + * with a received value, the source will be the path of + * the dataset we inherit from. If source is + * ZPROP_SOURCE_VAL_RECVD, the received value is not + * inherited. + */ + if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { + relpath = ""; + } else { + ASSERT0(strncmp(dsname, source, strlen(source))); + relpath = dsname + strlen(source); + if (relpath[0] == '/') + relpath++; + } + + spa_altroot(dp->dp_spa, root, ZAP_MAXVALUELEN); + + /* + * Special case an alternate root of '/'. This will + * avoid having multiple leading slashes in the + * mountpoint path. + */ + if (strcmp(root, "/") == 0) + root++; + + /* + * If the mountpoint is '/' then skip over this + * if we are obtaining either an alternate root or + * an inherited mountpoint. + */ + char *mnt = value; + if (value[1] == '\0' && (root[0] != '\0' || + relpath[0] != '\0')) + mnt = value + 1; + + if (relpath[0] == '\0') { + (void) snprintf(value, ZAP_MAXVALUELEN, "%s%s", + root, mnt); + } else { + (void) snprintf(value, ZAP_MAXVALUELEN, "%s%s%s%s", + root, mnt, relpath[0] == '@' ? 
"" : "/", + relpath); + } + kmem_free(buf, ZAP_MAXVALUELEN); + } + + return (0); +} + void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { - int err; dsl_pool_t *dp = ds->ds_dir->dd_pool; - uint64_t refd, avail, uobjs, aobjs, ratio; ASSERT(dsl_pool_config_held(dp)); - ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 : - (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / - dsl_dataset_phys(ds)->ds_compressed_bytes); - - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, + dsl_get_refratio(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, - dsl_dataset_phys(ds)->ds_uncompressed_bytes); + dsl_get_logicalreferenced(ds)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, + dsl_get_compressratio(ds)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + dsl_get_used(ds)); if (ds->ds_is_snapshot) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - dsl_dataset_phys(ds)->ds_unique_bytes); get_clones_stat(ds, nv); } else { - if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { - char buf[ZFS_MAX_DATASET_NAME_LEN]; - dsl_dataset_name(ds->ds_prev, buf); - dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); - } - + char buf[ZFS_MAX_DATASET_NAME_LEN]; + if (dsl_get_prev_snap(ds, buf) == 0) + dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, + buf); dsl_dir_stats(ds->ds_dir, nv); } - dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); - + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, + dsl_get_available(ds)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, + dsl_get_referenced(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, - dsl_dataset_phys(ds)->ds_creation_time); + dsl_get_creation(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, - 
dsl_dataset_phys(ds)->ds_creation_txg); + dsl_get_creationtxg(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, - ds->ds_quota); + dsl_get_refquota(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, - ds->ds_reserved); + dsl_get_refreservation(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, - dsl_dataset_phys(ds)->ds_guid); + dsl_get_guid(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, - dsl_dataset_phys(ds)->ds_unique_bytes); + dsl_get_unique(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, - ds->ds_object); + dsl_get_objsetid(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, - ds->ds_userrefs); + dsl_get_userrefs(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, - DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + dsl_get_defer_destroy(ds)); dsl_dataset_crypt_stats(ds, nv); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { - uint64_t written, comp, uncomp; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - dsl_dataset_t *prev; - - err = dsl_dataset_hold_obj(dp, - dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); - if (err == 0) { - err = dsl_dataset_space_written(prev, ds, &written, - &comp, &uncomp); - dsl_dataset_rele(prev, FTAG); - if (err == 0) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, - written); - } + uint64_t written; + if (dsl_get_written(ds, &written) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, + written); } } @@ -1989,30 +2278,22 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; + ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); ASSERT(dsl_pool_config_held(dp)); - stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; - stat->dds_inconsistent = - dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; - stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; + stat->dds_creation_txg = dsl_get_creationtxg(ds); + stat->dds_inconsistent = dsl_get_inconsistent(ds); + stat->dds_guid = 
dsl_get_guid(ds); stat->dds_origin[0] = '\0'; if (ds->ds_is_snapshot) { stat->dds_is_snapshot = B_TRUE; - stat->dds_num_clones = - dsl_dataset_phys(ds)->ds_num_children - 1; + stat->dds_num_clones = dsl_get_numclones(ds); } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; if (dsl_dir_is_clone(ds->ds_dir)) { - dsl_dataset_t *ods; - - VERIFY0(dsl_dataset_hold_obj(dp, - dsl_dir_phys(ds->ds_dir)->dd_origin_obj, - FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_rele(ods, FTAG); + dsl_dir_get_origin(ds->ds_dir, stat->dds_origin); } } } @@ -2422,22 +2703,12 @@ struct promotenode { dsl_dataset_t *ds; }; -typedef struct dsl_dataset_promote_arg { - const char *ddpa_clonename; - dsl_dataset_t *ddpa_clone; - list_t shared_snaps, origin_snaps, clone_snaps; - dsl_dataset_t *origin_origin; /* origin of the origin */ - uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; - char *err_ds; - cred_t *cr; -} dsl_dataset_promote_arg_t; - static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag); static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); -static int +int dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) { dsl_dataset_promote_arg_t *ddpa = arg; @@ -2449,14 +2720,19 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) uint64_t unused; uint64_t ss_mv_cnt; size_t max_snap_len; + boolean_t conflicting_snaps; err = promote_hold(ddpa, dp, FTAG); if (err != 0) return (err); hds = ddpa->ddpa_clone; + snap = list_head(&ddpa->shared_snaps); + origin_ds = snap->ds; max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1; + snap = list_head(&ddpa->origin_snaps); + if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) { promote_rele(ddpa, FTAG); return (SET_ERROR(EXDEV)); @@ -2511,6 +2787,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) * Note however, if we stop before we reach the ORIGIN we get: * uN + 
kN + kN-1 + ... + kM - uM-1 */ + conflicting_snaps = B_FALSE; ss_mv_cnt = 0; ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes; ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes; @@ -2539,12 +2816,12 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) } err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); if (err == 0) { - (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); - err = SET_ERROR(EEXIST); + fnvlist_add_boolean(ddpa->err_ds, + snap->ds->ds_snapname); + conflicting_snaps = B_TRUE; + } else if (err != ENOENT) { goto out; } - if (err != ENOENT) - goto out; /* The very first snapshot does not have a deadlist */ if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0) @@ -2558,6 +2835,15 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) } /* + * In order to return the full list of conflicting snapshots, we check + * whether there was a conflict after traversing all of them. + */ + if (conflicting_snaps) { + err = SET_ERROR(EEXIST); + goto out; + } + + /* * If we are a clone of a clone then we never reached ORIGIN, * so we need to subtract out the clone origin's used space. */ @@ -2623,7 +2909,7 @@ out: return (err); } -static void +void dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_promote_arg_t *ddpa = arg; @@ -2950,6 +3236,7 @@ dsl_dataset_promote(const char *name, char *conflsnap) dsl_dataset_promote_arg_t ddpa = { 0 }; uint64_t numsnaps; int error; + nvpair_t *snap_pair; objset_t *os; /* @@ -2967,12 +3254,22 @@ dsl_dataset_promote(const char *name, char *conflsnap) return (error); ddpa.ddpa_clonename = name; - ddpa.err_ds = conflsnap; + ddpa.err_ds = fnvlist_alloc(); ddpa.cr = CRED(); - return (dsl_sync_task(name, dsl_dataset_promote_check, + error = dsl_sync_task(name, dsl_dataset_promote_check, dsl_dataset_promote_sync, &ddpa, - 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); + 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED); + + /* + * Return the first conflicting snapshot found. 
+ */ + snap_pair = nvlist_next_nvpair(ddpa.err_ds, NULL); + if (snap_pair != NULL && conflsnap != NULL) + (void) strcpy(conflsnap, nvpair_name(snap_pair)); + + fnvlist_free(ddpa.err_ds); + return (error); } int diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 627831bbc..0ea27c3df 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright (c) 2012, 2016 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 by Joyent, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. @@ -30,6 +30,7 @@ #include <sys/dsl_userhold.h> #include <sys/dsl_dataset.h> #include <sys/dsl_synctask.h> +#include <sys/dsl_destroy.h> #include <sys/dmu_tx.h> #include <sys/dsl_pool.h> #include <sys/dsl_dir.h> @@ -42,13 +43,7 @@ #include <sys/dsl_deleg.h> #include <sys/dmu_impl.h> #include <sys/zvol.h> - -typedef struct dmu_snapshots_destroy_arg { - nvlist_t *dsda_snaps; - nvlist_t *dsda_successful_snaps; - boolean_t dsda_defer; - nvlist_t *dsda_errlist; -} dmu_snapshots_destroy_arg_t; +#include <sys/zcp.h> int dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) @@ -86,51 +81,33 @@ dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) return (0); } -static int +int dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) { - dmu_snapshots_destroy_arg_t *dsda = arg; + dsl_destroy_snapshot_arg_t *ddsa = arg; + const char *dsname = ddsa->ddsa_name; + boolean_t defer = ddsa->ddsa_defer; + dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; int error = 0; + dsl_dataset_t *ds; - if (!dmu_tx_is_syncing(tx)) - return (0); - - for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL); - pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) { - dsl_dataset_t *ds; - - error = 
dsl_dataset_hold(dp, nvpair_name(pair), - FTAG, &ds); - - /* - * If the snapshot does not exist, silently ignore it - * (it's "already destroyed"). - */ - if (error == ENOENT) - continue; + error = dsl_dataset_hold(dp, dsname, FTAG, &ds); - if (error == 0) { - error = dsl_destroy_snapshot_check_impl(ds, - dsda->dsda_defer); - dsl_dataset_rele(ds, FTAG); - } + /* + * If the snapshot does not exist, silently ignore it, and + * dsl_destroy_snapshot_sync() will be a no-op + * (it's "already destroyed"). + */ + if (error == ENOENT) + return (0); - if (error == 0) { - fnvlist_add_boolean(dsda->dsda_successful_snaps, - nvpair_name(pair)); - } else { - fnvlist_add_int32(dsda->dsda_errlist, - nvpair_name(pair), error); - } + if (error == 0) { + error = dsl_destroy_snapshot_check_impl(ds, defer); + dsl_dataset_rele(ds, FTAG); } - pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); - if (pair != NULL) - return (fnvpair_value_int32(pair)); - - return (0); + return (error); } struct process_old_arg { @@ -480,24 +457,23 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) dmu_object_free_zapified(mos, obj, tx); } -static void +void dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) { - dmu_snapshots_destroy_arg_t *dsda = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; + dsl_destroy_snapshot_arg_t *ddsa = arg; + const char *dsname = ddsa->ddsa_name; + boolean_t defer = ddsa->ddsa_defer; - for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL); - pair != NULL; - pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) { - dsl_dataset_t *ds; - - VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; - dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx); - zvol_remove_minors(dp->dp_spa, nvpair_name(pair), B_TRUE); - dsl_dataset_rele(ds, FTAG); - } + int error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error == ENOENT) + return; + ASSERT0(error); + 
dsl_destroy_snapshot_sync_impl(ds, defer, tx); + zvol_remove_minors(dp->dp_spa, dsname, B_TRUE); + dsl_dataset_rele(ds, FTAG); } /* @@ -517,26 +493,86 @@ int dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, nvlist_t *errlist) { - dmu_snapshots_destroy_arg_t dsda; - int error; - nvpair_t *pair; - - pair = nvlist_next_nvpair(snaps, NULL); - if (pair == NULL) + if (nvlist_next_nvpair(snaps, NULL) == NULL) return (0); - dsda.dsda_snaps = snaps; - VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps, - NV_UNIQUE_NAME, KM_SLEEP)); - dsda.dsda_defer = defer; - dsda.dsda_errlist = errlist; - - error = dsl_sync_task(nvpair_name(pair), - dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync, - &dsda, 0, ZFS_SPACE_CHECK_NONE); - fnvlist_free(dsda.dsda_successful_snaps); + nvlist_t *arg = fnvlist_alloc(); + nvlist_t *snaps_normalized = fnvlist_alloc(); + /* + * lzc_destroy_snaps() is documented to take an nvlist whose + * values "don't matter". We need to convert that nvlist to one + * that we know can be converted to LUA. + */ + for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { + fnvlist_add_boolean_value(snaps_normalized, + nvpair_name(pair), B_TRUE); + } + fnvlist_add_nvlist(arg, "snaps", snaps_normalized); + fnvlist_free(snaps_normalized); + fnvlist_add_boolean_value(arg, "defer", defer); + + nvlist_t *wrapper = fnvlist_alloc(); + fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg); + fnvlist_free(arg); + + const char *program = + "arg = ...\n" + "snaps = arg['snaps']\n" + "defer = arg['defer']\n" + "errors = { }\n" + "has_errors = false\n" + "for snap, v in pairs(snaps) do\n" + " errno = zfs.check.destroy{snap, defer=defer}\n" + " zfs.debug('snap: ' .. snap .. ' errno: ' .. 
errno)\n" + " if errno == ENOENT then\n" + " snaps[snap] = nil\n" + " elseif errno ~= 0 then\n" + " errors[snap] = errno\n" + " has_errors = true\n" + " end\n" + "end\n" + "if has_errors then\n" + " return errors\n" + "end\n" + "for snap, v in pairs(snaps) do\n" + " errno = zfs.sync.destroy{snap, defer=defer}\n" + " assert(errno == 0)\n" + "end\n" + "return { }\n"; + + nvlist_t *result = fnvlist_alloc(); + int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)), + program, + 0, + zfs_lua_max_memlimit, + fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result); + if (error != 0) { + char *errorstr = NULL; + (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr); + if (errorstr != NULL) { + zfs_dbgmsg(errorstr); + } + return (error); + } + fnvlist_free(wrapper); - return (error); + /* + * lzc_destroy_snaps() is documented to fill the errlist with + * int32 values, so we need to covert the int64 values that are + * returned from LUA. + */ + int rv = 0; + nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN); + for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL); + pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) { + int32_t val = (int32_t)fnvpair_value_int64(pair); + if (rv == 0) + rv = val; + fnvlist_add_int32(errlist, nvpair_name(pair), val); + } + fnvlist_free(result); + return (rv); } int @@ -607,10 +643,6 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) dsl_dataset_phys(ds)->ds_unique_bytes == 0); } -typedef struct dsl_destroy_head_arg { - const char *ddha_name; -} dsl_destroy_head_arg_t; - int dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) { @@ -656,7 +688,7 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) return (0); } -static int +int dsl_destroy_head_check(void *arg, dmu_tx_t *tx) { dsl_destroy_head_arg_t *ddha = arg; @@ -894,7 +926,7 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) } } -static void +void dsl_destroy_head_sync(void 
*arg, dmu_tx_t *tx) { dsl_destroy_head_arg_t *ddha = arg; diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 96e8dd62e..bf130eb99 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -947,62 +947,139 @@ dsl_dir_is_clone(dsl_dir_t *dd) dd->dd_pool->dp_origin_snap->ds_object)); } + +uint64_t +dsl_dir_get_used(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_used_bytes); +} + +uint64_t +dsl_dir_get_quota(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_quota); +} + +uint64_t +dsl_dir_get_reservation(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_reserved); +} + +uint64_t +dsl_dir_get_compressratio(dsl_dir_t *dd) +{ + /* a fixed point number, 100x the ratio */ + return (dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 : + (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / + dsl_dir_phys(dd)->dd_compressed_bytes)); +} + +uint64_t +dsl_dir_get_logicalused(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_uncompressed_bytes); +} + +uint64_t +dsl_dir_get_usedsnap(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]); +} + +uint64_t +dsl_dir_get_usedds(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]); +} + +uint64_t +dsl_dir_get_usedrefreserv(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]); +} + +uint64_t +dsl_dir_get_usedchild(dsl_dir_t *dd) +{ + return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] + + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]); +} + void -dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) +dsl_dir_get_origin(dsl_dir_t *dd, char *buf) +{ + dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, + dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); + + dsl_dataset_name(ds, buf); + + dsl_dataset_rele(ds, FTAG); +} + +int +dsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count) { - uint64_t intval; + if (dsl_dir_is_zapified(dd)) { + objset_t *os = dd->dd_pool->dp_meta_objset; + return (zap_lookup(os, dd->dd_object, 
DD_FIELD_FILESYSTEM_COUNT, + sizeof (*count), 1, count)); + } else { + return (ENOENT); + } +} + +int +dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count) +{ + if (dsl_dir_is_zapified(dd)) { + objset_t *os = dd->dd_pool->dp_meta_objset; + return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, + sizeof (*count), 1, count)); + } else { + return (ENOENT); + } +} +void +dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) +{ mutex_enter(&dd->dd_lock); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - dsl_dir_phys(dd)->dd_used_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, - dsl_dir_phys(dd)->dd_quota); + dsl_dir_get_quota(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, - dsl_dir_phys(dd)->dd_reserved); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, - dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 : - (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / - dsl_dir_phys(dd)->dd_compressed_bytes)); + dsl_dir_get_reservation(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, - dsl_dir_phys(dd)->dd_uncompressed_bytes); + dsl_dir_get_logicalused(dd)); if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, - dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]); + dsl_dir_get_usedsnap(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, - dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]); + dsl_dir_get_usedds(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, - dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]); + dsl_dir_get_usedrefreserv(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, - dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] + - dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]); + dsl_dir_get_usedchild(dd)); } mutex_exit(&dd->dd_lock); - if (dsl_dir_is_zapified(dd)) { - objset_t *os = dd->dd_pool->dp_meta_objset; - - if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, - sizeof (intval), 1, &intval) == 0) { - 
dsl_prop_nvlist_add_uint64(nv, - ZFS_PROP_FILESYSTEM_COUNT, intval); - } - if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, - sizeof (intval), 1, &intval) == 0) { - dsl_prop_nvlist_add_uint64(nv, - ZFS_PROP_SNAPSHOT_COUNT, intval); - } + uint64_t count; + if (dsl_dir_get_filesystem_count(dd, &count) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT, + count); + } + if (dsl_dir_get_snapshot_count(dd, &count) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT, + count); } if (dsl_dir_is_clone(dd)) { - dsl_dataset_t *ds; char buf[ZFS_MAX_DATASET_NAME_LEN]; - - VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, - dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); - dsl_dataset_name(ds, buf); - dsl_dataset_rele(ds, FTAG); + dsl_dir_get_origin(dd, buf); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } + } void diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index a64e3b023..ba4dcc480 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -132,7 +132,8 @@ vdev_raidz_math_get_ops() default: ASSERT3U(impl, <, raidz_supp_impl_cnt); ASSERT3U(raidz_supp_impl_cnt, >, 0); - ops = raidz_supp_impl[impl]; + if (impl < ARRAY_SIZE(raidz_all_maths)) + ops = raidz_supp_impl[impl]; break; } diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c new file mode 100644 index 000000000..07f96e298 --- /dev/null +++ b/module/zfs/zcp.c @@ -0,0 +1,1357 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2016 by Delphix. All rights reserved. 
+ */ + +/* + * ZFS Channel Programs (ZCP) + * + * The ZCP interface allows various ZFS commands and + * administrative operations (e.g. creating and destroying snapshots, typically + * performed via an ioctl to /dev/zfs by the zfs(8) command and + * libzfs/libzfs_core) to be run programmatically as a Lua script. A ZCP + * script is run as a dsl_sync_task and fully executed during one transaction + * group sync. This ensures that no other changes can be written concurrently + * with a running Lua script. Combining multiple calls to the exposed ZFS + * functions into one script gives a number of benefits: + * + * 1. Atomicity. For some compound or iterative operations, it's useful to be + * able to guarantee that the state of a pool has not changed between calls to + * ZFS. + * + * 2. Performance. If a large number of changes need to be made (e.g. deleting + * many filesystems), there can be a significant performance penalty as a + * result of the need to wait for a transaction group sync to pass for every + * single operation. When expressed as a single ZCP script, all these changes + * can be performed at once in one txg sync. + * + * A modified version of the Lua 5.2 interpreter is used to run channel program + * scripts. The Lua 5.2 manual can be found at: + * + * http://www.lua.org/manual/5.2/ + * + * If being run by a user (via an ioctl syscall), executing a ZCP script + * requires root privileges in the global zone. + * + * Scripts are passed to zcp_eval() as a string, then run in a synctask by + * zcp_eval_sync(). Arguments can be passed into the Lua script as an nvlist, + * which will be converted to a Lua table. Similarly, values returned from + * a ZCP script will be converted to an nvlist. See zcp_lua_to_nvlist_impl() + * for details on exact allowed types and conversion. + * + * ZFS functionality is exposed to a ZCP script as a library of function calls. 
+ * These calls are sorted into submodules, such as zfs.list and zfs.sync, for + * iterators and synctasks, respectively. Each of these submodules resides in + * its own source file, with a zcp_*_info structure describing each library + * call in the submodule. + * + * Error handling in ZCP scripts is handled by a number of different methods + * based on severity: + * + * 1. Memory and time limits are in place to prevent a channel program from + * consuming excessive system resources or running forever. If one of these limits is + * hit, the channel program will be stopped immediately and return from + * zcp_eval() with an error code. No attempt will be made to roll back or undo + * any changes made by the channel program before the error occurred. + * Consumers invoking zcp_eval() from elsewhere in the kernel may pass a time + * limit of 0, disabling the time limit. + * + * 2. Internal Lua errors can occur as a result of a syntax error, calling a + * library function with incorrect arguments, invoking the error() function, + * failing an assert(), or other runtime errors. In these cases the channel + * program will stop executing and return from zcp_eval() with an error code. + * In place of a return value, an error message will also be returned in the + * 'result' nvlist containing information about the error. No attempt will be + * made to roll back or undo any changes made by the channel program before the + * error occurred. + * + * 3. If an error occurs inside a ZFS library call which returns an error code, + * the error is returned to the Lua script to be handled as desired. + * + * In the first two cases, Lua's error-throwing mechanism is used, which + * longjumps out of the script execution with luaL_error() and returns with the + * error. + * + * See zfs-program(8) for more information on high level usage. 
+ */ + +#include <sys/lua/lua.h> +#include <sys/lua/lualib.h> +#include <sys/lua/lauxlib.h> + +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_dataset.h> +#include <sys/zcp.h> +#include <sys/zcp_iter.h> +#include <sys/zcp_prop.h> +#include <sys/zcp_global.h> +#include <util/sscanf.h> + +#ifndef KM_NORMALPRI +#define KM_NORMALPRI 0 +#endif + +uint64_t zfs_lua_check_instrlimit_interval = 100; +uint64_t zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT; +uint64_t zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT; + +static int zcp_nvpair_value_to_lua(lua_State *, nvpair_t *, char *, int); +static int zcp_lua_to_nvlist_impl(lua_State *, int, nvlist_t *, const char *, + int); + +typedef struct zcp_alloc_arg { + boolean_t aa_must_succeed; + int64_t aa_alloc_remaining; + int64_t aa_alloc_limit; +} zcp_alloc_arg_t; + +typedef struct zcp_eval_arg { + lua_State *ea_state; + zcp_alloc_arg_t *ea_allocargs; + cred_t *ea_cred; + nvlist_t *ea_outnvl; + int ea_result; + uint64_t ea_instrlimit; +} zcp_eval_arg_t; + +/*ARGSUSED*/ +static int +zcp_eval_check(void *arg, dmu_tx_t *tx) +{ + return (0); +} + +/* + * The outer-most error callback handler for use with lua_pcall(). On + * error Lua will call this callback with a single argument that + * represents the error value. In most cases this will be a string + * containing an error message, but channel programs can use Lua's + * error() function to return arbitrary objects as errors. This callback + * returns (on the Lua stack) the original error object along with a traceback. + * + * Fatal Lua errors can occur while resources are held, so we also call any + * registered cleanup function here. + */ +static int +zcp_error_handler(lua_State *state) +{ + const char *msg; + + zcp_cleanup(state); + + VERIFY3U(1, ==, lua_gettop(state)); + msg = lua_tostring(state, 1); + luaL_traceback(state, state, msg, 1); + return (1); +} + +int +zcp_argerror(lua_State *state, int narg, const char *msg, ...) 
+{ + va_list alist; + + va_start(alist, msg); + const char *buf = lua_pushvfstring(state, msg, alist); + va_end(alist); + + return (luaL_argerror(state, narg, buf)); +} + +/* + * Install a new cleanup function, which will be invoked with the given + * opaque argument if a fatal error causes the Lua interpreter to longjump out + * of a function call. + * + * If an error occurs, the cleanup function will be invoked exactly once and + * then unregistered. + */ +void +zcp_register_cleanup(lua_State *state, zcp_cleanup_t cleanfunc, void *cleanarg) +{ + zcp_run_info_t *ri = zcp_run_info(state); + /* + * A cleanup function should always be explicitly removed before + * installing a new one to avoid accidental clobbering. + */ + ASSERT3P(ri->zri_cleanup, ==, NULL); + + ri->zri_cleanup = cleanfunc; + ri->zri_cleanup_arg = cleanarg; +} + +void +zcp_clear_cleanup(lua_State *state) +{ + zcp_run_info_t *ri = zcp_run_info(state); + + ri->zri_cleanup = NULL; + ri->zri_cleanup_arg = NULL; +} + +/* + * If it exists, execute the currently set cleanup function then unregister it. + */ +void +zcp_cleanup(lua_State *state) +{ + zcp_run_info_t *ri = zcp_run_info(state); + + if (ri->zri_cleanup != NULL) { + ri->zri_cleanup(ri->zri_cleanup_arg); + zcp_clear_cleanup(state); + } +} + +#define ZCP_NVLIST_MAX_DEPTH 20 + +/* + * Convert the lua table at the given index on the Lua stack to an nvlist + * and return it. + * + * If the table can not be converted for any reason, NULL is returned and + * an error message is pushed onto the Lua stack. + */ +static nvlist_t * +zcp_table_to_nvlist(lua_State *state, int index, int depth) +{ + nvlist_t *nvl; + /* + * Converting a Lua table to an nvlist with key uniqueness checking is + * O(n^2) in the number of keys in the nvlist, which can take a long + * time when we return a large table from a channel program. + * Furthermore, Lua's table interface *almost* guarantees unique keys + * on its own (details below). 
Therefore, we don't use fnvlist_alloc() + * here to avoid the built-in uniqueness checking. + * + * The *almost* is because it's possible to have key collisions between + * e.g. the string "1" and the number 1, or the string "true" and the + * boolean true, so we explicitly check that when we're looking at a + * key which is an integer / boolean or a string that can be parsed as + * one of those types. In the worst case this could still devolve into + * O(n^2), so we only start doing these checks on boolean/integer keys + * once we've seen a string key which fits this weird usage pattern. + * + * Ultimately, we still want callers to know that the keys in this + * nvlist are unique, so before we return this we set the nvlist's + * flags to reflect that. + */ + VERIFY0(nvlist_alloc(&nvl, 0, KM_SLEEP)); + + /* + * Push an empty stack slot where lua_next() will store each + * table key. + */ + lua_pushnil(state); + boolean_t saw_str_could_collide = B_FALSE; + while (lua_next(state, index) != 0) { + /* + * The next key-value pair from the table at index is + * now on the stack, with the key at stack slot -2 and + * the value at slot -1. + */ + int err = 0; + char buf[32]; + const char *key = NULL; + boolean_t key_could_collide = B_FALSE; + + switch (lua_type(state, -2)) { + case LUA_TSTRING: + key = lua_tostring(state, -2); + + /* check if this could collide with a number or bool */ + long long tmp; + int parselen; + if ((sscanf(key, "%lld%n", &tmp, &parselen) > 0 && + parselen == strlen(key)) || + strcmp(key, "true") == 0 || + strcmp(key, "false") == 0) { + key_could_collide = B_TRUE; + saw_str_could_collide = B_TRUE; + } + break; + case LUA_TBOOLEAN: + key = (lua_toboolean(state, -2) == B_TRUE ? 
+ "true" : "false"); + if (saw_str_could_collide) { + key_could_collide = B_TRUE; + } + break; + case LUA_TNUMBER: + VERIFY3U(sizeof (buf), >, + snprintf(buf, sizeof (buf), "%lld", + (longlong_t)lua_tonumber(state, -2))); + key = buf; + if (saw_str_could_collide) { + key_could_collide = B_TRUE; + } + break; + default: + fnvlist_free(nvl); + (void) lua_pushfstring(state, "Invalid key " + "type '%s' in table", + lua_typename(state, lua_type(state, -2))); + return (NULL); + } + /* + * Check for type-mismatched key collisions, and throw an error. + */ + if (key_could_collide && nvlist_exists(nvl, key)) { + fnvlist_free(nvl); + (void) lua_pushfstring(state, "Collision of " + "key '%s' in table", key); + return (NULL); + } + /* + * Recursively convert the table value and insert into + * the new nvlist with the parsed key. To prevent + * stack overflow on circular or heavily nested tables, + * we track the current nvlist depth. + */ + if (depth >= ZCP_NVLIST_MAX_DEPTH) { + fnvlist_free(nvl); + (void) lua_pushfstring(state, "Maximum table " + "depth (%d) exceeded for table", + ZCP_NVLIST_MAX_DEPTH); + return (NULL); + } + err = zcp_lua_to_nvlist_impl(state, -1, nvl, key, + depth + 1); + if (err != 0) { + fnvlist_free(nvl); + /* + * Error message has been pushed to the lua + * stack by the recursive call. + */ + return (NULL); + } + /* + * Pop the value pushed by lua_next(). + */ + lua_pop(state, 1); + } + + /* + * Mark the nvlist as having unique keys. This is a little ugly, but we + * ensured above that there are no duplicate keys in the nvlist. + */ + nvl->nvl_nvflag |= NV_UNIQUE_NAME; + + return (nvl); +} + +/* + * Convert a value from the given index into the lua stack to an nvpair, adding + * it to an nvlist with the given key. + * + * Values are converted as follows: + * + * string -> string + * number -> int64 + * boolean -> boolean + * nil -> boolean (no value) + * + * Lua tables are converted to nvlists and then inserted. 
The table's keys + * are converted to strings then used as keys in the nvlist to store each table + * element. Keys are converted as follows: + * + * string -> no change + * number -> "%lld" + * boolean -> "true" | "false" + * nil -> error + * + * In the case of a key collision, an error is thrown. + * + * If an error is encountered, a nonzero error code is returned, and an error + * string will be pushed onto the Lua stack. + */ +static int +zcp_lua_to_nvlist_impl(lua_State *state, int index, nvlist_t *nvl, + const char *key, int depth) +{ + /* + * Verify that we have enough remaining space in the lua stack to parse + * a key-value pair and push an error. + */ + if (!lua_checkstack(state, 3)) { + (void) lua_pushstring(state, "Lua stack overflow"); + return (1); + } + + index = lua_absindex(state, index); + + switch (lua_type(state, index)) { + case LUA_TNIL: + fnvlist_add_boolean(nvl, key); + break; + case LUA_TBOOLEAN: + fnvlist_add_boolean_value(nvl, key, + lua_toboolean(state, index)); + break; + case LUA_TNUMBER: + fnvlist_add_int64(nvl, key, lua_tonumber(state, index)); + break; + case LUA_TSTRING: + fnvlist_add_string(nvl, key, lua_tostring(state, index)); + break; + case LUA_TTABLE: { + nvlist_t *value_nvl = zcp_table_to_nvlist(state, index, depth); + if (value_nvl == NULL) + return (EINVAL); + + fnvlist_add_nvlist(nvl, key, value_nvl); + fnvlist_free(value_nvl); + break; + } + default: + (void) lua_pushfstring(state, + "Invalid value type '%s' for key '%s'", + lua_typename(state, lua_type(state, index)), key); + return (EINVAL); + } + + return (0); +} + +/* + * Convert a lua value to an nvpair, adding it to an nvlist with the given key. + */ +void +zcp_lua_to_nvlist(lua_State *state, int index, nvlist_t *nvl, const char *key) +{ + /* + * On error, zcp_lua_to_nvlist_impl pushes an error string onto the Lua + * stack before returning with a nonzero error code. If an error is + * returned, throw a fatal lua error with the given string. 
+ */ + if (zcp_lua_to_nvlist_impl(state, index, nvl, key, 0) != 0) + (void) lua_error(state); +} + +int +zcp_lua_to_nvlist_helper(lua_State *state) +{ + nvlist_t *nv = (nvlist_t *)lua_touserdata(state, 2); + const char *key = (const char *)lua_touserdata(state, 1); + zcp_lua_to_nvlist(state, 3, nv, key); + return (0); +} + +void +zcp_convert_return_values(lua_State *state, nvlist_t *nvl, + const char *key, zcp_eval_arg_t *evalargs) +{ + int err; + lua_pushcfunction(state, zcp_lua_to_nvlist_helper); + lua_pushlightuserdata(state, (char *)key); + lua_pushlightuserdata(state, nvl); + lua_pushvalue(state, 1); + lua_remove(state, 1); + err = lua_pcall(state, 3, 0, 0); /* zcp_lua_to_nvlist_helper */ + if (err != 0) { + zcp_lua_to_nvlist(state, 1, nvl, ZCP_RET_ERROR); + evalargs->ea_result = SET_ERROR(ECHRNG); + } +} + +/* + * Push a Lua table representing nvl onto the stack. If it can't be + * converted, return EINVAL, fill in errbuf, and push nothing. errbuf may + * be specified as NULL, in which case no error string will be output. + * + * Most nvlists are converted as simple key->value Lua tables, but we make + * an exception for the case where all nvlist entries are BOOLEANs (a string + * key without a value). In Lua, a table key pointing to a value of Nil + * (no value) is equivalent to the key not existing, so a BOOLEAN nvlist + * entry can't be directly converted to a Lua table entry. Nvlists of entirely + * BOOLEAN entries are frequently used to pass around lists of datasets, so for + * convenience we check for this case, and convert it to a simple Lua array of + * strings. + */ +int +zcp_nvlist_to_lua(lua_State *state, nvlist_t *nvl, + char *errbuf, int errbuf_len) +{ + nvpair_t *pair; + lua_newtable(state); + boolean_t has_values = B_FALSE; + /* + * If the list doesn't have any values, just convert it to a string + * array. 
+ */ + for (pair = nvlist_next_nvpair(nvl, NULL); + pair != NULL; pair = nvlist_next_nvpair(nvl, pair)) { + if (nvpair_type(pair) != DATA_TYPE_BOOLEAN) { + has_values = B_TRUE; + break; + } + } + if (!has_values) { + int i = 1; + for (pair = nvlist_next_nvpair(nvl, NULL); + pair != NULL; pair = nvlist_next_nvpair(nvl, pair)) { + (void) lua_pushinteger(state, i); + (void) lua_pushstring(state, nvpair_name(pair)); + (void) lua_settable(state, -3); + i++; + } + } else { + for (pair = nvlist_next_nvpair(nvl, NULL); + pair != NULL; pair = nvlist_next_nvpair(nvl, pair)) { + int err = zcp_nvpair_value_to_lua(state, pair, + errbuf, errbuf_len); + if (err != 0) { + lua_pop(state, 1); + return (err); + } + (void) lua_setfield(state, -2, nvpair_name(pair)); + } + } + return (0); +} + +/* + * Push a Lua object representing the value of "pair" onto the stack. + * + * Only understands boolean_value, string, int64, nvlist, + * string_array, and int64_array type values. For other + * types, returns EINVAL, fills in errbuf, and pushes nothing. 
+ */ +static int +zcp_nvpair_value_to_lua(lua_State *state, nvpair_t *pair, + char *errbuf, int errbuf_len) +{ + int err = 0; + + if (pair == NULL) { + lua_pushnil(state); + return (0); + } + + switch (nvpair_type(pair)) { + case DATA_TYPE_BOOLEAN_VALUE: + (void) lua_pushboolean(state, + fnvpair_value_boolean_value(pair)); + break; + case DATA_TYPE_STRING: + (void) lua_pushstring(state, fnvpair_value_string(pair)); + break; + case DATA_TYPE_INT64: + (void) lua_pushinteger(state, fnvpair_value_int64(pair)); + break; + case DATA_TYPE_NVLIST: + err = zcp_nvlist_to_lua(state, + fnvpair_value_nvlist(pair), errbuf, errbuf_len); + break; + case DATA_TYPE_STRING_ARRAY: { + char **strarr; + uint_t nelem; + (void) nvpair_value_string_array(pair, &strarr, &nelem); + lua_newtable(state); + for (int i = 0; i < nelem; i++) { + (void) lua_pushinteger(state, i + 1); + (void) lua_pushstring(state, strarr[i]); + (void) lua_settable(state, -3); + } + break; + } + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *intarr; + uint_t nelem; + (void) nvpair_value_uint64_array(pair, &intarr, &nelem); + lua_newtable(state); + for (int i = 0; i < nelem; i++) { + (void) lua_pushinteger(state, i + 1); + (void) lua_pushinteger(state, intarr[i]); + (void) lua_settable(state, -3); + } + break; + } + case DATA_TYPE_INT64_ARRAY: { + int64_t *intarr; + uint_t nelem; + (void) nvpair_value_int64_array(pair, &intarr, &nelem); + lua_newtable(state); + for (int i = 0; i < nelem; i++) { + (void) lua_pushinteger(state, i + 1); + (void) lua_pushinteger(state, intarr[i]); + (void) lua_settable(state, -3); + } + break; + } + default: { + if (errbuf != NULL) { + (void) snprintf(errbuf, errbuf_len, + "Unhandled nvpair type %d for key '%s'", + nvpair_type(pair), nvpair_name(pair)); + } + return (EINVAL); + } + } + return (err); +} + +int +zcp_dataset_hold_error(lua_State *state, dsl_pool_t *dp, const char *dsname, + int error) +{ + if (error == ENOENT) { + (void) zcp_argerror(state, 1, "no such dataset '%s'", dsname); + 
return (0); /* not reached; zcp_argerror will longjmp */ + } else if (error == EXDEV) { + (void) zcp_argerror(state, 1, + "dataset '%s' is not in the target pool '%s'", + dsname, spa_name(dp->dp_spa)); + return (0); /* not reached; zcp_argerror will longjmp */ + } else if (error == EIO) { + (void) luaL_error(state, + "I/O error while accessing dataset '%s'", dsname); + return (0); /* not reached; luaL_error will longjmp */ + } else if (error != 0) { + (void) luaL_error(state, + "unexpected error %d while accessing dataset '%s'", + error, dsname); + return (0); /* not reached; luaL_error will longjmp */ + } + return (0); +} + +/* + * Note: will longjmp (via lua_error()) on error. + * Assumes that the dsname is argument #1 (for error reporting purposes). + */ +dsl_dataset_t * +zcp_dataset_hold(lua_State *state, dsl_pool_t *dp, const char *dsname, + void *tag) +{ + dsl_dataset_t *ds; + int error = dsl_dataset_hold(dp, dsname, tag, &ds); + (void) zcp_dataset_hold_error(state, dp, dsname, error); + return (ds); +} + +static int zcp_debug(lua_State *); +static zcp_lib_info_t zcp_debug_info = { + .name = "debug", + .func = zcp_debug, + .pargs = { + { .za_name = "debug string", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_debug(lua_State *state) +{ + const char *dbgstring; + zcp_run_info_t *ri = zcp_run_info(state); + zcp_lib_info_t *libinfo = &zcp_debug_info; + + zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs); + + dbgstring = lua_tostring(state, 1); + + zfs_dbgmsg("txg %lld ZCP: %s", ri->zri_tx->tx_txg, dbgstring); + + return (0); +} + +static int zcp_exists(lua_State *); +static zcp_lib_info_t zcp_exists_info = { + .name = "exists", + .func = zcp_exists, + .pargs = { + { .za_name = "dataset", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_exists(lua_State *state) +{ + zcp_run_info_t *ri = zcp_run_info(state); + dsl_pool_t *dp = ri->zri_pool; + 
zcp_lib_info_t *libinfo = &zcp_exists_info; + + zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs); + + const char *dsname = lua_tostring(state, 1); + + dsl_dataset_t *ds; + int error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error == 0) { + dsl_dataset_rele(ds, FTAG); + lua_pushboolean(state, B_TRUE); + } else if (error == ENOENT) { + lua_pushboolean(state, B_FALSE); + } else if (error == EXDEV) { + return (luaL_error(state, "dataset '%s' is not in the " + "target pool", dsname)); + } else if (error == EIO) { + return (luaL_error(state, "I/O error opening dataset '%s'", + dsname)); + } else if (error != 0) { + return (luaL_error(state, "unexpected error %d", error)); + } + + return (0); +} + +/* + * Allocate/realloc/free a buffer for the lua interpreter. + * + * When nsize is 0, behaves as free() and returns NULL. + * + * If ptr is NULL, behaves as malloc() and returns an allocated buffer of size + * at least nsize. + * + * Otherwise, behaves as realloc(), changing the allocation from osize to nsize. + * Shrinking the buffer size never fails. + * + * The original allocated buffer size is stored as a uint64 at the beginning of + * the buffer to avoid actually reallocating when shrinking a buffer, since lua + * requires that this operation never fail. + */ +static void * +zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize) +{ + zcp_alloc_arg_t *allocargs = ud; + int flags = (allocargs->aa_must_succeed) ? 
+ KM_SLEEP : (KM_NOSLEEP | KM_NORMALPRI); + + if (nsize == 0) { + if (ptr != NULL) { + int64_t *allocbuf = (int64_t *)ptr - 1; + int64_t allocsize = *allocbuf; + ASSERT3S(allocsize, >, 0); + ASSERT3S(allocargs->aa_alloc_remaining + allocsize, <=, + allocargs->aa_alloc_limit); + allocargs->aa_alloc_remaining += allocsize; + vmem_free(allocbuf, allocsize); + } + return (NULL); + } else if (ptr == NULL) { + int64_t *allocbuf; + int64_t allocsize = nsize + sizeof (int64_t); + + if (!allocargs->aa_must_succeed && + (allocsize <= 0 || + allocsize > allocargs->aa_alloc_remaining)) { + return (NULL); + } + + allocbuf = vmem_alloc(allocsize, flags); + if (allocbuf == NULL) { + return (NULL); + } + allocargs->aa_alloc_remaining -= allocsize; + + *allocbuf = allocsize; + return (allocbuf + 1); + } else if (nsize <= osize) { + /* + * If shrinking the buffer, lua requires that the reallocation + * never fail. + */ + return (ptr); + } else { + ASSERT3U(nsize, >, osize); + + uint64_t *luabuf = zcp_lua_alloc(ud, NULL, 0, nsize); + if (luabuf == NULL) { + return (NULL); + } + (void) memcpy(luabuf, ptr, osize); + VERIFY3P(zcp_lua_alloc(ud, ptr, osize, 0), ==, NULL); + return (luabuf); + } +} + +/* ARGSUSED */ +static void +zcp_lua_counthook(lua_State *state, lua_Debug *ar) +{ + /* + * If we're called, check how many instructions the channel program has + * executed so far, and compare against the limit. 
+ */ + lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); + zcp_run_info_t *ri = lua_touserdata(state, -1); + + ri->zri_curinstrs += zfs_lua_check_instrlimit_interval; + if (ri->zri_maxinstrs != 0 && ri->zri_curinstrs > ri->zri_maxinstrs) { + ri->zri_timed_out = B_TRUE; + (void) lua_pushstring(state, + "Channel program timed out."); + (void) lua_error(state); + } +} + +static int +zcp_panic_cb(lua_State *state) +{ + panic("unprotected error in call to Lua API (%s)\n", + lua_tostring(state, -1)); + return (0); +} + +static void +zcp_eval_sync(void *arg, dmu_tx_t *tx) +{ + int err; + zcp_run_info_t ri; + zcp_eval_arg_t *evalargs = arg; + lua_State *state = evalargs->ea_state; + + /* + * Open context should have setup the stack to contain: + * 1: Error handler callback + * 2: Script to run (converted to a Lua function) + * 3: nvlist input to function (converted to Lua table or nil) + */ + VERIFY3U(3, ==, lua_gettop(state)); + + /* + * Store the zcp_run_info_t struct for this run in the Lua registry. + * Registry entries are not directly accessible by the Lua scripts but + * can be accessed by our callbacks. + */ + ri.zri_space_used = 0; + ri.zri_pool = dmu_tx_pool(tx); + ri.zri_cred = evalargs->ea_cred; + ri.zri_tx = tx; + ri.zri_timed_out = B_FALSE; + ri.zri_cleanup = NULL; + ri.zri_cleanup_arg = NULL; + ri.zri_curinstrs = 0; + ri.zri_maxinstrs = evalargs->ea_instrlimit; + + lua_pushlightuserdata(state, &ri); + lua_setfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); + VERIFY3U(3, ==, lua_gettop(state)); + + /* + * Tell the Lua interpreter to call our handler every count + * instructions. Channel programs that execute too many instructions + * should die with ETIME. + */ + (void) lua_sethook(state, zcp_lua_counthook, LUA_MASKCOUNT, + zfs_lua_check_instrlimit_interval); + + /* + * Tell the Lua memory allocator to stop using KM_SLEEP before handing + * off control to the channel program. Channel programs that use too + * much memory should die with ENOSPC. 
+ */ + evalargs->ea_allocargs->aa_must_succeed = B_FALSE; + + /* + * Call the Lua function that open-context passed us. This pops the + * function and its input from the stack and pushes any return + * or error values. + */ + err = lua_pcall(state, 1, LUA_MULTRET, 1); + + /* + * Let Lua use KM_SLEEP while we interpret the return values. + */ + evalargs->ea_allocargs->aa_must_succeed = B_TRUE; + + /* + * Remove the error handler callback from the stack. At this point, + * if there is a cleanup function registered, then it was registered + * but never run or removed, which should never occur. + */ + ASSERT3P(ri.zri_cleanup, ==, NULL); + lua_remove(state, 1); + + switch (err) { + case LUA_OK: { + /* + * Lua supports returning multiple values in a single return + * statement. Return values will have been pushed onto the + * stack: + * 1: Return value 1 + * 2: Return value 2 + * 3: etc... + * To simplify the process of retrieving a return value from a + * channel program, we disallow returning more than one value + * to ZFS from the Lua script, yielding a singleton return + * nvlist of the form { "return": Return value 1 }. + */ + int return_count = lua_gettop(state); + + if (return_count == 1) { + evalargs->ea_result = 0; + zcp_convert_return_values(state, evalargs->ea_outnvl, + ZCP_RET_RETURN, evalargs); + } else if (return_count > 1) { + evalargs->ea_result = SET_ERROR(ECHRNG); + (void) lua_pushfstring(state, "Multiple return " + "values not supported"); + zcp_convert_return_values(state, evalargs->ea_outnvl, + ZCP_RET_ERROR, evalargs); + } + break; + } + case LUA_ERRRUN: + case LUA_ERRGCMM: { + /* + * The channel program encountered a fatal error within the + * script, such as failing an assertion, or calling a function + * with incompatible arguments. The error value and the + * traceback generated by zcp_error_handler() should be on the + * stack. 
+ */ + VERIFY3U(1, ==, lua_gettop(state)); + if (ri.zri_timed_out) { + evalargs->ea_result = SET_ERROR(ETIME); + } else { + evalargs->ea_result = SET_ERROR(ECHRNG); + } + + zcp_convert_return_values(state, evalargs->ea_outnvl, + ZCP_RET_ERROR, evalargs); + + if (evalargs->ea_result == ETIME && + evalargs->ea_outnvl != NULL) { + (void) nvlist_add_uint64(evalargs->ea_outnvl, + ZCP_ARG_INSTRLIMIT, ri.zri_curinstrs); + } + break; + } + case LUA_ERRERR: { + /* + * The channel program encountered a fatal error within the + * script, and we encountered another error while trying to + * compute the traceback in zcp_error_handler(). We can only + * return the error message. + */ + VERIFY3U(1, ==, lua_gettop(state)); + if (ri.zri_timed_out) { + evalargs->ea_result = SET_ERROR(ETIME); + } else { + evalargs->ea_result = SET_ERROR(ECHRNG); + } + + zcp_convert_return_values(state, evalargs->ea_outnvl, + ZCP_RET_ERROR, evalargs); + break; + } + case LUA_ERRMEM: + /* + * Lua ran out of memory while running the channel program. + * There's not much we can do. + */ + evalargs->ea_result = SET_ERROR(ENOSPC); + break; + default: + VERIFY0(err); + } +} + +int +zcp_eval(const char *poolname, const char *program, uint64_t instrlimit, + uint64_t memlimit, nvpair_t *nvarg, nvlist_t *outnvl) +{ + int err; + lua_State *state; + zcp_eval_arg_t evalargs; + + if (instrlimit > zfs_lua_max_instrlimit) + return (SET_ERROR(EINVAL)); + if (memlimit == 0 || memlimit > zfs_lua_max_memlimit) + return (SET_ERROR(EINVAL)); + + zcp_alloc_arg_t allocargs = { + .aa_must_succeed = B_TRUE, + .aa_alloc_remaining = (int64_t)memlimit, + .aa_alloc_limit = (int64_t)memlimit, + }; + + /* + * Creates a Lua state with a memory allocator that uses KM_SLEEP. + * This should never fail. + */ + state = lua_newstate(zcp_lua_alloc, &allocargs); + VERIFY(state != NULL); + (void) lua_atpanic(state, zcp_panic_cb); + + /* + * Load core Lua libraries we want access to. 
+ */ + VERIFY3U(1, ==, luaopen_base(state)); + lua_pop(state, 1); + VERIFY3U(1, ==, luaopen_coroutine(state)); + lua_setglobal(state, LUA_COLIBNAME); + VERIFY0(lua_gettop(state)); + VERIFY3U(1, ==, luaopen_string(state)); + lua_setglobal(state, LUA_STRLIBNAME); + VERIFY0(lua_gettop(state)); + VERIFY3U(1, ==, luaopen_table(state)); + lua_setglobal(state, LUA_TABLIBNAME); + VERIFY0(lua_gettop(state)); + + /* + * Load globally visible variables such as errno aliases. + */ + zcp_load_globals(state); + VERIFY0(lua_gettop(state)); + + /* + * Load ZFS-specific modules. + */ + lua_newtable(state); + VERIFY3U(1, ==, zcp_load_list_lib(state)); + lua_setfield(state, -2, "list"); + VERIFY3U(1, ==, zcp_load_synctask_lib(state, B_FALSE)); + lua_setfield(state, -2, "check"); + VERIFY3U(1, ==, zcp_load_synctask_lib(state, B_TRUE)); + lua_setfield(state, -2, "sync"); + VERIFY3U(1, ==, zcp_load_get_lib(state)); + lua_pushcclosure(state, zcp_debug_info.func, 0); + lua_setfield(state, -2, zcp_debug_info.name); + lua_pushcclosure(state, zcp_exists_info.func, 0); + lua_setfield(state, -2, zcp_exists_info.name); + lua_setglobal(state, "zfs"); + VERIFY0(lua_gettop(state)); + + /* + * Push the error-callback that calculates Lua stack traces on + * unexpected failures. + */ + lua_pushcfunction(state, zcp_error_handler); + VERIFY3U(1, ==, lua_gettop(state)); + + /* + * Load the actual script as a function onto the stack as text ("t"). + * The only valid error condition is a syntax error in the script. + * ERRMEM should not be possible because our allocator is using + * KM_SLEEP. ERRGCMM should not be possible because we have not added + * any objects with __gc metamethods to the interpreter that could + * fail. 
+ */ + err = luaL_loadbufferx(state, program, strlen(program), + "channel program", "t"); + if (err == LUA_ERRSYNTAX) { + fnvlist_add_string(outnvl, ZCP_RET_ERROR, + lua_tostring(state, -1)); + lua_close(state); + return (SET_ERROR(EINVAL)); + } + VERIFY0(err); + VERIFY3U(2, ==, lua_gettop(state)); + + /* + * Convert the input nvlist to a Lua object and put it on top of the + * stack. + */ + char errmsg[128]; + err = zcp_nvpair_value_to_lua(state, nvarg, + errmsg, sizeof (errmsg)); + if (err != 0) { + fnvlist_add_string(outnvl, ZCP_RET_ERROR, errmsg); + lua_close(state); + return (SET_ERROR(EINVAL)); + } + VERIFY3U(3, ==, lua_gettop(state)); + + evalargs.ea_state = state; + evalargs.ea_allocargs = &allocargs; + evalargs.ea_instrlimit = instrlimit; + evalargs.ea_cred = CRED(); + evalargs.ea_outnvl = outnvl; + evalargs.ea_result = 0; + + VERIFY0(dsl_sync_task(poolname, zcp_eval_check, + zcp_eval_sync, &evalargs, 0, ZFS_SPACE_CHECK_NONE)); + + lua_close(state); + + return (evalargs.ea_result); +} + +/* + * Retrieve metadata about the currently running channel program. + */ +zcp_run_info_t * +zcp_run_info(lua_State *state) +{ + zcp_run_info_t *ri; + + lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); + ri = lua_touserdata(state, -1); + lua_pop(state, 1); + return (ri); +} + +/* + * Argument Parsing + * ================ + * + * The Lua language allows methods to be called with any number + * of arguments of any type. When calling back into ZFS we need to sanitize + * arguments from channel programs to make sure unexpected arguments or + * arguments of the wrong type result in clear error messages. To do this + * in a uniform way all callbacks from channel programs should use the + * zcp_parse_args() function to interpret inputs. + * + * Positional vs Keyword Arguments + * =============================== + * + * Every callback function takes a fixed set of required positional arguments + * and optional keyword arguments. 
For example, the destroy function takes + * a single positional string argument (the name of the dataset to destroy) + * and an optional "defer" keyword boolean argument. When calling lua functions + * with parentheses, only positional arguments can be used: + * + * zfs.sync.snapshot("rpool@snap") + * + * To use keyword arguments functions should be called with a single argument + * that is a lua table containing mappings of integer -> positional arguments + * and string -> keyword arguments: + * + * zfs.sync.snapshot({1="rpool@snap", defer=true}) + * + * The lua language allows curly braces to be used in place of parenthesis as + * syntactic sugar for this calling convention: + * + * zfs.sync.snapshot{"rpool@snap", defer=true} + */ + +/* + * Throw an error and print the given arguments. If there are too many + * arguments to fit in the output buffer, only the error format string is + * output. + */ +static void +zcp_args_error(lua_State *state, const char *fname, const zcp_arg_t *pargs, + const zcp_arg_t *kwargs, const char *fmt, ...) +{ + int i; + char errmsg[512]; + size_t len = sizeof (errmsg); + size_t msglen = 0; + va_list argp; + + va_start(argp, fmt); + VERIFY3U(len, >, vsnprintf(errmsg, len, fmt, argp)); + va_end(argp); + + /* + * Calculate the total length of the final string, including extra + * formatting characters. If the argument dump would be too large, + * only print the error string. 
+ */ + msglen = strlen(errmsg); + msglen += strlen(fname) + 4; /* : + {} + null terminator */ + for (i = 0; pargs[i].za_name != NULL; i++) { + msglen += strlen(pargs[i].za_name); + msglen += strlen(lua_typename(state, pargs[i].za_lua_type)); + if (pargs[i + 1].za_name != NULL || kwargs[0].za_name != NULL) + msglen += 5; /* < + ( + )> + , */ + else + msglen += 4; /* < + ( + )> */ + } + for (i = 0; kwargs[i].za_name != NULL; i++) { + msglen += strlen(kwargs[i].za_name); + msglen += strlen(lua_typename(state, kwargs[i].za_lua_type)); + if (kwargs[i + 1].za_name != NULL) + msglen += 4; /* =( + ) + , */ + else + msglen += 3; /* =( + ) */ + } + + if (msglen >= len) + (void) luaL_error(state, errmsg); + + VERIFY3U(len, >, strlcat(errmsg, ": ", len)); + VERIFY3U(len, >, strlcat(errmsg, fname, len)); + VERIFY3U(len, >, strlcat(errmsg, "{", len)); + for (i = 0; pargs[i].za_name != NULL; i++) { + VERIFY3U(len, >, strlcat(errmsg, "<", len)); + VERIFY3U(len, >, strlcat(errmsg, pargs[i].za_name, len)); + VERIFY3U(len, >, strlcat(errmsg, "(", len)); + VERIFY3U(len, >, strlcat(errmsg, + lua_typename(state, pargs[i].za_lua_type), len)); + VERIFY3U(len, >, strlcat(errmsg, ")>", len)); + if (pargs[i + 1].za_name != NULL || kwargs[0].za_name != NULL) { + VERIFY3U(len, >, strlcat(errmsg, ", ", len)); + } + } + for (i = 0; kwargs[i].za_name != NULL; i++) { + VERIFY3U(len, >, strlcat(errmsg, kwargs[i].za_name, len)); + VERIFY3U(len, >, strlcat(errmsg, "=(", len)); + VERIFY3U(len, >, strlcat(errmsg, + lua_typename(state, kwargs[i].za_lua_type), len)); + VERIFY3U(len, >, strlcat(errmsg, ")", len)); + if (kwargs[i + 1].za_name != NULL) { + VERIFY3U(len, >, strlcat(errmsg, ", ", len)); + } + } + VERIFY3U(len, >, strlcat(errmsg, "}", len)); + + (void) luaL_error(state, errmsg); + panic("unreachable code"); +} + +static void +zcp_parse_table_args(lua_State *state, const char *fname, + const zcp_arg_t *pargs, const zcp_arg_t *kwargs) +{ + int i; + int type; + + for (i = 0; pargs[i].za_name != 
NULL; i++) { + /* + * Check the table for this positional argument, leaving it + * on the top of the stack once we finish validating it. + */ + lua_pushinteger(state, i + 1); + lua_gettable(state, 1); + + type = lua_type(state, -1); + if (type == LUA_TNIL) { + zcp_args_error(state, fname, pargs, kwargs, + "too few arguments"); + panic("unreachable code"); + } else if (type != pargs[i].za_lua_type) { + zcp_args_error(state, fname, pargs, kwargs, + "arg %d wrong type (is '%s', expected '%s')", + i + 1, lua_typename(state, type), + lua_typename(state, pargs[i].za_lua_type)); + panic("unreachable code"); + } + + /* + * Remove the positional argument from the table. + */ + lua_pushinteger(state, i + 1); + lua_pushnil(state); + lua_settable(state, 1); + } + + for (i = 0; kwargs[i].za_name != NULL; i++) { + /* + * Check the table for this keyword argument, which may be + * nil if it was omitted. Leave the value on the top of + * the stack after validating it. + */ + lua_getfield(state, 1, kwargs[i].za_name); + + type = lua_type(state, -1); + if (type != LUA_TNIL && type != kwargs[i].za_lua_type) { + zcp_args_error(state, fname, pargs, kwargs, + "kwarg '%s' wrong type (is '%s', expected '%s')", + kwargs[i].za_name, lua_typename(state, type), + lua_typename(state, kwargs[i].za_lua_type)); + panic("unreachable code"); + } + + /* + * Remove the keyword argument from the table. + */ + lua_pushnil(state); + lua_setfield(state, 1, kwargs[i].za_name); + } + + /* + * Any entries remaining in the table are invalid inputs, print + * an error message based on what the entry is. 
+ */ + lua_pushnil(state); + if (lua_next(state, 1)) { + if (lua_isnumber(state, -2) && lua_tointeger(state, -2) > 0) { + zcp_args_error(state, fname, pargs, kwargs, + "too many positional arguments"); + } else if (lua_isstring(state, -2)) { + zcp_args_error(state, fname, pargs, kwargs, + "invalid kwarg '%s'", lua_tostring(state, -2)); + } else { + zcp_args_error(state, fname, pargs, kwargs, + "kwarg keys must be strings"); + } + panic("unreachable code"); + } + + lua_remove(state, 1); +} + +static void +zcp_parse_pos_args(lua_State *state, const char *fname, const zcp_arg_t *pargs, + const zcp_arg_t *kwargs) +{ + int i; + int type; + + for (i = 0; pargs[i].za_name != NULL; i++) { + type = lua_type(state, i + 1); + if (type == LUA_TNONE) { + zcp_args_error(state, fname, pargs, kwargs, + "too few arguments"); + panic("unreachable code"); + } else if (type != pargs[i].za_lua_type) { + zcp_args_error(state, fname, pargs, kwargs, + "arg %d wrong type (is '%s', expected '%s')", + i + 1, lua_typename(state, type), + lua_typename(state, pargs[i].za_lua_type)); + panic("unreachable code"); + } + } + if (lua_gettop(state) != i) { + zcp_args_error(state, fname, pargs, kwargs, + "too many positional arguments"); + panic("unreachable code"); + } + + for (i = 0; kwargs[i].za_name != NULL; i++) { + lua_pushnil(state); + } +} + +/* + * Checks the current Lua stack against an expected set of positional and + * keyword arguments. If the stack does not match the expected arguments + * aborts the current channel program with a useful error message, otherwise + * it re-arranges the stack so that it contains the positional arguments + * followed by the keyword argument values in declaration order. Any missing + * keyword argument will be represented by a nil value on the stack. + * + * If the stack contains exactly one argument of type LUA_TTABLE the curly + * braces calling convention is assumed, otherwise the stack is parsed for + * positional arguments only. 
+ * + * This function should be used by every function callback. It should be called + * before the callback manipulates the Lua stack as it assumes the stack + * represents the function arguments. + */ +void +zcp_parse_args(lua_State *state, const char *fname, const zcp_arg_t *pargs, + const zcp_arg_t *kwargs) +{ + if (lua_gettop(state) == 1 && lua_istable(state, 1)) { + zcp_parse_table_args(state, fname, pargs, kwargs); + } else { + zcp_parse_pos_args(state, fname, pargs, kwargs); + } +} diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c new file mode 100644 index 000000000..7645bc158 --- /dev/null +++ b/module/zfs/zcp_get.c @@ -0,0 +1,876 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2016 by Delphix. All rights reserved. 
+ */ + +#include <sys/lua/lua.h> +#include <sys/lua/lualib.h> +#include <sys/lua/lauxlib.h> + +#include <zfs_prop.h> + +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dmu_objset.h> +#include <sys/mntent.h> +#include <sys/sunddi.h> +#include <sys/zap.h> +#include <sys/zcp.h> +#include <sys/zcp_iter.h> +#include <sys/zcp_global.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_znode.h> +#include <sys/zvol.h> + +#ifdef _KERNEL +#include <sys/zfs_vfsops.h> +#endif + +static int +get_objset_type(dsl_dataset_t *ds, zfs_type_t *type) +{ + int error; + objset_t *os; + error = dmu_objset_from_ds(ds, &os); + if (error != 0) + return (error); + if (ds->ds_is_snapshot) { + *type = ZFS_TYPE_SNAPSHOT; + } else { + switch (os->os_phys->os_type) { + case DMU_OST_ZFS: + *type = ZFS_TYPE_FILESYSTEM; + break; + case DMU_OST_ZVOL: + *type = ZFS_TYPE_VOLUME; + break; + default: + return (EINVAL); + } + } + return (0); +} + +/* + * Returns the string name of ds's type in str (a buffer which should be + * at least 12 bytes long). + */ +static int +get_objset_type_name(dsl_dataset_t *ds, char *str) +{ + int error; + zfs_type_t type; + error = get_objset_type(ds, &type); + if (error != 0) + return (error); + switch (type) { + case ZFS_TYPE_SNAPSHOT: + (void) strcpy(str, "snapshot"); + break; + case ZFS_TYPE_FILESYSTEM: + (void) strcpy(str, "filesystem"); + break; + case ZFS_TYPE_VOLUME: + (void) strcpy(str, "volume"); + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * Determines the source of a property given its setpoint and + * property type. It pushes the source to the lua stack. 
+ */ +static void +get_prop_src(lua_State *state, const char *setpoint, zfs_prop_t prop) +{ + if (zfs_prop_readonly(prop) || (prop == ZFS_PROP_VERSION)) { + lua_pushnil(state); + } else { + const char *src; + if (strcmp("", setpoint) == 0) { + src = "default"; + } else { + src = setpoint; + } + (void) lua_pushstring(state, src); + } +} + +/* + * Given an error encountered while getting properties, either longjmp's for + * a fatal error or pushes nothing to the stack for a non fatal one. + */ +static int +zcp_handle_error(lua_State *state, const char *dataset_name, + const char *property_name, int error) +{ + ASSERT3S(error, !=, 0); + if (error == ENOENT) { + return (0); + } else if (error == EINVAL) { + return (luaL_error(state, + "property '%s' is not a valid property on dataset '%s'", + property_name, dataset_name)); + } else if (error == EIO) { + return (luaL_error(state, + "I/O error while retrieving property '%s' on dataset '%s'", + property_name, dataset_name)); + } else { + return (luaL_error(state, "unexpected error %d while " + "retrieving property '%s' on dataset '%s'", + error, property_name, dataset_name)); + } +} + +/* + * Look up a user defined property in the zap object. If it exists, push it + * and the setpoint onto the stack, otherwise don't push anything. + */ +static int +zcp_get_user_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name, + const char *property_name) +{ + int error; + char *buf; + char setpoint[ZFS_MAX_DATASET_NAME_LEN]; + /* + * zcp_dataset_hold will either successfully return the requested + * dataset or throw a lua error and longjmp out of the zfs.get_prop call + * without returning. 
+ */ + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + + buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); + error = dsl_prop_get_ds(ds, property_name, 1, ZAP_MAXVALUELEN, + buf, setpoint); + dsl_dataset_rele(ds, FTAG); + + if (error != 0) { + kmem_free(buf, ZAP_MAXVALUELEN); + return (zcp_handle_error(state, dataset_name, property_name, + error)); + } + (void) lua_pushstring(state, buf); + (void) lua_pushstring(state, setpoint); + kmem_free(buf, ZAP_MAXVALUELEN); + return (2); +} + +/* + * Check if the property we're looking for is stored in the ds_dir. If so, + * return it in the 'val' argument. Return 0 on success and ENOENT and if + * the property is not present. + */ +static int +get_dsl_dir_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, + uint64_t *val) +{ + dsl_dir_t *dd = ds->ds_dir; + mutex_enter(&dd->dd_lock); + switch (zfs_prop) { + case ZFS_PROP_USEDSNAP: + *val = dsl_dir_get_usedsnap(dd); + break; + case ZFS_PROP_USEDCHILD: + *val = dsl_dir_get_usedchild(dd); + break; + case ZFS_PROP_USEDDS: + *val = dsl_dir_get_usedds(dd); + break; + case ZFS_PROP_USEDREFRESERV: + *val = dsl_dir_get_usedrefreserv(dd); + break; + case ZFS_PROP_LOGICALUSED: + *val = dsl_dir_get_logicalused(dd); + break; + default: + mutex_exit(&dd->dd_lock); + return (ENOENT); + } + mutex_exit(&dd->dd_lock); + return (0); +} + +/* + * Takes a dataset, a property, a value and that value's setpoint as + * found in the ZAP. Checks if the property has been changed in the vfs. + * If so, val and setpoint will be overwritten with updated content. + * Otherwise, they are left unchanged. 
+ */ +static int +get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, + char *setpoint) +{ +#if !defined(_KERNEL) + return (0); +#else + int error; + zfsvfs_t *zfvp; + vfs_t *vfsp; + objset_t *os; + uint64_t tmp = *val; + + error = dmu_objset_from_ds(ds, &os); + if (error != 0) + return (error); + + if (dmu_objset_type(os) != DMU_OST_ZFS) + return (EINVAL); + + mutex_enter(&os->os_user_ptr_lock); + zfvp = dmu_objset_get_user(os); + mutex_exit(&os->os_user_ptr_lock); + if (zfvp == NULL) + return (ESRCH); + + vfsp = zfvp->z_vfs; + + switch (zfs_prop) { + case ZFS_PROP_ATIME: + if (vfsp->vfs_do_atime) + tmp = vfsp->vfs_atime; + break; + case ZFS_PROP_RELATIME: + if (vfsp->vfs_do_relatime) + tmp = vfsp->vfs_relatime; + break; + case ZFS_PROP_DEVICES: + if (vfsp->vfs_do_devices) + tmp = vfsp->vfs_devices; + break; + case ZFS_PROP_EXEC: + if (vfsp->vfs_do_exec) + tmp = vfsp->vfs_exec; + break; + case ZFS_PROP_SETUID: + if (vfsp->vfs_do_setuid) + tmp = vfsp->vfs_setuid; + break; + case ZFS_PROP_READONLY: + if (vfsp->vfs_do_readonly) + tmp = vfsp->vfs_readonly; + break; + case ZFS_PROP_XATTR: + if (vfsp->vfs_do_xattr) + tmp = vfsp->vfs_xattr; + break; + case ZFS_PROP_NBMAND: + if (vfsp->vfs_do_nbmand) + tmp = vfsp->vfs_nbmand; + break; + default: + return (ENOENT); + } + + if (tmp != *val) { + (void) strcpy(setpoint, "temporary"); + *val = tmp; + } + return (0); +#endif +} + +/* + * Check if the property we're looking for is stored at the dsl_dataset or + * dsl_dir level. If so, push the property value and source onto the lua stack + * and return 0. If it is not present or a failure occurs in lookup, return a + * non-zero error value. 
 */
static int
get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname,
    zfs_prop_t zfs_prop)
{
	int error = 0;
	objset_t *os;
	uint64_t numval;
	/*
	 * Scratch buffer for string-valued properties. It must be freed on
	 * every exit path of this function.
	 */
	char *strval = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
	/*
	 * Cases that know their setpoint overwrite this; the initial text is
	 * what gets handed to get_prop_src() for cases that do not.
	 */
	char setpoint[ZFS_MAX_DATASET_NAME_LEN] =
	    "Internal error - setpoint not determined";
	zfs_type_t ds_type;
	zprop_type_t prop_type = zfs_prop_get_type(zfs_prop);
	/*
	 * NOTE(review): the result is ignored, so ds_type is only meaningful
	 * if this call succeeded; it is consulted solely by ASSERTs below.
	 */
	(void) get_objset_type(ds, &ds_type);

	/*
	 * First stage: compute the raw value into numval or strval (and
	 * optionally setpoint), recording any failure in 'error'.
	 */
	switch (zfs_prop) {
	case ZFS_PROP_REFRATIO:
		numval = dsl_get_refratio(ds);
		break;
	case ZFS_PROP_USED:
		numval = dsl_get_used(ds);
		break;
	case ZFS_PROP_CLONES: {
		/*
		 * The clone list is pushed as a Lua value built from an
		 * nvlist, so this case pushes its own results and returns
		 * without going through the type switch below.
		 */
		nvlist_t *clones = fnvlist_alloc();
		error = get_clones_stat_impl(ds, clones);
		if (error == 0) {
			/* push list to lua stack */
			VERIFY0(zcp_nvlist_to_lua(state, clones, NULL, 0ULL));
			/* source */
			(void) lua_pushnil(state);
		}
		nvlist_free(clones);
		kmem_free(strval, ZAP_MAXVALUELEN);
		return (error);
	}
	case ZFS_PROP_COMPRESSRATIO:
		numval = dsl_get_compressratio(ds);
		break;
	case ZFS_PROP_CREATION:
		numval = dsl_get_creation(ds);
		break;
	case ZFS_PROP_REFERENCED:
		numval = dsl_get_referenced(ds);
		break;
	case ZFS_PROP_AVAILABLE:
		numval = dsl_get_available(ds);
		break;
	case ZFS_PROP_LOGICALREFERENCED:
		numval = dsl_get_logicalreferenced(ds);
		break;
	case ZFS_PROP_CREATETXG:
		numval = dsl_get_creationtxg(ds);
		break;
	case ZFS_PROP_GUID:
		numval = dsl_get_guid(ds);
		break;
	case ZFS_PROP_UNIQUE:
		numval = dsl_get_unique(ds);
		break;
	case ZFS_PROP_OBJSETID:
		numval = dsl_get_objsetid(ds);
		break;
	case ZFS_PROP_ORIGIN:
		dsl_dir_get_origin(ds->ds_dir, strval);
		break;
	case ZFS_PROP_USERACCOUNTING:
		error = dmu_objset_from_ds(ds, &os);
		if (error == 0)
			numval = dmu_objset_userspace_present(os);
		break;
	case ZFS_PROP_WRITTEN:
		error = dsl_get_written(ds, &numval);
		break;
	case ZFS_PROP_TYPE:
		error = get_objset_type_name(ds, strval);
		break;
	case ZFS_PROP_PREV_SNAP:
		error = dsl_get_prev_snap(ds, strval);
		break;
	case ZFS_PROP_NAME:
		dsl_dataset_name(ds, strval);
		break;
	case ZFS_PROP_MOUNTPOINT:
		error = dsl_get_mountpoint(ds, dsname, strval, setpoint);
		break;
	case ZFS_PROP_VERSION:
		/* should be a snapshot or filesystem */
		ASSERT(ds_type != ZFS_TYPE_VOLUME);
		error = dmu_objset_from_ds(ds, &os);
		/* look in the master node for the version */
		if (error == 0) {
			error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
			    sizeof (numval), 1, &numval);
		}
		break;
	case ZFS_PROP_DEFER_DESTROY:
		numval = dsl_get_defer_destroy(ds);
		break;
	case ZFS_PROP_USERREFS:
		numval = dsl_get_userrefs(ds);
		break;
	case ZFS_PROP_FILESYSTEM_COUNT:
		error = dsl_dir_get_filesystem_count(ds->ds_dir, &numval);
		/* empty setpoint: get_prop_src() reports it as "default" */
		(void) strcpy(setpoint, "");
		break;
	case ZFS_PROP_SNAPSHOT_COUNT:
		error = dsl_dir_get_snapshot_count(ds->ds_dir, &numval);
		/* empty setpoint: get_prop_src() reports it as "default" */
		(void) strcpy(setpoint, "");
		break;
	case ZFS_PROP_NUMCLONES:
		numval = dsl_get_numclones(ds);
		break;
	case ZFS_PROP_INCONSISTENT:
		numval = dsl_get_inconsistent(ds);
		break;
	case ZFS_PROP_RECEIVE_RESUME_TOKEN: {
		/*
		 * Try the dataset's own token first and fall back to the
		 * child receive stats when it is empty. If both are empty
		 * the property is reported as absent (ENOENT). Both strings
		 * are released with strfree() here.
		 */
		char *token = get_receive_resume_stats_impl(ds);

		VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN),
		    <, ZAP_MAXVALUELEN);
		if (strcmp(strval, "") == 0) {
			char *childval = get_child_receive_stats(ds);

			VERIFY3U(strlcpy(strval, childval, ZAP_MAXVALUELEN),
			    <, ZAP_MAXVALUELEN);
			if (strcmp(strval, "") == 0)
				error = ENOENT;

			strfree(childval);
		}
		strfree(token);
		break;
	}
	case ZFS_PROP_VOLSIZE:
		ASSERT(ds_type == ZFS_TYPE_VOLUME ||
		    ds_type == ZFS_TYPE_SNAPSHOT);
		error = dmu_objset_from_ds(ds, &os);
		if (error == 0) {
			error = zap_lookup(os, ZVOL_ZAP_OBJ, "size",
			    sizeof (numval), 1, &numval);
		}
		/* on success the dataset itself is reported as the source */
		if (error == 0)
			(void) strcpy(setpoint, dsname);

		break;
	case ZFS_PROP_VOLBLOCKSIZE: {
		ASSERT(ds_type == ZFS_TYPE_VOLUME);
		dmu_object_info_t doi;
		error = dmu_objset_from_ds(ds, &os);
		if (error == 0) {
			error = dmu_object_info(os, ZVOL_OBJ, &doi);
			if (error == 0)
				numval = doi.doi_data_block_size;
		}
		break;
	}

	case ZFS_PROP_KEYSTATUS:
	case ZFS_PROP_KEYFORMAT: {
		/* provide defaults in case no crypto obj exists */
		setpoint[0] = '\0';
		if (zfs_prop == ZFS_PROP_KEYSTATUS)
			numval = ZFS_KEYSTATUS_NONE;
		else
			numval = ZFS_KEYFORMAT_NONE;

		/*
		 * If the crypt stats contain an entry for this property,
		 * take its value and source in place of the defaults.
		 */
		nvlist_t *nvl, *propval;
		nvl = fnvlist_alloc();
		dsl_dataset_crypt_stats(ds, nvl);
		if (nvlist_lookup_nvlist(nvl, zfs_prop_to_name(zfs_prop),
		    &propval) == 0) {
			char *source;

			(void) nvlist_lookup_uint64(propval, ZPROP_VALUE,
			    &numval);
			if (nvlist_lookup_string(propval, ZPROP_SOURCE,
			    &source) == 0)
				strlcpy(setpoint, source, sizeof (setpoint));
		}
		nvlist_free(nvl);
		break;
	}

	default:
		/* Did not match these props, check in the dsl_dir */
		error = get_dsl_dir_prop(ds, zfs_prop, &numval);
	}
	if (error != 0) {
		kmem_free(strval, ZAP_MAXVALUELEN);
		return (error);
	}

	/*
	 * Second stage: push the value in the representation that matches
	 * the property's declared type.
	 */
	switch (prop_type) {
	case PROP_TYPE_NUMBER: {
		(void) lua_pushnumber(state, numval);
		break;
	}
	case PROP_TYPE_STRING: {
		(void) lua_pushstring(state, strval);
		break;
	}
	case PROP_TYPE_INDEX: {
		/* index properties are pushed as their string name */
		const char *propval;
		error = zfs_prop_index_to_string(zfs_prop, numval, &propval);
		if (error != 0) {
			kmem_free(strval, ZAP_MAXVALUELEN);
			return (error);
		}
		(void) lua_pushstring(state, propval);
		break;
	}
	}
	kmem_free(strval, ZAP_MAXVALUELEN);

	/* Push the source to the stack */
	get_prop_src(state, setpoint, zfs_prop);
	return (0);
}

/*
 * Look up a property and its source in the zap object. If the value is
 * present and successfully retrieved, push the value and source on the
 * lua stack and return 0. On failure, return a non-zero error value.
+ */ +static int +get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop) +{ + int error = 0; + char setpoint[ZFS_MAX_DATASET_NAME_LEN]; + char *strval = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); + uint64_t numval; + const char *prop_name = zfs_prop_to_name(zfs_prop); + zprop_type_t prop_type = zfs_prop_get_type(zfs_prop); + + if (prop_type == PROP_TYPE_STRING) { + /* Push value to lua stack */ + error = dsl_prop_get_ds(ds, prop_name, 1, + ZAP_MAXVALUELEN, strval, setpoint); + if (error == 0) + (void) lua_pushstring(state, strval); + } else { + error = dsl_prop_get_ds(ds, prop_name, sizeof (numval), + 1, &numval, setpoint); + + /* Fill in temorary value for prop, if applicable */ + (void) get_temporary_prop(ds, zfs_prop, &numval, setpoint); + + /* Push value to lua stack */ + if (prop_type == PROP_TYPE_INDEX) { + const char *propval; + error = zfs_prop_index_to_string(zfs_prop, numval, + &propval); + if (error == 0) + (void) lua_pushstring(state, propval); + } else { + if (error == 0) + (void) lua_pushnumber(state, numval); + } + } + kmem_free(strval, ZAP_MAXVALUELEN); + if (error == 0) + get_prop_src(state, setpoint, zfs_prop); + return (error); +} + +/* + * Determine whether property is valid for a given dataset + */ +boolean_t +prop_valid_for_ds(dsl_dataset_t *ds, zfs_prop_t zfs_prop) +{ + int error; + zfs_type_t zfs_type; + + /* properties not supported */ + if ((zfs_prop == ZFS_PROP_ISCSIOPTIONS) || + (zfs_prop == ZFS_PROP_MOUNTED)) + return (B_FALSE); + + /* if we want the origin prop, ds must be a clone */ + if ((zfs_prop == ZFS_PROP_ORIGIN) && (!dsl_dir_is_clone(ds->ds_dir))) + return (B_FALSE); + + error = get_objset_type(ds, &zfs_type); + if (error != 0) + return (B_FALSE); + return (zfs_prop_valid_for_type(zfs_prop, zfs_type, B_FALSE)); +} + +/* + * Look up a given dataset property. On success return 2, the number of + * values pushed to the lua stack (property value and source). On a fatal + * error, longjmp. 
On a non fatal error push nothing. + */ +static int +zcp_get_system_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name, + zfs_prop_t zfs_prop) +{ + int error; + /* + * zcp_dataset_hold will either successfully return the requested + * dataset or throw a lua error and longjmp out of the zfs.get_prop call + * without returning. + */ + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + + /* Check that the property is valid for the given dataset */ + const char *prop_name = zfs_prop_to_name(zfs_prop); + if (!prop_valid_for_ds(ds, zfs_prop)) { + dsl_dataset_rele(ds, FTAG); + return (0); + } + + /* Check if the property can be accessed directly */ + error = get_special_prop(state, ds, dataset_name, zfs_prop); + if (error == 0) { + dsl_dataset_rele(ds, FTAG); + /* The value and source have been pushed by get_special_prop */ + return (2); + } + if (error != ENOENT) { + dsl_dataset_rele(ds, FTAG); + return (zcp_handle_error(state, dataset_name, + prop_name, error)); + } + + /* If we were unable to find it, look in the zap object */ + error = get_zap_prop(state, ds, zfs_prop); + dsl_dataset_rele(ds, FTAG); + if (error != 0) { + return (zcp_handle_error(state, dataset_name, + prop_name, error)); + } + /* The value and source have been pushed by get_zap_prop */ + return (2); +} + +#ifdef _KERNEL +static zfs_userquota_prop_t +get_userquota_prop(const char *prop_name) +{ + zfs_userquota_prop_t type; + /* Figure out the property type ({user|group}{quota|used}) */ + for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { + if (strncmp(prop_name, zfs_userquota_prop_prefixes[type], + strlen(zfs_userquota_prop_prefixes[type])) == 0) + break; + } + return (type); +} + +/* + * Given the name of a zfs_userquota_prop, this function determines the + * prop type as well as the numeric group/user ids based on the string + * following the '@' in the property name. 
On success, returns 0. On failure, + * returns a non-zero error. + * 'domain' must be free'd by caller using strfree() + */ +static int +parse_userquota_prop(const char *prop_name, zfs_userquota_prop_t *type, + char **domain, uint64_t *rid) +{ + char *cp, *end, *domain_val; + + *type = get_userquota_prop(prop_name); + if (*type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + *rid = 0; + cp = strchr(prop_name, '@') + 1; + if (strncmp(cp, "S-1-", 4) == 0) { + /* + * It's a numeric SID (eg "S-1-234-567-89") and we want to + * seperate the domain id and the rid + */ + int domain_len = strrchr(cp, '-') - cp; + domain_val = kmem_alloc(domain_len + 1, KM_SLEEP); + (void) strncpy(domain_val, cp, domain_len); + domain_val[domain_len] = '\0'; + cp += domain_len + 1; + + (void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid); + if (*end != '\0') { + strfree(domain_val); + return (EINVAL); + } + } else { + /* It's only a user/group ID (eg "12345"), just get the rid */ + domain_val = NULL; + (void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid); + if (*end != '\0') + return (EINVAL); + } + *domain = domain_val; + return (0); +} + +/* + * Look up {user|group}{quota|used} property for given dataset. On success + * push the value (quota or used amount) and the setpoint. On failure, push + * a lua error. 
+ */ +static int +zcp_get_userquota_prop(lua_State *state, dsl_pool_t *dp, + const char *dataset_name, const char *prop_name) +{ + zfsvfs_t *zfvp; + zfsvfs_t *zfsvfs; + int error; + zfs_userquota_prop_t type; + char *domain; + uint64_t rid, value = 0; + objset_t *os; + + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + + error = parse_userquota_prop(prop_name, &type, &domain, &rid); + if (error == 0) { + error = dmu_objset_from_ds(ds, &os); + if (error == 0) { + zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); + error = zfsvfs_create_impl(&zfvp, zfsvfs, os); + if (error == 0) { + error = zfs_userspace_one(zfvp, type, domain, + rid, &value); + zfsvfs_free(zfvp); + } + } + if (domain != NULL) + strfree(domain); + } + dsl_dataset_rele(ds, FTAG); + + if ((value == 0) && ((type == ZFS_PROP_USERQUOTA) || + (type == ZFS_PROP_GROUPQUOTA))) + error = ENOENT; + if (error != 0) { + return (zcp_handle_error(state, dataset_name, + prop_name, error)); + } + + (void) lua_pushnumber(state, value); + (void) lua_pushstring(state, dataset_name); + return (2); +} +#endif + +/* + * Determines the name of the snapshot referenced in the written property + * name. Returns snapshot name in snap_name, a buffer that must be at least + * as large as ZFS_MAX_DATASET_NAME_LEN + */ +static void +parse_written_prop(const char *dataset_name, const char *prop_name, + char *snap_name) +{ + ASSERT(zfs_prop_written(prop_name)); + const char *name = prop_name + ZFS_WRITTEN_PROP_PREFIX_LEN; + if (strchr(name, '@') == NULL) { + (void) sprintf(snap_name, "%s@%s", dataset_name, name); + } else { + (void) strcpy(snap_name, name); + } +} + +/* + * Look up written@ property for given dataset. On success + * push the value and the setpoint. If error is fatal, we will + * longjmp, otherwise push nothing. 
+ */ +static int +zcp_get_written_prop(lua_State *state, dsl_pool_t *dp, + const char *dataset_name, const char *prop_name) +{ + char snap_name[ZFS_MAX_DATASET_NAME_LEN]; + uint64_t used, comp, uncomp; + dsl_dataset_t *old; + int error = 0; + + parse_written_prop(dataset_name, prop_name, snap_name); + dsl_dataset_t *new = zcp_dataset_hold(state, dp, dataset_name, FTAG); + if (new == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + + error = dsl_dataset_hold(dp, snap_name, FTAG, &old); + if (error != 0) { + dsl_dataset_rele(new, FTAG); + return (zcp_dataset_hold_error(state, dp, snap_name, + error)); + } + error = dsl_dataset_space_written(old, new, + &used, &comp, &uncomp); + + dsl_dataset_rele(old, FTAG); + dsl_dataset_rele(new, FTAG); + + if (error != 0) { + return (zcp_handle_error(state, dataset_name, + snap_name, error)); + } + (void) lua_pushnumber(state, used); + (void) lua_pushstring(state, dataset_name); + return (2); +} + +static int zcp_get_prop(lua_State *state); +static zcp_lib_info_t zcp_get_prop_info = { + .name = "get_prop", + .func = zcp_get_prop, + .pargs = { + { .za_name = "dataset", .za_lua_type = LUA_TSTRING}, + { .za_name = "property", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_get_prop(lua_State *state) +{ + const char *dataset_name; + const char *property_name; + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + zcp_lib_info_t *libinfo = &zcp_get_prop_info; + + zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs); + + dataset_name = lua_tostring(state, 1); + property_name = lua_tostring(state, 2); + + /* User defined property */ + if (zfs_prop_user(property_name)) { + return (zcp_get_user_prop(state, dp, + dataset_name, property_name)); + } + /* userspace property */ + if (zfs_prop_userquota(property_name)) { +#ifdef _KERNEL + return (zcp_get_userquota_prop(state, dp, + dataset_name, property_name)); +#else + return (luaL_error(state, + "user quota 
properties only supported in kernel mode", + property_name)); +#endif + } + /* written@ property */ + if (zfs_prop_written(property_name)) { + return (zcp_get_written_prop(state, dp, + dataset_name, property_name)); + } + + zfs_prop_t zfs_prop = zfs_name_to_prop(property_name); + /* Valid system property */ + if (zfs_prop != ZPROP_INVAL) { + return (zcp_get_system_prop(state, dp, dataset_name, + zfs_prop)); + } + + /* Invalid property name */ + return (luaL_error(state, + "'%s' is not a valid property", property_name)); +} + +int +zcp_load_get_lib(lua_State *state) +{ + lua_pushcclosure(state, zcp_get_prop_info.func, 0); + lua_setfield(state, -2, zcp_get_prop_info.name); + + return (1); +} diff --git a/module/zfs/zcp_global.c b/module/zfs/zcp_global.c new file mode 100644 index 000000000..b6c3c3a4f --- /dev/null +++ b/module/zfs/zcp_global.c @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2016 by Delphix. All rights reserved. 
+ */ + +#include <sys/zcp_global.h> + +#include <sys/lua/lua.h> +#include <sys/lua/lauxlib.h> + +/* + * Pairs the name of an errno constant with its numeric value. + */ +typedef struct zcp_errno_global { + const char *zeg_name; + int zeg_errno; +} zcp_errno_global_t; + +/* + * Errno constants defined as Lua globals by zcp_load_errno_globals(). + * The table is terminated by an entry whose name is NULL. + */ +static const zcp_errno_global_t errno_globals[] = { + {"EPERM", EPERM}, + {"ENOENT", ENOENT}, + {"ESRCH", ESRCH}, + {"EINTR", EINTR}, + {"EIO", EIO}, + {"ENXIO", ENXIO}, + {"E2BIG", E2BIG}, + {"ENOEXEC", ENOEXEC}, + {"EBADF", EBADF}, + {"ECHILD", ECHILD}, + {"EAGAIN", EAGAIN}, + {"ENOMEM", ENOMEM}, + {"EACCES", EACCES}, + {"EFAULT", EFAULT}, + {"ENOTBLK", ENOTBLK}, + {"EBUSY", EBUSY}, + {"EEXIST", EEXIST}, + {"EXDEV", EXDEV}, + {"ENODEV", ENODEV}, + {"ENOTDIR", ENOTDIR}, + {"EISDIR", EISDIR}, + {"EINVAL", EINVAL}, + {"ENFILE", ENFILE}, + {"EMFILE", EMFILE}, + {"ENOTTY", ENOTTY}, + {"ETXTBSY", ETXTBSY}, + {"EFBIG", EFBIG}, + {"ENOSPC", ENOSPC}, + {"ESPIPE", ESPIPE}, + {"EROFS", EROFS}, + {"EMLINK", EMLINK}, + {"EPIPE", EPIPE}, + {"EDOM", EDOM}, + {"ERANGE", ERANGE}, + {"EDQUOT", EDQUOT}, + {NULL, 0} +}; + +/* + * Set one numeric Lua global per errno_globals entry, named after the errno. + */ +static void +zcp_load_errno_globals(lua_State *state) +{ + const zcp_errno_global_t *global = errno_globals; + while (global->zeg_name != NULL) { + lua_pushnumber(state, (lua_Number)global->zeg_errno); + lua_setglobal(state, global->zeg_name); + global++; + } +} + +/* + * Load every global this file defines; at present only the errno constants. 
+ */ +void +zcp_load_globals(lua_State *state) +{ + zcp_load_errno_globals(state); +} diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c new file mode 100644 index 000000000..d37172c88 --- /dev/null +++ b/module/zfs/zcp_iter.c @@ -0,0 +1,531 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2016 by Delphix. All rights reserved. + */ + +#include <sys/lua/lua.h> +#include <sys/lua/lauxlib.h> + +#include <sys/dmu.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_pool.h> +#include <sys/dmu_tx.h> +#include <sys/dmu_objset.h> +#include <sys/zap.h> +#include <sys/dsl_dir.h> +#include <sys/zcp_prop.h> + +#include <sys/zcp.h> + +typedef int (zcp_list_func_t)(lua_State *); +typedef struct zcp_list_info { + const char *name; + zcp_list_func_t *func; + zcp_list_func_t *gc; + const zcp_arg_t pargs[4]; + const zcp_arg_t kwargs[2]; +} zcp_list_info_t; + +static int +zcp_clones_iter(lua_State *state) +{ + int err; + char clonename[ZFS_MAX_DATASET_NAME_LEN]; + uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1)); + uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2)); + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + dsl_dataset_t *ds, *clone; + zap_attribute_t za; + zap_cursor_t zc; + + err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (err == ENOENT) { + return (0); + } else if (err != 0) { + return (luaL_error(state, + "unexpected error %d from dsl_dataset_hold_obj(dsobj)", + err)); + } + + if (dsl_dataset_phys(ds)->ds_next_clones_obj == 0) { + dsl_dataset_rele(ds, FTAG); + return (0); + } + + zap_cursor_init_serialized(&zc, dp->dp_meta_objset, + dsl_dataset_phys(ds)->ds_next_clones_obj, cursor); + dsl_dataset_rele(ds, FTAG); + + err = zap_cursor_retrieve(&zc, &za); + if (err != 0) { + zap_cursor_fini(&zc); + if (err != ENOENT) { + return (luaL_error(state, + "unexpected error %d from zap_cursor_retrieve()", + err)); + } + return (0); + } + zap_cursor_advance(&zc); + cursor = zap_cursor_serialize(&zc); + zap_cursor_fini(&zc); + + err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &clone); + if (err != 0) { + return (luaL_error(state, + "unexpected error %d from " + "dsl_dataset_hold_obj(za_first_integer)", err)); + } + + 
dsl_dir_name(clone->ds_dir, clonename); + dsl_dataset_rele(clone, FTAG); + + lua_pushnumber(state, cursor); + lua_replace(state, lua_upvalueindex(2)); + + (void) lua_pushstring(state, clonename); + return (1); +} + +static int zcp_clones_list(lua_State *); +static zcp_list_info_t zcp_clones_list_info = { + .name = "clones", + .func = zcp_clones_list, + .gc = NULL, + .pargs = { + { .za_name = "snapshot", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_clones_list(lua_State *state) +{ + const char *snapname = lua_tostring(state, 1); + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + boolean_t issnap; + uint64_t dsobj, cursor; + + /* + * zcp_dataset_hold will either successfully return the requested + * dataset or throw a lua error and longjmp out of the zfs.list.clones + * call without returning. + */ + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, snapname, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + cursor = 0; + issnap = ds->ds_is_snapshot; + dsobj = ds->ds_object; + dsl_dataset_rele(ds, FTAG); + + if (!issnap) { + return (zcp_argerror(state, 1, "%s is not a snapshot", + snapname)); + } + + lua_pushnumber(state, dsobj); + lua_pushnumber(state, cursor); + lua_pushcclosure(state, &zcp_clones_iter, 2); + return (1); +} + +static int +zcp_snapshots_iter(lua_State *state) +{ + int err; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; + uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1)); + uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2)); + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + dsl_dataset_t *ds; + objset_t *os; + char *p; + + err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (err != 0) { + return (luaL_error(state, + "unexpected error %d from dsl_dataset_hold_obj(dsobj)", + err)); + } + + dsl_dataset_name(ds, snapname); + VERIFY3U(sizeof (snapname), >, + strlcat(snapname, "@", sizeof (snapname))); + + p = strchr(snapname, '\0'); + 
VERIFY0(dmu_objset_from_ds(ds, &os)); + err = dmu_snapshot_list_next(os, + sizeof (snapname) - (p - snapname), p, NULL, &cursor, NULL); + dsl_dataset_rele(ds, FTAG); + + if (err == ENOENT) { + return (0); + } else if (err != 0) { + return (luaL_error(state, + "unexpected error %d from dmu_snapshot_list_next()", err)); + } + + lua_pushnumber(state, cursor); + lua_replace(state, lua_upvalueindex(2)); + + (void) lua_pushstring(state, snapname); + return (1); +} + +static int zcp_snapshots_list(lua_State *); +static zcp_list_info_t zcp_snapshots_list_info = { + .name = "snapshots", + .func = zcp_snapshots_list, + .gc = NULL, + .pargs = { + { .za_name = "filesystem | volume", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_snapshots_list(lua_State *state) +{ + const char *fsname = lua_tostring(state, 1); + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + boolean_t issnap; + uint64_t dsobj; + + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, fsname, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + issnap = ds->ds_is_snapshot; + dsobj = ds->ds_object; + dsl_dataset_rele(ds, FTAG); + + if (issnap) { + return (zcp_argerror(state, 1, + "argument %s cannot be a snapshot", fsname)); + } + + lua_pushnumber(state, dsobj); + lua_pushnumber(state, 0); + lua_pushcclosure(state, &zcp_snapshots_iter, 2); + return (1); +} + +/* + * Note: channel programs only run in the global zone, so all datasets + * are visible to this zone. 
+ */ +static boolean_t +dataset_name_hidden(const char *name) +{ + if (strchr(name, '$') != NULL) + return (B_TRUE); + if (strchr(name, '%') != NULL) + return (B_TRUE); + return (B_FALSE); +} + +static int +zcp_children_iter(lua_State *state) +{ + int err; + char childname[ZFS_MAX_DATASET_NAME_LEN]; + uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1)); + uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2)); + zcp_run_info_t *ri = zcp_run_info(state); + dsl_pool_t *dp = ri->zri_pool; + dsl_dataset_t *ds; + objset_t *os; + char *p; + + err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (err != 0) { + return (luaL_error(state, + "unexpected error %d from dsl_dataset_hold_obj(dsobj)", + err)); + } + + dsl_dataset_name(ds, childname); + VERIFY3U(sizeof (childname), >, + strlcat(childname, "/", sizeof (childname))); + p = strchr(childname, '\0'); + + VERIFY0(dmu_objset_from_ds(ds, &os)); + do { + err = dmu_dir_list_next(os, + sizeof (childname) - (p - childname), p, NULL, &cursor); + } while (err == 0 && dataset_name_hidden(childname)); + dsl_dataset_rele(ds, FTAG); + + if (err == ENOENT) { + return (0); + } else if (err != 0) { + return (luaL_error(state, + "unexpected error %d from dmu_dir_list_next()", + err)); + } + + lua_pushnumber(state, cursor); + lua_replace(state, lua_upvalueindex(2)); + + (void) lua_pushstring(state, childname); + return (1); +} + +static int zcp_children_list(lua_State *); +static zcp_list_info_t zcp_children_list_info = { + .name = "children", + .func = zcp_children_list, + .gc = NULL, + .pargs = { + { .za_name = "filesystem | volume", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_children_list(lua_State *state) +{ + const char *fsname = lua_tostring(state, 1); + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + boolean_t issnap; + uint64_t dsobj; + + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, fsname, FTAG); + if (ds == NULL) + return (1); /* not reached; 
zcp_dataset_hold() longjmp'd */ + + issnap = ds->ds_is_snapshot; + dsobj = ds->ds_object; + dsl_dataset_rele(ds, FTAG); + + if (issnap) { + return (zcp_argerror(state, 1, + "argument %s cannot be a snapshot", fsname)); + } + + lua_pushnumber(state, dsobj); + lua_pushnumber(state, 0); + lua_pushcclosure(state, &zcp_children_iter, 2); + return (1); +} + +static int +zcp_props_list_gc(lua_State *state) +{ + nvlist_t **props = lua_touserdata(state, 1); + if (*props != NULL) + fnvlist_free(*props); + return (0); +} + +static int +zcp_props_iter(lua_State *state) +{ + char *source, *val; + nvlist_t *nvprop; + nvlist_t **props = lua_touserdata(state, lua_upvalueindex(1)); + nvpair_t *pair = lua_touserdata(state, lua_upvalueindex(2)); + + do { + pair = nvlist_next_nvpair(*props, pair); + if (pair == NULL) { + fnvlist_free(*props); + *props = NULL; + return (0); + } + } while (!zfs_prop_user(nvpair_name(pair))); + + lua_pushlightuserdata(state, pair); + lua_replace(state, lua_upvalueindex(2)); + + nvprop = fnvpair_value_nvlist(pair); + val = fnvlist_lookup_string(nvprop, ZPROP_VALUE); + source = fnvlist_lookup_string(nvprop, ZPROP_SOURCE); + + (void) lua_pushstring(state, nvpair_name(pair)); + (void) lua_pushstring(state, val); + (void) lua_pushstring(state, source); + return (3); +} + +static int zcp_props_list(lua_State *); +static zcp_list_info_t zcp_props_list_info = { + .name = "properties", + .func = zcp_props_list, + .gc = zcp_props_list_gc, + .pargs = { + { .za_name = "filesystem | snapshot | volume", + .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_props_list(lua_State *state) +{ + const char *dsname = lua_tostring(state, 1); + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + objset_t *os; + nvlist_t **props = lua_newuserdata(state, sizeof (nvlist_t *)); + + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dsname, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + 
VERIFY0(dmu_objset_from_ds(ds, &os)); + VERIFY0(dsl_prop_get_all(os, props)); + dsl_dataset_rele(ds, FTAG); + + /* + * Set the metatable for the properties list to free it on completion. + */ + luaL_getmetatable(state, zcp_props_list_info.name); + (void) lua_setmetatable(state, -2); + + lua_pushlightuserdata(state, NULL); + lua_pushcclosure(state, &zcp_props_iter, 2); + return (1); +} + + +/* + * Populate nv with the name of every visible property that is valid for the + * given dataset. Each name is added as a valueless boolean entry; property + * values are not looked up here. + */ +static void +zcp_dataset_props(dsl_dataset_t *ds, nvlist_t *nv) +{ + for (int prop = ZFS_PROP_TYPE; prop < ZFS_NUM_PROPS; prop++) { + /* Do not display hidden props */ + if (!zfs_prop_visible(prop)) + continue; + /* Do not display props not valid for this dataset */ + if (!prop_valid_for_ds(ds, prop)) + continue; + fnvlist_add_boolean(nv, zfs_prop_to_name(prop)); + } +} + +static int zcp_system_props_list(lua_State *); +static zcp_list_info_t zcp_system_props_list_info = { + .name = "system_properties", + .func = zcp_system_props_list, + .pargs = { + { .za_name = "dataset", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +/* + * Get a list of all visible properties that are valid for a given dataset. + * Returned on the stack as a Lua table. 
+ */ +static int +zcp_system_props_list(lua_State *state) +{ + int error; + char errbuf[128]; + const char *dataset_name; + dsl_pool_t *dp = zcp_run_info(state)->zri_pool; + zcp_list_info_t *libinfo = &zcp_system_props_list_info; + zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs); + dataset_name = lua_tostring(state, 1); + + dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); + if (ds == NULL) + return (1); /* not reached; zcp_dataset_hold() longjmp'd */ + + /* + * Allocate the nvlist only once the hold has succeeded: a failed + * zcp_dataset_hold() longjmps out of this function, which would + * leak an nvlist allocated beforehand. + */ + nvlist_t *nv = fnvlist_alloc(); + + /* Get the names of all valid properties for this dataset */ + zcp_dataset_props(ds, nv); + dsl_dataset_rele(ds, FTAG); + + /* push list as lua table */ + error = zcp_nvlist_to_lua(state, nv, errbuf, sizeof (errbuf)); + nvlist_free(nv); + if (error != 0) { + return (luaL_error(state, + "Error returning nvlist: %s", errbuf)); + } + return (1); +} + +/* + * Common entry point for the list functions: reads the zcp_list_info_t from + * upvalue 1, checks the arguments against its spec, then calls its handler. + */ +static int +zcp_list_func(lua_State *state) +{ + zcp_list_info_t *info = lua_touserdata(state, lua_upvalueindex(1)); + + zcp_parse_args(state, info->name, info->pargs, info->kwargs); + + return (info->func(state)); +} + +int +zcp_load_list_lib(lua_State *state) +{ + int i; + zcp_list_info_t *zcp_list_funcs[] = { + &zcp_children_list_info, + &zcp_snapshots_list_info, + &zcp_props_list_info, + &zcp_clones_list_info, + &zcp_system_props_list_info, + NULL + }; + + lua_newtable(state); + + for (i = 0; zcp_list_funcs[i] != NULL; i++) { + zcp_list_info_t *info = zcp_list_funcs[i]; + + if (info->gc != NULL) { + /* + * If the function requires garbage collection, create + * a metatable with its name and register the __gc + * function. 
+ */ + (void) luaL_newmetatable(state, info->name); + (void) lua_pushstring(state, "__gc"); + lua_pushcfunction(state, info->gc); + lua_settable(state, -3); + lua_pop(state, 1); + } + + /* + * Each list function is a closure over its info struct, which + * zcp_list_func() reads back from upvalue 1. + */ + lua_pushlightuserdata(state, info); + lua_pushcclosure(state, &zcp_list_func, 1); + lua_setfield(state, -2, info->name); + } + + return (1); +} diff --git a/module/zfs/zcp_synctask.c b/module/zfs/zcp_synctask.c new file mode 100644 index 000000000..923d5ca67 --- /dev/null +++ b/module/zfs/zcp_synctask.c @@ -0,0 +1,265 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2016 by Delphix. All rights reserved. 
+ */ + +#include <sys/lua/lua.h> +#include <sys/lua/lauxlib.h> + +#include <sys/zcp.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_bookmark.h> +#include <sys/dsl_destroy.h> +#include <sys/dmu_objset.h> +#include <sys/zfs_znode.h> +#include <sys/zfeature.h> +#include <sys/metaslab.h> + +#define DST_AVG_BLKSHIFT 14 + +typedef int (zcp_synctask_func_t)(lua_State *, boolean_t, nvlist_t *); +typedef struct zcp_synctask_info { + const char *name; + zcp_synctask_func_t *func; + zfs_space_check_t space_check; + int blocks_modified; + const zcp_arg_t pargs[4]; + const zcp_arg_t kwargs[2]; +} zcp_synctask_info_t; + +/* + * Generic synctask interface for channel program syncfuncs. 
+ * + * To perform some action in syncing context, we'd generally call + * dsl_sync_task(), but since the Lua script is already running inside a + * synctask we need to leave out some actions (such as acquiring the config + * rwlock and performing space checks). + * + * If 'sync' is false, executes a dry run and returns the error code. + * + * This function also handles common fatal error cases for channel program + * library functions. If a fatal error occurs, err_dsname will be the dataset + * name reported in error messages, if supplied. + */ +static int +zcp_sync_task(lua_State *state, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, void *arg, boolean_t sync, const char *err_dsname) +{ + int err; + zcp_run_info_t *ri = zcp_run_info(state); + + err = checkfunc(arg, ri->zri_tx); + if (!sync) + return (err); + + if (err == 0) { + syncfunc(arg, ri->zri_tx); + } else if (err == EIO) { + if (err_dsname != NULL) { + return (luaL_error(state, + "I/O error while accessing dataset '%s'", + err_dsname)); + } else { + return (luaL_error(state, + "I/O error while accessing dataset.")); + } + } + + return (err); +} + + +static int zcp_synctask_destroy(lua_State *, boolean_t, nvlist_t *); +static zcp_synctask_info_t zcp_synctask_destroy_info = { + .name = "destroy", + .func = zcp_synctask_destroy, + .space_check = ZFS_SPACE_CHECK_NONE, + .blocks_modified = 0, + .pargs = { + {.za_name = "filesystem | snapshot", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {.za_name = "defer", .za_lua_type = LUA_TBOOLEAN}, + {NULL, 0} + } +}; + +/* + * Destroy the dataset named by argument 1. A name containing '@' is treated + * as a snapshot; the "defer" kwarg is accepted only for snapshots. 
+ */ +/* ARGSUSED */ +static int +zcp_synctask_destroy(lua_State *state, boolean_t sync, nvlist_t *err_details) +{ + int err; + const char *dsname = lua_tostring(state, 1); + + boolean_t issnap = (strchr(dsname, '@') != NULL); + + if (!issnap && !lua_isnil(state, 2)) { + return (luaL_error(state, + "'defer' kwarg only supported for snapshots: %s", + dsname)); + } + + if (issnap) { + dsl_destroy_snapshot_arg_t ddsa = { 0 }; + 
ddsa.ddsa_name = dsname; + if (!lua_isnil(state, 2)) { + ddsa.ddsa_defer = lua_toboolean(state, 2); + } else { + ddsa.ddsa_defer = B_FALSE; + } + + err = zcp_sync_task(state, dsl_destroy_snapshot_check, + dsl_destroy_snapshot_sync, &ddsa, sync, dsname); + } else { + dsl_destroy_head_arg_t ddha = { 0 }; + ddha.ddha_name = dsname; + + err = zcp_sync_task(state, dsl_destroy_head_check, + dsl_destroy_head_sync, &ddha, sync, dsname); + } + + return (err); +} + +static int zcp_synctask_promote(lua_State *, boolean_t, nvlist_t *err_details); +static zcp_synctask_info_t zcp_synctask_promote_info = { + .name = "promote", + .func = zcp_synctask_promote, + .space_check = ZFS_SPACE_CHECK_RESERVED, + .blocks_modified = 3, + .pargs = { + {.za_name = "clone", .za_lua_type = LUA_TSTRING}, + {NULL, 0} + }, + .kwargs = { + {NULL, 0} + } +}; + +static int +zcp_synctask_promote(lua_State *state, boolean_t sync, nvlist_t *err_details) +{ + int err; + dsl_dataset_promote_arg_t ddpa = { 0 }; + const char *dsname = lua_tostring(state, 1); + zcp_run_info_t *ri = zcp_run_info(state); + + ddpa.ddpa_clonename = dsname; + ddpa.err_ds = err_details; + ddpa.cr = ri->zri_cred; + + /* + * If there was a snapshot name conflict, then err_ds will be filled + * with a list of conflicting snapshot names. + */ + err = zcp_sync_task(state, dsl_dataset_promote_check, + dsl_dataset_promote_sync, &ddpa, sync, dsname); + + return (err); +} + +void +zcp_synctask_wrapper_cleanup(void *arg) +{ + fnvlist_free(arg); +} + +static int +zcp_synctask_wrapper(lua_State *state) +{ + int err; + int num_ret = 1; + nvlist_t *err_details = fnvlist_alloc(); + + /* + * Make sure err_details is properly freed, even if a fatal error is + * thrown during the synctask. 
+ */ + zcp_register_cleanup(state, &zcp_synctask_wrapper_cleanup, err_details); + + zcp_synctask_info_t *info = lua_touserdata(state, lua_upvalueindex(1)); + boolean_t sync = lua_toboolean(state, lua_upvalueindex(2)); + + zcp_run_info_t *ri = zcp_run_info(state); + dsl_pool_t *dp = ri->zri_pool; + + /* + * MOS space is triple-dittoed, so we multiply by 3. Widen + * blocks_modified first so the estimate is computed in 64 bits + * rather than in int. + */ + uint64_t funcspace = + ((uint64_t)info->blocks_modified << DST_AVG_BLKSHIFT) * 3; + + zcp_parse_args(state, info->name, info->pargs, info->kwargs); + + err = 0; + if (info->space_check != ZFS_SPACE_CHECK_NONE && funcspace > 0) { + uint64_t quota = dsl_pool_adjustedsize(dp, + info->space_check == ZFS_SPACE_CHECK_RESERVED) - + metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); + uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes + + ri->zri_space_used; + + if (used + funcspace > quota) { + err = SET_ERROR(ENOSPC); + } + } + + if (err == 0) { + err = info->func(state, sync, err_details); + } + + if (err == 0) { + ri->zri_space_used += funcspace; + } + + lua_pushnumber(state, (lua_Number)err); + if (fnvlist_num_pairs(err_details) > 0) { + (void) zcp_nvlist_to_lua(state, err_details, NULL, 0); + num_ret++; + } + + zcp_clear_cleanup(state); + fnvlist_free(err_details); + + return (num_ret); +} + +/* + * Build a new Lua table holding one closure per synctask function. Each + * closure captures its info struct and the 'sync' flag as upvalues 1 and 2. + * The table is left on top of the Lua stack. 
+ */ +int +zcp_load_synctask_lib(lua_State *state, boolean_t sync) +{ + int i; + zcp_synctask_info_t *zcp_synctask_funcs[] = { + &zcp_synctask_destroy_info, + &zcp_synctask_promote_info, + NULL + }; + + lua_newtable(state); + + for (i = 0; zcp_synctask_funcs[i] != NULL; i++) { + zcp_synctask_info_t *info = zcp_synctask_funcs[i]; + lua_pushlightuserdata(state, info); + lua_pushboolean(state, sync); + lua_pushcclosure(state, &zcp_synctask_wrapper, 2); + lua_setfield(state, -2, info->name); + } + + return (1); +} diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index b132a6885..a8f37fe84 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -27,7 +27,7 @@ * Copyright (c) 2014, 2016 Joyent, Inc. 
All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] @@ -193,6 +193,7 @@ #include <sys/dsl_bookmark.h> #include <sys/dsl_userhold.h> #include <sys/zfeature.h> +#include <sys/zcp.h> #include <sys/zio_checksum.h> #include <linux/miscdevice.h> @@ -203,6 +204,9 @@ #include "zfs_deleg.h" #include "zfs_comutil.h" +#include <sys/lua/lua.h> +#include <sys/lua/lauxlib.h> + /* * Limit maximum nvlist size. We don't want users passing in insane values * for zc->zc_nvlist_src_size, since we will need to allocate that much memory. @@ -1414,17 +1418,11 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) return (error); } -static int -getzfsvfs(const char *dsname, zfsvfs_t **zfvp) +int +getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp) { - objset_t *os; - int error; - - error = dmu_objset_hold(dsname, FTAG, &os); - if (error != 0) - return (error); + int error = 0; if (dmu_objset_type(os) != DMU_OST_ZFS) { - dmu_objset_rele(os, FTAG); return (SET_ERROR(EINVAL)); } @@ -1436,6 +1434,20 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) error = SET_ERROR(ESRCH); } mutex_exit(&os->os_user_ptr_lock); + return (error); +} + +static int +getzfsvfs(const char *dsname, zfsvfs_t **zfvp) +{ + objset_t *os; + int error; + + error = dmu_objset_hold(dsname, FTAG, &os); + if (error != 0) + return (error); + + error = getzfsvfs_impl(os, zfvp); dmu_objset_rele(os, FTAG); return (error); } @@ -3660,6 +3672,36 @@ zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl, return (error); } +static int +zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl, + nvlist_t *outnvl) +{ + char *program; + uint64_t instrlimit, memlimit; + 
nvpair_t *nvarg = NULL; + + if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) { + return (SET_ERROR(EINVAL)); + } + if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) { + instrlimit = ZCP_DEFAULT_INSTRLIMIT; + } + if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) { + memlimit = ZCP_DEFAULT_MEMLIMIT; + } + if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) { + return (SET_ERROR(EINVAL)); + } + + /* Reject limits that are zero or above the permitted maximum. */ + if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit) + return (SET_ERROR(EINVAL)); + if (memlimit == 0 || memlimit > ZCP_MAX_MEMLIMIT) + return (SET_ERROR(EINVAL)); + + return (zcp_eval(poolname, program, instrlimit, memlimit, + nvarg, outnvl)); +} + /* * inputs: * zc_name name of dataset to destroy @@ -6333,6 +6375,11 @@ zfs_ioctl_init(void) zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); + zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM, + zfs_ioc_channel_program, zfs_secpolicy_config, + POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, + B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, @@ -6803,12 +6850,23 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) error = vec->zvec_func(zc->zc_name, innvl, outnvl); spl_fstrans_unmark(cookie); - if (error == 0 && vec->zvec_allow_log && + /* + * Some commands can partially execute, modify state, and still + * return an error. In these cases, attempt to record what + * was modified. 
+ */ + if ((error == 0 || + (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) && + vec->zvec_allow_log && spa_open(zc->zc_name, &spa, FTAG) == 0) { if (!nvlist_empty(outnvl)) { fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, outnvl); } + if (error != 0) { + fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO, + error); + } (void) spa_history_log_nvl(spa, lognv); spa_close(spa, FTAG); } diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 18b4ec3d6..bb380c920 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1053,13 +1053,26 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE, zfsvfs, &os); - if (error) { + if (error != 0) { kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); } + error = zfsvfs_create_impl(zfvp, zfsvfs, os); + if (error != 0) { + dmu_objset_disown(os, B_TRUE, zfsvfs); + } + return (error); +} + +int +zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) +{ + int error; + zfsvfs->z_vfs = NULL; zfsvfs->z_sb = NULL; zfsvfs->z_parent = zfsvfs; @@ -1086,7 +1099,6 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) error = zfsvfs_init(zfsvfs, os); if (error != 0) { - dmu_objset_disown(os, B_TRUE, zfsvfs); *zfvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); |