aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/Makefile.in11
-rw-r--r--module/zfs/dsl_dataset.c503
-rw-r--r--module/zfs/dsl_destroy.c194
-rw-r--r--module/zfs/dsl_dir.c147
-rw-r--r--module/zfs/vdev_raidz_math.c3
-rw-r--r--module/zfs/zcp.c1357
-rw-r--r--module/zfs/zcp_get.c876
-rw-r--r--module/zfs/zcp_global.c84
-rw-r--r--module/zfs/zcp_iter.c531
-rw-r--r--module/zfs/zcp_synctask.c265
-rw-r--r--module/zfs/zfs_ioctl.c80
-rw-r--r--module/zfs/zfs_vfsops.c16
12 files changed, 3834 insertions, 233 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index cb352bf91..084c1ac23 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -1,5 +1,6 @@
src = @abs_top_srcdir@/module/zfs
obj = @abs_builddir@
+target_cpu = @target_cpu@
MODULE := zfs
@@ -7,6 +8,11 @@ obj-$(CONFIG_ZFS) := $(MODULE).o
ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+# Suppress unused-value warnings in sparc64 architecture headers
+ifeq ($(target_cpu),sparc64)
+ccflags-y += -Wno-unused-value
+endif
+
# Suppress unused but set variable warnings often due to ASSERTs
ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE)
@@ -86,6 +92,11 @@ $(MODULE)-objs += vdev_root.o
$(MODULE)-objs += zap.o
$(MODULE)-objs += zap_leaf.o
$(MODULE)-objs += zap_micro.o
+$(MODULE)-objs += zcp.o
+$(MODULE)-objs += zcp_get.o
+$(MODULE)-objs += zcp_global.o
+$(MODULE)-objs += zcp_iter.o
+$(MODULE)-objs += zcp_synctask.o
$(MODULE)-objs += zfeature.o
$(MODULE)-objs += zfs_acl.o
$(MODULE)-objs += zfs_byteswap.o
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 3c329f207..af3dc230a 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -1681,7 +1681,6 @@ dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
return (error);
}
-
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
@@ -1749,30 +1748,17 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
dmu_buf_rele(ds->ds_dbuf, ds);
}
-static void
-get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
+int
+get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val)
{
uint64_t count = 0;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
zap_cursor_t zc;
zap_attribute_t za;
- nvlist_t *propval = fnvlist_alloc();
- nvlist_t *val;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
/*
- * We use nvlist_alloc() instead of fnvlist_alloc() because the
- * latter would allocate the list with NV_UNIQUE_NAME flag.
- * As a result, every time a clone name is appended to the list
- * it would be (linearly) searched for for a duplicate name.
- * We already know that all clone names must be unique and we
- * want avoid the quadratic complexity of double-checking that
- * because we can have a large number of clones.
- */
- VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP));
-
- /*
* There may be missing entries in ds_next_clones_obj
* due to a bug in a previous version of the code.
* Only trust it if it has the right number of entries.
@@ -1781,8 +1767,9 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
&count));
}
- if (count != dsl_dataset_phys(ds)->ds_num_children - 1)
- goto fail;
+ if (count != dsl_dataset_phys(ds)->ds_num_children - 1) {
+ return (ENOENT);
+ }
for (zap_cursor_init(&zc, mos,
dsl_dataset_phys(ds)->ds_next_clones_obj);
zap_cursor_retrieve(&zc, &za) == 0;
@@ -1796,15 +1783,42 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
dsl_dataset_rele(clone, FTAG);
}
zap_cursor_fini(&zc);
- fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
- fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
-fail:
+ return (0);
+}
+
+void
+get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
+{
+ nvlist_t *propval = fnvlist_alloc();
+ nvlist_t *val;
+
+ /*
+ * We use nvlist_alloc() instead of fnvlist_alloc() because the
+ * latter would allocate the list with NV_UNIQUE_NAME flag.
+ * As a result, every time a clone name is appended to the list
+ * it would be (linearly) searched for a duplicate name.
+ * We already know that all clone names must be unique and we
+ * want to avoid the quadratic complexity of double-checking that
+ * because we can have a large number of clones.
+ */
+ VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP));
+
+ if (get_clones_stat_impl(ds, val) == 0) {
+ fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
+ fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
+ propval);
+ }
+
nvlist_free(val);
nvlist_free(propval);
}
-static void
-get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
+/*
+ * Returns a string that represents the receive resume stats token. It should
+ * be freed with strfree().
+ */
+char *
+get_receive_resume_stats_impl(dsl_dataset_t *ds)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
@@ -1876,86 +1890,361 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
ZFS_SEND_RESUME_TOKEN_VERSION,
(longlong_t)cksum.zc_word[0],
(longlong_t)packed_size, str);
- dsl_prop_nvlist_add_string(nv,
- ZFS_PROP_RECEIVE_RESUME_TOKEN, propval);
kmem_free(packed, packed_size);
kmem_free(str, compressed_size * 2 + 1);
kmem_free(compressed, packed_size);
- strfree(propval);
+ return (propval);
+ }
+ return (strdup(""));
+}
+
+/*
+ * Builds the name "<name of ds>/<recv_clone_name>", holds that child dataset
+ * and returns its receive resume stats token. If the name does not fit in
+ * the local buffer, or the child cannot be held, an empty string is returned
+ * instead. Either way the result should be freed with strfree().
+ */
+char *
+get_child_receive_stats(dsl_dataset_t *ds)
+{
+	char child_name[ZFS_MAX_DATASET_NAME_LEN + 6];
+	dsl_dataset_t *child_ds;
+	char *token;
+
+	dsl_dataset_name(ds, child_name);
+
+	/* strlcat() returning >= the buffer size means truncation. */
+	if (strlcat(child_name, "/", sizeof (child_name)) >=
+	    sizeof (child_name))
+		return (strdup(""));
+	if (strlcat(child_name, recv_clone_name, sizeof (child_name)) >=
+	    sizeof (child_name))
+		return (strdup(""));
+	if (dsl_dataset_hold(ds->ds_dir->dd_pool, child_name, FTAG,
+	    &child_ds) != 0)
+		return (strdup(""));
+
+	token = get_receive_resume_stats_impl(child_ds);
+	dsl_dataset_rele(child_ds, FTAG);
+	return (token);
+}
+
+/*
+ * Adds ZFS_PROP_RECEIVE_RESUME_TOKEN to 'nv'. The dataset's own token is
+ * used when it is non-empty; otherwise the token of its receive child is
+ * used, if that one is non-empty. Nothing is added when both are empty.
+ */
+static void
+get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
+{
+	char *own_token = get_receive_resume_stats_impl(ds);
+
+	if (own_token[0] != '\0') {
+		dsl_prop_nvlist_add_string(nv,
+		    ZFS_PROP_RECEIVE_RESUME_TOKEN, own_token);
+		strfree(own_token);
+		return;
+	}
+
+	char *child_token = get_child_receive_stats(ds);
+
+	if (child_token[0] != '\0') {
+		dsl_prop_nvlist_add_string(nv,
+		    ZFS_PROP_RECEIVE_RESUME_TOKEN, child_token);
+	}
+	strfree(child_token);
+	strfree(own_token);
+}
+
+/*
+ * Returns 100 times the ratio of the dataset's uncompressed bytes to its
+ * compressed bytes, or 100 when the compressed byte count is zero.
+ */
+uint64_t
+dsl_get_refratio(dsl_dataset_t *ds)
+{
+	uint64_t compressed = dsl_dataset_phys(ds)->ds_compressed_bytes;
+
+	if (compressed == 0)
+		return (100);
+
+	return (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
+	    compressed);
+}
+
+/*
+ * Returns the ds_uncompressed_bytes field of the dataset's phys data.
+ */
+uint64_t
+dsl_get_logicalreferenced(dsl_dataset_t *ds)
+{
+	uint64_t uncompressed = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
+
+	return (uncompressed);
+}
+
+/*
+ * Returns the compression ratio: dsl_get_refratio() for a snapshot,
+ * otherwise dsl_dir_get_compressratio() of the dataset's dsl_dir, read
+ * while holding that dir's dd_lock.
+ */
+uint64_t
+dsl_get_compressratio(dsl_dataset_t *ds)
+{
+	uint64_t ratio;
+
+	if (!ds->ds_is_snapshot) {
+		dsl_dir_t *dir = ds->ds_dir;
+
+		mutex_enter(&dir->dd_lock);
+		ratio = dsl_dir_get_compressratio(dir);
+		mutex_exit(&dir->dd_lock);
+	} else {
+		ratio = dsl_get_refratio(ds);
+	}
+	return (ratio);
}
+/*
+ * Returns the used space: ds_unique_bytes for a snapshot, otherwise
+ * dsl_dir_get_used() of the dataset's dsl_dir, read while holding that
+ * dir's dd_lock.
+ */
+uint64_t
+dsl_get_used(dsl_dataset_t *ds)
+{
+	uint64_t used;
+
+	if (!ds->ds_is_snapshot) {
+		dsl_dir_t *dir = ds->ds_dir;
+
+		mutex_enter(&dir->dd_lock);
+		used = dsl_dir_get_used(dir);
+		mutex_exit(&dir->dd_lock);
+	} else {
+		used = dsl_dataset_phys(ds)->ds_unique_bytes;
+	}
+	return (used);
+}
+
+/*
+ * Returns the ds_creation_time field of the dataset's phys data.
+ */
+uint64_t
+dsl_get_creation(dsl_dataset_t *ds)
+{
+	uint64_t creation_time = dsl_dataset_phys(ds)->ds_creation_time;
+
+	return (creation_time);
+}
+
+/*
+ * Returns the ds_creation_txg field of the dataset's phys data.
+ */
+uint64_t
+dsl_get_creationtxg(dsl_dataset_t *ds)
+{
+	uint64_t creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
+
+	return (creation_txg);
+}
+
+/*
+ * Returns the dataset's in-core ds_quota value.
+ */
+uint64_t
+dsl_get_refquota(dsl_dataset_t *ds)
+{
+	uint64_t refquota = ds->ds_quota;
+
+	return (refquota);
+}
+
+/*
+ * Returns the dataset's in-core ds_reserved value.
+ */
+uint64_t
+dsl_get_refreservation(dsl_dataset_t *ds)
+{
+	uint64_t refreservation = ds->ds_reserved;
+
+	return (refreservation);
+}
+
+/*
+ * Returns the ds_guid field of the dataset's phys data.
+ */
+uint64_t
+dsl_get_guid(dsl_dataset_t *ds)
+{
+	uint64_t guid = dsl_dataset_phys(ds)->ds_guid;
+
+	return (guid);
+}
+
+/*
+ * Returns the ds_unique_bytes field of the dataset's phys data.
+ */
+uint64_t
+dsl_get_unique(dsl_dataset_t *ds)
+{
+	uint64_t unique_bytes = dsl_dataset_phys(ds)->ds_unique_bytes;
+
+	return (unique_bytes);
+}
+
+/*
+ * Returns the dataset's ds_object number.
+ */
+uint64_t
+dsl_get_objsetid(dsl_dataset_t *ds)
+{
+	uint64_t object = ds->ds_object;
+
+	return (object);
+}
+
+/*
+ * Returns the dataset's in-core ds_userrefs value.
+ */
+uint64_t
+dsl_get_userrefs(dsl_dataset_t *ds)
+{
+	uint64_t userrefs = ds->ds_userrefs;
+
+	return (userrefs);
+}
+
+/*
+ * Returns 1 if DS_IS_DEFER_DESTROY() holds for the dataset, 0 otherwise.
+ */
+uint64_t
+dsl_get_defer_destroy(dsl_dataset_t *ds)
+{
+	if (DS_IS_DEFER_DESTROY(ds))
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Returns the ds_referenced_bytes field of the dataset's phys data.
+ */
+uint64_t
+dsl_get_referenced(dsl_dataset_t *ds)
+{
+	uint64_t referenced = dsl_dataset_phys(ds)->ds_referenced_bytes;
+
+	return (referenced);
+}
+
+/*
+ * Returns ds_num_children - 1 for the dataset. The caller is expected to
+ * pass a snapshot; this is asserted.
+ */
+uint64_t
+dsl_get_numclones(dsl_dataset_t *ds)
+{
+	uint64_t num_clones;
+
+	ASSERT(ds->ds_is_snapshot);
+	num_clones = dsl_dataset_phys(ds)->ds_num_children - 1;
+	return (num_clones);
+}
+
+/*
+ * Returns 1 if DS_FLAG_INCONSISTENT is set in the dataset's ds_flags,
+ * 0 otherwise.
+ */
+uint64_t
+dsl_get_inconsistent(dsl_dataset_t *ds)
+{
+	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Returns the space available to the dataset: the space available to its
+ * dsl_dir, plus any part of ds_reserved that exceeds ds_unique_bytes, and
+ * then capped by what is left under ds_quota when a quota is set.
+ */
+uint64_t
+dsl_get_available(dsl_dataset_t *ds)
+{
+	uint64_t referenced = dsl_get_referenced(ds);
+	uint64_t avail = dsl_dir_space_available(ds->ds_dir,
+	    NULL, 0, TRUE);
+	uint64_t unique = dsl_dataset_phys(ds)->ds_unique_bytes;
+
+	if (ds->ds_reserved > unique)
+		avail += ds->ds_reserved - unique;
+
+	/* No refquota: nothing further to adjust. */
+	if (ds->ds_quota == 0)
+		return (avail);
+
+	/* Already at or over the refquota. */
+	if (referenced >= ds->ds_quota)
+		return (0);
+
+	return (MIN(avail, ds->ds_quota - referenced));
+}
+
+/*
+ * Holds the dataset referenced by ds_prev_snap_obj and stores in 'written'
+ * the value computed by dsl_dataset_space_written() between that dataset
+ * and 'ds'. Returns 0 on success or the error from the hold or the space
+ * computation.
+ */
+int
+dsl_get_written(dsl_dataset_t *ds, uint64_t *written)
+{
+	dsl_dataset_t *prev_snap;
+	uint64_t comp_unused, uncomp_unused;
+	int error;
+
+	error = dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
+	    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev_snap);
+	if (error != 0)
+		return (error);
+
+	error = dsl_dataset_space_written(prev_snap, ds, written,
+	    &comp_unused, &uncomp_unused);
+	dsl_dataset_rele(prev_snap, FTAG);
+	return (error);
+}
+
+/*
+ * Writes the name of ds->ds_prev into 'snap', which should be a buffer of
+ * size ZFS_MAX_DATASET_NAME_LEN. Returns ENOENT, leaving 'snap' untouched,
+ * when ds_prev is NULL or is the pool's dp_origin_snap.
+ */
+int
+dsl_get_prev_snap(dsl_dataset_t *ds, char *snap)
+{
+	dsl_dataset_t *prev = ds->ds_prev;
+
+	if (prev == NULL || prev == ds->ds_dir->dd_pool->dp_origin_snap)
+		return (ENOENT);
+
+	dsl_dataset_name(prev, snap);
+	return (0);
+}
+
+/*
+ * Returns the mountpoint property and source for the given dataset in the
+ * value and source buffers. Up to ZAP_MAXVALUELEN bytes are written to the
+ * value buffer, so it must be at least that large; the source buffer must
+ * be at least as large as ZFS_MAX_DATASET_NAME_LEN.
+ * Returns 0 on success and an error on failure.
+ */
+int
+dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
+    char *source)
+{
+	int error;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+	/* Retrieve the mountpoint value stored in the zap object */
+	error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1,
+	    ZAP_MAXVALUELEN, value, source);
+	if (error != 0) {
+		return (error);
+	}
+
+	/*
+	 * Process the dsname and source to find the full mountpoint string.
+	 * Can be skipped for 'legacy' or 'none'.
+	 */
+	if (value[0] == '/') {
+		char *buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
+		char *root = buf;
+		const char *relpath;
+
+		/*
+		 * If we inherit the mountpoint, even from a dataset
+		 * with a received value, the source will be the path of
+		 * the dataset we inherit from. If source is
+		 * ZPROP_SOURCE_VAL_RECVD, the received value is not
+		 * inherited.
+		 */
+		if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) {
+			relpath = "";
+		} else {
+			ASSERT0(strncmp(dsname, source, strlen(source)));
+			relpath = dsname + strlen(source);
+			if (relpath[0] == '/')
+				relpath++;
+		}
+
+		spa_altroot(dp->dp_spa, root, ZAP_MAXVALUELEN);
+
+		/*
+		 * Special case an alternate root of '/'. This will
+		 * avoid having multiple leading slashes in the
+		 * mountpoint path.
+		 */
+		if (strcmp(root, "/") == 0)
+			root++;
+
+		/*
+		 * If the mountpoint is '/' then skip over this
+		 * if we are obtaining either an alternate root or
+		 * an inherited mountpoint.
+		 */
+		const char *stored = value;
+		if (value[1] == '\0' && (root[0] != '\0' ||
+		    relpath[0] != '\0'))
+			stored = value + 1;
+
+		/*
+		 * The result is formatted back into 'value', so take a
+		 * private copy of the stored mountpoint first: snprintf()
+		 * must not be given a source that overlaps its destination.
+		 */
+		char *mnt = strdup(stored);
+
+		if (relpath[0] == '\0') {
+			(void) snprintf(value, ZAP_MAXVALUELEN, "%s%s",
+			    root, mnt);
+		} else {
+			(void) snprintf(value, ZAP_MAXVALUELEN, "%s%s%s%s",
+			    root, mnt, relpath[0] == '@' ? "" : "/",
+			    relpath);
+		}
+		strfree(mnt);
+		kmem_free(buf, ZAP_MAXVALUELEN);
+	}
+
+	return (0);
+}
+
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
- int err;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
- uint64_t refd, avail, uobjs, aobjs, ratio;
ASSERT(dsl_pool_config_held(dp));
- ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
- (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
- dsl_dataset_phys(ds)->ds_compressed_bytes);
-
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO,
+ dsl_get_refratio(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
- dsl_dataset_phys(ds)->ds_uncompressed_bytes);
+ dsl_get_logicalreferenced(ds));
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
+ dsl_get_compressratio(ds));
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
+ dsl_get_used(ds));
if (ds->ds_is_snapshot) {
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
- dsl_dataset_phys(ds)->ds_unique_bytes);
get_clones_stat(ds, nv);
} else {
- if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
- char buf[ZFS_MAX_DATASET_NAME_LEN];
- dsl_dataset_name(ds->ds_prev, buf);
- dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
- }
-
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
+ if (dsl_get_prev_snap(ds, buf) == 0)
+ dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP,
+ buf);
dsl_dir_stats(ds->ds_dir, nv);
}
- dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
-
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
+ dsl_get_available(ds));
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
+ dsl_get_referenced(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
- dsl_dataset_phys(ds)->ds_creation_time);
+ dsl_get_creation(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
- dsl_dataset_phys(ds)->ds_creation_txg);
+ dsl_get_creationtxg(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
- ds->ds_quota);
+ dsl_get_refquota(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
- ds->ds_reserved);
+ dsl_get_refreservation(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
- dsl_dataset_phys(ds)->ds_guid);
+ dsl_get_guid(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
- dsl_dataset_phys(ds)->ds_unique_bytes);
+ dsl_get_unique(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
- ds->ds_object);
+ dsl_get_objsetid(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
- ds->ds_userrefs);
+ dsl_get_userrefs(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
- DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
+ dsl_get_defer_destroy(ds));
dsl_dataset_crypt_stats(ds, nv);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
- uint64_t written, comp, uncomp;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- dsl_dataset_t *prev;
-
- err = dsl_dataset_hold_obj(dp,
- dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
- if (err == 0) {
- err = dsl_dataset_space_written(prev, ds, &written,
- &comp, &uncomp);
- dsl_dataset_rele(prev, FTAG);
- if (err == 0) {
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
- written);
- }
+ uint64_t written;
+ if (dsl_get_written(ds, &written) == 0) {
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
+ written);
}
}
@@ -1989,30 +2278,22 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
ASSERT(dsl_pool_config_held(dp));
- stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
- stat->dds_inconsistent =
- dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
- stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
+ stat->dds_creation_txg = dsl_get_creationtxg(ds);
+ stat->dds_inconsistent = dsl_get_inconsistent(ds);
+ stat->dds_guid = dsl_get_guid(ds);
stat->dds_origin[0] = '\0';
if (ds->ds_is_snapshot) {
stat->dds_is_snapshot = B_TRUE;
- stat->dds_num_clones =
- dsl_dataset_phys(ds)->ds_num_children - 1;
+ stat->dds_num_clones = dsl_get_numclones(ds);
} else {
stat->dds_is_snapshot = B_FALSE;
stat->dds_num_clones = 0;
if (dsl_dir_is_clone(ds->ds_dir)) {
- dsl_dataset_t *ods;
-
- VERIFY0(dsl_dataset_hold_obj(dp,
- dsl_dir_phys(ds->ds_dir)->dd_origin_obj,
- FTAG, &ods));
- dsl_dataset_name(ods, stat->dds_origin);
- dsl_dataset_rele(ods, FTAG);
+ dsl_dir_get_origin(ds->ds_dir, stat->dds_origin);
}
}
}
@@ -2422,22 +2703,12 @@ struct promotenode {
dsl_dataset_t *ds;
};
-typedef struct dsl_dataset_promote_arg {
- const char *ddpa_clonename;
- dsl_dataset_t *ddpa_clone;
- list_t shared_snaps, origin_snaps, clone_snaps;
- dsl_dataset_t *origin_origin; /* origin of the origin */
- uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
- char *err_ds;
- cred_t *cr;
-} dsl_dataset_promote_arg_t;
-
static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
void *tag);
static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag);
-static int
+int
dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_promote_arg_t *ddpa = arg;
@@ -2449,14 +2720,19 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
uint64_t unused;
uint64_t ss_mv_cnt;
size_t max_snap_len;
+ boolean_t conflicting_snaps;
err = promote_hold(ddpa, dp, FTAG);
if (err != 0)
return (err);
hds = ddpa->ddpa_clone;
+ snap = list_head(&ddpa->shared_snaps);
+ origin_ds = snap->ds;
max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1;
+ snap = list_head(&ddpa->origin_snaps);
+
if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) {
promote_rele(ddpa, FTAG);
return (SET_ERROR(EXDEV));
@@ -2511,6 +2787,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
+ conflicting_snaps = B_FALSE;
ss_mv_cnt = 0;
ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes;
ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes;
@@ -2539,12 +2816,12 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
}
err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
if (err == 0) {
- (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname);
- err = SET_ERROR(EEXIST);
+ fnvlist_add_boolean(ddpa->err_ds,
+ snap->ds->ds_snapname);
+ conflicting_snaps = B_TRUE;
+ } else if (err != ENOENT) {
goto out;
}
- if (err != ENOENT)
- goto out;
/* The very first snapshot does not have a deadlist */
if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0)
@@ -2558,6 +2835,15 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
}
/*
+ * In order to return the full list of conflicting snapshots, we check
+ * whether there was a conflict after traversing all of them.
+ */
+ if (conflicting_snaps) {
+ err = SET_ERROR(EEXIST);
+ goto out;
+ }
+
+ /*
* If we are a clone of a clone then we never reached ORIGIN,
* so we need to subtract out the clone origin's used space.
*/
@@ -2623,7 +2909,7 @@ out:
return (err);
}
-static void
+void
dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_promote_arg_t *ddpa = arg;
@@ -2950,6 +3236,7 @@ dsl_dataset_promote(const char *name, char *conflsnap)
dsl_dataset_promote_arg_t ddpa = { 0 };
uint64_t numsnaps;
int error;
+ nvpair_t *snap_pair;
objset_t *os;
/*
@@ -2967,12 +3254,22 @@ dsl_dataset_promote(const char *name, char *conflsnap)
return (error);
ddpa.ddpa_clonename = name;
- ddpa.err_ds = conflsnap;
+ ddpa.err_ds = fnvlist_alloc();
ddpa.cr = CRED();
- return (dsl_sync_task(name, dsl_dataset_promote_check,
+ error = dsl_sync_task(name, dsl_dataset_promote_check,
dsl_dataset_promote_sync, &ddpa,
- 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
+ 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED);
+
+ /*
+ * Return the first conflicting snapshot found.
+ */
+ snap_pair = nvlist_next_nvpair(ddpa.err_ds, NULL);
+ if (snap_pair != NULL && conflsnap != NULL)
+ (void) strcpy(conflsnap, nvpair_name(snap_pair));
+
+ fnvlist_free(ddpa.err_ds);
+ return (error);
}
int
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 627831bbc..0ea27c3df 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
@@ -30,6 +30,7 @@
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
+#include <sys/dsl_destroy.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
@@ -42,13 +43,7 @@
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zvol.h>
-
-typedef struct dmu_snapshots_destroy_arg {
- nvlist_t *dsda_snaps;
- nvlist_t *dsda_successful_snaps;
- boolean_t dsda_defer;
- nvlist_t *dsda_errlist;
-} dmu_snapshots_destroy_arg_t;
+#include <sys/zcp.h>
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
@@ -86,51 +81,33 @@ dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
return (0);
}
-static int
+int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
- dmu_snapshots_destroy_arg_t *dsda = arg;
+ dsl_destroy_snapshot_arg_t *ddsa = arg;
+ const char *dsname = ddsa->ddsa_name;
+ boolean_t defer = ddsa->ddsa_defer;
+
dsl_pool_t *dp = dmu_tx_pool(tx);
- nvpair_t *pair;
int error = 0;
+ dsl_dataset_t *ds;
- if (!dmu_tx_is_syncing(tx))
- return (0);
-
- for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
- pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
- dsl_dataset_t *ds;
-
- error = dsl_dataset_hold(dp, nvpair_name(pair),
- FTAG, &ds);
-
- /*
- * If the snapshot does not exist, silently ignore it
- * (it's "already destroyed").
- */
- if (error == ENOENT)
- continue;
+ error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
- if (error == 0) {
- error = dsl_destroy_snapshot_check_impl(ds,
- dsda->dsda_defer);
- dsl_dataset_rele(ds, FTAG);
- }
+ /*
+ * If the snapshot does not exist, silently ignore it, and
+ * dsl_destroy_snapshot_sync() will be a no-op
+ * (it's "already destroyed").
+ */
+ if (error == ENOENT)
+ return (0);
- if (error == 0) {
- fnvlist_add_boolean(dsda->dsda_successful_snaps,
- nvpair_name(pair));
- } else {
- fnvlist_add_int32(dsda->dsda_errlist,
- nvpair_name(pair), error);
- }
+ if (error == 0) {
+ error = dsl_destroy_snapshot_check_impl(ds, defer);
+ dsl_dataset_rele(ds, FTAG);
}
- pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
- if (pair != NULL)
- return (fnvpair_value_int32(pair));
-
- return (0);
+ return (error);
}
struct process_old_arg {
@@ -480,24 +457,23 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
dmu_object_free_zapified(mos, obj, tx);
}
-static void
+void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
- dmu_snapshots_destroy_arg_t *dsda = arg;
- dsl_pool_t *dp = dmu_tx_pool(tx);
- nvpair_t *pair;
+ dsl_destroy_snapshot_arg_t *ddsa = arg;
+ const char *dsname = ddsa->ddsa_name;
+ boolean_t defer = ddsa->ddsa_defer;
- for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
- pair != NULL;
- pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
- dsl_dataset_t *ds;
-
- VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
- dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
- zvol_remove_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
- dsl_dataset_rele(ds, FTAG);
- }
+ int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+ if (error == ENOENT)
+ return;
+ ASSERT0(error);
+ dsl_destroy_snapshot_sync_impl(ds, defer, tx);
+ zvol_remove_minors(dp->dp_spa, dsname, B_TRUE);
+ dsl_dataset_rele(ds, FTAG);
}
/*
@@ -517,26 +493,86 @@ int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
nvlist_t *errlist)
{
- dmu_snapshots_destroy_arg_t dsda;
- int error;
- nvpair_t *pair;
-
- pair = nvlist_next_nvpair(snaps, NULL);
- if (pair == NULL)
+ if (nvlist_next_nvpair(snaps, NULL) == NULL)
return (0);
- dsda.dsda_snaps = snaps;
- VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps,
- NV_UNIQUE_NAME, KM_SLEEP));
- dsda.dsda_defer = defer;
- dsda.dsda_errlist = errlist;
-
- error = dsl_sync_task(nvpair_name(pair),
- dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
- &dsda, 0, ZFS_SPACE_CHECK_NONE);
- fnvlist_free(dsda.dsda_successful_snaps);
+ nvlist_t *arg = fnvlist_alloc();
+ nvlist_t *snaps_normalized = fnvlist_alloc();
+ /*
+ * lzc_destroy_snaps() is documented to take an nvlist whose
+ * values "don't matter". We need to convert that nvlist to one
+ * that we know can be converted to LUA.
+ */
+ for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
+ fnvlist_add_boolean_value(snaps_normalized,
+ nvpair_name(pair), B_TRUE);
+ }
+ fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
+ fnvlist_free(snaps_normalized);
+ fnvlist_add_boolean_value(arg, "defer", defer);
+
+ nvlist_t *wrapper = fnvlist_alloc();
+ fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
+ fnvlist_free(arg);
+
+ const char *program =
+ "arg = ...\n"
+ "snaps = arg['snaps']\n"
+ "defer = arg['defer']\n"
+ "errors = { }\n"
+ "has_errors = false\n"
+ "for snap, v in pairs(snaps) do\n"
+ " errno = zfs.check.destroy{snap, defer=defer}\n"
+ " zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n"
+ " if errno == ENOENT then\n"
+ " snaps[snap] = nil\n"
+ " elseif errno ~= 0 then\n"
+ " errors[snap] = errno\n"
+ " has_errors = true\n"
+ " end\n"
+ "end\n"
+ "if has_errors then\n"
+ " return errors\n"
+ "end\n"
+ "for snap, v in pairs(snaps) do\n"
+ " errno = zfs.sync.destroy{snap, defer=defer}\n"
+ " assert(errno == 0)\n"
+ "end\n"
+ "return { }\n";
+
+ nvlist_t *result = fnvlist_alloc();
+ int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)),
+ program,
+ 0,
+ zfs_lua_max_memlimit,
+ fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result);
+ if (error != 0) {
+ char *errorstr = NULL;
+ (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
+ if (errorstr != NULL) {
+ /* Never use the program's error text as a format string. */
+ zfs_dbgmsg("%s", errorstr);
+ }
+ /*
+ * Release both lists on the failure path as well. result is
+ * freed only after logging because errorstr was looked up
+ * from it.
+ */
+ fnvlist_free(wrapper);
+ fnvlist_free(result);
+ return (error);
+ }
+ fnvlist_free(wrapper);
- return (error);
+ /*
+ * lzc_destroy_snaps() is documented to fill the errlist with
+ * int32 values, so we need to convert the int64 values that are
+ * returned from LUA.
+ */
+ int rv = 0;
+ nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN);
+ for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) {
+ int32_t val = (int32_t)fnvpair_value_int64(pair);
+ if (rv == 0)
+ rv = val;
+ fnvlist_add_int32(errlist, nvpair_name(pair), val);
+ }
+ fnvlist_free(result);
+ return (rv);
}
int
@@ -607,10 +643,6 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}
-typedef struct dsl_destroy_head_arg {
- const char *ddha_name;
-} dsl_destroy_head_arg_t;
-
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
@@ -656,7 +688,7 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
return (0);
}
-static int
+int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
@@ -894,7 +926,7 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
}
}
-static void
+void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 96e8dd62e..bf130eb99 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -947,62 +947,139 @@ dsl_dir_is_clone(dsl_dir_t *dd)
dd->dd_pool->dp_origin_snap->ds_object));
}
+
+/*
+ * Returns the dd_used_bytes field of the dir's phys data.
+ */
+uint64_t
+dsl_dir_get_used(dsl_dir_t *dd)
+{
+	uint64_t used = dsl_dir_phys(dd)->dd_used_bytes;
+
+	return (used);
+}
+
+/*
+ * Returns the dd_quota field of the dir's phys data.
+ */
+uint64_t
+dsl_dir_get_quota(dsl_dir_t *dd)
+{
+	uint64_t quota = dsl_dir_phys(dd)->dd_quota;
+
+	return (quota);
+}
+
+/*
+ * Returns the dd_reserved field of the dir's phys data.
+ */
+uint64_t
+dsl_dir_get_reservation(dsl_dir_t *dd)
+{
+	uint64_t reserved = dsl_dir_phys(dd)->dd_reserved;
+
+	return (reserved);
+}
+
+/*
+ * Returns the dir's compression ratio as a fixed point number, 100x the
+ * ratio of uncompressed to compressed bytes. A dir with zero compressed
+ * bytes reports 100.
+ */
+uint64_t
+dsl_dir_get_compressratio(dsl_dir_t *dd)
+{
+	uint64_t compressed = dsl_dir_phys(dd)->dd_compressed_bytes;
+
+	if (compressed == 0)
+		return (100);
+
+	return (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / compressed);
+}
+
+/*
+ * Returns the dd_uncompressed_bytes field of the dir's phys data.
+ */
+uint64_t
+dsl_dir_get_logicalused(dsl_dir_t *dd)
+{
+	uint64_t uncompressed = dsl_dir_phys(dd)->dd_uncompressed_bytes;
+
+	return (uncompressed);
+}
+
+/*
+ * Returns the DD_USED_SNAP entry of the dir's dd_used_breakdown array.
+ */
+uint64_t
+dsl_dir_get_usedsnap(dsl_dir_t *dd)
+{
+	uint64_t used_snap = dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP];
+
+	return (used_snap);
+}
+
+/*
+ * Returns the DD_USED_HEAD entry of the dir's dd_used_breakdown array.
+ */
+uint64_t
+dsl_dir_get_usedds(dsl_dir_t *dd)
+{
+	uint64_t used_head = dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD];
+
+	return (used_head);
+}
+
+/*
+ * Returns the DD_USED_REFRSRV entry of the dir's dd_used_breakdown array.
+ */
+uint64_t
+dsl_dir_get_usedrefreserv(dsl_dir_t *dd)
+{
+	uint64_t used_refrsrv =
+	    dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV];
+
+	return (used_refrsrv);
+}
+
+/*
+ * Returns the sum of the DD_USED_CHILD and DD_USED_CHILD_RSRV entries of
+ * the dir's dd_used_breakdown array.
+ */
+uint64_t
+dsl_dir_get_usedchild(dsl_dir_t *dd)
+{
+	uint64_t child = dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD];
+	uint64_t child_rsrv =
+	    dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV];
+
+	return (child + child_rsrv);
+}
+
void
-dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
+dsl_dir_get_origin(dsl_dir_t *dd, char *buf)
+{
+	/*
+	 * Hold the dataset referenced by dd_origin_obj (the hold must
+	 * succeed), copy its name into buf, then release it.
+	 */
+	dsl_dataset_t *origin;
+	uint64_t origin_obj = dsl_dir_phys(dd)->dd_origin_obj;
+
+	VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, origin_obj, FTAG, &origin));
+	dsl_dataset_name(origin, buf);
+	dsl_dataset_rele(origin, FTAG);
+}
+
+int
+dsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count)
{
- uint64_t intval;
+	/*
+	 * Look up DD_FIELD_FILESYSTEM_COUNT in the dir's ZAP and store it
+	 * in *count. A dir that is not zapified yields ENOENT; otherwise
+	 * the result of zap_lookup() is returned.
+	 */
+	objset_t *mos;
+
+	if (!dsl_dir_is_zapified(dd))
+		return (ENOENT);
+
+	mos = dd->dd_pool->dp_meta_objset;
+	return (zap_lookup(mos, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
+	    sizeof (*count), 1, count));
+}
+
+/*
+ * Looks up DD_FIELD_SNAPSHOT_COUNT in the dir's ZAP and stores it in
+ * *count. A dir that is not zapified yields ENOENT; otherwise the result
+ * of zap_lookup() is returned.
+ */
+int
+dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count)
+{
+	objset_t *mos;
+
+	if (!dsl_dir_is_zapified(dd))
+		return (ENOENT);
+
+	mos = dd->dd_pool->dp_meta_objset;
+	return (zap_lookup(mos, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
+	    sizeof (*count), 1, count));
+}
+void
+dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
+{
mutex_enter(&dd->dd_lock);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
- dsl_dir_phys(dd)->dd_used_bytes);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
- dsl_dir_phys(dd)->dd_quota);
+ dsl_dir_get_quota(dd));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
- dsl_dir_phys(dd)->dd_reserved);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
- dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 :
- (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 /
- dsl_dir_phys(dd)->dd_compressed_bytes));
+ dsl_dir_get_reservation(dd));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED,
- dsl_dir_phys(dd)->dd_uncompressed_bytes);
+ dsl_dir_get_logicalused(dd));
if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
- dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]);
+ dsl_dir_get_usedsnap(dd));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
- dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]);
+ dsl_dir_get_usedds(dd));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
- dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]);
+ dsl_dir_get_usedrefreserv(dd));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
- dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] +
- dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]);
+ dsl_dir_get_usedchild(dd));
}
mutex_exit(&dd->dd_lock);
- if (dsl_dir_is_zapified(dd)) {
- objset_t *os = dd->dd_pool->dp_meta_objset;
-
- if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
- sizeof (intval), 1, &intval) == 0) {
- dsl_prop_nvlist_add_uint64(nv,
- ZFS_PROP_FILESYSTEM_COUNT, intval);
- }
- if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
- sizeof (intval), 1, &intval) == 0) {
- dsl_prop_nvlist_add_uint64(nv,
- ZFS_PROP_SNAPSHOT_COUNT, intval);
- }
+ uint64_t count;
+ if (dsl_dir_get_filesystem_count(dd, &count) == 0) {
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT,
+ count);
+ }
+ if (dsl_dir_get_snapshot_count(dd, &count) == 0) {
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT,
+ count);
}
if (dsl_dir_is_clone(dd)) {
- dsl_dataset_t *ds;
char buf[ZFS_MAX_DATASET_NAME_LEN];
-
- VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
- dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds));
- dsl_dataset_name(ds, buf);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dir_get_origin(dd, buf);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
}
+
}
void
diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c
index a64e3b023..ba4dcc480 100644
--- a/module/zfs/vdev_raidz_math.c
+++ b/module/zfs/vdev_raidz_math.c
@@ -132,7 +132,8 @@ vdev_raidz_math_get_ops()
default:
ASSERT3U(impl, <, raidz_supp_impl_cnt);
ASSERT3U(raidz_supp_impl_cnt, >, 0);
- ops = raidz_supp_impl[impl];
+ if (impl < ARRAY_SIZE(raidz_all_maths))
+ ops = raidz_supp_impl[impl];
break;
}
diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c
new file mode 100644
index 000000000..07f96e298
--- /dev/null
+++ b/module/zfs/zcp.c
@@ -0,0 +1,1357 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+/*
+ * ZFS Channel Programs (ZCP)
+ *
+ * The ZCP interface allows various ZFS administrative operations (e.g.
+ * creating and destroying snapshots, typically performed via an ioctl to
+ * /dev/zfs by the zfs(8) command and libzfs/libzfs_core) to be run
+ * programmatically as a Lua script. A ZCP
+ * script is run as a dsl_sync_task and fully executed during one transaction
+ * group sync. This ensures that no other changes can be written concurrently
+ * with a running Lua script. Combining multiple calls to the exposed ZFS
+ * functions into one script gives a number of benefits:
+ *
+ * 1. Atomicity. For some compound or iterative operations, it's useful to be
+ * able to guarantee that the state of a pool has not changed between calls to
+ * ZFS.
+ *
+ * 2. Performance. If a large number of changes need to be made (e.g. deleting
+ * many filesystems), there can be a significant performance penalty as a
+ * result of the need to wait for a transaction group sync to pass for every
+ * single operation. When expressed as a single ZCP script, all these changes
+ * can be performed at once in one txg sync.
+ *
+ * A modified version of the Lua 5.2 interpreter is used to run channel program
+ * scripts. The Lua 5.2 manual can be found at:
+ *
+ * http://www.lua.org/manual/5.2/
+ *
+ * If being run by a user (via an ioctl syscall), executing a ZCP script
+ * requires root privileges in the global zone.
+ *
+ * Scripts are passed to zcp_eval() as a string, then run in a synctask by
+ * zcp_eval_sync(). Arguments can be passed into the Lua script as an nvlist,
+ * which will be converted to a Lua table. Similarly, values returned from
+ * a ZCP script will be converted to an nvlist. See zcp_lua_to_nvlist_impl()
+ * for details on exact allowed types and conversion.
+ *
+ * ZFS functionality is exposed to a ZCP script as a library of function calls.
+ * These calls are sorted into submodules, such as zfs.list and zfs.sync, for
+ * iterators and synctasks, respectively. Each of these submodules resides in
+ * its own source file, with a zcp_*_info structure describing each library
+ * call in the submodule.
+ *
+ * Error handling in ZCP scripts is handled by a number of different methods
+ * based on severity:
+ *
+ * 1. Memory and time limits are in place to prevent a channel program from
+ * consuming excessive system resources or running forever. If one of these
+ * limits is hit, the channel program will be stopped immediately and return
+ * from zcp_eval() with an error code. No attempt will be made to roll back or
+ * undo any changes made by the channel program before the error occurred.
+ * Consumers invoking zcp_eval() from elsewhere in the kernel may pass a time
+ * limit of 0, disabling the time limit.
+ *
+ * 2. Internal Lua errors can occur as a result of a syntax error, calling a
+ * library function with incorrect arguments, invoking the error() function,
+ * failing an assert(), or other runtime errors. In these cases the channel
+ * program will stop executing and return from zcp_eval() with an error code.
+ * In place of a return value, an error message will also be returned in the
+ * 'result' nvlist containing information about the error. No attempt will be
+ * made to roll back or undo any changes made by the channel program before the
+ * error occurred.
+ *
+ * 3. If an error occurs inside a ZFS library call which returns an error code,
+ * the error is returned to the Lua script to be handled as desired.
+ *
+ * In the first two cases, Lua's error-throwing mechanism is used, which
+ * longjumps out of the script execution with luaL_error() and returns with the
+ * error.
+ *
+ * See zfs-program(8) for more information on high level usage.
+ */
+
+#include <sys/lua/lua.h>
+#include <sys/lua/lualib.h>
+#include <sys/lua/lauxlib.h>
+
+#include <sys/dsl_prop.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dsl_dataset.h>
+#include <sys/zcp.h>
+#include <sys/zcp_iter.h>
+#include <sys/zcp_prop.h>
+#include <sys/zcp_global.h>
+#include <util/sscanf.h>
+
+#ifndef KM_NORMALPRI
+#define KM_NORMALPRI 0
+#endif
+
+uint64_t zfs_lua_check_instrlimit_interval = 100;
+uint64_t zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT;
+uint64_t zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT;
+
+static int zcp_nvpair_value_to_lua(lua_State *, nvpair_t *, char *, int);
+static int zcp_lua_to_nvlist_impl(lua_State *, int, nvlist_t *, const char *,
+ int);
+
+typedef struct zcp_alloc_arg {
+ boolean_t aa_must_succeed; /* B_TRUE: use KM_SLEEP and skip the limit check */
+ int64_t aa_alloc_remaining; /* bytes still available; charged on alloc, credited on free */
+ int64_t aa_alloc_limit; /* total byte budget (memlimit given to zcp_eval()) */
+} zcp_alloc_arg_t;
+
+typedef struct zcp_eval_arg {
+ lua_State *ea_state; /* interpreter with the program and its argument on the stack */
+ zcp_alloc_arg_t *ea_allocargs; /* allocator state used by zcp_lua_alloc() */
+ cred_t *ea_cred; /* credentials captured via CRED() in zcp_eval() */
+ nvlist_t *ea_outnvl; /* receives the return value or the error message */
+ int ea_result; /* result code that zcp_eval() returns */
+ uint64_t ea_instrlimit; /* instruction limit; 0 disables the check */
+} zcp_eval_arg_t;
+
+/*ARGSUSED*/
+static int
+zcp_eval_check(void *arg, dmu_tx_t *tx)
+{
+ return (0); /* check callback handed to dsl_sync_task(); always reports success */
+}
+
+/*
+ * Message handler installed for the outermost lua_pcall() of a channel
+ * program. Lua invokes it with the error value as its only argument. That
+ * value is usually a message string, although a channel program may raise
+ * an arbitrary object through Lua's error() function. The handler leaves a
+ * traceback, built from the string form of the error value, on the Lua
+ * stack as its single result.
+ *
+ * A fatal Lua error can interrupt a library call that still holds
+ * resources, so any registered cleanup function is run here first.
+ */
+static int
+zcp_error_handler(lua_State *state)
+{
+	zcp_cleanup(state);
+
+	VERIFY3U(1, ==, lua_gettop(state));
+	luaL_traceback(state, state, lua_tostring(state, 1), 1);
+	return (1);
+}
+
+/*
+ * Raise a Lua argument error for argument number narg. The message is
+ * formatted from msg and the trailing arguments by lua_pushvfstring().
+ * The value of luaL_argerror() is passed back so that callers can write
+ * "return (zcp_argerror(...))".
+ */
+int
+zcp_argerror(lua_State *state, int narg, const char *msg, ...)
+{
+	const char *formatted;
+	va_list ap;
+
+	va_start(ap, msg);
+	formatted = lua_pushvfstring(state, msg, ap);
+	va_end(ap);
+
+	return (luaL_argerror(state, narg, formatted));
+}
+
+/*
+ * Register cleanfunc, together with its opaque argument cleanarg, as the
+ * cleanup function of the running channel program. It will be invoked if a
+ * fatal error causes the Lua interpreter to longjump out of a function
+ * call.
+ *
+ * If an error occurs, the cleanup function is invoked exactly once and
+ * then unregistered.
+ */
+void
+zcp_register_cleanup(lua_State *state, zcp_cleanup_t cleanfunc, void *cleanarg)
+{
+	zcp_run_info_t *runinfo = zcp_run_info(state);
+
+	/*
+	 * Only one cleanup function can be registered at a time. The previous
+	 * one must have been explicitly removed, otherwise it would be
+	 * silently clobbered here.
+	 */
+	ASSERT3P(runinfo->zri_cleanup, ==, NULL);
+
+	runinfo->zri_cleanup_arg = cleanarg;
+	runinfo->zri_cleanup = cleanfunc;
+}
+
+/*
+ * Unregister the current cleanup function (if any) without running it.
+ */
+void
+zcp_clear_cleanup(lua_State *state)
+{
+	zcp_run_info_t *runinfo = zcp_run_info(state);
+
+	runinfo->zri_cleanup_arg = NULL;
+	runinfo->zri_cleanup = NULL;
+}
+
+/*
+ * Run the registered cleanup function with its registered argument, then
+ * unregister it. Does nothing when no cleanup function is registered.
+ */
+void
+zcp_cleanup(lua_State *state)
+{
+	zcp_run_info_t *runinfo = zcp_run_info(state);
+
+	if (runinfo->zri_cleanup == NULL)
+		return;
+
+	runinfo->zri_cleanup(runinfo->zri_cleanup_arg);
+	zcp_clear_cleanup(state);
+}
+
+#define ZCP_NVLIST_MAX_DEPTH 20
+
+/*
+ * Convert the lua table at the given index on the Lua stack to an nvlist
+ * and return it.
+ *
+ * If the table can not be converted for any reason, NULL is returned and
+ * an error message is pushed onto the Lua stack.
+ *
+ * 'depth' is the nesting level of this table. Converting any entry fails
+ * once depth has reached ZCP_NVLIST_MAX_DEPTH, which bounds the recursion
+ * through zcp_lua_to_nvlist_impl(). On the error paths the key/value pair
+ * that was being visited is left on the stack beneath the error message.
+ */
+static nvlist_t *
+zcp_table_to_nvlist(lua_State *state, int index, int depth)
+{
+ nvlist_t *nvl;
+ /*
+ * Converting a Lua table to an nvlist with key uniqueness checking is
+ * O(n^2) in the number of keys in the nvlist, which can take a long
+ * time when we return a large table from a channel program.
+ * Furthermore, Lua's table interface *almost* guarantees unique keys
+ * on its own (details below). Therefore, we don't use fnvlist_alloc()
+ * here to avoid the built-in uniqueness checking.
+ *
+ * The *almost* is because it's possible to have key collisions between
+ * e.g. the string "1" and the number 1, or the string "true" and the
+ * boolean true, so we explicitly check that when we're looking at a
+ * key which is an integer / boolean or a string that can be parsed as
+ * one of those types. In the worst case this could still devolve into
+ * O(n^2), so we only start doing these checks on boolean/integer keys
+ * once we've seen a string key which fits this weird usage pattern.
+ *
+ * Ultimately, we still want callers to know that the keys in this
+ * nvlist are unique, so before we return this we set the nvlist's
+ * flags to reflect that.
+ */
+ VERIFY0(nvlist_alloc(&nvl, 0, KM_SLEEP));
+
+ /*
+ * Push an empty stack slot where lua_next() will store each
+ * table key.
+ */
+ lua_pushnil(state);
+ boolean_t saw_str_could_collide = B_FALSE;
+ while (lua_next(state, index) != 0) {
+ /*
+ * The next key-value pair from the table at index is
+ * now on the stack, with the key at stack slot -2 and
+ * the value at slot -1.
+ */
+ int err = 0;
+ char buf[32];
+ const char *key = NULL;
+ boolean_t key_could_collide = B_FALSE;
+
+ switch (lua_type(state, -2)) {
+ case LUA_TSTRING:
+ key = lua_tostring(state, -2);
+
+ /* check if this could collide with a number or bool */
+ long long tmp;
+ int parselen;
+ if ((sscanf(key, "%lld%n", &tmp, &parselen) > 0 &&
+ parselen == strlen(key)) ||
+ strcmp(key, "true") == 0 ||
+ strcmp(key, "false") == 0) {
+ key_could_collide = B_TRUE;
+ saw_str_could_collide = B_TRUE;
+ }
+ break;
+ case LUA_TBOOLEAN:
+ key = (lua_toboolean(state, -2) == B_TRUE ?
+ "true" : "false");
+ if (saw_str_could_collide) {
+ key_could_collide = B_TRUE;
+ }
+ break;
+ case LUA_TNUMBER:
+ VERIFY3U(sizeof (buf), >,
+ snprintf(buf, sizeof (buf), "%lld",
+ (longlong_t)lua_tonumber(state, -2)));
+ key = buf;
+ if (saw_str_could_collide) {
+ key_could_collide = B_TRUE;
+ }
+ break;
+ default:
+ fnvlist_free(nvl);
+ (void) lua_pushfstring(state, "Invalid key "
+ "type '%s' in table",
+ lua_typename(state, lua_type(state, -2)));
+ return (NULL);
+ }
+ /*
+ * Check for type-mismatched key collisions, and throw an error.
+ */
+ if (key_could_collide && nvlist_exists(nvl, key)) {
+ fnvlist_free(nvl);
+ (void) lua_pushfstring(state, "Collision of "
+ "key '%s' in table", key);
+ return (NULL);
+ }
+ /*
+ * Recursively convert the table value and insert into
+ * the new nvlist with the parsed key. To prevent
+ * stack overflow on circular or heavily nested tables,
+ * we track the current nvlist depth.
+ */
+ if (depth >= ZCP_NVLIST_MAX_DEPTH) {
+ fnvlist_free(nvl);
+ (void) lua_pushfstring(state, "Maximum table "
+ "depth (%d) exceeded for table",
+ ZCP_NVLIST_MAX_DEPTH);
+ return (NULL);
+ }
+ err = zcp_lua_to_nvlist_impl(state, -1, nvl, key,
+ depth + 1);
+ if (err != 0) {
+ fnvlist_free(nvl);
+ /*
+ * Error message has been pushed to the lua
+ * stack by the recursive call.
+ */
+ return (NULL);
+ }
+ /*
+ * Pop the value pushed by lua_next().
+ */
+ lua_pop(state, 1);
+ }
+
+ /*
+ * Mark the nvlist as having unique keys. This is a little ugly, but we
+ * ensured above that there are no duplicate keys in the nvlist.
+ */
+ nvl->nvl_nvflag |= NV_UNIQUE_NAME;
+
+ return (nvl);
+}
+
+/*
+ * Convert a value from the given index into the lua stack to an nvpair, adding
+ * it to an nvlist with the given key.
+ *
+ * Values are converted as follows:
+ *
+ * string -> string
+ * number -> int64
+ * boolean -> boolean
+ * nil -> boolean (no value)
+ *
+ * Lua tables are converted to nvlists and then inserted. The table's keys
+ * are converted to strings then used as keys in the nvlist to store each table
+ * element. Keys are converted as follows:
+ *
+ * string -> no change
+ * number -> "%lld"
+ * boolean -> "true" | "false"
+ * nil -> error
+ *
+ * In the case of a key collision, an error is thrown.
+ *
+ * If an error is encountered, a nonzero error code is returned, and an error
+ * string will be pushed onto the Lua stack. A Lua stack overflow is reported
+ * with a return value of 1; an unconvertible table or an unsupported value
+ * type returns EINVAL. Callers in this file only test for nonzero.
+ *
+ * 'depth' is the current table nesting level; it is handed to
+ * zcp_table_to_nvlist(), which enforces ZCP_NVLIST_MAX_DEPTH.
+ */
+static int
+zcp_lua_to_nvlist_impl(lua_State *state, int index, nvlist_t *nvl,
+ const char *key, int depth)
+{
+ /*
+ * Verify that we have enough remaining space in the lua stack to parse
+ * a key-value pair and push an error.
+ */
+ if (!lua_checkstack(state, 3)) {
+ (void) lua_pushstring(state, "Lua stack overflow");
+ return (1);
+ }
+
+ index = lua_absindex(state, index);
+
+ switch (lua_type(state, index)) {
+ case LUA_TNIL:
+ fnvlist_add_boolean(nvl, key);
+ break;
+ case LUA_TBOOLEAN:
+ fnvlist_add_boolean_value(nvl, key,
+ lua_toboolean(state, index));
+ break;
+ case LUA_TNUMBER:
+ fnvlist_add_int64(nvl, key, lua_tonumber(state, index));
+ break;
+ case LUA_TSTRING:
+ fnvlist_add_string(nvl, key, lua_tostring(state, index));
+ break;
+ case LUA_TTABLE: {
+ nvlist_t *value_nvl = zcp_table_to_nvlist(state, index, depth);
+ if (value_nvl == NULL)
+ return (EINVAL);
+
+ fnvlist_add_nvlist(nvl, key, value_nvl);
+ fnvlist_free(value_nvl);
+ break;
+ }
+ default:
+ (void) lua_pushfstring(state,
+ "Invalid value type '%s' for key '%s'",
+ lua_typename(state, lua_type(state, index)), key);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * Convert the Lua value at the given stack index to an nvpair and add it to
+ * nvl under the given key. Conversion starts at table depth 0.
+ */
+void
+zcp_lua_to_nvlist(lua_State *state, int index, nvlist_t *nvl, const char *key)
+{
+	int failed = zcp_lua_to_nvlist_impl(state, index, nvl, key, 0);
+
+	/*
+	 * When the conversion fails, zcp_lua_to_nvlist_impl() has already
+	 * pushed an error string onto the Lua stack; raise it as a fatal Lua
+	 * error.
+	 */
+	if (failed != 0)
+		(void) lua_error(state);
+}
+
+/*
+ * Lua C function wrapper around zcp_lua_to_nvlist(), so that the conversion
+ * can be run under lua_pcall(). Expected arguments on the Lua stack:
+ *   1: key (light userdata holding a const char *)
+ *   2: destination nvlist (light userdata)
+ *   3: value to convert
+ * Returns no Lua values.
+ */
+int
+zcp_lua_to_nvlist_helper(lua_State *state)
+{
+	const char *key = (const char *)lua_touserdata(state, 1);
+	nvlist_t *dest = (nvlist_t *)lua_touserdata(state, 2);
+
+	zcp_lua_to_nvlist(state, 3, dest, key);
+	return (0);
+}
+
+/*
+ * Convert the Lua value in stack slot 1 into nvl under the given key. The
+ * conversion runs inside lua_pcall() so that a conversion failure is caught
+ * rather than propagating as an unprotected Lua error. If it fails, the value
+ * then in slot 1 is stored under ZCP_RET_ERROR instead and the evaluation
+ * result is set to ECHRNG.
+ */
+void
+zcp_convert_return_values(lua_State *state, nvlist_t *nvl,
+    const char *key, zcp_eval_arg_t *evalargs)
+{
+	/*
+	 * Build the protected call: helper function, key, nvlist, and the
+	 * value, which is moved from slot 1 to the top of the stack.
+	 */
+	lua_pushcfunction(state, zcp_lua_to_nvlist_helper);
+	lua_pushlightuserdata(state, (char *)key);
+	lua_pushlightuserdata(state, nvl);
+	lua_pushvalue(state, 1);
+	lua_remove(state, 1);
+
+	if (lua_pcall(state, 3, 0, 0) == 0)	/* zcp_lua_to_nvlist_helper */
+		return;
+
+	zcp_lua_to_nvlist(state, 1, nvl, ZCP_RET_ERROR);
+	evalargs->ea_result = SET_ERROR(ECHRNG);
+}
+
+/*
+ * Push a Lua table representing nvl onto the stack. If nvl cannot be
+ * converted, return the error from zcp_nvpair_value_to_lua() (EINVAL), with
+ * errbuf filled in, and push nothing. errbuf may be NULL, in which case no
+ * error string is produced.
+ *
+ * Normally every nvpair becomes a key->value entry of the table. The
+ * exception is an nvlist made up solely of BOOLEAN entries (names without a
+ * value): a Lua table cannot hold a key whose value is nil, so such a list,
+ * commonly used to pass around sets of dataset names, is converted to a
+ * 1-based Lua array of the names instead.
+ */
+int
+zcp_nvlist_to_lua(lua_State *state, nvlist_t *nvl,
+    char *errbuf, int errbuf_len)
+{
+	nvpair_t *elem;
+	boolean_t valueless = B_TRUE;
+
+	lua_newtable(state);
+
+	/* Look for any entry that actually carries a value. */
+	elem = nvlist_next_nvpair(nvl, NULL);
+	while (elem != NULL) {
+		if (nvpair_type(elem) != DATA_TYPE_BOOLEAN) {
+			valueless = B_FALSE;
+			break;
+		}
+		elem = nvlist_next_nvpair(nvl, elem);
+	}
+
+	if (valueless) {
+		/* Names only: emit them as an array of strings. */
+		int slot = 1;
+
+		elem = nvlist_next_nvpair(nvl, NULL);
+		while (elem != NULL) {
+			(void) lua_pushinteger(state, slot);
+			(void) lua_pushstring(state, nvpair_name(elem));
+			(void) lua_settable(state, -3);
+			slot++;
+			elem = nvlist_next_nvpair(nvl, elem);
+		}
+		return (0);
+	}
+
+	elem = nvlist_next_nvpair(nvl, NULL);
+	while (elem != NULL) {
+		int err = zcp_nvpair_value_to_lua(state, elem,
+		    errbuf, errbuf_len);
+		if (err != 0) {
+			/* Discard the partially built table. */
+			lua_pop(state, 1);
+			return (err);
+		}
+		(void) lua_setfield(state, -2, nvpair_name(elem));
+		elem = nvlist_next_nvpair(nvl, elem);
+	}
+	return (0);
+}
+
+/*
+ * Push a Lua object representing the value of "pair" onto the stack.
+ *
+ * Understands boolean_value, string, int64, nvlist, string_array,
+ * uint64_array, and int64_array type values. Arrays are pushed as 1-based
+ * Lua tables. A NULL pair is pushed as nil. For other types, returns
+ * EINVAL, fills in errbuf (unless it is NULL), and pushes nothing.
+ *
+ * NOTE(review): uint64_array elements are handed to lua_pushinteger(), so
+ * values above INT64_MAX presumably do not survive the conversion intact;
+ * confirm against the interpreter's lua_Integer type.
+ */
+static int
+zcp_nvpair_value_to_lua(lua_State *state, nvpair_t *pair,
+    char *errbuf, int errbuf_len)
+{
+	int err = 0;
+
+	if (pair == NULL) {
+		lua_pushnil(state);
+		return (0);
+	}
+
+	switch (nvpair_type(pair)) {
+	case DATA_TYPE_BOOLEAN_VALUE:
+		(void) lua_pushboolean(state,
+		    fnvpair_value_boolean_value(pair));
+		break;
+	case DATA_TYPE_STRING:
+		(void) lua_pushstring(state, fnvpair_value_string(pair));
+		break;
+	case DATA_TYPE_INT64:
+		(void) lua_pushinteger(state, fnvpair_value_int64(pair));
+		break;
+	case DATA_TYPE_NVLIST:
+		/* On failure the nested call has pushed nothing. */
+		err = zcp_nvlist_to_lua(state,
+		    fnvpair_value_nvlist(pair), errbuf, errbuf_len);
+		break;
+	case DATA_TYPE_STRING_ARRAY: {
+		char **strarr;
+		uint_t nelem;
+		(void) nvpair_value_string_array(pair, &strarr, &nelem);
+		lua_newtable(state);
+		/* The index is unsigned to match nelem. */
+		for (uint_t i = 0; i < nelem; i++) {
+			(void) lua_pushinteger(state, i + 1);
+			(void) lua_pushstring(state, strarr[i]);
+			(void) lua_settable(state, -3);
+		}
+		break;
+	}
+	case DATA_TYPE_UINT64_ARRAY: {
+		uint64_t *intarr;
+		uint_t nelem;
+		(void) nvpair_value_uint64_array(pair, &intarr, &nelem);
+		lua_newtable(state);
+		for (uint_t i = 0; i < nelem; i++) {
+			(void) lua_pushinteger(state, i + 1);
+			(void) lua_pushinteger(state, intarr[i]);
+			(void) lua_settable(state, -3);
+		}
+		break;
+	}
+	case DATA_TYPE_INT64_ARRAY: {
+		int64_t *intarr;
+		uint_t nelem;
+		(void) nvpair_value_int64_array(pair, &intarr, &nelem);
+		lua_newtable(state);
+		for (uint_t i = 0; i < nelem; i++) {
+			(void) lua_pushinteger(state, i + 1);
+			(void) lua_pushinteger(state, intarr[i]);
+			(void) lua_settable(state, -3);
+		}
+		break;
+	}
+	default: {
+		if (errbuf != NULL) {
+			(void) snprintf(errbuf, errbuf_len,
+			    "Unhandled nvpair type %d for key '%s'",
+			    nvpair_type(pair), nvpair_name(pair));
+		}
+		return (EINVAL);
+	}
+	}
+	return (err);
+}
+
+/*
+ * Translate the result of a dataset hold into a Lua error. ENOENT and EXDEV
+ * are reported as errors on argument #1 (the dataset name); EIO and any other
+ * nonzero code are reported as general Lua errors. Returns 0 when error is 0;
+ * in every other case the error call longjmps and this function does not
+ * return.
+ */
+int
+zcp_dataset_hold_error(lua_State *state, dsl_pool_t *dp, const char *dsname,
+    int error)
+{
+	switch (error) {
+	case 0:
+		break;
+	case ENOENT:
+		(void) zcp_argerror(state, 1, "no such dataset '%s'", dsname);
+		break;	/* not reached; zcp_argerror will longjmp */
+	case EXDEV:
+		(void) zcp_argerror(state, 1,
+		    "dataset '%s' is not in the target pool '%s'",
+		    dsname, spa_name(dp->dp_spa));
+		break;	/* not reached; zcp_argerror will longjmp */
+	case EIO:
+		(void) luaL_error(state,
+		    "I/O error while accessing dataset '%s'", dsname);
+		break;	/* not reached; luaL_error will longjmp */
+	default:
+		(void) luaL_error(state,
+		    "unexpected error %d while accessing dataset '%s'",
+		    error, dsname);
+		break;	/* not reached; luaL_error will longjmp */
+	}
+	return (0);
+}
+
+/*
+ * Hold the dataset named dsname in pool dp on behalf of tag and return it.
+ *
+ * Note: does not return on failure; zcp_dataset_hold_error() raises a Lua
+ * error, which longjmps. Error messages assume that dsname was argument #1
+ * of the calling library function.
+ */
+dsl_dataset_t *
+zcp_dataset_hold(lua_State *state, dsl_pool_t *dp, const char *dsname,
+    void *tag)
+{
+	dsl_dataset_t *held;
+	int error;
+
+	error = dsl_dataset_hold(dp, dsname, tag, &held);
+	(void) zcp_dataset_hold_error(state, dp, dsname, error);
+	return (held);
+}
+
+static int zcp_debug(lua_State *);
+static zcp_lib_info_t zcp_debug_info = {
+	.name = "debug",
+	.func = zcp_debug,
+	.pargs = {
+	    { .za_name = "debug string", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * zfs.debug(string): record the given string with zfs_dbgmsg(), prefixed
+ * with the txg of the transaction the channel program is running in.
+ * Returns no values to Lua.
+ */
+static int
+zcp_debug(lua_State *state)
+{
+	const char *dbgstring;
+	zcp_run_info_t *ri = zcp_run_info(state);
+	zcp_lib_info_t *libinfo = &zcp_debug_info;
+
+	zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs);
+
+	dbgstring = lua_tostring(state, 1);
+
+	/*
+	 * Cast so that the argument type matches the %lld conversion, as
+	 * the snprintf() in zcp_table_to_nvlist() does.
+	 */
+	zfs_dbgmsg("txg %lld ZCP: %s", (longlong_t)ri->zri_tx->tx_txg,
+	    dbgstring);
+
+	return (0);
+}
+
+static int zcp_exists(lua_State *);
+static zcp_lib_info_t zcp_exists_info = {
+	.name = "exists",
+	.func = zcp_exists,
+	.pargs = {
+	    { .za_name = "dataset", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * zfs.exists(dataset): return true to Lua if the named dataset can be held
+ * in the target pool, and false if the hold fails with ENOENT. Any other
+ * hold failure (EXDEV, EIO, or an unexpected code) raises a Lua error.
+ */
+static int
+zcp_exists(lua_State *state)
+{
+	zcp_run_info_t *ri = zcp_run_info(state);
+	dsl_pool_t *dp = ri->zri_pool;
+	zcp_lib_info_t *libinfo = &zcp_exists_info;
+
+	zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs);
+
+	const char *dsname = lua_tostring(state, 1);
+
+	dsl_dataset_t *ds;
+	int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+	if (error == 0) {
+		dsl_dataset_rele(ds, FTAG);
+		lua_pushboolean(state, B_TRUE);
+	} else if (error == ENOENT) {
+		lua_pushboolean(state, B_FALSE);
+	} else if (error == EXDEV) {
+		return (luaL_error(state, "dataset '%s' is not in the "
+		    "target pool", dsname));
+	} else if (error == EIO) {
+		return (luaL_error(state, "I/O error opening dataset '%s'",
+		    dsname));
+	} else {
+		return (luaL_error(state, "unexpected error %d", error));
+	}
+
+	/*
+	 * A Lua C function returns the number of results it pushed. The
+	 * boolean pushed above is the single result; returning 0 here would
+	 * make Lua discard it.
+	 */
+	return (1);
+}
+
+/*
+ * Allocate/realloc/free a buffer for the lua interpreter.
+ *
+ * When nsize is 0, behaves as free() and returns NULL.
+ *
+ * If ptr is NULL, behaves as malloc() and returns an allocated buffer of size
+ * at least nsize.
+ *
+ * Otherwise, behaves as realloc(), changing the allocation from osize to nsize.
+ * Shrinking the buffer size never fails.
+ *
+ * The original allocated buffer size is stored as an int64 at the beginning of
+ * the buffer to avoid actually reallocating when shrinking a buffer, since lua
+ * requires that this operation never fail.
+ *
+ * ud is the zcp_alloc_arg_t of this interpreter. Every allocation, including
+ * its size header, is charged against aa_alloc_remaining and credited back
+ * when it is freed. While aa_must_succeed is set, allocations use KM_SLEEP
+ * and are not refused for exceeding the budget; otherwise they use
+ * KM_NOSLEEP | KM_NORMALPRI and NULL is returned when the request does not
+ * fit in the remaining budget or the underlying allocation fails. Growing a
+ * buffer allocates a new one, copies osize bytes, and frees the old one.
+ */
+static void *
+zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
+{
+ zcp_alloc_arg_t *allocargs = ud;
+ int flags = (allocargs->aa_must_succeed) ?
+ KM_SLEEP : (KM_NOSLEEP | KM_NORMALPRI);
+
+ if (nsize == 0) {
+ if (ptr != NULL) {
+ int64_t *allocbuf = (int64_t *)ptr - 1;
+ int64_t allocsize = *allocbuf;
+ ASSERT3S(allocsize, >, 0);
+ ASSERT3S(allocargs->aa_alloc_remaining + allocsize, <=,
+ allocargs->aa_alloc_limit);
+ allocargs->aa_alloc_remaining += allocsize;
+ vmem_free(allocbuf, allocsize);
+ }
+ return (NULL);
+ } else if (ptr == NULL) {
+ int64_t *allocbuf;
+ int64_t allocsize = nsize + sizeof (int64_t);
+
+ if (!allocargs->aa_must_succeed &&
+ (allocsize <= 0 ||
+ allocsize > allocargs->aa_alloc_remaining)) {
+ return (NULL);
+ }
+
+ allocbuf = vmem_alloc(allocsize, flags);
+ if (allocbuf == NULL) {
+ return (NULL);
+ }
+ allocargs->aa_alloc_remaining -= allocsize;
+
+ *allocbuf = allocsize;
+ return (allocbuf + 1);
+ } else if (nsize <= osize) {
+ /*
+ * If shrinking the buffer, lua requires that the reallocation
+ * never fail.
+ */
+ return (ptr);
+ } else {
+ ASSERT3U(nsize, >, osize);
+
+ uint64_t *luabuf = zcp_lua_alloc(ud, NULL, 0, nsize);
+ if (luabuf == NULL) {
+ return (NULL);
+ }
+ (void) memcpy(luabuf, ptr, osize);
+ VERIFY3P(zcp_lua_alloc(ud, ptr, osize, 0), ==, NULL);
+ return (luabuf);
+ }
+}
+
+/* ARGSUSED */
+static void
+zcp_lua_counthook(lua_State *state, lua_Debug *ar)
+{
+ /*
+ * If we're called, check how many instructions the channel program has
+ * executed so far, and compare against the limit.
+ *
+ * zcp_eval_sync() installs this hook with a count of
+ * zfs_lua_check_instrlimit_interval, so the running total is advanced
+ * by that amount on every call. A zri_maxinstrs of 0 means no limit.
+ * When the limit is exceeded, zri_timed_out is set and a Lua error is
+ * raised.
+ */
+ lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY);
+ zcp_run_info_t *ri = lua_touserdata(state, -1);
+
+ ri->zri_curinstrs += zfs_lua_check_instrlimit_interval;
+ if (ri->zri_maxinstrs != 0 && ri->zri_curinstrs > ri->zri_maxinstrs) {
+ ri->zri_timed_out = B_TRUE;
+ (void) lua_pushstring(state,
+ "Channel program timed out.");
+ (void) lua_error(state);
+ }
+}
+
+static int
+zcp_panic_cb(lua_State *state)
+{
+ panic("unprotected error in call to Lua API (%s)\n",
+ lua_tostring(state, -1)); /* error value on top of the stack, as a string */
+ return (0); /* presumably not reached if panic() does not return */
+}
+
+/*
+ * Sync-task callback that runs the channel program prepared by zcp_eval().
+ * arg is the zcp_eval_arg_t built by zcp_eval(); tx is the transaction of the
+ * syncing txg. The outcome is reported through evalargs->ea_result and, for
+ * return values and error messages, evalargs->ea_outnvl:
+ *
+ *   0       the program finished; a single return value, if any, is stored
+ *           under ZCP_RET_RETURN
+ *   ECHRNG  the program raised an error or returned more than one value; the
+ *           error is stored under ZCP_RET_ERROR
+ *   ETIME   the instruction limit was exceeded
+ *   ENOSPC  Lua ran out of memory
+ */
+static void
+zcp_eval_sync(void *arg, dmu_tx_t *tx)
+{
+	int err;
+	zcp_run_info_t ri;
+	zcp_eval_arg_t *evalargs = arg;
+	lua_State *state = evalargs->ea_state;
+
+	/*
+	 * Open context should have setup the stack to contain:
+	 * 1: Error handler callback
+	 * 2: Script to run (converted to a Lua function)
+	 * 3: nvlist input to function (converted to Lua table or nil)
+	 */
+	VERIFY3U(3, ==, lua_gettop(state));
+
+	/*
+	 * Store the zcp_run_info_t struct for this run in the Lua registry.
+	 * Registry entries are not directly accessible by the Lua scripts but
+	 * can be accessed by our callbacks.
+	 */
+	ri.zri_space_used = 0;
+	ri.zri_pool = dmu_tx_pool(tx);
+	ri.zri_cred = evalargs->ea_cred;
+	ri.zri_tx = tx;
+	ri.zri_timed_out = B_FALSE;
+	ri.zri_cleanup = NULL;
+	ri.zri_cleanup_arg = NULL;
+	ri.zri_curinstrs = 0;
+	ri.zri_maxinstrs = evalargs->ea_instrlimit;
+
+	lua_pushlightuserdata(state, &ri);
+	lua_setfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY);
+	VERIFY3U(3, ==, lua_gettop(state));
+
+	/*
+	 * Tell the Lua interpreter to call our handler every count
+	 * instructions. Channel programs that execute too many instructions
+	 * should die with ETIME.
+	 */
+	(void) lua_sethook(state, zcp_lua_counthook, LUA_MASKCOUNT,
+	    zfs_lua_check_instrlimit_interval);
+
+	/*
+	 * Tell the Lua memory allocator to stop using KM_SLEEP before handing
+	 * off control to the channel program. Channel programs that use too
+	 * much memory should die with ENOSPC.
+	 */
+	evalargs->ea_allocargs->aa_must_succeed = B_FALSE;
+
+	/*
+	 * Call the Lua function that open-context passed us. This pops the
+	 * function and its input from the stack and pushes any return
+	 * or error values.
+	 */
+	err = lua_pcall(state, 1, LUA_MULTRET, 1);
+
+	/*
+	 * Let Lua use KM_SLEEP while we interpret the return values.
+	 */
+	evalargs->ea_allocargs->aa_must_succeed = B_TRUE;
+
+	/*
+	 * Remove the error handler callback from the stack. At this point,
+	 * if there is a cleanup function registered, then it was registered
+	 * but never run or removed, which should never occur.
+	 */
+	ASSERT3P(ri.zri_cleanup, ==, NULL);
+	lua_remove(state, 1);
+
+	switch (err) {
+	case LUA_OK: {
+		/*
+		 * Lua supports returning multiple values in a single return
+		 * statement. Return values will have been pushed onto the
+		 * stack:
+		 * 1: Return value 1
+		 * 2: Return value 2
+		 * 3: etc...
+		 * To simplify the process of retrieving a return value from a
+		 * channel program, we disallow returning more than one value
+		 * to ZFS from the Lua script, yielding a singleton return
+		 * nvlist of the form { "return": Return value 1 }.
+		 */
+		int return_count = lua_gettop(state);
+
+		if (return_count == 1) {
+			evalargs->ea_result = 0;
+			zcp_convert_return_values(state, evalargs->ea_outnvl,
+			    ZCP_RET_RETURN, evalargs);
+		} else if (return_count > 1) {
+			evalargs->ea_result = SET_ERROR(ECHRNG);
+			/*
+			 * zcp_convert_return_values() converts the value in
+			 * stack slot 1, so discard the program's return
+			 * values first. Otherwise the first return value
+			 * would be reported in place of the error message.
+			 */
+			lua_settop(state, 0);
+			(void) lua_pushfstring(state, "Multiple return "
+			    "values not supported");
+			zcp_convert_return_values(state, evalargs->ea_outnvl,
+			    ZCP_RET_ERROR, evalargs);
+		}
+		break;
+	}
+	case LUA_ERRRUN:
+	case LUA_ERRGCMM: {
+		/*
+		 * The channel program encountered a fatal error within the
+		 * script, such as failing an assertion, or calling a function
+		 * with incompatible arguments. The error value and the
+		 * traceback generated by zcp_error_handler() should be on the
+		 * stack.
+		 */
+		VERIFY3U(1, ==, lua_gettop(state));
+		if (ri.zri_timed_out) {
+			evalargs->ea_result = SET_ERROR(ETIME);
+		} else {
+			evalargs->ea_result = SET_ERROR(ECHRNG);
+		}
+
+		zcp_convert_return_values(state, evalargs->ea_outnvl,
+		    ZCP_RET_ERROR, evalargs);
+
+		/* On timeout, also report how many instructions were run. */
+		if (evalargs->ea_result == ETIME &&
+		    evalargs->ea_outnvl != NULL) {
+			(void) nvlist_add_uint64(evalargs->ea_outnvl,
+			    ZCP_ARG_INSTRLIMIT, ri.zri_curinstrs);
+		}
+		break;
+	}
+	case LUA_ERRERR: {
+		/*
+		 * The channel program encountered a fatal error within the
+		 * script, and we encountered another error while trying to
+		 * compute the traceback in zcp_error_handler(). We can only
+		 * return the error message.
+		 */
+		VERIFY3U(1, ==, lua_gettop(state));
+		if (ri.zri_timed_out) {
+			evalargs->ea_result = SET_ERROR(ETIME);
+		} else {
+			evalargs->ea_result = SET_ERROR(ECHRNG);
+		}
+
+		zcp_convert_return_values(state, evalargs->ea_outnvl,
+		    ZCP_RET_ERROR, evalargs);
+		break;
+	}
+	case LUA_ERRMEM:
+		/*
+		 * Lua ran out of memory while running the channel program.
+		 * There's not much we can do.
+		 */
+		evalargs->ea_result = SET_ERROR(ENOSPC);
+		break;
+	default:
+		VERIFY0(err);
+	}
+}
+
+/*
+ * Run the channel program 'program' (Lua source text) against the pool named
+ * poolname.
+ *
+ * instrlimit may not exceed zfs_lua_max_instrlimit; 0 disables the
+ * instruction limit. memlimit must be nonzero and may not exceed
+ * zfs_lua_max_memlimit. nvarg is converted to a Lua value and passed to the
+ * program as its argument (NULL becomes nil). The program's return value, or
+ * an error message, is added to outnvl.
+ *
+ * Returns EINVAL for an out-of-range limit, a syntax error in the program,
+ * or an argument that cannot be converted. If the sync task cannot be
+ * dispatched, the error from dsl_sync_task() is returned. Otherwise the
+ * result set by zcp_eval_sync() is returned.
+ */
+int
+zcp_eval(const char *poolname, const char *program, uint64_t instrlimit,
+    uint64_t memlimit, nvpair_t *nvarg, nvlist_t *outnvl)
+{
+	int err;
+	lua_State *state;
+	zcp_eval_arg_t evalargs;
+
+	if (instrlimit > zfs_lua_max_instrlimit)
+		return (SET_ERROR(EINVAL));
+	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
+		return (SET_ERROR(EINVAL));
+
+	zcp_alloc_arg_t allocargs = {
+		.aa_must_succeed = B_TRUE,
+		.aa_alloc_remaining = (int64_t)memlimit,
+		.aa_alloc_limit = (int64_t)memlimit,
+	};
+
+	/*
+	 * Creates a Lua state with a memory allocator that uses KM_SLEEP.
+	 * This should never fail.
+	 */
+	state = lua_newstate(zcp_lua_alloc, &allocargs);
+	VERIFY(state != NULL);
+	(void) lua_atpanic(state, zcp_panic_cb);
+
+	/*
+	 * Load core Lua libraries we want access to.
+	 */
+	VERIFY3U(1, ==, luaopen_base(state));
+	lua_pop(state, 1);
+	VERIFY3U(1, ==, luaopen_coroutine(state));
+	lua_setglobal(state, LUA_COLIBNAME);
+	VERIFY0(lua_gettop(state));
+	VERIFY3U(1, ==, luaopen_string(state));
+	lua_setglobal(state, LUA_STRLIBNAME);
+	VERIFY0(lua_gettop(state));
+	VERIFY3U(1, ==, luaopen_table(state));
+	lua_setglobal(state, LUA_TABLIBNAME);
+	VERIFY0(lua_gettop(state));
+
+	/*
+	 * Load globally visible variables such as errno aliases.
+	 */
+	zcp_load_globals(state);
+	VERIFY0(lua_gettop(state));
+
+	/*
+	 * Load ZFS-specific modules.
+	 */
+	lua_newtable(state);
+	VERIFY3U(1, ==, zcp_load_list_lib(state));
+	lua_setfield(state, -2, "list");
+	VERIFY3U(1, ==, zcp_load_synctask_lib(state, B_FALSE));
+	lua_setfield(state, -2, "check");
+	VERIFY3U(1, ==, zcp_load_synctask_lib(state, B_TRUE));
+	lua_setfield(state, -2, "sync");
+	VERIFY3U(1, ==, zcp_load_get_lib(state));
+	lua_pushcclosure(state, zcp_debug_info.func, 0);
+	lua_setfield(state, -2, zcp_debug_info.name);
+	lua_pushcclosure(state, zcp_exists_info.func, 0);
+	lua_setfield(state, -2, zcp_exists_info.name);
+	lua_setglobal(state, "zfs");
+	VERIFY0(lua_gettop(state));
+
+	/*
+	 * Push the error-callback that calculates Lua stack traces on
+	 * unexpected failures.
+	 */
+	lua_pushcfunction(state, zcp_error_handler);
+	VERIFY3U(1, ==, lua_gettop(state));
+
+	/*
+	 * Load the actual script as a function onto the stack as text ("t").
+	 * The only valid error condition is a syntax error in the script.
+	 * ERRMEM should not be possible because our allocator is using
+	 * KM_SLEEP. ERRGCMM should not be possible because we have not added
+	 * any objects with __gc metamethods to the interpreter that could
+	 * fail.
+	 */
+	err = luaL_loadbufferx(state, program, strlen(program),
+	    "channel program", "t");
+	if (err == LUA_ERRSYNTAX) {
+		fnvlist_add_string(outnvl, ZCP_RET_ERROR,
+		    lua_tostring(state, -1));
+		lua_close(state);
+		return (SET_ERROR(EINVAL));
+	}
+	VERIFY0(err);
+	VERIFY3U(2, ==, lua_gettop(state));
+
+	/*
+	 * Convert the input nvlist to a Lua object and put it on top of the
+	 * stack.
+	 */
+	char errmsg[128];
+	err = zcp_nvpair_value_to_lua(state, nvarg,
+	    errmsg, sizeof (errmsg));
+	if (err != 0) {
+		fnvlist_add_string(outnvl, ZCP_RET_ERROR, errmsg);
+		lua_close(state);
+		return (SET_ERROR(EINVAL));
+	}
+	VERIFY3U(3, ==, lua_gettop(state));
+
+	evalargs.ea_state = state;
+	evalargs.ea_allocargs = &allocargs;
+	evalargs.ea_instrlimit = instrlimit;
+	evalargs.ea_cred = CRED();
+	evalargs.ea_outnvl = outnvl;
+	evalargs.ea_result = 0;
+
+	/*
+	 * zcp_eval_check() always succeeds, so a nonzero return here means
+	 * the sync task could not be dispatched at all (presumably because
+	 * the pool could not be opened). Report that to the caller instead
+	 * of panicking.
+	 */
+	err = dsl_sync_task(poolname, zcp_eval_check,
+	    zcp_eval_sync, &evalargs, 0, ZFS_SPACE_CHECK_NONE);
+	if (err != 0)
+		evalargs.ea_result = err;
+
+	lua_close(state);
+
+	return (evalargs.ea_result);
+}
+
+/*
+ * Return the run-info structure of the channel program executing in
+ * 'state'.  The pointer is read as userdata from the Lua registry entry
+ * named ZCP_RUN_INFO_KEY; the registry value is popped again before
+ * returning, so the Lua stack height is left unchanged.
+ */
+zcp_run_info_t *
+zcp_run_info(lua_State *state)
+{
+	zcp_run_info_t *info;
+
+	lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY);
+	info = lua_touserdata(state, -1);
+	lua_pop(state, 1);
+
+	return (info);
+}
+
+/*
+ * Argument Parsing
+ * ================
+ *
+ * The Lua language allows methods to be called with any number
+ * of arguments of any type. When calling back into ZFS we need to sanitize
+ * arguments from channel programs to make sure unexpected arguments or
+ * arguments of the wrong type result in clear error messages. To do this
+ * in a uniform way all callbacks from channel programs should use the
+ * zcp_parse_args() function to interpret inputs.
+ *
+ * Positional vs Keyword Arguments
+ * ===============================
+ *
+ * Every callback function takes a fixed set of required positional arguments
+ * and optional keyword arguments. For example, the destroy function takes
+ * a single positional string argument (the name of the dataset to destroy)
+ * and an optional "defer" keyword boolean argument. When calling Lua functions
+ * with parentheses, only positional arguments can be used:
+ *
+ * zfs.sync.destroy("rpool@snap")
+ *
+ * To use keyword arguments, functions should be called with a single argument
+ * that is a Lua table containing mappings of integer -> positional arguments
+ * and string -> keyword arguments:
+ *
+ * zfs.sync.destroy({[1]="rpool@snap", defer=true})
+ *
+ * The Lua language allows curly braces to be used in place of parentheses as
+ * syntactic sugar for this calling convention:
+ *
+ * zfs.sync.destroy{"rpool@snap", defer=true}
+ */
+
+/*
+ * Throw a Lua error describing an invalid call to the library function
+ * 'fname'.
+ *
+ * The error text is produced from the printf-style 'fmt' and its arguments.
+ * When it fits in the local buffer, a summary of the expected signature is
+ * appended in the form
+ *
+ *	<message>: fname{<parg(type)>, ..., kwarg=(type), ...}
+ *
+ * 'pargs' and 'kwargs' are arrays terminated by an entry whose za_name is
+ * NULL. If the signature dump would not fit in the output buffer, only the
+ * formatted message is reported.
+ *
+ * Control is not expected to come back from luaL_error(); the trailing
+ * panic() documents that.
+ */
+static void
+zcp_args_error(lua_State *state, const char *fname, const zcp_arg_t *pargs,
+    const zcp_arg_t *kwargs, const char *fmt, ...)
+{
+	int i;
+	char errmsg[512];
+	size_t len = sizeof (errmsg);
+	size_t msglen = 0;
+	va_list argp;
+
+	/*
+	 * The formatted message may embed strings supplied by the channel
+	 * program (for example the name of an invalid kwarg), so its length
+	 * is not bounded by anything we control. vsnprintf() truncates and
+	 * NUL-terminates; a truncated message is acceptable, asserting on
+	 * the length is not.
+	 */
+	va_start(argp, fmt);
+	(void) vsnprintf(errmsg, len, fmt, argp);
+	va_end(argp);
+
+	/*
+	 * Calculate the total length of the final string, including extra
+	 * formatting characters. If the argument dump would be too large,
+	 * only print the error string. The separator appended between
+	 * arguments is ", ", i.e. two characters.
+	 */
+	msglen = strlen(errmsg);
+	msglen += strlen(fname) + 4; /* ": " + "{" + "}" */
+	for (i = 0; pargs[i].za_name != NULL; i++) {
+		msglen += strlen(pargs[i].za_name);
+		msglen += strlen(lua_typename(state, pargs[i].za_lua_type));
+		if (pargs[i + 1].za_name != NULL || kwargs[0].za_name != NULL)
+			msglen += 6; /* "<" + "(" + ")>" + ", " */
+		else
+			msglen += 4; /* "<" + "(" + ")>" */
+	}
+	for (i = 0; kwargs[i].za_name != NULL; i++) {
+		msglen += strlen(kwargs[i].za_name);
+		msglen += strlen(lua_typename(state, kwargs[i].za_lua_type));
+		if (kwargs[i + 1].za_name != NULL)
+			msglen += 5; /* "=(" + ")" + ", " */
+		else
+			msglen += 3; /* "=(" + ")" */
+	}
+
+	/*
+	 * errmsg is always handed to luaL_error() as the argument of a "%s"
+	 * format and never as the format itself: it may contain '%'
+	 * characters that would otherwise be interpreted as conversions.
+	 */
+	if (msglen >= len)
+		(void) luaL_error(state, "%s", errmsg);
+
+	VERIFY3U(len, >, strlcat(errmsg, ": ", len));
+	VERIFY3U(len, >, strlcat(errmsg, fname, len));
+	VERIFY3U(len, >, strlcat(errmsg, "{", len));
+	for (i = 0; pargs[i].za_name != NULL; i++) {
+		VERIFY3U(len, >, strlcat(errmsg, "<", len));
+		VERIFY3U(len, >, strlcat(errmsg, pargs[i].za_name, len));
+		VERIFY3U(len, >, strlcat(errmsg, "(", len));
+		VERIFY3U(len, >, strlcat(errmsg,
+		    lua_typename(state, pargs[i].za_lua_type), len));
+		VERIFY3U(len, >, strlcat(errmsg, ")>", len));
+		if (pargs[i + 1].za_name != NULL || kwargs[0].za_name != NULL) {
+			VERIFY3U(len, >, strlcat(errmsg, ", ", len));
+		}
+	}
+	for (i = 0; kwargs[i].za_name != NULL; i++) {
+		VERIFY3U(len, >, strlcat(errmsg, kwargs[i].za_name, len));
+		VERIFY3U(len, >, strlcat(errmsg, "=(", len));
+		VERIFY3U(len, >, strlcat(errmsg,
+		    lua_typename(state, kwargs[i].za_lua_type), len));
+		VERIFY3U(len, >, strlcat(errmsg, ")", len));
+		if (kwargs[i + 1].za_name != NULL) {
+			VERIFY3U(len, >, strlcat(errmsg, ", ", len));
+		}
+	}
+	VERIFY3U(len, >, strlcat(errmsg, "}", len));
+
+	(void) luaL_error(state, "%s", errmsg);
+	panic("unreachable code");
+}
+
+/*
+ * Parse arguments passed with the table calling convention. The argument
+ * table is expected at stack index 1. Each positional argument is looked up
+ * under its 1-based integer key and each keyword argument under its name;
+ * the looked-up value is left on the stack and the entry is cleared from
+ * the table. Anything still in the table afterwards is reported as an
+ * error. On success the table itself is removed from the stack.
+ */
+static void
+zcp_parse_table_args(lua_State *state, const char *fname,
+    const zcp_arg_t *pargs, const zcp_arg_t *kwargs)
+{
+	const zcp_arg_t *arg;
+	int pos;
+	int actual;
+
+	for (pos = 1, arg = pargs; arg->za_name != NULL; pos++, arg++) {
+		/* Fetch table[pos]; the value stays on top of the stack. */
+		lua_pushinteger(state, pos);
+		lua_gettable(state, 1);
+
+		actual = lua_type(state, -1);
+		if (actual == LUA_TNIL) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "too few arguments");
+			panic("unreachable code");
+		}
+		if (actual != arg->za_lua_type) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "arg %d wrong type (is '%s', expected '%s')",
+			    pos, lua_typename(state, actual),
+			    lua_typename(state, arg->za_lua_type));
+			panic("unreachable code");
+		}
+
+		/* table[pos] = nil, so it is not seen as a leftover below. */
+		lua_pushinteger(state, pos);
+		lua_pushnil(state);
+		lua_settable(state, 1);
+	}
+
+	for (arg = kwargs; arg->za_name != NULL; arg++) {
+		/*
+		 * Fetch table[name]; nil means the kwarg was omitted. The
+		 * value stays on top of the stack either way.
+		 */
+		lua_getfield(state, 1, arg->za_name);
+
+		actual = lua_type(state, -1);
+		if (actual != LUA_TNIL && actual != arg->za_lua_type) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "kwarg '%s' wrong type (is '%s', expected '%s')",
+			    arg->za_name, lua_typename(state, actual),
+			    lua_typename(state, arg->za_lua_type));
+			panic("unreachable code");
+		}
+
+		/* table[name] = nil */
+		lua_pushnil(state);
+		lua_setfield(state, 1, arg->za_name);
+	}
+
+	/*
+	 * The table should now be empty. If a first entry can still be
+	 * found, classify it by its key (at index -2) and raise an error.
+	 */
+	lua_pushnil(state);
+	if (lua_next(state, 1) != 0) {
+		if (lua_isnumber(state, -2) && lua_tointeger(state, -2) > 0) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "too many positional arguments");
+		} else if (lua_isstring(state, -2)) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "invalid kwarg '%s'", lua_tostring(state, -2));
+		} else {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "kwarg keys must be strings");
+		}
+		panic("unreachable code");
+	}
+
+	lua_remove(state, 1);
+}
+
+/*
+ * Parse arguments passed with the plain positional calling convention.
+ * Stack slots 1..n are checked against the n entries of 'pargs'; a missing
+ * slot, a slot of the wrong type, or any extra slot raises an error. On
+ * success one nil is pushed for every entry of 'kwargs', since keyword
+ * arguments cannot be supplied with this convention.
+ */
+static void
+zcp_parse_pos_args(lua_State *state, const char *fname, const zcp_arg_t *pargs,
+    const zcp_arg_t *kwargs)
+{
+	const zcp_arg_t *arg;
+	int nargs = 0;
+
+	for (arg = pargs; arg->za_name != NULL; arg++) {
+		int actual = lua_type(state, nargs + 1);
+
+		if (actual == LUA_TNONE) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "too few arguments");
+			panic("unreachable code");
+		}
+		if (actual != arg->za_lua_type) {
+			zcp_args_error(state, fname, pargs, kwargs,
+			    "arg %d wrong type (is '%s', expected '%s')",
+			    nargs + 1, lua_typename(state, actual),
+			    lua_typename(state, arg->za_lua_type));
+			panic("unreachable code");
+		}
+		nargs++;
+	}
+
+	if (lua_gettop(state) != nargs) {
+		zcp_args_error(state, fname, pargs, kwargs,
+		    "too many positional arguments");
+		panic("unreachable code");
+	}
+
+	for (arg = kwargs; arg->za_name != NULL; arg++)
+		lua_pushnil(state);
+}
+
+/*
+ * Validate the Lua stack of a library callback against its declared
+ * positional ('pargs') and keyword ('kwargs') arguments.
+ *
+ * A stack holding exactly one value that is a table is treated as the
+ * curly-brace calling convention and handled by zcp_parse_table_args();
+ * every other stack is handled by zcp_parse_pos_args(). Both helpers raise
+ * a Lua error on mismatch. On success the stack holds the positional
+ * arguments followed by the keyword argument values in declaration order,
+ * with nil standing in for any keyword argument that was not given.
+ *
+ * Callbacks should call this before touching the Lua stack, because the
+ * stack is assumed to contain exactly the function arguments.
+ */
+void
+zcp_parse_args(lua_State *state, const char *fname, const zcp_arg_t *pargs,
+    const zcp_arg_t *kwargs)
+{
+	if (lua_gettop(state) != 1 || !lua_istable(state, 1)) {
+		zcp_parse_pos_args(state, fname, pargs, kwargs);
+		return;
+	}
+
+	zcp_parse_table_args(state, fname, pargs, kwargs);
+}
diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c
new file mode 100644
index 000000000..7645bc158
--- /dev/null
+++ b/module/zfs/zcp_get.c
@@ -0,0 +1,876 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/lua/lua.h>
+#include <sys/lua/lualib.h>
+#include <sys/lua/lauxlib.h>
+
+#include <zfs_prop.h>
+
+#include <sys/dsl_prop.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dmu_objset.h>
+#include <sys/mntent.h>
+#include <sys/sunddi.h>
+#include <sys/zap.h>
+#include <sys/zcp.h>
+#include <sys/zcp_iter.h>
+#include <sys/zcp_global.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_znode.h>
+#include <sys/zvol.h>
+
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
+
+/*
+ * Store the zfs_type_t of dataset 'ds' in '*type'. Snapshots are reported
+ * as ZFS_TYPE_SNAPSHOT; otherwise the objset's os_type selects filesystem
+ * or volume. Returns 0 on success, the error from dmu_objset_from_ds(), or
+ * EINVAL for any other objset type.
+ */
+static int
+get_objset_type(dsl_dataset_t *ds, zfs_type_t *type)
+{
+	objset_t *os;
+	int err = dmu_objset_from_ds(ds, &os);
+
+	if (err != 0)
+		return (err);
+
+	if (ds->ds_is_snapshot) {
+		*type = ZFS_TYPE_SNAPSHOT;
+		return (0);
+	}
+
+	switch (os->os_phys->os_type) {
+	case DMU_OST_ZFS:
+		*type = ZFS_TYPE_FILESYSTEM;
+		return (0);
+	case DMU_OST_ZVOL:
+		*type = ZFS_TYPE_VOLUME;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+
+/*
+ * Copy the name of ds's type ("snapshot", "filesystem" or "volume") into
+ * 'str', which should be at least 12 bytes long. Returns 0 on success, the
+ * error from get_objset_type(), or EINVAL for an unrecognised type.
+ */
+static int
+get_objset_type_name(dsl_dataset_t *ds, char *str)
+{
+	zfs_type_t type;
+	const char *name;
+	int err = get_objset_type(ds, &type);
+
+	if (err != 0)
+		return (err);
+
+	switch (type) {
+	case ZFS_TYPE_SNAPSHOT:
+		name = "snapshot";
+		break;
+	case ZFS_TYPE_FILESYSTEM:
+		name = "filesystem";
+		break;
+	case ZFS_TYPE_VOLUME:
+		name = "volume";
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	(void) strcpy(str, name);
+	return (0);
+}
+
+/*
+ * Push the source of a property onto the Lua stack. Read-only properties
+ * and ZFS_PROP_VERSION get nil; otherwise an empty setpoint is reported as
+ * "default" and any other setpoint is pushed as is.
+ */
+static void
+get_prop_src(lua_State *state, const char *setpoint, zfs_prop_t prop)
+{
+	if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION) {
+		lua_pushnil(state);
+		return;
+	}
+
+	(void) lua_pushstring(state,
+	    (setpoint[0] == '\0') ? "default" : setpoint);
+}
+
+/*
+ * Translate an error hit while fetching a property. ENOENT is treated as
+ * non-fatal: nothing is pushed and 0 is returned. Every other error is
+ * raised as a Lua error naming the property and the dataset.
+ */
+static int
+zcp_handle_error(lua_State *state, const char *dataset_name,
+    const char *property_name, int error)
+{
+	ASSERT3S(error, !=, 0);
+
+	switch (error) {
+	case ENOENT:
+		return (0);
+	case EINVAL:
+		return (luaL_error(state,
+		    "property '%s' is not a valid property on dataset '%s'",
+		    property_name, dataset_name));
+	case EIO:
+		return (luaL_error(state,
+		    "I/O error while retrieving property '%s' on dataset '%s'",
+		    property_name, dataset_name));
+	default:
+		return (luaL_error(state, "unexpected error %d while "
+		    "retrieving property '%s' on dataset '%s'",
+		    error, property_name, dataset_name));
+	}
+}
+
+/*
+ * Fetch a user defined property of 'dataset_name'. When it exists, the
+ * value and its setpoint are pushed onto the Lua stack and 2 is returned.
+ * Otherwise the error is passed to zcp_handle_error().
+ */
+static int
+zcp_get_user_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name,
+    const char *property_name)
+{
+	char setpoint[ZFS_MAX_DATASET_NAME_LEN];
+	char *value;
+	int err;
+
+	/*
+	 * zcp_dataset_hold() either hands back the held dataset or raises a
+	 * Lua error and longjmps out of the zfs.get_prop call.
+	 */
+	dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG);
+	if (ds == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	value = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
+	err = dsl_prop_get_ds(ds, property_name, 1, ZAP_MAXVALUELEN,
+	    value, setpoint);
+	dsl_dataset_rele(ds, FTAG);
+
+	if (err == 0) {
+		(void) lua_pushstring(state, value);
+		(void) lua_pushstring(state, setpoint);
+	}
+
+	/* Release the buffer before zcp_handle_error() can raise. */
+	kmem_free(value, ZAP_MAXVALUELEN);
+
+	if (err != 0) {
+		return (zcp_handle_error(state, dataset_name, property_name,
+		    err));
+	}
+	return (2);
+}
+
+/*
+ * Read one of the properties kept in the dataset's dsl_dir (usedsnap,
+ * usedchild, usedds, usedrefreserv, logicalused) into '*val' while holding
+ * dd_lock. Returns 0 on success, or ENOENT if 'zfs_prop' is not one of
+ * these properties.
+ */
+static int
+get_dsl_dir_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop,
+    uint64_t *val)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+	int err = 0;
+
+	mutex_enter(&dd->dd_lock);
+	switch (zfs_prop) {
+	case ZFS_PROP_USEDSNAP:
+		*val = dsl_dir_get_usedsnap(dd);
+		break;
+	case ZFS_PROP_USEDCHILD:
+		*val = dsl_dir_get_usedchild(dd);
+		break;
+	case ZFS_PROP_USEDDS:
+		*val = dsl_dir_get_usedds(dd);
+		break;
+	case ZFS_PROP_USEDREFRESERV:
+		*val = dsl_dir_get_usedrefreserv(dd);
+		break;
+	case ZFS_PROP_LOGICALUSED:
+		*val = dsl_dir_get_logicalused(dd);
+		break;
+	default:
+		err = ENOENT;
+		break;
+	}
+	mutex_exit(&dd->dd_lock);
+
+	return (err);
+}
+
+/*
+ * Takes a dataset, a property, a value and that value's setpoint as
+ * found in the ZAP. Checks if the property has been changed in the vfs.
+ * If so, val and setpoint will be overwritten with updated content.
+ * Otherwise, they are left unchanged.
+ *
+ * Return values:
+ * 0 - success; when the vfs value differs from *val, *val is replaced
+ * and setpoint is set to "temporary". Always returned (without touching
+ * val or setpoint) when not built with _KERNEL.
+ * EINVAL - the objset is not of type DMU_OST_ZFS.
+ * ESRCH - the objset has no user pointer (zfsvfs) attached.
+ * ENOENT - zfs_prop is not one of the properties checked below.
+ * Any error from dmu_objset_from_ds() is returned as is.
+ */
+static int
+get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
+ char *setpoint)
+{
+#if !defined(_KERNEL)
+ return (0);
+#else
+ int error;
+ zfsvfs_t *zfvp;
+ vfs_t *vfsp;
+ objset_t *os;
+ uint64_t tmp = *val; /* stays equal to *val unless the vfs overrides it */
+
+ error = dmu_objset_from_ds(ds, &os);
+ if (error != 0)
+ return (error);
+
+ if (dmu_objset_type(os) != DMU_OST_ZFS)
+ return (EINVAL);
+
+ mutex_enter(&os->os_user_ptr_lock);
+ zfvp = dmu_objset_get_user(os);
+ mutex_exit(&os->os_user_ptr_lock);
+ if (zfvp == NULL)
+ return (ESRCH);
+
+ vfsp = zfvp->z_vfs;
+
+ switch (zfs_prop) { /* each vfs_do_* flag gates use of the matching vfs value */
+ case ZFS_PROP_ATIME:
+ if (vfsp->vfs_do_atime)
+ tmp = vfsp->vfs_atime;
+ break;
+ case ZFS_PROP_RELATIME:
+ if (vfsp->vfs_do_relatime)
+ tmp = vfsp->vfs_relatime;
+ break;
+ case ZFS_PROP_DEVICES:
+ if (vfsp->vfs_do_devices)
+ tmp = vfsp->vfs_devices;
+ break;
+ case ZFS_PROP_EXEC:
+ if (vfsp->vfs_do_exec)
+ tmp = vfsp->vfs_exec;
+ break;
+ case ZFS_PROP_SETUID:
+ if (vfsp->vfs_do_setuid)
+ tmp = vfsp->vfs_setuid;
+ break;
+ case ZFS_PROP_READONLY:
+ if (vfsp->vfs_do_readonly)
+ tmp = vfsp->vfs_readonly;
+ break;
+ case ZFS_PROP_XATTR:
+ if (vfsp->vfs_do_xattr)
+ tmp = vfsp->vfs_xattr;
+ break;
+ case ZFS_PROP_NBMAND:
+ if (vfsp->vfs_do_nbmand)
+ tmp = vfsp->vfs_nbmand;
+ break;
+ default:
+ return (ENOENT);
+ }
+
+ if (tmp != *val) { /* only report "temporary" when the value really differs */
+ (void) strcpy(setpoint, "temporary");
+ *val = tmp;
+ }
+ return (0);
+#endif
+}
+
+/*
+ * Check if the property we're looking for is stored at the dsl_dataset or
+ * dsl_dir level. If so, push the property value and source onto the lua stack
+ * and return 0. If it is not present or a failure occurs in lookup, return a
+ * non-zero error value.
+ *
+ * Each case of the first switch fills in numval or strval (and, for some
+ * properties, setpoint); properties it does not list are looked up with
+ * get_dsl_dir_prop(). The second switch pushes the value according to the
+ * property's declared type, and get_prop_src() then pushes the source.
+ * ZFS_PROP_CLONES is the exception: it pushes its list and a nil source
+ * itself and returns directly.
+ *
+ * dsname is only used by the ZFS_PROP_MOUNTPOINT and ZFS_PROP_VOLSIZE cases.
+ * The temporary strval buffer is freed on every return path.
+ */
+static int
+get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname,
+ zfs_prop_t zfs_prop)
+{
+ int error = 0;
+ objset_t *os;
+ uint64_t numval;
+ char *strval = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN] =
+ "Internal error - setpoint not determined";
+ zfs_type_t ds_type;
+ zprop_type_t prop_type = zfs_prop_get_type(zfs_prop);
+ (void) get_objset_type(ds, &ds_type); /* result ignored; ds_type is only read by the ASSERTs below */
+
+ switch (zfs_prop) {
+ case ZFS_PROP_REFRATIO:
+ numval = dsl_get_refratio(ds);
+ break;
+ case ZFS_PROP_USED:
+ numval = dsl_get_used(ds);
+ break;
+ case ZFS_PROP_CLONES: {
+ nvlist_t *clones = fnvlist_alloc();
+ error = get_clones_stat_impl(ds, clones);
+ if (error == 0) {
+ /* push list to lua stack */
+ VERIFY0(zcp_nvlist_to_lua(state, clones, NULL, 0ULL));
+ /* source */
+ (void) lua_pushnil(state);
+ }
+ nvlist_free(clones);
+ kmem_free(strval, ZAP_MAXVALUELEN);
+ return (error); /* value and source already pushed; skip the common tail */
+ }
+ case ZFS_PROP_COMPRESSRATIO:
+ numval = dsl_get_compressratio(ds);
+ break;
+ case ZFS_PROP_CREATION:
+ numval = dsl_get_creation(ds);
+ break;
+ case ZFS_PROP_REFERENCED:
+ numval = dsl_get_referenced(ds);
+ break;
+ case ZFS_PROP_AVAILABLE:
+ numval = dsl_get_available(ds);
+ break;
+ case ZFS_PROP_LOGICALREFERENCED:
+ numval = dsl_get_logicalreferenced(ds);
+ break;
+ case ZFS_PROP_CREATETXG:
+ numval = dsl_get_creationtxg(ds);
+ break;
+ case ZFS_PROP_GUID:
+ numval = dsl_get_guid(ds);
+ break;
+ case ZFS_PROP_UNIQUE:
+ numval = dsl_get_unique(ds);
+ break;
+ case ZFS_PROP_OBJSETID:
+ numval = dsl_get_objsetid(ds);
+ break;
+ case ZFS_PROP_ORIGIN:
+ dsl_dir_get_origin(ds->ds_dir, strval);
+ break;
+ case ZFS_PROP_USERACCOUNTING:
+ error = dmu_objset_from_ds(ds, &os);
+ if (error == 0)
+ numval = dmu_objset_userspace_present(os);
+ break;
+ case ZFS_PROP_WRITTEN:
+ error = dsl_get_written(ds, &numval);
+ break;
+ case ZFS_PROP_TYPE:
+ error = get_objset_type_name(ds, strval);
+ break;
+ case ZFS_PROP_PREV_SNAP:
+ error = dsl_get_prev_snap(ds, strval);
+ break;
+ case ZFS_PROP_NAME:
+ dsl_dataset_name(ds, strval);
+ break;
+ case ZFS_PROP_MOUNTPOINT:
+ error = dsl_get_mountpoint(ds, dsname, strval, setpoint);
+ break;
+ case ZFS_PROP_VERSION:
+ /* should be a snapshot or filesystem */
+ ASSERT(ds_type != ZFS_TYPE_VOLUME);
+ error = dmu_objset_from_ds(ds, &os);
+ /* look in the master node for the version */
+ if (error == 0) {
+ error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+ sizeof (numval), 1, &numval);
+ }
+ break;
+ case ZFS_PROP_DEFER_DESTROY:
+ numval = dsl_get_defer_destroy(ds);
+ break;
+ case ZFS_PROP_USERREFS:
+ numval = dsl_get_userrefs(ds);
+ break;
+ case ZFS_PROP_FILESYSTEM_COUNT:
+ error = dsl_dir_get_filesystem_count(ds->ds_dir, &numval);
+ (void) strcpy(setpoint, ""); /* empty setpoint is reported as "default" by get_prop_src() */
+ break;
+ case ZFS_PROP_SNAPSHOT_COUNT:
+ error = dsl_dir_get_snapshot_count(ds->ds_dir, &numval);
+ (void) strcpy(setpoint, ""); /* empty setpoint is reported as "default" by get_prop_src() */
+ break;
+ case ZFS_PROP_NUMCLONES:
+ numval = dsl_get_numclones(ds);
+ break;
+ case ZFS_PROP_INCONSISTENT:
+ numval = dsl_get_inconsistent(ds);
+ break;
+ case ZFS_PROP_RECEIVE_RESUME_TOKEN: {
+ char *token = get_receive_resume_stats_impl(ds);
+
+ VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN),
+ <, ZAP_MAXVALUELEN);
+ if (strcmp(strval, "") == 0) { /* no token on this dataset; try the child receive stats */
+ char *childval = get_child_receive_stats(ds);
+
+ VERIFY3U(strlcpy(strval, childval, ZAP_MAXVALUELEN),
+ <, ZAP_MAXVALUELEN);
+ if (strcmp(strval, "") == 0)
+ error = ENOENT;
+
+ strfree(childval);
+ }
+ strfree(token);
+ break;
+ }
+ case ZFS_PROP_VOLSIZE:
+ ASSERT(ds_type == ZFS_TYPE_VOLUME ||
+ ds_type == ZFS_TYPE_SNAPSHOT);
+ error = dmu_objset_from_ds(ds, &os);
+ if (error == 0) {
+ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size",
+ sizeof (numval), 1, &numval);
+ }
+ if (error == 0)
+ (void) strcpy(setpoint, dsname);
+
+ break;
+ case ZFS_PROP_VOLBLOCKSIZE: {
+ ASSERT(ds_type == ZFS_TYPE_VOLUME);
+ dmu_object_info_t doi;
+ error = dmu_objset_from_ds(ds, &os);
+ if (error == 0) {
+ error = dmu_object_info(os, ZVOL_OBJ, &doi);
+ if (error == 0)
+ numval = doi.doi_data_block_size;
+ }
+ break;
+ }
+
+ case ZFS_PROP_KEYSTATUS:
+ case ZFS_PROP_KEYFORMAT: {
+ /* provide defaults in case no crypto obj exists */
+ setpoint[0] = '\0';
+ if (zfs_prop == ZFS_PROP_KEYSTATUS)
+ numval = ZFS_KEYSTATUS_NONE;
+ else
+ numval = ZFS_KEYFORMAT_NONE;
+
+ nvlist_t *nvl, *propval;
+ nvl = fnvlist_alloc();
+ dsl_dataset_crypt_stats(ds, nvl);
+ if (nvlist_lookup_nvlist(nvl, zfs_prop_to_name(zfs_prop),
+ &propval) == 0) {
+ char *source;
+
+ (void) nvlist_lookup_uint64(propval, ZPROP_VALUE,
+ &numval);
+ if (nvlist_lookup_string(propval, ZPROP_SOURCE,
+ &source) == 0)
+ strlcpy(setpoint, source, sizeof (setpoint));
+ }
+ nvlist_free(nvl);
+ break;
+ }
+
+ default:
+ /* Did not match these props, check in the dsl_dir */
+ error = get_dsl_dir_prop(ds, zfs_prop, &numval);
+ }
+ if (error != 0) {
+ kmem_free(strval, ZAP_MAXVALUELEN);
+ return (error);
+ }
+
+ switch (prop_type) { /* push the value in the form dictated by the property's type */
+ case PROP_TYPE_NUMBER: {
+ (void) lua_pushnumber(state, numval);
+ break;
+ }
+ case PROP_TYPE_STRING: {
+ (void) lua_pushstring(state, strval);
+ break;
+ }
+ case PROP_TYPE_INDEX: {
+ const char *propval;
+ error = zfs_prop_index_to_string(zfs_prop, numval, &propval);
+ if (error != 0) {
+ kmem_free(strval, ZAP_MAXVALUELEN);
+ return (error);
+ }
+ (void) lua_pushstring(state, propval);
+ break;
+ }
+ }
+ kmem_free(strval, ZAP_MAXVALUELEN);
+
+ /* Push the source to the stack */
+ get_prop_src(state, setpoint, zfs_prop);
+ return (0);
+}
+
+/*
+ * Look up a property and its source in the zap object. If the value is
+ * present and successfully retrieved, push the value and source on the
+ * lua stack and return 0. On failure, return a non-zero error value and
+ * push nothing.
+ *
+ * String properties are pushed as strings, index properties as the string
+ * form of their index, and all remaining properties as numbers. For
+ * non-string properties the value may be overridden by
+ * get_temporary_prop() before it is pushed.
+ */
+static int
+get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop)
+{
+	int error = 0;
+	char setpoint[ZFS_MAX_DATASET_NAME_LEN];
+	char *strval = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
+	uint64_t numval;
+	const char *prop_name = zfs_prop_to_name(zfs_prop);
+	zprop_type_t prop_type = zfs_prop_get_type(zfs_prop);
+
+	if (prop_type == PROP_TYPE_STRING) {
+		/* Push value to lua stack */
+		error = dsl_prop_get_ds(ds, prop_name, 1,
+		    ZAP_MAXVALUELEN, strval, setpoint);
+		if (error == 0)
+			(void) lua_pushstring(state, strval);
+	} else {
+		error = dsl_prop_get_ds(ds, prop_name, sizeof (numval),
+		    1, &numval, setpoint);
+
+		/*
+		 * Only continue when the lookup succeeded. On failure numval
+		 * and setpoint are not valid, and the lookup error must not
+		 * be replaced by the result of the index conversion below.
+		 */
+		if (error == 0) {
+			/* Fill in temporary value for prop, if applicable */
+			(void) get_temporary_prop(ds, zfs_prop, &numval,
+			    setpoint);
+
+			/* Push value to lua stack */
+			if (prop_type == PROP_TYPE_INDEX) {
+				const char *propval;
+				error = zfs_prop_index_to_string(zfs_prop,
+				    numval, &propval);
+				if (error == 0)
+					(void) lua_pushstring(state, propval);
+			} else {
+				(void) lua_pushnumber(state, numval);
+			}
+		}
+	}
+	kmem_free(strval, ZAP_MAXVALUELEN);
+	if (error == 0)
+		get_prop_src(state, setpoint, zfs_prop);
+	return (error);
+}
+
+/*
+ * Report whether 'zfs_prop' may be queried on dataset 'ds'.
+ * ZFS_PROP_ISCSIOPTIONS and ZFS_PROP_MOUNTED are never accepted,
+ * ZFS_PROP_ORIGIN only on clones, and everything else is decided by
+ * zfs_prop_valid_for_type() for the dataset's type. A dataset whose type
+ * cannot be determined accepts no property.
+ */
+boolean_t
+prop_valid_for_ds(dsl_dataset_t *ds, zfs_prop_t zfs_prop)
+{
+	zfs_type_t ds_type;
+
+	switch (zfs_prop) {
+	case ZFS_PROP_ISCSIOPTIONS:
+	case ZFS_PROP_MOUNTED:
+		/* properties not supported */
+		return (B_FALSE);
+	case ZFS_PROP_ORIGIN:
+		/* the origin prop only exists on clones */
+		if (!dsl_dir_is_clone(ds->ds_dir))
+			return (B_FALSE);
+		break;
+	default:
+		break;
+	}
+
+	if (get_objset_type(ds, &ds_type) != 0)
+		return (B_FALSE);
+
+	return (zfs_prop_valid_for_type(zfs_prop, ds_type, B_FALSE));
+}
+
+/*
+ * Fetch a native (system) property of 'dataset_name'. Returns 2 when the
+ * value and its source were pushed onto the Lua stack, and 0 (nothing
+ * pushed) when the property is not valid for the dataset or the lookup
+ * failed with a non-fatal error. Fatal errors are raised as Lua errors by
+ * zcp_handle_error().
+ */
+static int
+zcp_get_system_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name,
+    zfs_prop_t zfs_prop)
+{
+	const char *prop_name;
+	int err;
+
+	/*
+	 * zcp_dataset_hold() either hands back the held dataset or raises a
+	 * Lua error and longjmps out of the zfs.get_prop call.
+	 */
+	dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG);
+	if (ds == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	prop_name = zfs_prop_to_name(zfs_prop);
+
+	/* Nothing to report for a property this dataset cannot have. */
+	if (!prop_valid_for_ds(ds, zfs_prop)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (0);
+	}
+
+	/*
+	 * Try the directly accessible properties first and fall back to the
+	 * zap object only when the property was not found there.
+	 */
+	err = get_special_prop(state, ds, dataset_name, zfs_prop);
+	if (err == ENOENT)
+		err = get_zap_prop(state, ds, zfs_prop);
+
+	dsl_dataset_rele(ds, FTAG);
+
+	if (err != 0)
+		return (zcp_handle_error(state, dataset_name, prop_name, err));
+
+	/* Value and source were pushed by whichever lookup succeeded. */
+	return (2);
+}
+
+#ifdef _KERNEL
+/*
+ * Map a property name to the zfs_userquota_prop_t whose prefix in
+ * zfs_userquota_prop_prefixes[] it starts with. Returns
+ * ZFS_NUM_USERQUOTA_PROPS when no prefix matches.
+ */
+static zfs_userquota_prop_t
+get_userquota_prop(const char *prop_name)
+{
+	zfs_userquota_prop_t type = 0;
+
+	while (type < ZFS_NUM_USERQUOTA_PROPS) {
+		const char *prefix = zfs_userquota_prop_prefixes[type];
+
+		if (strncmp(prop_name, prefix, strlen(prefix)) == 0)
+			break;
+		type++;
+	}
+	return (type);
+}
+
+/*
+ * Given the name of a zfs_userquota_prop, this function determines the
+ * prop type as well as the numeric group/user ids based on the string
+ * following the '@' in the property name. On success, returns 0. On failure,
+ * returns a non-zero error.
+ * 'domain' must be free'd by caller using strfree()
+ *
+ * '*type' is always written. '*domain' is only written on success; it is
+ * NULL when the name carries a plain numeric id rather than a SID. EINVAL
+ * is returned when no userquota prefix matches or when the id text is
+ * followed by unparsed characters.
+ */
+static int
+parse_userquota_prop(const char *prop_name, zfs_userquota_prop_t *type,
+ char **domain, uint64_t *rid)
+{
+ char *cp, *end, *domain_val;
+
+ *type = get_userquota_prop(prop_name);
+ if (*type >= ZFS_NUM_USERQUOTA_PROPS)
+ return (EINVAL);
+
+ *rid = 0;
+ cp = strchr(prop_name, '@') + 1; /* assumes the matched prefix contains '@' - TODO confirm */
+ if (strncmp(cp, "S-1-", 4) == 0) {
+ /*
+ * It's a numeric SID (eg "S-1-234-567-89") and we want to
+ * separate the domain id and the rid
+ */
+ int domain_len = strrchr(cp, '-') - cp; /* everything before the last '-' is the domain */
+ domain_val = kmem_alloc(domain_len + 1, KM_SLEEP);
+ (void) strncpy(domain_val, cp, domain_len);
+ domain_val[domain_len] = '\0'; /* strncpy() does not terminate here */
+ cp += domain_len + 1; /* step over the domain and the '-' to the rid digits */
+
+ (void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid);
+ if (*end != '\0') {
+ strfree(domain_val);
+ return (EINVAL);
+ }
+ } else {
+ /* It's only a user/group ID (eg "12345"), just get the rid */
+ domain_val = NULL;
+ (void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid);
+ if (*end != '\0')
+ return (EINVAL);
+ }
+ *domain = domain_val;
+ return (0);
+}
+
+/*
+ * Look up {user|group}{quota|used} property for given dataset. On success
+ * push the value (quota or used amount) and the setpoint. On failure, push
+ * a lua error.
+ *
+ * The dataset name is what gets pushed as the setpoint, and 2 is returned.
+ * A quota property whose value is 0 is turned into ENOENT, which
+ * zcp_handle_error() treats as "push nothing, return 0".
+ *
+ * NOTE(review): for quota properties that ENOENT assignment also replaces
+ * any earlier error (e.g. EINVAL from parse_userquota_prop()), because
+ * 'value' is still 0 in that case - confirm this masking is intended.
+ */
+static int
+zcp_get_userquota_prop(lua_State *state, dsl_pool_t *dp,
+ const char *dataset_name, const char *prop_name)
+{
+ zfsvfs_t *zfvp;
+ zfsvfs_t *zfsvfs;
+ int error;
+ zfs_userquota_prop_t type;
+ char *domain;
+ uint64_t rid, value = 0;
+ objset_t *os;
+
+ dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG);
+ if (ds == NULL)
+ return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+ error = parse_userquota_prop(prop_name, &type, &domain, &rid);
+ if (error == 0) {
+ error = dmu_objset_from_ds(ds, &os);
+ if (error == 0) {
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+ error = zfsvfs_create_impl(&zfvp, zfsvfs, os); /* presumably takes ownership of zfsvfs even on failure - verify */
+ if (error == 0) {
+ error = zfs_userspace_one(zfvp, type, domain,
+ rid, &value);
+ zfsvfs_free(zfvp);
+ }
+ }
+ if (domain != NULL)
+ strfree(domain);
+ }
+ dsl_dataset_rele(ds, FTAG); /* released before zcp_handle_error() can raise */
+
+ if ((value == 0) && ((type == ZFS_PROP_USERQUOTA) ||
+ (type == ZFS_PROP_GROUPQUOTA)))
+ error = ENOENT;
+ if (error != 0) {
+ return (zcp_handle_error(state, dataset_name,
+ prop_name, error));
+ }
+
+ (void) lua_pushnumber(state, value);
+ (void) lua_pushstring(state, dataset_name);
+ return (2);
+}
+#endif
+
+/*
+ * Determines the name of the snapshot referenced in the written property
+ * name. Returns snapshot name in snap_name, a buffer that must be at least
+ * as large as ZFS_MAX_DATASET_NAME_LEN.
+ *
+ * The text after the written-property prefix is either a bare snapshot
+ * name, which is combined with 'dataset_name' as "<dataset>@<snap>", or
+ * already contains '@' and is used as is.
+ *
+ * prop_name comes from the channel program and can be arbitrarily long, so
+ * every write to snap_name is bounded by ZFS_MAX_DATASET_NAME_LEN; an
+ * over-long name is truncated instead of overrunning the buffer.
+ */
+static void
+parse_written_prop(const char *dataset_name, const char *prop_name,
+    char *snap_name)
+{
+	ASSERT(zfs_prop_written(prop_name));
+	const char *name = prop_name + ZFS_WRITTEN_PROP_PREFIX_LEN;
+	if (strchr(name, '@') == NULL) {
+		(void) snprintf(snap_name, ZFS_MAX_DATASET_NAME_LEN, "%s@%s",
+		    dataset_name, name);
+	} else {
+		(void) strlcpy(snap_name, name, ZFS_MAX_DATASET_NAME_LEN);
+	}
+}
+
+/*
+ * Fetch a written@<snapshot> property of 'dataset_name'. On success the
+ * amount of space written since that snapshot and the dataset name (as the
+ * setpoint) are pushed and 2 is returned. Failing to hold the snapshot is
+ * reported through zcp_dataset_hold_error(); any other failure goes to
+ * zcp_handle_error().
+ */
+static int
+zcp_get_written_prop(lua_State *state, dsl_pool_t *dp,
+    const char *dataset_name, const char *prop_name)
+{
+	char snap_name[ZFS_MAX_DATASET_NAME_LEN];
+	uint64_t used, comp, uncomp;
+	dsl_dataset_t *new_ds;
+	dsl_dataset_t *old_ds;
+	int err;
+
+	parse_written_prop(dataset_name, prop_name, snap_name);
+
+	new_ds = zcp_dataset_hold(state, dp, dataset_name, FTAG);
+	if (new_ds == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	err = dsl_dataset_hold(dp, snap_name, FTAG, &old_ds);
+	if (err != 0) {
+		dsl_dataset_rele(new_ds, FTAG);
+		return (zcp_dataset_hold_error(state, dp, snap_name,
+		    err));
+	}
+
+	err = dsl_dataset_space_written(old_ds, new_ds,
+	    &used, &comp, &uncomp);
+
+	/* Drop both holds before anything below can raise a Lua error. */
+	dsl_dataset_rele(old_ds, FTAG);
+	dsl_dataset_rele(new_ds, FTAG);
+
+	if (err != 0) {
+		return (zcp_handle_error(state, dataset_name,
+		    snap_name, err));
+	}
+
+	(void) lua_pushnumber(state, used);
+	(void) lua_pushstring(state, dataset_name);
+	return (2);
+}
+
+static int zcp_get_prop(lua_State *state); /* defined below; declared here for the initializer */
+static zcp_lib_info_t zcp_get_prop_info = { /* name, callback and argument list of get_prop */
+ .name = "get_prop",
+ .func = zcp_get_prop,
+ .pargs = {
+ { .za_name = "dataset", .za_lua_type = LUA_TSTRING},
+ { .za_name = "property", .za_lua_type = LUA_TSTRING},
+ {NULL, 0} /* terminator: za_name == NULL ends the list */
+ },
+ .kwargs = {
+ {NULL, 0} /* no keyword arguments */
+ }
+};
+
+/*
+ * Lua callback registered under the name "get_prop". Takes two string
+ * arguments, the dataset name and the property name, and dispatches on the
+ * kind of property:
+ *
+ *  - user defined properties   -> zcp_get_user_prop()
+ *  - user/group quota and used -> zcp_get_userquota_prop() (kernel only)
+ *  - written@ properties       -> zcp_get_written_prop()
+ *  - native properties         -> zcp_get_system_prop()
+ *
+ * Returns the number of values the selected helper pushed onto the Lua
+ * stack. A name that is none of the above raises a Lua error.
+ */
+static int
+zcp_get_prop(lua_State *state)
+{
+	const char *dataset_name;
+	const char *property_name;
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	zcp_lib_info_t *libinfo = &zcp_get_prop_info;
+
+	zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs);
+
+	dataset_name = lua_tostring(state, 1);
+	property_name = lua_tostring(state, 2);
+
+	/* User defined property */
+	if (zfs_prop_user(property_name)) {
+		return (zcp_get_user_prop(state, dp,
+		    dataset_name, property_name));
+	}
+	/* userspace property */
+	if (zfs_prop_userquota(property_name)) {
+#ifdef _KERNEL
+		return (zcp_get_userquota_prop(state, dp,
+		    dataset_name, property_name));
+#else
+		/* The format has no conversions, so no arguments follow it. */
+		return (luaL_error(state,
+		    "user quota properties only supported in kernel mode"));
+#endif
+	}
+	/* written@ property */
+	if (zfs_prop_written(property_name)) {
+		return (zcp_get_written_prop(state, dp,
+		    dataset_name, property_name));
+	}
+
+	zfs_prop_t zfs_prop = zfs_name_to_prop(property_name);
+	/* Valid system property */
+	if (zfs_prop != ZPROP_INVAL) {
+		return (zcp_get_system_prop(state, dp, dataset_name,
+		    zfs_prop));
+	}
+
+	/* Invalid property name */
+	return (luaL_error(state,
+	    "'%s' is not a valid property", property_name));
+}
+
+/*
+ * Install the get_prop callback as a field, named by
+ * zcp_get_prop_info.name, of the value that is on top of the Lua stack
+ * when this function is called. The stack height is unchanged on return.
+ * Always returns 1.
+ */
+int
+zcp_load_get_lib(lua_State *state)
+{
+	const zcp_lib_info_t *info = &zcp_get_prop_info;
+
+	lua_pushcclosure(state, info->func, 0);
+	lua_setfield(state, -2, info->name);
+
+	return (1);
+}
diff --git a/module/zfs/zcp_global.c b/module/zfs/zcp_global.c
new file mode 100644
index 000000000..b6c3c3a4f
--- /dev/null
+++ b/module/zfs/zcp_global.c
@@ -0,0 +1,84 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/zcp_global.h>
+
+#include <sys/lua/lua.h>
+#include <sys/lua/lauxlib.h>
+
+/*
+ * Pairs the name of a Lua global variable with the errno value assigned
+ * to it.
+ */
+typedef struct zcp_errno_global {
+ const char *zeg_name;
+ int zeg_errno;
+} zcp_errno_global_t;
+
+/*
+ * Errno constants exported to Lua as globals by zcp_load_errno_globals().
+ * The table ends with an entry whose zeg_name is NULL (written as 0).
+ */
+static const zcp_errno_global_t errno_globals[] = {
+ {"EPERM", EPERM},
+ {"ENOENT", ENOENT},
+ {"ESRCH", ESRCH},
+ {"EINTR", EINTR},
+ {"EIO", EIO},
+ {"ENXIO", ENXIO},
+ {"E2BIG", E2BIG},
+ {"ENOEXEC", ENOEXEC},
+ {"EBADF", EBADF},
+ {"ECHILD", ECHILD},
+ {"EAGAIN", EAGAIN},
+ {"ENOMEM", ENOMEM},
+ {"EACCES", EACCES},
+ {"EFAULT", EFAULT},
+ {"ENOTBLK", ENOTBLK},
+ {"EBUSY", EBUSY},
+ {"EEXIST", EEXIST},
+ {"EXDEV", EXDEV},
+ {"ENODEV", ENODEV},
+ {"ENOTDIR", ENOTDIR},
+ {"EISDIR", EISDIR},
+ {"EINVAL", EINVAL},
+ {"ENFILE", ENFILE},
+ {"EMFILE", EMFILE},
+ {"ENOTTY", ENOTTY},
+ {"ETXTBSY", ETXTBSY},
+ {"EFBIG", EFBIG},
+ {"ENOSPC", ENOSPC},
+ {"ESPIPE", ESPIPE},
+ {"EROFS", EROFS},
+ {"EMLINK", EMLINK},
+ {"EPIPE", EPIPE},
+ {"EDOM", EDOM},
+ {"ERANGE", ERANGE},
+ {"EDQUOT", EDQUOT},
+ {0, 0}
+};
+
+/*
+ * Define one Lua global per entry of errno_globals, stopping at the
+ * terminating entry whose name is NULL.
+ */
+static void
+zcp_load_errno_globals(lua_State *state)
+{
+	const zcp_errno_global_t *entry;
+
+	for (entry = errno_globals; entry->zeg_name != NULL; entry++) {
+		lua_pushnumber(state, (lua_Number)entry->zeg_errno);
+		lua_setglobal(state, entry->zeg_name);
+	}
+}
+
+/*
+ * Install the global variables made available to Lua code running in
+ * 'state'.  At present this consists only of the errno constants.
+ */
+void
+zcp_load_globals(lua_State *state)
+{
+ zcp_load_errno_globals(state);
+}
diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c
new file mode 100644
index 000000000..d37172c88
--- /dev/null
+++ b/module/zfs/zcp_iter.c
@@ -0,0 +1,531 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/lua/lua.h>
+#include <sys/lua/lauxlib.h>
+
+#include <sys/dmu.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_pool.h>
+#include <sys/dmu_tx.h>
+#include <sys/dmu_objset.h>
+#include <sys/zap.h>
+#include <sys/dsl_dir.h>
+#include <sys/zcp_prop.h>
+
+#include <sys/zcp.h>
+
+/* Signature shared by the list entry points and their __gc handlers. */
+typedef int (zcp_list_func_t)(lua_State *);
+
+/*
+ * Describes one function of the list library: the name it is registered
+ * under, the C function implementing it, an optional __gc handler (NULL
+ * when none is needed), and the positional and keyword argument
+ * descriptions handed to zcp_parse_args().
+ */
+typedef struct zcp_list_info {
+ const char *name;
+ zcp_list_func_t *func;
+ zcp_list_func_t *gc;
+ const zcp_arg_t pargs[4];
+ const zcp_arg_t kwargs[2];
+} zcp_list_info_t;
+
+/*
+ * Iterator closure built by zcp_clones_list().  Upvalue 1 holds the object
+ * number of the snapshot and upvalue 2 a serialized ZAP cursor into the
+ * snapshot's ds_next_clones_obj.  Each call pushes the name of the next
+ * clone (taken from the clone's dsl_dir) and returns 1, after storing the
+ * advanced cursor back into upvalue 2.  It returns 0 (no values) when the
+ * snapshot can no longer be held (ENOENT), when it has no next-clones
+ * object, or when the cursor is exhausted.  Any other failure is raised as
+ * a Lua error.
+ */
+static int
+zcp_clones_iter(lua_State *state)
+{
+ int err;
+ char clonename[ZFS_MAX_DATASET_NAME_LEN];
+ uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1));
+ uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2));
+ dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+ dsl_dataset_t *ds, *clone;
+ zap_attribute_t za;
+ zap_cursor_t zc;
+
+ err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ if (err == ENOENT) {
+ return (0);
+ } else if (err != 0) {
+ return (luaL_error(state,
+ "unexpected error %d from dsl_dataset_hold_obj(dsobj)",
+ err));
+ }
+
+ if (dsl_dataset_phys(ds)->ds_next_clones_obj == 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+ }
+
+ /* Resume the ZAP walk at the position saved by the previous call. */
+ zap_cursor_init_serialized(&zc, dp->dp_meta_objset,
+ dsl_dataset_phys(ds)->ds_next_clones_obj, cursor);
+ dsl_dataset_rele(ds, FTAG);
+
+ err = zap_cursor_retrieve(&zc, &za);
+ if (err != 0) {
+ zap_cursor_fini(&zc);
+ if (err != ENOENT) {
+ return (luaL_error(state,
+ "unexpected error %d from zap_cursor_retrieve()",
+ err));
+ }
+ return (0);
+ }
+ /* Step past the retrieved entry and serialize the new position. */
+ zap_cursor_advance(&zc);
+ cursor = zap_cursor_serialize(&zc);
+ zap_cursor_fini(&zc);
+
+ err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &clone);
+ if (err != 0) {
+ return (luaL_error(state,
+ "unexpected error %d from "
+ "dsl_dataset_hold_obj(za_first_integer)", err));
+ }
+
+ dsl_dir_name(clone->ds_dir, clonename);
+ dsl_dataset_rele(clone, FTAG);
+
+ /* Store the new cursor in upvalue 2 for the next call. */
+ lua_pushnumber(state, cursor);
+ lua_replace(state, lua_upvalueindex(2));
+
+ (void) lua_pushstring(state, clonename);
+ return (1);
+}
+
+static int zcp_clones_list(lua_State *);
+
+/* "clones": takes a single positional string argument, the snapshot. */
+static zcp_list_info_t zcp_clones_list_info = {
+	.name = "clones",
+	.func = zcp_clones_list,
+	.gc = NULL,
+	.pargs = {
+	    { .za_name = "snapshot", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * Build the iterator over the clones of the snapshot named by argument 1.
+ * Pushes a zcp_clones_iter closure whose upvalues are the snapshot's object
+ * number and an initial cursor of 0, and returns 1.  Raises an argument
+ * error if the named dataset is not a snapshot.
+ */
+static int
+zcp_clones_list(lua_State *state)
+{
+	const char *snapname = lua_tostring(state, 1);
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *snap;
+	uint64_t snapobj;
+	boolean_t is_snapshot;
+
+	/*
+	 * zcp_dataset_hold will either successfully return the requested
+	 * dataset or throw a lua error and longjmp out of the zfs.list.clones
+	 * call without returning.
+	 */
+	snap = zcp_dataset_hold(state, dp, snapname, FTAG);
+	if (snap == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/* Copy out what is needed, then drop the hold straight away. */
+	is_snapshot = snap->ds_is_snapshot;
+	snapobj = snap->ds_object;
+	dsl_dataset_rele(snap, FTAG);
+
+	if (!is_snapshot) {
+		return (zcp_argerror(state, 1, "%s is not a snapshot",
+		    snapname));
+	}
+
+	lua_pushnumber(state, snapobj);
+	lua_pushnumber(state, 0);
+	lua_pushcclosure(state, &zcp_clones_iter, 2);
+	return (1);
+}
+
+/*
+ * Iterator closure built by zcp_snapshots_list().  Upvalue 1 holds the
+ * object number of the dataset and upvalue 2 the cursor passed to
+ * dmu_snapshot_list_next().  Each call pushes one string of the form
+ * "<dataset name>@<snapshot name>" and returns 1, after saving the updated
+ * cursor in upvalue 2.  Returns 0 (no values) once
+ * dmu_snapshot_list_next() reports ENOENT; other errors are raised as Lua
+ * errors.
+ */
+static int
+zcp_snapshots_iter(lua_State *state)
+{
+ int err;
+ char snapname[ZFS_MAX_DATASET_NAME_LEN];
+ uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1));
+ uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2));
+ dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+ dsl_dataset_t *ds;
+ objset_t *os;
+ char *p;
+
+ err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ if (err != 0) {
+ return (luaL_error(state,
+ "unexpected error %d from dsl_dataset_hold_obj(dsobj)",
+ err));
+ }
+
+ /* Start the result with "<dataset name>@"; it must fit in the buffer. */
+ dsl_dataset_name(ds, snapname);
+ VERIFY3U(sizeof (snapname), >,
+ strlcat(snapname, "@", sizeof (snapname)));
+
+ /* p points at the terminating NUL, where the snapshot name is written. */
+ p = strchr(snapname, '\0');
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+ err = dmu_snapshot_list_next(os,
+ sizeof (snapname) - (p - snapname), p, NULL, &cursor, NULL);
+ dsl_dataset_rele(ds, FTAG);
+
+ if (err == ENOENT) {
+ return (0);
+ } else if (err != 0) {
+ return (luaL_error(state,
+ "unexpected error %d from dmu_snapshot_list_next()", err));
+ }
+
+ /* Store the new cursor in upvalue 2 for the next call. */
+ lua_pushnumber(state, cursor);
+ lua_replace(state, lua_upvalueindex(2));
+
+ (void) lua_pushstring(state, snapname);
+ return (1);
+}
+
+static int zcp_snapshots_list(lua_State *);
+
+/* "snapshots": takes a single positional string argument, the dataset. */
+static zcp_list_info_t zcp_snapshots_list_info = {
+	.name = "snapshots",
+	.func = zcp_snapshots_list,
+	.gc = NULL,
+	.pargs = {
+	    { .za_name = "filesystem | volume", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * Build the iterator over the snapshots of the dataset named by argument 1.
+ * Pushes a zcp_snapshots_iter closure whose upvalues are the dataset's
+ * object number and an initial cursor of 0, and returns 1.  Raises an
+ * argument error if the named dataset is itself a snapshot.
+ */
+static int
+zcp_snapshots_list(lua_State *state)
+{
+	const char *fsname = lua_tostring(state, 1);
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *held;
+	uint64_t objnum;
+	boolean_t is_snapshot;
+
+	held = zcp_dataset_hold(state, dp, fsname, FTAG);
+	if (held == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/* Copy out what is needed, then drop the hold straight away. */
+	is_snapshot = held->ds_is_snapshot;
+	objnum = held->ds_object;
+	dsl_dataset_rele(held, FTAG);
+
+	if (is_snapshot) {
+		return (zcp_argerror(state, 1,
+		    "argument %s cannot be a snapshot", fsname));
+	}
+
+	lua_pushnumber(state, objnum);
+	lua_pushnumber(state, 0);
+	lua_pushcclosure(state, &zcp_snapshots_iter, 2);
+	return (1);
+}
+
+/*
+ * Report whether a dataset name should be left out of listings: B_TRUE if
+ * the name contains a '$' or a '%' character, B_FALSE otherwise.
+ *
+ * Note: channel programs only run in the global zone, so all datasets
+ * are visible to this zone.
+ */
+static boolean_t
+dataset_name_hidden(const char *name)
+{
+	if (strchr(name, '$') != NULL || strchr(name, '%') != NULL)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Iterator closure built by zcp_children_list().  Upvalue 1 holds the
+ * object number of the parent dataset and upvalue 2 the cursor passed to
+ * dmu_dir_list_next().  Each call pushes one string of the form
+ * "<parent name>/<child name>" and returns 1, after saving the updated
+ * cursor in upvalue 2.  Names for which dataset_name_hidden() is true are
+ * skipped.  Returns 0 (no values) once dmu_dir_list_next() reports ENOENT;
+ * other errors are raised as Lua errors.
+ */
+static int
+zcp_children_iter(lua_State *state)
+{
+ int err;
+ char childname[ZFS_MAX_DATASET_NAME_LEN];
+ uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1));
+ uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2));
+ zcp_run_info_t *ri = zcp_run_info(state);
+ dsl_pool_t *dp = ri->zri_pool;
+ dsl_dataset_t *ds;
+ objset_t *os;
+ char *p;
+
+ err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ if (err != 0) {
+ return (luaL_error(state,
+ "unexpected error %d from dsl_dataset_hold_obj(dsobj)",
+ err));
+ }
+
+ /* Start the result with "<parent name>/"; it must fit in the buffer. */
+ dsl_dataset_name(ds, childname);
+ VERIFY3U(sizeof (childname), >,
+ strlcat(childname, "/", sizeof (childname)));
+ /* p points at the terminating NUL, where the child name is written. */
+ p = strchr(childname, '\0');
+
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+ /* Keep advancing until a non-hidden child is found or an error occurs. */
+ do {
+ err = dmu_dir_list_next(os,
+ sizeof (childname) - (p - childname), p, NULL, &cursor);
+ } while (err == 0 && dataset_name_hidden(childname));
+ dsl_dataset_rele(ds, FTAG);
+
+ if (err == ENOENT) {
+ return (0);
+ } else if (err != 0) {
+ return (luaL_error(state,
+ "unexpected error %d from dmu_dir_list_next()",
+ err));
+ }
+
+ /* Store the new cursor in upvalue 2 for the next call. */
+ lua_pushnumber(state, cursor);
+ lua_replace(state, lua_upvalueindex(2));
+
+ (void) lua_pushstring(state, childname);
+ return (1);
+}
+
+static int zcp_children_list(lua_State *);
+
+/* "children": takes a single positional string argument, the dataset. */
+static zcp_list_info_t zcp_children_list_info = {
+	.name = "children",
+	.func = zcp_children_list,
+	.gc = NULL,
+	.pargs = {
+	    { .za_name = "filesystem | volume", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * Build the iterator over the children of the dataset named by argument 1.
+ * Pushes a zcp_children_iter closure whose upvalues are the dataset's
+ * object number and an initial cursor of 0, and returns 1.  Raises an
+ * argument error if the named dataset is a snapshot.
+ */
+static int
+zcp_children_list(lua_State *state)
+{
+	const char *fsname = lua_tostring(state, 1);
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *parent;
+	uint64_t parentobj;
+	boolean_t is_snapshot;
+
+	parent = zcp_dataset_hold(state, dp, fsname, FTAG);
+	if (parent == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/* Copy out what is needed, then drop the hold straight away. */
+	is_snapshot = parent->ds_is_snapshot;
+	parentobj = parent->ds_object;
+	dsl_dataset_rele(parent, FTAG);
+
+	if (is_snapshot) {
+		return (zcp_argerror(state, 1,
+		    "argument %s cannot be a snapshot", fsname));
+	}
+
+	lua_pushnumber(state, parentobj);
+	lua_pushnumber(state, 0);
+	lua_pushcclosure(state, &zcp_children_iter, 2);
+	return (1);
+}
+
+/*
+ * __gc handler for the userdata created by zcp_props_list().  The userdata
+ * holds an nvlist_t pointer; it is freed here unless it is already NULL,
+ * which is how zcp_props_iter() marks a list it has freed itself.
+ */
+static int
+zcp_props_list_gc(lua_State *state)
+{
+ nvlist_t **props = lua_touserdata(state, 1);
+ if (*props != NULL)
+ fnvlist_free(*props);
+ return (0);
+}
+
+/*
+ * Iterator closure built by zcp_props_list().  Upvalue 1 is the userdata
+ * holding the property nvlist pointer and upvalue 2 is the nvpair returned
+ * by the previous call (NULL on the first call).  Entries whose name is
+ * not a user property are skipped.  Each call pushes three strings -- the
+ * property name, its value and its source -- and returns 3.  When the list
+ * is exhausted the nvlist is freed, the pointer is reset to NULL so that
+ * zcp_props_list_gc() does not free it again, and 0 is returned.
+ */
+static int
+zcp_props_iter(lua_State *state)
+{
+ char *source, *val;
+ nvlist_t *nvprop;
+ nvlist_t **props = lua_touserdata(state, lua_upvalueindex(1));
+ nvpair_t *pair = lua_touserdata(state, lua_upvalueindex(2));
+
+ do {
+ pair = nvlist_next_nvpair(*props, pair);
+ if (pair == NULL) {
+ fnvlist_free(*props);
+ *props = NULL;
+ return (0);
+ }
+ } while (!zfs_prop_user(nvpair_name(pair)));
+
+ /* Remember the current pair in upvalue 2 for the next call. */
+ lua_pushlightuserdata(state, pair);
+ lua_replace(state, lua_upvalueindex(2));
+
+ /* Each entry is itself an nvlist carrying the value and its source. */
+ nvprop = fnvpair_value_nvlist(pair);
+ val = fnvlist_lookup_string(nvprop, ZPROP_VALUE);
+ source = fnvlist_lookup_string(nvprop, ZPROP_SOURCE);
+
+ (void) lua_pushstring(state, nvpair_name(pair));
+ (void) lua_pushstring(state, val);
+ (void) lua_pushstring(state, source);
+ return (3);
+}
+
+static int zcp_props_list(lua_State *);
+/*
+ * "properties": takes a single positional string argument, the dataset.
+ * This is the only list function here with a __gc handler; it frees the
+ * nvlist owned by the iterator's userdata.
+ */
+static zcp_list_info_t zcp_props_list_info = {
+ .name = "properties",
+ .func = zcp_props_list,
+ .gc = zcp_props_list_gc,
+ .pargs = {
+ { .za_name = "filesystem | snapshot | volume",
+ .za_lua_type = LUA_TSTRING},
+ {NULL, 0}
+ },
+ .kwargs = {
+ {NULL, 0}
+ }
+};
+
+/*
+ * Build the iterator over the properties of the dataset named by argument
+ * 1.  A Lua userdata is created to hold the nvlist pointer filled in by
+ * dsl_prop_get_all(), and is given the metatable registered under the name
+ * "properties" so that zcp_props_list_gc() runs when it is collected.
+ * Pushes a zcp_props_iter closure whose upvalues are that userdata and a
+ * NULL light userdata (the initial iteration position), and returns 1.
+ */
+static int
+zcp_props_list(lua_State *state)
+{
+ const char *dsname = lua_tostring(state, 1);
+ dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+ objset_t *os;
+ nvlist_t **props = lua_newuserdata(state, sizeof (nvlist_t *));
+
+ dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dsname, FTAG);
+ if (ds == NULL)
+ return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+ VERIFY0(dsl_prop_get_all(os, props));
+ dsl_dataset_rele(ds, FTAG);
+
+ /*
+ * Set the metatable for the properties list to free it on completion.
+ */
+ luaL_getmetatable(state, zcp_props_list_info.name);
+ (void) lua_setmetatable(state, -2);
+
+ lua_pushlightuserdata(state, NULL);
+ lua_pushcclosure(state, &zcp_props_iter, 2);
+ return (1);
+}
+
+
+/*
+ * Populate nv with the names of all visible properties that are valid for
+ * the given dataset.  Each name is added as a value-less boolean entry;
+ * property values are not looked up here.
+ */
+static void
+zcp_dataset_props(dsl_dataset_t *ds, nvlist_t *nv)
+{
+ for (int prop = ZFS_PROP_TYPE; prop < ZFS_NUM_PROPS; prop++) {
+ /* Do not display hidden props */
+ if (!zfs_prop_visible(prop))
+ continue;
+ /* Do not display props not valid for this dataset */
+ if (!prop_valid_for_ds(ds, prop))
+ continue;
+ fnvlist_add_boolean(nv, zfs_prop_to_name(prop));
+ }
+}
+
+static int zcp_system_props_list(lua_State *);
+
+/* "system_properties": takes a single positional string argument. */
+static zcp_list_info_t zcp_system_props_list_info = {
+	.name = "system_properties",
+	.func = zcp_system_props_list,
+	.pargs = {
+	    { .za_name = "dataset", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * Get the names of all visible properties that are valid for a given
+ * dataset.  The names are collected in an nvlist by zcp_dataset_props() and
+ * returned on the stack as a single Lua value via zcp_nvlist_to_lua().
+ * A conversion failure is raised as a Lua error.
+ */
+static int
+zcp_system_props_list(lua_State *state)
+{
+	int error;
+	char errbuf[128];
+	const char *dataset_name;
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	zcp_list_info_t *libinfo = &zcp_system_props_list_info;
+	zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs);
+	dataset_name = lua_tostring(state, 1);
+
+	dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG);
+	if (ds == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/*
+	 * Allocate the list only once the hold has succeeded:
+	 * zcp_dataset_hold() longjmps out on failure, which would leak
+	 * anything allocated before it.
+	 */
+	nvlist_t *nv = fnvlist_alloc();
+
+	/* Get the names of all valid properties for this dataset */
+	zcp_dataset_props(ds, nv);
+	dsl_dataset_rele(ds, FTAG);
+
+	/* push list as lua table */
+	error = zcp_nvlist_to_lua(state, nv, errbuf, sizeof (errbuf));
+	nvlist_free(nv);
+	if (error != 0) {
+		return (luaL_error(state,
+		    "Error returning nvlist: %s", errbuf));
+	}
+	return (1);
+}
+
+/*
+ * Common entry point for every function registered by zcp_load_list_lib().
+ * Upvalue 1 is the zcp_list_info_t describing the function being called;
+ * the arguments are checked against its pargs/kwargs and the call is then
+ * forwarded to its implementation.
+ */
+static int
+zcp_list_func(lua_State *state)
+{
+ zcp_list_info_t *info = lua_touserdata(state, lua_upvalueindex(1));
+
+ zcp_parse_args(state, info->name, info->pargs, info->kwargs);
+
+ return (info->func(state));
+}
+
+/*
+ * Create a new Lua table, leave it on the stack, and fill it with one
+ * closure per list function.  Each closure is zcp_list_func with the
+ * function's zcp_list_info_t as its only upvalue, stored in the table
+ * under info->name.  Always returns 1.
+ */
+int
+zcp_load_list_lib(lua_State *state)
+{
+	int i;
+	zcp_list_info_t *zcp_list_funcs[] = {
+		&zcp_children_list_info,
+		&zcp_snapshots_list_info,
+		&zcp_props_list_info,
+		&zcp_clones_list_info,
+		&zcp_system_props_list_info,
+		NULL
+	};
+
+	lua_newtable(state);
+
+	for (i = 0; zcp_list_funcs[i] != NULL; i++) {
+		zcp_list_info_t *info = zcp_list_funcs[i];
+
+		if (info->gc != NULL) {
+			/*
+			 * If the function requires garbage collection, create
+			 * a metatable with its name and register the __gc
+			 * function.
+			 */
+			(void) luaL_newmetatable(state, info->name);
+			(void) lua_pushstring(state, "__gc");
+			lua_pushcfunction(state, info->gc);
+			lua_settable(state, -3);
+			lua_pop(state, 1);
+		}
+
+		lua_pushlightuserdata(state, info);
+		lua_pushcclosure(state, &zcp_list_func, 1);
+		lua_setfield(state, -2, info->name);
+	}
+
+	return (1);
+}
diff --git a/module/zfs/zcp_synctask.c b/module/zfs/zcp_synctask.c
new file mode 100644
index 000000000..923d5ca67
--- /dev/null
+++ b/module/zfs/zcp_synctask.c
@@ -0,0 +1,265 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/lua/lua.h>
+#include <sys/lua/lauxlib.h>
+
+#include <sys/zcp.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_bookmark.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dmu_objset.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfeature.h>
+#include <sys/metaslab.h>
+
+/*
+ * Shift applied to blocks_modified in zcp_synctask_wrapper() to turn a
+ * block count into a byte estimate (1 << 14 bytes per block).
+ */
+#define DST_AVG_BLKSHIFT 14
+
+/*
+ * Signature of a synctask implementation: the Lua state, whether to
+ * actually perform the change (B_FALSE requests a dry run), and an nvlist
+ * in which error details may be returned.
+ */
+typedef int (zcp_synctask_func_t)(lua_State *, boolean_t, nvlist_t *);
+
+/*
+ * Describes one synctask library function: the name it is registered
+ * under, its implementation, the kind of space check and the number of
+ * modified blocks used by zcp_synctask_wrapper() for space accounting, and
+ * the positional and keyword argument descriptions handed to
+ * zcp_parse_args().
+ */
+typedef struct zcp_synctask_info {
+ const char *name;
+ zcp_synctask_func_t *func;
+ zfs_space_check_t space_check;
+ int blocks_modified;
+ const zcp_arg_t pargs[4];
+ const zcp_arg_t kwargs[2];
+} zcp_synctask_info_t;
+
+/*
+ * Run a check/sync function pair on behalf of a channel program library
+ * function.
+ *
+ * Work of this kind would normally go through dsl_sync_task(), but the Lua
+ * script is itself already running inside a synctask, so the steps that
+ * dsl_sync_task() would add (such as acquiring the config rwlock and
+ * performing space checks) are left out here.
+ *
+ * When 'sync' is false only checkfunc runs (a dry run) and its result is
+ * returned.  When 'sync' is true, syncfunc runs if and only if checkfunc
+ * returned 0.  An EIO result from checkfunc is treated as fatal and raised
+ * as a Lua error, naming err_dsname in the message when one was supplied;
+ * every other error code is handed back to the caller.
+ */
+static int
+zcp_sync_task(lua_State *state, dsl_checkfunc_t *checkfunc,
+    dsl_syncfunc_t *syncfunc, void *arg, boolean_t sync, const char *err_dsname)
+{
+	zcp_run_info_t *ri = zcp_run_info(state);
+	int err = checkfunc(arg, ri->zri_tx);
+
+	/* Dry run: report the outcome of the check and change nothing. */
+	if (!sync)
+		return (err);
+
+	if (err == 0) {
+		syncfunc(arg, ri->zri_tx);
+		return (0);
+	}
+
+	if (err == EIO) {
+		if (err_dsname == NULL) {
+			return (luaL_error(state,
+			    "I/O error while accessing dataset."));
+		}
+		return (luaL_error(state,
+		    "I/O error while accessing dataset '%s'",
+		    err_dsname));
+	}
+
+	return (err);
+}
+
+
+static int zcp_synctask_destroy(lua_State *, boolean_t, nvlist_t *);
+
+/*
+ * "destroy": one positional string argument (the dataset) and one optional
+ * boolean keyword argument, "defer".  No space check is performed.
+ */
+static zcp_synctask_info_t zcp_synctask_destroy_info = {
+	.name = "destroy",
+	.func = zcp_synctask_destroy,
+	.space_check = ZFS_SPACE_CHECK_NONE,
+	.blocks_modified = 0,
+	.pargs = {
+	    {.za_name = "filesystem | snapshot", .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {.za_name = "defer", .za_lua_type = LUA_TBOOLEAN},
+	    {NULL, 0}
+	}
+};
+
+/*
+ * Destroy the dataset named by argument 1.  A name containing '@' is
+ * treated as a snapshot and goes through the dsl_destroy_snapshot check
+ * and sync functions; anything else goes through the dsl_destroy_head
+ * ones.  Stack slot 2 holds the "defer" kwarg, which is only accepted for
+ * snapshots; supplying it for any other dataset raises a Lua error.
+ * Returns the error code from zcp_sync_task().  err_details is unused.
+ */
+/* ARGSUSED */
+static int
+zcp_synctask_destroy(lua_State *state, boolean_t sync, nvlist_t *err_details)
+{
+	int err;
+	const char *dsname = lua_tostring(state, 1);
+
+	boolean_t issnap = (strchr(dsname, '@') != NULL);
+
+	if (!issnap && !lua_isnil(state, 2)) {
+		return (luaL_error(state,
+		    "'defer' kwarg only supported for snapshots: %s",
+		    dsname));
+	}
+
+	if (issnap) {
+		dsl_destroy_snapshot_arg_t ddsa = { 0 };
+		ddsa.ddsa_name = dsname;
+		if (!lua_isnil(state, 2)) {
+			ddsa.ddsa_defer = lua_toboolean(state, 2);
+		} else {
+			ddsa.ddsa_defer = B_FALSE;
+		}
+
+		err = zcp_sync_task(state, dsl_destroy_snapshot_check,
+		    dsl_destroy_snapshot_sync, &ddsa, sync, dsname);
+	} else {
+		dsl_destroy_head_arg_t ddha = { 0 };
+		ddha.ddha_name = dsname;
+
+		err = zcp_sync_task(state, dsl_destroy_head_check,
+		    dsl_destroy_head_sync, &ddha, sync, dsname);
+	}
+
+	return (err);
+}
+
+static int zcp_synctask_promote(lua_State *, boolean_t, nvlist_t *err_details);
+/*
+ * "promote": one positional string argument (the clone) and no keyword
+ * arguments.  Uses the reserved space check and is accounted as modifying
+ * 3 blocks.
+ */
+static zcp_synctask_info_t zcp_synctask_promote_info = {
+ .name = "promote",
+ .func = zcp_synctask_promote,
+ .space_check = ZFS_SPACE_CHECK_RESERVED,
+ .blocks_modified = 3,
+ .pargs = {
+ {.za_name = "clone", .za_lua_type = LUA_TSTRING},
+ {NULL, 0}
+ },
+ .kwargs = {
+ {NULL, 0}
+ }
+};
+
+/*
+ * Promote the clone named by argument 1 by running the dsl_dataset_promote
+ * check and sync functions through zcp_sync_task().  The credentials come
+ * from the run info attached to the Lua state, and err_details is handed
+ * to the promote code as its err_ds list.  Returns the error code from
+ * zcp_sync_task().
+ */
+static int
+zcp_synctask_promote(lua_State *state, boolean_t sync, nvlist_t *err_details)
+{
+ int err;
+ dsl_dataset_promote_arg_t ddpa = { 0 };
+ const char *dsname = lua_tostring(state, 1);
+ zcp_run_info_t *ri = zcp_run_info(state);
+
+ ddpa.ddpa_clonename = dsname;
+ ddpa.err_ds = err_details;
+ ddpa.cr = ri->zri_cred;
+
+ /*
+ * If there was a snapshot name conflict, then err_ds will be filled
+ * with a list of conflicting snapshot names.
+ */
+ err = zcp_sync_task(state, dsl_dataset_promote_check,
+ dsl_dataset_promote_sync, &ddpa, sync, dsname);
+
+ return (err);
+}
+
+/*
+ * Cleanup callback registered by zcp_synctask_wrapper(): frees the
+ * err_details nvlist passed as 'arg'.
+ */
+void
+zcp_synctask_wrapper_cleanup(void *arg)
+{
+ fnvlist_free(arg);
+}
+
+/*
+ * Common entry point for every function registered by
+ * zcp_load_synctask_lib().  Upvalue 1 is the zcp_synctask_info_t of the
+ * function being called and upvalue 2 the boolean 'sync' flag given when
+ * the library was loaded.
+ *
+ * After checking the arguments, and unless the function requests no space
+ * check, the estimated space needed by the operation is compared against
+ * the pool's adjusted size; if it does not fit, ENOSPC is reported without
+ * calling the function.  On success the estimate is added to the running
+ * total in the run info.
+ *
+ * Pushes the error code as a number and, if the function filled in any
+ * error details, a second value built from that nvlist.  Returns the
+ * number of values pushed (1 or 2).
+ */
+static int
+zcp_synctask_wrapper(lua_State *state)
+{
+	int err;
+	int num_ret = 1;
+	nvlist_t *err_details = fnvlist_alloc();
+
+	/*
+	 * Make sure err_details is properly freed, even if a fatal error is
+	 * thrown during the synctask.
+	 */
+	zcp_register_cleanup(state, &zcp_synctask_wrapper_cleanup, err_details);
+
+	zcp_synctask_info_t *info = lua_touserdata(state, lua_upvalueindex(1));
+	boolean_t sync = lua_toboolean(state, lua_upvalueindex(2));
+
+	zcp_run_info_t *ri = zcp_run_info(state);
+	dsl_pool_t *dp = ri->zri_pool;
+
+	/*
+	 * MOS space is triple-dittoed, so we multiply by 3.  The block count
+	 * is widened before shifting so the arithmetic is done in 64 bits
+	 * rather than in int.
+	 */
+	uint64_t funcspace =
+	    ((uint64_t)info->blocks_modified << DST_AVG_BLKSHIFT) * 3;
+
+	zcp_parse_args(state, info->name, info->pargs, info->kwargs);
+
+	err = 0;
+	if (info->space_check != ZFS_SPACE_CHECK_NONE && funcspace > 0) {
+		uint64_t quota = dsl_pool_adjustedsize(dp,
+		    info->space_check == ZFS_SPACE_CHECK_RESERVED) -
+		    metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
+		uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes +
+		    ri->zri_space_used;
+
+		if (used + funcspace > quota) {
+			err = SET_ERROR(ENOSPC);
+		}
+	}
+
+	if (err == 0) {
+		err = info->func(state, sync, err_details);
+	}
+
+	if (err == 0) {
+		ri->zri_space_used += funcspace;
+	}
+
+	lua_pushnumber(state, (lua_Number)err);
+	if (fnvlist_num_pairs(err_details) > 0) {
+		(void) zcp_nvlist_to_lua(state, err_details, NULL, 0);
+		num_ret++;
+	}
+
+	/* Normal exit: unregister the cleanup and free the list ourselves. */
+	zcp_clear_cleanup(state);
+	fnvlist_free(err_details);
+
+	return (num_ret);
+}
+
+/*
+ * Create a new Lua table, leave it on the stack, and fill it with one
+ * closure per synctask function.  Each closure is zcp_synctask_wrapper
+ * with two upvalues -- the function's zcp_synctask_info_t and the 'sync'
+ * flag -- stored in the table under info->name.  Always returns 1.
+ */
+int
+zcp_load_synctask_lib(lua_State *state, boolean_t sync)
+{
+	int i;
+	zcp_synctask_info_t *zcp_synctask_funcs[] = {
+		&zcp_synctask_destroy_info,
+		&zcp_synctask_promote_info,
+		NULL
+	};
+
+	lua_newtable(state);
+
+	for (i = 0; zcp_synctask_funcs[i] != NULL; i++) {
+		zcp_synctask_info_t *info = zcp_synctask_funcs[i];
+		lua_pushlightuserdata(state, info);
+		lua_pushboolean(state, sync);
+		lua_pushcclosure(state, &zcp_synctask_wrapper, 2);
+		lua_setfield(state, -2, info->name);
+	}
+
+	return (1);
+}
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index b132a6885..a8f37fe84 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -27,7 +27,7 @@
* Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
@@ -193,6 +193,7 @@
#include <sys/dsl_bookmark.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
+#include <sys/zcp.h>
#include <sys/zio_checksum.h>
#include <linux/miscdevice.h>
@@ -203,6 +204,9 @@
#include "zfs_deleg.h"
#include "zfs_comutil.h"
+#include <sys/lua/lua.h>
+#include <sys/lua/lauxlib.h>
+
/*
* Limit maximum nvlist size. We don't want users passing in insane values
* for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
@@ -1414,17 +1418,11 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
return (error);
}
-static int
-getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
+int
+getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
{
- objset_t *os;
- int error;
-
- error = dmu_objset_hold(dsname, FTAG, &os);
- if (error != 0)
- return (error);
+ int error = 0;
if (dmu_objset_type(os) != DMU_OST_ZFS) {
- dmu_objset_rele(os, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1436,6 +1434,20 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
error = SET_ERROR(ESRCH);
}
mutex_exit(&os->os_user_ptr_lock);
+ return (error);
+}
+
+static int
+getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
+{
+ objset_t *os;
+ int error;
+
+ error = dmu_objset_hold(dsname, FTAG, &os);
+ if (error != 0)
+ return (error);
+
+ error = getzfsvfs_impl(os, zfvp);
dmu_objset_rele(os, FTAG);
return (error);
}
@@ -3660,6 +3672,36 @@ zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
return (error);
}
+/*
+ * Handler for the "channel_program" ioctl.
+ *
+ * innvl must contain the program text (ZCP_ARG_PROGRAM, a string) and the
+ * argument list (ZCP_ARG_ARGLIST).  The instruction limit
+ * (ZCP_ARG_INSTRLIMIT) and memory limit (ZCP_ARG_MEMLIMIT) are optional
+ * uint64 values that fall back to ZCP_DEFAULT_INSTRLIMIT and
+ * ZCP_DEFAULT_MEMLIMIT.  EINVAL is returned when a required entry is
+ * missing, or when a limit is zero or exceeds its maximum
+ * (zfs_lua_max_instrlimit, ZCP_MAX_MEMLIMIT).  Otherwise the result of
+ * zcp_eval() is returned.
+ */
+static int
+zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+	char *program;
+	uint64_t instrlimit, memlimit;
+	nvpair_t *nvarg = NULL;
+
+	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
+		return (SET_ERROR(EINVAL));
+	}
+	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
+		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
+	}
+	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
+		memlimit = ZCP_DEFAULT_MEMLIMIT;
+	}
+	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
+		return (SET_ERROR(EINVAL));
+	if (memlimit == 0 || memlimit > ZCP_MAX_MEMLIMIT)
+		return (SET_ERROR(EINVAL));
+
+	return (zcp_eval(poolname, program, instrlimit, memlimit,
+	    nvarg, outnvl));
+}
+
/*
* inputs:
* zc_name name of dataset to destroy
@@ -6333,6 +6375,11 @@ zfs_ioctl_init(void)
zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
B_TRUE);
+ zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
+ zfs_ioc_channel_program, zfs_secpolicy_config,
+ POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
+ B_TRUE);
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -6803,12 +6850,23 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
error = vec->zvec_func(zc->zc_name, innvl, outnvl);
spl_fstrans_unmark(cookie);
- if (error == 0 && vec->zvec_allow_log &&
+ /*
+ * Some commands can partially execute, modify state, and still
+ * return an error. In these cases, attempt to record what
+ * was modified.
+ */
+ if ((error == 0 ||
+ (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
+ vec->zvec_allow_log &&
spa_open(zc->zc_name, &spa, FTAG) == 0) {
if (!nvlist_empty(outnvl)) {
fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
outnvl);
}
+ if (error != 0) {
+ fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
+ error);
+ }
(void) spa_history_log_nvl(spa, lognv);
spa_close(spa, FTAG);
}
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 18b4ec3d6..bb380c920 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1053,13 +1053,26 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
* We claim to always be readonly so we can open snapshots;
* other ZPL code will prevent us from writing to snapshots.
*/
+
error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE,
zfsvfs, &os);
- if (error) {
+ if (error != 0) {
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
}
+ error = zfsvfs_create_impl(zfvp, zfsvfs, os);
+ if (error != 0) {
+ dmu_objset_disown(os, B_TRUE, zfsvfs);
+ }
+ return (error);
+}
+
+int
+zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
+{
+ int error;
+
zfsvfs->z_vfs = NULL;
zfsvfs->z_sb = NULL;
zfsvfs->z_parent = zfsvfs;
@@ -1086,7 +1099,6 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
error = zfsvfs_init(zfsvfs, os);
if (error != 0) {
- dmu_objset_disown(os, B_TRUE, zfsvfs);
*zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);