summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBoris Protopopov <[email protected]>2014-03-22 05:07:14 -0400
committerBrian Behlendorf <[email protected]>2016-03-10 09:49:22 -0800
commita0bd735adb1b1eb81fef10b4db102ee051c4d4ff (patch)
tree121fcde3000a116f0c33143b28a530a87fd6073a
parenteb0856779f7b57162c9179f238104f6d6e150745 (diff)
Add support for asynchronous zvol minor operations
zfsonlinux issue #2217 - zvol minor operations: check snapdev property before traversing snapshots of a dataset zfsonlinux issue #3681 - lock order inversion between zvol_open() and dsl_pool_sync()...zvol_rename_minors() Create a per-pool zvol taskq for asynchronous zvol tasks. There are a few key design decisions to be aware of. * Each taskq must be single threaded to ensure tasks are always processed in the order in which they were dispatched. * There is a taskq per-pool in order to keep the pools independent. This way if one pool is suspended it will not impact another. * The preferred location to dispatch a zvol minor task is a sync task. In this context there is easy access to the spa_t and minimal error handling is required because the sync task must succeed. Support for asynchronous zvol minor operations addresses issue #3681. Signed-off-by: Boris Protopopov <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #2217 Closes #3678 Closes #3681
-rw-r--r--include/sys/spa_impl.h2
-rw-r--r--include/sys/zvol.h15
-rw-r--r--lib/libzpool/kernel.c22
-rw-r--r--module/zfs/dmu_objset.c4
-rw-r--r--module/zfs/dmu_send.c3
-rw-r--r--module/zfs/dsl_dataset.c36
-rw-r--r--module/zfs/dsl_destroy.c8
-rw-r--r--module/zfs/dsl_dir.c6
-rw-r--r--module/zfs/spa.c37
-rw-r--r--module/zfs/zfs_ioctl.c50
-rw-r--r--module/zfs/zvol.c494
-rwxr-xr-xscripts/zconfig.sh19
12 files changed, 482 insertions, 214 deletions
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 0bb6dccdc..759c3472f 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -23,6 +23,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -253,6 +254,7 @@ struct spa {
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
+ taskq_t *spa_zvol_taskq; /* Taskq for minor management */
/*
* spa_refcount & spa_config_lock must be the last elements
diff --git a/include/sys/zvol.h b/include/sys/zvol.h
index 898e23521..c3e386f0b 100644
--- a/include/sys/zvol.h
+++ b/include/sys/zvol.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#ifndef _SYS_ZVOL_H
@@ -31,24 +32,22 @@
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
-#ifdef _KERNEL
+extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async);
+extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async);
+extern void zvol_rename_minors(spa_t *spa, const char *oldname,
+ const char *newname, boolean_t async);
+#ifdef _KERNEL
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(const char *name, uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern boolean_t zvol_is_zvol(const char *);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
-extern int zvol_create_minor(const char *name);
-extern int zvol_create_minors(const char *name);
-extern int zvol_remove_minor(const char *name);
-extern void zvol_remove_minors(const char *name);
-extern void zvol_rename_minors(const char *oldname, const char *newname);
extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
-extern int zvol_set_snapdev(const char *, uint64_t);
+extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
extern int zvol_init(void);
extern void zvol_fini(void);
-
#endif /* _KERNEL */
#endif /* _SYS_ZVOL_H */
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index a69a8da3a..49d17ece3 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <assert.h>
@@ -1354,3 +1355,24 @@ spl_fstrans_check(void)
{
return (0);
}
+
+void
+zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
+{
+}
+
+void
+zvol_remove_minor(spa_t *spa, const char *name, boolean_t async)
+{
+}
+
+void
+zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+{
+}
+
+void
+zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
+ boolean_t async)
+{
+}
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index acfc7f048..f9c534eb5 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -26,6 +26,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -868,6 +869,8 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
}
spa_history_log_internal_ds(ds, "create", tx, "");
+ zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
+
dsl_dataset_rele(ds, FTAG);
dsl_dir_rele(pdd, FTAG);
}
@@ -961,6 +964,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_name(origin, namebuf);
spa_history_log_internal_ds(ds, "clone", tx,
"origin=%s (%llu)", namebuf, origin->ds_object);
+ zvol_create_minors(dp->dp_spa, doca->doca_clone, B_TRUE);
dsl_dataset_rele(ds, FTAG);
dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(pdd, FTAG);
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 6585e4778..613770e10 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/dmu.h>
@@ -54,6 +55,7 @@
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
#include <sys/bqueue.h>
+#include <sys/zvol.h>
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
@@ -2646,6 +2648,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
}
drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
+ zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
/*
* Release the hold from dmu_recv_begin. This must be done before
* we return to open context, so that when we free the dataset's dnode,
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index a5a9694fc..230027daf 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -24,6 +24,7 @@
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/dmu_objset.h>
@@ -1424,6 +1425,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
dsl_props_set_sync_impl(ds->ds_prev,
ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
}
+ zvol_create_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
}
@@ -1498,16 +1500,6 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
fnvlist_free(suspended);
}
-#ifdef _KERNEL
- if (error == 0) {
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- char *snapname = nvpair_name(pair);
- zvol_create_minors(snapname);
- }
- }
-#endif
-
return (error);
}
@@ -1930,6 +1922,8 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(zap_add(dp->dp_meta_objset,
dsl_dataset_phys(hds)->ds_snapnames_zapobj,
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
+ zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname,
+ ddrsa->ddrsa_newsnapname, B_TRUE);
dsl_dataset_rele(ds, FTAG);
return (0);
@@ -1958,11 +1952,6 @@ int
dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive)
{
-#ifdef _KERNEL
- char *oldname, *newname;
-#endif
- int error;
-
dsl_dataset_rename_snapshot_arg_t ddrsa;
ddrsa.ddrsa_fsname = fsname;
@@ -1970,22 +1959,9 @@ dsl_dataset_rename_snapshot(const char *fsname,
ddrsa.ddrsa_newsnapname = newsnapname;
ddrsa.ddrsa_recursive = recursive;
- error = dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
+ return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
dsl_dataset_rename_snapshot_sync, &ddrsa,
- 1, ZFS_SPACE_CHECK_RESERVED);
-
- if (error)
- return (SET_ERROR(error));
-
-#ifdef _KERNEL
- oldname = kmem_asprintf("%s@%s", fsname, oldsnapname);
- newname = kmem_asprintf("%s@%s", fsname, newsnapname);
- zvol_rename_minors(oldname, newname);
- strfree(newname);
- strfree(oldname);
-#endif
-
- return (0);
+ 1, ZFS_SPACE_CHECK_RESERVED));
}
/*
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index d0015d1bd..d7c34c9a4 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -23,6 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -40,6 +41,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
+#include <sys/zvol.h>
typedef struct dmu_snapshots_destroy_arg {
nvlist_t *dsda_snaps;
@@ -243,9 +245,6 @@ dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
-#ifdef ZFS_DEBUG
- int err;
-#endif
spa_feature_t f;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
@@ -441,6 +440,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
#ifdef ZFS_DEBUG
{
uint64_t val;
+ int err;
err = dsl_dataset_snap_lookup(ds_head,
ds->ds_snapname, &val);
@@ -490,6 +490,7 @@ dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
+ zvol_remove_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
}
@@ -889,6 +890,7 @@ dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
dsl_destroy_head_sync_impl(ds, tx);
+ zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 2c521e285..8983e0793 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -24,6 +24,7 @@
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
#include <sys/dmu.h>
@@ -1909,9 +1910,8 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));
-#ifdef _KERNEL
- zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
-#endif
+ zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname,
+ ddra->ddra_newname, B_TRUE);
dsl_prop_notify_all(dd);
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 34a317fbe..01048bfe9 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -24,6 +24,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
/*
@@ -1136,6 +1137,24 @@ spa_activate(spa_t *spa, int mode)
avl_create(&spa->spa_errlist_last,
spa_error_entry_compare, sizeof (spa_error_entry_t),
offsetof(spa_error_entry_t, se_avl));
+
+ /*
+ * This taskq is used to perform zvol-minor-related tasks
+ * asynchronously. This has several advantages, including easy
+ * resolution of various deadlocks (zfsonlinux bug #3681).
+ *
+ * The taskq must be single threaded to ensure tasks are always
+ * processed in the order in which they were dispatched.
+ *
+ * A taskq per pool allows one to keep the pools independent.
+ * This way if one pool is suspended, it will not impact another.
+ *
+ * The preferred location to dispatch a zvol minor task is a sync
+ * task. In this context, there is easy access to the spa_t and minimal
+ * error handling is required because the sync task must succeed.
+ */
+ spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
+ 1, INT_MAX, 0);
}
/*
@@ -1154,6 +1173,11 @@ spa_deactivate(spa_t *spa)
spa_evicting_os_wait(spa);
+ if (spa->spa_zvol_taskq) {
+ taskq_destroy(spa->spa_zvol_taskq);
+ spa->spa_zvol_taskq = NULL;
+ }
+
txg_list_destroy(&spa->spa_vdev_txg_list);
list_destroy(&spa->spa_config_dirty_list);
@@ -3088,10 +3112,8 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
mutex_exit(&spa_namespace_lock);
}
-#ifdef _KERNEL
if (firstopen)
- zvol_create_minors(spa->spa_name);
-#endif
+ zvol_create_minors(spa, spa_name(spa), B_TRUE);
*spapp = spa;
@@ -4211,10 +4233,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, "import");
-
-#ifdef _KERNEL
- zvol_create_minors(pool);
-#endif
+ zvol_create_minors(spa, pool, B_TRUE);
return (0);
}
@@ -4349,6 +4368,10 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
spa_open_ref(spa, FTAG);
mutex_exit(&spa_namespace_lock);
spa_async_suspend(spa);
+ if (spa->spa_zvol_taskq) {
+ zvol_remove_minors(spa, spa_name(spa), B_TRUE);
+ taskq_wait(spa->spa_zvol_taskq);
+ }
mutex_enter(&spa_namespace_lock);
spa_close(spa, FTAG);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 746a3f0fc..5c84d238f 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -29,6 +29,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
/*
@@ -1499,8 +1500,7 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc)
int error;
zfs_log_history(zc);
error = spa_destroy(zc->zc_name);
- if (error == 0)
- zvol_remove_minors(zc->zc_name);
+
return (error);
}
@@ -1552,8 +1552,7 @@ zfs_ioc_pool_export(zfs_cmd_t *zc)
zfs_log_history(zc);
error = spa_export(zc->zc_name, NULL, force, hardforce);
- if (error == 0)
- zvol_remove_minors(zc->zc_name);
+
return (error);
}
@@ -2394,7 +2393,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
err = zvol_set_volsize(dsname, intval);
break;
case ZFS_PROP_SNAPDEV:
- err = zvol_set_snapdev(dsname, intval);
+ err = zvol_set_snapdev(dsname, source, intval);
break;
case ZFS_PROP_VERSION:
{
@@ -3188,12 +3187,6 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
-
-#ifdef _KERNEL
- if (error == 0 && type == DMU_OST_ZVOL)
- zvol_create_minors(fsname);
-#endif
-
return (error);
}
@@ -3236,12 +3229,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
-
-#ifdef _KERNEL
- if (error == 0)
- zvol_create_minors(fsname);
-#endif
-
return (error);
}
@@ -3304,11 +3291,6 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
error = dsl_dataset_snapshot(snaps, props, outnvl);
-#ifdef _KERNEL
- if (error == 0)
- zvol_create_minors(poolname);
-#endif
-
return (error);
}
@@ -3434,7 +3416,6 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
(void) zfs_unmount_snap(nvpair_name(pair));
- (void) zvol_remove_minor(nvpair_name(pair));
}
return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
@@ -3560,8 +3541,7 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
else
err = dsl_destroy_head(zc->zc_name);
- if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
- (void) zvol_remove_minor(zc->zc_name);
+
return (err);
}
@@ -4127,11 +4107,6 @@ zfs_ioc_recv(zfs_cmd_t *zc)
}
#endif
-#ifdef _KERNEL
- if (error == 0)
- zvol_create_minors(tofs);
-#endif
-
/*
* On error, restore the original props.
*/
@@ -6032,16 +6007,16 @@ _init(void)
return (error);
}
+ if ((error = -zvol_init()) != 0)
+ return (error);
+
spa_init(FREAD | FWRITE);
zfs_init();
- if ((error = -zvol_init()) != 0)
- goto out1;
-
zfs_ioctl_init();
if ((error = zfs_attach()) != 0)
- goto out2;
+ goto out;
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
@@ -6057,11 +6032,10 @@ _init(void)
return (0);
-out2:
- (void) zvol_fini();
-out1:
+out:
zfs_fini();
spa_fini();
+ (void) zvol_fini();
printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
ZFS_DEBUG_STR, error);
@@ -6073,9 +6047,9 @@ static void __exit
_fini(void)
{
zfs_detach();
- zvol_fini();
zfs_fini();
spa_fini();
+ zvol_fini();
tsd_destroy(&zfs_fsyncer_key);
tsd_destroy(&rrw_tsd_key);
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 034cf6a6a..ab4d3ceb7 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -42,6 +42,7 @@
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
+#include <sys/dsl_dir.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
@@ -49,6 +50,7 @@
#include <sys/zio.h>
#include <sys/zfs_rlock.h>
#include <sys/zfs_znode.h>
+#include <sys/spa_impl.h>
#include <sys/zvol.h>
#include <linux/blkdev_compat.h>
@@ -81,6 +83,23 @@ typedef struct zvol_state {
list_node_t zv_next; /* next zvol_state_t linkage */
} zvol_state_t;
+typedef enum {
+ ZVOL_ASYNC_CREATE_MINORS,
+ ZVOL_ASYNC_REMOVE_MINORS,
+ ZVOL_ASYNC_RENAME_MINORS,
+ ZVOL_ASYNC_SET_SNAPDEV,
+ ZVOL_ASYNC_MAX
+} zvol_async_op_t;
+
+typedef struct {
+ zvol_async_op_t op;
+ char pool[MAXNAMELEN];
+ char name1[MAXNAMELEN];
+ char name2[MAXNAMELEN];
+ zprop_source_t source;
+ uint64_t snapdev;
+} zvol_task_t;
+
#define ZVOL_RDONLY 0x1
/*
@@ -977,6 +996,7 @@ zvol_first_open(zvol_state_t *zv)
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error) {
dmu_objset_disown(os, zvol_tag);
+ zv->zv_objset = NULL;
goto out_mutex;
}
@@ -984,6 +1004,7 @@ zvol_first_open(zvol_state_t *zv)
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
if (error) {
dmu_objset_disown(os, zvol_tag);
+ zv->zv_objset = NULL;
goto out_mutex;
}
@@ -1036,7 +1057,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
/*
* If the caller is already holding the mutex do not take it
- * again, this will happen as part of zvol_create_minor().
+ * again, this will happen as part of zvol_create_minor_impl().
* Once add_disk() is called the device is live and the kernel
* will attempt to open it to read the partition information.
*/
@@ -1355,31 +1376,13 @@ zvol_free(zvol_state_t *zv)
kmem_free(zv, sizeof (zvol_state_t));
}
+/*
+ * Create a block device minor node and setup the linkage between it
+ * and the specified volume. Once this function returns the block
+ * device is live and ready for use.
+ */
static int
-__zvol_snapdev_hidden(const char *name)
-{
- uint64_t snapdev;
- char *parent;
- char *atp;
- int error = 0;
-
- parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- (void) strlcpy(parent, name, MAXPATHLEN);
-
- if ((atp = strrchr(parent, '@')) != NULL) {
- *atp = '\0';
- error = dsl_prop_get_integer(parent, "snapdev", &snapdev, NULL);
- if ((error == 0) && (snapdev == ZFS_SNAPDEV_HIDDEN))
- error = SET_ERROR(ENODEV);
- }
-
- kmem_free(parent, MAXPATHLEN);
-
- return (SET_ERROR(error));
-}
-
-static int
-__zvol_create_minor(const char *name, boolean_t ignore_snapdev)
+zvol_create_minor_impl(const char *name)
{
zvol_state_t *zv;
objset_t *os;
@@ -1389,7 +1392,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
unsigned minor = 0;
int error = 0;
- ASSERT(MUTEX_HELD(&zvol_state_lock));
+ mutex_enter(&zvol_state_lock);
zv = zvol_find_by_name(name);
if (zv) {
@@ -1397,12 +1400,6 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
goto out;
}
- if (ignore_snapdev == B_FALSE) {
- error = __zvol_snapdev_hidden(name);
- if (error)
- goto out;
- }
-
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
@@ -1489,69 +1486,18 @@ out:
*/
mutex_exit(&zvol_state_lock);
add_disk(zv->zv_disk);
- mutex_enter(&zvol_state_lock);
+ } else {
+ mutex_exit(&zvol_state_lock);
}
return (SET_ERROR(error));
}
/*
- * Create a block device minor node and setup the linkage between it
- * and the specified volume. Once this function returns the block
- * device is live and ready for use.
- */
-int
-zvol_create_minor(const char *name)
-{
- int error;
-
- mutex_enter(&zvol_state_lock);
- error = __zvol_create_minor(name, B_FALSE);
- mutex_exit(&zvol_state_lock);
-
- return (SET_ERROR(error));
-}
-
-static int
-__zvol_remove_minor(const char *name)
-{
- zvol_state_t *zv;
-
- ASSERT(MUTEX_HELD(&zvol_state_lock));
-
- zv = zvol_find_by_name(name);
- if (zv == NULL)
- return (SET_ERROR(ENXIO));
-
- if (zv->zv_open_count > 0)
- return (SET_ERROR(EBUSY));
-
- zvol_remove(zv);
- zvol_free(zv);
-
- return (0);
-}
-
-/*
- * Remove a block device minor node for the specified volume.
- */
-int
-zvol_remove_minor(const char *name)
-{
- int error;
-
- mutex_enter(&zvol_state_lock);
- error = __zvol_remove_minor(name);
- mutex_exit(&zvol_state_lock);
-
- return (SET_ERROR(error));
-}
-
-/*
* Rename a block device minor mode for the specified volume.
*/
static void
-__zvol_rename_minor(zvol_state_t *zv, const char *newname)
+zvol_rename_minor(zvol_state_t *zv, const char *newname)
{
int readonly = get_disk_ro(zv->zv_disk);
@@ -1571,30 +1517,120 @@ __zvol_rename_minor(zvol_state_t *zv, const char *newname)
set_disk_ro(zv->zv_disk, readonly);
}
+
+/*
+ * Mask errors to continue dmu_objset_find() traversal
+ */
+static int
+zvol_create_snap_minor_cb(const char *dsname, void *arg)
+{
+ const char *name = (const char *)arg;
+
+ /* skip the designated dataset */
+ if (name && strcmp(dsname, name) == 0)
+ return (0);
+
+ /* at this point, the dsname should name a snapshot */
+ if (strchr(dsname, '@') == 0) {
+ dprintf("zvol_create_snap_minor_cb(): "
+ "%s is not a shapshot name\n", dsname);
+ } else {
+ (void) zvol_create_minor_impl(dsname);
+ }
+
+ return (0);
+}
+
+/*
+ * Mask errors to continue dmu_objset_find() traversal
+ */
static int
zvol_create_minors_cb(const char *dsname, void *arg)
{
- (void) zvol_create_minor(dsname);
+ uint64_t snapdev;
+ int error;
+
+ error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL);
+ if (error)
+ return (0);
+
+ /*
+ * Given the name and the 'snapdev' property, create device minor nodes
+ * with the linkages to zvols/snapshots as needed.
+ * If the name represents a zvol, create a minor node for the zvol, then
+ * check if its snapshots are 'visible', and if so, iterate over the
+ * snapshots and create device minor nodes for those.
+ */
+ if (strchr(dsname, '@') == 0) {
+ /* create minor for the 'dsname' explicitly */
+ error = zvol_create_minor_impl(dsname);
+ if ((error == 0 || error == EEXIST) &&
+ (snapdev == ZFS_SNAPDEV_VISIBLE)) {
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+ /*
+ * traverse snapshots only, do not traverse children,
+ * and skip the 'dsname'
+ */
+ error = dmu_objset_find((char *)dsname,
+ zvol_create_snap_minor_cb, (void *)dsname,
+ DS_FIND_SNAPSHOTS);
+ spl_fstrans_unmark(cookie);
+ }
+ } else {
+ dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
+ dsname);
+ }
return (0);
}
/*
- * Create minors for specified dataset including children and snapshots.
+ * Create minors for the specified dataset, including children and snapshots.
+ * Pay attention to the 'snapdev' property and iterate over the snapshots
+ * only if they are 'visible'. This approach allows one to assure that the
+ * snapshot metadata is read from disk only if it is needed.
+ *
+ * The name can represent a dataset to be recursively scanned for zvols and
+ * their snapshots, or a single zvol snapshot. If the name represents a
+ * dataset, the scan is performed in two nested stages:
+ * - scan the dataset for zvols, and
+ * - for each zvol, create a minor node, then check if the zvol's snapshots
+ * are 'visible', and only then iterate over the snapshots if needed
+ *
+ * If the name represents a snapshot, a check is performed if the snapshot is
+ * 'visible' (which also verifies that the parent is a zvol), and if so,
+ * a minor node for that snapshot is created.
*/
-int
-zvol_create_minors(const char *name)
+static int
+zvol_create_minors_impl(const char *name)
{
int error = 0;
fstrans_cookie_t cookie;
+ char *atp, *parent;
if (zvol_inhibit_dev)
return (0);
- cookie = spl_fstrans_mark();
- error = dmu_objset_find((char *)name, zvol_create_minors_cb,
- NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
- spl_fstrans_unmark(cookie);
+ parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) strlcpy(parent, name, MAXPATHLEN);
+
+ if ((atp = strrchr(parent, '@')) != NULL) {
+ uint64_t snapdev;
+
+ *atp = '\0';
+ error = dsl_prop_get_integer(parent, "snapdev",
+ &snapdev, NULL);
+
+ if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
+ error = zvol_create_minor_impl(name);
+ } else {
+ cookie = spl_fstrans_mark();
+ error = dmu_objset_find(parent, zvol_create_minors_cb,
+ NULL, DS_FIND_CHILDREN);
+ spl_fstrans_unmark(cookie);
+ }
+
+ kmem_free(parent, MAXPATHLEN);
return (SET_ERROR(error));
}
@@ -1602,8 +1638,8 @@ zvol_create_minors(const char *name)
/*
* Remove minors for specified dataset including children and snapshots.
*/
-void
-zvol_remove_minors(const char *name)
+static void
+zvol_remove_minors_impl(const char *name)
{
zvol_state_t *zv, *zv_next;
int namelen = ((name) ? strlen(name) : 0);
@@ -1633,11 +1669,41 @@ zvol_remove_minors(const char *name)
mutex_exit(&zvol_state_lock);
}
+/* Remove minor for this specific snapshot only */
+static void
+zvol_remove_minor_impl(const char *name)
+{
+ zvol_state_t *zv, *zv_next;
+
+ if (zvol_inhibit_dev)
+ return;
+
+ if (strchr(name, '@') == NULL)
+ return;
+
+ mutex_enter(&zvol_state_lock);
+
+ for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
+ zv_next = list_next(&zvol_state_list, zv);
+
+ if (strcmp(zv->zv_name, name) == 0) {
+ /* If in use, leave alone */
+ if (zv->zv_open_count > 0)
+ continue;
+ zvol_remove(zv);
+ zvol_free(zv);
+ break;
+ }
+ }
+
+ mutex_exit(&zvol_state_lock);
+}
+
/*
* Rename minors for specified dataset including children and snapshots.
*/
-void
-zvol_rename_minors(const char *oldname, const char *newname)
+static void
+zvol_rename_minors_impl(const char *oldname, const char *newname)
{
zvol_state_t *zv, *zv_next;
int oldnamelen, newnamelen;
@@ -1660,14 +1726,14 @@ zvol_rename_minors(const char *oldname, const char *newname)
continue;
if (strcmp(zv->zv_name, oldname) == 0) {
- __zvol_rename_minor(zv, newname);
+ zvol_rename_minor(zv, newname);
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
(zv->zv_name[oldnamelen] == '/' ||
zv->zv_name[oldnamelen] == '@')) {
snprintf(name, MAXNAMELEN, "%s%c%s", newname,
zv->zv_name[oldnamelen],
zv->zv_name + oldnamelen + 1);
- __zvol_rename_minor(zv, name);
+ zvol_rename_minor(zv, name);
}
}
@@ -1676,42 +1742,227 @@ zvol_rename_minors(const char *oldname, const char *newname)
kmem_free(name, MAXNAMELEN);
}
+typedef struct zvol_snapdev_cb_arg {
+ uint64_t snapdev;
+} zvol_snapdev_cb_arg_t;
+
static int
-snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
- uint64_t snapdev = *(uint64_t *) arg;
+zvol_set_snapdev_cb(const char *dsname, void *param) {
+ zvol_snapdev_cb_arg_t *arg = param;
if (strchr(dsname, '@') == NULL)
return (0);
- switch (snapdev) {
+ switch (arg->snapdev) {
case ZFS_SNAPDEV_VISIBLE:
- mutex_enter(&zvol_state_lock);
- (void) __zvol_create_minor(dsname, B_TRUE);
- mutex_exit(&zvol_state_lock);
+ (void) zvol_create_minor_impl(dsname);
break;
case ZFS_SNAPDEV_HIDDEN:
- (void) zvol_remove_minor(dsname);
+ (void) zvol_remove_minor_impl(dsname);
break;
}
return (0);
}
+static void
+zvol_set_snapdev_impl(char *name, uint64_t snapdev)
+{
+ zvol_snapdev_cb_arg_t arg = {snapdev};
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+ /*
+ * The zvol_set_snapdev_sync() sets snapdev appropriately
+ * in the dataset hierarchy. Here, we only scan snapshots.
+ */
+ dmu_objset_find(name, zvol_set_snapdev_cb, &arg, DS_FIND_SNAPSHOTS);
+ spl_fstrans_unmark(cookie);
+}
+
+static zvol_task_t *
+zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
+ uint64_t snapdev)
+{
+ zvol_task_t *task;
+ char *delim;
+
+ /* Never allow tasks on hidden names. */
+ if (name1[0] == '$')
+ return (NULL);
+
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->op = op;
+ task->snapdev = snapdev;
+ delim = strchr(name1, '/');
+ strlcpy(task->pool, name1, delim ? (delim - name1 + 1) : MAXNAMELEN);
+
+ strlcpy(task->name1, name1, MAXNAMELEN);
+ if (name2 != NULL)
+ strlcpy(task->name2, name2, MAXNAMELEN);
+
+ return (task);
+}
+
+static void
+zvol_task_free(zvol_task_t *task)
+{
+ kmem_free(task, sizeof (zvol_task_t));
+}
+
+/*
+ * The worker thread function performed asynchronously.
+ */
+static void
+zvol_task_cb(void *param)
+{
+ zvol_task_t *task = (zvol_task_t *)param;
+
+ switch (task->op) {
+ case ZVOL_ASYNC_CREATE_MINORS:
+ (void) zvol_create_minors_impl(task->name1);
+ break;
+ case ZVOL_ASYNC_REMOVE_MINORS:
+ zvol_remove_minors_impl(task->name1);
+ break;
+ case ZVOL_ASYNC_RENAME_MINORS:
+ zvol_rename_minors_impl(task->name1, task->name2);
+ break;
+ case ZVOL_ASYNC_SET_SNAPDEV:
+ zvol_set_snapdev_impl(task->name1, task->snapdev);
+ break;
+ default:
+ VERIFY(0);
+ break;
+ }
+
+ zvol_task_free(task);
+}
+
+typedef struct zvol_set_snapdev_arg {
+ const char *zsda_name;
+ uint64_t zsda_value;
+ zprop_source_t zsda_source;
+ dmu_tx_t *zsda_tx;
+} zvol_set_snapdev_arg_t;
+
+/*
+ * Sanity check the dataset for safe use by the sync task. No additional
+ * conditions are imposed.
+ */
+static int
+zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
+{
+ zvol_set_snapdev_arg_t *zsda = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd;
+ int error;
+
+ error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL);
+ if (error != 0)
+ return (error);
+
+ dsl_dir_rele(dd, FTAG);
+
+ return (error);
+}
+
+static int
+zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
+{
+ zvol_set_snapdev_arg_t *zsda = arg;
+ char dsname[MAXNAMELEN];
+ zvol_task_t *task;
+
+ dsl_dataset_name(ds, dsname);
+ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
+ zsda->zsda_source, sizeof (zsda->zsda_value), 1,
+ &zsda->zsda_value, zsda->zsda_tx);
+
+ task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
+ NULL, zsda->zsda_value);
+ if (task == NULL)
+ return (0);
+
+ (void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
+ task, TQ_SLEEP);
+ return (0);
+}
+
+/*
+ * Traverse all child snapshot datasets and apply snapdev appropriately.
+ */
+static void
+zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
+{
+ zvol_set_snapdev_arg_t *zsda = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd;
+
+ VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
+ zsda->zsda_tx = tx;
+
+ dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb,
+ zsda, DS_FIND_CHILDREN);
+
+ dsl_dir_rele(dd, FTAG);
+}
+
int
-zvol_set_snapdev(const char *dsname, uint64_t snapdev) {
- fstrans_cookie_t cookie;
+zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
+{
+ zvol_set_snapdev_arg_t zsda;
- if (zvol_inhibit_dev)
- /* caller should continue to modify snapdev property */
- return (-1);
+ zsda.zsda_name = ddname;
+ zsda.zsda_source = source;
+ zsda.zsda_value = snapdev;
- cookie = spl_fstrans_mark();
- (void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb,
- &snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
- spl_fstrans_unmark(cookie);
+ return (dsl_sync_task(ddname, zvol_set_snapdev_check,
+ zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
+}
+
+void
+zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
+{
+ zvol_task_t *task;
+ taskqid_t id;
+
+ task = zvol_task_alloc(ZVOL_ASYNC_CREATE_MINORS, name, NULL, ~0ULL);
+ if (task == NULL)
+ return;
+
+ id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
+ if ((async == B_FALSE) && (id != 0))
+ taskq_wait_id(spa->spa_zvol_taskq, id);
+}
+
+void
+zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+{
+ zvol_task_t *task;
+ taskqid_t id;
+
+ task = zvol_task_alloc(ZVOL_ASYNC_REMOVE_MINORS, name, NULL, ~0ULL);
+ if (task == NULL)
+ return;
- /* caller should continue to modify snapdev property */
- return (-1);
+ id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
+ if ((async == B_FALSE) && (id != 0))
+ taskq_wait_id(spa->spa_zvol_taskq, id);
+}
+
+void
+zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
+ boolean_t async)
+{
+ zvol_task_t *task;
+ taskqid_t id;
+
+ task = zvol_task_alloc(ZVOL_ASYNC_RENAME_MINORS, name1, name2, ~0ULL);
+ if (task == NULL)
+ return;
+
+ id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
+ if ((async == B_FALSE) && (id != 0))
+ taskq_wait_id(spa->spa_zvol_taskq, id);
}
int
@@ -1721,7 +1972,6 @@ zvol_init(void)
list_create(&zvol_state_list, sizeof (zvol_state_t),
offsetof(zvol_state_t, zv_next));
-
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
error = register_blkdev(zvol_major, ZVOL_DRIVER);
@@ -1745,11 +1995,13 @@ out:
void
zvol_fini(void)
{
- zvol_remove_minors(NULL);
+ zvol_remove_minors_impl(NULL);
+
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
unregister_blkdev(zvol_major, ZVOL_DRIVER);
- mutex_destroy(&zvol_state_lock);
+
list_destroy(&zvol_state_list);
+ mutex_destroy(&zvol_state_lock);
}
module_param(zvol_inhibit_dev, uint, 0644);
diff --git a/scripts/zconfig.sh b/scripts/zconfig.sh
index 45ccf62ed..1908dc1d6 100755
--- a/scripts/zconfig.sh
+++ b/scripts/zconfig.sh
@@ -217,15 +217,26 @@ test_3() {
zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \
${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 11
+ # Toggle the snapdev and observe snapshot device links toggled
+ ${ZFS} set snapdev=hidden ${FULL_ZVOL_NAME} || fail 12
+
+ zconfig_zvol_device_stat 7 ${POOL_NAME} ${FULL_ZVOL_NAME} \
+ "invalid" ${FULL_CLONE_NAME} || fail 13
+
+ ${ZFS} set snapdev=visible ${FULL_ZVOL_NAME} || fail 14
+
+ zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \
+ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 15
+
# Destroy the pool and consequently the devices
- ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 12
+ ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 -d || fail 16
# verify the devices were removed
zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \
- ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 13
+ ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 17
- ${ZFS_SH} -u || fail 14
- rm -f ${TMP_CACHE} || fail 15
+ ${ZFS_SH} -u || fail 18
+ rm -f ${TMP_CACHE} || fail 19
pass
}