summary refs log tree commit diff stats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/spa.c 8
-rw-r--r-- module/zfs/spa_misc.c 2
-rw-r--r-- module/zfs/zfs_ctldir.c 14
-rw-r--r-- module/zfs/zfs_vfsops.c 1
-rw-r--r-- module/zfs/zpl_xattr.c 8
-rw-r--r-- module/zfs/zvol.c 248
6 files changed, 195 insertions, 86 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index c55225a10..5203ea826 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1208,7 +1208,7 @@ spa_deactivate(spa_t *spa)
list_destroy(&spa->spa_evicting_os_list);
list_destroy(&spa->spa_state_dirty_list);
- taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+ taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
for (t = 0; t < ZIO_TYPES; t++) {
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
@@ -6515,8 +6515,8 @@ spa_sync(spa_t *spa, uint64_t txg)
tx = dmu_tx_create_assigned(dp, txg);
spa->spa_sync_starttime = gethrtime();
- taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
- spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
+ taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
NSEC_TO_TICK(spa->spa_deadman_synctime));
@@ -6704,7 +6704,7 @@ spa_sync(spa_t *spa, uint64_t txg)
}
dmu_tx_commit(tx);
- taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+ taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
spa->spa_deadman_tqid = 0;
/*
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 909002cf5..8ae5fb559 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -530,7 +530,7 @@ spa_deadman(void *arg)
if (zfs_deadman_enabled)
vdev_deadman(spa->spa_root_vdev);
- spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
NSEC_TO_TICK(spa->spa_deadman_synctime));
}
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
index c7a93edfc..53674d975 100644
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -111,11 +111,6 @@ static krwlock_t zfs_snapshot_lock;
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
int zfs_admin_snapshot = 1;
-/*
- * Dedicated task queue for unmounting snapshots.
- */
-static taskq_t *zfs_expire_taskq;
-
typedef struct {
char *se_name; /* full snapshot name */
char *se_path; /* full mount path */
@@ -365,7 +360,7 @@ zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
{
ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
- if (taskq_cancel_id(zfs_expire_taskq, se->se_taskqid) == 0) {
+ if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
se->se_taskqid = TASKQID_INVALID;
zfsctl_snapshot_rele(se);
}
@@ -383,7 +378,7 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
return;
zfsctl_snapshot_hold(se);
- se->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
+ se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
}
@@ -1257,9 +1252,6 @@ zfsctl_init(void)
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
se_node_objsetid));
rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
-
- zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
- 1, 8, TASKQ_PREPOPULATE);
}
/*
@@ -1269,8 +1261,6 @@ zfsctl_init(void)
void
zfsctl_fini(void)
{
- taskq_destroy(zfs_expire_taskq);
-
avl_destroy(&zfs_snapshots_by_name);
avl_destroy(&zfs_snapshots_by_objsetid);
rw_destroy(&zfs_snapshot_lock);
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 5417f2422..39e92ce21 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1922,6 +1922,7 @@ zfs_fini(void)
/*
* we don't use outstanding because zpl_posix_acl_free might add more.
*/
+ taskq_wait(system_delay_taskq);
taskq_wait(system_taskq);
unregister_filesystem(&zpl_fs_type);
zfs_znode_fini();
diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c
index cec870824..9ab27f1c2 100644
--- a/module/zfs/zpl_xattr.c
+++ b/module/zfs/zpl_xattr.c
@@ -1511,8 +1511,8 @@ zpl_posix_acl_free(void *arg)
}
if (refire)
- taskq_dispatch_delay(system_taskq, zpl_posix_acl_free, NULL,
- TQ_SLEEP, new_time);
+ taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
+ NULL, TQ_SLEEP, new_time);
while (freelist) {
a = freelist;
@@ -1537,7 +1537,7 @@ zpl_posix_acl_release_impl(struct posix_acl *acl)
*prev = a;
/* if it was empty before, schedule the free task */
if (prev == &acl_rel_head)
- taskq_dispatch_delay(system_taskq, zpl_posix_acl_free, NULL,
- TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
+ taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
+ NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
}
#endif
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index ea6997b5b..61d0538a3 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -63,6 +63,11 @@ static kmutex_t zvol_state_lock;
static list_t zvol_state_list;
void *zvol_tag = "zvol_tag";
+#define ZVOL_HT_SIZE 1024
+static struct hlist_head *zvol_htable;
+#define ZVOL_HT_HEAD(hash) (&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
+static DEFINE_IDA(zvol_ida);
+
/*
* The in-core state of each volume.
*/
@@ -81,6 +86,8 @@ typedef struct zvol_state {
struct gendisk *zv_disk; /* generic disk */
struct request_queue *zv_queue; /* request queue */
list_node_t zv_next; /* next zvol_state_t linkage */
+ uint64_t zv_hash; /* name hash */
+ struct hlist_node zv_hlink; /* hash link */
} zvol_state_t;
typedef enum {
@@ -102,30 +109,17 @@ typedef struct {
#define ZVOL_RDONLY 0x1
-/*
- * Find the next available range of ZVOL_MINORS minor numbers. The
- * zvol_state_list is kept in ascending minor order so we simply need
- * to scan the list for the first gap in the sequence. This allows us
- * to recycle minor number as devices are created and removed.
- */
-static int
-zvol_find_minor(unsigned *minor)
+static uint64_t
+zvol_name_hash(const char *name)
{
- zvol_state_t *zv;
-
- *minor = 0;
- ASSERT(MUTEX_HELD(&zvol_state_lock));
- for (zv = list_head(&zvol_state_list); zv != NULL;
- zv = list_next(&zvol_state_list, zv), *minor += ZVOL_MINORS) {
- if (MINOR(zv->zv_dev) != MINOR(*minor))
- break;
+ int i;
+ uint64_t crc = -1ULL;
+ uint8_t *p = (uint8_t *)name;
+ ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
+ for (i = 0; i < MAXNAMELEN - 1 && *p; i++, p++) {
+ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (*p)) & 0xFF];
}
-
- /* All minors are in use */
- if (*minor >= (1 << MINORBITS))
- return (SET_ERROR(ENXIO));
-
- return (0);
+ return (crc);
}
/*
@@ -147,23 +141,33 @@ zvol_find_by_dev(dev_t dev)
}
/*
- * Find a zvol_state_t given the name provided at zvol_alloc() time.
+ * Find a zvol_state_t given the name and hash generated by zvol_name_hash.
*/
static zvol_state_t *
-zvol_find_by_name(const char *name)
+zvol_find_by_name_hash(const char *name, uint64_t hash)
{
zvol_state_t *zv;
+ struct hlist_node *p;
ASSERT(MUTEX_HELD(&zvol_state_lock));
- for (zv = list_head(&zvol_state_list); zv != NULL;
- zv = list_next(&zvol_state_list, zv)) {
- if (strncmp(zv->zv_name, name, MAXNAMELEN) == 0)
+ hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
+ zv = hlist_entry(p, zvol_state_t, zv_hlink);
+ if (zv->zv_hash == hash &&
+ strncmp(zv->zv_name, name, MAXNAMELEN) == 0)
return (zv);
}
-
return (NULL);
}
+/*
+ * Find a zvol_state_t given the name provided at zvol_alloc() time.
+ */
+static zvol_state_t *
+zvol_find_by_name(const char *name)
+{
+ return (zvol_find_by_name_hash(name, zvol_name_hash(name)));
+}
+
/*
* Given a path, return TRUE if path is a ZVOL.
@@ -921,32 +925,26 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
}
/*
- * The zvol_state_t's are inserted in increasing MINOR(dev_t) order.
+ * The zvol_state_t's are inserted into zvol_state_list and zvol_htable.
*/
static void
-zvol_insert(zvol_state_t *zv_insert)
+zvol_insert(zvol_state_t *zv)
{
- zvol_state_t *zv = NULL;
-
ASSERT(MUTEX_HELD(&zvol_state_lock));
- ASSERT3U(MINOR(zv_insert->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
- for (zv = list_head(&zvol_state_list); zv != NULL;
- zv = list_next(&zvol_state_list, zv)) {
- if (MINOR(zv->zv_dev) > MINOR(zv_insert->zv_dev))
- break;
- }
-
- list_insert_before(&zvol_state_list, zv, zv_insert);
+ ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
+ list_insert_head(&zvol_state_list, zv);
+ hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
}
/*
* Simply remove the zvol from to list of zvols.
*/
static void
-zvol_remove(zvol_state_t *zv_remove)
+zvol_remove(zvol_state_t *zv)
{
ASSERT(MUTEX_HELD(&zvol_state_lock));
- list_remove(&zvol_state_list, zv_remove);
+ list_remove(&zvol_state_list, zv);
+ hlist_del(&zv->zv_hlink);
}
static int
@@ -1038,7 +1036,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
/*
* Obtain a copy of private_data under the lock to make sure
- * that either the result of zvol_freeg() setting
+ * that either the result of zvol_free() setting
* bdev->bd_disk->private_data to NULL is observed, or zvol_free()
* is not called on this zv because of the positive zv_open_count.
*/
@@ -1318,12 +1316,13 @@ out_kmem:
}
/*
- * Cleanup then free a zvol_state_t which was created by zvol_alloc().
+ * Used as a taskq callback. If called outside zvol_state_lock, you need to
+ * clear zv_disk->private_data while holding the lock first.
*/
static void
-zvol_free(zvol_state_t *zv)
+zvol_free_impl(void *arg)
{
- ASSERT(MUTEX_HELD(&zvol_state_lock));
+ zvol_state_t *zv = arg;
ASSERT(zv->zv_open_count == 0);
zfs_rlock_destroy(&zv->zv_range_lock);
@@ -1334,10 +1333,21 @@ zvol_free(zvol_state_t *zv)
blk_cleanup_queue(zv->zv_queue);
put_disk(zv->zv_disk);
+ ida_simple_remove(&zvol_ida, MINOR(zv->zv_dev) >> ZVOL_MINOR_BITS);
kmem_free(zv, sizeof (zvol_state_t));
}
/*
+ * Cleanup then free a zvol_state_t which was created by zvol_alloc().
+ */
+static void
+zvol_free(zvol_state_t *zv)
+{
+ ASSERT(MUTEX_HELD(&zvol_state_lock));
+ zvol_free_impl(zv);
+}
+
+/*
* Create a block device minor node and setup the linkage between it
* and the specified volume. Once this function returns the block
* device is live and ready for use.
@@ -1352,10 +1362,17 @@ zvol_create_minor_impl(const char *name)
uint64_t len;
unsigned minor = 0;
int error = 0;
+ int idx;
+ uint64_t hash = zvol_name_hash(name);
+
+ idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP));
+ if (idx < 0)
+ return (SET_ERROR(-idx));
+ minor = idx << ZVOL_MINOR_BITS;
mutex_enter(&zvol_state_lock);
- zv = zvol_find_by_name(name);
+ zv = zvol_find_by_name_hash(name, hash);
if (zv) {
error = SET_ERROR(EEXIST);
goto out;
@@ -1375,15 +1392,12 @@ zvol_create_minor_impl(const char *name)
if (error)
goto out_dmu_objset_disown;
- error = zvol_find_minor(&minor);
- if (error)
- goto out_dmu_objset_disown;
-
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
if (zv == NULL) {
error = SET_ERROR(EAGAIN);
goto out_dmu_objset_disown;
}
+ zv->zv_hash = hash;
if (dmu_objset_is_snapshot(os))
zv->zv_flags |= ZVOL_RDONLY;
@@ -1449,6 +1463,7 @@ out:
add_disk(zv->zv_disk);
} else {
mutex_exit(&zvol_state_lock);
+ ida_simple_remove(&zvol_ida, idx);
}
return (SET_ERROR(error));
@@ -1478,6 +1493,32 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
set_disk_ro(zv->zv_disk, readonly);
}
+typedef struct minors_job {
+ list_t *list;
+ list_node_t link;
+ /* input */
+ char *name;
+ /* output */
+ int error;
+} minors_job_t;
+
+/*
+ * Prefetch zvol dnodes for the minors_job
+ */
+static void
+zvol_prefetch_minors_impl(void *arg)
+{
+ minors_job_t *job = arg;
+ char *dsname = job->name;
+ objset_t *os = NULL;
+
+ job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, zvol_tag,
+ &os);
+ if (job->error == 0) {
+ dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+ dmu_objset_disown(os, zvol_tag);
+ }
+}
/*
* Mask errors to continue dmu_objset_find() traversal
@@ -1485,7 +1526,9 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
static int
zvol_create_snap_minor_cb(const char *dsname, void *arg)
{
- const char *name = (const char *)arg;
+ minors_job_t *j = arg;
+ list_t *minors_list = j->list;
+ const char *name = j->name;
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
@@ -1498,7 +1541,19 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
dprintf("zvol_create_snap_minor_cb(): "
"%s is not a shapshot name\n", dsname);
} else {
- (void) zvol_create_minor_impl(dsname);
+ minors_job_t *job;
+ char *n = strdup(dsname);
+ if (n == NULL)
+ return (0);
+
+ job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
+ job->name = n;
+ job->list = minors_list;
+ job->error = 0;
+ list_insert_tail(minors_list, job);
+ /* don't care if dispatch fails, because job->error is 0 */
+ taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
+ TQ_SLEEP);
}
return (0);
@@ -1512,6 +1567,7 @@ zvol_create_minors_cb(const char *dsname, void *arg)
{
uint64_t snapdev;
int error;
+ list_t *minors_list = arg;
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
@@ -1527,19 +1583,28 @@ zvol_create_minors_cb(const char *dsname, void *arg)
* snapshots and create device minor nodes for those.
*/
if (strchr(dsname, '@') == 0) {
- /* create minor for the 'dsname' explicitly */
- error = zvol_create_minor_impl(dsname);
- if ((error == 0 || error == EEXIST) &&
- (snapdev == ZFS_SNAPDEV_VISIBLE)) {
- fstrans_cookie_t cookie = spl_fstrans_mark();
+ minors_job_t *job;
+ char *n = strdup(dsname);
+ if (n == NULL)
+ return (0);
+
+ job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
+ job->name = n;
+ job->list = minors_list;
+ job->error = 0;
+ list_insert_tail(minors_list, job);
+ /* don't care if dispatch fails, because job->error is 0 */
+ taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
+ TQ_SLEEP);
+
+ if (snapdev == ZFS_SNAPDEV_VISIBLE) {
/*
* traverse snapshots only, do not traverse children,
* and skip the 'dsname'
*/
error = dmu_objset_find((char *)dsname,
- zvol_create_snap_minor_cb, (void *)dsname,
+ zvol_create_snap_minor_cb, (void *)job,
DS_FIND_SNAPSHOTS);
- spl_fstrans_unmark(cookie);
}
} else {
dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
@@ -1572,10 +1637,24 @@ zvol_create_minors_impl(const char *name)
int error = 0;
fstrans_cookie_t cookie;
char *atp, *parent;
+ list_t minors_list;
+ minors_job_t *job;
if (zvol_inhibit_dev)
return (0);
+ /*
+ * This is the list for prefetch jobs. Whenever we find a match during
+ * dmu_objset_find, we insert a minors_job into the list and do
+ * taskq_dispatch to prefetch zvol dnodes in parallel. No lock is
+ * needed because all list operations are done on the current thread.
+ *
+ * We will use this list to do zvol_create_minor_impl after prefetch
+ * so we don't have to traverse using dmu_objset_find again.
+ */
+ list_create(&minors_list, sizeof (minors_job_t),
+ offsetof(minors_job_t, link));
+
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) strlcpy(parent, name, MAXPATHLEN);
@@ -1591,11 +1670,26 @@ zvol_create_minors_impl(const char *name)
} else {
cookie = spl_fstrans_mark();
error = dmu_objset_find(parent, zvol_create_minors_cb,
- NULL, DS_FIND_CHILDREN);
+ &minors_list, DS_FIND_CHILDREN);
spl_fstrans_unmark(cookie);
}
kmem_free(parent, MAXPATHLEN);
+ taskq_wait_outstanding(system_taskq, 0);
+
+ /*
+ * Prefetch is completed, we can do zvol_create_minor_impl
+ * sequentially.
+ */
+ while ((job = list_head(&minors_list)) != NULL) {
+ list_remove(&minors_list, job);
+ if (!job->error)
+ zvol_create_minor_impl(job->name);
+ strfree(job->name);
+ kmem_free(job, sizeof (minors_job_t));
+ }
+
+ list_destroy(&minors_list);
return (SET_ERROR(error));
}
@@ -1608,6 +1702,7 @@ zvol_remove_minors_impl(const char *name)
{
zvol_state_t *zv, *zv_next;
int namelen = ((name) ? strlen(name) : 0);
+ taskqid_t t, tid = TASKQID_INVALID;
if (zvol_inhibit_dev)
return;
@@ -1627,11 +1722,22 @@ zvol_remove_minors_impl(const char *name)
continue;
zvol_remove(zv);
- zvol_free(zv);
+
+ /* clear this so zvol_open won't open it */
+ zv->zv_disk->private_data = NULL;
+
+ /* try parallel zvol_free, if failed do it in place */
+ t = taskq_dispatch(system_taskq, zvol_free_impl, zv,
+ TQ_SLEEP);
+ if (t == TASKQID_INVALID)
+ zvol_free(zv);
+ else
+ tid = t;
}
}
-
mutex_exit(&zvol_state_lock);
+ if (tid != TASKQID_INVALID)
+ taskq_wait_outstanding(system_taskq, tid);
}
/* Remove minor for this specific snapshot only */
@@ -1933,16 +2039,25 @@ zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
int
zvol_init(void)
{
- int error;
+ int i, error;
list_create(&zvol_state_list, sizeof (zvol_state_t),
offsetof(zvol_state_t, zv_next));
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
+ zvol_htable = kmem_alloc(ZVOL_HT_SIZE * sizeof (struct hlist_head),
+ KM_SLEEP);
+ if (!zvol_htable) {
+ error = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ZVOL_HT_SIZE; i++)
+ INIT_HLIST_HEAD(&zvol_htable[i]);
+
error = register_blkdev(zvol_major, ZVOL_DRIVER);
if (error) {
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
- goto out;
+ goto out_free;
}
blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
@@ -1950,6 +2065,8 @@ zvol_init(void)
return (0);
+out_free:
+ kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
out:
mutex_destroy(&zvol_state_lock);
list_destroy(&zvol_state_list);
@@ -1964,6 +2081,7 @@ zvol_fini(void)
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
unregister_blkdev(zvol_major, ZVOL_DRIVER);
+ kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
list_destroy(&zvol_state_list);
mutex_destroy(&zvol_state_lock);