aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Brian Behlendorf <[email protected]> 2009-08-04 15:59:15 -0700
committer Brian Behlendorf <[email protected]> 2009-08-04 15:59:15 -0700
commit 5ad5798f92ae10613a07091138b6352f9dc86e8c (patch)
tree 777623b8db95f6b4492ee29bad22b016b8aec7a1
parent d36e7b59a27ef9ecfbf4c9104522250435d756a7 (diff)
parent dfb7dba62e1e86993931a8543b66af6f4f16af96 (diff)
Merge commit 'refs/top-bases/linux-configure-branch' into linux-configure-branch
-rw-r--r-- ChangeLog 3
-rw-r--r-- cmd/ztest/ztest.c 40
-rw-r--r-- lib/libzpool/include/sys/zfs_context.h 20
-rw-r--r-- lib/libzpool/kernel.c 182
-rw-r--r-- lib/libzpool/taskq.c 22
-rw-r--r-- module/zfs/txg.c 4
6 files changed, 136 insertions, 135 deletions
diff --git a/ChangeLog b/ChangeLog
index 2e9d58288..73d92055d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -54,9 +54,6 @@
* module/zcommon/zfs_prop.c: Export new quota related symbols.
* lib/libzfs/libzfs_util.c: Increase buffer size for nvlist which
is needed for large configurations.
- * lib/libzpool/kernel.c: Reimplement user kthreads based on pthread
- thread specific data. This resolves previous scalabily concerns about
- the cost of calling curthread which previously required a list walk.
* Bug fixes:
* lib/libspl/include/sys/zfs_debug.h: Removed duplicate file.
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 64e6eb690..7da8f1f4d 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -141,6 +141,7 @@ typedef struct ztest_args {
objset_t *za_os;
zilog_t *za_zilog;
kthread_t *za_thread;
+ kt_did_t za_threadid;
uint64_t za_instance;
uint64_t za_random;
uint64_t za_diroff;
@@ -156,7 +157,6 @@ typedef struct ztest_args {
ztest_block_tag_t za_wbt;
dmu_object_info_t za_doi;
dmu_buf_t *za_dbuf;
- boolean_t za_exited;
} ztest_args_t;
typedef void ztest_func_t(ztest_args_t *);
@@ -253,8 +253,6 @@ typedef struct ztest_shared {
kmutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
uint64_t zs_seq[ZTEST_SYNC_LOCKS];
ztest_cb_list_t zs_cb_list;
- kmutex_t zs_thr_lock;
- kcondvar_t zs_thr_cv;
} ztest_shared_t;
static char ztest_dev_template[] = "%s/%s.%llua";
@@ -266,7 +264,6 @@ static int ztest_dump_core = 1;
static uint64_t metaslab_sz;
static boolean_t ztest_exiting;
-static boolean_t resume_thr_exited;
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
@@ -3812,8 +3809,6 @@ ztest_resume_thread(void *arg)
ztest_resume(spa);
}
- resume_thr_exited = B_TRUE;
-
thread_exit();
return (NULL);
}
@@ -3879,13 +3874,6 @@ ztest_thread(void *arg)
break;
}
- mutex_enter(&zs->zs_thr_lock);
- za->za_exited = B_TRUE;
- mutex_exit(&zs->zs_thr_lock);
-
- /* Announce that the thread has finished */
- cv_broadcast(&zs->zs_thr_cv);
-
thread_exit();
return (NULL);
}
@@ -3902,14 +3890,13 @@ ztest_run(char *pool)
spa_t *spa;
char name[100];
kthread_t *resume_thread;
+ kt_did_t resume_id;
ztest_exiting = B_FALSE;
mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zs->zs_cb_list.zcl_callbacks_lock,NULL,MUTEX_DEFAULT,NULL);
- mutex_init(&zs->zs_thr_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&zs->zs_thr_cv, NULL, CV_DEFAULT, NULL);
list_create(&zs->zs_cb_list.zcl_callbacks, sizeof (ztest_cb_data_t),
offsetof(ztest_cb_data_t, zcd_node));
@@ -3982,9 +3969,9 @@ ztest_run(char *pool)
/*
* Create a thread to periodically resume suspended I/O.
*/
- resume_thr_exited = B_FALSE;
VERIFY3P((resume_thread = thread_create(NULL, 0, ztest_resume_thread,
- spa, TS_RUN, NULL, 0, 0)), !=, NULL);
+ spa, THR_BOUND, NULL, 0, 0)), !=, NULL);
+ resume_id = resume_thread->t_tid;
/*
* Verify that we can safely inquire about about any object,
@@ -4060,18 +4047,13 @@ ztest_run(char *pool)
za[d].za_zilog = zil_open(za[d].za_os, NULL);
}
- za[t].za_exited = B_FALSE;
-
VERIFY3P((za[t].za_thread = thread_create(NULL, 0, ztest_thread,
- &za[t], TS_RUN, NULL, 0, 0)), !=, NULL);
+ &za[t], THR_BOUND, NULL, 0, 0)), !=, NULL);
+ za[t].za_threadid = za[t].za_thread->t_tid;
}
while (--t >= 0) {
- mutex_enter(&zs->zs_thr_lock);
- while (!za[t].za_exited)
- cv_wait(&zs->zs_thr_cv, &zs->zs_thr_lock);
- mutex_exit(&zs->zs_thr_lock);
-
+ VERIFY(thread_join(za[t].za_threadid, NULL, NULL) == 0);
if (t < zopt_datasets) {
zil_close(za[t].za_zilog);
dmu_objset_close(za[t].za_os);
@@ -4110,11 +4092,7 @@ ztest_run(char *pool)
/* Kill the resume thread */
ztest_exiting = B_TRUE;
-
- /* Wait for the resume thread to exit */
- while (!resume_thr_exited)
- (void) poll(NULL, 0, 200);
-
+ VERIFY(thread_join(resume_id, NULL, NULL) == 0);
ztest_resume(spa);
/*
@@ -4130,8 +4108,6 @@ ztest_run(char *pool)
list_destroy(&zs->zs_cb_list.zcl_callbacks);
- cv_destroy(&zs->zs_thr_cv);
- mutex_destroy(&zs->zs_thr_lock);
mutex_destroy(&zs->zs_cb_list.zcl_callbacks_lock);
rw_destroy(&zs->zs_name_lock);
mutex_destroy(&zs->zs_vdev_lock);
diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h
index 0475ce093..9377dab2f 100644
--- a/lib/libzpool/include/sys/zfs_context.h
+++ b/lib/libzpool/include/sys/zfs_context.h
@@ -153,34 +153,34 @@ extern void vpanic(const char *, __va_list);
/*
* Threads
*/
+#define THR_BOUND 0x00000001
#define TS_RUN 0x00000002
-#define STACK_SIZE 8192 /* x86/x64 */
-
-typedef void (*thread_func_t)(void);
-typedef void (*thread_func_arg_t)(void *);
+typedef void (*thread_func_t)(void *);
typedef pthread_t kt_did_t;
typedef struct kthread {
+ list_node_t t_node;
kt_did_t t_tid;
- thread_func_t t_func;
- void * t_arg;
+ pthread_attr_t t_attr;
} kthread_t;
-/* XXX tsd_create()/tsd_destroy() missing */
#define tsd_get(key) pthread_getspecific(key)
#define tsd_set(key, val) pthread_setspecific(key, val)
#define curthread zk_thread_current()
#define thread_exit zk_thread_exit
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
- zk_thread_create(stk, stksize, (thread_func_t) func, arg, len, \
- NULL, state, pri)
+ zk_thread_create(stk, stksize, (thread_func_t)func, arg, \
+ len, NULL, state, pri)
+#define thread_join(tid, dtid, status) \
+ zk_thread_join(tid, dtid, status)
extern kthread_t *zk_thread_current(void);
extern void zk_thread_exit(void);
extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize,
thread_func_t func, void *arg, size_t len,
void *pp, int state, pri_t pri);
+extern int zk_thread_join(kt_did_t tid, kthread_t *dtid, void **status);
#define issig(why) (FALSE)
#define ISSIG(thr, why) (FALSE)
@@ -315,7 +315,7 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
-extern int taskq_member(taskq_t *, kthread_t *);
+extern int taskq_member(taskq_t *, void *);
extern void system_taskq_init(void);
extern void system_taskq_fini(void);
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index fcd0f2871..ab97636ba 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -57,141 +57,155 @@ struct utsname utsname = {
* =========================================================================
*/
+/* NOTE: Tracking each tid on a list and using it for curthread lookups
+ * is slow at best, but it is an easy way to provide a kthread-style
+ * API on top of pthreads. For now we just want ztest to work so we
+ * can validate correctness. Performance is not much of an issue
+ * since that is what the in-kernel version is for. That said,
+ * reworking this to track the kthread_t structure as thread-specific
+ * data would probably be the best way to speed this up.
+ */
+
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
-pthread_key_t kthread_key;
-int kthread_nr = 0;
+list_t kthread_list;
+
+static int
+thread_count(void)
+{
+ kthread_t *kt;
+ int count = 0;
+
+ for (kt = list_head(&kthread_list); kt != NULL;
+ kt = list_next(&kthread_list, kt))
+ count++;
+
+ return count;
+}
static void
thread_init(void)
{
kthread_t *kt;
- VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
+ /* Initialize list for tracking kthreads */
+ list_create(&kthread_list, sizeof (kthread_t),
+ offsetof(kthread_t, t_node));
/* Create entry for primary kthread */
kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
- kt->t_tid = pthread_self();
- kt->t_func = NULL;
-
- VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
-
- /* Only the main thread should be running at the moment */
- ASSERT3S(kthread_nr, ==, 0);
- kthread_nr = 1;
+ list_link_init(&kt->t_node);
+ VERIFY3U(kt->t_tid = pthread_self(), !=, 0);
+ VERIFY3S(pthread_attr_init(&kt->t_attr), ==, 0);
+ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+ list_insert_head(&kthread_list, kt);
+ VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
}
static void
thread_fini(void)
{
- kthread_t *kt = curthread;
-
- ASSERT(pthread_equal(kt->t_tid, pthread_self()));
- ASSERT3P(kt->t_func, ==, NULL);
-
- umem_free(kt, sizeof(kthread_t));
+ kthread_t *kt;
+ struct timespec ts = { 0 };
+ int count;
/* Wait for all threads to exit via thread_exit() */
VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+ while ((count = thread_count()) > 1) {
+ clock_gettime(CLOCK_REALTIME, &ts);
+ ts.tv_sec += 1;
+ pthread_cond_timedwait(&kthread_cond, &kthread_lock, &ts);
+ }
- kthread_nr--; /* Main thread is exiting */
-
- while (kthread_nr > 0)
- VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
- 0);
-
- ASSERT3S(kthread_nr, ==, 0);
+ ASSERT3S(thread_count(), ==, 1);
+ kt = list_head(&kthread_list);
+ list_remove(&kthread_list, kt);
VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
- VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
+ VERIFY(pthread_attr_destroy(&kt->t_attr) == 0);
+ umem_free(kt, sizeof(kthread_t));
+
+ /* Cleanup list for tracking kthreads */
+ list_destroy(&kthread_list);
}
kthread_t *
zk_thread_current(void)
{
- kthread_t *kt = pthread_getspecific(kthread_key);
-
- ASSERT3P(kt, !=, NULL);
-
- return kt;
-}
-
-void *
-zk_thread_helper(void *arg)
-{
- kthread_t *kt = (kthread_t *) arg;
-
- VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
-
- VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
- kthread_nr++;
- VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
-
- kt->t_tid = pthread_self();
- ((thread_func_arg_t) kt->t_func)(kt->t_arg);
+ kt_did_t tid = pthread_self();
+ kthread_t *kt;
+ int count = 1;
- /* Unreachable, thread must exit with thread_exit() */
- abort();
+ /*
+ * A newly created thread may call zk_thread_current() before its
+ * parent has had time to record the thread's tid in our lookup
+ * list. So keep looping as long as any entry still has an unset
+ * tid, since one of those entries must be ours.
+ * Yes it's a hack, at some point we can just use native pthreads.
+ */
+ while (count > 0) {
+ count = 0;
+ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+ for (kt = list_head(&kthread_list); kt != NULL;
+ kt = list_next(&kthread_list, kt)) {
+
+ if (kt->t_tid == tid) {
+ VERIFY3S(pthread_mutex_unlock(
+ &kthread_lock), ==, 0);
+ return kt;
+ }
+
+ if (kt->t_tid == (kt_did_t)-1)
+ count++;
+ }
+ VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
+ }
+ /* Unreachable */
+ ASSERT(0);
return NULL;
}
kthread_t *
-zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
+zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
size_t len, void *pp, int state, pri_t pri)
{
kthread_t *kt;
- pthread_t tid;
- pthread_attr_t attr;
- size_t stack;
-
- /*
- * Due to a race when getting/setting the thread ID, currently only
- * detached threads are supported.
- */
- ASSERT3S(state & ~TS_RUN, ==, 0);
kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
- kt->t_func = func;
- kt->t_arg = arg;
-
- /*
- * The Solaris kernel stack size in x86/x64 is 8K, so we reduce the
- * default stack size in userspace, for sanity checking.
- *
- * PTHREAD_STACK_MIN is the stack required for a NULL procedure in
- * userspace.
- *
- * XXX: Stack size for other architectures is not being taken into
- * account.
- */
- stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE);
-
- VERIFY3S(pthread_attr_init(&attr), ==, 0);
- VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
- VERIFY3S(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED),
- ==, 0);
+ kt->t_tid = (kt_did_t)-1;
+ list_link_init(&kt->t_node);
+ VERIFY(pthread_attr_init(&kt->t_attr) == 0);
- VERIFY3S(pthread_create(&tid, &attr, &zk_thread_helper, kt), ==, 0);
+ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+ list_insert_head(&kthread_list, kt);
+ VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
- VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
+ VERIFY3U(pthread_create(&kt->t_tid, &kt->t_attr,
+ (void *(*)(void *))func, arg), ==, 0);
return kt;
}
+int
+zk_thread_join(kt_did_t tid, kthread_t *dtid, void **status)
+{
+ return pthread_join(tid, status);
+}
+
void
zk_thread_exit(void)
{
- kthread_t *kt = curthread;
+ kthread_t *kt;
- ASSERT(pthread_equal(kt->t_tid, pthread_self()));
+ VERIFY3P(kt = curthread, !=, NULL);
+ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+ list_remove(&kthread_list, kt);
+ VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
+ VERIFY(pthread_attr_destroy(&kt->t_attr) == 0);
umem_free(kt, sizeof(kthread_t));
- pthread_mutex_lock(&kthread_lock);
- kthread_nr--;
- pthread_mutex_unlock(&kthread_lock);
-
pthread_cond_broadcast(&kthread_cond);
pthread_exit(NULL);
}
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
index 1efdf1d6f..42e2dd3f4 100644
--- a/lib/libzpool/taskq.c
+++ b/lib/libzpool/taskq.c
@@ -43,6 +43,7 @@ struct taskq {
kcondvar_t tq_dispatch_cv;
kcondvar_t tq_wait_cv;
kthread_t **tq_threadlist;
+ kt_did_t *tq_idlist;
int tq_flags;
int tq_active;
int tq_nthreads;
@@ -134,7 +135,7 @@ taskq_wait(taskq_t *tq)
mutex_exit(&tq->tq_lock);
}
-static void
+static void *
taskq_thread(void *arg)
{
taskq_t *tq = arg;
@@ -164,6 +165,7 @@ taskq_thread(void *arg)
cv_broadcast(&tq->tq_wait_cv);
mutex_exit(&tq->tq_lock);
thread_exit();
+ return (NULL);
}
/*ARGSUSED*/
@@ -198,8 +200,10 @@ taskq_create(const char *name, int nthreads, pri_t pri,
tq->tq_maxalloc = maxalloc;
tq->tq_task.task_next = &tq->tq_task;
tq->tq_task.task_prev = &tq->tq_task;
- tq->tq_threadlist = kmem_alloc(tq->tq_nthreads * sizeof(kthread_t *),
- KM_SLEEP);
+ VERIFY3P((tq->tq_threadlist = kmem_alloc(tq->tq_nthreads *
+ sizeof(kthread_t *), KM_SLEEP)), !=, NULL);
+ VERIFY3P((tq->tq_idlist = kmem_alloc(tq->tq_nthreads *
+ sizeof(kt_did_t), KM_SLEEP)), !=, NULL);
if (flags & TASKQ_PREPOPULATE) {
mutex_enter(&tq->tq_lock);
@@ -210,7 +214,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
for (t = 0; t < tq->tq_nthreads; t++) {
VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0,
- taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL);
+ taskq_thread, tq, THR_BOUND, NULL, 0, 0)) != NULL);
+ tq->tq_idlist[t] = tq->tq_threadlist[t]->t_tid;
}
return (tq);
@@ -219,6 +224,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
void
taskq_destroy(taskq_t *tq)
{
+ int t;
int nthreads = tq->tq_nthreads;
taskq_wait(tq);
@@ -239,7 +245,11 @@ taskq_destroy(taskq_t *tq)
mutex_exit(&tq->tq_lock);
+ for (t = 0; t < nthreads; t++)
+ VERIFY3S(thread_join(tq->tq_idlist[t], NULL, NULL), ==, 0);
+
kmem_free(tq->tq_threadlist, nthreads * sizeof(kthread_t *));
+ kmem_free(tq->tq_idlist, nthreads * sizeof(kt_did_t));
rw_destroy(&tq->tq_threadlock);
mutex_destroy(&tq->tq_lock);
@@ -250,7 +260,7 @@ taskq_destroy(taskq_t *tq)
}
int
-taskq_member(taskq_t *tq, kthread_t *t)
+taskq_member(taskq_t *tq, void *t)
{
int i;
@@ -258,7 +268,7 @@ taskq_member(taskq_t *tq, kthread_t *t)
return (1);
for (i = 0; i < tq->tq_nthreads; i++)
- if (tq->tq_threadlist[i] == t)
+ if (tq->tq_threadlist[i] == (kthread_t *)t)
return (1);
return (0);
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index afc4c8332..3d82990f5 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -446,6 +446,8 @@ txg_sync_thread(dsl_pool_t *dp)
rw_exit(&tx->tx_suspend);
cv_broadcast(&tx->tx_sync_done_cv);
}
+
+ thread_exit();
}
static void
@@ -490,6 +492,8 @@ txg_quiesce_thread(dsl_pool_t *dp)
cv_broadcast(&tx->tx_sync_more_cv);
cv_broadcast(&tx->tx_quiesce_done_cv);
}
+
+ thread_exit();
}
/*