summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- cmd/ztest/ztest.c 239
-rw-r--r-- lib/libuutil/uu_misc.c 7
-rw-r--r-- lib/libzpool/include/sys/zfs_context.h 123
-rw-r--r-- lib/libzpool/kernel.c 340
-rw-r--r-- lib/libzpool/taskq.c 22
5 files changed, 480 insertions, 251 deletions
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index b5cee5b3b..930342d2e 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -57,6 +57,9 @@
* the transaction group number is less than the current, open txg.
* If you add a new test, please do this if applicable.
*
+ * (7) Threads are created with a reduced stack size, for sanity checking.
+ * Therefore, it's important not to allocate huge buffers on the stack.
+ *
* When run with no arguments, ztest runs for about five minutes and
* produces no output if successful. To get a little bit of information,
* specify -V. To get more information, specify -VV, and so on.
@@ -168,8 +171,8 @@ typedef enum {
typedef struct rll {
void *rll_writer;
int rll_readers;
- mutex_t rll_lock;
- cond_t rll_cv;
+ kmutex_t rll_lock;
+ kcondvar_t rll_cv;
} rll_t;
typedef struct rl {
@@ -206,7 +209,7 @@ typedef struct ztest_ds {
uint64_t zd_seq;
ztest_od_t *zd_od; /* debugging aid */
char zd_name[MAXNAMELEN];
- mutex_t zd_dirobj_lock;
+ kmutex_t zd_dirobj_lock;
rll_t zd_object_lock[ZTEST_OBJECT_LOCKS];
rll_t zd_range_lock[ZTEST_RANGE_LOCKS];
} ztest_ds_t;
@@ -299,8 +302,8 @@ ztest_info_t ztest_info[] = {
* The callbacks are ordered by txg number.
*/
typedef struct ztest_cb_list {
- mutex_t zcl_callbacks_lock;
- list_t zcl_callbacks;
+ kmutex_t zcl_callbacks_lock;
+ list_t zcl_callbacks;
} ztest_cb_list_t;
/*
@@ -319,8 +322,8 @@ typedef struct ztest_shared {
uint64_t zs_vdev_aux;
uint64_t zs_alloc;
uint64_t zs_space;
- mutex_t zs_vdev_lock;
- rwlock_t zs_name_lock;
+ kmutex_t zs_vdev_lock;
+ krwlock_t zs_name_lock;
ztest_info_t zs_info[ZTEST_FUNCS];
uint64_t zs_splits;
uint64_t zs_mirrors;
@@ -892,8 +895,8 @@ ztest_rll_init(rll_t *rll)
{
rll->rll_writer = NULL;
rll->rll_readers = 0;
- VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
- VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
+ mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL);
}
static void
@@ -901,32 +904,32 @@ ztest_rll_destroy(rll_t *rll)
{
ASSERT(rll->rll_writer == NULL);
ASSERT(rll->rll_readers == 0);
- VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
- VERIFY(cond_destroy(&rll->rll_cv) == 0);
+ mutex_destroy(&rll->rll_lock);
+ cv_destroy(&rll->rll_cv);
}
static void
ztest_rll_lock(rll_t *rll, rl_type_t type)
{
- VERIFY(mutex_lock(&rll->rll_lock) == 0);
+ mutex_enter(&rll->rll_lock);
if (type == RL_READER) {
while (rll->rll_writer != NULL)
- (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+ (void) cv_wait(&rll->rll_cv, &rll->rll_lock);
rll->rll_readers++;
} else {
while (rll->rll_writer != NULL || rll->rll_readers)
- (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+ (void) cv_wait(&rll->rll_cv, &rll->rll_lock);
rll->rll_writer = curthread;
}
- VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+ mutex_exit(&rll->rll_lock);
}
static void
ztest_rll_unlock(rll_t *rll)
{
- VERIFY(mutex_lock(&rll->rll_lock) == 0);
+ mutex_enter(&rll->rll_lock);
if (rll->rll_writer) {
ASSERT(rll->rll_readers == 0);
@@ -938,9 +941,9 @@ ztest_rll_unlock(rll_t *rll)
}
if (rll->rll_writer == NULL && rll->rll_readers == 0)
- VERIFY(cond_broadcast(&rll->rll_cv) == 0);
+ cv_broadcast(&rll->rll_cv);
- VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+ mutex_exit(&rll->rll_lock);
}
static void
@@ -997,7 +1000,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os)
dmu_objset_name(os, zd->zd_name);
int l;
- VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
+ mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL);
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
ztest_rll_init(&zd->zd_object_lock[l]);
@@ -1011,7 +1014,7 @@ ztest_zd_fini(ztest_ds_t *zd)
{
int l;
- VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
+ mutex_destroy(&zd->zd_dirobj_lock);
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
ztest_rll_destroy(&zd->zd_object_lock[l]);
@@ -1754,7 +1757,7 @@ ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
int error;
int i;
- ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+ ASSERT(mutex_held(&zd->zd_dirobj_lock));
for (i = 0; i < count; i++, od++) {
od->od_object = 0;
@@ -1795,7 +1798,7 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
int missing = 0;
int i;
- ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+ ASSERT(mutex_held(&zd->zd_dirobj_lock));
for (i = 0; i < count; i++, od++) {
if (missing) {
@@ -1841,7 +1844,7 @@ ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
int error;
int i;
- ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+ ASSERT(mutex_held(&zd->zd_dirobj_lock));
od += count - 1;
@@ -2057,13 +2060,13 @@ ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
int count = size / sizeof (*od);
int rv = 0;
- VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
+ mutex_enter(&zd->zd_dirobj_lock);
if ((ztest_lookup(zd, od, count) != 0 || remove) &&
(ztest_remove(zd, od, count) != 0 ||
ztest_create(zd, od, count) != 0))
rv = -1;
zd->zd_od = od;
- VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
+ mutex_exit(&zd->zd_dirobj_lock);
return (rv);
}
@@ -2119,7 +2122,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(EEXIST, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
@@ -2127,7 +2130,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
VERIFY3U(EBUSY, ==, spa_destroy(zs->zs_pool));
spa_close(spa, FTAG);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
static vdev_t *
@@ -2181,7 +2184,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
nvlist_t *nvroot;
int error;
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -2207,9 +2210,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
* dmu_objset_destroy() to fail with EBUSY thus
* leaving the dataset in an inconsistent state.
*/
- VERIFY(rw_wrlock(&ztest_shared->zs_name_lock) == 0);
+ rw_enter(&ztest_shared->zs_name_lock, RW_WRITER);
error = spa_vdev_remove(spa, guid, B_FALSE);
- VERIFY(rw_unlock(&ztest_shared->zs_name_lock) == 0);
+ rw_exit(&ztest_shared->zs_name_lock);
if (error && error != EEXIST)
fatal(0, "spa_vdev_remove() = %d", error);
@@ -2231,7 +2234,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
fatal(0, "spa_vdev_add() = %d", error);
}
- VERIFY(mutex_unlock(&ztest_shared->zs_vdev_lock) == 0);
+ mutex_exit(&ztest_shared->zs_vdev_lock);
}
/*
@@ -2257,7 +2260,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
aux = ZPOOL_CONFIG_L2CACHE;
}
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -2313,7 +2316,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
}
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
}
/*
@@ -2330,11 +2333,11 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
uint_t c, children, schildren = 0, lastlogid = 0;
int error = 0;
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
/* ensure we have a useable config; mirrors of raidz aren't supported */
if (zs->zs_mirrors < 3 || zopt_raidz > 1) {
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
return;
}
@@ -2393,9 +2396,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_VDEV, FTAG);
- (void) rw_wrlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_WRITER);
error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
nvlist_free(config);
@@ -2408,7 +2411,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
++zs->zs_splits;
--zs->zs_mirrors;
}
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
}
@@ -2437,7 +2440,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
int oldvd_is_log;
int error, expected_error;
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -2498,7 +2501,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
if (error != 0 && error != ENODEV && error != EBUSY &&
error != ENOTSUP)
fatal(0, "detach (%s) returned %d", oldpath, error);
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
return;
}
@@ -2591,7 +2594,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
(longlong_t)newsize, replacing, error, expected_error);
}
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
}
/*
@@ -2722,7 +2725,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
uint64_t top;
uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
spa_config_enter(spa, SCL_STATE, spa, RW_READER);
top = ztest_random_vdev_top(spa, B_TRUE);
@@ -2750,7 +2753,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
psize == 0 || psize >= 4 * zopt_vdev_size) {
spa_config_exit(spa, SCL_STATE, spa);
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
return;
}
ASSERT(psize > 0);
@@ -2775,7 +2778,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
"the vdev configuration changed.\n");
}
spa_config_exit(spa, SCL_STATE, spa);
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
return;
}
@@ -2809,7 +2812,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
"intervening vdev offline or remove.\n");
}
spa_config_exit(spa, SCL_STATE, spa);
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
return;
}
@@ -2837,7 +2840,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
}
spa_config_exit(spa, SCL_STATE, spa);
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
}
/*
@@ -2945,7 +2948,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
zilog_t *zilog;
int i;
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
(void) snprintf(name, MAXNAMELEN, "%s/temp_%llu",
zs->zs_pool, (u_longlong_t)id);
@@ -2983,7 +2986,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
return;
}
fatal(0, "dmu_objset_create(%s) = %d", name, error);
@@ -3032,7 +3035,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
dmu_objset_disown(os, FTAG);
ztest_zd_fini(&zdtmp);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/*
@@ -3043,10 +3046,10 @@ ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
{
ztest_shared_t *zs = ztest_shared;
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
(void) ztest_snapshot_destroy(zd->zd_name, id);
(void) ztest_snapshot_create(zd->zd_name, id);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/*
@@ -3107,7 +3110,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
char *osname = zd->zd_name;
int error;
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
ztest_dsl_dataset_cleanup(osname, id);
@@ -3192,7 +3195,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
out:
ztest_dsl_dataset_cleanup(osname, id);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/*
@@ -4111,7 +4114,7 @@ ztest_commit_callback(void *arg, int error)
ASSERT(data->zcd_added);
ASSERT3U(data->zcd_txg, !=, 0);
- (void) mutex_lock(&zcl.zcl_callbacks_lock);
+ (void) mutex_enter(&zcl.zcl_callbacks_lock);
/* See if this cb was called more quickly */
if ((synced_txg - data->zcd_txg) < zc_min_txg_delay)
@@ -4120,7 +4123,7 @@ ztest_commit_callback(void *arg, int error)
/* Remove our callback from the list */
list_remove(&zcl.zcl_callbacks, data);
- (void) mutex_unlock(&zcl.zcl_callbacks_lock);
+ (void) mutex_exit(&zcl.zcl_callbacks_lock);
umem_free(data, sizeof (ztest_cb_data_t));
}
@@ -4135,6 +4138,7 @@ ztest_create_cb_data(objset_t *os, uint64_t txg)
cb_data->zcd_txg = txg;
cb_data->zcd_spa = dmu_objset_spa(os);
+ list_link_init(&cb_data->zcd_node);
return (cb_data);
}
@@ -4214,7 +4218,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx);
- (void) mutex_lock(&zcl.zcl_callbacks_lock);
+ (void) mutex_enter(&zcl.zcl_callbacks_lock);
/*
* Since commit callbacks don't have any ordering requirement and since
@@ -4263,7 +4267,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
zc_cb_counter += 3;
- (void) mutex_unlock(&zcl.zcl_callbacks_lock);
+ (void) mutex_exit(&zcl.zcl_callbacks_lock);
dmu_tx_commit(tx);
}
@@ -4281,13 +4285,13 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
ztest_shared_t *zs = ztest_shared;
int p;
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
for (p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
(void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/* ARGSUSED */
@@ -4297,7 +4301,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
ztest_shared_t *zs = ztest_shared;
nvlist_t *props = NULL;
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
(void) ztest_spa_prop_set_uint64(zs, ZPOOL_PROP_DEDUPDITTO,
ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
@@ -4309,7 +4313,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
nvlist_free(props);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/*
@@ -4327,14 +4331,14 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
char tag[100];
char osname[MAXNAMELEN];
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
+ (void) rw_enter(&ztest_shared->zs_name_lock, RW_READER);
dmu_objset_name(os, osname);
- (void) snprintf(snapname, 100, "sh1_%llu", id);
+ (void) snprintf(snapname, 100, "sh1_%llu", (u_longlong_t)id);
(void) snprintf(fullname, 100, "%s@%s", osname, snapname);
- (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id);
- (void) snprintf(tag, 100, "%tag_%llu", id);
+ (void) snprintf(clonename, 100, "%s/ch1_%llu",osname,(u_longlong_t)id);
+ (void) snprintf(tag, 100, "tag_%llu", (u_longlong_t)id);
/*
* Clean up from any previous run.
@@ -4424,7 +4428,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT);
out:
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_exit(&ztest_shared->zs_name_lock);
}
/*
@@ -4452,11 +4456,11 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
uint64_t guid0 = 0;
boolean_t islog = B_FALSE;
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
maxfaults = MAXFAULTS();
leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
mirror_save = zs->zs_mirrors;
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
ASSERT(leaves >= 1);
@@ -4550,12 +4554,13 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
* leaving the dataset in an inconsistent state.
*/
if (islog)
- (void) rw_wrlock(&ztest_shared->zs_name_lock);
+ (void) rw_enter(&ztest_shared->zs_name_lock,
+ RW_WRITER);
VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
if (islog)
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_exit(&ztest_shared->zs_name_lock);
} else {
(void) vdev_online(spa, guid0, 0, NULL);
}
@@ -4582,9 +4587,9 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
if (offset >= fsize)
continue;
- VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ mutex_enter(&zs->zs_vdev_lock);
if (mirror_save != zs->zs_mirrors) {
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
(void) close(fd);
return;
}
@@ -4593,7 +4598,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
fatal(1, "can't inject bad word at 0x%llx in %s",
offset, pathrand);
- VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ mutex_exit(&zs->zs_vdev_lock);
if (zopt_verbose >= 7)
(void) printf("injected bad word into %s,"
@@ -4634,13 +4639,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
* Take the name lock as writer to prevent anyone else from changing
* the pool and dataset properties we need to maintain during this test.
*/
- (void) rw_wrlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_WRITER);
if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
B_FALSE) != 0 ||
ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
B_FALSE) != 0) {
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
return;
}
@@ -4654,7 +4659,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
dmu_tx_hold_write(tx, object, 0, copies * blocksize);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
return;
}
@@ -4698,7 +4703,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
zio_buf_free(buf, psize);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/*
@@ -4727,7 +4732,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
char *oldname, *newname;
spa_t *spa;
- (void) rw_wrlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_WRITER);
oldname = zs->zs_pool;
newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
@@ -4767,7 +4772,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
umem_free(newname, strlen(newname) + 1);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
}
/*
@@ -4940,23 +4945,18 @@ ztest_resume_thread(void *arg)
ztest_resume(spa);
(void) poll(NULL, 0, 100);
}
- return (NULL);
-}
-static void *
-ztest_deadman_thread(void *arg)
-{
- ztest_shared_t *zs = arg;
- int grace = 300;
- hrtime_t delta;
+ thread_exit();
- delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
-
- (void) poll(NULL, 0, (int)(1000 * delta));
+ return (NULL);
+}
- fatal(0, "failed to complete within %d seconds of deadline", grace);
+#define GRACE 300
- return (NULL);
+static void
+ztest_deadman_alarm(int sig)
+{
+ fatal(0, "failed to complete within %d seconds of deadline", GRACE);
}
static void
@@ -5017,6 +5017,8 @@ ztest_thread(void *arg)
ztest_execute(zi, id);
}
+ thread_exit();
+
return (NULL);
}
@@ -5082,18 +5084,18 @@ ztest_dataset_open(ztest_shared_t *zs, int d)
ztest_dataset_name(name, zs->zs_pool, d);
- (void) rw_rdlock(&zs->zs_name_lock);
+ (void) rw_enter(&zs->zs_name_lock, RW_READER);
error = ztest_dataset_create(name);
if (error == ENOSPC) {
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
ztest_record_enospc(FTAG);
return (error);
}
ASSERT(error == 0 || error == EEXIST);
VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0);
- (void) rw_unlock(&zs->zs_name_lock);
+ (void) rw_exit(&zs->zs_name_lock);
ztest_zd_init(zd, os);
@@ -5144,9 +5146,10 @@ ztest_dataset_close(ztest_shared_t *zs, int d)
static void
ztest_run(ztest_shared_t *zs)
{
- thread_t *tid;
+ kt_did_t *tid;
spa_t *spa;
- thread_t resume_tid;
+ kthread_t *resume_thread;
+ uint64_t object;
int error;
int t, d;
@@ -5155,8 +5158,8 @@ ztest_run(ztest_shared_t *zs)
/*
* Initialize parent/child shared state.
*/
- VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0);
- VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0);
+ mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL);
zs->zs_thread_start = gethrtime();
zs->zs_thread_stop = zs->zs_thread_start + zopt_passtime * NANOSEC;
@@ -5165,7 +5168,7 @@ ztest_run(ztest_shared_t *zs)
if (ztest_random(100) < zopt_killrate)
zs->zs_thread_kill -= ztest_random(zopt_passtime * NANOSEC);
- (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL);
+ mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
offsetof(ztest_cb_data_t, zcd_node));
@@ -5192,14 +5195,14 @@ ztest_run(ztest_shared_t *zs)
/*
* Create a thread to periodically resume suspended I/O.
*/
- VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND,
- &resume_tid) == 0);
+ VERIFY3P((resume_thread = thread_create(NULL, 0, ztest_resume_thread,
+ spa, TS_RUN, NULL, 0, 0)), !=, NULL);
/*
- * Create a deadman thread to abort() if we hang.
+ * Set a deadman alarm to abort() if we hang.
*/
- VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND,
- NULL) == 0);
+ signal(SIGALRM, ztest_deadman_alarm);
+ alarm((zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + GRACE);
/*
* Verify that we can safely inquire about any object,
@@ -5225,7 +5228,7 @@ ztest_run(ztest_shared_t *zs)
}
zs->zs_enospc_count = 0;
- tid = umem_zalloc(zopt_threads * sizeof (thread_t), UMEM_NOFAIL);
+ tid = umem_zalloc(zopt_threads * sizeof (kt_did_t), UMEM_NOFAIL);
if (zopt_verbose >= 4)
(void) printf("starting main threads...\n");
@@ -5234,10 +5237,14 @@ ztest_run(ztest_shared_t *zs)
* Kick off all the tests that run in parallel.
*/
for (t = 0; t < zopt_threads; t++) {
+ kthread_t *thread;
+
if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0)
return;
- VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
- THR_BOUND, &tid[t]) == 0);
+
+ VERIFY3P(thread = thread_create(NULL, 0, ztest_thread,
+ (void *)(uintptr_t)t, TS_RUN, NULL, 0, 0), !=, NULL);
+ tid[t] = thread->t_tid;
}
/*
@@ -5245,7 +5252,7 @@ ztest_run(ztest_shared_t *zs)
* so we don't close datasets while threads are still using them.
*/
for (t = zopt_threads - 1; t >= 0; t--) {
- VERIFY(thr_join(tid[t], NULL, NULL) == 0);
+ thread_join(tid[t]);
if (t < zopt_datasets)
ztest_dataset_close(zs, t);
}
@@ -5255,18 +5262,18 @@ ztest_run(ztest_shared_t *zs)
zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
- umem_free(tid, zopt_threads * sizeof (thread_t));
+ umem_free(tid, zopt_threads * sizeof (kt_did_t));
/* Kill the resume thread */
ztest_exiting = B_TRUE;
- VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
+ thread_join(resume_thread->t_tid);
ztest_resume(spa);
/*
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
- for (uint64_t object = 1; object < 50; object++)
+ for (object = 1; object < 50; object++)
dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
/* Verify that at least one commit cb was called in a timely fashion */
@@ -5426,8 +5433,8 @@ ztest_init(ztest_shared_t *zs)
spa_t *spa;
nvlist_t *nvroot, *props;
- VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0);
- VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0);
+ mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL);
kernel_init(FREAD | FWRITE);
@@ -5456,8 +5463,8 @@ ztest_init(ztest_shared_t *zs)
ztest_run_zdb(zs->zs_pool);
- (void) rwlock_destroy(&zs->zs_name_lock);
- (void) _mutex_destroy(&zs->zs_vdev_lock);
+ (void) rw_destroy(&zs->zs_name_lock);
+ (void) mutex_destroy(&zs->zs_vdev_lock);
}
int
diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c
index 578bf3294..1b843effe 100644
--- a/lib/libuutil/uu_misc.c
+++ b/lib/libuutil/uu_misc.c
@@ -34,7 +34,6 @@
#include <stdlib.h>
#include <string.h>
#include <sys/debug.h>
-#include <thread.h>
#include <unistd.h>
#include <ctype.h>
@@ -68,11 +67,12 @@ static va_list uu_panic_args;
static pthread_t uu_panic_thread;
static uint32_t _uu_main_error;
+static __thread int _uu_main_thread = 0;
void
uu_set_error(uint_t code)
{
- if (thr_main() != 0) {
+ if (_uu_main_thread) {
_uu_main_error = code;
return;
}
@@ -101,7 +101,7 @@ uu_set_error(uint_t code)
uint32_t
uu_error(void)
{
- if (thr_main() != 0)
+ if (_uu_main_thread)
return (_uu_main_error);
if (uu_error_key_setup < 0) /* can't happen? */
@@ -259,6 +259,7 @@ uu_init(void) __attribute__((constructor));
static void
uu_init(void)
{
+ _uu_main_thread = 1;
(void) pthread_atfork(uu_lockup, uu_release, uu_release_child);
}
diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h
index fc543559b..55b117c21 100644
--- a/lib/libzpool/include/sys/zfs_context.h
+++ b/lib/libzpool/include/sys/zfs_context.h
@@ -49,8 +49,7 @@ extern "C" {
#include <errno.h>
#include <string.h>
#include <strings.h>
-#include <synch.h>
-#include <thread.h>
+#include <pthread.h>
#include <assert.h>
#include <alloca.h>
#include <umem.h>
@@ -90,6 +89,8 @@ extern "C" {
#define CE_PANIC 3 /* panic */
#define CE_IGNORE 4 /* print nothing */
+extern int aok;
+
/*
* ZFS debugging
*/
@@ -195,27 +196,55 @@ _NOTE(CONSTCOND) } while (0)
/*
* Threads
*/
-#define curthread ((void *)(uintptr_t)thr_self())
-
-typedef struct kthread kthread_t;
-
-#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
- zk_thread_create(func, arg)
-#define thread_exit() thr_exit(NULL)
-#define thread_join(t) panic("libzpool cannot join threads")
+#define TS_MAGIC 0x72f158ab4261e538ull
+#define TS_RUN 0x00000002
+#ifdef __linux__
+#define STACK_SIZE 8192 /* Linux x86 and amd64 */
+#else
+#define STACK_SIZE 24576 /* Solaris */
+#endif
-#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS)
+#ifdef NPTL_GUARD_WITHIN_STACK
+#define EXTRA_GUARD_BYTES PAGESIZE
+#else
+#define EXTRA_GUARD_BYTES 0
+#endif
/* in libzpool, p0 exists only to have its address taken */
-struct proc {
+typedef struct proc {
uintptr_t this_is_never_used_dont_dereference_it;
-};
+} proc_t;
extern struct proc p0;
-#define PS_NONE -1
+typedef void (*thread_func_t)(void *);
+typedef void (*thread_func_arg_t)(void *);
+typedef pthread_t kt_did_t;
-extern kthread_t *zk_thread_create(void (*func)(), void *arg);
+typedef struct kthread {
+ kt_did_t t_tid;
+ thread_func_t t_func;
+ void * t_arg;
+} kthread_t;
+
+#define tsd_get(key) pthread_getspecific(key)
+#define tsd_set(key, val) pthread_setspecific(key, val)
+#define curthread zk_thread_current()
+#define thread_exit zk_thread_exit
+#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
+ zk_thread_create(stk, stksize, (thread_func_t)func, arg, \
+ len, NULL, state, pri)
+#define thread_join(t) zk_thread_join(t)
+#define newproc(f,a,cid,pri,ctp,pid) (ENOSYS)
+
+extern kthread_t *zk_thread_current(void);
+extern void zk_thread_exit(void);
+extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize,
+ thread_func_t func, void *arg, size_t len,
+ proc_t *pp, int state, pri_t pri);
+extern void zk_thread_join(kt_did_t tid);
+
+#define PS_NONE -1
#define issig(why) (FALSE)
#define ISSIG(thr, why) (FALSE)
@@ -223,55 +252,52 @@ extern kthread_t *zk_thread_create(void (*func)(), void *arg);
/*
* Mutexes
*/
+#define MTX_MAGIC 0x9522f51362a6e326ull
+#define MTX_INIT ((void *)NULL)
+#define MTX_DEST ((void *)-1UL)
+
typedef struct kmutex {
void *m_owner;
- boolean_t initialized;
- mutex_t m_lock;
+ uint64_t m_magic;
+ pthread_mutex_t m_lock;
} kmutex_t;
-#define MUTEX_DEFAULT USYNC_THREAD
-#undef MUTEX_HELD
-#undef MUTEX_NOT_HELD
-#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock)
+#define MUTEX_DEFAULT 0
+#define MUTEX_HELD(m) ((m)->m_owner == curthread)
#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))
-/*
- * Argh -- we have to get cheesy here because the kernel and userland
- * have different signatures for the same routine.
- */
-extern int _mutex_init(mutex_t *mp, int type, void *arg);
-extern int _mutex_destroy(mutex_t *mp);
-
-#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp))
-#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp))
-
-extern void zmutex_init(kmutex_t *mp);
-extern void zmutex_destroy(kmutex_t *mp);
+extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie);
+extern void mutex_destroy(kmutex_t *mp);
extern void mutex_enter(kmutex_t *mp);
extern void mutex_exit(kmutex_t *mp);
extern int mutex_tryenter(kmutex_t *mp);
extern void *mutex_owner(kmutex_t *mp);
+extern int mutex_held(kmutex_t *mp);
/*
* RW locks
*/
+#define RW_MAGIC 0x4d31fb123648e78aull
+#define RW_INIT ((void *)NULL)
+#define RW_DEST ((void *)-1UL)
+
typedef struct krwlock {
- void *rw_owner;
- boolean_t initialized;
- rwlock_t rw_lock;
+ void *rw_owner;
+ void *rw_wr_owner;
+ uint64_t rw_magic;
+ pthread_rwlock_t rw_lock;
+ uint_t rw_readers;
} krwlock_t;
typedef int krw_t;
#define RW_READER 0
#define RW_WRITER 1
-#define RW_DEFAULT USYNC_THREAD
+#define RW_DEFAULT RW_READER
-#undef RW_READ_HELD
-#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock)
-
-#undef RW_WRITE_HELD
-#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock)
+#define RW_READ_HELD(x) ((x)->rw_readers > 0)
+#define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread)
+#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x))
extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
extern void rw_destroy(krwlock_t *rwlp);
@@ -289,9 +315,14 @@ extern gid_t *crgetgroups(cred_t *cr);
/*
* Condition variables
*/
-typedef cond_t kcondvar_t;
+#define CV_MAGIC 0xd31ea9a83b1b30c4ull
+
+typedef struct kcondvar {
+ uint64_t cv_magic;
+ pthread_cond_t cv;
+} kcondvar_t;
-#define CV_DEFAULT USYNC_THREAD
+#define CV_DEFAULT 0
extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
extern void cv_destroy(kcondvar_t *cv);
@@ -367,7 +398,7 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
-extern int taskq_member(taskq_t *, void *);
+extern int taskq_member(taskq_t *, kthread_t *);
extern void system_taskq_init(void);
extern void system_taskq_fini(void);
@@ -496,7 +527,7 @@ extern void delay(clock_t ticks);
#define minclsyspri 60
#define maxclsyspri 99
-#define CPU_SEQID (thr_self() & (max_ncpus - 1))
+#define CPU_SEQID (pthread_self() & (max_ncpus - 1))
#define kcred NULL
#define CRED() NULL
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index b64f03da4..4bd08cdd4 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -29,6 +29,7 @@
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
+#include <sys/signal.h>
#include <sys/spa.h>
#include <sys/stat.h>
#include <sys/processor.h>
@@ -57,16 +58,156 @@ struct proc p0;
* threads
* =========================================================================
*/
-/*ARGSUSED*/
+
+pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
+pthread_key_t kthread_key;
+int kthread_nr = 0;
+
+static void
+thread_init(void)
+{
+ kthread_t *kt;
+
+ VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
+
+ /* Create entry for primary kthread */
+ kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
+ kt->t_tid = pthread_self();
+ kt->t_func = NULL;
+
+ VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
+
+ /* Only the main thread should be running at the moment */
+ ASSERT3S(kthread_nr, ==, 0);
+ kthread_nr = 1;
+}
+
+static void
+thread_fini(void)
+{
+ kthread_t *kt = curthread;
+
+ ASSERT(pthread_equal(kt->t_tid, pthread_self()));
+ ASSERT3P(kt->t_func, ==, NULL);
+
+ umem_free(kt, sizeof(kthread_t));
+
+ /* Wait for all threads to exit via thread_exit() */
+ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+
+ kthread_nr--; /* Main thread is exiting */
+
+ while (kthread_nr > 0)
+ VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
+ 0);
+
+ ASSERT3S(kthread_nr, ==, 0);
+ VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
+
+ VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
+}
+
kthread_t *
-zk_thread_create(void (*func)(), void *arg)
+zk_thread_current(void)
+{
+ kthread_t *kt = pthread_getspecific(kthread_key);
+
+ ASSERT3P(kt, !=, NULL);
+
+ return kt;
+}
+
+void *
+zk_thread_helper(void *arg)
{
- thread_t tid;
+ kthread_t *kt = (kthread_t *) arg;
+
+ VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
- VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
- &tid) == 0);
+ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
+ kthread_nr++;
+ VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
- return ((void *)(uintptr_t)tid);
+ kt->t_tid = pthread_self();
+ ((thread_func_arg_t) kt->t_func)(kt->t_arg);
+
+ /* Unreachable, thread must exit with thread_exit() */
+ abort();
+
+ return NULL;
+}
+
+kthread_t *
+zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
+ size_t len, proc_t *pp, int state, pri_t pri)
+{
+ kthread_t *kt;
+ pthread_attr_t attr;
+ size_t stack;
+
+ ASSERT3S(state & ~TS_RUN, ==, 0);
+
+ kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
+ kt->t_func = func;
+ kt->t_arg = arg;
+
+ /*
+ * The Solaris kernel stack size is 24k for x86/x86_64.
+ * The Linux kernel stack size is 8k for x86/x86_64.
+ *
+ * We reduce the default stack size in user space to ensure
+ * we observe stack overruns in user space as well as in
+ * kernel space. PTHREAD_STACK_MIN is the minimum stack
+ * required for a NULL procedure in user space and is added
+ * into the stack requirements.
+ *
+ * Some buggy NPTL threading implementations include the
+ * guard area within the stack size allocations. In
+ * this case we allocate an extra page to account for the
+ * guard area since we only have two pages of usable stack
+ * on Linux.
+ */
+
+ stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
+ EXTRA_GUARD_BYTES;
+
+ VERIFY3S(pthread_attr_init(&attr), ==, 0);
+ VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
+ VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
+
+ VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
+ ==, 0);
+
+ VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
+
+ return kt;
+}
+
+void
+zk_thread_exit(void)
+{
+ kthread_t *kt = curthread;
+
+ ASSERT(pthread_equal(kt->t_tid, pthread_self()));
+
+ umem_free(kt, sizeof(kthread_t));
+
+ pthread_mutex_lock(&kthread_lock);
+ kthread_nr--;
+ pthread_mutex_unlock(&kthread_lock);
+
+ pthread_cond_broadcast(&kthread_cond);
+ pthread_exit((void *)TS_MAGIC);
+}
+
+void
+zk_thread_join(kt_did_t tid)
+{
+ void *ret;
+
+ pthread_join((pthread_t)tid, &ret);
+ VERIFY3P(ret, ==, (void *)TS_MAGIC);
}
/*
@@ -97,42 +238,45 @@ kstat_delete(kstat_t *ksp)
* mutexes
* =========================================================================
*/
+
void
-zmutex_init(kmutex_t *mp)
+mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
{
- mp->m_owner = NULL;
- mp->initialized = B_TRUE;
- (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
+ ASSERT3S(type, ==, MUTEX_DEFAULT);
+ ASSERT3P(cookie, ==, NULL);
+ mp->m_owner = MTX_INIT;
+ mp->m_magic = MTX_MAGIC;
+ VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
}
void
-zmutex_destroy(kmutex_t *mp)
+mutex_destroy(kmutex_t *mp)
{
- ASSERT(mp->initialized == B_TRUE);
- ASSERT(mp->m_owner == NULL);
- (void) _mutex_destroy(&(mp)->m_lock);
- mp->m_owner = (void *)-1UL;
- mp->initialized = B_FALSE;
+ ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
+ ASSERT3P(mp->m_owner, ==, MTX_INIT);
+ VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
+ mp->m_owner = MTX_DEST;
+ mp->m_magic = 0;
}
void
mutex_enter(kmutex_t *mp)
{
- ASSERT(mp->initialized == B_TRUE);
- ASSERT(mp->m_owner != (void *)-1UL);
- ASSERT(mp->m_owner != curthread);
- VERIFY(mutex_lock(&mp->m_lock) == 0);
- ASSERT(mp->m_owner == NULL);
+ ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
+ ASSERT3P(mp->m_owner, !=, MTX_DEST);
+ ASSERT3P(mp->m_owner, !=, curthread);
+ VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
+ ASSERT3P(mp->m_owner, ==, MTX_INIT);
mp->m_owner = curthread;
}
int
mutex_tryenter(kmutex_t *mp)
{
- ASSERT(mp->initialized == B_TRUE);
- ASSERT(mp->m_owner != (void *)-1UL);
- if (0 == mutex_trylock(&mp->m_lock)) {
- ASSERT(mp->m_owner == NULL);
+ ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
+ ASSERT3P(mp->m_owner, !=, MTX_DEST);
+ if (0 == pthread_mutex_trylock(&mp->m_lock)) {
+ ASSERT3P(mp->m_owner, ==, MTX_INIT);
mp->m_owner = curthread;
return (1);
} else {
@@ -143,53 +287,71 @@ mutex_tryenter(kmutex_t *mp)
void
mutex_exit(kmutex_t *mp)
{
- ASSERT(mp->initialized == B_TRUE);
- ASSERT(mutex_owner(mp) == curthread);
- mp->m_owner = NULL;
- VERIFY(mutex_unlock(&mp->m_lock) == 0);
+ ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
+ ASSERT3P(mutex_owner(mp), ==, curthread);
+ mp->m_owner = MTX_INIT;
+ VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
}
void *
mutex_owner(kmutex_t *mp)
{
- ASSERT(mp->initialized == B_TRUE);
+ ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
return (mp->m_owner);
}
+int
+mutex_held(kmutex_t *mp)
+{
+ return (mp->m_owner == curthread);
+}
+
/*
* =========================================================================
* rwlocks
* =========================================================================
*/
-/*ARGSUSED*/
+
void
rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
{
- rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
- rwlp->rw_owner = NULL;
- rwlp->initialized = B_TRUE;
+ ASSERT3S(type, ==, RW_DEFAULT);
+ ASSERT3P(arg, ==, NULL);
+ VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
+ rwlp->rw_owner = RW_INIT;
+ rwlp->rw_wr_owner = RW_INIT;
+ rwlp->rw_readers = 0;
+ rwlp->rw_magic = RW_MAGIC;
}
void
rw_destroy(krwlock_t *rwlp)
{
- rwlock_destroy(&rwlp->rw_lock);
- rwlp->rw_owner = (void *)-1UL;
- rwlp->initialized = B_FALSE;
+ ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
+
+ VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
+ rwlp->rw_magic = 0;
}
void
rw_enter(krwlock_t *rwlp, krw_t rw)
{
- ASSERT(!RW_LOCK_HELD(rwlp));
- ASSERT(rwlp->initialized == B_TRUE);
- ASSERT(rwlp->rw_owner != (void *)-1UL);
- ASSERT(rwlp->rw_owner != curthread);
+ ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
+ ASSERT3P(rwlp->rw_owner, !=, curthread);
+ ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
- if (rw == RW_READER)
- VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
- else
- VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
+ if (rw == RW_READER) {
+ VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
+ ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
+
+ atomic_inc_uint(&rwlp->rw_readers);
+ } else {
+ VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
+ ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
+ ASSERT3U(rwlp->rw_readers, ==, 0);
+
+ rwlp->rw_wr_owner = curthread;
+ }
rwlp->rw_owner = curthread;
}
@@ -197,11 +359,16 @@ rw_enter(krwlock_t *rwlp, krw_t rw)
void
rw_exit(krwlock_t *rwlp)
{
- ASSERT(rwlp->initialized == B_TRUE);
- ASSERT(rwlp->rw_owner != (void *)-1UL);
+ ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
+ ASSERT(RW_LOCK_HELD(rwlp));
+
+ if (RW_READ_HELD(rwlp))
+ atomic_dec_uint(&rwlp->rw_readers);
+ else
+ rwlp->rw_wr_owner = RW_INIT;
- rwlp->rw_owner = NULL;
- VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
+ rwlp->rw_owner = RW_INIT;
+ VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
}
int
@@ -209,28 +376,36 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw)
{
int rv;
- ASSERT(rwlp->initialized == B_TRUE);
- ASSERT(rwlp->rw_owner != (void *)-1UL);
+ ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
if (rw == RW_READER)
- rv = rw_tryrdlock(&rwlp->rw_lock);
+ rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
else
- rv = rw_trywrlock(&rwlp->rw_lock);
+ rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
if (rv == 0) {
+ ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
+
+ if (rw == RW_READER)
+ atomic_inc_uint(&rwlp->rw_readers);
+ else {
+ ASSERT3U(rwlp->rw_readers, ==, 0);
+ rwlp->rw_wr_owner = curthread;
+ }
+
rwlp->rw_owner = curthread;
return (1);
}
+ VERIFY3S(rv, ==, EBUSY);
+
return (0);
}
-/*ARGSUSED*/
int
rw_tryupgrade(krwlock_t *rwlp)
{
- ASSERT(rwlp->initialized == B_TRUE);
- ASSERT(rwlp->rw_owner != (void *)-1UL);
+ ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
return (0);
}
@@ -240,26 +415,32 @@ rw_tryupgrade(krwlock_t *rwlp)
* condition variables
* =========================================================================
*/
-/*ARGSUSED*/
+
void
cv_init(kcondvar_t *cv, char *name, int type, void *arg)
{
- VERIFY(cond_init(cv, type, NULL) == 0);
+ ASSERT3S(type, ==, CV_DEFAULT);
+ cv->cv_magic = CV_MAGIC;
+ VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
}
void
cv_destroy(kcondvar_t *cv)
{
- VERIFY(cond_destroy(cv) == 0);
+ ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
+ VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
+ cv->cv_magic = 0;
}
void
cv_wait(kcondvar_t *cv, kmutex_t *mp)
{
- ASSERT(mutex_owner(mp) == curthread);
- mp->m_owner = NULL;
- int ret = cond_wait(cv, &mp->m_lock);
- VERIFY(ret == 0 || ret == EINTR);
+ ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
+ ASSERT3P(mutex_owner(mp), ==, curthread);
+ mp->m_owner = MTX_INIT;
+ int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
+ if (ret != 0)
+ VERIFY3S(ret, ==, EINTR);
mp->m_owner = curthread;
}
@@ -267,29 +448,38 @@ clock_t
cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
{
int error;
+ struct timeval tv;
timestruc_t ts;
clock_t delta;
+ ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
+
top:
delta = abstime - ddi_get_lbolt();
if (delta <= 0)
return (-1);
- ts.tv_sec = delta / hz;
- ts.tv_nsec = (delta % hz) * (NANOSEC / hz);
+ VERIFY(gettimeofday(&tv, NULL) == 0);
+
+ ts.tv_sec = tv.tv_sec + delta / hz;
+ ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
+ if (ts.tv_nsec >= NANOSEC) {
+ ts.tv_sec++;
+ ts.tv_nsec -= NANOSEC;
+ }
- ASSERT(mutex_owner(mp) == curthread);
- mp->m_owner = NULL;
- error = cond_reltimedwait(cv, &mp->m_lock, &ts);
+ ASSERT3P(mutex_owner(mp), ==, curthread);
+ mp->m_owner = MTX_INIT;
+ error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
mp->m_owner = curthread;
- if (error == ETIME)
+ if (error == ETIMEDOUT)
return (-1);
if (error == EINTR)
goto top;
- ASSERT(error == 0);
+ VERIFY3S(error, ==, 0);
return (1);
}
@@ -297,13 +487,15 @@ top:
void
cv_signal(kcondvar_t *cv)
{
- VERIFY(cond_signal(cv) == 0);
+ ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
+ VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
}
void
cv_broadcast(kcondvar_t *cv)
{
- VERIFY(cond_broadcast(cv) == 0);
+ ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
+ VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
}
/*
@@ -571,7 +763,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
if (dprintf_find_string("pid"))
(void) printf("%d ", getpid());
if (dprintf_find_string("tid"))
- (void) printf("%u ", thr_self());
+ (void) printf("%u ", (uint_t) pthread_self());
if (dprintf_find_string("cpu"))
(void) printf("%u ", getcpuid());
if (dprintf_find_string("time"))
@@ -824,6 +1016,7 @@ kernel_init(int mode)
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
+ thread_init();
system_taskq_init();
spa_init(mode);
@@ -835,6 +1028,7 @@ kernel_fini(void)
spa_fini();
system_taskq_fini();
+ thread_fini();
close(random_fd);
close(urandom_fd);
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
index 8db5d11c1..36c0ec7df 100644
--- a/lib/libzpool/taskq.c
+++ b/lib/libzpool/taskq.c
@@ -42,7 +42,7 @@ struct taskq {
krwlock_t tq_threadlock;
kcondvar_t tq_dispatch_cv;
kcondvar_t tq_wait_cv;
- thread_t *tq_threadlist;
+ kthread_t **tq_threadlist;
int tq_flags;
int tq_active;
int tq_nthreads;
@@ -154,7 +154,7 @@ taskq_wait(taskq_t *tq)
mutex_exit(&tq->tq_lock);
}
-static void *
+static void
taskq_thread(void *arg)
{
taskq_t *tq = arg;
@@ -183,7 +183,7 @@ taskq_thread(void *arg)
tq->tq_nthreads--;
cv_broadcast(&tq->tq_wait_cv);
mutex_exit(&tq->tq_lock);
- return (NULL);
+ thread_exit();
}
/*ARGSUSED*/
@@ -219,7 +219,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
tq->tq_maxalloc = maxalloc;
tq->tq_task.task_next = &tq->tq_task;
tq->tq_task.task_prev = &tq->tq_task;
- tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP);
+ tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP);
if (flags & TASKQ_PREPOPULATE) {
mutex_enter(&tq->tq_lock);
@@ -229,8 +229,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
}
for (t = 0; t < nthreads; t++)
- (void) thr_create(0, 0, taskq_thread,
- tq, THR_BOUND, &tq->tq_threadlist[t]);
+ VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0,
+ taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL);
return (tq);
}
@@ -238,7 +238,6 @@ taskq_create(const char *name, int nthreads, pri_t pri,
void
taskq_destroy(taskq_t *tq)
{
- int t;
int nthreads = tq->tq_nthreads;
taskq_wait(tq);
@@ -259,10 +258,7 @@ taskq_destroy(taskq_t *tq)
mutex_exit(&tq->tq_lock);
- for (t = 0; t < nthreads; t++)
- (void) thr_join(tq->tq_threadlist[t], NULL, NULL);
-
- kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t));
+ kmem_free(tq->tq_threadlist, nthreads * sizeof (kthread_t *));
rw_destroy(&tq->tq_threadlock);
mutex_destroy(&tq->tq_lock);
@@ -274,7 +270,7 @@ taskq_destroy(taskq_t *tq)
}
int
-taskq_member(taskq_t *tq, void *t)
+taskq_member(taskq_t *tq, kthread_t *t)
{
int i;
@@ -282,7 +278,7 @@ taskq_member(taskq_t *tq, void *t)
return (1);
for (i = 0; i < tq->tq_nthreads; i++)
- if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t)
+ if (tq->tq_threadlist[i] == t)
return (1);
return (0);