diff options
Diffstat (limited to 'lib/libzpool')
-rw-r--r-- | lib/libzpool/kernel.c | 885 | ||||
-rw-r--r-- | lib/libzpool/taskq.c | 261 | ||||
-rw-r--r-- | lib/libzpool/util.c | 156 |
3 files changed, 1302 insertions, 0 deletions
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c new file mode 100644 index 000000000..fe817cc64 --- /dev/null +++ b/lib/libzpool/kernel.c @@ -0,0 +1,885 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <assert.h> +#include <fcntl.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <zlib.h> +#include <sys/spa.h> +#include <sys/stat.h> +#include <sys/processor.h> +#include <sys/zfs_context.h> +#include <sys/zmod.h> +#include <sys/utsname.h> + +/* + * Emulation of kernel services in userland. + */ + +uint64_t physmem; +vnode_t *rootdir = (vnode_t *)0xabcd1234; +char hw_serial[11]; + +struct utsname utsname = { + "userland", "libzpool", "1", "1", "na" +}; + +/* + * ========================================================================= + * threads + * ========================================================================= + */ +/*ARGSUSED*/ +kthread_t * +zk_thread_create(void (*func)(), void *arg) +{ + thread_t tid; + + VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, + &tid) == 0); + + return ((void *)(uintptr_t)tid); +} + +/* + * ========================================================================= + * kstats + * ========================================================================= + */ +/*ARGSUSED*/ +kstat_t * +kstat_create(char *module, int instance, char *name, char *class, + uchar_t type, ulong_t ndata, uchar_t ks_flag) +{ + return (NULL); +} + +/*ARGSUSED*/ +void +kstat_install(kstat_t *ksp) +{} + +/*ARGSUSED*/ +void +kstat_delete(kstat_t *ksp) +{} + +/* + * ========================================================================= + * mutexes + * ========================================================================= + */ +void +zmutex_init(kmutex_t *mp) +{ + mp->m_owner = NULL; + mp->initialized = B_TRUE; + (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); +} + +void +zmutex_destroy(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mp->m_owner == NULL); + (void) _mutex_destroy(&(mp)->m_lock); + mp->m_owner = (void *)-1UL; + mp->initialized = B_FALSE; +} + +void +mutex_enter(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mp->m_owner != (void *)-1UL); + ASSERT(mp->m_owner != curthread); + VERIFY(mutex_lock(&mp->m_lock) == 0); + ASSERT(mp->m_owner == NULL); + mp->m_owner = curthread; +} + +int +mutex_tryenter(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mp->m_owner != (void *)-1UL); + if (0 == mutex_trylock(&mp->m_lock)) { + ASSERT(mp->m_owner == NULL); + mp->m_owner = curthread; + return (1); + } else { + return (0); + } +} + +void +mutex_exit(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + VERIFY(mutex_unlock(&mp->m_lock) == 0); +} + +void * +mutex_owner(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + return (mp->m_owner); +} + +/* + * ========================================================================= + * rwlocks + * ========================================================================= + */ +/*ARGSUSED*/ +void +rw_init(krwlock_t *rwlp, char *name, int type, void *arg) +{ + rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); + rwlp->rw_owner = NULL; + rwlp->initialized = B_TRUE; +} + +void +rw_destroy(krwlock_t *rwlp) +{ + rwlock_destroy(&rwlp->rw_lock); + rwlp->rw_owner = (void *)-1UL; + rwlp->initialized = B_FALSE; +} + +void +rw_enter(krwlock_t *rwlp, krw_t rw) +{ + ASSERT(!RW_LOCK_HELD(rwlp)); + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT(rwlp->rw_owner != curthread); + + if (rw == RW_READER) + VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); + else + VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); + + rwlp->rw_owner = curthread; +} + +void +rw_exit(krwlock_t *rwlp) +{ + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + + rwlp->rw_owner = NULL; + VERIFY(rw_unlock(&rwlp->rw_lock) == 0); +} + +int +rw_tryenter(krwlock_t *rwlp, krw_t rw) +{ + int rv; + + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + + if (rw == RW_READER) + rv = rw_tryrdlock(&rwlp->rw_lock); + else + rv = rw_trywrlock(&rwlp->rw_lock); + + if (rv == 0) { + rwlp->rw_owner = curthread; + return (1); + } + + return (0); +} + +/*ARGSUSED*/ +int +rw_tryupgrade(krwlock_t *rwlp) +{ + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + + return (0); +} + +/* + * ========================================================================= + * condition variables + * ========================================================================= + */ +/*ARGSUSED*/ +void +cv_init(kcondvar_t *cv, char *name, int type, void *arg) +{ + VERIFY(cond_init(cv, type, NULL) == 0); +} + +void +cv_destroy(kcondvar_t *cv) +{ + VERIFY(cond_destroy(cv) == 0); +} + +void +cv_wait(kcondvar_t *cv, kmutex_t *mp) +{ + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + int ret = cond_wait(cv, &mp->m_lock); + VERIFY(ret == 0 || ret == EINTR); + mp->m_owner = curthread; +} + +clock_t +cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) +{ + int error; + timestruc_t ts; + clock_t delta; + +top: + delta = abstime - lbolt; + if (delta <= 0) + return (-1); + + ts.tv_sec = delta / hz; + ts.tv_nsec = (delta % hz) * (NANOSEC / hz); + + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + error = cond_reltimedwait(cv, &mp->m_lock, &ts); + mp->m_owner = curthread; + + if (error == ETIME) + return (-1); + + if (error == EINTR) + goto top; + + ASSERT(error == 0); + + return (1); +} + +void +cv_signal(kcondvar_t *cv) +{ + VERIFY(cond_signal(cv) == 0); +} + +void +cv_broadcast(kcondvar_t *cv) +{ + VERIFY(cond_broadcast(cv) == 0); +} + +/* + * ========================================================================= + * vnode operations + * ========================================================================= + */ +/* + * Note: for the xxxat() versions of these functions, we assume that the + * starting vp is always rootdir (which is true for spa_directory.c, the only + * ZFS consumer of these interfaces). We assert this is true, and then emulate + * them by adding '/' in front of the path. + */ + +/*ARGSUSED*/ +int +vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) +{ + int fd; + vnode_t *vp; + int old_umask; + char realpath[MAXPATHLEN]; + struct stat64 st; + + /* + * If we're accessing a real disk from userland, we need to use + * the character interface to avoid caching. This is particularly + * important if we're trying to look at a real in-kernel storage + * pool from userland, e.g. via zdb, because otherwise we won't + * see the changes occurring under the segmap cache. + * On the other hand, the stupid character device returns zero + * for its size. So -- gag -- we open the block device to get + * its size, and remember it for subsequent VOP_GETATTR(). + */ + if (strncmp(path, "/dev/", 5) == 0) { + char *dsk; + fd = open64(path, O_RDONLY); + if (fd == -1) + return (errno); + if (fstat64(fd, &st) == -1) { + close(fd); + return (errno); + } + close(fd); + (void) sprintf(realpath, "%s", path); + dsk = strstr(path, "/dsk/"); + if (dsk != NULL) + (void) sprintf(realpath + (dsk - path) + 1, "r%s", + dsk + 1); + } else { + (void) sprintf(realpath, "%s", path); + if (!(flags & FCREAT) && stat64(realpath, &st) == -1) + return (errno); + } + + if (flags & FCREAT) + old_umask = umask(0); + + /* + * The construct 'flags - FREAD' conveniently maps combinations of + * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. + */ + fd = open64(realpath, flags - FREAD, mode); + + if (flags & FCREAT) + (void) umask(old_umask); + + if (fd == -1) + return (errno); + + if (fstat64(fd, &st) == -1) { + close(fd); + return (errno); + } + + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); + + *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); + + vp->v_fd = fd; + vp->v_size = st.st_size; + vp->v_path = spa_strdup(path); + + return (0); +} + +/*ARGSUSED*/ +int +vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, + int x3, vnode_t *startvp, int fd) +{ + char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); + int ret; + + ASSERT(startvp == rootdir); + (void) sprintf(realpath, "/%s", path); + + /* fd ignored for now, need if want to simulate nbmand support */ + ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); + + umem_free(realpath, strlen(path) + 2); + + return (ret); +} + +/*ARGSUSED*/ +int +vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, + int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) +{ + ssize_t iolen, split; + + if (uio == UIO_READ) { + iolen = pread64(vp->v_fd, addr, len, offset); + } else { + /* + * To simulate partial disk writes, we split writes into two + * system calls so that the process can be killed in between. + */ + split = (len > 0 ? rand() % len : 0); + iolen = pwrite64(vp->v_fd, addr, split, offset); + iolen += pwrite64(vp->v_fd, (char *)addr + split, + len - split, offset + split); + } + + if (iolen == -1) + return (errno); + if (residp) + *residp = len - iolen; + else if (iolen != len) + return (EIO); + return (0); +} + +void +vn_close(vnode_t *vp) +{ + close(vp->v_fd); + spa_strfree(vp->v_path); + umem_free(vp, sizeof (vnode_t)); +} + +#ifdef ZFS_DEBUG + +/* + * ========================================================================= + * Figure out which debugging statements to print + * ========================================================================= + */ + +static char *dprintf_string; +static int dprintf_print_all; + +int +dprintf_find_string(const char *string) +{ + char *tmp_str = dprintf_string; + int len = strlen(string); + + /* + * Find out if this is a string we want to print. + * String format: file1.c,function_name1,file2.c,file3.c + */ + + while (tmp_str != NULL) { + if (strncmp(tmp_str, string, len) == 0 && + (tmp_str[len] == ',' || tmp_str[len] == '\0')) + return (1); + tmp_str = strchr(tmp_str, ','); + if (tmp_str != NULL) + tmp_str++; /* Get rid of , */ + } + return (0); +} + +void +dprintf_setup(int *argc, char **argv) +{ + int i, j; + + /* + * Debugging can be specified two ways: by setting the + * environment variable ZFS_DEBUG, or by including a + * "debug=..." argument on the command line. The command + * line setting overrides the environment variable. + */ + + for (i = 1; i < *argc; i++) { + int len = strlen("debug="); + /* First look for a command line argument */ + if (strncmp("debug=", argv[i], len) == 0) { + dprintf_string = argv[i] + len; + /* Remove from args */ + for (j = i; j < *argc; j++) + argv[j] = argv[j+1]; + argv[j] = NULL; + (*argc)--; + } + } + + if (dprintf_string == NULL) { + /* Look for ZFS_DEBUG environment variable */ + dprintf_string = getenv("ZFS_DEBUG"); + } + + /* + * Are we just turning on all debugging? + */ + if (dprintf_find_string("on")) + dprintf_print_all = 1; +} + +/* + * ========================================================================= + * debug printfs + * ========================================================================= + */ +void +__dprintf(const char *file, const char *func, int line, const char *fmt, ...) +{ + const char *newfile; + va_list adx; + + /* + * Get rid of annoying "../common/" prefix to filename. + */ + newfile = strrchr(file, '/'); + if (newfile != NULL) { + newfile = newfile + 1; /* Get rid of leading / */ + } else { + newfile = file; + } + + if (dprintf_print_all || + dprintf_find_string(newfile) || + dprintf_find_string(func)) { + /* Print out just the function name if requested */ + flockfile(stdout); + if (dprintf_find_string("pid")) + (void) printf("%d ", getpid()); + if (dprintf_find_string("tid")) + (void) printf("%u ", thr_self()); + if (dprintf_find_string("cpu")) + (void) printf("%u ", getcpuid()); + if (dprintf_find_string("time")) + (void) printf("%llu ", gethrtime()); + if (dprintf_find_string("long")) + (void) printf("%s, line %d: ", newfile, line); + (void) printf("%s: ", func); + va_start(adx, fmt); + (void) vprintf(fmt, adx); + va_end(adx); + funlockfile(stdout); + } +} + +#endif /* ZFS_DEBUG */ + +/* + * ========================================================================= + * cmn_err() and panic() + * ========================================================================= + */ +static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; +static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; + +void +vpanic(const char *fmt, va_list adx) +{ + (void) fprintf(stderr, "error: "); + (void) vfprintf(stderr, fmt, adx); + (void) fprintf(stderr, "\n"); + + abort(); /* think of it as a "user-level crash dump" */ +} + +void +panic(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vpanic(fmt, adx); + va_end(adx); +} + +void +vcmn_err(int ce, const char *fmt, va_list adx) +{ + if (ce == CE_PANIC) + vpanic(fmt, adx); + if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ + (void) fprintf(stderr, "%s", ce_prefix[ce]); + (void) vfprintf(stderr, fmt, adx); + (void) fprintf(stderr, "%s", ce_suffix[ce]); + } +} + +/*PRINTFLIKE2*/ +void +cmn_err(int ce, const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vcmn_err(ce, fmt, adx); + va_end(adx); +} + +/* + * ========================================================================= + * kobj interfaces + * ========================================================================= + */ +struct _buf * +kobj_open_file(char *name) +{ + struct _buf *file; + vnode_t *vp; + + /* set vp as the _fd field of the file */ + if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, + -1) != 0) + return ((void *)-1UL); + + file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); + file->_fd = (intptr_t)vp; + return (file); +} + +int +kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) +{ + ssize_t resid; + + vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, + UIO_SYSSPACE, 0, 0, 0, &resid); + + return (size - resid); +} + +void +kobj_close_file(struct _buf *file) +{ + vn_close((vnode_t *)file->_fd); + umem_free(file, sizeof (struct _buf)); +} + +int +kobj_get_filesize(struct _buf *file, uint64_t *size) +{ + struct stat64 st; + vnode_t *vp = (vnode_t *)file->_fd; + + if (fstat64(vp->v_fd, &st) == -1) { + vn_close(vp); + return (errno); + } + *size = st.st_size; + return (0); +} + +/* + * ========================================================================= + * misc routines + * ========================================================================= + */ + +void +delay(clock_t ticks) +{ + poll(0, 0, ticks * (1000 / hz)); +} + +/* + * Find highest one bit set. + * Returns bit number + 1 of highest bit that is set, otherwise returns 0. + * High order bit is 31 (or 63 in _LP64 kernel). + */ +int +highbit(ulong_t i) +{ + register int h = 1; + + if (i == 0) + return (0); +#ifdef _LP64 + if (i & 0xffffffff00000000ul) { + h += 32; i >>= 32; + } +#endif + if (i & 0xffff0000) { + h += 16; i >>= 16; + } + if (i & 0xff00) { + h += 8; i >>= 8; + } + if (i & 0xf0) { + h += 4; i >>= 4; + } + if (i & 0xc) { + h += 2; i >>= 2; + } + if (i & 0x2) { + h += 1; + } + return (h); +} + +static int random_fd = -1, urandom_fd = -1; + +static int +random_get_bytes_common(uint8_t *ptr, size_t len, int fd) +{ + size_t resid = len; + ssize_t bytes; + + ASSERT(fd != -1); + + while (resid != 0) { + bytes = read(fd, ptr, resid); + ASSERT3S(bytes, >=, 0); + ptr += bytes; + resid -= bytes; + } + + return (0); +} + +int +random_get_bytes(uint8_t *ptr, size_t len) +{ + return (random_get_bytes_common(ptr, len, random_fd)); +} + +int +random_get_pseudo_bytes(uint8_t *ptr, size_t len) +{ + return (random_get_bytes_common(ptr, len, urandom_fd)); +} + +int +ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) +{ + char *end; + + *result = strtoul(hw_serial, &end, base); + if (*result == 0) + return (errno); + return (0); +} + +/* + * ========================================================================= + * kernel emulation setup & teardown + * ========================================================================= + */ +static int +umem_out_of_memory(void) +{ + char errmsg[] = "out of memory -- generating core dump\n"; + + write(fileno(stderr), errmsg, sizeof (errmsg)); + abort(); + return (0); +} + +void +kernel_init(int mode) +{ + umem_nofail_callback(umem_out_of_memory); + + physmem = sysconf(_SC_PHYS_PAGES); + + dprintf("physmem = %llu pages (%.2f GB)\n", physmem, + (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); + + snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid()); + + VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); + VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); + + system_taskq_init(); + + spa_init(mode); +} + +void +kernel_fini(void) +{ + spa_fini(); + + close(random_fd); + close(urandom_fd); + + random_fd = -1; + urandom_fd = -1; +} + +int +z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + int ret; + uLongf len = *dstlen; + + if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) + *dstlen = (size_t)len; + + return (ret); +} + +int +z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, + int level) +{ + int ret; + uLongf len = *dstlen; + + if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) + *dstlen = (size_t)len; + + return (ret); +} + +uid_t +crgetuid(cred_t *cr) +{ + return (0); +} + +gid_t +crgetgid(cred_t *cr) +{ + return (0); +} + +int +crgetngroups(cred_t *cr) +{ + return (0); +} + +gid_t * +crgetgroups(cred_t *cr) +{ + return (NULL); +} + +int +zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) +{ + return (0); +} + +int +zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) +{ + return (0); +} + +int +zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) +{ + return (0); +} + +ksiddomain_t * +ksid_lookupdomain(const char *dom) +{ + ksiddomain_t *kd; + + kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); + kd->kd_name = spa_strdup(dom); + return (kd); +} + +void +ksiddomain_rele(ksiddomain_t *ksid) +{ + spa_strfree(ksid->kd_name); + umem_free(ksid, sizeof (ksiddomain_t)); +} diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c new file mode 100644 index 000000000..93acdcf8e --- /dev/null +++ b/lib/libzpool/taskq.c @@ -0,0 +1,261 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> + +int taskq_now; +taskq_t *system_taskq; + +typedef struct task { + struct task *task_next; + struct task *task_prev; + task_func_t *task_func; + void *task_arg; +} task_t; + +#define TASKQ_ACTIVE 0x00010000 + +struct taskq { + kmutex_t tq_lock; + krwlock_t tq_threadlock; + kcondvar_t tq_dispatch_cv; + kcondvar_t tq_wait_cv; + thread_t *tq_threadlist; + int tq_flags; + int tq_active; + int tq_nthreads; + int tq_nalloc; + int tq_minalloc; + int tq_maxalloc; + task_t *tq_freelist; + task_t tq_task; +}; + +static task_t * +task_alloc(taskq_t *tq, int tqflags) +{ + task_t *t; + + if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { + tq->tq_freelist = t->task_next; + } else { + mutex_exit(&tq->tq_lock); + if (tq->tq_nalloc >= tq->tq_maxalloc) { + if (!(tqflags & KM_SLEEP)) { + mutex_enter(&tq->tq_lock); + return (NULL); + } + /* + * We don't want to exceed tq_maxalloc, but we can't + * wait for other tasks to complete (and thus free up + * task structures) without risking deadlock with + * the caller. So, we just delay for one second + * to throttle the allocation rate. + */ + delay(hz); + } + t = kmem_alloc(sizeof (task_t), tqflags); + mutex_enter(&tq->tq_lock); + if (t != NULL) + tq->tq_nalloc++; + } + return (t); +} + +static void +task_free(taskq_t *tq, task_t *t) +{ + if (tq->tq_nalloc <= tq->tq_minalloc) { + t->task_next = tq->tq_freelist; + tq->tq_freelist = t; + } else { + tq->tq_nalloc--; + mutex_exit(&tq->tq_lock); + kmem_free(t, sizeof (task_t)); + mutex_enter(&tq->tq_lock); + } +} + +taskqid_t +taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) +{ + task_t *t; + + if (taskq_now) { + func(arg); + return (1); + } + + mutex_enter(&tq->tq_lock); + ASSERT(tq->tq_flags & TASKQ_ACTIVE); + if ((t = task_alloc(tq, tqflags)) == NULL) { + mutex_exit(&tq->tq_lock); + return (0); + } + t->task_next = &tq->tq_task; + t->task_prev = tq->tq_task.task_prev; + t->task_next->task_prev = t; + t->task_prev->task_next = t; + t->task_func = func; + t->task_arg = arg; + cv_signal(&tq->tq_dispatch_cv); + mutex_exit(&tq->tq_lock); + return (1); +} + +void +taskq_wait(taskq_t *tq) +{ + mutex_enter(&tq->tq_lock); + while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + mutex_exit(&tq->tq_lock); +} + +static void * +taskq_thread(void *arg) +{ + taskq_t *tq = arg; + task_t *t; + + mutex_enter(&tq->tq_lock); + while (tq->tq_flags & TASKQ_ACTIVE) { + if ((t = tq->tq_task.task_next) == &tq->tq_task) { + if (--tq->tq_active == 0) + cv_broadcast(&tq->tq_wait_cv); + cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock); + tq->tq_active++; + continue; + } + t->task_prev->task_next = t->task_next; + t->task_next->task_prev = t->task_prev; + mutex_exit(&tq->tq_lock); + + rw_enter(&tq->tq_threadlock, RW_READER); + t->task_func(t->task_arg); + rw_exit(&tq->tq_threadlock); + + mutex_enter(&tq->tq_lock); + task_free(tq, t); + } + tq->tq_nthreads--; + cv_broadcast(&tq->tq_wait_cv); + mutex_exit(&tq->tq_lock); + return (NULL); +} + +/*ARGSUSED*/ +taskq_t * +taskq_create(const char *name, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); + int t; + + rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); + mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); + tq->tq_flags = flags | TASKQ_ACTIVE; + tq->tq_active = nthreads; + tq->tq_nthreads = nthreads; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_task.task_next = &tq->tq_task; + tq->tq_task.task_prev = &tq->tq_task; + tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); + + if (flags & TASKQ_PREPOPULATE) { + mutex_enter(&tq->tq_lock); + while (minalloc-- > 0) + task_free(tq, task_alloc(tq, KM_SLEEP)); + mutex_exit(&tq->tq_lock); + } + + for (t = 0; t < nthreads; t++) + (void) thr_create(0, 0, taskq_thread, + tq, THR_BOUND, &tq->tq_threadlist[t]); + + return (tq); +} + +void +taskq_destroy(taskq_t *tq) +{ + int t; + int nthreads = tq->tq_nthreads; + + taskq_wait(tq); + + mutex_enter(&tq->tq_lock); + + tq->tq_flags &= ~TASKQ_ACTIVE; + cv_broadcast(&tq->tq_dispatch_cv); + + while (tq->tq_nthreads != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + + tq->tq_minalloc = 0; + while (tq->tq_nalloc != 0) { + ASSERT(tq->tq_freelist != NULL); + task_free(tq, task_alloc(tq, KM_SLEEP)); + } + + mutex_exit(&tq->tq_lock); + + for (t = 0; t < nthreads; t++) + (void) thr_join(tq->tq_threadlist[t], NULL, NULL); + + kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); + + rw_destroy(&tq->tq_threadlock); + mutex_destroy(&tq->tq_lock); + cv_destroy(&tq->tq_dispatch_cv); + cv_destroy(&tq->tq_wait_cv); + + kmem_free(tq, sizeof (taskq_t)); +} + +int +taskq_member(taskq_t *tq, void *t) +{ + int i; + + if (taskq_now) + return (1); + + for (i = 0; i < tq->tq_nthreads; i++) + if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) + return (1); + + return (0); +} + +void +system_taskq_init(void) +{ + system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, + TASKQ_DYNAMIC | TASKQ_PREPOPULATE); +} diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c new file mode 100644 index 000000000..781edb6e8 --- /dev/null +++ b/lib/libzpool/util.c @@ -0,0 +1,156 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <assert.h> +#include <sys/zfs_context.h> +#include <sys/avl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/spa.h> +#include <sys/fs/zfs.h> +#include <sys/refcount.h> + +/* + * Routines needed by more than one client of libzpool. + */ + +void +nicenum(uint64_t num, char *buf) +{ + uint64_t n = num; + int index = 0; + char u; + + while (n >= 1024) { + n = (n + (1024 / 2)) / 1024; /* Round up or down */ + index++; + } + + u = " KMGTPE"[index]; + + if (index == 0) { + (void) sprintf(buf, "%llu", (u_longlong_t)n); + } else if (n < 10 && (num & (num - 1)) != 0) { + (void) sprintf(buf, "%.2f%c", + (double)num / (1ULL << 10 * index), u); + } else if (n < 100 && (num & (num - 1)) != 0) { + (void) sprintf(buf, "%.1f%c", + (double)num / (1ULL << 10 * index), u); + } else { + (void) sprintf(buf, "%llu%c", (u_longlong_t)n, u); + } +} + +static void +show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent) +{ + vdev_stat_t *vs; + vdev_stat_t v0 = { 0 }; + uint64_t sec; + uint64_t is_log = 0; + nvlist_t **child; + uint_t c, children; + char used[6], avail[6]; + char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6]; + char *prefix = ""; + + if (indent == 0 && desc != NULL) { + (void) printf(" " + " capacity operations bandwidth ---- errors ----\n"); + (void) printf("description " + "used avail read write read write read write cksum\n"); + } + + if (desc != NULL) { + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); + + if (is_log) + prefix = "log "; + + if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + (uint64_t **)&vs, &c) != 0) + vs = &v0; + + sec = MAX(1, vs->vs_timestamp / NANOSEC); + + nicenum(vs->vs_alloc, used); + nicenum(vs->vs_space - vs->vs_alloc, avail); + nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops); + nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops); + nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes); + nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes); + nicenum(vs->vs_read_errors, rerr); + nicenum(vs->vs_write_errors, werr); + nicenum(vs->vs_checksum_errors, cerr); + + (void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n", + indent, "", + prefix, + indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12), + desc, + vs->vs_space ? 6 : 0, vs->vs_space ? used : "", + vs->vs_space ? 6 : 0, vs->vs_space ? avail : "", + rops, wops, rbytes, wbytes, rerr, werr, cerr); + } + + if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + nvlist_t *cnv = child[c]; + char *cname, *tname; + uint64_t np; + if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && + nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) + cname = "<unknown>"; + tname = calloc(1, strlen(cname) + 2); + (void) strcpy(tname, cname); + if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0) + tname[strlen(tname)] = '0' + np; + show_vdev_stats(tname, ctype, cnv, indent + 2); + free(tname); + } +} + +void +show_pool_stats(spa_t *spa) +{ + nvlist_t *config, *nvroot; + char *name; + + VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0); + + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + + show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0); + show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0); + show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0); + + nvlist_free(config); +} |