summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.topdeps2
-rw-r--r--.topmsg4
-rw-r--r--ChangeLog2
-rw-r--r--cmd/Makefile.am2
-rw-r--r--cmd/zdb/zdb.c2
-rw-r--r--cmd/zpios/Makefile.am12
-rw-r--r--cmd/zpios/zpios.h120
-rw-r--r--cmd/zpios/zpios_main.c631
-rw-r--r--cmd/zpios/zpios_util.c456
-rw-r--r--cmd/zpool/zpool_main.c15
-rw-r--r--cmd/zpool/zpool_vdev.c507
-rw-r--r--cmd/ztest/ztest.c24
-rw-r--r--config/user-arch.m416
-rw-r--r--config/zfs-build.m49
-rw-r--r--configure.ac7
-rw-r--r--lib/Makefile.am2
-rw-r--r--lib/libefi/rdwr_efi.c376
-rw-r--r--lib/libnvpair/nvpair_alloc_system.c2
-rw-r--r--lib/libspl/Makefile.am27
-rw-r--r--lib/libspl/asm-generic/Makefile.am18
-rw-r--r--lib/libspl/asm-generic/atomic.S6
-rw-r--r--lib/libspl/asm-generic/atomic.c424
-rw-r--r--lib/libspl/asm-i386/Makefile.am1
-rw-r--r--lib/libspl/asm-i386/atomic.S730
-rw-r--r--lib/libspl/asm-x86_64/Makefile.am1
-rw-r--r--lib/libspl/asm-x86_64/atomic.S595
-rw-r--r--lib/libspl/getexecname.c55
-rw-r--r--lib/libspl/gethrtime.c45
-rw-r--r--lib/libspl/getmntany.c99
-rw-r--r--lib/libspl/include/Makefile.am8
-rw-r--r--lib/libspl/include/assert.h96
-rw-r--r--lib/libspl/include/atomic.h266
-rw-r--r--lib/libspl/include/devid.h48
-rw-r--r--lib/libspl/include/ia32/sys/asm_linkage.h302
-rw-r--r--lib/libspl/include/libdevinfo.h30
-rw-r--r--lib/libspl/include/libshare.h34
-rw-r--r--lib/libspl/include/limits.h40
-rw-r--r--lib/libspl/include/locale.h35
-rw-r--r--lib/libspl/include/priv.h37
-rw-r--r--lib/libspl/include/rpc/xdr.h65
-rw-r--r--lib/libspl/include/stdio.h34
-rw-r--r--lib/libspl/include/stdlib.h34
-rw-r--r--lib/libspl/include/string.h36
-rw-r--r--lib/libspl/include/strings.h33
-rw-r--r--lib/libspl/include/synch.h30
-rw-r--r--lib/libspl/include/sys/acl.h287
-rw-r--r--lib/libspl/include/sys/acl_impl.h59
-rw-r--r--lib/libspl/include/sys/bitmap.h30
-rw-r--r--lib/libspl/include/sys/byteorder.h199
-rw-r--r--lib/libspl/include/sys/callb.h30
-rw-r--r--lib/libspl/include/sys/cmn_err.h30
-rw-r--r--lib/libspl/include/sys/compress.h30
-rw-r--r--lib/libspl/include/sys/cred.h32
-rw-r--r--lib/libspl/include/sys/debug.h32
-rw-r--r--lib/libspl/include/sys/dkio.h484
-rw-r--r--lib/libspl/include/sys/dklabel.h268
-rw-r--r--lib/libspl/include/sys/dktp/fdisk.h173
-rw-r--r--lib/libspl/include/sys/feature_tests.h32
-rw-r--r--lib/libspl/include/sys/file.h50
-rw-r--r--lib/libspl/include/sys/fm/protocol.h30
-rw-r--r--lib/libspl/include/sys/fm/util.h30
-rw-r--r--lib/libspl/include/sys/frame.h131
-rw-r--r--lib/libspl/include/sys/int_limits.h30
-rw-r--r--lib/libspl/include/sys/int_types.h32
-rw-r--r--lib/libspl/include/sys/inttypes.h34
-rw-r--r--lib/libspl/include/sys/isa_defs.h125
-rw-r--r--lib/libspl/include/sys/kmem.h45
-rw-r--r--lib/libspl/include/sys/kstat.h820
-rw-r--r--lib/libspl/include/sys/list.h65
-rw-r--r--lib/libspl/include/sys/list_impl.h51
-rw-r--r--lib/libspl/include/sys/machelf.h180
-rw-r--r--lib/libspl/include/sys/mhd.h159
-rw-r--r--lib/libspl/include/sys/mkdev.h30
-rw-r--r--lib/libspl/include/sys/mntent.h142
-rw-r--r--lib/libspl/include/sys/mnttab.h86
-rw-r--r--lib/libspl/include/sys/mount.h50
-rw-r--r--lib/libspl/include/sys/note.h56
-rw-r--r--lib/libspl/include/sys/param.h67
-rw-r--r--lib/libspl/include/sys/priv.h30
-rw-r--r--lib/libspl/include/sys/processor.h32
-rw-r--r--lib/libspl/include/sys/sdt.h36
-rw-r--r--lib/libspl/include/sys/stack.h52
-rw-r--r--lib/libspl/include/sys/stropts.h4
-rw-r--r--lib/libspl/include/sys/sunddi.h29
-rw-r--r--lib/libspl/include/sys/sysevent.h30
-rw-r--r--lib/libspl/include/sys/sysevent/eventdefs.h235
-rw-r--r--lib/libspl/include/sys/sysmacros.h98
-rw-r--r--lib/libspl/include/sys/systeminfo.h37
-rw-r--r--lib/libspl/include/sys/time.h39
-rw-r--r--lib/libspl/include/sys/types.h98
-rw-r--r--lib/libspl/include/sys/types32.h91
-rw-r--r--lib/libspl/include/sys/tzfile.h164
-rw-r--r--lib/libspl/include/sys/uio.h50
-rw-r--r--lib/libspl/include/sys/utsname.h34
-rw-r--r--lib/libspl/include/sys/va_list.h36
-rw-r--r--lib/libspl/include/sys/varargs.h30
-rw-r--r--lib/libspl/include/sys/vtoc.h350
-rw-r--r--lib/libspl/include/sys/zone.h30
-rw-r--r--lib/libspl/include/thread.h30
-rw-r--r--lib/libspl/include/tsol/label.h30
-rw-r--r--lib/libspl/include/tzfile.h32
-rw-r--r--lib/libspl/include/ucred.h32
-rw-r--r--lib/libspl/include/umem.h169
-rw-r--r--lib/libspl/include/unistd.h59
-rw-r--r--lib/libspl/include/zone.h86
-rw-r--r--lib/libspl/list.c243
-rw-r--r--lib/libspl/mkdirp.c210
-rw-r--r--lib/libspl/strlcat.c56
-rw-r--r--lib/libspl/strlcpy.c52
-rw-r--r--lib/libspl/strnlen.c44
-rw-r--r--lib/libspl/xdr.c78
-rw-r--r--lib/libspl/zone.c60
-rw-r--r--lib/libuutil/uu_misc.c12
-rw-r--r--lib/libzfs/include/libzfs.h30
-rw-r--r--lib/libzfs/libzfs_dataset.c12
-rw-r--r--lib/libzfs/libzfs_import.c111
-rw-r--r--lib/libzfs/libzfs_pool.c144
-rw-r--r--lib/libzfs/libzfs_sendrecv.c1
-rw-r--r--lib/libzfs/libzfs_util.c10
-rw-r--r--lib/libzpool/include/sys/zfs_context.h66
-rw-r--r--lib/libzpool/kernel.c12
-rw-r--r--module/Makefile.in1
-rw-r--r--module/avl/avl.c26
-rw-r--r--module/nvpair/nvpair.c124
-rw-r--r--module/nvpair/nvpair_alloc_spl.c75
-rw-r--r--module/unicode/u8_textprep.c16
-rw-r--r--module/unicode/uconv.c9
-rw-r--r--module/zcommon/zfs_comutil.c4
-rw-r--r--module/zcommon/zfs_deleg.c6
-rw-r--r--module/zcommon/zfs_namecheck.c6
-rw-r--r--module/zcommon/zfs_prop.c34
-rw-r--r--module/zcommon/zpool_prop.c16
-rw-r--r--module/zcommon/zprop_common.c17
-rw-r--r--module/zfs/Makefile.in1
-rw-r--r--module/zfs/arc.c40
-rw-r--r--module/zfs/dbuf.c18
-rw-r--r--module/zfs/dmu.c39
-rw-r--r--module/zfs/dmu_object.c8
-rw-r--r--module/zfs/dmu_objset.c36
-rw-r--r--module/zfs/dmu_send.c4
-rw-r--r--module/zfs/dmu_traverse.c5
-rw-r--r--module/zfs/dmu_tx.c14
-rw-r--r--module/zfs/dmu_zfetch.c6
-rw-r--r--module/zfs/dsl_dataset.c51
-rw-r--r--module/zfs/dsl_deleg.c5
-rw-r--r--module/zfs/dsl_dir.c7
-rw-r--r--module/zfs/dsl_prop.c7
-rw-r--r--module/zfs/dsl_synctask.c5
-rw-r--r--module/zfs/fletcher.c9
-rw-r--r--module/zfs/include/sys/dmu.h10
-rw-r--r--module/zfs/include/sys/spa.h4
-rw-r--r--module/zfs/include/sys/spa_impl.h2
-rw-r--r--module/zfs/include/sys/vdev_disk.h71
-rw-r--r--module/zfs/include/sys/zfs_context.h4
-rw-r--r--module/zfs/include/sys/zfs_debug.h5
-rw-r--r--module/zfs/include/sys/zfs_znode.h7
-rw-r--r--module/zfs/spa.c72
-rw-r--r--module/zfs/spa_boot.c4
-rw-r--r--module/zfs/spa_config.c14
-rw-r--r--module/zfs/spa_errlog.c14
-rw-r--r--module/zfs/spa_history.c15
-rw-r--r--module/zfs/spa_misc.c73
-rw-r--r--module/zfs/txg.c22
-rw-r--r--module/zfs/vdev.c8
-rw-r--r--module/zfs/vdev_disk.c623
-rw-r--r--module/zfs/vdev_queue.c11
-rw-r--r--module/zfs/zap_micro.c16
-rw-r--r--module/zfs/zfs_byteswap.c6
-rw-r--r--module/zfs/zfs_fm.c4
-rw-r--r--module/zfs/zfs_ioctl.c36
-rw-r--r--module/zfs/zfs_vnops.c2
-rw-r--r--module/zfs/zfs_znode.c5
-rw-r--r--module/zfs/zil.c7
-rw-r--r--module/zfs/zio.c22
-rw-r--r--module/zpios/Makefile.in11
-rw-r--r--module/zpios/include/zpios-ctl.h197
-rw-r--r--module/zpios/include/zpios-internal.h137
-rw-r--r--module/zpios/zpios.c1297
-rw-r--r--scripts/Makefile.am14
-rwxr-xr-xscripts/common.sh5
-rwxr-xr-xscripts/zpios-profile/zpios-profile-disk.sh129
-rwxr-xr-xscripts/zpios-profile/zpios-profile-pids.sh131
-rwxr-xr-xscripts/zpios-profile/zpios-profile-post.sh129
-rwxr-xr-xscripts/zpios-profile/zpios-profile-pre.sh184
-rwxr-xr-xscripts/zpios-profile/zpios-profile.sh226
-rwxr-xr-xscripts/zpios-sanity.sh148
-rwxr-xr-xscripts/zpios-survey.sh215
-rwxr-xr-xscripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh65
-rwxr-xr-xscripts/zpios-test/1th-16rc-4rs-1cs-4off.sh66
-rwxr-xr-xscripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh65
-rwxr-xr-xscripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh65
-rwxr-xr-xscripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh65
l---------scripts/zpios-test/large-thread-survey.sh1
l---------scripts/zpios-test/large.sh1
l---------scripts/zpios-test/medium.sh1
l---------scripts/zpios-test/small.sh1
l---------scripts/zpios-test/tiny.sh1
-rwxr-xr-xscripts/zpios.sh266
198 files changed, 16876 insertions, 491 deletions
diff --git a/.topdeps b/.topdeps
index 7f16cbcdd..a8059c355 100644
--- a/.topdeps
+++ b/.topdeps
@@ -1 +1 @@
-zfs-branch
+linux-debug-zerocopy
diff --git a/.topmsg b/.topmsg
index 0483ce70d..be5391622 100644
--- a/.topmsg
+++ b/.topmsg
@@ -1,6 +1,6 @@
From: Brian Behlendorf <[email protected]>
-Subject: [PATCH] linux docs
+Subject: [PATCH] linux arc
-Documentation branch used simply to track historical results.
+Linux VM arc integration.
Signed-off-by: Brian Behlendorf <[email protected]>
diff --git a/ChangeLog b/ChangeLog
index 5730eb1c9..17534f477 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -333,7 +333,7 @@
- Minor build system improvements
- Minor script improvements
- Create a full copy and not a link tree with quilt
- - KPIOS_MAJOR changed from 231 to 232
+ - ZPIOS_MAJOR changed from 231 to 232
- BIO_RW_BARRIER flag removed from IO request
2008-06-30 Brian Behlendorf <[email protected]>
diff --git a/cmd/Makefile.am b/cmd/Makefile.am
index 86ec885bc..42e6d9c3c 100644
--- a/cmd/Makefile.am
+++ b/cmd/Makefile.am
@@ -1 +1 @@
-SUBDIRS = zfs zpool zdb zinject ztest
+SUBDIRS = zfs zpool zdb zinject ztest zpios
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index ce8434958..2cab6da0f 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -2395,7 +2395,7 @@ main(int argc, char **argv)
kernel_init(FREAD);
g_zfs = libzfs_init();
- ASSERT(g_zfs != NULL);
+ VERIFY(g_zfs != NULL);
for (c = 0; c < 256; c++) {
if (dump_all && c != 'l' && c != 'R')
diff --git a/cmd/zpios/Makefile.am b/cmd/zpios/Makefile.am
new file mode 100644
index 000000000..4e13a76c9
--- /dev/null
+++ b/cmd/zpios/Makefile.am
@@ -0,0 +1,12 @@
+# Automake input for the zpios command line utility.
+include $(top_srcdir)/config/Rules.am
+
+# Add module/zpios/include to the header search path so <zpios-ctl.h>
+# can be found by the sources listed below.
+DEFAULT_INCLUDES += \
+ -I${top_srcdir}/module/zpios/include
+
+# zpios is built as an sbin program.
+sbin_PROGRAMS = zpios
+
+# Sources (and private header) that make up the zpios binary.
+zpios_SOURCES = \
+ $(top_srcdir)/cmd/zpios/zpios_main.c \
+ $(top_srcdir)/cmd/zpios/zpios_util.c \
+ $(top_srcdir)/cmd/zpios/zpios.h
+
diff --git a/cmd/zpios/zpios.h b/cmd/zpios/zpios.h
new file mode 100644
index 000000000..d88af0b04
--- /dev/null
+++ b/cmd/zpios/zpios.h
@@ -0,0 +1,120 @@
+/*
+ * This file is part of the ZFS Linux port.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
+ * LLNL-CODE-403049
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _ZPIOS_H
+#define _ZPIOS_H
+
+#include <zpios-ctl.h>
+
+#define VERSION_SIZE 64
+
+/* Regular expressions */
+#define REGEX_NUMBERS "^[0-9]*[0-9]$"
+#define REGEX_NUMBERS_COMMA "^([0-9]+,)*[0-9]+$"
+#define REGEX_SIZE "^[0-9][0-9]*[kmgt]$"
+#define REGEX_SIZE_COMMA "^([0-9][0-9]*[kmgt]+,)*[0-9][0-9]*[kmgt]$"
+
+/* Flags for low, high, incr */
+#define FLAG_SET 0x01
+#define FLAG_LOW 0x02
+#define FLAG_HIGH 0x04
+#define FLAG_INCR 0x08
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Binary size units.  KB, MB and GB fit in an int, but TB (2^40) does
+ * not, so the final multiplication is done in unsigned long long
+ * arithmetic to avoid signed integer overflow.
+ */
+#define KB	(1024)
+#define MB	(KB * 1024)
+#define GB	(MB * 1024)
+#define TB	(GB * 1024ULL)
+
+#define KMGT_SIZE 16
+
+/* All offsets, sizes and counts can be passed to the application in
+ * multiple ways.
+ * 1. a value (stored in val[0], val_count will be 1)
+ * 2. a comma separated list of values (stored in val[], using val_count)
+ * 3. a range and block sizes, low, high, factor (val_count must be 0)
+ *
+ * get_next() walks one of these descriptions and hands out one value per
+ * call; which of the three forms is used is decided by val_count alone.
+ */
+typedef struct pios_range_repeat {
+ uint64_t val[32]; /* Comma sep array, or low, high, inc */
+ uint64_t val_count; /* Num of values */
+ /* Range form: first value, also the base the percentage is taken of */
+ uint64_t val_low;
+ /* Range form: inclusive upper bound, iteration stops once exceeded */
+ uint64_t val_high;
+ /* Range form: step, expressed as a percentage of val_low */
+ uint64_t val_inc_perc;
+ /*
+  * Iterator state owned by get_next(): the accumulated percentage in
+  * the range form, the next index into val[] in the list form, and a
+  * "consumed" flag in the single value form.  Callers reset it to 0.
+  */
+ uint64_t next_val; /* Used for multiple runs in get_next() */
+} range_repeat_t;
+
+/*
+ * Parsed command line plus the iteration state of the current test run.
+ * Allocated and zero filled by args_init(), released by args_fini().
+ */
+typedef struct cmd_args {
+ range_repeat_t T; /* Thread count */
+ range_repeat_t N; /* Region count */
+ range_repeat_t O; /* Offset count */
+ range_repeat_t C; /* Chunksize */
+ range_repeat_t S; /* Regionsize */
+
+ const char *pool; /* Pool */
+ const char *name; /* Name */
+ uint32_t flags; /* Flags */
+ uint32_t io_type; /* DMUIO only */
+ uint32_t verbose; /* Verbose */
+ uint32_t human_readable; /* Human readable output */
+
+ uint64_t regionnoise; /* Region noise */
+ uint64_t chunknoise; /* Chunk noise */
+ uint64_t thread_delay; /* Thread delay */
+
+ char pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */
+ char post[ZPIOS_PATH_SIZE]; /* Post-exec hook */
+ char log[ZPIOS_PATH_SIZE]; /* Requested log dir */
+
+ /* Control */
+ /* Id handed to each run; run_offsets() increments it after every run */
+ int current_id;
+ /* Values of the run in progress, filled in by get_next() */
+ uint64_t current_T;
+ uint64_t current_N;
+ uint64_t current_C;
+ uint64_t current_S;
+ uint64_t current_O;
+
+ /* errno recorded by run_one() when the ZPIOS_CMD ioctl fails */
+ uint32_t rc;
+} cmd_args_t;
+
+int set_count(char *pattern1, char *pattern2, range_repeat_t *range,
+ char *optarg, uint32_t *flags, char *arg);
+int set_lhi(char *pattern, range_repeat_t *range, char *optarg,
+ int flag, uint32_t *flag_thread, char *arg);
+int set_noise(uint64_t *noise, char *optarg, char *arg);
+int set_load_params(cmd_args_t *args, char *optarg);
+int check_mutual_exclusive_command_lines(uint32_t flag, char *arg);
+void print_stats_header(cmd_args_t *args);
+void print_stats(cmd_args_t *args, zpios_cmd_t *cmd);
+
+#endif /* _ZPIOS_H */
diff --git a/cmd/zpios/zpios_main.c b/cmd/zpios/zpios_main.c
new file mode 100644
index 000000000..f774682ac
--- /dev/null
+++ b/cmd/zpios/zpios_main.c
@@ -0,0 +1,631 @@
+/*
+ * This file is part of the ZFS Linux port.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
+ * LLNL-CODE-403049
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Kernel PIOS DMU implementation originally derived from PIOS test code.
+ * Character control interface derived from SPL code.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include "zpios.h"
+
+/*
+ * Option tables handed to getopt_long() by args_init().  In short_opt a
+ * character followed by ':' takes an argument.  Every long option maps to
+ * the short option character in its last field, which is what args_init()
+ * switches on.  Note that 'h' is --threadcount_high; help is '?'.
+ */
+static const char short_opt[] = "t:l:h:e:n:i:j:k:o:m:q:r:c:a:b:g:s:A:B:C:"
+ "L:p:M:xP:R:G:I:N:T:VzOfHv?";
+static const struct option long_opt[] = {
+ {"threadcount", required_argument, 0, 't' },
+ {"threadcount_low", required_argument, 0, 'l' },
+ {"threadcount_high", required_argument, 0, 'h' },
+ {"threadcount_incr", required_argument, 0, 'e' },
+ {"regioncount", required_argument, 0, 'n' },
+ {"regioncount_low", required_argument, 0, 'i' },
+ {"regioncount_high", required_argument, 0, 'j' },
+ {"regioncount_incr", required_argument, 0, 'k' },
+ {"offset", required_argument, 0, 'o' },
+ {"offset_low", required_argument, 0, 'm' },
+ {"offset_high", required_argument, 0, 'q' },
+ {"offset_incr", required_argument, 0, 'r' },
+ {"chunksize", required_argument, 0, 'c' },
+ {"chunksize_low", required_argument, 0, 'a' },
+ {"chunksize_high", required_argument, 0, 'b' },
+ {"chunksize_incr", required_argument, 0, 'g' },
+ {"regionsize", required_argument, 0, 's' },
+ {"regionsize_low", required_argument, 0, 'A' },
+ {"regionsize_high", required_argument, 0, 'B' },
+ {"regionsize_incr", required_argument, 0, 'C' },
+ {"load", required_argument, 0, 'L' },
+ {"pool", required_argument, 0, 'p' },
+ {"name", required_argument, 0, 'M' },
+ {"cleanup", no_argument, 0, 'x' },
+ {"prerun", required_argument, 0, 'P' },
+ {"postrun", required_argument, 0, 'R' },
+ {"log", required_argument, 0, 'G' },
+ {"regionnoise", required_argument, 0, 'I' },
+ {"chunknoise", required_argument, 0, 'N' },
+ {"threaddelay", required_argument, 0, 'T' },
+ {"verify", no_argument, 0, 'V' },
+ {"zerocopy", no_argument, 0, 'z' },
+ {"nowait", no_argument, 0, 'O' },
+ {"noprefetch", no_argument, 0, 'f' },
+ {"human-readable", no_argument, 0, 'H' },
+ {"verbose", no_argument, 0, 'v' },
+ {"help", no_argument, 0, '?' },
+ { 0, 0, 0, 0 },
+};
+
+/*
+ * Control file descriptor.  It starts out as -1 ("not open") because
+ * dev_fini() closes any descriptor other than -1; a zero initial value
+ * would make it close stdin whenever the device was never opened.
+ */
+static int zpiosctl_fd = -1;
+static char zpios_version[VERSION_SIZE];	/* Kernel version string */
+static char *zpios_buffer = NULL;		/* Scratch space area */
+static int zpios_buffer_size = 0;		/* Scratch space size */
+
+/*
+ * Write the command line synopsis to stderr.
+ *
+ * Returns 0 unconditionally.
+ */
+static int
+usage(void)
+{
+	/* One line per option: long name, short name, expected argument. */
+	static const char options[] =
+	    " --threadcount -t =values\n"
+	    " --threadcount_low -l =value\n"
+	    " --threadcount_high -h =value\n"
+	    " --threadcount_incr -e =value\n"
+	    " --regioncount -n =values\n"
+	    " --regioncount_low -i =value\n"
+	    " --regioncount_high -j =value\n"
+	    " --regioncount_incr -k =value\n"
+	    " --offset -o =values\n"
+	    " --offset_low -m =value\n"
+	    " --offset_high -q =value\n"
+	    " --offset_incr -r =value\n"
+	    " --chunksize -c =values\n"
+	    " --chunksize_low -a =value\n"
+	    " --chunksize_high -b =value\n"
+	    " --chunksize_incr -g =value\n"
+	    " --regionsize -s =values\n"
+	    " --regionsize_low -A =value\n"
+	    " --regionsize_high -B =value\n"
+	    " --regionsize_incr -C =value\n"
+	    " --load -L =dmuio|ssf|fpp\n"
+	    " --pool -p =pool name\n"
+	    " --name -M =test name\n"
+	    " --cleanup -x\n"
+	    " --prerun -P =pre-command\n"
+	    " --postrun -R =post-command\n"
+	    " --log -G =log directory\n"
+	    " --regionnoise -I =shift\n"
+	    " --chunknoise -N =bytes\n"
+	    " --threaddelay -T =jiffies\n"
+	    " --verify -V\n"
+	    " --zerocopy -z\n"
+	    " --nowait -O\n"
+	    " --noprefetch -f\n"
+	    " --human-readable -H\n"
+	    " --verbose -v =increase verbosity\n"
+	    " --help -? =this help\n\n";
+
+	fputs("Usage: zpios\n", stderr);
+	fputs(options, stderr);
+
+	return 0;
+}
+
+/*
+ * Release an argument block obtained from args_init().
+ * The pointer must not be NULL.
+ */
+static void
+args_fini(cmd_args_t *args)
+{
+	assert(args != NULL);
+
+	free(args);
+}
+
+/*
+ * Parse the command line into a freshly allocated, zero filled cmd_args_t.
+ *
+ * Returns the populated structure, which the caller releases with
+ * args_fini(), or NULL when no arguments were given, allocation failed,
+ * an option could not be parsed, no pool was named, or --zerocopy was
+ * combined with --verify.  The usage text is printed on every failure
+ * except allocation failure.
+ */
+static cmd_args_t *
+args_init(int argc, char **argv)
+{
+	cmd_args_t *args;
+	/* Per option family record of which of value/low/high/incr was seen */
+	uint32_t fl_th = 0;
+	uint32_t fl_rc = 0;
+	uint32_t fl_of = 0;
+	uint32_t fl_rs = 0;
+	uint32_t fl_cs = 0;
+	int c, rc;
+
+	if (argc == 1) {
+		usage();
+		return (cmd_args_t *)NULL;
+	}
+
+	/* Configure and populate the args structures */
+	args = malloc(sizeof(*args));
+	if (args == NULL)
+		return NULL;
+
+	/*
+	 * Zero filling also guarantees that the strncpy() calls below, which
+	 * copy at most ZPIOS_PATH_SIZE - 1 bytes, leave a terminated string.
+	 */
+	memset(args, 0, sizeof(*args));
+
+	while ((c = getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1) {
+		rc = 0;
+
+		switch (c) {
+		case 't': /* --thread count */
+			rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA,
+			    &args->T, optarg, &fl_th, "threadcount");
+			break;
+		case 'l': /* --threadcount_low */
+			rc = set_lhi(REGEX_NUMBERS, &args->T, optarg,
+			    FLAG_LOW, &fl_th, "threadcount_low");
+			break;
+		case 'h': /* --threadcount_high */
+			rc = set_lhi(REGEX_NUMBERS, &args->T, optarg,
+			    FLAG_HIGH, &fl_th, "threadcount_high");
+			break;
+		case 'e': /* --threadcount_inc */
+			rc = set_lhi(REGEX_NUMBERS, &args->T, optarg,
+			    FLAG_INCR, &fl_th, "threadcount_incr");
+			break;
+		case 'n': /* --regioncount */
+			rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA,
+			    &args->N, optarg, &fl_rc, "regioncount");
+			break;
+		case 'i': /* --regioncount_low */
+			rc = set_lhi(REGEX_NUMBERS, &args->N, optarg,
+			    FLAG_LOW, &fl_rc, "regioncount_low");
+			break;
+		case 'j': /* --regioncount_high */
+			rc = set_lhi(REGEX_NUMBERS, &args->N, optarg,
+			    FLAG_HIGH, &fl_rc, "regioncount_high");
+			break;
+		case 'k': /* --regioncount_inc */
+			rc = set_lhi(REGEX_NUMBERS, &args->N, optarg,
+			    FLAG_INCR, &fl_rc, "regioncount_incr");
+			break;
+		case 'o': /* --offset */
+			rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
+			    &args->O, optarg, &fl_of, "offset");
+			break;
+		case 'm': /* --offset_low */
+			rc = set_lhi(REGEX_SIZE, &args->O, optarg,
+			    FLAG_LOW, &fl_of, "offset_low");
+			break;
+		case 'q': /* --offset_high */
+			rc = set_lhi(REGEX_SIZE, &args->O, optarg,
+			    FLAG_HIGH, &fl_of, "offset_high");
+			break;
+		case 'r': /* --offset_inc */
+			rc = set_lhi(REGEX_NUMBERS, &args->O, optarg,
+			    FLAG_INCR, &fl_of, "offset_incr");
+			break;
+		case 'c': /* --chunksize */
+			rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
+			    &args->C, optarg, &fl_cs, "chunksize");
+			break;
+		case 'a': /* --chunksize_low */
+			rc = set_lhi(REGEX_SIZE, &args->C, optarg,
+			    FLAG_LOW, &fl_cs, "chunksize_low");
+			break;
+		case 'b': /* --chunksize_high */
+			rc = set_lhi(REGEX_SIZE, &args->C, optarg,
+			    FLAG_HIGH, &fl_cs, "chunksize_high");
+			break;
+		case 'g': /* --chunksize_inc */
+			rc = set_lhi(REGEX_NUMBERS, &args->C, optarg,
+			    FLAG_INCR, &fl_cs, "chunksize_incr");
+			break;
+		case 's': /* --regionsize */
+			rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
+			    &args->S, optarg, &fl_rs, "regionsize");
+			break;
+		case 'A': /* --regionsize_low */
+			rc = set_lhi(REGEX_SIZE, &args->S, optarg,
+			    FLAG_LOW, &fl_rs, "regionsize_low");
+			break;
+		case 'B': /* --regionsize_high */
+			rc = set_lhi(REGEX_SIZE, &args->S, optarg,
+			    FLAG_HIGH, &fl_rs, "regionsize_high");
+			break;
+		case 'C': /* --regionsize_inc */
+			rc = set_lhi(REGEX_NUMBERS, &args->S, optarg,
+			    FLAG_INCR, &fl_rs, "regionsize_incr");
+			break;
+		case 'L': /* --load */
+			rc = set_load_params(args, optarg);
+			break;
+		case 'p': /* --pool */
+			args->pool = optarg;
+			break;
+		case 'M': /* --name */
+			args->name = optarg;
+			break;
+		case 'x': /* --cleanup */
+			args->flags |= DMU_REMOVE;
+			break;
+		case 'P': /* --prerun */
+			strncpy(args->pre, optarg, ZPIOS_PATH_SIZE - 1);
+			break;
+		case 'R': /* --postrun */
+			strncpy(args->post, optarg, ZPIOS_PATH_SIZE - 1);
+			break;
+		case 'G': /* --log */
+			strncpy(args->log, optarg, ZPIOS_PATH_SIZE - 1);
+			break;
+		case 'I': /* --regionnoise */
+			rc = set_noise(&args->regionnoise, optarg,
+			    "regionnoise");
+			break;
+		case 'N': /* --chunknoise */
+			rc = set_noise(&args->chunknoise, optarg,
+			    "chunknoise");
+			break;
+		case 'T': /* --threaddelay */
+			rc = set_noise(&args->thread_delay, optarg,
+			    "threaddelay");
+			break;
+		case 'V': /* --verify */
+			args->flags |= DMU_VERIFY;
+			break;
+		case 'z': /* --zerocopy */
+			args->flags |= (DMU_WRITE_ZC | DMU_READ_ZC);
+			break;
+		case 'O': /* --nowait */
+			args->flags |= DMU_WRITE_NOWAIT;
+			break;
+		case 'f': /* --noprefetch */
+			args->flags |= DMU_READ_NOPF;
+			break;
+		case 'H': /* --human-readable */
+			args->human_readable = 1;
+			break;
+		case 'v': /* --verbose */
+			args->verbose++;
+			break;
+		case '?': /* --help, and getopt_long()'s error indication */
+			rc = 1;
+			break;
+		default:
+			fprintf(stderr, "Unknown option '%s'\n",
+			    argv[optind - 1]);
+			rc = EINVAL;
+			break;
+		}
+
+		/* Any failure while parsing aborts with the usage text */
+		if (rc) {
+			usage();
+			args_fini(args);
+			return NULL;
+		}
+	}
+
+	/*
+	 * NOTE(review): the return values of these checks are ignored, so
+	 * a conflict they detect does not stop the run here -- confirm
+	 * against check_mutual_exclusive_command_lines() whether it
+	 * reports failure through its return value.
+	 */
+	check_mutual_exclusive_command_lines(fl_th, "threadcount");
+	check_mutual_exclusive_command_lines(fl_rc, "regioncount");
+	check_mutual_exclusive_command_lines(fl_of, "offset");
+	check_mutual_exclusive_command_lines(fl_rs, "regionsize");
+	check_mutual_exclusive_command_lines(fl_cs, "chunksize");
+
+	if (args->pool == NULL) {
+		fprintf(stderr, "Error: Pool not specified\n");
+		usage();
+		args_fini(args);
+		return NULL;
+	}
+
+	if ((args->flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
+	    (args->flags & DMU_VERIFY)) {
+		fprintf(stderr, "Error, --zerocopy incompatible with "
+		    "--verify, used for performance analysis only\n");
+		usage();
+		args_fini(args);
+		return NULL;
+	}
+
+	return args;
+}
+
+/*
+ * Issue a ZPIOS_CFG_BUFFER_CLEAR request on the control device and
+ * rewind the control descriptor to offset 0.
+ *
+ * Returns the ioctl() result; a failure is also reported on stderr.
+ */
+static int
+dev_clear(void)
+{
+	zpios_cfg_t request;
+	int status;
+
+	memset(&request, 0, sizeof(request));
+	request.cfg_arg1 = 0;
+	request.cfg_cmd = ZPIOS_CFG_BUFFER_CLEAR;
+	request.cfg_magic = ZPIOS_CFG_MAGIC;
+
+	status = ioctl(zpiosctl_fd, ZPIOS_CFG, &request);
+	if (status != 0) {
+		fprintf(stderr, "Ioctl() error %lu / %d: %d\n",
+		    (unsigned long) ZPIOS_CFG, request.cfg_cmd, errno);
+	}
+
+	/* The rewind happens whether or not the ioctl succeeded */
+	lseek(zpiosctl_fd, 0, SEEK_SET);
+
+	return status;
+}
+
+/*
+ * Issue a ZPIOS_CFG_BUFFER_SIZE request carrying the requested size.
+ * Passing a size of zero simply results in querying the current size.
+ *
+ * Returns the cfg_rc1 field filled in by the request, or the non-zero
+ * ioctl() result after reporting the failure on stderr.
+ */
+static int
+dev_size(int size)
+{
+	zpios_cfg_t request;
+	int status;
+
+	memset(&request, 0, sizeof(request));
+	request.cfg_arg1 = size;
+	request.cfg_cmd = ZPIOS_CFG_BUFFER_SIZE;
+	request.cfg_magic = ZPIOS_CFG_MAGIC;
+
+	status = ioctl(zpiosctl_fd, ZPIOS_CFG, &request);
+	if (status == 0)
+		return request.cfg_rc1;
+
+	fprintf(stderr, "Ioctl() error %lu / %d: %d\n",
+	    (unsigned long) ZPIOS_CFG, request.cfg_cmd, errno);
+
+	return status;
+}
+
+/*
+ * Release the scratch buffer and close the control device.
+ *
+ * Both pieces of state are reset afterwards, so calling this function a
+ * second time is a no-op instead of a double free / double close.
+ */
+static void
+dev_fini(void)
+{
+	free(zpios_buffer);	/* free(NULL) is a no-op */
+	zpios_buffer = NULL;
+	zpios_buffer_size = 0;
+
+	if (zpiosctl_fd != -1) {
+		if (close(zpiosctl_fd) == -1) {
+			fprintf(stderr, "Unable to close %s: %d\n",
+			    ZPIOS_DEV, errno);
+		}
+
+		/* Mark the descriptor as closed even if close() failed */
+		zpiosctl_fd = -1;
+	}
+}
+
+/*
+ * Open the zpios control device, clear its buffer, query the buffer size
+ * and allocate a zero filled scratch buffer of that size.
+ *
+ * Returns 0 on success.  On failure the control descriptor is closed and
+ * reset to -1, and a non-zero value is returned: errno from open(), the
+ * result of dev_clear() / dev_size(), or ENOMEM.
+ */
+static int
+dev_init(void)
+{
+	int rc;
+
+	zpiosctl_fd = open(ZPIOS_DEV, O_RDONLY);
+	if (zpiosctl_fd == -1) {
+		/* Save errno first, fprintf() is allowed to change it */
+		rc = errno;
+		fprintf(stderr, "Unable to open %s: %d\n"
+		    "Is the zpios module loaded?\n", ZPIOS_DEV, rc);
+		goto error;
+	}
+
+	if ((rc = dev_clear()))
+		goto error;
+
+	/* A size of zero queries the current buffer size */
+	if ((rc = dev_size(0)) < 0)
+		goto error;
+
+	zpios_buffer_size = rc;
+	zpios_buffer = (char *)malloc(zpios_buffer_size);
+	if (zpios_buffer == NULL) {
+		rc = ENOMEM;
+		goto error;
+	}
+
+	memset(zpios_buffer, 0, zpios_buffer_size);
+	return 0;
+error:
+	if (zpiosctl_fd != -1) {
+		if (close(zpiosctl_fd) == -1) {
+			fprintf(stderr, "Unable to close %s: %d\n",
+			    ZPIOS_DEV, errno);
+		}
+
+		/* Keep a later dev_fini() from closing it a second time */
+		zpiosctl_fd = -1;
+	}
+
+	return rc;
+}
+
+/*
+ * Hand out the next value described by 'range'.
+ *
+ * The form of the description is selected by range->val_count:
+ *   0   - range: val_low, val_low plus a growing percentage of val_low,
+ *         ... for as long as the result does not exceed val_high
+ *   1   - the single value val[0]
+ *   > 1 - the values val[0] .. val[val_count - 1] in order
+ *
+ * range->next_val carries the iteration state between calls; callers
+ * reset it to 0 to start over.
+ *
+ * Returns 1 and stores the value in *val when a value was produced,
+ * 0 once the description is exhausted.
+ */
+static int
+get_next(uint64_t *val, range_repeat_t *range)
+{
+	/* if low, incr, high is given */
+	if (range->val_count == 0) {
+		/*
+		 * A zero increment can never move past val_low, which used
+		 * to make the caller loop forever (this includes an option
+		 * that was not given at all).  Produce val_low exactly once;
+		 * next_val != 0 then only means "already handed out".
+		 */
+		if (range->val_inc_perc == 0 && range->next_val != 0)
+			return 0; /* No more values, we only have one */
+
+		*val = (range->val_low) +
+		    (range->val_low * range->next_val / 100);
+
+		if (*val > range->val_high)
+			return 0; /* No more values, limit exceeded */
+
+		if (range->val_inc_perc == 0)
+			range->next_val = 1;
+		else
+			range->next_val += range->val_inc_perc;
+
+		return 1; /* more values to come */
+	}
+
+	/* if only one val is given */
+	if (range->val_count == 1) {
+		if (range->next_val)
+			return 0; /* No more values, we only have one */
+
+		*val = range->val[0];
+		range->next_val = 1;
+		return 1; /* more values to come */
+	}
+
+	/* if comma separated values are given */
+	if (range->next_val > range->val_count - 1)
+		return 0; /* No more values, limit exceeded */
+
+	*val = range->val[range->next_val];
+	range->next_val++;
+	return 1; /* more values to come */
+}
+
+/*
+ * Execute a single test run through the ZPIOS_CMD ioctl.
+ *
+ * args      - parsed command line; pool, hooks, noise, delay and flags
+ *             are copied from here into the command
+ * id        - id stored in the command
+ * T, N      - thread count and region count
+ * C, S, O   - chunk size, region size and offset
+ *
+ * The command is followed by room for T + N + 1 zpios_stats_t records.
+ * The statistics are printed through print_stats() and, in verbose mode,
+ * whatever can be read from the control device is printed as well.
+ *
+ * Returns ENOMEM when the command cannot be allocated, otherwise the
+ * ioctl() result; on ioctl failure errno is also saved in args->rc.
+ */
+static int
+run_one(cmd_args_t *args, uint32_t id, uint32_t T, uint32_t N,
+    uint64_t C, uint64_t S, uint64_t O)
+{
+	zpios_cmd_t *cmd;
+	size_t data_size, cmd_size;
+	ssize_t len;
+	int rc;
+
+	dev_clear();
+
+	/* Sized in size_t so large thread/region counts cannot wrap */
+	data_size = ((size_t)T + N + 1) * sizeof(zpios_stats_t);
+	cmd_size = sizeof(zpios_cmd_t) + data_size;
+	cmd = (zpios_cmd_t *)malloc(cmd_size);
+	if (cmd == NULL)
+		return ENOMEM;
+
+	/* Zero filling keeps the strncpy() targets below terminated */
+	memset(cmd, 0, cmd_size);
+	cmd->cmd_magic = ZPIOS_CMD_MAGIC;
+	strncpy(cmd->cmd_pool, args->pool, ZPIOS_NAME_SIZE - 1);
+	strncpy(cmd->cmd_pre, args->pre, ZPIOS_PATH_SIZE - 1);
+	strncpy(cmd->cmd_post, args->post, ZPIOS_PATH_SIZE - 1);
+	strncpy(cmd->cmd_log, args->log, ZPIOS_PATH_SIZE - 1);
+	cmd->cmd_id = id;
+	cmd->cmd_chunk_size = C;
+	cmd->cmd_thread_count = T;
+	cmd->cmd_region_count = N;
+	cmd->cmd_region_size = S;
+	cmd->cmd_offset = O;
+	cmd->cmd_region_noise = args->regionnoise;
+	cmd->cmd_chunk_noise = args->chunknoise;
+	cmd->cmd_thread_delay = args->thread_delay;
+	cmd->cmd_flags = args->flags;
+	cmd->cmd_data_size = data_size;
+
+	rc = ioctl(zpiosctl_fd, ZPIOS_CMD, cmd);
+	if (rc)
+		args->rc = errno;
+
+	print_stats(args, cmd);
+
+	if (args->verbose && zpios_buffer != NULL && zpios_buffer_size > 1) {
+		len = read(zpiosctl_fd, zpios_buffer, zpios_buffer_size - 1);
+		if (len < 0) {
+			fprintf(stdout, "Error reading results: %d\n", errno);
+		} else {
+			/*
+			 * read() does not terminate the data; without this a
+			 * short result would be followed by the tail of the
+			 * previous run's output still held in the buffer.
+			 */
+			zpios_buffer[len] = '\0';
+
+			if ((len > 0) && (strlen(zpios_buffer) > 0)) {
+				fprintf(stdout, "\n%s\n", zpios_buffer);
+				fflush(stdout);
+			}
+		}
+	}
+
+	free(cmd);
+
+	return rc;
+}
+
+/*
+ * Innermost loop: perform one run for every offset in args->O, using the
+ * thread count, region count, chunk size and region size currently
+ * selected in args.  The run id is advanced after every run.
+ *
+ * Stops at the first run that fails and returns its result, 0 otherwise.
+ * The offset iterator is rewound before returning.
+ */
+static int
+run_offsets(cmd_args_t *args)
+{
+	int status;
+
+	for (status = 0;
+	    status == 0 && get_next(&args->current_O, &args->O);
+	    args->current_id++) {
+		status = run_one(args, args->current_id,
+		    args->current_T, args->current_N, args->current_C,
+		    args->current_S, args->current_O);
+	}
+
+	args->O.next_val = 0;
+
+	return status;
+}
+
+/*
+ * For every region count in args->N run the complete set of offsets.
+ *
+ * Stops at the first failure and returns its result, 0 otherwise.  The
+ * region count iterator is rewound before returning.
+ */
+static int
+run_region_counts(cmd_args_t *args)
+{
+	int status = 0;
+
+	while (status == 0) {
+		if (!get_next(&args->current_N, &args->N))
+			break;
+
+		status = run_offsets(args);
+	}
+
+	args->N.next_val = 0;
+
+	return status;
+}
+
+/*
+ * For every region size in args->S run the complete set of region
+ * counts.  A region size smaller than the currently selected chunk size
+ * is rejected with EINVAL.
+ *
+ * Stops at the first failure and returns its result, 0 otherwise.  The
+ * region size iterator is rewound on every exit path.
+ */
+static int
+run_region_sizes(cmd_args_t *args)
+{
+	int rc = 0;
+
+	while (rc == 0 && get_next(&args->current_S, &args->S)) {
+		if (args->current_S < args->current_C) {
+			/* The message names the value that is too small */
+			fprintf(stderr, "Error: in any run regionsize can "
+			    "not be smaller than chunksize.\n");
+			rc = EINVAL;
+			break;
+		}
+
+		rc = run_region_counts(args);
+	}
+
+	args->S.next_val = 0;
+	return rc;
+}
+
+/*
+ * For every chunk size in args->C run the complete set of region sizes.
+ *
+ * Stops at the first failure and returns its result, 0 otherwise.  The
+ * chunk size iterator is rewound before returning.
+ */
+static int
+run_chunk_sizes(cmd_args_t *args)
+{
+	int status = 0;
+
+	for (;;) {
+		if (status != 0)
+			break;
+
+		if (!get_next(&args->current_C, &args->C))
+			break;
+
+		status = run_region_sizes(args);
+	}
+
+	args->C.next_val = 0;
+
+	return status;
+}
+
+/*
+ * Outermost loop: for every thread count in args->T run the complete set
+ * of chunk sizes.
+ *
+ * Stops at the first failure and returns its result, 0 otherwise.
+ * Unlike the inner loops the thread count iterator is not rewound.
+ */
+static int
+run_thread_counts(cmd_args_t *args)
+{
+	int status = 0;
+
+	while (status == 0) {
+		if (!get_next(&args->current_T, &args->T))
+			break;
+
+		status = run_chunk_sizes(args);
+	}
+
+	return status;
+}
+
+/*
+ * Program entry point: parse the command line, open the control device
+ * and run every requested combination of test parameters.
+ *
+ * Returns -1 when the arguments cannot be parsed, the dev_init() result
+ * when the device cannot be set up, otherwise the result of the runs.
+ */
+int
+main(int argc, char **argv)
+{
+	cmd_args_t *args;
+	int rc;
+
+	/* Argument init and parsing */
+	args = args_init(argc, argv);
+	if (args == NULL)
+		return -1;
+
+	/*
+	 * Device specific init.  dev_fini() is only called after dev_init()
+	 * succeeded: on failure dev_init() has already closed the control
+	 * descriptor, and before it ran there is nothing to close.
+	 */
+	rc = dev_init();
+	if (rc == 0) {
+		/* Generic kernel version string */
+		if (args->verbose)
+			fprintf(stdout, "%s", zpios_version);
+
+		print_stats_header(args);
+		rc = run_thread_counts(args);
+
+		dev_fini();
+	}
+
+	args_fini(args);
+
+	return rc;
+}
diff --git a/cmd/zpios/zpios_util.c b/cmd/zpios/zpios_util.c
new file mode 100644
index 000000000..e42d06f11
--- /dev/null
+++ b/cmd/zpios/zpios_util.c
@@ -0,0 +1,456 @@
+/*
+ * This file is part of the ZFS Linux port.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
+ * LLNL-CODE-403049
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Kernel PIOS DMU implementation originally derived from PIOS test code.
+ * Character control interface derived from SPL code.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <regex.h>
+#include "zpios.h"
+
+/*
+ * Parse an unsigned 64-bit integer with an optional binary unit suffix
+ * (k/K, m/M, g/G, t/T, i.e. a shift by 10, 20, 30 or 40 bits) and store
+ * the scaled result in *val.  The number is parsed with base 0, so
+ * decimal, octal ("0...") and hex ("0x...") forms are accepted.
+ *
+ * *val is always written, because existing callers ignore the return
+ * value and read *val directly:
+ *   - no digits at all:      *val = 0,                       EINVAL
+ *   - unknown suffix char:   *val = unscaled number,         EINVAL
+ *   - result does not fit:   *val = largest uint64_t value,  ERANGE
+ *   - otherwise:             *val = scaled number,           0
+ *
+ * Only the first character after the number is examined.
+ */
+static int
+kmgt_to_uint64(const char *str, uint64_t *val)
+{
+	const uint64_t max = ~(uint64_t)0;
+	unsigned long long num;
+	unsigned int shift = 0;
+	char *endptr;
+
+	/* strtoull, not strtoll: values above INT64_MAX must not saturate */
+	errno = 0;
+	num = strtoull(str, &endptr, 0);
+	if (str == endptr) {
+		*val = 0;
+		return EINVAL;
+	}
+
+	if (errno == ERANGE || num > max) {
+		*val = max;
+		return ERANGE;
+	}
+
+	*val = (uint64_t)num;
+
+	switch (endptr[0]) {
+	case 'k': case 'K':
+		shift = 10;
+		break;
+	case 'm': case 'M':
+		shift = 20;
+		break;
+	case 'g': case 'G':
+		shift = 30;
+		break;
+	case 't': case 'T':
+		shift = 40;
+		break;
+	case '\0':
+		break;
+	default:
+		return EINVAL;
+	}
+
+	/* Refuse to shift significant bits off the top of the value. */
+	if (shift != 0 && *val > (max >> shift)) {
+		*val = max;
+		return ERANGE;
+	}
+
+	*val <<= shift;
+	return 0;
+}
+
+/*
+ * Format val into str as an integer followed by a unit letter
+ * (k, m, g or t), dividing by 1024 for as long as the value is at
+ * least KB.  Values that are still >= KB after the 't' unit are
+ * rendered as "inf"; values below KB get no suffix.
+ *
+ * str is written with snprintf() limited to KMGT_SIZE-1 bytes, so the
+ * caller must supply a buffer of at least that size.
+ *
+ * Returns str.
+ */
+static char *
+uint64_to_kmgt(char *str, uint64_t val)
+{
+	char units[] = "kmgt";
+	int unit;
+
+	for (unit = -1; (val >= KB) && (unit < 4); unit++)
+		val >>= 10;
+
+	if (unit < 4) {
+		char suffix = (unit == -1) ? '\0' : units[unit];
+
+		(void)snprintf(str, KMGT_SIZE-1, "%lu%c",
+		    (unsigned long)val, suffix);
+	} else {
+		(void)snprintf(str, KMGT_SIZE-1, "inf");
+	}
+
+	return str;
+}
+
+/*
+ * Format the rate v / t into str with two decimals and a unit letter
+ * (k, m, g or t), dividing by KB for as long as the rate is at least
+ * KB.  Rates that are still >= KB after the 't' unit are rendered as
+ * "inf"; rates below KB get no suffix.
+ *
+ * str is written with snprintf() limited to KMGT_SIZE-1 bytes.  The
+ * division is performed in floating point and t is not checked for 0.
+ *
+ * Returns str.
+ */
+static char *
+kmgt_per_sec(char *str, uint64_t v, double t)
+{
+	char units[] = "kmgt";
+	const double step = (double)KB;
+	double rate = ((double)v) / t;
+	int unit;
+
+	for (unit = -1; (rate >= step) && (unit < 4); unit++)
+		rate /= step;
+
+	if (unit < 4) {
+		char suffix = (unit == -1) ? '\0' : units[unit];
+
+		(void)snprintf(str, KMGT_SIZE-1, "%.2f%c", rate, suffix);
+	} else {
+		(void)snprintf(str, KMGT_SIZE-1, "inf");
+	}
+
+	return str;
+}
+
+/*
+ * Render the DMU_* bits of flags as a fixed seven character string in
+ * str (which must hold at least 8 bytes including the terminator).
+ * One position per flag, '-' when the flag is clear:
+ *
+ *   w  DMU_WRITE            r  DMU_READ        v  DMU_VERIFY
+ *   c  DMU_REMOVE           p/s DMU_FPP set/clear
+ *   z  DMU_WRITE_ZC or DMU_READ_ZC             O  DMU_WRITE_NOWAIT
+ *
+ * Returns str.
+ */
+static char *
+print_flags(char *str, uint32_t flags)
+{
+	char *out = str;
+
+	*out++ = (flags & DMU_WRITE) ? 'w' : '-';
+	*out++ = (flags & DMU_READ) ? 'r' : '-';
+	*out++ = (flags & DMU_VERIFY) ? 'v' : '-';
+	*out++ = (flags & DMU_REMOVE) ? 'c' : '-';
+	*out++ = (flags & DMU_FPP) ? 'p' : 's';
+	*out++ = (flags & (DMU_WRITE_ZC | DMU_READ_ZC)) ? 'z' : '-';
+	*out++ = (flags & DMU_WRITE_NOWAIT) ? 'O' : '-';
+	*out = '\0';
+
+	return str;
+}
+
+/*
+ * Test string against the POSIX extended, case-insensitive regular
+ * expression in pattern.
+ *
+ * Returns 0 on a match.  If the pattern does not compile, an error is
+ * printed to stderr and the regcomp() error code is returned; otherwise
+ * the regexec() result is returned.
+ */
+static int
+regex_match(const char *string, char *pattern)
+{
+	const int cflags = REG_EXTENDED | REG_NOSUB | REG_ICASE;
+	regex_t compiled = { 0 };
+	int status;
+
+	status = regcomp(&compiled, pattern, cflags);
+	if (status != 0) {
+		fprintf(stderr, "Error: Couldn't do regcomp, %d\n", status);
+		return status;
+	}
+
+	status = regexec(&compiled, string, (size_t) 0, NULL, 0);
+	regfree(&compiled);
+
+	return status;
+}
+
+/*
+ * Fill range with the comma separated values found in optarg.
+ *
+ * optarg must first match pattern (see regex_match()); if it does not,
+ * the non-zero regex_match() result is returned and range is left
+ * untouched.  Otherwise a private copy of optarg is tokenised on ','
+ * and each token is converted with kmgt_to_uint64() into
+ * range->val[0..val_count-1].
+ *
+ * At most 32 tokens are examined.  NOTE: when all 32 slots hold a
+ * token, val_count becomes 31 and the last token is not converted.
+ *
+ * Returns 0 on success, ENOMEM if the copy cannot be allocated, or the
+ * regex_match() result on a pattern mismatch.
+ */
+static int
+split_string(const char *optarg, char *pattern, range_repeat_t *range)
+{
+	const char comma[] = ",";
+	char *buf, *cp, *token[32];
+	int rc, i = 0;
+
+	if ((rc = regex_match(optarg, pattern)))
+		return rc;
+
+	buf = strdup(optarg);
+	if (buf == NULL)
+		return ENOMEM;
+
+	/*
+	 * strtok() takes the buffer on the first call and NULL on every
+	 * later call, so the cursor is cleared inside the loop.  The
+	 * allocation itself is tracked separately in buf so that it can
+	 * still be released below.
+	 */
+	cp = buf;
+	do {
+		token[i] = strtok(cp, comma);
+		cp = NULL;
+	} while ((token[i++] != NULL) && (i < 32));
+
+	range->val_count = i - 1;
+
+	for (i = 0; i < range->val_count; i++)
+		kmgt_to_uint64(token[i], &range->val[i]);
+
+	/* The tokens point into buf; free it only after converting them. */
+	free(buf);
+	return 0;
+}
+
+/*
+ * Store an explicit value, or a comma separated list of values, for
+ * one sweep parameter.
+ *
+ * If flags is non-NULL, FLAG_SET is recorded in *flags.  range->next_val
+ * is reset to 0.  When optarg matches pattern1 it is treated as a single
+ * value; otherwise it is handed to split_string() with pattern2.
+ *
+ * arg is the option name, used only in the error message.
+ *
+ * Returns 0 on success, EINVAL (after printing an error to stderr) when
+ * optarg matches neither pattern or cannot be split.
+ */
+int
+set_count(char *pattern1, char *pattern2, range_repeat_t *range,
+    char *optarg, uint32_t *flags, char *arg)
+{
+	if (flags)
+		*flags |= FLAG_SET;
+
+	range->next_val = 0;
+
+	if (regex_match(optarg, pattern1) == 0) {
+		kmgt_to_uint64(optarg, &range->val[0]);
+		range->val_count = 1;
+	} else if (split_string(optarg, pattern2, range) != 0) {
+		/*
+		 * split_string() reports failure with positive codes
+		 * (regexec()/regcomp() results or ENOMEM), so any
+		 * non-zero value must be treated as an error.
+		 */
+		fprintf(stderr, "Error: Incorrect pattern for %s, '%s'\n",
+		    arg, optarg);
+		return EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Store the low, high or increment value of a ranged sweep parameter.
+ *
+ * optarg is first validated against pattern; on a mismatch an error is
+ * printed to stderr and the regex_match() result is returned.  On a
+ * match, optarg is converted with kmgt_to_uint64() into the field
+ * selected by flag (FLAG_LOW -> val_low, FLAG_HIGH -> val_high,
+ * FLAG_INCR -> val_inc_perc) and flag is then OR-ed into *flag_thread.
+ * Any other flag value trips an assertion.
+ *
+ * arg is the option name, used only in the error message.
+ *
+ * Returns 0 on success.
+ */
+int
+set_lhi(char *pattern, range_repeat_t *range, char *optarg,
+    int flag, uint32_t *flag_thread, char *arg)
+{
+	int status = regex_match(optarg, pattern);
+
+	if (status != 0) {
+		fprintf(stderr, "Error: Wrong pattern in %s, '%s'\n",
+		    arg, optarg);
+		return status;
+	}
+
+	if (flag == FLAG_LOW)
+		kmgt_to_uint64(optarg, &range->val_low);
+	else if (flag == FLAG_HIGH)
+		kmgt_to_uint64(optarg, &range->val_high);
+	else if (flag == FLAG_INCR)
+		kmgt_to_uint64(optarg, &range->val_inc_perc);
+	else
+		assert(0);
+
+	*flag_thread |= flag;
+
+	return 0;
+}
+
+/*
+ * Parse a noise value.  optarg must match REGEX_NUMBERS; it is then
+ * converted with kmgt_to_uint64() and stored in *noise.
+ *
+ * arg is the option name, used only in the error message.
+ *
+ * Returns 0 on success, EINVAL (after printing an error to stderr) if
+ * optarg does not match.
+ */
+int
+set_noise(uint64_t *noise, char *optarg, char *arg)
+{
+	if (regex_match(optarg, REGEX_NUMBERS) != 0) {
+		fprintf(stderr, "Error: Incorrect pattern for %s\n", arg);
+		return EINVAL;
+	}
+
+	kmgt_to_uint64(optarg, noise);
+	return 0;
+}
+
+/*
+ * Apply a comma separated list of load keywords to args:
+ *
+ *   "fpp"    set DMU_FPP in args->flags (file per process/thread)
+ *   "ssf"    clear DMU_FPP in args->flags (single shared file)
+ *   "dmuio"  set DMU_IO in args->io_type and DMU_WRITE | DMU_READ in
+ *            args->flags
+ *
+ * Every keyword is processed even after an invalid one has been seen;
+ * each invalid keyword is reported on stderr.
+ *
+ * Returns 0 on success, EINVAL if any keyword was unknown, or ENOMEM
+ * if the working copy of optarg could not be allocated.
+ */
+int
+set_load_params(cmd_args_t *args, char *optarg)
+{
+	char *buf, *param, *search, comma[] = ",";
+	int rc = 0;
+
+	buf = strdup(optarg);
+	if (buf == NULL)
+		return ENOMEM;
+
+	/*
+	 * strtok() wants the buffer on the first call and NULL afterwards,
+	 * so the cursor is cleared inside the loop.  The allocation is kept
+	 * in buf so that it can still be freed once parsing is done.
+	 */
+	search = buf;
+	while ((param = strtok(search, comma)) != NULL) {
+		search = NULL;
+
+		if (strcmp("fpp", param) == 0) {
+			args->flags |= DMU_FPP; /* File Per Process/Thread */
+		} else if (strcmp("ssf", param) == 0) {
+			args->flags &= ~DMU_FPP; /* Single Shared File */
+		} else if (strcmp("dmuio", param) == 0) {
+			args->io_type |= DMU_IO;
+			args->flags |= (DMU_WRITE | DMU_READ);
+		} else {
+			fprintf(stderr, "Invalid load: %s\n", param);
+			rc = EINVAL;
+		}
+	}
+
+	free(buf);
+
+	return rc;
+}
+
+
+/*
+ * Validate the combination of option flags collected for one sweep
+ * parameter.  A single value (FLAG_SET) is mutually exclusive with the
+ * ranged form (FLAG_LOW, FLAG_HIGH, FLAG_INCR), e.g. threadcount vs.
+ * threadcount_low, ..._high, ..._incr.  When the ranged form is used,
+ * flag must be exactly FLAG_LOW | FLAG_HIGH | FLAG_INCR.
+ *
+ * arg is the base option name, used only in the error messages.
+ *
+ * Returns 1 when the combination is valid, 0 (after printing an error
+ * to stderr) when it is not.
+ */
+int
+check_mutual_exclusive_command_lines(uint32_t flag, char *arg)
+{
+	int has_single = (flag & FLAG_SET) != 0;
+	int has_range = (flag & (FLAG_LOW | FLAG_HIGH | FLAG_INCR)) != 0;
+
+	if (has_single && has_range) {
+		fprintf(stderr, "Error: --%s can not be given with --%s_low, "
+		    "--%s_high or --%s_incr.\n", arg, arg, arg, arg);
+		return 0;
+	}
+
+	if (has_range && !has_single &&
+	    flag != (FLAG_LOW | FLAG_HIGH | FLAG_INCR)) {
+		fprintf(stderr, "Error: One or more values missing "
+		    "from --%s_low, --%s_high, --%s_incr.\n",
+		    arg, arg, arg);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Print the column header and a separator rule for the result rows to
+ * stdout.  The wide layout is used when args->verbose is set, the
+ * compact one otherwise.
+ */
+void
+print_stats_header(cmd_args_t *args)
+{
+	const char *columns;
+	const char *rule;
+
+	if (args->verbose) {
+		columns = "status name id\tth-cnt\trg-cnt\trg-sz\t"
+		    "ch-sz\toffset\trg-no\tch-no\tth-dly\tflags\ttime\t"
+		    "cr-time\trm-time\twr-time\trd-time\twr-data\twr-ch\t"
+		    "wr-bw\trd-data\trd-ch\trd-bw\n";
+		rule = "------------------------------------------------"
+		    "------------------------------------------------"
+		    "------------------------------------------------"
+		    "----------------------------------------------\n";
+	} else {
+		columns = "status name id\t"
+		    "wr-data\twr-ch\twr-bw\t"
+		    "rd-data\trd-ch\trd-bw\n";
+		rule = "-----------------------------------------"
+		    "--------------------------------------\n";
+	}
+
+	/* Neither string contains a conversion, so emit them verbatim. */
+	fputs(columns, stdout);
+	fputs(rule, stdout);
+}
+
+/*
+ * Print one result row to stdout with sizes and rates scaled to
+ * k/m/g/t units.
+ *
+ * The row starts with PASS or FAIL (plus args->rc), the test name
+ * (args->name, or ZPIOS_NAME when unset) and the command id.  In
+ * verbose mode the run parameters and flags follow.  When args->rc is
+ * non-zero the row ends there.  Otherwise the timings (verbose only)
+ * and the write/read data, chunk counts and bandwidths taken from the
+ * zpios_stats_t stored in cmd->cmd_data_str are appended and stdout is
+ * flushed.
+ */
+static void
+print_stats_human_readable(cmd_args_t *args, zpios_cmd_t *cmd)
+{
+	zpios_stats_t *stats;
+	double t_time, wr_time, rd_time, cr_time, rm_time;
+	char str[KMGT_SIZE];	/* scratch buffer reused for every column */
+	int verbose = args->verbose;
+
+	if (args->rc)
+		printf("FAIL: %3d ", args->rc);
+	else
+		fputs("PASS: ", stdout);
+
+	printf("%-12s", args->name ? args->name : ZPIOS_NAME);
+	printf("%2u\t", cmd->cmd_id);
+
+	if (verbose) {
+		printf("%u\t%u\t", cmd->cmd_thread_count,
+		    cmd->cmd_region_count);
+		printf("%s\t", uint64_to_kmgt(str, cmd->cmd_region_size));
+		printf("%s\t", uint64_to_kmgt(str, cmd->cmd_chunk_size));
+		printf("%s\t", uint64_to_kmgt(str, cmd->cmd_offset));
+		printf("%s\t", uint64_to_kmgt(str, cmd->cmd_region_noise));
+		printf("%s\t", uint64_to_kmgt(str, cmd->cmd_chunk_noise));
+		printf("%s\t", uint64_to_kmgt(str, cmd->cmd_thread_delay));
+		printf("%s\t", print_flags(str, cmd->cmd_flags));
+	}
+
+	/* A failed run has no statistics worth printing. */
+	if (args->rc) {
+		putchar('\n');
+		return;
+	}
+
+	stats = (zpios_stats_t *)cmd->cmd_data_str;
+	t_time = zpios_timespec_to_double(stats->total_time.delta);
+	wr_time = zpios_timespec_to_double(stats->wr_time.delta);
+	rd_time = zpios_timespec_to_double(stats->rd_time.delta);
+	cr_time = zpios_timespec_to_double(stats->cr_time.delta);
+	rm_time = zpios_timespec_to_double(stats->rm_time.delta);
+
+	if (verbose)
+		printf("%.2f\t%.3f\t%.3f\t%.2f\t%.2f\t",
+		    t_time, cr_time, rm_time, wr_time, rd_time);
+
+	printf("%s\t", uint64_to_kmgt(str, stats->wr_data));
+	printf("%s\t", uint64_to_kmgt(str, stats->wr_chunks));
+	printf("%s\t", kmgt_per_sec(str, stats->wr_data, wr_time));
+
+	printf("%s\t", uint64_to_kmgt(str, stats->rd_data));
+	printf("%s\t", uint64_to_kmgt(str, stats->rd_chunks));
+	printf("%s\n", kmgt_per_sec(str, stats->rd_data, rd_time));
+	fflush(stdout);
+}
+
+/*
+ * Print one result row to stdout using raw, unscaled numbers.
+ *
+ * The row starts with PASS or FAIL (plus args->rc), the test name
+ * (args->name, or ZPIOS_NAME when unset) and the command id.  In
+ * verbose mode the run parameters and the flags (in hex) follow.  When
+ * args->rc is non-zero the row ends there.  Otherwise the timings
+ * (verbose only) and the write/read byte counts, chunk counts and
+ * bytes-per-second rates taken from the zpios_stats_t stored in
+ * cmd->cmd_data_str are appended and stdout is flushed.
+ *
+ * Timings are printed as seconds.nanoseconds with the nanosecond part
+ * zero padded to nine digits, so that the value reads as a decimal
+ * number of seconds.
+ */
+static void
+print_stats_table(cmd_args_t *args, zpios_cmd_t *cmd)
+{
+	zpios_stats_t *summary_stats;
+	double wr_time, rd_time;
+
+	if (args->rc)
+		printf("FAIL: %3d ", args->rc);
+	else
+		printf("PASS: ");
+
+	printf("%-12s", args->name ? args->name : ZPIOS_NAME);
+	printf("%2u\t", cmd->cmd_id);
+
+	if (args->verbose) {
+		printf("%u\t", cmd->cmd_thread_count);
+		printf("%u\t", cmd->cmd_region_count);
+		printf("%llu\t", (long long unsigned)cmd->cmd_region_size);
+		printf("%llu\t", (long long unsigned)cmd->cmd_chunk_size);
+		printf("%llu\t", (long long unsigned)cmd->cmd_offset);
+		printf("%u\t", cmd->cmd_region_noise);
+		printf("%u\t", cmd->cmd_chunk_noise);
+		printf("%u\t", cmd->cmd_thread_delay);
+		printf("0x%x\t", cmd->cmd_flags);
+	}
+
+	/* A failed run has no statistics worth printing. */
+	if (args->rc) {
+		printf("\n");
+		return;
+	}
+
+	summary_stats = (zpios_stats_t *)cmd->cmd_data_str;
+	wr_time = zpios_timespec_to_double(summary_stats->wr_time.delta);
+	rd_time = zpios_timespec_to_double(summary_stats->rd_time.delta);
+
+	if (args->verbose) {
+		/* %09ld: a nanosecond field is always nine digits wide */
+		printf("%ld.%09ld\t",
+		    (long)summary_stats->total_time.delta.ts_sec,
+		    (long)summary_stats->total_time.delta.ts_nsec);
+		printf("%ld.%09ld\t",
+		    (long)summary_stats->cr_time.delta.ts_sec,
+		    (long)summary_stats->cr_time.delta.ts_nsec);
+		printf("%ld.%09ld\t",
+		    (long)summary_stats->rm_time.delta.ts_sec,
+		    (long)summary_stats->rm_time.delta.ts_nsec);
+		printf("%ld.%09ld\t",
+		    (long)summary_stats->wr_time.delta.ts_sec,
+		    (long)summary_stats->wr_time.delta.ts_nsec);
+		printf("%ld.%09ld\t",
+		    (long)summary_stats->rd_time.delta.ts_sec,
+		    (long)summary_stats->rd_time.delta.ts_nsec);
+	}
+
+	/* The arguments are cast to unsigned, so print them with %llu. */
+	printf("%llu\t", (long long unsigned)summary_stats->wr_data);
+	printf("%llu\t", (long long unsigned)summary_stats->wr_chunks);
+	printf("%.4f\t", (double)summary_stats->wr_data / wr_time);
+
+	printf("%llu\t", (long long unsigned)summary_stats->rd_data);
+	printf("%llu\t", (long long unsigned)summary_stats->rd_chunks);
+	printf("%.4f\n", (double)summary_stats->rd_data / rd_time);
+	fflush(stdout);
+}
+
+/*
+ * Print the result row for one completed command, in the scaled
+ * (k/m/g/t) layout when args->human_readable is set and in the raw
+ * table layout otherwise.
+ */
+void
+print_stats(cmd_args_t *args, zpios_cmd_t *cmd)
+{
+	void (*printer)(cmd_args_t *, zpios_cmd_t *);
+
+	printer = args->human_readable ?
+	    print_stats_human_readable : print_stats_table;
+	printer(args, cmd);
+}
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 3cdc269b0..b6c454d24 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -1674,12 +1674,6 @@ zpool_do_import(int argc, char **argv)
usage(B_FALSE);
}
- if (searchdirs == NULL) {
- searchdirs = safe_malloc(sizeof (char *));
- searchdirs[0] = "/dev/dsk";
- nsearch = 1;
- }
-
/* check argument count */
if (do_all) {
if (argc != 0) {
@@ -1700,7 +1694,8 @@ zpool_do_import(int argc, char **argv)
if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) {
(void) fprintf(stderr, gettext("cannot "
"discover pools: permission denied\n"));
- free(searchdirs);
+ if (searchdirs != NULL)
+ free(searchdirs);
return (1);
}
}
@@ -1747,7 +1742,8 @@ zpool_do_import(int argc, char **argv)
(void) fprintf(stderr, gettext("cannot import '%s': "
"no such pool available\n"), argv[0]);
}
- free(searchdirs);
+ if (searchdirs != NULL)
+ free(searchdirs);
return (1);
}
@@ -1842,7 +1838,8 @@ zpool_do_import(int argc, char **argv)
error:
nvlist_free(props);
nvlist_free(pools);
- free(searchdirs);
+ if (searchdirs != NULL)
+ free(searchdirs);
return (err ? 1 : 0);
}
diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
index 7ff368415..5f540ac71 100644
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@@ -51,7 +51,7 @@
*
* 1. Construct the vdev specification. Performs syntax validation and
* makes sure each device is valid.
- * 2. Check for devices in use. Using libdiskmgt, makes sure that no
+ * 2. Check for devices in use. Using libblkid to make sure that no
* devices are also in use. Some can be overridden using the 'force'
* flag, others cannot.
* 3. Check for replication errors if the 'force' flag is not specified.
@@ -61,10 +61,10 @@
*/
#include <assert.h>
+#include <ctype.h>
#include <devid.h>
#include <errno.h>
#include <fcntl.h>
-#include <libdiskmgt.h>
#include <libintl.h>
#include <libnvpair.h>
#include <limits.h>
@@ -75,13 +75,13 @@
#include <sys/stat.h>
#include <sys/vtoc.h>
#include <sys/mntent.h>
+#include <uuid/uuid.h>
+#ifdef HAVE_LIBBLKID
+#include <blkid/blkid.h>
+#endif
#include "zpool_util.h"
-#define DISK_ROOT "/dev/dsk"
-#define RDISK_ROOT "/dev/rdsk"
-#define BACKUP_SLICE "s2"
-
/*
* For any given vdev specification, we can have multiple errors. The
* vdev_error() function keeps track of whether we have seen an error yet, and
@@ -112,168 +112,6 @@ vdev_error(const char *fmt, ...)
va_end(ap);
}
-static void
-libdiskmgt_error(int error)
-{
- /*
- * ENXIO/ENODEV is a valid error message if the device doesn't live in
- * /dev/dsk. Don't bother printing an error message in this case.
- */
- if (error == ENXIO || error == ENODEV)
- return;
-
- (void) fprintf(stderr, gettext("warning: device in use checking "
- "failed: %s\n"), strerror(error));
-}
-
-/*
- * Validate a device, passing the bulk of the work off to libdiskmgt.
- */
-static int
-check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
-{
- char *msg;
- int error = 0;
- dm_who_type_t who;
-
- if (force)
- who = DM_WHO_ZPOOL_FORCE;
- else if (isspare)
- who = DM_WHO_ZPOOL_SPARE;
- else
- who = DM_WHO_ZPOOL;
-
- if (dm_inuse((char *)path, &msg, who, &error) || error) {
- if (error != 0) {
- libdiskmgt_error(error);
- return (0);
- } else {
- vdev_error("%s", msg);
- free(msg);
- return (-1);
- }
- }
-
- /*
- * If we're given a whole disk, ignore overlapping slices since we're
- * about to label it anyway.
- */
- error = 0;
- if (!wholedisk && !force &&
- (dm_isoverlapping((char *)path, &msg, &error) || error)) {
- if (error == 0) {
- /* dm_isoverlapping returned -1 */
- vdev_error(gettext("%s overlaps with %s\n"), path, msg);
- free(msg);
- return (-1);
- } else if (error != ENODEV) {
- /* libdiskmgt's devcache only handles physical drives */
- libdiskmgt_error(error);
- return (0);
- }
- }
-
- return (0);
-}
-
-
-/*
- * Validate a whole disk. Iterate over all slices on the disk and make sure
- * that none is in use by calling check_slice().
- */
-static int
-check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
-{
- dm_descriptor_t *drive, *media, *slice;
- int err = 0;
- int i;
- int ret;
-
- /*
- * Get the drive associated with this disk. This should never fail,
- * because we already have an alias handle open for the device.
- */
- if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
- &err)) == NULL || *drive == NULL) {
- if (err)
- libdiskmgt_error(err);
- return (0);
- }
-
- if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
- &err)) == NULL) {
- dm_free_descriptors(drive);
- if (err)
- libdiskmgt_error(err);
- return (0);
- }
-
- dm_free_descriptors(drive);
-
- /*
- * It is possible that the user has specified a removable media drive,
- * and the media is not present.
- */
- if (*media == NULL) {
- dm_free_descriptors(media);
- vdev_error(gettext("'%s' has no media in drive\n"), name);
- return (-1);
- }
-
- if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
- &err)) == NULL) {
- dm_free_descriptors(media);
- if (err)
- libdiskmgt_error(err);
- return (0);
- }
-
- dm_free_descriptors(media);
-
- ret = 0;
-
- /*
- * Iterate over all slices and report any errors. We don't care about
- * overlapping slices because we are using the whole disk.
- */
- for (i = 0; slice[i] != NULL; i++) {
- char *name = dm_get_name(slice[i], &err);
-
- if (check_slice(name, force, B_TRUE, isspare) != 0)
- ret = -1;
-
- dm_free_name(name);
- }
-
- dm_free_descriptors(slice);
- return (ret);
-}
-
-/*
- * Validate a device.
- */
-static int
-check_device(const char *path, boolean_t force, boolean_t isspare)
-{
- dm_descriptor_t desc;
- int err;
- char *dev;
-
- /*
- * For whole disks, libdiskmgt does not include the leading dev path.
- */
- dev = strrchr(path, '/');
- assert(dev != NULL);
- dev++;
- if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
- err = check_disk(path, desc, force, isspare);
- dm_free_descriptor(desc);
- return (err);
- }
-
- return (check_slice(path, force, B_FALSE, isspare));
-}
-
/*
* Check that a file is valid. All we can do in this case is check that it's
* not in use by another pool, and not in use by swap.
@@ -284,20 +122,10 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
char *name;
int fd;
int ret = 0;
- int err;
pool_state_t state;
boolean_t inuse;
- if (dm_inuse_swap(file, &err)) {
- if (err)
- libdiskmgt_error(err);
- else
- vdev_error(gettext("%s is currently used by swap. "
- "Please see swap(1M).\n"), file);
- return (-1);
- }
-
- if ((fd = open(file, O_RDONLY)) < 0)
+ if ((fd = open(file, O_RDONLY|O_EXCL)) < 0)
return (0);
if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
@@ -349,6 +177,177 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
return (ret);
}
+#ifdef HAVE_LIBBLKID
+/*
+ * Print a warning to stderr saying that the device in-use check itself
+ * failed, with err rendered through strerror().
+ */
+static void
+check_error(int err)
+{
+	const char *reason = strerror(err);
+
+	(void) fprintf(stderr, gettext("warning: device in use checking "
+	    "failed: %s\n"), reason);
+}
+
+/*
+ * Check whether a single partition or device may be used.
+ *
+ * The path must be stat()-able.  libblkid is then asked for the "TYPE"
+ * tag of the device:
+ *   - no type detected:  the device is considered safe (returns 0)
+ *   - type "zfs":        the decision is delegated to check_file()
+ *   - any other type:    allowed only when 'force' is set, otherwise
+ *                        an error is recorded through vdev_error()
+ *
+ * Returns 0 when the device may be used, -1 (or the check_file()
+ * result) when it may not.
+ */
+static int
+check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
+{
+	struct stat64 statbuf;
+	char *fstype;
+	int result;
+
+	if (stat64(path, &statbuf) != 0) {
+		vdev_error(gettext("cannot stat %s: %s\n"),
+		    path, strerror(errno));
+		return (-1);
+	}
+
+	/* No valid type detected device is safe to use */
+	fstype = blkid_get_tag_value(cache, "TYPE", path);
+	if (fstype == NULL)
+		return (0);
+
+	if (strcmp(fstype, "zfs") == 0) {
+		/*
+		 * If libblkid detects a ZFS device, we check the device
+		 * using check_file() to see if it's safe.  The one safe
+		 * case is a spare device shared between multiple pools.
+		 */
+		result = check_file(path, force, isspare);
+	} else if (force) {
+		result = 0;
+	} else {
+		result = -1;
+		vdev_error(gettext("%s contains a filesystem of "
+		    "type '%s'\n"), path, fstype);
+	}
+
+	/* The tag value is freed here once it is no longer needed. */
+	free(fstype);
+
+	return (result);
+}
+
+/*
+ * Validate a disk vdev.
+ *
+ * When 'iswholedisk' is false only the given path is checked, through
+ * check_slice().  For a whole disk the EFI label is read and
+ * check_slice() is called for every assigned partition, stopping at the
+ * first one that fails.
+ *
+ * A disk without a readable EFI label, or whose primary EFI label is
+ * flagged corrupt, is accepted only when 'force' is set.
+ *
+ * Returns 0 when the device may be used, otherwise -1 or the non-zero
+ * check_slice() result.
+ */
+static int
+check_disk(const char *path, blkid_cache cache, int force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	struct dk_gpt *vtoc;
+	char slice_path[MAXPATHLEN];
+	const char *sep;
+	int err = 0;
+	int fd, i;
+
+	/* This is not a whole disk, so only check the given partition. */
+	if (!iswholedisk)
+		return check_slice(path, cache, force, isspare);
+
+	/*
+	 * When the device is a whole disk try to read the efi partition
+	 * label.  If this is successful we can safely check all of the
+	 * partitions.  However, when it fails it may simply be because
+	 * the disk is partitioned via the MBR.  Since we currently can
+	 * not easily decode the MBR return a failure and prompt the
+	 * user to use the force option since we cannot check the
+	 * partitions.
+	 */
+	if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
+		check_error(errno);
+		return -1;
+	}
+
+	if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
+		(void) close(fd);
+
+		if (force) {
+			return 0;
+		} else {
+			vdev_error(gettext("%s does not contain an EFI "
+			    "label but it may contain partition\n"
+			    "information in the MBR.\n"), path);
+			return -1;
+		}
+	}
+
+	/*
+	 * The primary efi partition label is damaged however the secondary
+	 * label at the end of the device is intact.  Rather than use this
+	 * label we should play it safe and treat this as a non efi device.
+	 */
+	if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
+		efi_free(vtoc);
+		(void) close(fd);
+
+		if (force) {
+			/* Partitions will now be created using the backup */
+			return 0;
+		} else {
+			vdev_error(gettext("%s contains a corrupt primary "
+			    "EFI label.\n"), path);
+			return -1;
+		}
+	}
+
+	/*
+	 * Partition names are formed as <path>-part<N> below UDISK_ROOT,
+	 * <path>p<N> when the disk name ends in a digit, and <path><N>
+	 * otherwise.  The separator depends only on path, so it is chosen
+	 * once.  The cast keeps isdigit() defined for bytes >= 0x80.
+	 */
+	if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+		sep = "-part";
+	else if (isdigit((unsigned char)path[strlen(path)-1]))
+		sep = "p";
+	else
+		sep = "";
+
+	for (i = 0; i < vtoc->efi_nparts; i++) {
+
+		/* Skip slots that do not describe a partition. */
+		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
+		    uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
+			continue;
+
+		(void) snprintf(slice_path, sizeof (slice_path),
+		    "%s%s%d", path, sep, i+1);
+
+		err = check_slice(slice_path, cache, force, isspare);
+		if (err)
+			break;
+	}
+
+	efi_free(vtoc);
+	(void) close(fd);
+
+	return (err);
+}
+
+/*
+ * Validate a device using libblkid.
+ *
+ * The blkid cache is created and fully probed on the first call and is
+ * kept in a function-local static for later calls.  If either step
+ * fails a warning is printed through check_error() and -1 is returned;
+ * the cache pointer is left NULL in that case so that the next call
+ * starts over instead of reusing a released cache.
+ *
+ * Returns -1 on a cache failure, otherwise the check_disk() result.
+ */
+static int
+check_device(const char *path, boolean_t force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	static blkid_cache cache = NULL;
+	int err;
+
+	/*
+	 * There is no easy way to add a correct blkid_put_cache() call
+	 * for the success case; the memory will be reclaimed when the
+	 * command exits.
+	 */
+	if (cache == NULL) {
+		if ((err = blkid_get_cache(&cache, NULL)) != 0) {
+			cache = NULL;
+			check_error(err);
+			return -1;
+		}
+
+		if ((err = blkid_probe_all(cache)) != 0) {
+			blkid_put_cache(cache);
+			/* Do not keep a pointer to the released cache. */
+			cache = NULL;
+			check_error(err);
+			return -1;
+		}
+	}
+
+	return check_disk(path, cache, force, isspare, iswholedisk);
+}
+
+#else /* HAVE_LIBBLKID */
+
+/*
+ * Fallback used when libblkid is not available: the device is checked
+ * with check_file() only.  'iswholedisk' is not used in this variant.
+ */
+static int
+check_device(const char *path, boolean_t force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	int result = check_file(path, force, isspare);
+
+	return result;
+}
+#endif /* HAVE_LIBBLKID */
/*
* By "whole disk" we mean an entire physical disk (something we can
@@ -367,7 +366,7 @@ is_whole_disk(const char *arg)
(void) snprintf(path, sizeof (path), "%s%s%s",
RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
- if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
+ if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0)
return (B_FALSE);
if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
(void) close(fd);
@@ -404,17 +403,28 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
if (arg[0] == '/') {
/*
* Complete device or file path. Exact type is determined by
- * examining the file descriptor afterwards.
+ * examining the file descriptor afterwards. Symbolic links
+ * are resolved to their real paths for the is_whole_disk()
+ * and S_ISBLK/S_ISREG type checks. However, we are careful
+ * to store the given path as ZPOOL_CONFIG_PATH to ensure we
+ * can leverage udev's persistent device labels.
*/
- wholedisk = is_whole_disk(arg);
- if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
+ if (realpath(arg, path) == NULL) {
+ (void) fprintf(stderr,
+ gettext("cannot resolve path '%s'\n"), arg);
+ return (NULL);
+ }
+
+ wholedisk = is_whole_disk(path);
+ if (!wholedisk && (stat64(path, &statbuf) != 0)) {
(void) fprintf(stderr,
gettext("cannot open '%s': %s\n"),
- arg, strerror(errno));
+ path, strerror(errno));
return (NULL);
}
- (void) strlcpy(path, arg, sizeof (path));
+ /* After is_whole_disk() check restore original passed path */
+ strlcpy(path, arg, MAXPATHLEN);
} else {
/*
* This may be a short path for a device, or it could be total
@@ -476,6 +486,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)wholedisk) == 0);
+#if defined(__sun__) || defined(__sun)
/*
* For a whole disk, defer getting its devid until after labeling it.
*/
@@ -487,7 +498,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
ddi_devid_t devid;
char *minor = NULL, *devid_str = NULL;
- if ((fd = open(path, O_RDONLY)) < 0) {
+ if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) {
(void) fprintf(stderr, gettext("cannot open '%s': "
"%s\n"), path, strerror(errno));
nvlist_free(vdev);
@@ -510,6 +521,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
(void) close(fd);
}
+#endif
return (vdev);
}
@@ -872,6 +884,39 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
return (ret);
}
+/*
+ * Overwrite the first 4096 bytes of the device or file at path with
+ * zeros and flush them with fdatasync().
+ *
+ * Returns 0 on success.  Returns -1, after printing a message to
+ * stderr, when the path cannot be opened, the write fails, or fewer
+ * than 4096 bytes were written.
+ */
+static int
+zero_label(char *path)
+{
+	const int size = 4096;
+	char buf[size];
+	int err, fd, saved_errno;
+
+	if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) {
+		(void) fprintf(stderr, gettext("cannot open '%s': %s\n"),
+		    path, strerror(errno));
+		return (-1);
+	}
+
+	memset(buf, 0, size);
+	err = write(fd, buf, size);
+	/*
+	 * Remember why write() failed: fdatasync() and close() below may
+	 * overwrite errno before the error is reported.
+	 */
+	saved_errno = errno;
+	(void) fdatasync(fd);
+	(void) close(fd);
+
+	if (err == -1) {
+		(void) fprintf(stderr, gettext("cannot zero first %d bytes "
+		    "of '%s': %s\n"), size, path, strerror(saved_errno));
+		return (-1);
+	}
+
+	/* A short write is treated as an error. */
+	if (err != size) {
+		(void) fprintf(stderr, gettext("could only zero %d/%d bytes "
+		    "of '%s'\n"), err, size, path);
+		return (-1);
+	}
+
+	return 0;
+}
+
/*
* Go through and find any whole disks in the vdev specification, labelling them
* as appropriate. When constructing the vdev spec, we were unable to open this
@@ -890,10 +935,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
char *type, *path, *diskname;
char buf[MAXPATHLEN];
uint64_t wholedisk;
- int fd;
int ret;
- ddi_devid_t devid;
- char *minor = NULL, *devid_str = NULL;
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
@@ -904,55 +946,66 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
return (0);
/*
- * We have a disk device. Get the path to the device
- * and see if it's a whole disk by appending the backup
- * slice and stat()ing the device.
+ * We have a disk device. If this is a whole disk write
+ * out the efi partition table, otherwise write zeros to
+ * the first 4k of the partition. This is to ensure that
+ * libblkid will not misidentify the partition due to a
+ * magic value left by the previous filesystem.
*/
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- &wholedisk) != 0 || !wholedisk)
- return (0);
+ verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
+ verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk));
- diskname = strrchr(path, '/');
+ if (!wholedisk) {
+ ret = zero_label(path);
+ return (ret);
+ }
+
+ if (realpath(path, buf) == NULL) {
+ ret = errno;
+ (void) fprintf(stderr,
+ gettext("cannot resolve path '%s'\n"), path);
+ return (ret);
+ }
+
+ diskname = strrchr(buf, '/');
assert(diskname != NULL);
diskname++;
if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
return (-1);
/*
- * Fill in the devid, now that we've labeled the disk.
+ * Now that we've labeled the disk and the partitions have
+ * been created. We still need to wait for udev to create
+ * the symlinks to those partitions. If we are accessing
+ * the devices via a udev disk path, /dev/disk, then wait
+ * for *-part# to be created. Otherwise just use the normal
+ * syntax for devices in /dev.
*/
- (void) snprintf(buf, sizeof (buf), "%ss0", path);
- if ((fd = open(buf, O_RDONLY)) < 0) {
+ if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+ (void) snprintf(buf, sizeof (buf),
+ "%s%s%s", path, "-part", FIRST_SLICE);
+ else
+ (void) snprintf(buf, sizeof (buf),
+ "%s%s%s", path, isdigit(path[strlen(path)-1]) ?
+ "p" : "", FIRST_SLICE);
+
+ if ((ret = zpool_label_disk_wait(buf, 1000)) != 0) {
(void) fprintf(stderr,
- gettext("cannot open '%s': %s\n"),
- buf, strerror(errno));
+ gettext( "cannot resolve path '%s'\n"), buf);
return (-1);
}
- if (devid_get(fd, &devid) == 0) {
- if (devid_get_minor_name(fd, &minor) == 0 &&
- (devid_str = devid_str_encode(devid, minor)) !=
- NULL) {
- verify(nvlist_add_string(nv,
- ZPOOL_CONFIG_DEVID, devid_str) == 0);
- }
- if (devid_str != NULL)
- devid_str_free(devid_str);
- if (minor != NULL)
- devid_str_free(minor);
- devid_free(devid);
- }
-
/*
- * Update the path to refer to the 's0' slice. The presence of
+ * Update the path to refer to FIRST_SLICE. The presence of
* the 'whole_disk' field indicates to the CLI that we should
* chop off the slice number when displaying the device in
* future output.
*/
verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
- (void) close(fd);
+ /* Just in case this partition already existed. */
+ (void) zero_label(buf);
return (0);
}
@@ -992,7 +1045,7 @@ is_spare(nvlist_t *config, const char *path)
uint_t i, nspares;
boolean_t inuse;
- if ((fd = open(path, O_RDONLY)) < 0)
+ if ((fd = open(path, O_RDONLY|O_EXCL)) < 0)
return (B_FALSE);
if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
@@ -1035,25 +1088,27 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
nvlist_t **child;
uint_t c, children;
char *type, *path;
- int ret;
+ int ret = 0;
char buf[MAXPATHLEN];
- uint64_t wholedisk;
+ uint64_t wholedisk = B_FALSE;
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+ verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
+ if (strcmp(type, VDEV_TYPE_DISK) == 0)
+ verify(!nvlist_lookup_uint64(nv,
+ ZPOOL_CONFIG_WHOLE_DISK, &wholedisk));
/*
* As a generic check, we look to see if this is a replace of a
* hot spare within the same pool. If so, we allow it
- * regardless of what libdiskmgt or zpool_in_use() says.
+ * regardless of what libblkid or zpool_in_use() says.
*/
if (isreplacing) {
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- &wholedisk) == 0 && wholedisk)
+ if (wholedisk)
(void) snprintf(buf, sizeof (buf), "%ss0",
path);
else
@@ -1063,7 +1118,7 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
}
if (strcmp(type, VDEV_TYPE_DISK) == 0)
- ret = check_device(path, force, isspare);
+ ret = check_device(path, force, isspare, wholedisk);
if (strcmp(type, VDEV_TYPE_FILE) == 0)
ret = check_file(path, force, isspare);
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index e10649919..a10bd5ed1 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -3642,31 +3642,21 @@ static void
ztest_verify_blocks(char *pool)
{
int status;
+ char bin[MAXPATHLEN + MAXNAMELEN + 20];
char zdb[MAXPATHLEN + MAXNAMELEN + 20];
char zbuf[1024];
- char *bin;
- char *ztest;
- char *isa;
- int isalen;
FILE *fp;
- (void) realpath(getexecname(), zdb);
+ /* Designed to be run exclusively in the development tree */
+ VERIFY(realpath(getexecname(), bin) != NULL);
+ strstr(bin, "/ztest/")[0] = '\0';
- /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */
- bin = strstr(zdb, "/usr/bin/");
- ztest = strstr(bin, "/ztest");
- isa = bin + 8;
- isalen = ztest - isa;
- isa = strdup(isa);
- /* LINTED */
- (void) sprintf(bin,
- "/usr/sbin%.*s/zdb -bcc%s%s -U /tmp/zpool.cache %s",
- isalen,
- isa,
+ (void) sprintf(zdb,
+ "%s/zdb/zdb -bcc%s%s -U /tmp/zpool.cache %s",
+ bin,
zopt_verbose >= 3 ? "s" : "",
zopt_verbose >= 4 ? "v" : "",
pool);
- free(isa);
if (zopt_verbose >= 5)
(void) printf("Executing %s\n", strstr(zdb, "zdb "));
diff --git a/config/user-arch.m4 b/config/user-arch.m4
index 1ffa28b0e..fcc566fc5 100644
--- a/config/user-arch.m4
+++ b/config/user-arch.m4
@@ -2,20 +2,18 @@ dnl #
dnl # Set the target arch for libspl atomic implementation
dnl #
AC_DEFUN([ZFS_AC_CONFIG_USER_ARCH], [
- AC_MSG_CHECKING(for target arch)
+ AC_MSG_CHECKING(for target asm dir)
TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
- TARGET_ARCH_DIR=asm-$TARGET_ARCH
- AC_MSG_RESULT([$TARGET_ARCH])
case $TARGET_ARCH in
- i386|x86_64|powerpc64)
- AC_SUBST([TARGET_ARCH])
- AC_SUBST([TARGET_ARCH_DIR])
+ i386|x86_64)
+ TARGET_ASM_DIR=asm-${TARGET_ARCH}
;;
*)
- AC_MSG_ERROR([
- *** Unsupported architecture $TARGET_ARCH
- *** Available architectures: x86, x86_64, powerpc64])
+ TARGET_ASM_DIR=asm-generic
;;
esac
+
+ AC_SUBST([TARGET_ASM_DIR])
+ AC_MSG_RESULT([$TARGET_ASM_DIR])
])
diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
index ccfd2eda2..955793be7 100644
--- a/config/zfs-build.m4
+++ b/config/zfs-build.m4
@@ -54,16 +54,21 @@ MODDIR=${MODDIR}
SCRIPTDIR=${SCRIPTDIR}
UDEVDIR=\${TOPDIR}/scripts/udev-rules
ZPOOLDIR=\${TOPDIR}/scripts/zpool-config
+ZPIOSDIR=\${TOPDIR}/scripts/zpios-test
+ZPIOSPROFILEDIR=\${TOPDIR}/scripts/zpios-profile
ZDB=\${CMDDIR}/zdb/zdb
ZFS=\${CMDDIR}/zfs/zfs
ZINJECT=\${CMDDIR}/zinject/zinject
ZPOOL=\${CMDDIR}/zpool/zpool
ZTEST=\${CMDDIR}/ztest/ztest
+ZPIOS=\${CMDDIR}/zpios/zpios
COMMON_SH=\${SCRIPTDIR}/common.sh
ZFS_SH=\${SCRIPTDIR}/zfs.sh
ZPOOL_CREATE_SH=\${SCRIPTDIR}/zpool-create.sh
+ZPIOS_SH=\${SCRIPTDIR}/zpios.sh
+ZPIOS_SURVEY_SH=\${SCRIPTDIR}/zpios-survey.sh
LDMOD=/sbin/insmod
@@ -83,6 +88,10 @@ ZFS_MODULES=( \\
\${MODDIR}/zfs/zfs.ko \\
)
+ZPIOS_MODULES=( \\
+ \${MODDIR}/zpios/zpios.ko \\
+)
+
MODULES=( \\
\${KERNEL_MODULES[[*]]} \\
\${SPL_MODULES[[*]]} \\
diff --git a/configure.ac b/configure.ac
index 97ab13729..26c389037 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,6 +54,11 @@ AC_CONFIG_FILES([
config/Makefile
doc/Makefile
lib/Makefile
+ lib/libspl/Makefile
+ lib/libspl/asm-generic/Makefile
+ lib/libspl/asm-i386/Makefile
+ lib/libspl/asm-x86_64/Makefile
+ lib/libspl/include/Makefile
lib/libavl/Makefile
lib/libefi/Makefile
lib/libnvpair/Makefile
@@ -68,12 +73,14 @@ AC_CONFIG_FILES([
cmd/zinject/Makefile
cmd/zpool/Makefile
cmd/ztest/Makefile
+ cmd/zpios/Makefile
module/Makefile
module/avl/Makefile
module/nvpair/Makefile
module/unicode/Makefile
module/zcommon/Makefile
module/zfs/Makefile
+ module/zpios/Makefile
scripts/Makefile
zfs.spec
zfs-modules.spec
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 2de022787..042656813 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1 +1 @@
-SUBDIRS = libavl libefi libnvpair libunicode libuutil libzfs libzpool
+SUBDIRS = libspl libavl libefi libnvpair libunicode libuutil libzfs libzpool
diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
index 31eb3d3f6..7c0f5b478 100644
--- a/lib/libefi/rdwr_efi.c
+++ b/lib/libefi/rdwr_efi.c
@@ -30,6 +30,7 @@
#include <strings.h>
#include <unistd.h>
#include <uuid/uuid.h>
+#include <zlib.h>
#include <libintl.h>
#include <sys/types.h>
#include <sys/dkio.h>
@@ -39,7 +40,9 @@
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <sys/byteorder.h>
-#include <sys/ddi.h>
+#if defined(__linux__)
+#include <linux/fs.h>
+#endif
static struct uuid_to_ptag {
struct uuid uuid;
@@ -50,11 +53,11 @@ static struct uuid_to_ptag {
{ EFI_SWAP },
{ EFI_USR },
{ EFI_BACKUP },
- { 0 }, /* STAND is never used */
+ { EFI_UNUSED }, /* STAND is never used */
{ EFI_VAR },
{ EFI_HOME },
{ EFI_ALTSCTR },
- { 0 }, /* CACHE (cachefs) is never used */
+ { EFI_UNUSED }, /* CACHE (cachefs) is never used */
{ EFI_RESERVED },
{ EFI_SYSTEM },
{ EFI_LEGACY_MBR },
@@ -108,19 +111,134 @@ int efi_debug = 1;
int efi_debug = 0;
#endif
-extern unsigned int efi_crc32(const unsigned char *, unsigned int);
-static int efi_read(int, struct dk_gpt *);
+static int efi_read(int, struct dk_gpt *);
+
+/*
+ * Return a 32-bit CRC of the contents of the buffer. Pre-and-post
+ * one's conditioning will be handled by crc32() internally.
+ */
+static uint32_t
+efi_crc32(const unsigned char *buf, unsigned int size)
+{
+ uint32_t crc = crc32(0, Z_NULL, 0);
+
+ crc = crc32(crc, buf, size);
+
+ return (crc);
+}
static int
read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
{
- struct dk_minfo disk_info;
+ int sector_size;
+ unsigned long long capacity_size;
+
+ if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
+ return (-1);
+
+ if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
+ return (-1);
+
+ *lbsize = (uint_t)sector_size;
+ *capacity = (diskaddr_t)(capacity_size / sector_size);
+
+ return (0);
+}
- if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1)
- return (errno);
- *capacity = disk_info.dki_capacity;
- *lbsize = disk_info.dki_lbsize;
+/*
+ * Fill in *dki_info for the block device open on 'fd'.
+ *
+ * On Linux the controller name, controller type, device name and
+ * partition number are derived from the /dev/ path that
+ * /proc/self/fd/<fd> resolves to; elsewhere the DKIOCINFO ioctl is used.
+ *
+ * Returns 0 on success, or VT_EIO / VT_EINVAL / VT_ERROR selected from
+ * errno on failure. A device whose name matches none of the known
+ * prefixes is labelled "unknown" but, because nothing is parsed for it,
+ * still takes the EINVAL failure path.
+ */
+static int
+efi_get_info(int fd, struct dk_cinfo *dki_info)
+{
+#if defined(__linux__)
+ char path[PATH_MAX];
+ char *dev_path;
+ int rval = 0;
+
+ /*
+ * The simplest way to get the partition number under linux is
+ * to parse it out of the /dev/<disk><partition> block device name.
+ * The kernel creates this using the partition number when it
+ * populates /dev/ so it may be trusted. The tricky bit here is
+ * that the naming convention is based on the block device type.
+ * So we need to take this in to account when parsing out the
+ * partition information. Another issue is that the libefi API
+ * only provides the open fd and not the file path. To handle
+ * this realpath(3) is used to resolve the block device name from
+ * /proc/self/fd/<fd>. Aside from the partition number we collect
+ * some additional device info.
+ *
+ * NOTE(review): the %[...] conversions below carry no field width;
+ * confirm dki_dname is large enough for the longest device name.
+ */
+ memset(dki_info, 0, sizeof(*dki_info));
+ (void) sprintf(path, "/proc/self/fd/%d", fd);
+ if ((dev_path = realpath(path, NULL)) == NULL)
+ goto error;
+
+ if ((strncmp(dev_path, "/dev/sd", 7) == 0)) {
+ strcpy(dki_info->dki_cname, "sd");
+ dki_info->dki_ctype = DKC_SCSI_CCS;
+ rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
+ dki_info->dki_dname,
+ &dki_info->dki_partition);
+ } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) {
+ strcpy(dki_info->dki_cname, "hd");
+ dki_info->dki_ctype = DKC_DIRECT;
+ rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
+ dki_info->dki_dname,
+ &dki_info->dki_partition);
+ } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) {
+ strcpy(dki_info->dki_cname, "pseudo");
+ dki_info->dki_ctype = DKC_MD;
+ rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+ dki_info->dki_dname,
+ &dki_info->dki_partition);
+ } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) {
+ strcpy(dki_info->dki_cname, "pseudo");
+ dki_info->dki_ctype = DKC_VBD;
+ rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9-]p%hu",
+ dki_info->dki_dname,
+ &dki_info->dki_partition);
+ } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) {
+ strcpy(dki_info->dki_cname, "pseudo");
+ dki_info->dki_ctype = DKC_PCMCIA_MEM;
+ rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+ dki_info->dki_dname,
+ &dki_info->dki_partition);
+ } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) {
+ strcpy(dki_info->dki_cname, "pseudo");
+ dki_info->dki_ctype = DKC_VBD;
+ rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+ dki_info->dki_dname,
+ &dki_info->dki_partition);
+ } else {
+ strcpy(dki_info->dki_dname, "unknown");
+ strcpy(dki_info->dki_cname, "unknown");
+ dki_info->dki_ctype = DKC_UNKNOWN;
+ }
+
+ switch (rval) {
+ case 0:
+ /*
+ * Nothing was parsed. Release the realpath(3) buffer before
+ * bailing out (it used to be leaked here), and only then set
+ * errno so that free() cannot disturb it.
+ */
+ free(dev_path);
+ errno = EINVAL;
+ goto error;
+ case 1:
+ /* Only the device name matched: this is the whole disk */
+ dki_info->dki_partition = 0;
+ }
+
+ free(dev_path);
+#else
+ if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1)
+ goto error;
+#endif
return (0);
+error:
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
+
+ /* Translate errno into the VT_* codes returned to libefi callers */
+ switch (errno) {
+ case EIO:
+ return (VT_EIO);
+ case EINVAL:
+ return (VT_EINVAL);
+ default:
+ return (VT_ERROR);
+ }
}
/*
@@ -136,12 +254,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
int
efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
{
- diskaddr_t capacity;
- uint_t lbsize;
+ diskaddr_t capacity = 0;
+ uint_t lbsize = 0;
uint_t nblocks;
size_t length;
struct dk_gpt *vptr;
struct uuid uuid;
+ struct dk_cinfo dki_info;
if (read_disk_info(fd, &capacity, &lbsize) != 0) {
if (efi_debug)
@@ -149,6 +268,22 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
"couldn't read disk information\n");
return (-1);
}
+#if defined(__linux__)
+ if (efi_get_info(fd, &dki_info) != 0) {
+ if (efi_debug)
+ (void) fprintf(stderr,
+ "couldn't read disk information\n");
+ return (-1);
+ }
+
+ if (dki_info.dki_partition != 0)
+ return (-1);
+
+ if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
+ (dki_info.dki_ctype == DKC_VBD) ||
+ (dki_info.dki_ctype == DKC_UNKNOWN))
+ return (-1);
+#endif
nblocks = NBLOCKS(nparts, lbsize);
if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
@@ -244,14 +379,138 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
{
void *data = dk_ioc->dki_data;
int error;
+#if defined(__linux__)
+ diskaddr_t capacity;
+ uint_t lbsize;
+
+ /*
+ * On Linux the DKIOCGETEFI / DKIOCSETEFI requests are emulated in
+ * user space with lseek() + read()/write() on the device itself.
+ * When the IO is not being performed in kernel as an ioctl we need
+ * to know the sector size so we can seek to the proper byte offset.
+ * Every failure path returns -1 with errno set.
+ */
+ if (read_disk_info(fd, &capacity, &lbsize) == -1) {
+ if (efi_debug)
+ fprintf(stderr,"unable to read disk info: %d",errno);
+
+ errno = EIO;
+ return -1;
+ }
+
+ switch (cmd) {
+ case DKIOCGETEFI:
+ /* Reads fall back to DEV_BSIZE when the sector size is unknown */
+ if (lbsize == 0) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCGETEFI assuming "
+ "LBA %d bytes\n", DEV_BSIZE);
+
+ lbsize = DEV_BSIZE;
+ }
+
+ error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
+ if (error == -1) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCGETEFI lseek "
+ "error: %d\n", errno);
+ return error;
+ }
+
+ error = read(fd, data, dk_ioc->dki_length);
+ if (error == -1) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCGETEFI read "
+ "error: %d\n", errno);
+ return error;
+ }
+
+ /* A short read is reported as an I/O error */
+ if (error != dk_ioc->dki_length) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCGETEFI short "
+ "read of %d bytes\n", error);
+ errno = EIO;
+ return -1;
+ }
+ error = 0;
+ break;
+
+ case DKIOCSETEFI:
+ /* Writes refuse to guess: an unknown sector size is an error */
+ if (lbsize == 0) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCSETEFI unknown "
+ "LBA size\n");
+ errno = EIO;
+ return -1;
+ }
+
+ error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
+ if (error == -1) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCSETEFI lseek "
+ "error: %d\n", errno);
+ return error;
+ }
+
+ error = write(fd, data, dk_ioc->dki_length);
+ if (error == -1) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCSETEFI write "
+ "error: %d\n", errno);
+ return error;
+ }
+
+ /* A short write is reported as an I/O error */
+ if (error != dk_ioc->dki_length) {
+ if (efi_debug)
+ (void) fprintf(stderr, "DKIOCSETEFI short "
+ "write of %d bytes\n", error);
+ errno = EIO;
+ return -1;
+ }
+ /* Sync the new EFI table to disk */
+ error = fsync(fd);
+ if (error == -1)
+ return error;
+
+ /*
+ * Ensure any local disk cache is also flushed. 'error' still
+ * holds fsync()'s 0 at this point, so the failure must be
+ * reported with an explicit -1 rather than 'error'.
+ */
+ if (ioctl(fd, BLKFLSBUF, 0) == -1)
+ return -1;
+
+ error = 0;
+ break;
+
+ default:
+ if (efi_debug)
+ (void) fprintf(stderr, "unsupported ioctl()\n");
+
+ errno = EIO;
+ return -1;
+ }
+#else
dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
error = ioctl(fd, cmd, (void *)dk_ioc);
dk_ioc->dki_data = data;
-
+#endif
return (error);
}
+#if defined(__linux__)
+/*
+ * Ask the kernel to re-read the partition table of the block device open
+ * on 'fd' by issuing the BLKRRPART ioctl. The ioctl is attempted up to
+ * five times back-to-back. Returns 0 on success, or -1 after printing a
+ * message to stderr once every attempt has failed.
+ */
+static int
+efi_rescan(int fd)
+{
+ int retry = 5;
+ int error;
+
+ /* Notify the kernel that a device's partition table has been updated */
+ while ((error = ioctl(fd, BLKRRPART)) != 0) {
+ if (--retry == 0) {
+ (void) fprintf(stderr, "the kernel failed to rescan "
+ "the partition table: %d\n", errno);
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+#endif
+
static int
check_label(int fd, dk_efi_t *dk_ioc)
{
@@ -306,6 +565,8 @@ efi_read(int fd, struct dk_gpt *vtoc)
int rval = 0;
int md_flag = 0;
int vdc_flag = 0;
+ diskaddr_t capacity = 0;
+ uint_t lbsize = 0;
struct dk_minfo disk_info;
dk_efi_t dk_ioc;
efi_gpt_t *efi;
@@ -317,19 +578,9 @@ efi_read(int fd, struct dk_gpt *vtoc)
/*
* get the partition number for this file descriptor.
*/
- if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
- if (efi_debug) {
- (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
- }
- switch (errno) {
- case EIO:
- return (VT_EIO);
- case EINVAL:
- return (VT_EINVAL);
- default:
- return (VT_ERROR);
- }
- }
+ if ((rval = efi_get_info(fd, &dki_info)) != 0)
+ return rval;
+
if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
(strncmp(dki_info.dki_dname, "md", 3) == 0)) {
md_flag++;
@@ -343,14 +594,18 @@ efi_read(int fd, struct dk_gpt *vtoc)
}
/* get the LBA size */
- if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) {
+ if (read_disk_info(fd, &capacity, &lbsize) == -1) {
if (efi_debug) {
(void) fprintf(stderr,
- "assuming LBA 512 bytes %d\n",
- errno);
+ "unable to read disk info: %d",
+ errno);
}
- disk_info.dki_lbsize = DEV_BSIZE;
+ return (VT_EINVAL);
}
+
+ disk_info.dki_lbsize = lbsize;
+ disk_info.dki_capacity = capacity;
+
if (disk_info.dki_lbsize == 0) {
if (efi_debug) {
(void) fprintf(stderr,
@@ -375,9 +630,11 @@ efi_read(int fd, struct dk_gpt *vtoc)
}
}
- if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL)
+ if (posix_memalign((void **)&dk_ioc.dki_data,
+ disk_info.dki_lbsize, label_len))
return (VT_ERROR);
+ memset(dk_ioc.dki_data, 0, label_len);
dk_ioc.dki_length = disk_info.dki_lbsize;
user_length = vtoc->efi_nparts;
efi = dk_ioc.dki_data;
@@ -573,12 +830,14 @@ write_pmbr(int fd, struct dk_gpt *vtoc)
int len;
len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
- buf = calloc(len, 1);
+ if (posix_memalign((void **)&buf, len, len))
+ return (VT_ERROR);
/*
* Preserve any boot code and disk signature if the first block is
* already an MBR.
*/
+ memset(buf, 0, len);
dk_ioc.dki_lba = 0;
dk_ioc.dki_length = len;
/* LINTED -- always longlong aligned */
@@ -664,10 +923,9 @@ check_input(struct dk_gpt *vtoc)
if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
(vtoc->efi_parts[i].p_size != 0)) {
if (efi_debug) {
- (void) fprintf(stderr,
-"partition %d is \"unassigned\" but has a size of %llu",
- i,
- vtoc->efi_parts[i].p_size);
+ (void) fprintf(stderr, "partition %d is "
+ "\"unassigned\" but has a size of %llu",
+ i, vtoc->efi_parts[i].p_size);
}
return (VT_EINVAL);
}
@@ -680,9 +938,9 @@ check_input(struct dk_gpt *vtoc)
if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
if (resv_part != -1) {
if (efi_debug) {
- (void) fprintf(stderr,
-"found duplicate reserved partition at %d\n",
- i);
+ (void) fprintf(stderr, "found "
+ "duplicate reserved partition "
+ "at %d\n", i);
}
return (VT_EINVAL);
}
@@ -733,8 +991,8 @@ check_input(struct dk_gpt *vtoc)
(istart <= endsect)) {
if (efi_debug) {
(void) fprintf(stderr,
-"Partition %d overlaps partition %d.",
- i, j);
+ "Partition %d overlaps "
+ "partition %d.", i, j);
}
return (VT_EINVAL);
}
@@ -840,22 +1098,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
efi_gpe_t *efi_parts;
int i, j;
struct dk_cinfo dki_info;
+ int rval;
int md_flag = 0;
int nblocks;
diskaddr_t lba_backup_gpt_hdr;
- if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
- if (efi_debug)
- (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
- switch (errno) {
- case EIO:
- return (VT_EIO);
- case EINVAL:
- return (VT_EINVAL);
- default:
- return (VT_ERROR);
- }
- }
+ if ((rval = efi_get_info(fd, &dki_info)) != 0)
+ return rval;
/* check if we are dealing wih a metadevice */
if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
@@ -893,9 +1142,11 @@ efi_write(int fd, struct dk_gpt *vtoc)
* for backup GPT header.
*/
lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
- if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL)
+ if (posix_memalign((void **)&dk_ioc.dki_data,
+ vtoc->efi_lbasize, dk_ioc.dki_length))
return (VT_ERROR);
+ memset(dk_ioc.dki_data, 0, dk_ioc.dki_length);
efi = dk_ioc.dki_data;
/* stuff user's input into EFI struct */
@@ -942,6 +1193,10 @@ efi_write(int fd, struct dk_gpt *vtoc)
return (VT_EINVAL);
}
+ /* Zeros should be written for empty partitions */
+ if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
+ continue;
+
efi_parts[i].efi_gpe_StartingLBA =
LE_64(vtoc->efi_parts[i].p_start);
efi_parts[i].efi_gpe_EndingLBA =
@@ -1033,6 +1288,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
/* write the PMBR */
(void) write_pmbr(fd, vtoc);
free(dk_ioc.dki_data);
+
+#if defined(__linux__)
+ rval = efi_rescan(fd);
+ if (rval)
+ return (VT_ERROR);
+#endif
+
return (0);
}
@@ -1050,6 +1312,7 @@ efi_free(struct dk_gpt *ptr)
int
efi_type(int fd)
{
+#if 0
struct vtoc vtoc;
struct extvtoc extvtoc;
@@ -1063,6 +1326,9 @@ efi_type(int fd)
}
}
return (0);
+#else
+ return (ENOSYS);
+#endif
}
void
@@ -1176,7 +1442,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc)
return (-1);
}
- for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) {
+ for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) {
(*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag;
(*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag;
(*vtoc)->efi_parts[i].p_start = 0;
diff --git a/lib/libnvpair/nvpair_alloc_system.c b/lib/libnvpair/nvpair_alloc_system.c
index f45dc5f0b..af30c1f40 100644
--- a/lib/libnvpair/nvpair_alloc_system.c
+++ b/lib/libnvpair/nvpair_alloc_system.c
@@ -26,7 +26,7 @@
-#include <rpc/types.h>
+#include <sys/kmem.h>
#include <sys/nvpair.h>
static void *
diff --git a/lib/libspl/Makefile.am b/lib/libspl/Makefile.am
new file mode 100644
index 000000000..ed8550c22
--- /dev/null
+++ b/lib/libspl/Makefile.am
@@ -0,0 +1,27 @@
+include $(top_srcdir)/config/Rules.am
+
+SUBDIRS = include $(TARGET_ASM_DIR)
+DIST_SUBDIRS = include asm-generic asm-i386 asm-x86_64
+
+DEFAULT_INCLUDES += \
+ -I${top_srcdir}/lib/libspl/include
+
+AM_CCASFLAGS = \
+ -I${top_srcdir}/lib/libspl/include
+
+lib_LTLIBRARIES = libspl.la
+
+libspl_la_SOURCES = \
+ ${top_srcdir}/lib/libspl/getexecname.c \
+ ${top_srcdir}/lib/libspl/gethrtime.c \
+ ${top_srcdir}/lib/libspl/getmntany.c \
+ ${top_srcdir}/lib/libspl/list.c \
+ ${top_srcdir}/lib/libspl/mkdirp.c \
+ ${top_srcdir}/lib/libspl/strlcat.c \
+ ${top_srcdir}/lib/libspl/strlcpy.c \
+ ${top_srcdir}/lib/libspl/strnlen.c \
+ ${top_srcdir}/lib/libspl/zone.c \
+ ${top_srcdir}/lib/libspl/xdr.c \
+ ${top_srcdir}/lib/libspl/${TARGET_ASM_DIR}/atomic.S \
+ ${top_srcdir}/lib/libspl/include/sys/list.h \
+ ${top_srcdir}/lib/libspl/include/sys/list_impl.h
diff --git a/lib/libspl/asm-generic/Makefile.am b/lib/libspl/asm-generic/Makefile.am
new file mode 100644
index 000000000..4f5032f73
--- /dev/null
+++ b/lib/libspl/asm-generic/Makefile.am
@@ -0,0 +1,18 @@
+include $(top_srcdir)/config/Rules.am
+
+DEFAULT_INCLUDES += \
+ -I${top_srcdir}/lib/libspl/include
+
+atomic_SOURCE = atomic.c
+atomic_ASM = atomic.S
+
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -fPIC
+EXTRA_DIST = ${atomic_SOURCE}
+
+# Generates assembly to simplify inclusion in ../Makefile.am
+all-am:
+ $(COMPILE) -c -S ${atomic_SOURCE} -o ${atomic_ASM}
+
+clean-generic:
+ $(RM) ${atomic_ASM}
diff --git a/lib/libspl/asm-generic/atomic.S b/lib/libspl/asm-generic/atomic.S
new file mode 100644
index 000000000..7550fd92b
--- /dev/null
+++ b/lib/libspl/asm-generic/atomic.S
@@ -0,0 +1,6 @@
+Stub file for 'make dist' distdir rule.
+
+This file is directly referenced by ../Makefile.am as a source
+file and thus will be expected by 'make dist'. To avoid this
+being a problem this stub file was added. It will be overwritten
+at build time based on assembly generated from atomic.c.
diff --git a/lib/libspl/asm-generic/atomic.c b/lib/libspl/asm-generic/atomic.c
new file mode 100644
index 000000000..de4430f9f
--- /dev/null
+++ b/lib/libspl/asm-generic/atomic.c
@@ -0,0 +1,424 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009 by Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <atomic.h>
+#include <assert.h>
+#include <pthread.h>
+
+/*
+ * All operations are implemented by serializing them through a global
+ * pthread mutex. This provides a correct generic implementation.
+ * However all supported architectures are encouraged to provide a
+ * native implementation in assembly for performance reasons.
+ */
+pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * These are the void-returning variants
+ */
+
+#define ATOMIC_INC(name, type) \
+ void atomic_inc_##name(volatile type *target) \
+ { \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ (*target)++; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ }
+
+ATOMIC_INC(long, unsigned long)
+ATOMIC_INC(8, uint8_t)
+ATOMIC_INC(uchar, uchar_t)
+ATOMIC_INC(16, uint16_t)
+ATOMIC_INC(ushort, ushort_t)
+ATOMIC_INC(32, uint32_t)
+ATOMIC_INC(uint, uint_t)
+ATOMIC_INC(ulong, ulong_t)
+ATOMIC_INC(64, uint64_t)
+
+
+#define ATOMIC_DEC(name, type) \
+ void atomic_dec_##name(volatile type *target) \
+ { \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ (*target)--; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ }
+
+ATOMIC_DEC(long, unsigned long)
+ATOMIC_DEC(8, uint8_t)
+ATOMIC_DEC(uchar, uchar_t)
+ATOMIC_DEC(16, uint16_t)
+ATOMIC_DEC(ushort, ushort_t)
+ATOMIC_DEC(32, uint32_t)
+ATOMIC_DEC(uint, uint_t)
+ATOMIC_DEC(ulong, ulong_t)
+ATOMIC_DEC(64, uint64_t)
+
+
+#define ATOMIC_ADD(name, type1, type2) \
+ void atomic_add_##name(volatile type1 *target, type2 bits) \
+ { \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ *target += bits; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ }
+
+ATOMIC_ADD(8, uint8_t, int8_t)
+ATOMIC_ADD(char, uchar_t, signed char)
+ATOMIC_ADD(16, uint16_t, int16_t)
+ATOMIC_ADD(short, ushort_t, short)
+ATOMIC_ADD(32, uint32_t, int32_t)
+ATOMIC_ADD(int, uint_t, int)
+ATOMIC_ADD(long, ulong_t, long)
+ATOMIC_ADD(64, uint64_t, int64_t)
+
+void atomic_add_ptr(volatile void *target, ssize_t bits)
+{
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
+ *(caddr_t *)target += bits;
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+}
+
+
+#define ATOMIC_OR(name, type) \
+ void atomic_or_##name(volatile type *target, type bits) \
+ { \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ *target |= bits; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ }
+
+ATOMIC_OR(8, uint8_t)
+ATOMIC_OR(uchar, uchar_t)
+ATOMIC_OR(16, uint16_t)
+ATOMIC_OR(ushort, ushort_t)
+ATOMIC_OR(32, uint32_t)
+ATOMIC_OR(uint, uint_t)
+ATOMIC_OR(ulong, ulong_t)
+ATOMIC_OR(64, uint64_t)
+
+
+#define ATOMIC_AND(name, type) \
+ void atomic_and_##name(volatile type *target, type bits) \
+ { \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ *target &= bits; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ }
+
+ATOMIC_AND(8, uint8_t)
+ATOMIC_AND(uchar, uchar_t)
+ATOMIC_AND(16, uint16_t)
+ATOMIC_AND(ushort, ushort_t)
+ATOMIC_AND(32, uint32_t)
+ATOMIC_AND(uint, uint_t)
+ATOMIC_AND(ulong, ulong_t)
+ATOMIC_AND(64, uint64_t)
+
+
+/*
+ * New value returning variants
+ */
+
+#define ATOMIC_INC_NV(name, type) \
+ type atomic_inc_##name##_nv(volatile type *target) \
+ { \
+ type rc; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ rc = (++(*target)); \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return rc; \
+ }
+
+ATOMIC_INC_NV(long, unsigned long)
+ATOMIC_INC_NV(8, uint8_t)
+ATOMIC_INC_NV(uchar, uchar_t)
+ATOMIC_INC_NV(16, uint16_t)
+ATOMIC_INC_NV(ushort, ushort_t)
+ATOMIC_INC_NV(32, uint32_t)
+ATOMIC_INC_NV(uint, uint_t)
+ATOMIC_INC_NV(ulong, ulong_t)
+ATOMIC_INC_NV(64, uint64_t)
+
+
+#define ATOMIC_DEC_NV(name, type) \
+ type atomic_dec_##name##_nv(volatile type *target) \
+ { \
+ type rc; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ rc = (--(*target)); \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return rc; \
+ }
+
+ATOMIC_DEC_NV(long, unsigned long)
+ATOMIC_DEC_NV(8, uint8_t)
+ATOMIC_DEC_NV(uchar, uchar_t)
+ATOMIC_DEC_NV(16, uint16_t)
+ATOMIC_DEC_NV(ushort, ushort_t)
+ATOMIC_DEC_NV(32, uint32_t)
+ATOMIC_DEC_NV(uint, uint_t)
+ATOMIC_DEC_NV(ulong, ulong_t)
+ATOMIC_DEC_NV(64, uint64_t)
+
+
+#define ATOMIC_ADD_NV(name, type1, type2) \
+ type1 atomic_add_##name##_nv(volatile type1 *target, type2 bits)\
+ { \
+ type1 rc; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ rc = (*target += bits); \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return rc; \
+ }
+
+ATOMIC_ADD_NV(8, uint8_t, int8_t)
+ATOMIC_ADD_NV(char, uchar_t, signed char)
+ATOMIC_ADD_NV(16, uint16_t, int16_t)
+ATOMIC_ADD_NV(short, ushort_t, short)
+ATOMIC_ADD_NV(32, uint32_t, int32_t)
+ATOMIC_ADD_NV(int, uint_t, int)
+ATOMIC_ADD_NV(long, ulong_t, long)
+ATOMIC_ADD_NV(64, uint64_t, int64_t)
+
+void *atomic_add_ptr_nv(volatile void *target, ssize_t bits)
+{
+ void *ptr;
+
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
+ ptr = (*(caddr_t *)target += bits);
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+
+ return ptr;
+}
+
+
+#define ATOMIC_OR_NV(name, type) \
+ type atomic_or_##name##_nv(volatile type *target, type bits) \
+ { \
+ type rc; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ rc = (*target |= bits); \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return rc; \
+ }
+
+ATOMIC_OR_NV(long, unsigned long)
+ATOMIC_OR_NV(8, uint8_t)
+ATOMIC_OR_NV(uchar, uchar_t)
+ATOMIC_OR_NV(16, uint16_t)
+ATOMIC_OR_NV(ushort, ushort_t)
+ATOMIC_OR_NV(32, uint32_t)
+ATOMIC_OR_NV(uint, uint_t)
+ATOMIC_OR_NV(ulong, ulong_t)
+ATOMIC_OR_NV(64, uint64_t)
+
+
+#define ATOMIC_AND_NV(name, type) \
+ type atomic_and_##name##_nv(volatile type *target, type bits) \
+ { \
+ type rc; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ rc = (*target &= bits); \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return rc; \
+ }
+
+ATOMIC_AND_NV(long, unsigned long)
+ATOMIC_AND_NV(8, uint8_t)
+ATOMIC_AND_NV(uchar, uchar_t)
+ATOMIC_AND_NV(16, uint16_t)
+ATOMIC_AND_NV(ushort, ushort_t)
+ATOMIC_AND_NV(32, uint32_t)
+ATOMIC_AND_NV(uint, uint_t)
+ATOMIC_AND_NV(ulong, ulong_t)
+ATOMIC_AND_NV(64, uint64_t)
+
+
+/*
+ * If *target == arg1, set *target = arg2; return the old value of *target
+ */
+
+#define ATOMIC_CAS(name, type) \
+ type atomic_cas_##name(volatile type *target, type arg1, type arg2) \
+ { \
+ type old; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ old = *target; \
+ if (old == arg1) \
+ *target = arg2; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return old; \
+ }
+
+ATOMIC_CAS(8, uint8_t)
+ATOMIC_CAS(uchar, uchar_t)
+ATOMIC_CAS(16, uint16_t)
+ATOMIC_CAS(ushort, ushort_t)
+ATOMIC_CAS(32, uint32_t)
+ATOMIC_CAS(uint, uint_t)
+ATOMIC_CAS(ulong, ulong_t)
+ATOMIC_CAS(64, uint64_t)
+
+void *atomic_cas_ptr(volatile void *target, void *arg1, void *arg2)
+{
+ void *old;
+
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
+ old = *(void **)target;
+ if (old == arg1)
+ *(void **)target = arg2;
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+
+ return old;
+}
+
+
+/*
+ * Swap target and return old value
+ */
+
+#define ATOMIC_SWAP(name, type) \
+ type atomic_swap_##name(volatile type *target, type bits) \
+ { \
+ type old; \
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \
+ old = *target; \
+ *target = bits; \
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \
+ return old; \
+ }
+
+ATOMIC_SWAP(8, uint8_t)
+ATOMIC_SWAP(uchar, uchar_t)
+ATOMIC_SWAP(16, uint16_t)
+ATOMIC_SWAP(ushort, ushort_t)
+ATOMIC_SWAP(32, uint32_t)
+ATOMIC_SWAP(uint, uint_t)
+ATOMIC_SWAP(ulong, ulong_t)
+ATOMIC_SWAP(64, uint64_t)
+
+void *atomic_swap_ptr(volatile void *target, void *bits)
+{
+ void *old;
+
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
+ old = *(void **)target;
+ *(void **)target = bits;
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+
+ return old;
+}
+
+
+int atomic_set_long_excl(volatile ulong_t *target, uint_t value)
+{
+ ulong_t bit;
+
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
+ bit = (1UL << value);
+ if ((*target & bit) != 0) {
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+ return -1;
+ }
+ *target |= bit;
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+
+ return 0;
+}
+
+/*
+ * Exclusively clear bit number 'value' in *target.
+ *
+ * Returns 0 if the bit was set and has now been cleared, or -1 (leaving
+ * *target untouched) if the bit was already clear. This is the mirror
+ * image of atomic_set_long_excl(), so the test is for a clear bit, not a
+ * set one.
+ */
+int atomic_clear_long_excl(volatile ulong_t *target, uint_t value)
+{
+ ulong_t bit;
+
+ VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
+ bit = (1UL << value);
+ if ((*target & bit) == 0) {
+ /* Already clear: nothing for this caller to release */
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+ return -1;
+ }
+ *target &= ~bit;
+ VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
+
+ return 0;
+}
+
+/*
+ * Memory barrier. Implemented conservatively as a full barrier via the
+ * GCC __sync_synchronize() builtin instead of being left as a no-op.
+ */
+void membar_enter(void)
+{
+ __sync_synchronize();
+}
+
+/*
+ * Memory barrier. Implemented conservatively as a full barrier via the
+ * GCC __sync_synchronize() builtin instead of being left as a no-op.
+ */
+void membar_exit(void)
+{
+ __sync_synchronize();
+}
+
+/*
+ * Memory barrier. Implemented conservatively as a full barrier via the
+ * GCC __sync_synchronize() builtin instead of being left as a no-op.
+ */
+void membar_producer(void)
+{
+ __sync_synchronize();
+}
+
+/*
+ * Memory barrier. Implemented conservatively as a full barrier via the
+ * GCC __sync_synchronize() builtin instead of being left as a no-op.
+ */
+void membar_consumer(void)
+{
+ __sync_synchronize();
+}
+
+/* Legacy kernel interfaces; they will go away (eventually). */
+
+uint8_t cas8(uint8_t *target, uint8_t arg1, uint8_t arg2)
+{
+ return atomic_cas_8(target, arg1, arg2);
+}
+
+uint32_t cas32(uint32_t *target, uint32_t arg1, uint32_t arg2)
+{
+ return atomic_cas_32(target, arg1, arg2);
+}
+
+uint64_t cas64(uint64_t *target, uint64_t arg1, uint64_t arg2)
+{
+ return atomic_cas_64(target, arg1, arg2);
+}
+
+ulong_t caslong(ulong_t *target, ulong_t arg1, ulong_t arg2)
+{
+ return atomic_cas_ulong(target, arg1, arg2);
+}
+
+void *casptr(void *target, void *arg1, void *arg2)
+{
+ return atomic_cas_ptr(target, arg1, arg2);
+}
+
+/* Legacy alias: forwards to atomic_and_ulong(); nothing is returned. */
+void atomic_and_long(ulong_t *target, ulong_t bits)
+{
+ atomic_and_ulong(target, bits);
+}
+
+/* Legacy alias: forwards to atomic_or_ulong(); nothing is returned. */
+void atomic_or_long(ulong_t *target, ulong_t bits)
+{
+ atomic_or_ulong(target, bits);
+}
diff --git a/lib/libspl/asm-i386/Makefile.am b/lib/libspl/asm-i386/Makefile.am
new file mode 100644
index 000000000..02403eceb
--- /dev/null
+++ b/lib/libspl/asm-i386/Makefile.am
@@ -0,0 +1 @@
+noinst_HEADERS = *.S
diff --git a/lib/libspl/asm-i386/atomic.S b/lib/libspl/asm-i386/atomic.S
new file mode 100644
index 000000000..93c04bfb8
--- /dev/null
+++ b/lib/libspl/asm-i386/atomic.S
@@ -0,0 +1,730 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "%Z%%M% %I% %E% SMI"
+
+ .file "%M%"
+
+#define _ASM
+#include <ia32/sys/asm_linkage.h>
+
+ /*
+ * atomic_inc_{8,16,32}: atomically add one to the 8-, 16- or 32-bit
+ * value whose address is the first stack argument, using a
+ * lock-prefixed inc. No return value is set up. The uchar/ushort/
+ * uint/ulong names are ALTENTRY aliases of the fixed-width entries.
+ */
+ ENTRY(atomic_inc_8)
+ ALTENTRY(atomic_inc_uchar)
+ movl 4(%esp), %eax /* %eax = target address */
+ lock
+ incb (%eax)
+ ret
+ SET_SIZE(atomic_inc_uchar)
+ SET_SIZE(atomic_inc_8)
+
+ ENTRY(atomic_inc_16)
+ ALTENTRY(atomic_inc_ushort)
+ movl 4(%esp), %eax
+ lock
+ incw (%eax)
+ ret
+ SET_SIZE(atomic_inc_ushort)
+ SET_SIZE(atomic_inc_16)
+
+ ENTRY(atomic_inc_32)
+ ALTENTRY(atomic_inc_uint)
+ ALTENTRY(atomic_inc_ulong)
+ movl 4(%esp), %eax
+ lock
+ incl (%eax)
+ ret
+ SET_SIZE(atomic_inc_ulong)
+ SET_SIZE(atomic_inc_uint)
+ SET_SIZE(atomic_inc_32)
+
+ /*
+ * atomic_inc_{8,16,32}_nv: atomically increment *target and return
+ * the new value in %eax (zero-extended for the 8/16-bit forms).
+ * Each routine reads the current value into the accumulator, builds
+ * value + 1 in %ecx and retries lock cmpxchg until the location was
+ * not changed in between; a failed cmpxchg reloads the accumulator
+ * with the value currently in memory.
+ */
+ ENTRY(atomic_inc_8_nv)
+ ALTENTRY(atomic_inc_uchar_nv)
+ movl 4(%esp), %edx /* %edx = target address */
+ movb (%edx), %al
+1:
+ leal 1(%eax), %ecx /* only %cl is used below */
+ lock
+ cmpxchgb %cl, (%edx)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_inc_uchar_nv)
+ SET_SIZE(atomic_inc_8_nv)
+
+ ENTRY(atomic_inc_16_nv)
+ ALTENTRY(atomic_inc_ushort_nv)
+ movl 4(%esp), %edx
+ movw (%edx), %ax
+1:
+ leal 1(%eax), %ecx
+ lock
+ cmpxchgw %cx, (%edx)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_inc_ushort_nv)
+ SET_SIZE(atomic_inc_16_nv)
+
+ ENTRY(atomic_inc_32_nv)
+ ALTENTRY(atomic_inc_uint_nv)
+ ALTENTRY(atomic_inc_ulong_nv)
+ movl 4(%esp), %edx
+ movl (%edx), %eax
+1:
+ leal 1(%eax), %ecx
+ lock
+ cmpxchgl %ecx, (%edx)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_inc_ulong_nv)
+ SET_SIZE(atomic_inc_uint_nv)
+ SET_SIZE(atomic_inc_32_nv)
+
+ /*
+ * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
+ * separated, you need to also edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_inc_64_nv.
+ */
+ /*
+ * 64-bit increment, shared by atomic_inc_64 and atomic_inc_64_nv.
+ * The current value is held in %edx:%eax, the incremented value is
+ * built in %ecx:%ebx (adc carries into the high half) and lock
+ * cmpxchg8b is retried until it succeeds. The new value is returned
+ * in %edx:%eax. %edi and %ebx are saved and restored, which is why
+ * the target pointer is found at 12(%esp).
+ */
+ ENTRY(atomic_inc_64)
+ ALTENTRY(atomic_inc_64_nv)
+ pushl %edi
+ pushl %ebx
+ movl 12(%esp), %edi
+ movl (%edi), %eax
+ movl 4(%edi), %edx
+1:
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ incl %ebx /* %ecx:%ebx = 1 */
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ lock
+ cmpxchg8b (%edi)
+ jne 1b
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_inc_64_nv)
+ SET_SIZE(atomic_inc_64)
+
+ /*
+ * atomic_dec_{8,16,32}: atomically subtract one from the 8-, 16- or
+ * 32-bit value whose address is the first stack argument, using a
+ * lock-prefixed dec. No return value is set up.
+ */
+ ENTRY(atomic_dec_8)
+ ALTENTRY(atomic_dec_uchar)
+ movl 4(%esp), %eax /* %eax = target address */
+ lock
+ decb (%eax)
+ ret
+ SET_SIZE(atomic_dec_uchar)
+ SET_SIZE(atomic_dec_8)
+
+ ENTRY(atomic_dec_16)
+ ALTENTRY(atomic_dec_ushort)
+ movl 4(%esp), %eax
+ lock
+ decw (%eax)
+ ret
+ SET_SIZE(atomic_dec_ushort)
+ SET_SIZE(atomic_dec_16)
+
+ ENTRY(atomic_dec_32)
+ ALTENTRY(atomic_dec_uint)
+ ALTENTRY(atomic_dec_ulong)
+ movl 4(%esp), %eax
+ lock
+ decl (%eax)
+ ret
+ SET_SIZE(atomic_dec_ulong)
+ SET_SIZE(atomic_dec_uint)
+ SET_SIZE(atomic_dec_32)
+
+ /*
+ * atomic_dec_{8,16,32}_nv: atomically decrement *target and return
+ * the new value in %eax (zero-extended for the 8/16-bit forms).
+ * Same read / compute-in-%ecx / lock cmpxchg retry loop as the
+ * increment variants, with -1 as the displacement.
+ */
+ ENTRY(atomic_dec_8_nv)
+ ALTENTRY(atomic_dec_uchar_nv)
+ movl 4(%esp), %edx /* %edx = target address */
+ movb (%edx), %al
+1:
+ leal -1(%eax), %ecx /* only %cl is used below */
+ lock
+ cmpxchgb %cl, (%edx)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_dec_uchar_nv)
+ SET_SIZE(atomic_dec_8_nv)
+
+ ENTRY(atomic_dec_16_nv)
+ ALTENTRY(atomic_dec_ushort_nv)
+ movl 4(%esp), %edx
+ movw (%edx), %ax
+1:
+ leal -1(%eax), %ecx
+ lock
+ cmpxchgw %cx, (%edx)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_dec_ushort_nv)
+ SET_SIZE(atomic_dec_16_nv)
+
+ ENTRY(atomic_dec_32_nv)
+ ALTENTRY(atomic_dec_uint_nv)
+ ALTENTRY(atomic_dec_ulong_nv)
+ movl 4(%esp), %edx
+ movl (%edx), %eax
+1:
+ leal -1(%eax), %ecx
+ lock
+ cmpxchgl %ecx, (%edx)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_dec_ulong_nv)
+ SET_SIZE(atomic_dec_uint_nv)
+ SET_SIZE(atomic_dec_32_nv)
+
+ /*
+ * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_dec_64_nv.
+ */
+ /*
+ * 64-bit decrement, shared by atomic_dec_64 and atomic_dec_64_nv.
+ * %ecx:%ebx is set to all ones (64-bit -1) and added to the current
+ * value in %edx:%eax; lock cmpxchg8b is retried until it succeeds.
+ * The new value is returned in %edx:%eax. %edi and %ebx are saved
+ * and restored around the operation.
+ */
+ ENTRY(atomic_dec_64)
+ ALTENTRY(atomic_dec_64_nv)
+ pushl %edi
+ pushl %ebx
+ movl 12(%esp), %edi
+ movl (%edi), %eax
+ movl 4(%edi), %edx
+1:
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ not %ecx
+ not %ebx /* %ecx:%ebx = -1 */
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ lock
+ cmpxchg8b (%edi)
+ jne 1b
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_dec_64_nv)
+ SET_SIZE(atomic_dec_64)
+
+ /*
+ * atomic_add_{8,16,32}: atomically add the second stack argument to
+ * the 8-, 16- or 32-bit value addressed by the first, using a
+ * lock-prefixed add. No return value is set up. On i386 the int,
+ * ptr and long variants all alias the 32-bit entry.
+ */
+ ENTRY(atomic_add_8)
+ ALTENTRY(atomic_add_char)
+ movl 4(%esp), %eax /* %eax = target address */
+ movl 8(%esp), %ecx /* %ecx = delta (low bits used) */
+ lock
+ addb %cl, (%eax)
+ ret
+ SET_SIZE(atomic_add_char)
+ SET_SIZE(atomic_add_8)
+
+ ENTRY(atomic_add_16)
+ ALTENTRY(atomic_add_short)
+ movl 4(%esp), %eax
+ movl 8(%esp), %ecx
+ lock
+ addw %cx, (%eax)
+ ret
+ SET_SIZE(atomic_add_short)
+ SET_SIZE(atomic_add_16)
+
+ ENTRY(atomic_add_32)
+ ALTENTRY(atomic_add_int)
+ ALTENTRY(atomic_add_ptr)
+ ALTENTRY(atomic_add_long)
+ movl 4(%esp), %eax
+ movl 8(%esp), %ecx
+ lock
+ addl %ecx, (%eax)
+ ret
+ SET_SIZE(atomic_add_long)
+ SET_SIZE(atomic_add_ptr)
+ SET_SIZE(atomic_add_int)
+ SET_SIZE(atomic_add_32)
+
+ /*
+ * atomic_or_{8,16,32}: atomically OR the second stack argument into
+ * the 8-, 16- or 32-bit value addressed by the first, using a
+ * lock-prefixed or. No return value is set up.
+ */
+ ENTRY(atomic_or_8)
+ ALTENTRY(atomic_or_uchar)
+ movl 4(%esp), %eax /* %eax = target address */
+ movb 8(%esp), %cl /* %cl = bits to set */
+ lock
+ orb %cl, (%eax)
+ ret
+ SET_SIZE(atomic_or_uchar)
+ SET_SIZE(atomic_or_8)
+
+ ENTRY(atomic_or_16)
+ ALTENTRY(atomic_or_ushort)
+ movl 4(%esp), %eax
+ movw 8(%esp), %cx
+ lock
+ orw %cx, (%eax)
+ ret
+ SET_SIZE(atomic_or_ushort)
+ SET_SIZE(atomic_or_16)
+
+ ENTRY(atomic_or_32)
+ ALTENTRY(atomic_or_uint)
+ ALTENTRY(atomic_or_ulong)
+ movl 4(%esp), %eax
+ movl 8(%esp), %ecx
+ lock
+ orl %ecx, (%eax)
+ ret
+ SET_SIZE(atomic_or_ulong)
+ SET_SIZE(atomic_or_uint)
+ SET_SIZE(atomic_or_32)
+
+ /*
+ * atomic_and_{8,16,32}: atomically AND the second stack argument
+ * into the 8-, 16- or 32-bit value addressed by the first, using a
+ * lock-prefixed and. No return value is set up.
+ */
+ ENTRY(atomic_and_8)
+ ALTENTRY(atomic_and_uchar)
+ movl 4(%esp), %eax /* %eax = target address */
+ movb 8(%esp), %cl /* %cl = mask */
+ lock
+ andb %cl, (%eax)
+ ret
+ SET_SIZE(atomic_and_uchar)
+ SET_SIZE(atomic_and_8)
+
+ ENTRY(atomic_and_16)
+ ALTENTRY(atomic_and_ushort)
+ movl 4(%esp), %eax
+ movw 8(%esp), %cx
+ lock
+ andw %cx, (%eax)
+ ret
+ SET_SIZE(atomic_and_ushort)
+ SET_SIZE(atomic_and_16)
+
+ ENTRY(atomic_and_32)
+ ALTENTRY(atomic_and_uint)
+ ALTENTRY(atomic_and_ulong)
+ movl 4(%esp), %eax
+ movl 8(%esp), %ecx
+ lock
+ andl %ecx, (%eax)
+ ret
+ SET_SIZE(atomic_and_ulong)
+ SET_SIZE(atomic_and_uint)
+ SET_SIZE(atomic_and_32)
+
+ /*
+ * atomic_add_{8,16,32}_nv: atomically add the second stack argument
+ * to *target and return the new value in %eax (zero-extended for
+ * the 8/16-bit forms). The delta is reloaded from the stack on
+ * every retry because %ecx is overwritten with the candidate sum
+ * before each lock cmpxchg attempt.
+ */
+ ENTRY(atomic_add_8_nv)
+ ALTENTRY(atomic_add_char_nv)
+ movl 4(%esp), %edx /* %edx = target address */
+ movb (%edx), %al
+1:
+ movl 8(%esp), %ecx /* %ecx = delta */
+ addb %al, %cl
+ lock
+ cmpxchgb %cl, (%edx)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_add_char_nv)
+ SET_SIZE(atomic_add_8_nv)
+
+ ENTRY(atomic_add_16_nv)
+ ALTENTRY(atomic_add_short_nv)
+ movl 4(%esp), %edx
+ movw (%edx), %ax
+1:
+ movl 8(%esp), %ecx
+ addw %ax, %cx
+ lock
+ cmpxchgw %cx, (%edx)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_add_short_nv)
+ SET_SIZE(atomic_add_16_nv)
+
+ ENTRY(atomic_add_32_nv)
+ ALTENTRY(atomic_add_int_nv)
+ ALTENTRY(atomic_add_ptr_nv)
+ ALTENTRY(atomic_add_long_nv)
+ movl 4(%esp), %edx
+ movl (%edx), %eax
+1:
+ movl 8(%esp), %ecx
+ addl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%edx)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_add_long_nv)
+ SET_SIZE(atomic_add_ptr_nv)
+ SET_SIZE(atomic_add_int_nv)
+ SET_SIZE(atomic_add_32_nv)
+
+ /*
+ * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_add_64_nv.
+ */
+ /*
+ * 64-bit add, shared by atomic_add_64 and atomic_add_64_nv. After
+ * the two pushes the target pointer is at 12(%esp) and the 64-bit
+ * delta at 16(%esp) (low half) and 20(%esp) (high half). The sum is
+ * built in %ecx:%ebx from the current value in %edx:%eax and lock
+ * cmpxchg8b is retried until it succeeds. The new value is returned
+ * in %edx:%eax.
+ */
+ ENTRY(atomic_add_64)
+ ALTENTRY(atomic_add_64_nv)
+ pushl %edi
+ pushl %ebx
+ movl 12(%esp), %edi
+ movl (%edi), %eax
+ movl 4(%edi), %edx
+1:
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ lock
+ cmpxchg8b (%edi)
+ jne 1b
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_add_64_nv)
+ SET_SIZE(atomic_add_64)
+
+ /*
+ * atomic_or_{8,16,32}_nv: atomically OR the second stack argument
+ * into *target and return the new value in %eax (zero-extended for
+ * the 8/16-bit forms), using the same reload / lock cmpxchg retry
+ * loop as the add_nv routines.
+ */
+ ENTRY(atomic_or_8_nv)
+ ALTENTRY(atomic_or_uchar_nv)
+ movl 4(%esp), %edx /* %edx = target address */
+ movb (%edx), %al
+1:
+ movl 8(%esp), %ecx /* %ecx = bits to set */
+ orb %al, %cl
+ lock
+ cmpxchgb %cl, (%edx)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_or_uchar_nv)
+ SET_SIZE(atomic_or_8_nv)
+
+ ENTRY(atomic_or_16_nv)
+ ALTENTRY(atomic_or_ushort_nv)
+ movl 4(%esp), %edx
+ movw (%edx), %ax
+1:
+ movl 8(%esp), %ecx
+ orw %ax, %cx
+ lock
+ cmpxchgw %cx, (%edx)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_or_ushort_nv)
+ SET_SIZE(atomic_or_16_nv)
+
+ ENTRY(atomic_or_32_nv)
+ ALTENTRY(atomic_or_uint_nv)
+ ALTENTRY(atomic_or_ulong_nv)
+ movl 4(%esp), %edx
+ movl (%edx), %eax
+1:
+ movl 8(%esp), %ecx
+ orl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%edx)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_or_ulong_nv)
+ SET_SIZE(atomic_or_uint_nv)
+ SET_SIZE(atomic_or_32_nv)
+
+ /*
+ * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_or_64_nv.
+ */
+ /*
+ * 64-bit OR, shared by atomic_or_64 and atomic_or_64_nv. The mask
+ * halves are loaded from 16(%esp)/20(%esp), ORed with the current
+ * value in %edx:%eax into %ecx:%ebx, and lock cmpxchg8b is retried
+ * until it succeeds. The new value is returned in %edx:%eax.
+ */
+ ENTRY(atomic_or_64)
+ ALTENTRY(atomic_or_64_nv)
+ pushl %edi
+ pushl %ebx
+ movl 12(%esp), %edi
+ movl (%edi), %eax
+ movl 4(%edi), %edx
+1:
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx
+ orl %eax, %ebx
+ orl %edx, %ecx
+ lock
+ cmpxchg8b (%edi)
+ jne 1b
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_or_64_nv)
+ SET_SIZE(atomic_or_64)
+
+ /*
+ * atomic_and_{8,16,32}_nv: atomically AND the second stack argument
+ * into *target and return the new value in %eax (zero-extended for
+ * the 8/16-bit forms), using the same reload / lock cmpxchg retry
+ * loop as the add_nv routines.
+ */
+ ENTRY(atomic_and_8_nv)
+ ALTENTRY(atomic_and_uchar_nv)
+ movl 4(%esp), %edx /* %edx = target address */
+ movb (%edx), %al
+1:
+ movl 8(%esp), %ecx /* %ecx = mask */
+ andb %al, %cl
+ lock
+ cmpxchgb %cl, (%edx)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_and_uchar_nv)
+ SET_SIZE(atomic_and_8_nv)
+
+ ENTRY(atomic_and_16_nv)
+ ALTENTRY(atomic_and_ushort_nv)
+ movl 4(%esp), %edx
+ movw (%edx), %ax
+1:
+ movl 8(%esp), %ecx
+ andw %ax, %cx
+ lock
+ cmpxchgw %cx, (%edx)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_and_ushort_nv)
+ SET_SIZE(atomic_and_16_nv)
+
+ ENTRY(atomic_and_32_nv)
+ ALTENTRY(atomic_and_uint_nv)
+ ALTENTRY(atomic_and_ulong_nv)
+ movl 4(%esp), %edx
+ movl (%edx), %eax
+1:
+ movl 8(%esp), %ecx
+ andl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%edx)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_and_ulong_nv)
+ SET_SIZE(atomic_and_uint_nv)
+ SET_SIZE(atomic_and_32_nv)
+
+ /*
+ * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_and_64_nv.
+ */
+ /*
+ * 64-bit AND, shared by atomic_and_64 and atomic_and_64_nv. The
+ * mask halves are loaded from 16(%esp)/20(%esp), ANDed with the
+ * current value in %edx:%eax into %ecx:%ebx, and lock cmpxchg8b is
+ * retried until it succeeds. The new value is returned in
+ * %edx:%eax.
+ */
+ ENTRY(atomic_and_64)
+ ALTENTRY(atomic_and_64_nv)
+ pushl %edi
+ pushl %ebx
+ movl 12(%esp), %edi
+ movl (%edi), %eax
+ movl 4(%edi), %edx
+1:
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx
+ andl %eax, %ebx
+ andl %edx, %ecx
+ lock
+ cmpxchg8b (%edi)
+ jne 1b
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_and_64_nv)
+ SET_SIZE(atomic_and_64)
+
+ /*
+ * atomic_cas_{8,16,32}: compare-and-swap. Stack arguments are the
+ * target address, the expected value (loaded into the accumulator)
+ * and the replacement value. lock cmpxchg stores the replacement
+ * only when memory equals the expected value; either way the value
+ * that was in memory ends up in %eax and is returned. On i386 the
+ * uint, ulong and ptr variants alias the 32-bit entry.
+ */
+ ENTRY(atomic_cas_8)
+ ALTENTRY(atomic_cas_uchar)
+ movl 4(%esp), %edx /* %edx = target address */
+ movzbl 8(%esp), %eax /* %eax = expected value */
+ movb 12(%esp), %cl /* %cl = replacement value */
+ lock
+ cmpxchgb %cl, (%edx)
+ ret
+ SET_SIZE(atomic_cas_uchar)
+ SET_SIZE(atomic_cas_8)
+
+ ENTRY(atomic_cas_16)
+ ALTENTRY(atomic_cas_ushort)
+ movl 4(%esp), %edx
+ movzwl 8(%esp), %eax
+ movw 12(%esp), %cx
+ lock
+ cmpxchgw %cx, (%edx)
+ ret
+ SET_SIZE(atomic_cas_ushort)
+ SET_SIZE(atomic_cas_16)
+
+ ENTRY(atomic_cas_32)
+ ALTENTRY(atomic_cas_uint)
+ ALTENTRY(atomic_cas_ulong)
+ ALTENTRY(atomic_cas_ptr)
+ movl 4(%esp), %edx
+ movl 8(%esp), %eax
+ movl 12(%esp), %ecx
+ lock
+ cmpxchgl %ecx, (%edx)
+ ret
+ SET_SIZE(atomic_cas_ptr)
+ SET_SIZE(atomic_cas_ulong)
+ SET_SIZE(atomic_cas_uint)
+ SET_SIZE(atomic_cas_32)
+
+ /*
+ * atomic_cas_64: 64-bit compare-and-swap via lock cmpxchg8b. After
+ * the two pushes the target pointer is at 12(%esp), the expected
+ * value at 16/20(%esp) (loaded into %edx:%eax) and the replacement
+ * at 24/28(%esp) (loaded into %ecx:%ebx). The value found in memory
+ * is returned in %edx:%eax. %ebx and %esi are saved and restored.
+ */
+ ENTRY(atomic_cas_64)
+ pushl %ebx
+ pushl %esi
+ movl 12(%esp), %esi
+ movl 16(%esp), %eax
+ movl 20(%esp), %edx
+ movl 24(%esp), %ebx
+ movl 28(%esp), %ecx
+ lock
+ cmpxchg8b (%esi)
+ popl %esi
+ popl %ebx
+ ret
+ SET_SIZE(atomic_cas_64)
+
+ /*
+ * atomic_swap_{8,16,32}: atomically exchange the second stack
+ * argument with the value addressed by the first, using xchg. The
+ * previous memory contents are left in the accumulator and
+ * returned. On i386 the uint, ptr and ulong variants alias the
+ * 32-bit entry.
+ */
+ ENTRY(atomic_swap_8)
+ ALTENTRY(atomic_swap_uchar)
+ movl 4(%esp), %edx /* %edx = target address */
+ movzbl 8(%esp), %eax /* %eax = new value */
+ lock
+ xchgb %al, (%edx)
+ ret
+ SET_SIZE(atomic_swap_uchar)
+ SET_SIZE(atomic_swap_8)
+
+ ENTRY(atomic_swap_16)
+ ALTENTRY(atomic_swap_ushort)
+ movl 4(%esp), %edx
+ movzwl 8(%esp), %eax
+ lock
+ xchgw %ax, (%edx)
+ ret
+ SET_SIZE(atomic_swap_ushort)
+ SET_SIZE(atomic_swap_16)
+
+ ENTRY(atomic_swap_32)
+ ALTENTRY(atomic_swap_uint)
+ ALTENTRY(atomic_swap_ptr)
+ ALTENTRY(atomic_swap_ulong)
+ movl 4(%esp), %edx
+ movl 8(%esp), %eax
+ lock
+ xchgl %eax, (%edx)
+ ret
+ SET_SIZE(atomic_swap_ulong)
+ SET_SIZE(atomic_swap_ptr)
+ SET_SIZE(atomic_swap_uint)
+ SET_SIZE(atomic_swap_32)
+
+ /*
+ * atomic_swap_64: 64-bit exchange built from a lock cmpxchg8b retry
+ * loop. The new value is kept in %ecx:%ebx; the current value is
+ * read into %edx:%eax and cmpxchg8b is repeated until it matches
+ * memory (a failed attempt reloads %edx:%eax). The previous value
+ * is returned in %edx:%eax. %esi and %ebx are saved and restored.
+ */
+ ENTRY(atomic_swap_64)
+ pushl %esi
+ pushl %ebx
+ movl 12(%esp), %esi
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx
+ movl (%esi), %eax
+ movl 4(%esi), %edx
+1:
+ lock
+ cmpxchg8b (%esi)
+ jne 1b
+ popl %ebx
+ popl %esi
+ ret
+ SET_SIZE(atomic_swap_64)
+
+ /*
+ * atomic_set_long_excl: atomically set the bit whose number is the
+ * second stack argument in the long addressed by the first. lock
+ * bts leaves the bit's previous state in the carry flag: returns 0
+ * if the bit was clear, -1 if it was already set.
+ */
+ ENTRY(atomic_set_long_excl)
+ movl 4(%esp), %edx
+ movl 8(%esp), %ecx
+ xorl %eax, %eax
+ lock
+ btsl %ecx, (%edx)
+ jnc 1f
+ decl %eax /* bit was already set: return -1 */
+1:
+ ret
+ SET_SIZE(atomic_set_long_excl)
+
+ /*
+ * atomic_clear_long_excl: atomically clear the bit whose number is
+ * the second stack argument in the long addressed by the first.
+ * lock btr leaves the bit's previous state in the carry flag:
+ * returns 0 if the bit was set, -1 if it was already clear.
+ */
+ ENTRY(atomic_clear_long_excl)
+ movl 4(%esp), %edx
+ movl 8(%esp), %ecx
+ xorl %eax, %eax
+ lock
+ btrl %ecx, (%edx)
+ jc 1f
+ decl %eax /* bit was already clear: return -1 */
+1:
+ ret
+ SET_SIZE(atomic_clear_long_excl)
+
+ /*
+ * NOTE: membar_enter, membar_exit, membar_producer, and
+ * membar_consumer are all identical routines. We define them
+ * separately, instead of using ALTENTRY definitions to alias them
+ * together, so that DTrace and debuggers will see a unique address
+ * for them, allowing more accurate tracing.
+ */
+
+
+ /*
+ * Each barrier performs a lock-prefixed xorl $0 on the word at the
+ * top of the stack: the memory contents are unchanged, and the
+ * locked read-modify-write serves as the barrier.
+ */
+ ENTRY(membar_enter)
+ lock
+ xorl $0, (%esp)
+ ret
+ SET_SIZE(membar_enter)
+
+ ENTRY(membar_exit)
+ lock
+ xorl $0, (%esp)
+ ret
+ SET_SIZE(membar_exit)
+
+ ENTRY(membar_producer)
+ lock
+ xorl $0, (%esp)
+ ret
+ SET_SIZE(membar_producer)
+
+ ENTRY(membar_consumer)
+ lock
+ xorl $0, (%esp)
+ ret
+ SET_SIZE(membar_consumer)
+
+/*
+ * On ELF targets emit an empty .note.GNU-stack section; GNU toolchains
+ * read this as "this object does not need an executable stack".
+ */
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/lib/libspl/asm-x86_64/Makefile.am b/lib/libspl/asm-x86_64/Makefile.am
new file mode 100644
index 000000000..02403eceb
--- /dev/null
+++ b/lib/libspl/asm-x86_64/Makefile.am
@@ -0,0 +1 @@
+# List the assembly sources of this directory as non-installed headers.
+# NOTE(review): automake does not expand wildcards itself; this presumably
+# relies on the glob being expanded when the generated rules run - confirm.
+noinst_HEADERS = *.S
diff --git a/lib/libspl/asm-x86_64/atomic.S b/lib/libspl/asm-x86_64/atomic.S
new file mode 100644
index 000000000..e321bf732
--- /dev/null
+++ b/lib/libspl/asm-x86_64/atomic.S
@@ -0,0 +1,595 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "%Z%%M% %I% %E% SMI"
+
+ .file "%M%"
+
+#define _ASM
+#include <ia32/sys/asm_linkage.h>
+
+ /*
+ * atomic_inc_{8,16,32,64}: atomically add one to the value addressed
+ * by the first argument (%rdi) using a lock-prefixed inc of the
+ * matching width. No return value is set up. On x86_64 the ulong
+ * variant aliases the 64-bit entry.
+ */
+ ENTRY(atomic_inc_8)
+ ALTENTRY(atomic_inc_uchar)
+ lock
+ incb (%rdi)
+ ret
+ SET_SIZE(atomic_inc_uchar)
+ SET_SIZE(atomic_inc_8)
+
+ ENTRY(atomic_inc_16)
+ ALTENTRY(atomic_inc_ushort)
+ lock
+ incw (%rdi)
+ ret
+ SET_SIZE(atomic_inc_ushort)
+ SET_SIZE(atomic_inc_16)
+
+ ENTRY(atomic_inc_32)
+ ALTENTRY(atomic_inc_uint)
+ lock
+ incl (%rdi)
+ ret
+ SET_SIZE(atomic_inc_uint)
+ SET_SIZE(atomic_inc_32)
+
+ ENTRY(atomic_inc_64)
+ ALTENTRY(atomic_inc_ulong)
+ lock
+ incq (%rdi)
+ ret
+ SET_SIZE(atomic_inc_ulong)
+ SET_SIZE(atomic_inc_64)
+
+ /*
+ * atomic_inc_{8,16,32,64}_nv: atomically increment the value
+ * addressed by %rdi and return the new value in the accumulator
+ * (zero-extended for the 8/16-bit forms). The current value is read
+ * into the accumulator, value + 1 is formed in %rcx and lock cmpxchg
+ * is retried until the location was not changed in between; a
+ * failed cmpxchg reloads the accumulator from memory.
+ */
+ ENTRY(atomic_inc_8_nv)
+ ALTENTRY(atomic_inc_uchar_nv)
+ movb (%rdi), %al
+1:
+ leaq 1(%rax), %rcx /* only %cl is used below */
+ lock
+ cmpxchgb %cl, (%rdi)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_inc_uchar_nv)
+ SET_SIZE(atomic_inc_8_nv)
+
+ ENTRY(atomic_inc_16_nv)
+ ALTENTRY(atomic_inc_ushort_nv)
+ movw (%rdi), %ax
+1:
+ leaq 1(%rax), %rcx
+ lock
+ cmpxchgw %cx, (%rdi)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_inc_ushort_nv)
+ SET_SIZE(atomic_inc_16_nv)
+
+ ENTRY(atomic_inc_32_nv)
+ ALTENTRY(atomic_inc_uint_nv)
+ movl (%rdi), %eax
+1:
+ leaq 1(%rax), %rcx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_inc_uint_nv)
+ SET_SIZE(atomic_inc_32_nv)
+
+ ENTRY(atomic_inc_64_nv)
+ ALTENTRY(atomic_inc_ulong_nv)
+ movq (%rdi), %rax
+1:
+ leaq 1(%rax), %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_inc_ulong_nv)
+ SET_SIZE(atomic_inc_64_nv)
+
+ /*
+ * atomic_dec_{8,16,32,64}: atomically subtract one from the value
+ * addressed by the first argument (%rdi) using a lock-prefixed dec
+ * of the matching width. No return value is set up.
+ */
+ ENTRY(atomic_dec_8)
+ ALTENTRY(atomic_dec_uchar)
+ lock
+ decb (%rdi)
+ ret
+ SET_SIZE(atomic_dec_uchar)
+ SET_SIZE(atomic_dec_8)
+
+ ENTRY(atomic_dec_16)
+ ALTENTRY(atomic_dec_ushort)
+ lock
+ decw (%rdi)
+ ret
+ SET_SIZE(atomic_dec_ushort)
+ SET_SIZE(atomic_dec_16)
+
+ ENTRY(atomic_dec_32)
+ ALTENTRY(atomic_dec_uint)
+ lock
+ decl (%rdi)
+ ret
+ SET_SIZE(atomic_dec_uint)
+ SET_SIZE(atomic_dec_32)
+
+ ENTRY(atomic_dec_64)
+ ALTENTRY(atomic_dec_ulong)
+ lock
+ decq (%rdi)
+ ret
+ SET_SIZE(atomic_dec_ulong)
+ SET_SIZE(atomic_dec_64)
+
+ /*
+ * atomic_dec_{8,16,32,64}_nv: atomically decrement the value
+ * addressed by %rdi and return the new value in the accumulator
+ * (zero-extended for the 8/16-bit forms). Same read / compute in
+ * %rcx / lock cmpxchg retry loop as the increment variants, with -1
+ * as the displacement.
+ */
+ ENTRY(atomic_dec_8_nv)
+ ALTENTRY(atomic_dec_uchar_nv)
+ movb (%rdi), %al
+1:
+ leaq -1(%rax), %rcx /* only %cl is used below */
+ lock
+ cmpxchgb %cl, (%rdi)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_dec_uchar_nv)
+ SET_SIZE(atomic_dec_8_nv)
+
+ ENTRY(atomic_dec_16_nv)
+ ALTENTRY(atomic_dec_ushort_nv)
+ movw (%rdi), %ax
+1:
+ leaq -1(%rax), %rcx
+ lock
+ cmpxchgw %cx, (%rdi)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_dec_ushort_nv)
+ SET_SIZE(atomic_dec_16_nv)
+
+ ENTRY(atomic_dec_32_nv)
+ ALTENTRY(atomic_dec_uint_nv)
+ movl (%rdi), %eax
+1:
+ leaq -1(%rax), %rcx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_dec_uint_nv)
+ SET_SIZE(atomic_dec_32_nv)
+
+ ENTRY(atomic_dec_64_nv)
+ ALTENTRY(atomic_dec_ulong_nv)
+ movq (%rdi), %rax
+1:
+ leaq -1(%rax), %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_dec_ulong_nv)
+ SET_SIZE(atomic_dec_64_nv)
+
+ /*
+ * atomic_add_{8,16,32,64}: atomically add the second argument
+ * (%rsi, or its low part) to the value addressed by the first
+ * (%rdi) using a lock-prefixed add. No return value is set up. On
+ * x86_64 the ptr and long variants alias the 64-bit entry.
+ */
+ ENTRY(atomic_add_8)
+ ALTENTRY(atomic_add_char)
+ lock
+ addb %sil, (%rdi)
+ ret
+ SET_SIZE(atomic_add_char)
+ SET_SIZE(atomic_add_8)
+
+ ENTRY(atomic_add_16)
+ ALTENTRY(atomic_add_short)
+ lock
+ addw %si, (%rdi)
+ ret
+ SET_SIZE(atomic_add_short)
+ SET_SIZE(atomic_add_16)
+
+ ENTRY(atomic_add_32)
+ ALTENTRY(atomic_add_int)
+ lock
+ addl %esi, (%rdi)
+ ret
+ SET_SIZE(atomic_add_int)
+ SET_SIZE(atomic_add_32)
+
+ ENTRY(atomic_add_64)
+ ALTENTRY(atomic_add_ptr)
+ ALTENTRY(atomic_add_long)
+ lock
+ addq %rsi, (%rdi)
+ ret
+ SET_SIZE(atomic_add_long)
+ SET_SIZE(atomic_add_ptr)
+ SET_SIZE(atomic_add_64)
+
+ /*
+ * atomic_or_{8,16,32,64}: atomically OR the second argument (%rsi,
+ * or its low part) into the value addressed by the first (%rdi)
+ * using a lock-prefixed or. No return value is set up.
+ */
+ ENTRY(atomic_or_8)
+ ALTENTRY(atomic_or_uchar)
+ lock
+ orb %sil, (%rdi)
+ ret
+ SET_SIZE(atomic_or_uchar)
+ SET_SIZE(atomic_or_8)
+
+ ENTRY(atomic_or_16)
+ ALTENTRY(atomic_or_ushort)
+ lock
+ orw %si, (%rdi)
+ ret
+ SET_SIZE(atomic_or_ushort)
+ SET_SIZE(atomic_or_16)
+
+ ENTRY(atomic_or_32)
+ ALTENTRY(atomic_or_uint)
+ lock
+ orl %esi, (%rdi)
+ ret
+ SET_SIZE(atomic_or_uint)
+ SET_SIZE(atomic_or_32)
+
+ ENTRY(atomic_or_64)
+ ALTENTRY(atomic_or_ulong)
+ lock
+ orq %rsi, (%rdi)
+ ret
+ SET_SIZE(atomic_or_ulong)
+ SET_SIZE(atomic_or_64)
+
+ /*
+ * atomic_and_{8,16,32,64}: atomically AND the second argument
+ * (%rsi, or its low part) into the value addressed by the first
+ * (%rdi) using a lock-prefixed and. No return value is set up.
+ */
+ ENTRY(atomic_and_8)
+ ALTENTRY(atomic_and_uchar)
+ lock
+ andb %sil, (%rdi)
+ ret
+ SET_SIZE(atomic_and_uchar)
+ SET_SIZE(atomic_and_8)
+
+ ENTRY(atomic_and_16)
+ ALTENTRY(atomic_and_ushort)
+ lock
+ andw %si, (%rdi)
+ ret
+ SET_SIZE(atomic_and_ushort)
+ SET_SIZE(atomic_and_16)
+
+ ENTRY(atomic_and_32)
+ ALTENTRY(atomic_and_uint)
+ lock
+ andl %esi, (%rdi)
+ ret
+ SET_SIZE(atomic_and_uint)
+ SET_SIZE(atomic_and_32)
+
+ ENTRY(atomic_and_64)
+ ALTENTRY(atomic_and_ulong)
+ lock
+ andq %rsi, (%rdi)
+ ret
+ SET_SIZE(atomic_and_ulong)
+ SET_SIZE(atomic_and_64)
+
+ /*
+ * atomic_add_{8,16,32,64}_nv: atomically add the second argument
+ * (%rsi, or its low part) to the value addressed by %rdi and return
+ * the new value in the accumulator (zero-extended for the 8/16-bit
+ * forms). The delta is copied into %rcx again on every retry
+ * because %rcx is overwritten with the candidate sum before each
+ * lock cmpxchg attempt.
+ */
+ ENTRY(atomic_add_8_nv)
+ ALTENTRY(atomic_add_char_nv)
+ movb (%rdi), %al
+1:
+ movb %sil, %cl
+ addb %al, %cl
+ lock
+ cmpxchgb %cl, (%rdi)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_add_char_nv)
+ SET_SIZE(atomic_add_8_nv)
+
+ ENTRY(atomic_add_16_nv)
+ ALTENTRY(atomic_add_short_nv)
+ movw (%rdi), %ax
+1:
+ movw %si, %cx
+ addw %ax, %cx
+ lock
+ cmpxchgw %cx, (%rdi)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_add_short_nv)
+ SET_SIZE(atomic_add_16_nv)
+
+ ENTRY(atomic_add_32_nv)
+ ALTENTRY(atomic_add_int_nv)
+ movl (%rdi), %eax
+1:
+ movl %esi, %ecx
+ addl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_add_int_nv)
+ SET_SIZE(atomic_add_32_nv)
+
+ ENTRY(atomic_add_64_nv)
+ ALTENTRY(atomic_add_ptr_nv)
+ ALTENTRY(atomic_add_long_nv)
+ movq (%rdi), %rax
+1:
+ movq %rsi, %rcx
+ addq %rax, %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_add_long_nv)
+ SET_SIZE(atomic_add_ptr_nv)
+ SET_SIZE(atomic_add_64_nv)
+
+ /*
+ * atomic_and_{8,16,32,64}_nv: atomically AND the second argument
+ * (%rsi, or its low part) into the value addressed by %rdi and
+ * return the new value in the accumulator (zero-extended for the
+ * 8/16-bit forms), using a lock cmpxchg retry loop.
+ */
+ ENTRY(atomic_and_8_nv)
+ ALTENTRY(atomic_and_uchar_nv)
+ movb (%rdi), %al
+1:
+ movb %sil, %cl
+ andb %al, %cl
+ lock
+ cmpxchgb %cl, (%rdi)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_and_uchar_nv)
+ SET_SIZE(atomic_and_8_nv)
+
+ ENTRY(atomic_and_16_nv)
+ ALTENTRY(atomic_and_ushort_nv)
+ movw (%rdi), %ax
+1:
+ movw %si, %cx
+ andw %ax, %cx
+ lock
+ cmpxchgw %cx, (%rdi)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_and_ushort_nv)
+ SET_SIZE(atomic_and_16_nv)
+
+ ENTRY(atomic_and_32_nv)
+ ALTENTRY(atomic_and_uint_nv)
+ movl (%rdi), %eax
+1:
+ movl %esi, %ecx
+ andl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_and_uint_nv)
+ SET_SIZE(atomic_and_32_nv)
+
+ ENTRY(atomic_and_64_nv)
+ ALTENTRY(atomic_and_ulong_nv)
+ movq (%rdi), %rax
+1:
+ movq %rsi, %rcx
+ andq %rax, %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_and_ulong_nv)
+ SET_SIZE(atomic_and_64_nv)
+
+ /*
+ * atomic_or_8_nv: atomically OR the low byte of the second argument
+ * (%sil) into the byte addressed by %rdi and return the new value,
+ * zero-extended, in %eax. Uses a lock cmpxchg retry loop; a failed
+ * cmpxchg reloads %al with the byte currently in memory.
+ *
+ * The SET_SIZE lines must name the symbols opened by ENTRY/ALTENTRY
+ * above (the or routines, not the and routines).
+ */
+ ENTRY(atomic_or_8_nv)
+ ALTENTRY(atomic_or_uchar_nv)
+ movb (%rdi), %al
+1:
+ movb %sil, %cl
+ orb %al, %cl
+ lock
+ cmpxchgb %cl, (%rdi)
+ jne 1b
+ movzbl %cl, %eax
+ ret
+ SET_SIZE(atomic_or_uchar_nv)
+ SET_SIZE(atomic_or_8_nv)
+
+ /*
+ * atomic_or_{16,32,64}_nv: atomically OR the second argument (%rsi,
+ * or its low part) into the value addressed by %rdi and return the
+ * new value in the accumulator (zero-extended for the 16-bit form),
+ * using a lock cmpxchg retry loop.
+ */
+ ENTRY(atomic_or_16_nv)
+ ALTENTRY(atomic_or_ushort_nv)
+ movw (%rdi), %ax
+1:
+ movw %si, %cx
+ orw %ax, %cx
+ lock
+ cmpxchgw %cx, (%rdi)
+ jne 1b
+ movzwl %cx, %eax
+ ret
+ SET_SIZE(atomic_or_ushort_nv)
+ SET_SIZE(atomic_or_16_nv)
+
+ ENTRY(atomic_or_32_nv)
+ ALTENTRY(atomic_or_uint_nv)
+ movl (%rdi), %eax
+1:
+ movl %esi, %ecx
+ orl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_or_uint_nv)
+ SET_SIZE(atomic_or_32_nv)
+
+ ENTRY(atomic_or_64_nv)
+ ALTENTRY(atomic_or_ulong_nv)
+ movq (%rdi), %rax
+1:
+ movq %rsi, %rcx
+ orq %rax, %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_or_ulong_nv)
+ SET_SIZE(atomic_or_64_nv)
+
+ /*
+ * atomic_cas_{8,16,32,64}: compare-and-swap. %rdi is the target
+ * address, the second argument (%rsi) is the expected value and is
+ * moved into the accumulator, the third (%rdx) is the replacement.
+ * lock cmpxchg stores the replacement only when memory equals the
+ * expected value; either way the value that was in memory ends up
+ * in the accumulator and is returned. On x86_64 the ulong and ptr
+ * variants alias the 64-bit entry.
+ */
+ ENTRY(atomic_cas_8)
+ ALTENTRY(atomic_cas_uchar)
+ movzbl %sil, %eax
+ lock
+ cmpxchgb %dl, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_uchar)
+ SET_SIZE(atomic_cas_8)
+
+ ENTRY(atomic_cas_16)
+ ALTENTRY(atomic_cas_ushort)
+ movzwl %si, %eax
+ lock
+ cmpxchgw %dx, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_ushort)
+ SET_SIZE(atomic_cas_16)
+
+ ENTRY(atomic_cas_32)
+ ALTENTRY(atomic_cas_uint)
+ movl %esi, %eax
+ lock
+ cmpxchgl %edx, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_uint)
+ SET_SIZE(atomic_cas_32)
+
+ ENTRY(atomic_cas_64)
+ ALTENTRY(atomic_cas_ulong)
+ ALTENTRY(atomic_cas_ptr)
+ movq %rsi, %rax
+ lock
+ cmpxchgq %rdx, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_ptr)
+ SET_SIZE(atomic_cas_ulong)
+ SET_SIZE(atomic_cas_64)
+
+ /*
+ * atomic_swap_{8,16,32,64}: atomically exchange the second argument
+ * (%rsi, moved into the accumulator) with the value addressed by
+ * %rdi, using xchg. The previous memory contents are left in the
+ * accumulator and returned. On x86_64 the ulong and ptr variants
+ * alias the 64-bit entry.
+ */
+ ENTRY(atomic_swap_8)
+ ALTENTRY(atomic_swap_uchar)
+ movzbl %sil, %eax
+ lock
+ xchgb %al, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_uchar)
+ SET_SIZE(atomic_swap_8)
+
+ ENTRY(atomic_swap_16)
+ ALTENTRY(atomic_swap_ushort)
+ movzwl %si, %eax
+ lock
+ xchgw %ax, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_ushort)
+ SET_SIZE(atomic_swap_16)
+
+ ENTRY(atomic_swap_32)
+ ALTENTRY(atomic_swap_uint)
+ movl %esi, %eax
+ lock
+ xchgl %eax, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_uint)
+ SET_SIZE(atomic_swap_32)
+
+ ENTRY(atomic_swap_64)
+ ALTENTRY(atomic_swap_ulong)
+ ALTENTRY(atomic_swap_ptr)
+ movq %rsi, %rax
+ lock
+ xchgq %rax, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_ptr)
+ SET_SIZE(atomic_swap_ulong)
+ SET_SIZE(atomic_swap_64)
+
+ /*
+ * atomic_set_long_excl: atomically set the bit numbered by the
+ * second argument (%rsi) in the long addressed by %rdi. lock bts
+ * leaves the bit's previous state in the carry flag: returns 0 if
+ * the bit was clear, -1 if it was already set.
+ * NOTE(review): the full 64-bit %rsi is used as the bit offset;
+ * this assumes the caller passes it zero-extended - confirm.
+ */
+ ENTRY(atomic_set_long_excl)
+ xorl %eax, %eax
+ lock
+ btsq %rsi, (%rdi)
+ jnc 1f
+ decl %eax /* bit was already set: return -1 */
+1:
+ ret
+ SET_SIZE(atomic_set_long_excl)
+
+ /*
+ * atomic_clear_long_excl: atomically clear the bit numbered by the
+ * second argument (%rsi) in the long addressed by %rdi. lock btr
+ * leaves the bit's previous state in the carry flag: returns 0 if
+ * the bit was set, -1 if it was already clear.
+ * NOTE(review): the full 64-bit %rsi is used as the bit offset;
+ * this assumes the caller passes it zero-extended - confirm.
+ */
+ ENTRY(atomic_clear_long_excl)
+ xorl %eax, %eax
+ lock
+ btrq %rsi, (%rdi)
+ jc 1f
+ decl %eax /* bit was already clear: return -1 */
+1:
+ ret
+ SET_SIZE(atomic_clear_long_excl)
+
+ /*
+ * NOTE: membar_enter and membar_exit are identical routines.
+ * We define them separately, instead of using an ALTENTRY
+ * definition to alias them together, so that DTrace and
+ * debuggers will see a unique address for them, allowing
+ * more accurate tracing.
+ */
+
+ /*
+ * Barrier instructions used: membar_enter and membar_exit issue
+ * mfence (orders loads and stores), membar_producer issues sfence
+ * (orders stores) and membar_consumer issues lfence (orders loads).
+ */
+ ENTRY(membar_enter)
+ mfence
+ ret
+ SET_SIZE(membar_enter)
+
+ ENTRY(membar_exit)
+ mfence
+ ret
+ SET_SIZE(membar_exit)
+
+ ENTRY(membar_producer)
+ sfence
+ ret
+ SET_SIZE(membar_producer)
+
+ ENTRY(membar_consumer)
+ lfence
+ ret
+ SET_SIZE(membar_consumer)
+
+/*
+ * On ELF targets emit an empty .note.GNU-stack section; GNU toolchains
+ * read this as "this object does not need an executable stack".
+ */
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/lib/libspl/getexecname.c b/lib/libspl/getexecname.c
new file mode 100644
index 000000000..43bf39ae3
--- /dev/null
+++ b/lib/libspl/getexecname.c
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <limits.h>
+
+/*
+ * Return the path of the running executable as reported by the
+ * /proc/self/exe symlink, or NULL if it cannot be read.
+ *
+ * The path is resolved on the first successful call and cached in a
+ * static buffer; every later call returns that cached string. A failed
+ * lookup leaves the cache empty, so the next call tries again. The
+ * returned pointer refers to static storage and must not be freed.
+ */
+const char *
+getexecname(void)
+{
+ static char execname[PATH_MAX + 1] = "";
+ static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
+ char *ptr = NULL;
+ ssize_t rc;
+
+ /* Serialize access to the shared cache buffer. */
+ pthread_mutex_lock(&mtx);
+
+ if (strlen(execname) == 0) {
+ /* readlink() does not NUL-terminate; keep one byte for the '\0'. */
+ rc = readlink("/proc/self/exe", execname, sizeof(execname) - 1);
+ if (rc == -1) {
+ execname[0] = '\0';
+ } else {
+ execname[rc] = '\0';
+ ptr = execname;
+ }
+ } else {
+ /* Already resolved by an earlier call: return the cached path. */
+ ptr = execname;
+ }
+
+ pthread_mutex_unlock(&mtx);
+ return ptr;
+}
diff --git a/lib/libspl/gethrtime.c b/lib/libspl/gethrtime.c
new file mode 100644
index 000000000..c2fd5e034
--- /dev/null
+++ b/lib/libspl/gethrtime.c
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <time.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Return the current CLOCK_MONOTONIC reading as a single count:
+ * whole seconds scaled by NANOSEC plus the nanosecond remainder.
+ * If clock_gettime() reports failure the error is printed to stderr
+ * and the process is aborted.
+ */
+hrtime_t
+gethrtime(void)
+{
+ struct timespec now;
+ int err = clock_gettime(CLOCK_MONOTONIC, &now);
+
+ if (err != 0) {
+ fprintf(stderr, "Error: clock_gettime() = %d\n", err);
+ abort();
+ }
+
+ return ((u_int64_t)now.tv_sec * NANOSEC) + now.tv_nsec;
+}
diff --git a/lib/libspl/getmntany.c b/lib/libspl/getmntany.c
new file mode 100644
index 000000000..f0b1cda4b
--- /dev/null
+++ b/lib/libspl/getmntany.c
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Ricardo Correia. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1988 AT&T */
+/* All Rights Reserved */
+
+#include <stdio.h>
+#include <string.h>
+#include <mntent.h>
+#include <sys/mnttab.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* Size of the line buffer handed to getmntent_r(). */
+#define BUFSIZE (MNT_LINE_MAX + 2)
+
+/* Per-thread buffer that getmntent_r() is given for the parsed entry. */
+__thread char buf[BUFSIZE];
+
+/*
+ * DIFF(field): true when the reference entry (mrefp) specifies `field`
+ * and the entry just read (mgetp) lacks it or holds a different string.
+ * Expects variables named mrefp and mgetp to be in scope.
+ */
+#define DIFF(xx) ((mrefp->xx != NULL) && \
+ (mgetp->xx == NULL || strcmp(mrefp->xx, mgetp->xx) != 0))
+
+/*
+ * Read entries from fp into mgetp until one matches every non-NULL
+ * field of mrefp (special, mount point, fstype, options).
+ *
+ * Returns 0 with the matching entry in mgetp, or the non-zero status
+ * of _sol_getmntent() once no further entry can be read.
+ */
+int
+getmntany(FILE *fp, struct mnttab *mgetp, struct mnttab *mrefp)
+{
+ int status;
+
+ for (;;) {
+ status = _sol_getmntent(fp, mgetp);
+ if (status != 0)
+ break;
+
+ /* Stop at the first entry that differs in none of the fields. */
+ if (!DIFF(mnt_special) && !DIFF(mnt_mountp) &&
+ !DIFF(mnt_fstype) && !DIFF(mnt_mntopts))
+ break;
+ }
+
+ return status;
+}
+
+/*
+ * Read the next mount entry from fp with getmntent_r() and expose it
+ * through the Solaris-style mnttab fields of mgetp.
+ *
+ * Returns 0 on success, -1 when fp is at end of file, and MNT_TOOLONG
+ * for any other failure. The strings stored in mgetp are the pointers
+ * filled in by getmntent_r(), which was given the per-thread `buf`.
+ */
+int
+_sol_getmntent(FILE *fp, struct mnttab *mgetp)
+{
+ struct mntent entry;
+
+ if (getmntent_r(fp, &entry, buf, BUFSIZE) == NULL)
+ return feof(fp) ? -1 : MNT_TOOLONG;
+
+ mgetp->mnt_special = entry.mnt_fsname;
+ mgetp->mnt_mountp = entry.mnt_dir;
+ mgetp->mnt_fstype = entry.mnt_type;
+ mgetp->mnt_mntopts = entry.mnt_opts;
+
+ return 0;
+}
+
+/*
+ * Read the next mount entry into mp and fill in the major/minor device
+ * numbers of its mount point.
+ *
+ * Returns the status of _sol_getmntent(). When the entry was read but
+ * stat64() on the mount point fails, major and minor are set to 0 and
+ * 0 is still returned. The len argument is not used.
+ */
+int
+getextmntent(FILE *fp, struct extmnttab *mp, int len)
+{
+ struct stat64 sb;
+ int status = _sol_getmntent(fp, (struct mnttab *) mp);
+
+ if (status != 0)
+ return status;
+
+ if (stat64(mp->mnt_mountp, &sb) == 0) {
+ mp->mnt_major = major(sb.st_dev);
+ mp->mnt_minor = minor(sb.st_dev);
+ } else {
+ mp->mnt_major = 0;
+ mp->mnt_minor = 0;
+ }
+
+ return status;
+}
diff --git a/lib/libspl/include/Makefile.am b/lib/libspl/include/Makefile.am
new file mode 100644
index 000000000..b47fad239
--- /dev/null
+++ b/lib/libspl/include/Makefile.am
@@ -0,0 +1,8 @@
+# Headers installed under the package include directory; the nobase_
+# prefix keeps the sub-directory part of each path on installation.
+# NOTE(review): automake does not expand wildcards itself; this presumably
+# relies on the globs being expanded when the generated rules run - confirm.
+nobase_pkginclude_HEADERS = *.h
+nobase_pkginclude_HEADERS += ia32/sys/*.h
+nobase_pkginclude_HEADERS += rpc/*.h
+nobase_pkginclude_HEADERS += sys/*.h
+nobase_pkginclude_HEADERS += sys/fm/*.h
+nobase_pkginclude_HEADERS += sys/dktp/*.h
+nobase_pkginclude_HEADERS += sys/sysevent/*.h
+nobase_pkginclude_HEADERS += tsol/*.h
diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h
new file mode 100644
index 000000000..7f145b89a
--- /dev/null
+++ b/lib/libspl/include/assert.h
@@ -0,0 +1,96 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <assert.h>
+
+#ifndef _LIBSPL_ASSERT_H
+#define _LIBSPL_ASSERT_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef __assert_c99
+/*
+ * Report a failed assertion on stderr (file, line, function and the text
+ * of the failed expression), then abort() the process.
+ */
+static inline void
+__assert_c99(const char *expr, const char *file, int line, const char *func)
+{
+	(void) fprintf(stderr,
+	    "%s:%i: %s: Assertion `%s` failed.\n",
+	    file, line, func, expr);
+	abort();
+}
+#endif /* __assert_c99 */
+
+/*
+ * verify(EX): evaluate EX in every build and report a failure if it is
+ * false. The whole expression is cast to void, so it yields no value.
+ *
+ * - C99 or later: reports through __assert_c99(), including __func__.
+ * - Older standard C: reports through __assert() with the stringified EX.
+ * - Without __STDC__: reports through _assert(); note that this branch
+ * passes the literal string "EX" rather than the expression text.
+ */
+#ifndef verify
+#if defined(__STDC__)
+#if __STDC_VERSION__ - 0 >= 199901L
+#define verify(EX) (void)((EX) || \
+ (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
+#else
+#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
+#endif /* __STDC_VERSION__ - 0 >= 199901L */
+#else
+#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
+#endif /* __STDC__ */
+#endif /* verify */
+
+#undef VERIFY
+#undef ASSERT
+
+#define VERIFY verify
+#define ASSERT assert
+
+extern void __assert(const char *, const char *, int);
+
+/*
+ * Shared implementation of the VERIFY3x() macros.
+ *
+ * Casts LEFT and RIGHT to TYPE, evaluates each exactly once, and applies
+ * the comparison operator OP. On failure it formats the operand text and
+ * both values (in hex) into a message and hands it to __assert().
+ *
+ * The message is built in a fixed-size automatic array, so the macro
+ * needs nothing beyond snprintf() and does not depend on alloca().
+ */
+/* BEGIN CSTYLED */
+#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
+	const TYPE __left = (TYPE)(LEFT); \
+	const TYPE __right = (TYPE)(RIGHT); \
+	if (!(__left OP __right)) { \
+		char __buf[256]; \
+		(void) snprintf(__buf, sizeof (__buf), \
+		    "%s %s %s (0x%llx %s 0x%llx)", \
+		    #LEFT, #OP, #RIGHT, \
+		    (u_longlong_t)__left, #OP, (u_longlong_t)__right); \
+		__assert(__buf, __FILE__, __LINE__); \
+	} \
+} while (0)
+/* END CSTYLED */
+
+/*
+ * VERIFY3S/U/P(left, op, right): always-on comparisons performed after
+ * casting both operands to int64_t, uint64_t or uintptr_t respectively.
+ */
+#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
+#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
+#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
+
+/*
+ * ASSERT3S/U/P mirror the VERIFY3x macros but expand to ((void)0) when
+ * NDEBUG is defined, so their operands are not evaluated in that case.
+ * ASSERTV(x) emits x only when NDEBUG is not defined.
+ */
+#ifdef NDEBUG
+#define ASSERT3S(x, y, z) ((void)0)
+#define ASSERT3U(x, y, z) ((void)0)
+#define ASSERT3P(x, y, z) ((void)0)
+#define ASSERTV(x)
+#else
+#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
+#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
+#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
+#define ASSERTV(x) x
+#endif /* NDEBUG */
+
+#endif /* _LIBSPL_ASSERT_H */
diff --git a/lib/libspl/include/atomic.h b/lib/libspl/include/atomic.h
new file mode 100644
index 000000000..508000152
--- /dev/null
+++ b/lib/libspl/include/atomic.h
@@ -0,0 +1,266 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ATOMIC_H
+#define _SYS_ATOMIC_H
+
+#include <sys/types.h>
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__STDC__)
+/*
+ * Increment target.
+ */
+extern void atomic_inc_8(volatile uint8_t *);
+extern void atomic_inc_uchar(volatile uchar_t *);
+extern void atomic_inc_16(volatile uint16_t *);
+extern void atomic_inc_ushort(volatile ushort_t *);
+extern void atomic_inc_32(volatile uint32_t *);
+extern void atomic_inc_uint(volatile uint_t *);
+extern void atomic_inc_ulong(volatile ulong_t *);
+#if defined(_INT64_TYPE)
+extern void atomic_inc_64(volatile uint64_t *);
+#endif
+
+/*
+ * Decrement target
+ */
+extern void atomic_dec_8(volatile uint8_t *);
+extern void atomic_dec_uchar(volatile uchar_t *);
+extern void atomic_dec_16(volatile uint16_t *);
+extern void atomic_dec_ushort(volatile ushort_t *);
+extern void atomic_dec_32(volatile uint32_t *);
+extern void atomic_dec_uint(volatile uint_t *);
+extern void atomic_dec_ulong(volatile ulong_t *);
+#if defined(_INT64_TYPE)
+extern void atomic_dec_64(volatile uint64_t *);
+#endif
+
+/*
+ * Add delta to target
+ */
+extern void atomic_add_8(volatile uint8_t *, int8_t);
+extern void atomic_add_char(volatile uchar_t *, signed char);
+extern void atomic_add_16(volatile uint16_t *, int16_t);
+extern void atomic_add_short(volatile ushort_t *, short);
+extern void atomic_add_32(volatile uint32_t *, int32_t);
+extern void atomic_add_int(volatile uint_t *, int);
+extern void atomic_add_ptr(volatile void *, ssize_t);
+extern void atomic_add_long(volatile ulong_t *, long);
+#if defined(_INT64_TYPE)
+extern void atomic_add_64(volatile uint64_t *, int64_t);
+#endif
+
+/*
+ * logical OR bits with target
+ */
+extern void atomic_or_8(volatile uint8_t *, uint8_t);
+extern void atomic_or_uchar(volatile uchar_t *, uchar_t);
+extern void atomic_or_16(volatile uint16_t *, uint16_t);
+extern void atomic_or_ushort(volatile ushort_t *, ushort_t);
+extern void atomic_or_32(volatile uint32_t *, uint32_t);
+extern void atomic_or_uint(volatile uint_t *, uint_t);
+extern void atomic_or_ulong(volatile ulong_t *, ulong_t);
+#if defined(_INT64_TYPE)
+extern void atomic_or_64(volatile uint64_t *, uint64_t);
+#endif
+
+/*
+ * logical AND bits with target
+ */
+extern void atomic_and_8(volatile uint8_t *, uint8_t);
+extern void atomic_and_uchar(volatile uchar_t *, uchar_t);
+extern void atomic_and_16(volatile uint16_t *, uint16_t);
+extern void atomic_and_ushort(volatile ushort_t *, ushort_t);
+extern void atomic_and_32(volatile uint32_t *, uint32_t);
+extern void atomic_and_uint(volatile uint_t *, uint_t);
+extern void atomic_and_ulong(volatile ulong_t *, ulong_t);
+#if defined(_INT64_TYPE)
+extern void atomic_and_64(volatile uint64_t *, uint64_t);
+#endif
+
+/*
+ * As above, but return the new value. Note that these _nv() variants are
+ * substantially more expensive on some platforms than the no-return-value
+ * versions above, so don't use them unless you really need to know the
+ * new value *atomically* (e.g. when decrementing a reference count and
+ * checking whether it went to zero).
+ */
+
+/*
+ * Increment target and return new value.
+ */
+extern uint8_t atomic_inc_8_nv(volatile uint8_t *);
+extern uchar_t atomic_inc_uchar_nv(volatile uchar_t *);
+extern uint16_t atomic_inc_16_nv(volatile uint16_t *);
+extern ushort_t atomic_inc_ushort_nv(volatile ushort_t *);
+extern uint32_t atomic_inc_32_nv(volatile uint32_t *);
+extern uint_t atomic_inc_uint_nv(volatile uint_t *);
+extern ulong_t atomic_inc_ulong_nv(volatile ulong_t *);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_inc_64_nv(volatile uint64_t *);
+#endif
+
+/*
+ * Decrement target and return new value.
+ */
+extern uint8_t atomic_dec_8_nv(volatile uint8_t *);
+extern uchar_t atomic_dec_uchar_nv(volatile uchar_t *);
+extern uint16_t atomic_dec_16_nv(volatile uint16_t *);
+extern ushort_t atomic_dec_ushort_nv(volatile ushort_t *);
+extern uint32_t atomic_dec_32_nv(volatile uint32_t *);
+extern uint_t atomic_dec_uint_nv(volatile uint_t *);
+extern ulong_t atomic_dec_ulong_nv(volatile ulong_t *);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_dec_64_nv(volatile uint64_t *);
+#endif
+
+/*
+ * Add delta to target and return new value.
+ */
+extern uint8_t atomic_add_8_nv(volatile uint8_t *, int8_t);
+extern uchar_t atomic_add_char_nv(volatile uchar_t *, signed char);
+extern uint16_t atomic_add_16_nv(volatile uint16_t *, int16_t);
+extern ushort_t atomic_add_short_nv(volatile ushort_t *, short);
+extern uint32_t atomic_add_32_nv(volatile uint32_t *, int32_t);
+extern uint_t atomic_add_int_nv(volatile uint_t *, int);
+extern void *atomic_add_ptr_nv(volatile void *, ssize_t);
+extern ulong_t atomic_add_long_nv(volatile ulong_t *, long);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_add_64_nv(volatile uint64_t *, int64_t);
+#endif
+
+/*
+ * logical OR bits with target and return new value.
+ */
+extern uint8_t atomic_or_8_nv(volatile uint8_t *, uint8_t);
+extern uchar_t atomic_or_uchar_nv(volatile uchar_t *, uchar_t);
+extern uint16_t atomic_or_16_nv(volatile uint16_t *, uint16_t);
+extern ushort_t atomic_or_ushort_nv(volatile ushort_t *, ushort_t);
+extern uint32_t atomic_or_32_nv(volatile uint32_t *, uint32_t);
+extern uint_t atomic_or_uint_nv(volatile uint_t *, uint_t);
+extern ulong_t atomic_or_ulong_nv(volatile ulong_t *, ulong_t);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_or_64_nv(volatile uint64_t *, uint64_t);
+#endif
+
+/*
+ * logical AND bits with target and return new value.
+ */
+extern uint8_t atomic_and_8_nv(volatile uint8_t *, uint8_t);
+extern uchar_t atomic_and_uchar_nv(volatile uchar_t *, uchar_t);
+extern uint16_t atomic_and_16_nv(volatile uint16_t *, uint16_t);
+extern ushort_t atomic_and_ushort_nv(volatile ushort_t *, ushort_t);
+extern uint32_t atomic_and_32_nv(volatile uint32_t *, uint32_t);
+extern uint_t atomic_and_uint_nv(volatile uint_t *, uint_t);
+extern ulong_t atomic_and_ulong_nv(volatile ulong_t *, ulong_t);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_and_64_nv(volatile uint64_t *, uint64_t);
+#endif
+
+/*
+ * If *arg1 == arg2, set *arg1 = arg3; return old value
+ */
+extern uint8_t atomic_cas_8(volatile uint8_t *, uint8_t, uint8_t);
+extern uchar_t atomic_cas_uchar(volatile uchar_t *, uchar_t, uchar_t);
+extern uint16_t atomic_cas_16(volatile uint16_t *, uint16_t, uint16_t);
+extern ushort_t atomic_cas_ushort(volatile ushort_t *, ushort_t, ushort_t);
+extern uint32_t atomic_cas_32(volatile uint32_t *, uint32_t, uint32_t);
+extern uint_t atomic_cas_uint(volatile uint_t *, uint_t, uint_t);
+extern void *atomic_cas_ptr(volatile void *, void *, void *);
+extern ulong_t atomic_cas_ulong(volatile ulong_t *, ulong_t, ulong_t);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_cas_64(volatile uint64_t *, uint64_t, uint64_t);
+#endif
+
+/*
+ * Swap target and return old value
+ */
+extern uint8_t atomic_swap_8(volatile uint8_t *, uint8_t);
+extern uchar_t atomic_swap_uchar(volatile uchar_t *, uchar_t);
+extern uint16_t atomic_swap_16(volatile uint16_t *, uint16_t);
+extern ushort_t atomic_swap_ushort(volatile ushort_t *, ushort_t);
+extern uint32_t atomic_swap_32(volatile uint32_t *, uint32_t);
+extern uint_t atomic_swap_uint(volatile uint_t *, uint_t);
+extern void *atomic_swap_ptr(volatile void *, void *);
+extern ulong_t atomic_swap_ulong(volatile ulong_t *, ulong_t);
+#if defined(_INT64_TYPE)
+extern uint64_t atomic_swap_64(volatile uint64_t *, uint64_t);
+#endif
+
+/*
+ * Perform an exclusive atomic bit set/clear on a target.
+ * Returns 0 if bit was successfully set/cleared, or -1
+ * if the bit was already set/cleared.
+ */
+extern int atomic_set_long_excl(volatile ulong_t *, uint_t);
+extern int atomic_clear_long_excl(volatile ulong_t *, uint_t);
+
+/*
+ * Generic memory barrier used during lock entry, placed after the
+ * memory operation that acquires the lock to guarantee that the lock
+ * protects its data. No stores from after the memory barrier will
+ * reach visibility, and no loads from after the barrier will be
+ * resolved, before the lock acquisition reaches global visibility.
+ */
+extern void membar_enter(void);
+
+/*
+ * Generic memory barrier used during lock exit, placed before the
+ * memory operation that releases the lock to guarantee that the lock
+ * protects its data. All loads and stores issued before the barrier
+ * will be resolved before the subsequent lock update reaches visibility.
+ */
+extern void membar_exit(void);
+
+/*
+ * Arrange that all stores issued before this point in the code reach
+ * global visibility before any stores that follow; useful in producer
+ * modules that update a data item, then set a flag that it is available.
+ * The memory barrier guarantees that the available flag is not visible
+ * earlier than the updated data, i.e. it imposes store ordering.
+ */
+extern void membar_producer(void);
+
+/*
+ * Arrange that all loads issued before this point in the code are
+ * completed before any subsequent loads; useful in consumer modules
+ * that check to see if data is available and read the data.
+ * The memory barrier guarantees that the data is not sampled until
+ * after the available flag has been seen, i.e. it imposes load ordering.
+ */
+extern void membar_consumer(void);
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ATOMIC_H */
diff --git a/lib/libspl/include/devid.h b/lib/libspl/include/devid.h
new file mode 100644
index 000000000..9dfdae84b
--- /dev/null
+++ b/lib/libspl/include/devid.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_DEVID_H
+#define _LIBSPL_DEVID_H
+
+#include <stdlib.h>
+
+typedef int ddi_devid_t;
+
+typedef struct devid_nmlist {
+ char *devname;
+ dev_t dev;
+} devid_nmlist_t;
+
+/*
+ * Stub implementations of the devid API. devid_get() always fails by
+ * returning -1; every other stub calls abort() if it is ever reached.
+ */
+static inline int
+devid_str_decode(char *devidstr, ddi_devid_t *retdevid, char **retminor_name)
+{
+	abort();
+}
+
+static inline int
+devid_deviceid_to_nmlist(char *search_path, ddi_devid_t devid,
+    char *minor_name, devid_nmlist_t **retlist)
+{
+	abort();
+}
+
+static inline void
+devid_str_free(char *str)
+{
+	abort();
+}
+
+static inline void
+devid_free(ddi_devid_t devid)
+{
+	abort();
+}
+
+static inline void
+devid_free_nmlist(devid_nmlist_t *list)
+{
+	abort();
+}
+
+static inline int
+devid_get(int fd, ddi_devid_t *retdevid)
+{
+	return -1;
+}
+
+static inline int
+devid_get_minor_name(int fd, char **retminor_name)
+{
+	abort();
+}
+
+static inline char *
+devid_str_encode(ddi_devid_t devid, char *minor_name)
+{
+	abort();
+}
+
+#endif
diff --git a/lib/libspl/include/ia32/sys/asm_linkage.h b/lib/libspl/include/ia32/sys/asm_linkage.h
new file mode 100644
index 000000000..61c4d1a26
--- /dev/null
+++ b/lib/libspl/include/ia32/sys/asm_linkage.h
@@ -0,0 +1,302 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _IA32_SYS_ASM_LINKAGE_H
+#define _IA32_SYS_ASM_LINKAGE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _ASM /* The remainder of this file is only for assembly files */
+
+/*
+ * make annoying differences in assembler syntax go away
+ */
+
+/*
+ * D16 and A16 are used to insert instructions prefixes; the
+ * macros help the assembler code be slightly more portable.
+ */
+#if !defined(__GNUC_AS__)
+/*
+ * /usr/ccs/bin/as prefixes are parsed as separate instructions
+ */
+#define D16 data16;
+#define A16 addr16;
+
+/*
+ * (There are some weird constructs in constant expressions)
+ */
+#define _CONST(const) [const]
+#define _BITNOT(const) -1!_CONST(const)
+#define _MUL(a, b) _CONST(a \* b)
+
+#else
+/*
+ * Why not use the 'data16' and 'addr16' prefixes .. well, the
+ * assembler doesn't quite believe in real mode, and thus argues with
+ * us about what we're trying to do.
+ */
+#define D16 .byte 0x66;
+#define A16 .byte 0x67;
+
+#define _CONST(const) (const)
+#define _BITNOT(const) ~_CONST(const)
+#define _MUL(a, b) _CONST(a * b)
+
+#endif
+
+/*
+ * C pointers are different sizes between i386 and amd64.
+ * These constants can be used to compute offsets into pointer arrays.
+ */
+#if defined(__amd64)
+#define CLONGSHIFT 3
+#define CLONGSIZE 8
+#define CLONGMASK 7
+#elif defined(__i386)
+#define CLONGSHIFT 2
+#define CLONGSIZE 4
+#define CLONGMASK 3
+#endif
+
+/*
+ * Since we know we're either ILP32 or LP64 ..
+ */
+#define CPTRSHIFT CLONGSHIFT
+#define CPTRSIZE CLONGSIZE
+#define CPTRMASK CLONGMASK
+
+#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
+#error "inconsistent shift constants"
+#endif
+
+#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
+#error "inconsistent mask constants"
+#endif
+
+#define ASM_ENTRY_ALIGN 16
+
+/*
+ * SSE register alignment and save areas
+ */
+
+#define XMM_SIZE 16
+#define XMM_ALIGN 16
+
+#if defined(__amd64)
+
+/*
+ * amd64: reserve nreg * XMM_SIZE bytes on the stack and leave the new
+ * stack pointer in sreg. No alignment adjustment is made here.
+ */
+#define SAVE_XMM_PROLOG(sreg, nreg) \
+ subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \
+ movq %rsp, sreg
+
+/* amd64: release the space reserved by SAVE_XMM_PROLOG (sreg is unused). */
+#define RSTOR_XMM_EPILOG(sreg, nreg) \
+ addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp
+
+#elif defined(__i386)
+
+/*
+ * i386: reserve nreg * XMM_SIZE bytes plus XMM_ALIGN bytes of slack, then
+ * set sreg to (%esp + XMM_ALIGN) rounded down to a multiple of XMM_ALIGN,
+ * giving an XMM_ALIGN-aligned save area inside the reserved space.
+ */
+#define SAVE_XMM_PROLOG(sreg, nreg) \
+ subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \
+ movl %esp, sreg; \
+ addl $XMM_ALIGN, sreg; \
+ andl $_BITNOT(XMM_ALIGN-1), sreg
+
+/*
+ * i386: release the space reserved by SAVE_XMM_PROLOG (sreg is unused).
+ * Unlike the amd64 variant, this expansion ends with a ';'.
+ */
+#define RSTOR_XMM_EPILOG(sreg, nreg) \
+ addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp;
+
+#endif /* __i386 */
+
+/*
+ * Profiling selects definitions of MCOUNT and RTMCOUNT that are
+ * particular to the kind of profiling in use (GPROF or PROF).
+ */
+#ifdef GPROF
+
+#define MCOUNT(x) \
+ pushl %ebp; \
+ movl %esp, %ebp; \
+ call _mcount; \
+ popl %ebp
+
+#endif /* GPROF */
+
+#ifdef PROF
+
+#define MCOUNT(x) \
+/* CSTYLED */ \
+ .lcomm .L_/**/x/**/1, 4, 4; \
+ pushl %ebp; \
+ movl %esp, %ebp; \
+/* CSTYLED */ \
+ movl $.L_/**/x/**/1, %edx; \
+ call _mcount; \
+ popl %ebp
+
+#endif /* PROF */
+
+/*
+ * if we are not profiling, MCOUNT should be defined to nothing
+ */
+#if !defined(PROF) && !defined(GPROF)
+#define MCOUNT(x)
+#endif /* !defined(PROF) && !defined(GPROF) */
+
+#define RTMCOUNT(x) MCOUNT(x)
+
+/*
+ * Macro to define weak symbol aliases. These are similar to the ANSI-C
+ * #pragma weak name = _name
+ * except a compiler can determine type. The assembler must be told. Hence,
+ * the second parameter must be the type of the symbol (i.e.: function,...)
+ */
+#define ANSI_PRAGMA_WEAK(sym, stype) \
+ .weak sym; \
+ .type sym, @stype; \
+/* CSTYLED */ \
+sym = _/**/sym
+
+/*
+ * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in:
+ * #pragma weak sym1 = sym2
+ */
+#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \
+ .weak sym1; \
+ .type sym1, @stype; \
+sym1 = sym2
+
+/*
+ * ENTRY provides the standard procedure entry code and an easy way to
+ * insert the calls to mcount for profiling. ENTRY_NP is identical, but
+ * never calls mcount.
+ */
+#define ENTRY(x) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x; \
+ .type x, @function; \
+x: MCOUNT(x)
+
+#define ENTRY_NP(x) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x; \
+ .type x, @function; \
+x:
+
+#define RTENTRY(x) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x; \
+ .type x, @function; \
+x: RTMCOUNT(x)
+
+/*
+ * ENTRY2 is identical to ENTRY but provides two labels for the entry point.
+ */
+#define ENTRY2(x, y) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x, y; \
+ .type x, @function; \
+ .type y, @function; \
+/* CSTYLED */ \
+x: ; \
+y: MCOUNT(x)
+
+#define ENTRY_NP2(x, y) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x, y; \
+ .type x, @function; \
+ .type y, @function; \
+/* CSTYLED */ \
+x: ; \
+y:
+
+
+/*
+ * ALTENTRY provides for additional entry points.
+ */
+#define ALTENTRY(x) \
+ .globl x; \
+ .type x, @function; \
+x:
+
+/*
+ * DGDEF, DGDEF2 and DGDEF3 provide global data declarations.
+ *
+ * DGDEF provides a word aligned word of storage.
+ *
+ * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This
+ * implies this macro is best used for byte arrays.
+ *
+ * DGDEF3 allocates "sz" bytes of storage with "algn" alignment.
+ */
+#define DGDEF2(name, sz) \
+ .data; \
+ .globl name; \
+ .type name, @object; \
+ .size name, sz; \
+name:
+
+#define DGDEF3(name, sz, algn) \
+ .data; \
+ .align algn; \
+ .globl name; \
+ .type name, @object; \
+ .size name, sz; \
+name:
+
+#define DGDEF(name) DGDEF3(name, 4, 4)
+
+/*
+ * SET_SIZE trails a function and sets the size for the ELF symbol table.
+ */
+#define SET_SIZE(x) \
+ .size x, [.-x]
+
+/*
+ * NWORD provides native word value.
+ */
+#if defined(__amd64)
+
+/*CSTYLED*/
+#define NWORD quad
+
+#elif defined(__i386)
+
+#define NWORD long
+
+#endif /* __i386 */
+
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IA32_SYS_ASM_LINKAGE_H */
diff --git a/lib/libspl/include/libdevinfo.h b/lib/libspl/include/libdevinfo.h
new file mode 100644
index 000000000..f0f9d7e8e
--- /dev/null
+++ b/lib/libspl/include/libdevinfo.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_LIBDEVINFO_H
+#define _LIBSPL_LIBDEVINFO_H
+
+#endif /* _LIBSPL_LIBDEVINFO_H */
diff --git a/lib/libspl/include/libshare.h b/lib/libspl/include/libshare.h
new file mode 100644
index 000000000..afbdf5b05
--- /dev/null
+++ b/lib/libspl/include/libshare.h
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * basic API declarations for share management
+ */
+
+#ifndef _LIBSPL_LIBSHARE_H
+#define _LIBSPL_LIBSHARE_H
+
+#endif /* _LIBSPL_LIBSHARE_H */
diff --git a/lib/libspl/include/limits.h b/lib/libspl/include/limits.h
new file mode 100644
index 000000000..341a2eba9
--- /dev/null
+++ b/lib/libspl/include/limits.h
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <limits.h>
+
+#ifndef _LIBSPL_LIMITS_H
+#define _LIBSPL_LIMITS_H
+
+/*
+ * Floating-point limits that Solaris-derived code expects to find in
+ * <limits.h>. Each definition is guarded so that a value already
+ * supplied (for example by <float.h>) is kept instead of being redefined.
+ */
+#ifndef DBL_DIG
+#define DBL_DIG 15
+#endif
+#ifndef DBL_MAX
+#define DBL_MAX 1.7976931348623157081452E+308
+#endif
+#ifndef DBL_MIN
+#define DBL_MIN 2.2250738585072013830903E-308
+#endif
+
+#ifndef FLT_DIG
+#define FLT_DIG 6
+#endif
+#ifndef FLT_MAX
+#define FLT_MAX 3.4028234663852885981170E+38F
+#endif
+#ifndef FLT_MIN
+#define FLT_MIN 1.1754943508222875079688E-38F
+#endif
+
+#endif /* _LIBSPL_LIMITS_H */
diff --git a/lib/libspl/include/locale.h b/lib/libspl/include/locale.h
new file mode 100644
index 000000000..98ca330c3
--- /dev/null
+++ b/lib/libspl/include/locale.h
@@ -0,0 +1,35 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <locale.h>
+
+#ifndef _LIBSPL_LOCALE_H
+#define _LIBSPL_LOCALE_H
+
+#include <time.h>
+#include <sys/time.h>
+
+#endif
diff --git a/lib/libspl/include/priv.h b/lib/libspl/include/priv.h
new file mode 100644
index 000000000..6c9a2c0e6
--- /dev/null
+++ b/lib/libspl/include/priv.h
@@ -0,0 +1,37 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_PRIV_H
+#define _LIBSPL_PRIV_H
+
+#include <sys/types.h>
+
+/* Couldn't find this definition in OpenGrok */
+#define PRIV_SYS_CONFIG "sys_config"
+
+/*
+ * Stub privilege check: reports every privilege as being in effect,
+ * whatever name is passed in priv.
+ */
+static inline boolean_t
+priv_ineffect(const char *priv)
+{
+	return B_TRUE;
+}
+
+#endif
diff --git a/lib/libspl/include/rpc/xdr.h b/lib/libspl/include/rpc/xdr.h
new file mode 100644
index 000000000..cd6680f57
--- /dev/null
+++ b/lib/libspl/include/rpc/xdr.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
+ * All Rights Reserved
+ *
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ */
+
+#ifndef LIBSPL_RPC_XDR_H
+#define LIBSPL_RPC_XDR_H
+
+#include_next <rpc/xdr.h>
+
+/*
+ * These are XDR control operators
+ */
+
+#define XDR_GET_BYTES_AVAIL 1
+
+typedef struct xdr_bytesrec {
+ bool_t xc_is_last_record;
+ size_t xc_num_avail;
+} xdr_bytesrec_t;
+
+/*
+ * These are the request arguments to XDR_CONTROL.
+ *
+ * XDR_PEEK - returns the contents of the next XDR unit on the XDR stream.
+ * XDR_SKIPBYTES - skips the next N bytes in the XDR stream.
+ * XDR_RDMAGET - for xdr implementation over RDMA, gets private flags from
+ * the XDR stream being moved over RDMA
+ * XDR_RDMASET - for xdr implementation over RDMA, sets private flags in
+ * the XDR stream moving over RDMA.
+ */
+#define XDR_PEEK 2
+#define XDR_SKIPBYTES 3
+#define XDR_RDMAGET 4
+#define XDR_RDMASET 5
+
+extern bool_t xdr_control(XDR *xdrs, int request, void *info);
+
+#endif
diff --git a/lib/libspl/include/stdio.h b/lib/libspl/include/stdio.h
new file mode 100644
index 000000000..f80fdc009
--- /dev/null
+++ b/lib/libspl/include/stdio.h
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <stdio.h>
+
+#ifndef _LIBSPL_STDIO_H
+#define _LIBSPL_STDIO_H
+
+#define enable_extended_FILE_stdio(fd, sig) ((void) 0) /* no-op stub; both arguments are discarded */
+
+#endif
diff --git a/lib/libspl/include/stdlib.h b/lib/libspl/include/stdlib.h
new file mode 100644
index 000000000..67d6e96e0
--- /dev/null
+++ b/lib/libspl/include/stdlib.h
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <stdlib.h>
+
+#ifndef _LIBSPL_STDLIB_H
+#define _LIBSPL_STDLIB_H
+
+extern const char *getexecname(void);
+
+#endif
diff --git a/lib/libspl/include/string.h b/lib/libspl/include/string.h
new file mode 100644
index 000000000..213977d0e
--- /dev/null
+++ b/lib/libspl/include/string.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_STRING_H
+#define _LIBSPL_STRING_H
+
+#include_next <string.h>
+
+extern size_t strlcat(char *dst, const char *src, size_t dstsize);
+extern size_t strlcpy(char *dst, const char *src, size_t len);
+extern size_t strnlen(const char *str, size_t maxlen);
+
+#endif
diff --git a/lib/libspl/include/strings.h b/lib/libspl/include/strings.h
new file mode 100644
index 000000000..48944e142
--- /dev/null
+++ b/lib/libspl/include/strings.h
@@ -0,0 +1,33 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_STRINGS_H
+#define _LIBSPL_STRINGS_H
+
+#include <string.h>
+#include_next <strings.h>
+
+#endif
diff --git a/lib/libspl/include/synch.h b/lib/libspl/include/synch.h
new file mode 100644
index 000000000..2da270a42
--- /dev/null
+++ b/lib/libspl/include/synch.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYNCH_H
+#define _LIBSPL_SYNCH_H
+
+#endif
diff --git a/lib/libspl/include/sys/acl.h b/lib/libspl/include/sys/acl.h
new file mode 100644
index 000000000..e6df864f8
--- /dev/null
+++ b/lib/libspl/include/sys/acl.h
@@ -0,0 +1,287 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ACL_H
+#define _SYS_ACL_H
+
+#include <sys/types.h>
+#include <sys/acl_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_ACL_ENTRIES (1024) /* max entries of each type */
+typedef struct acl {
+ int a_type; /* the type of ACL entry */
+ uid_t a_id; /* the uid or gid this entry refers to */
+ o_mode_t a_perm; /* the permission field */
+} aclent_t;
+
+typedef struct ace {
+ uid_t a_who; /* uid or gid */
+ uint32_t a_access_mask; /* read,write,... */
+ uint16_t a_flags; /* see below */
+ uint16_t a_type; /* allow or deny */
+} ace_t;
+
+typedef struct acl_info acl_t;
+
+/*
+ * The following are the defined types for an aclent_t.
+ */
+#define USER_OBJ (0x01) /* object owner */
+#define USER (0x02) /* additional users */
+#define GROUP_OBJ (0x04) /* owning group of the object */
+#define GROUP (0x08) /* additional groups */
+#define CLASS_OBJ (0x10) /* file group class and mask entry */
+#define OTHER_OBJ (0x20) /* other entry for the object */
+#define ACL_DEFAULT (0x1000) /* default flag */
+/* default object owner */
+#define DEF_USER_OBJ (ACL_DEFAULT | USER_OBJ)
+/* default additional users */
+#define DEF_USER (ACL_DEFAULT | USER)
+/* default owning group */
+#define DEF_GROUP_OBJ (ACL_DEFAULT | GROUP_OBJ)
+/* default additional groups */
+#define DEF_GROUP (ACL_DEFAULT | GROUP)
+/* default mask entry */
+#define DEF_CLASS_OBJ (ACL_DEFAULT | CLASS_OBJ)
+/* default other entry */
+#define DEF_OTHER_OBJ (ACL_DEFAULT | OTHER_OBJ)
+
+/*
+ * The following are defined for ace_t.
+ */
+#define ACE_READ_DATA 0x00000001
+#define ACE_LIST_DIRECTORY 0x00000001
+#define ACE_WRITE_DATA 0x00000002
+#define ACE_ADD_FILE 0x00000002
+#define ACE_APPEND_DATA 0x00000004
+#define ACE_ADD_SUBDIRECTORY 0x00000004
+#define ACE_READ_NAMED_ATTRS 0x00000008
+#define ACE_WRITE_NAMED_ATTRS 0x00000010
+#define ACE_EXECUTE 0x00000020
+#define ACE_DELETE_CHILD 0x00000040
+#define ACE_READ_ATTRIBUTES 0x00000080
+#define ACE_WRITE_ATTRIBUTES 0x00000100
+#define ACE_DELETE 0x00010000
+#define ACE_READ_ACL 0x00020000
+#define ACE_WRITE_ACL 0x00040000
+#define ACE_WRITE_OWNER 0x00080000
+#define ACE_SYNCHRONIZE 0x00100000
+
+#define ACE_FILE_INHERIT_ACE 0x0001
+#define ACE_DIRECTORY_INHERIT_ACE 0x0002
+#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004
+#define ACE_INHERIT_ONLY_ACE 0x0008
+#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010
+#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020
+#define ACE_IDENTIFIER_GROUP 0x0040
+#define ACE_INHERITED_ACE 0x0080
+#define ACE_OWNER 0x1000
+#define ACE_GROUP 0x2000
+#define ACE_EVERYONE 0x4000
+
+#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000
+#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001
+#define ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002
+#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003
+
+#define ACL_AUTO_INHERIT 0x0001
+#define ACL_PROTECTED 0x0002
+#define ACL_DEFAULTED 0x0004
+#define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED| \
+ ACL_DEFAULTED)
+
+#ifdef _KERNEL
+
+/*
+ * These are only applicable in a CIFS context.
+ */
+#define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04
+#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
+#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06
+#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07
+#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08
+#define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09
+#define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A
+#define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B
+#define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C
+#define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D
+#define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E
+#define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F
+#define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10
+
+#define ACE_ALL_TYPES 0x001F
+
+typedef struct ace_object {
+ uid_t a_who; /* uid or gid */
+ uint32_t a_access_mask; /* read,write,... */
+ uint16_t a_flags; /* see below */
+ uint16_t a_type; /* allow or deny */
+ uint8_t a_obj_type[16]; /* obj type */
+ uint8_t a_inherit_obj_type[16]; /* inherit obj */
+} ace_object_t;
+
+#endif
+
+#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
+ ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
+ ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
+ ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
+
+/*
+ * The following flags are supported by both NFSv4 ACLs and ace_t.
+ */
+#define ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \
+ ACE_DIRECTORY_INHERIT_ACE | \
+ ACE_NO_PROPAGATE_INHERIT_ACE | \
+ ACE_INHERIT_ONLY_ACE | \
+ ACE_IDENTIFIER_GROUP)
+
+#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE| \
+ ACE_IDENTIFIER_GROUP)
+#define ACE_INHERIT_FLAGS (ACE_FILE_INHERIT_ACE| \
+ ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE)
+
+/* cmd args to acl(2) for aclent_t */
+#define GETACL 1
+#define SETACL 2
+#define GETACLCNT 3
+
+/* cmds to manipulate ace_t ACLs */
+#define ACE_GETACL 4
+#define ACE_SETACL 5
+#define ACE_GETACLCNT 6
+
+/* minimal acl entries from GETACLCNT */
+#define MIN_ACL_ENTRIES 4
+
+#if !defined(_KERNEL)
+
+/* acl check errors */
+#define GRP_ERROR 1
+#define USER_ERROR 2
+#define OTHER_ERROR 3
+#define CLASS_ERROR 4
+#define DUPLICATE_ERROR 5
+#define MISS_ERROR 6
+#define MEM_ERROR 7
+#define ENTRY_ERROR 8
+
+
+/*
+ * similar to ufs_acl.h: changed to char type for user commands (tar, cpio)
+ * Attribute types
+ */
+#define UFSD_FREE ('0') /* Free entry */
+#define UFSD_ACL ('1') /* Access Control Lists */
+#define UFSD_DFACL ('2') /* reserved for future use */
+#define ACE_ACL ('3') /* ace_t style acls */
+
+/*
+ * flag to [f]acl_get()
+ * controls whether a trivial acl should be returned.
+ */
+#define ACL_NO_TRIVIAL 0x2
+
+
+/*
+ * Flags to control acl_totext()
+ */
+
+#define ACL_APPEND_ID 0x1 /* append uid/gid to user/group entries */
+#define ACL_COMPACT_FMT 0x2 /* build ACL in ls -V format */
+#define ACL_NORESOLVE 0x4 /* don't do name service lookups */
+
+/*
+ * Legacy aclcheck errors for aclent_t ACLs
+ */
+#define EACL_GRP_ERROR GRP_ERROR
+#define EACL_USER_ERROR USER_ERROR
+#define EACL_OTHER_ERROR OTHER_ERROR
+#define EACL_CLASS_ERROR CLASS_ERROR
+#define EACL_DUPLICATE_ERROR DUPLICATE_ERROR
+#define EACL_MISS_ERROR MISS_ERROR
+#define EACL_MEM_ERROR MEM_ERROR
+#define EACL_ENTRY_ERROR ENTRY_ERROR
+
+#define EACL_INHERIT_ERROR 9 /* invalid inherit flags */
+#define EACL_FLAGS_ERROR 10 /* unknown flag value */
+#define EACL_PERM_MASK_ERROR 11 /* unknown permission */
+#define EACL_COUNT_ERROR 12 /* invalid acl count */
+
+#define EACL_INVALID_SLOT 13 /* invalid acl slot */
+#define EACL_NO_ACL_ENTRY 14 /* Entry doesn't exist */
+#define EACL_DIFF_TYPE 15 /* acls aren't same type */
+
+#define EACL_INVALID_USER_GROUP 16 /* need user/group name */
+#define EACL_INVALID_STR 17 /* invalid acl string */
+#define EACL_FIELD_NOT_BLANK 18 /* can't have blank field */
+#define EACL_INVALID_ACCESS_TYPE 19 /* invalid access type */
+#define EACL_UNKNOWN_DATA 20 /* Unrecognized data in ACL */
+#define EACL_MISSING_FIELDS 21 /* missing fields in acl */
+
+#define EACL_INHERIT_NOTDIR 22 /* Need dir for inheritance */
+
+extern int aclcheck(aclent_t *, int, int *);
+extern int acltomode(aclent_t *, int, mode_t *);
+extern int aclfrommode(aclent_t *, int, mode_t *);
+extern int aclsort(int, int, aclent_t *);
+extern char *acltotext(aclent_t *, int);
+extern aclent_t *aclfromtext(char *, int *);
+extern void acl_free(acl_t *);
+extern int acl_get(const char *, int, acl_t **);
+extern int facl_get(int, int, acl_t **);
+extern int acl_set(const char *, acl_t *acl);
+extern int facl_set(int, acl_t *acl);
+extern int acl_strip(const char *, uid_t, gid_t, mode_t);
+extern int acl_trivial(const char *);
+extern char *acl_totext(acl_t *, int);
+extern int acl_fromtext(const char *, acl_t **);
+extern int acl_check(acl_t *, int);
+
+#else /* !defined(_KERNEL) */
+
+extern void ksort(caddr_t, int, int, int (*)(void *, void *));
+extern int cmp2acls(void *, void *);
+
+#endif /* !defined(_KERNEL) */
+
+#if defined(__STDC__)
+extern int acl(const char *path, int cmd, int cnt, void *buf);
+extern int facl(int fd, int cmd, int cnt, void *buf);
+#else /* !__STDC__ */
+extern int acl();
+extern int facl();
+#endif /* defined(__STDC__) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ACL_H */
diff --git a/lib/libspl/include/sys/acl_impl.h b/lib/libspl/include/sys/acl_impl.h
new file mode 100644
index 000000000..717334906
--- /dev/null
+++ b/lib/libspl/include/sys/acl_impl.h
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ACL_IMPL_H
+#define _SYS_ACL_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * acl flags
+ *
+ * ACL_AUTO_INHERIT, ACL_PROTECTED and ACL_DEFAULTED
+ * flags can also be stored in this field.
+ */
+#define ACL_IS_TRIVIAL 0x10000
+#define ACL_IS_DIR 0x20000
+
+typedef enum acl_type {
+ ACLENT_T = 0,
+ ACE_T = 1
+} acl_type_t;
+
+struct acl_info {
+ acl_type_t acl_type; /* style of acl */
+ int acl_cnt; /* number of acl entries */
+ int acl_entry_size; /* sizeof acl entry */
+ int acl_flags; /* special flags about acl */
+ void *acl_aclp; /* the acl */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ACL_IMPL_H */
diff --git a/lib/libspl/include/sys/bitmap.h b/lib/libspl/include/sys/bitmap.h
new file mode 100644
index 000000000..8fef7fcfe
--- /dev/null
+++ b/lib/libspl/include/sys/bitmap.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_BITMAP_H
+#define _LIBSPL_SYS_BITMAP_H
+
+#endif
diff --git a/lib/libspl/include/sys/byteorder.h b/lib/libspl/include/sys/byteorder.h
new file mode 100644
index 000000000..528d2d208
--- /dev/null
+++ b/lib/libspl/include/sys/byteorder.h
@@ -0,0 +1,199 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _SYS_BYTEORDER_H
+#define _SYS_BYTEORDER_H
+
+
+
+#include <sys/isa_defs.h>
+#include <sys/int_types.h>
+
+#if defined(__GNUC__) && defined(_ASM_INLINES) && \
+ (defined(__i386) || defined(__amd64))
+#include <asm/byteorder.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * macros for conversion between host and (internet) network byte order
+ */
+
+#if defined(_BIG_ENDIAN) && !defined(ntohl) && !defined(__lint)
+/* big-endian */
+#define ntohl(x) (x)
+#define ntohs(x) (x)
+#define htonl(x) (x)
+#define htons(x) (x)
+
+#elif !defined(ntohl) /* little-endian */
+
+#ifndef _IN_PORT_T
+#define _IN_PORT_T
+typedef uint16_t in_port_t;
+#endif
+
+#ifndef _IN_ADDR_T
+#define _IN_ADDR_T
+typedef uint32_t in_addr_t;
+#endif
+
+#if !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5)
+extern uint32_t htonl(uint32_t);
+extern uint16_t htons(uint16_t);
+extern uint32_t ntohl(uint32_t);
+extern uint16_t ntohs(uint16_t);
+#else
+extern in_addr_t htonl(in_addr_t);
+extern in_port_t htons(in_port_t);
+extern in_addr_t ntohl(in_addr_t);
+extern in_port_t ntohs(in_port_t);
+#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) */
+#endif
+
+#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
+
+/*
+ * Macros to reverse byte order; BSWAP_16 and wider evaluate x more than once
+ */
+#define BSWAP_8(x) ((x) & 0xff)
+#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
+#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
+#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
+
+#define BMASK_8(x) ((x) & 0xff)
+#define BMASK_16(x) ((x) & 0xffff)
+#define BMASK_32(x) ((x) & 0xffffffff)
+#define BMASK_64(x) (x)
+
+/*
+ * Macros to convert from a specific byte order to/from native byte order
+ */
+#ifdef _BIG_ENDIAN
+#define BE_8(x) BMASK_8(x)
+#define BE_16(x) BMASK_16(x)
+#define BE_32(x) BMASK_32(x)
+#define BE_64(x) BMASK_64(x)
+#define LE_8(x) BSWAP_8(x)
+#define LE_16(x) BSWAP_16(x)
+#define LE_32(x) BSWAP_32(x)
+#define LE_64(x) BSWAP_64(x)
+#else
+#define LE_8(x) BMASK_8(x)
+#define LE_16(x) BMASK_16(x)
+#define LE_32(x) BMASK_32(x)
+#define LE_64(x) BMASK_64(x)
+#define BE_8(x) BSWAP_8(x)
+#define BE_16(x) BSWAP_16(x)
+#define BE_32(x) BSWAP_32(x)
+#define BE_64(x) BSWAP_64(x)
+#endif
+
+/*
+ * Macros to read unaligned values from a specific byte order to
+ * native byte order (xa is evaluated more than once for widths above 8)
+ */
+
+#define BE_IN8(xa) \
+ *((uint8_t *)(xa))
+
+#define BE_IN16(xa) \
+ (((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
+
+#define BE_IN32(xa) \
+ (((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
+
+#define BE_IN64(xa) \
+ (((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4))
+
+#define LE_IN8(xa) \
+ *((uint8_t *)(xa))
+
+#define LE_IN16(xa) \
+ (((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa))
+
+#define LE_IN32(xa) \
+ (((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa))
+
+#define LE_IN64(xa) \
+ (((uint64_t)LE_IN32((uint8_t *)(xa) + 4) << 32) | LE_IN32(xa))
+
+/*
+ * Macros to write unaligned values from native byte order to a specific byte
+ * order. Each expands to complete statements, trailing ';' included.
+ */
+
+#define BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
+
+#define BE_OUT16(xa, yv) \
+ BE_OUT8((uint8_t *)(xa) + 1, yv); \
+ BE_OUT8((uint8_t *)(xa), (yv) >> 8);
+
+#define BE_OUT32(xa, yv) \
+ BE_OUT16((uint8_t *)(xa) + 2, yv); \
+ BE_OUT16((uint8_t *)(xa), (yv) >> 16);
+
+#define BE_OUT64(xa, yv) \
+ BE_OUT32((uint8_t *)(xa) + 4, yv); \
+ BE_OUT32((uint8_t *)(xa), (yv) >> 32);
+
+#define LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
+
+#define LE_OUT16(xa, yv) \
+ LE_OUT8((uint8_t *)(xa), yv); \
+ LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8);
+
+#define LE_OUT32(xa, yv) \
+ LE_OUT16((uint8_t *)(xa), yv); \
+ LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16);
+
+#define LE_OUT64(xa, yv) \
+ LE_OUT32((uint8_t *)(xa), yv); \
+ LE_OUT32((uint8_t *)(xa) + 4, (yv) >> 32);
+
+#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BYTEORDER_H */
diff --git a/lib/libspl/include/sys/callb.h b/lib/libspl/include/sys/callb.h
new file mode 100644
index 000000000..29a6a6777
--- /dev/null
+++ b/lib/libspl/include/sys/callb.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CALLB_H
+#define _SYS_CALLB_H
+
+#endif
diff --git a/lib/libspl/include/sys/cmn_err.h b/lib/libspl/include/sys/cmn_err.h
new file mode 100644
index 000000000..d199361d7
--- /dev/null
+++ b/lib/libspl/include/sys/cmn_err.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_CMN_ERR_H
+#define _LIBSPL_SYS_CMN_ERR_H
+
+#endif
diff --git a/lib/libspl/include/sys/compress.h b/lib/libspl/include/sys/compress.h
new file mode 100644
index 000000000..6e03e73a3
--- /dev/null
+++ b/lib/libspl/include/sys/compress.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_COMPRESS_H
+#define _LIBSPL_SYS_COMPRESS_H
+
+#endif /* _LIBSPL_SYS_COMPRESS_H */
diff --git a/lib/libspl/include/sys/cred.h b/lib/libspl/include/sys/cred.h
new file mode 100644
index 000000000..6a58315d4
--- /dev/null
+++ b/lib/libspl/include/sys/cred.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_CRED_H
+#define _LIBSPL_SYS_CRED_H
+
+typedef struct cred cred_t;
+
+#endif
diff --git a/lib/libspl/include/sys/debug.h b/lib/libspl/include/sys/debug.h
new file mode 100644
index 000000000..006962055
--- /dev/null
+++ b/lib/libspl/include/sys/debug.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_DEBUG_H
+#define _LIBSPL_SYS_DEBUG_H
+
+#include <assert.h>
+
+#endif
diff --git a/lib/libspl/include/sys/dkio.h b/lib/libspl/include/sys/dkio.h
new file mode 100644
index 000000000..32f786565
--- /dev/null
+++ b/lib/libspl/include/sys/dkio.h
@@ -0,0 +1,484 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DKIO_H
+#define _SYS_DKIO_H
+
+
+
+#include <sys/dklabel.h> /* Needed for NDKMAP define */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Structures and definitions for disk io control commands
+ */
+
+/*
+ * Structures used as data by ioctl calls.
+ */
+
+#define DK_DEVLEN 16 /* device name max length, including */
+ /* unit # & NULL (ie - "xyc1") */
+
+/*
+ * Used for controller info
+ */
+struct dk_cinfo {
+ char dki_cname[DK_DEVLEN]; /* controller name (no unit #) */
+ ushort_t dki_ctype; /* controller type */
+ ushort_t dki_flags; /* flags */
+ ushort_t dki_cnum; /* controller number */
+ uint_t dki_addr; /* controller address */
+ uint_t dki_space; /* controller bus type */
+ uint_t dki_prio; /* interrupt priority */
+ uint_t dki_vec; /* interrupt vector */
+ char dki_dname[DK_DEVLEN]; /* drive name (no unit #) */
+ uint_t dki_unit; /* unit number */
+ uint_t dki_slave; /* slave number */
+ ushort_t dki_partition; /* partition number */
+ ushort_t dki_maxtransfer; /* max. transfer size in DEV_BSIZE */
+};
+
+/*
+ * Controller types
+ */
+#define DKC_UNKNOWN 0
+#define DKC_CDROM 1 /* CD-ROM, SCSI or otherwise */
+#define DKC_WDC2880 2
+#define DKC_XXX_0 3 /* unassigned */
+#define DKC_XXX_1 4 /* unassigned */
+#define DKC_DSD5215 5
+#define DKC_ACB4000 7
+#define DKC_MD21 8
+#define DKC_XXX_2 9 /* unassigned */
+#define DKC_NCRFLOPPY 10
+#define DKC_SMSFLOPPY 12
+#define DKC_SCSI_CCS 13 /* SCSI CCS compatible */
+#define DKC_INTEL82072 14 /* native floppy chip */
+#define DKC_MD 16 /* meta-disk (virtual-disk) driver */
+#define DKC_INTEL82077 19 /* 82077 floppy disk controller */
+#define DKC_DIRECT 20 /* Intel direct attached device i.e. IDE */
+#define DKC_PCMCIA_MEM 21 /* PCMCIA memory disk-like type */
+#define DKC_PCMCIA_ATA 22 /* PCMCIA AT Attached type */
+#define DKC_VBD 23 /* virtual block device */
+
+/*
+ * Sun reserves up through 1023
+ */
+
+#define DKC_CUSTOMER_BASE 1024
+
+/*
+ * Flags
+ */
+#define DKI_BAD144 0x01 /* use DEC std 144 bad sector fwding */
+#define DKI_MAPTRK 0x02 /* controller does track mapping */
+#define DKI_FMTTRK 0x04 /* formats only full track at a time */
+#define DKI_FMTVOL 0x08 /* formats only full volume at a time */
+#define DKI_FMTCYL 0x10 /* formats only full cylinders at a time */
+#define DKI_HEXUNIT 0x20 /* unit number is printed as 3 hex digits */
+#define DKI_PCMCIA_PFD 0x40 /* PCMCIA pseudo-floppy memory card */
+
+/*
+ * Used for all partitions
+ */
+struct dk_allmap {
+ struct dk_map dka_map[NDKMAP];
+};
+
+#if defined(_SYSCALL32)
+struct dk_allmap32 {
+ struct dk_map32 dka_map[NDKMAP];
+};
+#endif /* _SYSCALL32 */
+
+/*
+ * Definition of a disk's geometry
+ */
+struct dk_geom {
+ unsigned short dkg_ncyl; /* # of data cylinders */
+ unsigned short dkg_acyl; /* # of alternate cylinders */
+ unsigned short dkg_bcyl; /* cyl offset (for fixed head area) */
+ unsigned short dkg_nhead; /* # of heads */
+ unsigned short dkg_obs1; /* obsolete */
+ unsigned short dkg_nsect; /* # of data sectors per track */
+ unsigned short dkg_intrlv; /* interleave factor */
+ unsigned short dkg_obs2; /* obsolete */
+ unsigned short dkg_obs3; /* obsolete */
+ unsigned short dkg_apc; /* alternates per cyl (SCSI only) */
+ unsigned short dkg_rpm; /* revolutions per minute */
+ unsigned short dkg_pcyl; /* # of physical cylinders */
+ unsigned short dkg_write_reinstruct; /* # sectors to skip, writes */
+ unsigned short dkg_read_reinstruct; /* # sectors to skip, reads */
+ unsigned short dkg_extra[7]; /* for compatible expansion */
+};
+
+/*
+ * These defines are for historic compatibility with old drivers.
+ */
+#define dkg_bhead dkg_obs1 /* used to be head offset */
+#define dkg_gap1 dkg_obs2 /* used to be gap1 */
+#define dkg_gap2 dkg_obs3 /* used to be gap2 */
+
+/*
+ * Disk io control commands
+ * Warning: some other ioctls with the DIOC prefix exist elsewhere.
+ * The Generic DKIOC numbers are from 0 - 50.
+ * The Floppy Driver uses 51 - 100.
+ * The Hard Disk (except SCSI) 101 - 106. (these are obsolete)
+ * The CDROM Driver 151 - 200.
+ * The USCSI ioctl 201 - 250.
+ */
+#define DKIOC (0x04 << 8)
+
+/*
+ * The following ioctls are generic in nature and need to be
+ * supported as appropriate by all disk drivers
+ */
+#define DKIOCGGEOM (DKIOC|1) /* Get geometry */
+#define DKIOCINFO (DKIOC|3) /* Get info */
+#define DKIOCEJECT (DKIOC|6) /* Generic 'eject' */
+#define DKIOCGVTOC (DKIOC|11) /* Get VTOC */
+#define DKIOCSVTOC (DKIOC|12) /* Set VTOC & Write to Disk */
+
+/*
+ * Disk Cache Controls. These ioctls should be supported by
+ * all disk drivers.
+ *
+ * DKIOCFLUSHWRITECACHE when used from user-mode ignores the ioctl
+ * argument, but it should be passed as NULL to allow for future
+ * reinterpretation. From user-mode, this ioctl request is synchronous.
+ *
+ * When invoked from within the kernel, the arg can be NULL to indicate
+ * a synchronous request or can be the address of a struct dk_callback
+ * to request an asynchronous callback when the flush request is complete.
+ * In this case, the flag to the ioctl must include FKIOCTL and the
+ * dkc_callback field of the pointed to struct must be non-null or the
+ * request is made synchronously.
+ *
+ * In the callback case: if the ioctl returns 0, a callback WILL be performed.
+ * If the ioctl returns non-zero, a callback will NOT be performed.
+ * NOTE: In some cases, the callback may be done BEFORE the ioctl call
+ * returns. The caller's locking strategy should be prepared for this case.
+ */
+#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */
+
+struct dk_callback {
+ void (*dkc_callback)(void *dkc_cookie, int error);
+ void *dkc_cookie;
+ int dkc_flag;
+};
+
+/* bit flag definitions for dkc_flag */
+#define FLUSH_VOLATILE 0x1 /* Bit 0: if set, only flush */
+ /* volatile cache; otherwise, flush */
+ /* volatile and non-volatile cache */
+
+#define DKIOCGETWCE (DKIOC|36) /* Get current write cache */
+ /* enablement status */
+#define DKIOCSETWCE (DKIOC|37) /* Enable/Disable write cache */
+
+/*
+ * The following ioctls are used by Sun drivers to communicate
+ * with their associated format routines. Support of these ioctls
+ * is not required of foreign drivers
+ */
+#define DKIOCSGEOM (DKIOC|2) /* Set geometry */
+#define DKIOCSAPART (DKIOC|4) /* Set all partitions */
+#define DKIOCGAPART (DKIOC|5) /* Get all partitions */
+#define DKIOCG_PHYGEOM (DKIOC|32) /* get physical geometry */
+#define DKIOCG_VIRTGEOM (DKIOC|33) /* get virtual geometry */
+
+/*
+ * The following ioctl's are removable media support
+ */
+#define DKIOCLOCK (DKIOC|7) /* Generic 'lock' */
+#define DKIOCUNLOCK (DKIOC|8) /* Generic 'unlock' */
+#define DKIOCSTATE (DKIOC|13) /* Inquire insert/eject state */
+#define DKIOCREMOVABLE (DKIOC|16) /* is media removable */
+
+
+/*
+ * ioctl for hotpluggable devices
+ */
+#define DKIOCHOTPLUGGABLE (DKIOC|35) /* is hotpluggable */
+
+/*
+ * Ioctl to force driver to re-read the alternate partition and rebuild
+ * the internal defect map.
+ */
+#define DKIOCADDBAD (DKIOC|20) /* Re-read the alternate map (IDE) */
+#define DKIOCGETDEF (DKIOC|21) /* read defect list (IDE) */
+
+/*
+ * Used by applications to get disk defect information from IDE
+ * drives.
+ */
+#ifdef _SYSCALL32
+struct defect_header32 {
+ int head;
+ caddr32_t buffer;
+};
+#endif /* _SYSCALL32 */
+
+struct defect_header {
+ int head;
+ caddr_t buffer;
+};
+
+#define DKIOCPARTINFO (DKIOC|22) /* Get partition or slice parameters */
+
+/*
+ * Used by applications to get partition or slice information
+ */
+#ifdef _SYSCALL32
+struct part_info32 {
+ daddr32_t p_start;
+ int p_length;
+};
+#endif /* _SYSCALL32 */
+
+struct part_info {
+ daddr_t p_start;
+ int p_length;
+};
+
+/* The following ioctls are for Optical Memory Device */
+#define DKIOC_EBP_ENABLE (DKIOC|40) /* enable by pass erase on write */
+#define DKIOC_EBP_DISABLE (DKIOC|41) /* disable by pass erase on write */
+
+/*
+ * This state enum is the argument passed to the DKIOCSTATE ioctl.
+ */
+enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE };
+
+#define DKIOCGMEDIAINFO (DKIOC|42) /* get information about the media */
+
+/*
+ * ioctls to read/write mboot info.
+ */
+#define DKIOCGMBOOT (DKIOC|43) /* get mboot info */
+#define DKIOCSMBOOT (DKIOC|44) /* set mboot info */
+
+/*
+ * ioctl to get the device temperature.
+ */
+#define DKIOCGTEMPERATURE (DKIOC|45) /* get temperature */
+
+/*
+ * Used for providing the temperature.
+ */
+
+struct dk_temperature {
+ uint_t dkt_flags; /* Flags */
+ short dkt_cur_temp; /* Current disk temperature */
+ short dkt_ref_temp; /* reference disk temperature */
+};
+
+#define DKT_BYPASS_PM 0x1
+#define DKT_INVALID_TEMP 0xFFFF /* NOTE(review): not representable in the short dkt_*_temp fields; confirm how callers compare */
+
+
+/*
+ * Used for Media info or the current profile info
+ */
+struct dk_minfo {
+ uint_t dki_media_type; /* Media type or profile info */
+ uint_t dki_lbsize; /* Logical blocksize of media */
+ diskaddr_t dki_capacity; /* Capacity as # of dki_lbsize blks */
+};
+
+/*
+ * Media types or profiles known
+ */
+#define DK_UNKNOWN 0x00 /* Media inserted - type unknown */
+
+
+/*
+ * SFF 8090 Specification Version 3, media types 0x01 - 0xfffe are retained to
+ * maintain compatibility with SFF8090. The following define the
+ * optical media type.
+ */
+#define DK_REMOVABLE_DISK 0x02 /* Removable Disk */
+#define DK_MO_ERASABLE 0x03 /* MO Erasable */
+#define DK_MO_WRITEONCE 0x04 /* MO Write once */
+#define DK_AS_MO 0x05 /* AS MO */
+#define DK_CDROM 0x08 /* CDROM */
+#define DK_CDR 0x09 /* CD-R */
+#define DK_CDRW 0x0A /* CD-RW */
+#define DK_DVDROM 0x10 /* DVD-ROM */
+#define DK_DVDR 0x11 /* DVD-R */
+#define DK_DVDRAM 0x12 /* DVD_RAM or DVD-RW */
+
+/*
+ * Media types for other rewritable magnetic media
+ */
+#define DK_FIXED_DISK 0x10001 /* Fixed disk SCSI or otherwise */
+#define DK_FLOPPY 0x10002 /* Floppy media */
+#define DK_ZIP 0x10003 /* IOMEGA ZIP media */
+#define DK_JAZ 0x10004 /* IOMEGA JAZ media */
+
+#define DKIOCSETEFI (DKIOC|17) /* Set EFI info */
+#define DKIOCGETEFI (DKIOC|18) /* Get EFI info */
+
+#define DKIOCPARTITION (DKIOC|9) /* Get partition info */
+
+/*
+ * Ioctls to get/set volume capabilities related to Logical Volume Managers.
+ * They include the ability to get/set capabilities and to issue a read to a
+ * specific underlying device of a replicated device.
+ */
+
+#define DKIOCGETVOLCAP (DKIOC | 25) /* Get volume capabilities */
+#define DKIOCSETVOLCAP (DKIOC | 26) /* Set volume capabilities */
+#define DKIOCDMR (DKIOC | 27) /* Issue a directed read */
+
+typedef uint_t volcapinfo_t;
+
+typedef uint_t volcapset_t;
+
+#define DKV_ABR_CAP 0x00000001 /* Support Appl.Based Recovery */
+#define DKV_DMR_CAP 0x00000002 /* Support Directed Mirror Read */
+
+typedef struct volcap {
+ volcapinfo_t vc_info; /* Capabilities available */
+ volcapset_t vc_set; /* Capabilities set */
+} volcap_t;
+
+#define VOL_SIDENAME 256
+
+typedef struct vol_directed_rd {
+ int vdr_flags;
+ offset_t vdr_offset;
+ size_t vdr_nbytes;
+ size_t vdr_bytesread;
+ void *vdr_data;
+ int vdr_side;
+ char vdr_side_name[VOL_SIDENAME];
+} vol_directed_rd_t;
+
+#define DKV_SIDE_INIT (-1)
+#define DKV_DMR_NEXT_SIDE 0x00000001
+#define DKV_DMR_DONE 0x00000002
+#define DKV_DMR_ERROR 0x00000004
+#define DKV_DMR_SUCCESS 0x00000008
+#define DKV_DMR_SHORT 0x00000010
+
+#ifdef _MULTI_DATAMODEL
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+typedef struct vol_directed_rd32 {
+ int32_t vdr_flags;
+ offset_t vdr_offset; /* 64-bit element on 32-bit alignment */
+ size32_t vdr_nbytes;
+ size32_t vdr_bytesread;
+ caddr32_t vdr_data;
+ int32_t vdr_side;
+ char vdr_side_name[VOL_SIDENAME];
+} vol_directed_rd32_t;
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+#endif /* _MULTI_DATAMODEL */
+
+/*
+ * The ioctl is used to fetch disk's device type, vendor ID,
+ * model number/product ID, firmware revision and serial number together.
+ *
+ * Currently there are two device types - DKD_ATA_TYPE which means the
+ * disk is driven by cmdk/ata or dad/uata driver, and DKD_SCSI_TYPE
+ * which means the disk is driven by sd/scsi hba driver.
+ */
+#define DKIOC_GETDISKID (DKIOC|46)
+
+/* These two labels are for dkd_dtype of dk_disk_id_t */
+#define DKD_ATA_TYPE 0x01 /* ATA disk or legacy mode SATA disk */
+#define DKD_SCSI_TYPE 0x02 /* SCSI disk or native mode SATA disk */
+
+#define DKD_ATA_MODEL 40 /* model number length */
+#define DKD_ATA_FWVER 8 /* firmware revision length */
+#define DKD_ATA_SERIAL 20 /* serial number length */
+
+#define DKD_SCSI_VENDOR 8 /* vendor ID length */
+#define DKD_SCSI_PRODUCT 16 /* product ID length */
+#define DKD_SCSI_REVLEVEL 4 /* revision level length */
+#define DKD_SCSI_SERIAL 12 /* serial number length */
+
+/*
+ * The argument type for DKIOC_GETDISKID ioctl.
+ */
+typedef struct dk_disk_id {
+ uint_t dkd_dtype;
+ union {
+ struct {
+ char dkd_amodel[DKD_ATA_MODEL]; /* 40 bytes */
+ char dkd_afwver[DKD_ATA_FWVER]; /* 8 bytes */
+ char dkd_aserial[DKD_ATA_SERIAL]; /* 20 bytes */
+ } ata_disk_id;
+ struct {
+ char dkd_svendor[DKD_SCSI_VENDOR]; /* 8 bytes */
+ char dkd_sproduct[DKD_SCSI_PRODUCT]; /* 16 bytes */
+ char dkd_sfwver[DKD_SCSI_REVLEVEL]; /* 4 bytes */
+ char dkd_sserial[DKD_SCSI_SERIAL]; /* 12 bytes */
+ } scsi_disk_id;
+ } disk_id;
+} dk_disk_id_t;
+
+/*
+ * The ioctl is used to update the firmware of device.
+ */
+#define DKIOC_UPDATEFW (DKIOC|47)
+
+/* The argument type for DKIOC_UPDATEFW ioctl */
+typedef struct dk_updatefw {
+ caddr_t dku_ptrbuf; /* pointer to firmware buf */
+ uint_t dku_size; /* firmware buf length */
+ uint8_t dku_type; /* firmware update type */
+} dk_updatefw_t;
+
+#ifdef _SYSCALL32
+typedef struct dk_updatefw_32 {
+ caddr32_t dku_ptrbuf; /* pointer to firmware buf */
+ uint_t dku_size; /* firmware buf length */
+ uint8_t dku_type; /* firmware update type */
+} dk_updatefw_32_t;
+#endif /* _SYSCALL32 */
+
+/*
+ * firmware update type - temporary or permanent use
+ */
+#define FW_TYPE_TEMP 0x0 /* temporary use */
+#define FW_TYPE_PERM 0x1 /* permanent use */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DKIO_H */
diff --git a/lib/libspl/include/sys/dklabel.h b/lib/libspl/include/sys/dklabel.h
new file mode 100644
index 000000000..77d5da10e
--- /dev/null
+++ b/lib/libspl/include/sys/dklabel.h
@@ -0,0 +1,268 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1990-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DKLABEL_H
+#define _SYS_DKLABEL_H
+
+
+
+#include <sys/isa_defs.h>
+#include <sys/types32.h>
+#include <sys/isa_defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Miscellaneous defines
+ */
+#define DKL_MAGIC 0xDABE /* magic number */
+#define FKL_MAGIC 0xff /* magic number for DOS floppies */
+
+#if defined(_SUNOS_VTOC_16)
+#define NDKMAP 16 /* # of logical partitions */
+#define DK_LABEL_LOC 1 /* location of disk label */
+#elif defined(_SUNOS_VTOC_8)
+#define NDKMAP 8 /* # of logical partitions */
+#define DK_LABEL_LOC 0 /* location of disk label */
+#else
+#error "No VTOC format defined."
+#endif
+
+#define LEN_DKL_ASCII 128 /* length of dkl_asciilabel */
+#define LEN_DKL_VVOL 8 /* length of v_volume */
+#define DK_LABEL_SIZE 512 /* size of disk label */
+#define DK_MAX_BLOCKS 0x7fffffff /* max # of blocks handled */
+
+/*
+ * Reserve two cylinders on SCSI disks.
+ * One is for the backup disk label and the other is for the deviceid.
+ *
+ * IPI disks only reserve one cylinder, but they will go away soon.
+ * CDROMs do not reserve any cylinders.
+ */
+#define DK_ACYL 2
+
+/*
+ * Format of a Sun disk label.
+ * Resides in cylinder 0, head 0, sector 0.
+ *
+ * sizeof (struct dk_label) should be 512 (the current sector size),
+ * but should the sector size increase, this structure should remain
+ * at the beginning of the sector.
+ */
+
+/*
+ * partition headers: section 1
+ * Returned in struct dk_allmap by ioctl DKIOC[SG]APART (dkio(7I))
+ */
+struct dk_map {
+ daddr_t dkl_cylno; /* starting cylinder */
+ daddr_t dkl_nblk; /* number of blocks; if == 0, */
+ /* partition is undefined */
+};
+
+/*
+ * partition headers: section 1
+ * Fixed size for on-disk dk_label
+ */
+struct dk_map32 {
+ daddr32_t dkl_cylno; /* starting cylinder */
+ daddr32_t dkl_nblk; /* number of blocks; if == 0, */
+ /* partition is undefined */
+};
+
+/*
+ * partition headers: section 2,
+ * brought over from AT&T SVr4 vtoc structure.
+ */
+struct dk_map2 {
+ uint16_t p_tag; /* ID tag of partition */
+ uint16_t p_flag; /* permission flag */
+};
+
+struct dkl_partition {
+ uint16_t p_tag; /* ID tag of partition */
+ uint16_t p_flag; /* permission flags */
+ daddr32_t p_start; /* start sector no of partition */
+ int32_t p_size; /* # of blocks in partition */
+};
+
+
+/*
+ * VTOC inclusions from AT&T SVr4
+ * Fixed sized types for on-disk VTOC
+ */
+
+struct dk_vtoc {
+#if defined(_SUNOS_VTOC_16)
+ uint32_t v_bootinfo[3]; /* info for mboot (unsupported) */
+ uint32_t v_sanity; /* to verify vtoc sanity */
+ uint32_t v_version; /* layout version */
+ char v_volume[LEN_DKL_VVOL]; /* volume name */
+ uint16_t v_sectorsz; /* sector size in bytes */
+ uint16_t v_nparts; /* number of partitions */
+ uint32_t v_reserved[10]; /* free space */
+ struct dkl_partition v_part[NDKMAP]; /* partition headers */
+ time32_t timestamp[NDKMAP]; /* partition timestamp (unsupported) */
+ char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
+#elif defined(_SUNOS_VTOC_8)
+ uint32_t v_version; /* layout version */
+ char v_volume[LEN_DKL_VVOL]; /* volume name */
+ uint16_t v_nparts; /* number of partitions */
+ struct dk_map2 v_part[NDKMAP]; /* partition hdrs, sec 2 */
+ uint32_t v_bootinfo[3]; /* info needed by mboot */
+ uint32_t v_sanity; /* to verify vtoc sanity */
+ uint32_t v_reserved[10]; /* free space */
+ time32_t v_timestamp[NDKMAP]; /* partition timestamp */
+#else
+#error "No VTOC format defined."
+#endif
+};
+
+/*
+ * define the amount of disk label padding needed to make
+ * the entire structure occupy 512 bytes.
+ */
+#if defined(_SUNOS_VTOC_16)
+#define LEN_DKL_PAD (DK_LABEL_SIZE - \
+ ((sizeof (struct dk_vtoc) + \
+ (4 * sizeof (uint32_t)) + \
+ (12 * sizeof (uint16_t)) + \
+ (2 * (sizeof (uint16_t))))))
+#elif defined(_SUNOS_VTOC_8)
+#define LEN_DKL_PAD (DK_LABEL_SIZE \
+ - ((LEN_DKL_ASCII) + \
+ (sizeof (struct dk_vtoc)) + \
+ (sizeof (struct dk_map32) * NDKMAP) + \
+ (14 * (sizeof (uint16_t))) + \
+ (2 * (sizeof (uint16_t)))))
+#else
+#error "No VTOC format defined."
+#endif
+
+
+struct dk_label {
+#if defined(_SUNOS_VTOC_16)
+ struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */
+ uint32_t dkl_pcyl; /* # of physical cylinders */
+ uint32_t dkl_ncyl; /* # of data cylinders */
+ uint16_t dkl_acyl; /* # of alternate cylinders */
+ uint16_t dkl_bcyl; /* cyl offset (for fixed head area) */
+ uint32_t dkl_nhead; /* # of heads */
+ uint32_t dkl_nsect; /* # of data sectors per track */
+ uint16_t dkl_intrlv; /* interleave factor */
+ uint16_t dkl_skew; /* skew factor */
+ uint16_t dkl_apc; /* alternates per cyl (SCSI only) */
+ uint16_t dkl_rpm; /* revolutions per minute */
+ uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */
+ uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */
+ uint16_t dkl_extra[4]; /* for compatible expansion */
+ char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */
+#elif defined(_SUNOS_VTOC_8)
+ char dkl_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
+ struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */
+ uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */
+ uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */
+ char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */
+ uint16_t dkl_rpm; /* rotations per minute */
+ uint16_t dkl_pcyl; /* # physical cylinders */
+ uint16_t dkl_apc; /* alternates per cylinder */
+ uint16_t dkl_obs1; /* obsolete */
+ uint16_t dkl_obs2; /* obsolete */
+ uint16_t dkl_intrlv; /* interleave factor */
+ uint16_t dkl_ncyl; /* # of data cylinders */
+ uint16_t dkl_acyl; /* # of alternate cylinders */
+ uint16_t dkl_nhead; /* # of heads in this partition */
+ uint16_t dkl_nsect; /* # of 512 byte sectors per track */
+ uint16_t dkl_obs3; /* obsolete */
+ uint16_t dkl_obs4; /* obsolete */
+ struct dk_map32 dkl_map[NDKMAP]; /* logical partition headers */
+#else
+#error "No VTOC format defined."
+#endif
+ uint16_t dkl_magic; /* identifies this label format */
+ uint16_t dkl_cksum; /* xor checksum of sector */
+};
+
+#if defined(_SUNOS_VTOC_16)
+#define dkl_asciilabel dkl_vtoc.v_asciilabel
+#define v_timestamp timestamp
+
+#elif defined(_SUNOS_VTOC_8)
+
+/*
+ * These defines are for historic compatibility with old drivers.
+ */
+#define dkl_gap1 dkl_obs1 /* used to be gap1 */
+#define dkl_gap2 dkl_obs2 /* used to be gap2 */
+#define dkl_bhead dkl_obs3 /* used to be label head offset */
+#define dkl_ppart dkl_obs4 /* used to be physical partition */
+#else
+#error "No VTOC format defined."
+#endif
+
+struct fk_label { /* DOS floppy label */
+ uchar_t fkl_type;
+ uchar_t fkl_magich;
+ uchar_t fkl_magicl;
+ uchar_t filler;
+};
+
+/*
+ * Layout of stored fabricated device id (on-disk)
+ */
+#define DK_DEVID_BLKSIZE (512)
+#define DK_DEVID_SIZE (DK_DEVID_BLKSIZE - ((sizeof (uchar_t) * 7)))
+#define DK_DEVID_REV_MSB (0)
+#define DK_DEVID_REV_LSB (1)
+
+struct dk_devid {
+ uchar_t dkd_rev_hi; /* revision (MSB) */
+ uchar_t dkd_rev_lo; /* revision (LSB) */
+ uchar_t dkd_flags; /* flags (not used yet) */
+ uchar_t dkd_devid[DK_DEVID_SIZE]; /* devid stored here */
+ uchar_t dkd_checksum3; /* checksum (MSB) */
+ uchar_t dkd_checksum2;
+ uchar_t dkd_checksum1;
+ uchar_t dkd_checksum0; /* checksum (LSB) */
+};
+
+#define DKD_GETCHKSUM(dkd) (((uint32_t)(dkd)->dkd_checksum3 << 24) + \
+ ((uint32_t)(dkd)->dkd_checksum2 << 16) + \
+ ((uint32_t)(dkd)->dkd_checksum1 << 8) + \
+ ((uint32_t)(dkd)->dkd_checksum0)) /* 32-bit value, checksum3 = MSB; unsigned shifts, fully parenthesized */
+
+#define DKD_FORMCHKSUM(c, dkd) ((dkd)->dkd_checksum3 = hibyte(hiword((c))), \
+ (dkd)->dkd_checksum2 = lobyte(hiword((c))), \
+ (dkd)->dkd_checksum1 = hibyte(loword((c))), \
+ (dkd)->dkd_checksum0 = lobyte(loword((c)))); /* one comma expression so an unbraced if/for covers all four stores; trailing ';' kept for existing callers */
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DKLABEL_H */
diff --git a/lib/libspl/include/sys/dktp/fdisk.h b/lib/libspl/include/sys/dktp/fdisk.h
new file mode 100644
index 000000000..e90135f36
--- /dev/null
+++ b/lib/libspl/include/sys/dktp/fdisk.h
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright (c) 1984, 1986, 1987, 1988 AT&T */
+/* All Rights Reserved */
+
+
+#ifndef _SYS_DKTP_FDISK_H
+#define _SYS_DKTP_FDISK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * fdisk.h
+ * This file defines the structure of physical disk sector 0 for use on
+ * AT386 systems. The format of this sector is constrained by the ROM
+ * BIOS and MS-DOS conventions.
+ * Note that this block does not define the partitions used by the unix
+ * driver. The unix partitions are obtained from the VTOC.
+ */
+
+/*
+ * the MAX values are the maximum usable values for BIOS chs values
+ * The MAX_CYL value of 1022 is the maximum usable value
+ * the value of 1023 is a fence value,
+ * indicating no CHS geometry exists for the corresponding LBA value.
+ * HEAD range [ 0 .. MAX_HEAD ], so number of heads is (MAX_HEAD + 1)
+ * SECT range [ 1 .. MAX_SECT ], so number of sectors is (MAX_SECT)
+ */
+#define MAX_SECT (63)
+#define MAX_CYL (1022)
+#define MAX_HEAD (254)
+
+/*
+ * BOOTSZ was reduced from 446 to 440 bytes to NOT overwrite the Windows
+ * Vista DISKID. Otherwise Vista won't boot from Solaris GRUB in a dual-boot
+ * setup.
+ * The actual size of mboot code is 425 bytes while that of GRUB stage1 is
+ * 423 bytes. So this change does not harm them.
+ */
+#define BOOTSZ 440 /* size of boot code in master boot block */
+#define FD_NUMPART 4 /* number of 'partitions' in fdisk table */
+#define MBB_MAGIC 0xAA55 /* magic number for mboot.signature */
+#define DEFAULT_INTLV 4 /* default interleave for testing tracks */
+#define MINPSIZE 4 /* minimum number of cylinders in a partition */
+#define TSTPAT 0xE5 /* test pattern for verifying disk */
+
+/*
+ * structure to hold the fdisk partition table
+ */
+struct ipart {
+ unsigned char bootid; /* bootable or not */
+ unsigned char beghead; /* beginning head, sector, cylinder */
+ unsigned char begsect; /* begcyl is a 10-bit number. High 2 bits */
+ unsigned char begcyl; /* are in begsect. */
+ unsigned char systid; /* OS type */
+ unsigned char endhead; /* ending head, sector, cylinder */
+ unsigned char endsect; /* endcyl is a 10-bit number. High 2 bits */
+ unsigned char endcyl; /* are in endsect. */
+ uint32_t relsect; /* first sector relative to start of disk */
+ uint32_t numsect; /* number of sectors in partition */
+};
+/*
+ * Values for bootid.
+ */
+#define NOTACTIVE 0
+#define ACTIVE 128
+/*
+ * Values for systid.
+ */
+#define UNUSED 0 /* Empty Partition */
+#define DOSOS12 1 /* DOS partition, 12-bit FAT */
+#define PCIXOS 2 /* PC/IX partition */
+#define DOSOS16 4 /* DOS partition, 16-bit FAT */
+#define EXTDOS 5 /* EXT-DOS partition */
+#define DOSHUGE 6 /* Huge DOS partition > 32MB */
+#define FDISK_IFS 7 /* Installable File System (IFS): HPFS & NTFS */
+#define FDISK_AIXBOOT 8 /* AIX Boot */
+#define FDISK_AIXDATA 9 /* AIX Data */
+#define FDISK_OS2BOOT 10 /* OS/2 Boot Manager */
+#define FDISK_WINDOWS 11 /* Windows 95 FAT32 (up to 2047GB) */
+#define FDISK_EXT_WIN 12 /* Windows 95 FAT32 (extended-INT13) */
+#define FDISK_FAT95 14 /* DOS 16-bit FAT, LBA-mapped */
+#define FDISK_EXTLBA 15 /* Extended partition, LBA-mapped */
+#define DIAGPART 18 /* Diagnostic boot partition (OS independent) */
+#define FDISK_LINUX 65 /* Linux */
+#define FDISK_LINUXDSWAP 66 /* Linux swap (sharing disk w/ DRDOS) */
+#define FDISK_LINUXDNAT 67 /* Linux native (sharing disk with DRDOS) */
+#define FDISK_CPM 82 /* CP/M */
+#define DOSDATA 86 /* DOS data partition */
+#define OTHEROS 98 /* part. type for appl. (DB?) needs */
+ /* raw partition. ID was 0 but conflicted */
+ /* with DOS 3.3 fdisk */
+#define UNIXOS 99 /* UNIX V.x partition */
+#define FDISK_NOVELL2 100 /* Novell Netware 286 */
+#define FDISK_NOVELL3 101 /* Novell Netware 3.x and later */
+#define FDISK_QNX4 119 /* QNX 4.x */
+#define FDISK_QNX42 120 /* QNX 4.x 2nd part */
+#define FDISK_QNX43 121 /* QNX 4.x 3rd part */
+#define SUNIXOS 130 /* Solaris UNIX partition */
+#define FDISK_LINUXNAT 131 /* Linux native */
+#define FDISK_NTFSVOL1 134 /* NTFS volume set 1 */
+#define FDISK_NTFSVOL2 135 /* NTFS volume set 2 */
+#define FDISK_BSD 165 /* BSD/386, 386BSD, NetBSD, FreeBSD, OpenBSD */
+#define FDISK_NEXTSTEP 167 /* NeXTSTEP */
+#define FDISK_BSDIFS 183 /* BSDI file system */
+#define FDISK_BSDISWAP 184 /* BSDI swap */
+#define X86BOOT 190 /* x86 Solaris boot partition */
+#define SUNIXOS2 191 /* Solaris UNIX partition */
+#define EFI_PMBR 238 /* EFI PMBR */
+#define EFI_FS 239 /* EFI File System (System Partition) */
+#define MAXDOS 65535L /* max size (sectors) for DOS partition */
+
+/*
+ * structure to hold master boot block in physical sector 0 of the disk.
+ * Note that partitions stuff can't be directly included in the structure
+ * because of lameo '386 compiler alignment design.
+ * Alignment issues also force us to have 2 16bit entities for a single
+ * 32bit win_volserno. It is not used anywhere anyway.
+ */
+
+struct mboot { /* master boot block */
+ char bootinst[BOOTSZ];
+ uint16_t win_volserno_lo;
+ uint16_t win_volserno_hi;
+ uint16_t reserved;
+ char parts[FD_NUMPART * sizeof (struct ipart)];
+ ushort_t signature;
+};
+
+#if defined(__i386) || defined(__amd64)
+
+/* Byte offset of the start of the partition table within the sector */
+#define FDISK_PART_TABLE_START 446
+
+/* Maximum number of valid partitions assumed as 32 */
+#define MAX_EXT_PARTS 32
+
+#else
+
+#define MAX_EXT_PARTS 0
+
+#endif /* if defined(__i386) || defined(__amd64) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DKTP_FDISK_H */
diff --git a/lib/libspl/include/sys/feature_tests.h b/lib/libspl/include/sys/feature_tests.h
new file mode 100644
index 000000000..96f627172
--- /dev/null
+++ b/lib/libspl/include/sys/feature_tests.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_FEATURE_TESTS_H
+#define _SYS_FEATURE_TESTS_H
+
+#define __NORETURN __attribute__((__noreturn__))
+
+#endif
diff --git a/lib/libspl/include/sys/file.h b/lib/libspl/include/sys/file.h
new file mode 100644
index 000000000..9aaba35be
--- /dev/null
+++ b/lib/libspl/include/sys/file.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_FILE_H
+#define _LIBSPL_SYS_FILE_H
+
+#include_next <sys/file.h>
+
+#include <sys/user.h>
+
+#define FREAD 1
+#define FWRITE 2
+//#define FAPPEND 8
+
+#define FCREAT O_CREAT
+#define FTRUNC O_TRUNC
+#define FOFFMAX O_LARGEFILE
+#define FSYNC O_SYNC
+#define FDSYNC O_DSYNC
+#define FRSYNC O_RSYNC
+#define FEXCL O_EXCL
+
+#define FNODSYNC 0x10000 /* fsync pseudo flag */
+#define FNOFOLLOW 0x20000 /* don't follow symlinks */
+#define FIGNORECASE 0x80000 /* request case-insensitive lookups */
+
+#endif
diff --git a/lib/libspl/include/sys/fm/protocol.h b/lib/libspl/include/sys/fm/protocol.h
new file mode 100644
index 000000000..4e146d8b9
--- /dev/null
+++ b/lib/libspl/include/sys/fm/protocol.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_FM_PROTOCOL_H
+#define _LIBSPL_SYS_FM_PROTOCOL_H
+
+#endif
diff --git a/lib/libspl/include/sys/fm/util.h b/lib/libspl/include/sys/fm/util.h
new file mode 100644
index 000000000..564d0b5b5
--- /dev/null
+++ b/lib/libspl/include/sys/fm/util.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_FM_UTIL_H
+#define _LIBSPL_SYS_FM_UTIL_H
+
+#endif
diff --git a/lib/libspl/include/sys/frame.h b/lib/libspl/include/sys/frame.h
new file mode 100644
index 000000000..f936ab825
--- /dev/null
+++ b/lib/libspl/include/sys/frame.h
@@ -0,0 +1,131 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_FRAME_H
+#define _SYS_FRAME_H
+
+#include <sys/types.h>
+
+#if defined(_LP64) || defined(_I32LPx)
+typedef long greg_t;
+#else
+typedef int greg_t;
+#endif
+
+struct frame {
+ greg_t fr_savfp; /* saved frame pointer */
+ greg_t fr_savpc; /* saved program counter */
+};
+
+
+/*
+ * In the x86 world, a stack frame looks like this:
+ *
+ * |--------------------------|
+ * 4n+8(%ebp) ->| argument word n |
+ * | ... | (Previous frame)
+ * 8(%ebp) ->| argument word 0 |
+ * |--------------------------|--------------------
+ * 4(%ebp) ->| return address |
+ * |--------------------------|
+ * 0(%ebp) ->| previous %ebp (optional) |
+ * |--------------------------|
+ * -4(%ebp) ->| unspecified | (Current frame)
+ * | ... |
+ * 0(%esp) ->| variable size |
+ * |--------------------------|
+ */
+
+/*
+ * Stack alignment macros.
+ */
+
+#define STACK_ALIGN32 4
+#define STACK_ENTRY_ALIGN32 4
+#define STACK_BIAS32 0
+#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1))
+#define STACK_RESERVE32 0
+#define MINFRAME32 0
+
+#if defined(__amd64)
+
+/*
+ * In the amd64 world, a stack frame looks like this:
+ *
+ * |--------------------------|
+ * 8n+16(%rbp)->| argument word n |
+ * | ... | (Previous frame)
+ * 16(%rbp) ->| argument word 0 |
+ * |--------------------------|--------------------
+ * 8(%rbp) ->| return address |
+ * |--------------------------|
+ * 0(%rbp) ->| previous %rbp |
+ * |--------------------------|
+ * -8(%rbp) ->| unspecified | (Current frame)
+ * | ... |
+ * 0(%rsp) ->| variable size |
+ * |--------------------------|
+ * -128(%rsp) ->| reserved for function |
+ * |--------------------------|
+ *
+ * The end of the input argument area must be aligned on a 16-byte
+ * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry.
+ *
+ * The 128-byte location beyond %rsp is considered to be reserved for
+ * functions and is NOT modified by signal handlers. It can be used
+ * to store temporary data that is not needed across function calls.
+ */
+
+/*
+ * Stack alignment macros.
+ */
+
+#define STACK_ALIGN64 16
+#define STACK_ENTRY_ALIGN64 8
+#define STACK_BIAS64 0
+#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1))
+#define STACK_RESERVE64 128
+#define MINFRAME64 0
+
+#define STACK_ALIGN STACK_ALIGN64
+#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64
+#define STACK_BIAS STACK_BIAS64
+#define SA(x) SA64(x)
+#define STACK_RESERVE STACK_RESERVE64
+#define MINFRAME MINFRAME64
+
+#elif defined(__i386)
+
+#define STACK_ALIGN STACK_ALIGN32
+#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32
+#define STACK_BIAS STACK_BIAS32
+#define SA(x) SA32(x)
+#define STACK_RESERVE STACK_RESERVE32
+#define MINFRAME MINFRAME32
+
+#endif /* __i386 */
+
+#endif /* _SYS_FRAME_H */
diff --git a/lib/libspl/include/sys/int_limits.h b/lib/libspl/include/sys/int_limits.h
new file mode 100644
index 000000000..2b50ddd1b
--- /dev/null
+++ b/lib/libspl/include/sys/int_limits.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_INT_LIMITS_H
+#define _LIBSPL_SYS_INT_LIMITS_H
+
+#endif
diff --git a/lib/libspl/include/sys/int_types.h b/lib/libspl/include/sys/int_types.h
new file mode 100644
index 000000000..b32512282
--- /dev/null
+++ b/lib/libspl/include/sys/int_types.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOL_SYS_INT_TYPES_H
+#define _SOL_SYS_INT_TYPES_H
+
+#include <inttypes.h>
+
+#endif
diff --git a/lib/libspl/include/sys/inttypes.h b/lib/libspl/include/sys/inttypes.h
new file mode 100644
index 000000000..7630f2d4c
--- /dev/null
+++ b/lib/libspl/include/sys/inttypes.h
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOL_SYS_INTTYPES_H
+#define _SOL_SYS_INTTYPES_H
+
+#include <inttypes.h>
+
+#define _INT64_TYPE
+
+#endif
diff --git a/lib/libspl/include/sys/isa_defs.h b/lib/libspl/include/sys/isa_defs.h
new file mode 100644
index 000000000..677e4fda0
--- /dev/null
+++ b/lib/libspl/include/sys/isa_defs.h
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ISA_DEFS_H
+#define _SYS_ISA_DEFS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* x86_64 arch specific defines */
+#if defined(__x86_64) || defined(__x86_64__)
+
+#if !defined(__x86_64)
+#define __x86_64
+#endif
+
+#if !defined(__amd64)
+#define __amd64
+#endif
+
+#if !defined(__x86)
+#define __x86
+#endif
+
+#if !defined(_LP64)
+#define _LP64
+#endif
+
+#if !defined(_LITTLE_ENDIAN)
+#define _LITTLE_ENDIAN
+#endif
+
+#define _SUNOS_VTOC_16
+
+/* i386 arch specific defines */
+#elif defined(__i386) || defined(__i386__)
+
+#if !defined(__i386)
+#define __i386
+#endif
+
+#if !defined(__x86)
+#define __x86
+#endif
+
+#if !defined(_ILP32)
+#define _ILP32
+#endif
+
+#if !defined(_LITTLE_ENDIAN)
+#define _LITTLE_ENDIAN
+#endif
+
+#define _SUNOS_VTOC_16
+
+/* powerpc arch specific defines */
+#elif defined(__powerpc) || defined(__powerpc__)
+
+#if !defined(__powerpc)
+#define __powerpc
+#endif
+
+#if !defined(__powerpc__)
+#define __powerpc__
+#endif
+
+#if defined(__powerpc64__) /* 64-bit powerpc: LP64 data model */
+#if !defined(_LP64)
+#define _LP64
+#endif
+#elif !defined(_LP64) && !defined(_ILP32) /* 32-bit powerpc: ILP32, not "_LP32" */
+#define _ILP32
+#endif
+
+#if !defined(_BIG_ENDIAN)
+#define _BIG_ENDIAN
+#endif
+
+#define _SUNOS_VTOC_16
+
+#else /* Currently only x86_64, i386, and powerpc arches supported */
+#error "Unsupported ISA type"
+#endif
+
+#if defined(_ILP32) && defined(_LP64)
+#error "Both _ILP32 and _LP64 are defined"
+#endif
+
+#if defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
+#error "Both _LITTLE_ENDIAN and _BIG_ENDIAN are defined"
+#endif
+
+#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
+#error "Neither _LITTLE_ENDIAN nor _BIG_ENDIAN are defined"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ISA_DEFS_H */
diff --git a/lib/libspl/include/sys/kmem.h b/lib/libspl/include/sys/kmem.h
new file mode 100644
index 000000000..401e04072
--- /dev/null
+++ b/lib/libspl/include/sys/kmem.h
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_KMEM_H
+#define _SYS_KMEM_H
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define KM_SLEEP 0x00000000 /* flag value; ignored by kmem_alloc() below */
+#define KM_NOSLEEP 0x00000001 /* flag value; ignored by kmem_alloc() below */
+
+#define kmem_alloc(size, flags) malloc(size) /* flags is discarded */
+#define kmem_free(ptr, size) free(ptr) /* size is discarded */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_KMEM_H */
diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h
new file mode 100644
index 000000000..fcd3ed98b
--- /dev/null
+++ b/lib/libspl/include/sys/kstat.h
@@ -0,0 +1,820 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_KSTAT_H
+#define _SYS_KSTAT_H
+
+
+
+/*
+ * Definition of general kernel statistics structures and /dev/kstat ioctls
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int kid_t; /* unique kstat id */
+
+/*
+ * Kernel statistics driver (/dev/kstat) ioctls
+ */
+
+#define KSTAT_IOC_BASE ('K' << 8)
+
+#define KSTAT_IOC_CHAIN_ID (KSTAT_IOC_BASE | 0x01) /* parenthesized: safe in expressions */
+#define KSTAT_IOC_READ (KSTAT_IOC_BASE | 0x02) /* parenthesized: safe in expressions */
+#define KSTAT_IOC_WRITE (KSTAT_IOC_BASE | 0x03) /* parenthesized: safe in expressions */
+
+/*
+ * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
+ *
+ * kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
+ * kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
+ * kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
+ */
+
+#define KSTAT_STRLEN 31 /* 30 chars + NULL; must be 16 * n - 1 */
+
+/*
+ * The generic kstat header
+ */
+
+typedef struct kstat {
+ /*
+ * Fields relevant to both kernel and user
+ */
+ hrtime_t ks_crtime; /* creation time (from gethrtime()) */
+ struct kstat *ks_next; /* kstat chain linkage */
+ kid_t ks_kid; /* unique kstat ID */
+ char ks_module[KSTAT_STRLEN]; /* provider module name */
+ uchar_t ks_resv; /* reserved, currently just padding */
+ int ks_instance; /* provider module's instance */
+ char ks_name[KSTAT_STRLEN]; /* kstat name */
+ uchar_t ks_type; /* kstat data type */
+ char ks_class[KSTAT_STRLEN]; /* kstat class */
+ uchar_t ks_flags; /* kstat flags */
+ void *ks_data; /* kstat type-specific data */
+ uint_t ks_ndata; /* # of type-specific data records */
+ size_t ks_data_size; /* total size of kstat data section */
+ hrtime_t ks_snaptime; /* time of last data snapshot */
+ /*
+ * Fields relevant to kernel only
+ */
+ int (*ks_update)(struct kstat *, int); /* dynamic update */
+ void *ks_private; /* arbitrary provider-private data */
+ int (*ks_snapshot)(struct kstat *, void *, int); /* data snapshot routine */
+ void *ks_lock; /* protects this kstat's data */
+} kstat_t;
+
+#ifdef _SYSCALL32
+
+typedef int32_t kid32_t;
+
+typedef struct kstat32 {
+ /*
+ * Fields relevant to both kernel and user
+ */
+ hrtime_t ks_crtime;
+ caddr32_t ks_next; /* struct kstat pointer */
+ kid32_t ks_kid;
+ char ks_module[KSTAT_STRLEN];
+ uint8_t ks_resv;
+ int32_t ks_instance;
+ char ks_name[KSTAT_STRLEN];
+ uint8_t ks_type;
+ char ks_class[KSTAT_STRLEN];
+ uint8_t ks_flags;
+ caddr32_t ks_data; /* type-specific data */
+ uint32_t ks_ndata;
+ size32_t ks_data_size;
+ hrtime_t ks_snaptime;
+ /*
+ * Fields relevant to kernel only (only needed here for padding)
+ */
+ int32_t _ks_update;
+ caddr32_t _ks_private;
+ int32_t _ks_snapshot;
+ caddr32_t _ks_lock;
+} kstat32_t;
+
+#endif /* _SYSCALL32 */
+
+/*
+ * kstat structure and locking strategy
+ *
+ * Each kstat consists of a header section (a kstat_t) and a data section.
+ * The system maintains a set of kstats, protected by kstat_chain_lock.
+ * kstat_chain_lock protects all additions to/deletions from this set,
+ * as well as all changes to kstat headers. kstat data sections are
+ * *optionally* protected by the per-kstat ks_lock. If ks_lock is non-NULL,
+ * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
+ * operations on that kstat. It is up to the kstat provider to decide whether
+ * guaranteeing consistent data to kstat clients is sufficiently important
+ * to justify the locking cost. Note, however, that most statistic updates
+ * already occur under one of the provider's mutexes, so if the provider sets
+ * ks_lock to point to that mutex, then kstat data locking is free.
+ *
+ * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
+ * data-size races with kstat clients.
+ *
+ * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
+ * in the kstat header so that users don't have to be exposed to all of the
+ * kernel's lock-related data structures.
+ */
+
+#if defined(_KERNEL)
+
+#define KSTAT_ENTER(k) \
+ { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); }
+
+#define KSTAT_EXIT(k) \
+ { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); }
+
+#define KSTAT_UPDATE(k, rw) (*(k)->ks_update)((k), (rw))
+
+#define KSTAT_SNAPSHOT(k, buf, rw) (*(k)->ks_snapshot)((k), (buf), (rw))
+
+#endif /* defined(_KERNEL) */
+
+/*
+ * kstat time
+ *
+ * All times associated with kstats (e.g. creation time, snapshot time,
+ * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
+ * as returned by gethrtime(). The accuracy of these timestamps is machine
+ * dependent, but the precision (units) is the same across all platforms.
+ */
+
+/*
+ * kstat identity (KID)
+ *
+ * Each kstat is assigned a unique KID (kstat ID) when it is added to the
+ * global kstat chain. The KID is used as a cookie by /dev/kstat to
+ * request information about the corresponding kstat. There is also
+ * an identity associated with the entire kstat chain, kstat_chain_id,
+ * which is bumped each time a kstat is added or deleted. /dev/kstat uses
+ * the chain ID to detect changes in the kstat chain (e.g., a new disk
+ * coming online) between ioctl()s.
+ */
+
+/*
+ * kstat module, kstat instance
+ *
+ * ks_module and ks_instance contain the name and instance of the module
+ * that created the kstat. In cases where there can only be one instance,
+ * ks_instance is 0. The kernel proper (/kernel/unix) uses "unix" as its
+ * module name.
+ */
+
+/*
+ * kstat name
+ *
+ * ks_name gives a meaningful name to a kstat. The full kstat namespace
+ * is module.instance.name, so the name only need be unique within a
+ * module. kstat_create() will fail if you try to create a kstat with
+ * an already-used (ks_module, ks_instance, ks_name) triplet. Spaces are
+ * allowed in kstat names, but strongly discouraged, since they hinder
+ * awk-style processing at user level.
+ */
+
+/*
+ * kstat type
+ *
+ * The kstat mechanism provides several flavors of kstat data, defined
+ * below. The "raw" kstat type is just treated as an array of bytes; you
+ * can use this to export any kind of data you want.
+ *
+ * Some kstat types allow multiple data structures per kstat, e.g.
+ * KSTAT_TYPE_NAMED; others do not. This is part of the spec for each
+ * kstat data type.
+ *
+ * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES. To
+ * get this information, read out the standard system kstat "kstat_types".
+ */
+
+#define KSTAT_TYPE_RAW 0 /* can be anything */
+ /* ks_ndata >= 1 */
+#define KSTAT_TYPE_NAMED 1 /* name/value pair */
+ /* ks_ndata >= 1 */
+#define KSTAT_TYPE_INTR 2 /* interrupt statistics */
+ /* ks_ndata == 1 */
+#define KSTAT_TYPE_IO 3 /* I/O statistics */
+ /* ks_ndata == 1 */
+#define KSTAT_TYPE_TIMER 4 /* event timer */
+ /* ks_ndata >= 1 */
+
+#define KSTAT_NUM_TYPES 5
+
+/*
+ * kstat class
+ *
+ * Each kstat can be characterized as belonging to some broad class
+ * of statistics, e.g. disk, tape, net, vm, streams, etc. This field
+ * can be used as a filter to extract related kstats. The following
+ * values are currently in use: disk, tape, net, controller, vm, kvm,
+ * hat, streams, kstat, and misc. (The kstat class encompasses things
+ * like kstat_types.)
+ */
+
+/*
+ * kstat flags
+ *
+ * Any of the following flags may be passed to kstat_create(). They are
+ * all zero by default.
+ *
+ * KSTAT_FLAG_VIRTUAL:
+ *
+ * Tells kstat_create() not to allocate memory for the
+ * kstat data section; instead, you will set the ks_data
+ * field to point to the data you wish to export. This
+ * provides a convenient way to export existing data
+ * structures.
+ *
+ * KSTAT_FLAG_VAR_SIZE:
+ *
+ * The size of the kstat you are creating will vary over time.
+ * For example, you may want to use the kstat mechanism to
+ * export a linked list. NOTE: The kstat framework does not
+ * manage the data section, so all variable-size kstats must be
+ * virtual kstats. Moreover, variable-size kstats MUST employ
+ * kstat data locking to prevent data-size races with kstat
+ * clients. See the section on "kstat snapshot" for details.
+ *
+ * KSTAT_FLAG_WRITABLE:
+ *
+ * Makes the kstat's data section writable by root.
+ * The ks_snapshot routine (see below) does not need to check for
+ * this; permission checking is handled in the kstat driver.
+ *
+ * KSTAT_FLAG_PERSISTENT:
+ *
+ * Indicates that this kstat is to be persistent over time.
+ * For persistent kstats, kstat_delete() simply marks the
+ * kstat as dormant; a subsequent kstat_create() reactivates
+ * the kstat. This feature is provided so that statistics
+ * are not lost across driver close/open (e.g., raw disk I/O
+ * on a disk with no mounted partitions.)
+ * NOTE: Persistent kstats cannot be virtual, since ks_data
+ * points to garbage as soon as the driver goes away.
+ *
+ * The following flags are maintained by the kstat framework:
+ *
+ * KSTAT_FLAG_DORMANT:
+ *
+ * For persistent kstats, indicates that the kstat is in the
+ * dormant state (e.g., the corresponding device is closed).
+ *
+ * KSTAT_FLAG_INVALID:
+ *
+ * This flag is set when a kstat is in a transitional state,
+ * e.g. between kstat_create() and kstat_install().
+ * kstat clients must not attempt to access the kstat's data
+ * if this flag is set.
+ */
+
+#define KSTAT_FLAG_VIRTUAL 0x01
+#define KSTAT_FLAG_VAR_SIZE 0x02
+#define KSTAT_FLAG_WRITABLE 0x04
+#define KSTAT_FLAG_PERSISTENT 0x08
+#define KSTAT_FLAG_DORMANT 0x10
+#define KSTAT_FLAG_INVALID 0x20
+
+/*
+ * Dynamic update support
+ *
+ * The kstat mechanism allows for an optional ks_update function to update
+ * kstat data. This is useful for drivers where the underlying device
+ * keeps cheap hardware stats, but extraction is expensive. Instead of
+ * constantly keeping the kstat data section up to date, you can supply a
+ * ks_update function which updates the kstat's data section on demand.
+ * To take advantage of this feature, simply set the ks_update field before
+ * calling kstat_install().
+ *
+ * The ks_update function, if supplied, must have the following structure:
+ *
+ * int
+ * foo_kstat_update(kstat_t *ksp, int rw)
+ * {
+ * if (rw == KSTAT_WRITE) {
+ * ... update the native stats from ksp->ks_data;
+ * return EACCES if you don't support this
+ * } else {
+ * ... update ksp->ks_data from the native stats
+ * }
+ * }
+ *
+ * The ks_update return codes are: 0 for success, EACCES if you don't allow
+ * KSTAT_WRITE, and EIO for any other type of error.
+ *
+ * In general, the ks_update function may need to refer to provider-private
+ * data; for example, it may need a pointer to the provider's raw statistics.
+ * The ks_private field is available for this purpose. Its use is entirely
+ * at the provider's discretion.
+ *
+ * All variable-size kstats MUST supply a ks_update routine, which computes
+ * and sets ks_data_size (and ks_ndata if that is meaningful), since these
+ * are needed to perform kstat snapshots (see below).
+ *
+ * No kstat locking should be done inside the ks_update routine. The caller
+ * will already be holding the kstat's ks_lock (to ensure consistent data).
+ */
+
+#define KSTAT_READ 0
+#define KSTAT_WRITE 1
+
+/*
+ * Kstat snapshot
+ *
+ * In order to get a consistent view of a kstat's data, clients must obey
+ * the kstat's locking strategy. However, these clients may need to perform
+ * operations on the data which could cause a fault (e.g. copyout()), or
+ * operations which are simply expensive. Doing so could cause deadlock
+ * (e.g. if you're holding a disk's kstat lock which is ultimately required
+ * to resolve a copyout() fault), performance degradation (since the providers'
+ * activity is serialized at the kstat lock), device timing problems, etc.
+ *
+ * To avoid these problems, kstat data is provided via snapshots. Taking
+ * a snapshot is a simple process: allocate a wired-down kernel buffer,
+ * acquire the kstat's data lock, copy the data into the buffer ("take the
+ * snapshot"), and release the lock. This ensures that the kstat's data lock
+ * will be held as briefly as possible, and that no faults will occur while
+ * the lock is held.
+ *
+ * Normally, the snapshot is taken by default_kstat_snapshot(), which
+ * timestamps the data (sets ks_snaptime), copies it, and does a little
+ * massaging to deal with incomplete transactions on i/o kstats. However,
+ * this routine only works for kstats with contiguous data (the typical case).
+ * If you create a kstat whose data is, say, a linked list, you must provide
+ * your own ks_snapshot routine. The routine you supply must have the
+ * following prototype (replace "foo" with something appropriate):
+ *
+ * int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
+ *
+ * The minimal snapshot routine -- one which copies contiguous data that
+ * doesn't need any massaging -- would be this:
+ *
+ * ksp->ks_snaptime = gethrtime();
+ * if (rw == KSTAT_WRITE)
+ * bcopy(buf, ksp->ks_data, ksp->ks_data_size);
+ * else
+ * bcopy(ksp->ks_data, buf, ksp->ks_data_size);
+ * return (0);
+ *
+ * A more illuminating example is taking a snapshot of a linked list:
+ *
+ * ksp->ks_snaptime = gethrtime();
+ * if (rw == KSTAT_WRITE)
+ * return (EACCES); ... See below ...
+ * for (foo = first_foo; foo; foo = foo->next) {
+ * bcopy((char *) foo, (char *) buf, sizeof (struct foo));
+ * buf = ((struct foo *) buf) + 1;
+ * }
+ * return (0);
+ *
+ * In the example above, we have decided that we don't want to allow
+ * KSTAT_WRITE access, so we return EACCES if this is attempted.
+ *
+ * The key points are:
+ *
+ * (1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
+ * (2) Data gets copied from the kstat to the buffer on KSTAT_READ,
+ * and from the buffer to the kstat on KSTAT_WRITE.
+ * (3) ks_snapshot return values are: 0 for success, EACCES if you
+ * don't allow KSTAT_WRITE, and EIO for any other type of error.
+ *
+ * Named kstats (see section on "Named statistics" below) containing long
+ * strings (KSTAT_DATA_STRING) need special handling. The kstat driver
+ * assumes that all strings are copied into the buffer after the array of
+ * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
+ * into the copy within the buffer. The default snapshot routine does this,
+ * but overriding routines should contain at least the following:
+ *
+ * if (rw == KSTAT_READ) {
+ * kstat_named_t *knp = buf;
+ * char *end = (char *)(knp + ksp->ks_ndata);
+ * uint_t i;
+ *
+ * ... Do the regular copy ...
+ * bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata);
+ *
+ * for (i = 0; i < ksp->ks_ndata; i++, knp++) {
+ * if (knp->data_type == KSTAT_DATA_STRING &&
+ * KSTAT_NAMED_STR_PTR(knp) != NULL) {
+ * bcopy(KSTAT_NAMED_STR_PTR(knp), end,
+ * KSTAT_NAMED_STR_BUFLEN(knp));
+ * KSTAT_NAMED_STR_PTR(knp) = end;
+ * end += KSTAT_NAMED_STR_BUFLEN(knp);
+ * }
+ * }
+ */
+
+/*
+ * Named statistics.
+ *
+ * List of arbitrary name=value statistics.
+ */
+
+typedef struct kstat_named {
+ char name[KSTAT_STRLEN]; /* name of counter */
+ uchar_t data_type; /* data type */
+ union {
+ char c[16]; /* enough for 128-bit ints */
+ int32_t i32;
+ uint32_t ui32;
+ struct {
+ union {
+ char *ptr; /* NULL-term string */
+#if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
+ caddr32_t ptr32;
+#endif
+ char __pad[8]; /* 64-bit padding */
+ } addr;
+ uint32_t len; /* # bytes for strlen + '\0' */
+ } str;
+/*
+ * The int64_t and uint64_t types are not valid for a maximally conformant
+ * 32-bit compilation environment (cc -Xc) using compilers prior to the
+ * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
+ * In these cases, the visibility of i64 and ui64 is only permitted for
+ * 64-bit compilation environments or 32-bit non-maximally conformant
+ * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
+ * C99 ANSI C compilation environment, the long long type is supported.
+ * The _INT64_TYPE is defined by the implementation (see sys/int_types.h).
+ */
+#if defined(_INT64_TYPE)
+ int64_t i64;
+ uint64_t ui64;
+#endif
+ long l;
+ ulong_t ul;
+
+ /* These structure members are obsolete */
+
+ longlong_t ll;
+ u_longlong_t ull;
+ float f;
+ double d;
+ } value; /* value of counter */
+} kstat_named_t;
+
+#define KSTAT_DATA_CHAR 0
+#define KSTAT_DATA_INT32 1
+#define KSTAT_DATA_UINT32 2
+#define KSTAT_DATA_INT64 3
+#define KSTAT_DATA_UINT64 4
+
+#if !defined(_LP64)
+#define KSTAT_DATA_LONG KSTAT_DATA_INT32
+#define KSTAT_DATA_ULONG KSTAT_DATA_UINT32
+#else
+#if !defined(_KERNEL)
+#define KSTAT_DATA_LONG KSTAT_DATA_INT64
+#define KSTAT_DATA_ULONG KSTAT_DATA_UINT64
+#else
+#define KSTAT_DATA_LONG 7 /* only visible to the kernel */
+#define KSTAT_DATA_ULONG 8 /* only visible to the kernel */
+#endif /* !_KERNEL */
+#endif /* !_LP64 */
+
+/*
+ * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
+ * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
+ * (kstat_named_t)). ks_data_size in these cases is equal to the sum of the
+ * amount of space required to store the strings (ie, the sum of
+ * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
+ * space required to store the kstat_named_t's.
+ *
+ * The default update routine will update ks_data_size automatically for
+ * variable-length kstats containing long strings (using the default update
+ * routine only makes sense if the string is the only thing that is changing
+ * in size, and ks_ndata is constant). Fixed-length kstats containing long
+ * strings must explicitly change ks_data_size (after creation but before
+ * initialization) to reflect the correct amount of space required for the
+ * long strings and the kstat_named_t's.
+ */
+#define KSTAT_DATA_STRING 9
+
+/* These types are obsolete */
+
+#define KSTAT_DATA_LONGLONG KSTAT_DATA_INT64
+#define KSTAT_DATA_ULONGLONG KSTAT_DATA_UINT64
+#define KSTAT_DATA_FLOAT 5
+#define KSTAT_DATA_DOUBLE 6
+
+#define KSTAT_NAMED_PTR(kptr) ((kstat_named_t *)(kptr)->ks_data)
+
+/*
+ * Retrieve the pointer of the string contained in the given named kstat.
+ */
+#define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)
+
+/*
+ * Retrieve the length of the buffer required to store the string in the given
+ * named kstat.
+ */
+#define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
+
+/*
+ * Interrupt statistics.
+ *
+ * An interrupt is a hard interrupt (sourced from the hardware device
+ * itself), a soft interrupt (induced by the system via the use of
+ * some system interrupt source), a watchdog interrupt (induced by
+ * a periodic timer call), spurious (an interrupt entry point was
+ * entered but there was no interrupt condition to service),
+ * or multiple service (an interrupt condition was detected and
+ * serviced just prior to returning from any of the other types).
+ *
+ * Measurement of the spurious class of interrupts is useful for
+ * autovectored devices in order to pinpoint any interrupt latency
+ * problems in a particular system configuration.
+ *
+ * Devices that have more than one interrupt of the same
+ * type should use multiple structures.
+ */
+
+#define KSTAT_INTR_HARD 0
+#define KSTAT_INTR_SOFT 1
+#define KSTAT_INTR_WATCHDOG 2
+#define KSTAT_INTR_SPURIOUS 3
+#define KSTAT_INTR_MULTSVC 4
+
+#define KSTAT_NUM_INTRS 5
+
+typedef struct kstat_intr {
+ uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */
+} kstat_intr_t;
+
+#define KSTAT_INTR_PTR(kptr) ((kstat_intr_t *)(kptr)->ks_data)
+
+/*
+ * I/O statistics.
+ */
+
+typedef struct kstat_io {
+
+ /*
+ * Basic counters.
+ *
+ * The counters should be updated at the end of service
+ * (e.g., just prior to calling biodone()).
+ */
+
+ u_longlong_t nread; /* number of bytes read */
+ u_longlong_t nwritten; /* number of bytes written */
+ uint_t reads; /* number of read operations */
+ uint_t writes; /* number of write operations */
+
+ /*
+ * Accumulated time and queue length statistics.
+ *
+ * Accumulated time statistics are kept as a running sum
+ * of "active" time. Queue length statistics are kept as a
+ * running sum of the product of queue length and elapsed time
+ * at that length -- i.e., a Riemann sum for queue length
+ * integrated against time. (You can also think of the active time
+ * as a Riemann sum, for the boolean function (queue_length > 0)
+ * integrated against time, or you can think of it as the
+ * Lebesgue measure of the set on which queue_length > 0.)
+ *
+ * ^
+ * | _________
+ * 8 | i4 |
+ * | | |
+ * Queue 6 | |
+ * Length | _________ | |
+ * 4 | i2 |_______| |
+ * | | i3 |
+ * 2_______| |
+ * | i1 |
+ * |_______________________________|
+ * Time-> t1 t2 t3 t4
+ *
+ * At each change of state (entry or exit from the queue),
+ * we add the elapsed time (since the previous state change)
+ * to the active time if the queue length was non-zero during
+ * that interval; and we add the product of the elapsed time
+ * times the queue length to the running length*time sum.
+ *
+ * This method is generalizable to measuring residency
+ * in any defined system: instead of queue lengths, think
+ * of "outstanding RPC calls to server X".
+ *
+ * A large number of I/O subsystems have at least two basic
+ * "lists" of transactions they manage: one for transactions
+ * that have been accepted for processing but for which processing
+ * has yet to begin, and one for transactions which are actively
+ * being processed (but not done). For this reason, two cumulative
+ * time statistics are defined here: wait (pre-service) time,
+ * and run (service) time.
+ *
+ * All times are 64-bit nanoseconds (hrtime_t), as returned by
+ * gethrtime().
+ *
+ * The units of cumulative busy time are accumulated nanoseconds.
+ * The units of cumulative length*time products are elapsed time
+ * times queue length.
+ *
+ * Updates to the fields below are performed implicitly by calls to
+ * these six functions:
+ *
+ * kstat_waitq_enter()
+ * kstat_waitq_exit()
+ * kstat_runq_enter()
+ * kstat_runq_exit()
+ *
+ * kstat_waitq_to_runq() (see below)
+ * kstat_runq_back_to_waitq() (see below)
+ *
+ * Since kstat_waitq_exit() is typically followed immediately
+ * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
+ * function which performs both operations. This is a performance
+ * win since only one timestamp is required.
+ *
+ * In some instances, it may be necessary to move a request from
+ * the run queue back to the wait queue, e.g. for write throttling.
+ * For these situations, call kstat_runq_back_to_waitq().
+ *
+ * These fields should never be updated by any other means.
+ */
+
+ hrtime_t wtime; /* cumulative wait (pre-service) time */
+ hrtime_t wlentime; /* cumulative wait length*time product */
+ hrtime_t wlastupdate; /* last time wait queue changed */
+ hrtime_t rtime; /* cumulative run (service) time */
+ hrtime_t rlentime; /* cumulative run length*time product */
+ hrtime_t rlastupdate; /* last time run queue changed */
+
+ uint_t wcnt; /* count of elements in wait state */
+ uint_t rcnt; /* count of elements in run state */
+
+} kstat_io_t;
+
+#define KSTAT_IO_PTR(kptr) ((kstat_io_t *)(kptr)->ks_data)
+
+/*
+ * Event timer statistics - cumulative elapsed time and number of events.
+ *
+ * Updates to these fields are performed implicitly by calls to
+ * kstat_timer_start() and kstat_timer_stop().
+ */
+
+typedef struct kstat_timer {
+ char name[KSTAT_STRLEN]; /* event name */
+ uchar_t resv; /* reserved */
+ u_longlong_t num_events; /* number of events */
+ hrtime_t elapsed_time; /* cumulative elapsed time */
+ hrtime_t min_time; /* shortest event duration */
+ hrtime_t max_time; /* longest event duration */
+ hrtime_t start_time; /* previous event start time */
+ hrtime_t stop_time; /* previous event stop time */
+} kstat_timer_t;
+
+#define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data)
+
+#if defined(_KERNEL)
+
+#include <sys/t_lock.h>
+
+extern kid_t kstat_chain_id; /* bumped at each state change */
+extern void kstat_init(void); /* initialize kstat framework */
+
+/*
+ * Adding and deleting kstats.
+ *
+ * The typical sequence to add a kstat is:
+ *
+ * ksp = kstat_create(module, instance, name, class, type, ndata, flags);
+ * if (ksp) {
+ * ... provider initialization, if necessary
+ * kstat_install(ksp);
+ * }
+ *
+ * There are three logically distinct steps here:
+ *
+ * Step 1: System Initialization (kstat_create)
+ *
+ * kstat_create() performs system initialization. kstat_create()
+ * allocates memory for the entire kstat (header plus data), initializes
+ * all header fields, initializes the data section to all zeroes, assigns
+ * a unique KID, and puts the kstat onto the system's kstat chain.
+ * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
+ * because the provider (caller) has not yet had a chance to initialize
+ * the data section.
+ *
+ * By default, kstats are exported to all zones on the system. A kstat may be
+ * created via kstat_create_zone() to specify a zone to which the statistics
+ * should be exported. kstat_zone_add() may be used to specify additional
+ * zones to which the statistics are to be exported.
+ *
+ * Step 2: Provider Initialization
+ *
+ * The provider performs any necessary initialization of the data section,
+ * e.g. setting the name fields in a KSTAT_TYPE_NAMED. Virtual kstats set
+ * the ks_data field at this time. The provider may also set the ks_update,
+ * ks_snapshot, ks_private, and ks_lock fields if necessary.
+ *
+ * Step 3: Installation (kstat_install)
+ *
+ * Once the kstat is completely initialized, kstat_install() clears the
+ * INVALID flag, thus making the kstat accessible to the outside world.
+ * kstat_install() also clears the DORMANT flag for persistent kstats.
+ *
+ * Removing a kstat from the system
+ *
+ * kstat_delete(ksp) removes ksp from the kstat chain and frees all
+ * associated system resources. NOTE: When you call kstat_delete(),
+ * you must NOT be holding that kstat's ks_lock. Otherwise, you may
+ * deadlock with a kstat reader.
+ *
+ * Persistent kstats
+ *
+ * From the provider's point of view, persistence is transparent. The only
+ * difference between ephemeral (normal) kstats and persistent kstats
+ * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create(). Magically,
+ * this has the effect of making your data visible even when you're
+ * not home. Persistence is important to tools like iostat, which want
+ * to get a meaningful picture of disk activity. Without persistence,
+ * raw disk i/o statistics could never accumulate: they would come and
+ * go with each open/close of the raw device.
+ *
+ * The magic of persistence works by slightly altering the behavior of
+ * kstat_create() and kstat_delete(). The first call to kstat_create()
+ * creates a new kstat, as usual. However, kstat_delete() does not
+ * actually delete the kstat: it performs one final update of the data
+ * (i.e., calls the ks_update routine), marks the kstat as dormant, and
+ * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
+ * to their default values (since they might otherwise point to garbage,
+ * e.g. if the provider is going away). kstat clients can still access
+ * the dormant kstat just like a live kstat; they just continue to see
+ * the final data values as long as the kstat remains dormant.
+ * All subsequent kstat_create() calls simply find the already-existing,
+ * dormant kstat and return a pointer to it, without altering any fields.
+ * The provider then performs its usual initialization sequence, and
+ * calls kstat_install(). kstat_install() uses the old data values to
+ * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
+ * thus making it seem like you were never gone.
+ */
+
+extern kstat_t *kstat_create(const char *, int, const char *, const char *,
+ uchar_t, uint_t, uchar_t);
+extern kstat_t *kstat_create_zone(const char *, int, const char *,
+ const char *, uchar_t, uint_t, uchar_t, zoneid_t);
+extern void kstat_install(kstat_t *);
+extern void kstat_delete(kstat_t *);
+extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
+extern void kstat_set_string(char *, const char *);
+extern void kstat_delete_byname(const char *, int, const char *);
+extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
+extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
+extern void kstat_timer_init(kstat_timer_t *, const char *);
+extern void kstat_waitq_enter(kstat_io_t *);
+extern void kstat_waitq_exit(kstat_io_t *);
+extern void kstat_runq_enter(kstat_io_t *);
+extern void kstat_runq_exit(kstat_io_t *);
+extern void kstat_waitq_to_runq(kstat_io_t *);
+extern void kstat_runq_back_to_waitq(kstat_io_t *);
+extern void kstat_timer_start(kstat_timer_t *);
+extern void kstat_timer_stop(kstat_timer_t *);
+
+extern void kstat_zone_add(kstat_t *, zoneid_t);
+extern void kstat_zone_remove(kstat_t *, zoneid_t);
+extern int kstat_zone_find(kstat_t *, zoneid_t);
+
+extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
+extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
+extern void kstat_rele(kstat_t *);
+
+#endif /* defined(_KERNEL) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_KSTAT_H */
diff --git a/lib/libspl/include/sys/list.h b/lib/libspl/include/sys/list.h
new file mode 100644
index 000000000..6db92ed42
--- /dev/null
+++ b/lib/libspl/include/sys/list.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LIST_H
+#define _SYS_LIST_H
+
+#include <sys/list_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct list_node list_node_t;
+typedef struct list list_t;
+
+void list_create(list_t *, size_t, size_t);
+void list_destroy(list_t *);
+
+void list_insert_after(list_t *, void *, void *);
+void list_insert_before(list_t *, void *, void *);
+void list_insert_head(list_t *, void *);
+void list_insert_tail(list_t *, void *);
+void list_remove(list_t *, void *);
+void *list_remove_head(list_t *);
+void *list_remove_tail(list_t *);
+void list_move_tail(list_t *, list_t *);
+
+void *list_head(list_t *);
+void *list_tail(list_t *);
+void *list_next(list_t *, void *);
+void *list_prev(list_t *, void *);
+int list_is_empty(list_t *);
+
+void list_link_init(list_node_t *);
+void list_link_replace(list_node_t *, list_node_t *);
+
+int list_link_active(list_node_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LIST_H */
diff --git a/lib/libspl/include/sys/list_impl.h b/lib/libspl/include/sys/list_impl.h
new file mode 100644
index 000000000..a6614f9a3
--- /dev/null
+++ b/lib/libspl/include/sys/list_impl.h
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LIST_IMPL_H
+#define _SYS_LIST_IMPL_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct list_node {
+ struct list_node *list_next;
+ struct list_node *list_prev;
+};
+
+struct list {
+ size_t list_size;
+ size_t list_offset;
+ struct list_node list_head;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LIST_IMPL_H */
diff --git a/lib/libspl/include/sys/machelf.h b/lib/libspl/include/sys/machelf.h
new file mode 100644
index 000000000..f4c5c356f
--- /dev/null
+++ b/lib/libspl/include/sys/machelf.h
@@ -0,0 +1,180 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_MACHELF_H
+#define _SYS_MACHELF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__amd64)
+#include <sys/elf_amd64.h>
+#elif defined(__i386)
+#include <sys/elf_386.h>
+#elif defined(__sparc)
+#include <sys/elf_SPARC.h>
+#endif
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/elf.h>
+#include <sys/link.h> /* for Elf*_Dyn */
+#endif /* _ASM */
+
+/*
+ * Make machine class dependent data types transparent to the common code
+ */
+#if defined(_ELF64) && !defined(_ELF32_COMPAT)
+
+#ifndef _ASM
+typedef Elf64_Xword Xword;
+typedef Elf64_Lword Lword;
+typedef Elf64_Sxword Sxword;
+typedef Elf64_Word Word;
+typedef Elf64_Sword Sword;
+typedef Elf64_Half Half;
+typedef Elf64_Addr Addr;
+typedef Elf64_Off Off;
+typedef uchar_t Byte;
+#endif /* _ASM */
+
+#if defined(_KERNEL)
+#define ELF_R_TYPE ELF64_R_TYPE
+#define ELF_R_SYM ELF64_R_SYM
+#define ELF_R_TYPE_DATA ELF64_R_TYPE_DATA
+#define ELF_R_INFO ELF64_R_INFO
+#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_ST_TYPE ELF64_ST_TYPE
+#define ELF_M_SYM ELF64_M_SYM
+#define ELF_M_SIZE ELF64_M_SIZE
+#endif /* _KERNEL */
+
+#ifndef _ASM
+typedef Elf64_Ehdr Ehdr;
+typedef Elf64_Shdr Shdr;
+typedef Elf64_Sym Sym;
+typedef Elf64_Syminfo Syminfo;
+typedef Elf64_Rela Rela;
+typedef Elf64_Rel Rel;
+typedef Elf64_Nhdr Nhdr;
+typedef Elf64_Phdr Phdr;
+typedef Elf64_Dyn Dyn;
+typedef Elf64_Boot Boot;
+typedef Elf64_Verdef Verdef;
+typedef Elf64_Verdaux Verdaux;
+typedef Elf64_Verneed Verneed;
+typedef Elf64_Vernaux Vernaux;
+typedef Elf64_Versym Versym;
+typedef Elf64_Move Move;
+typedef Elf64_Cap Cap;
+#endif /* _ASM */
+
+#else /* !_ELF64 || _ELF32_COMPAT */
+
+#ifndef _ASM
+typedef Elf32_Word Xword; /* Xword/Sxword are 32-bits in Elf32 */
+typedef Elf32_Lword Lword;
+typedef Elf32_Sword Sxword;
+typedef Elf32_Word Word;
+typedef Elf32_Sword Sword;
+typedef Elf32_Half Half;
+typedef Elf32_Addr Addr;
+typedef Elf32_Off Off;
+typedef uchar_t Byte;
+#endif /* _ASM */
+
+#if defined(_KERNEL)
+#define ELF_R_TYPE ELF32_R_TYPE
+#define ELF_R_SYM ELF32_R_SYM
+#define ELF_R_TYPE_DATA(x) (0)
+#define ELF_R_INFO ELF32_R_INFO
+#define ELF_ST_BIND ELF32_ST_BIND
+#define ELF_ST_TYPE ELF32_ST_TYPE
+#define ELF_M_SYM ELF32_M_SYM
+#define ELF_M_SIZE ELF32_M_SIZE
+#endif /* _KERNEL */
+
+#ifndef _ASM
+typedef Elf32_Ehdr Ehdr;
+typedef Elf32_Shdr Shdr;
+typedef Elf32_Sym Sym;
+typedef Elf32_Syminfo Syminfo;
+typedef Elf32_Rela Rela;
+typedef Elf32_Rel Rel;
+typedef Elf32_Nhdr Nhdr;
+typedef Elf32_Phdr Phdr;
+typedef Elf32_Dyn Dyn;
+typedef Elf32_Boot Boot;
+typedef Elf32_Verdef Verdef;
+typedef Elf32_Verdaux Verdaux;
+typedef Elf32_Verneed Verneed;
+typedef Elf32_Vernaux Vernaux;
+typedef Elf32_Versym Versym;
+typedef Elf32_Move Move;
+typedef Elf32_Cap Cap;
+#endif /* _ASM */
+
+#endif /* _ELF64 && !_ELF32_COMPAT */
+
+/*
+ * Elf `printf' type-cast macros. These force arguments to be a fixed size
+ * so that Elf32 and Elf64 can share common format strings.
+ */
+#ifndef __lint
+#define EC_ADDR(a) ((Elf64_Addr)(a)) /* "ull" */
+#define EC_OFF(a) ((Elf64_Off)(a)) /* "ull" */
+#define EC_HALF(a) ((Elf64_Half)(a)) /* "d" */
+#define EC_WORD(a) ((Elf64_Word)(a)) /* "u" */
+#define EC_SWORD(a) ((Elf64_Sword)(a)) /* "d" */
+#define EC_XWORD(a) ((Elf64_Xword)(a)) /* "ull" */
+#define EC_SXWORD(a) ((Elf64_Sxword)(a)) /* "ll" */
+#define EC_LWORD(a) ((Elf64_Lword)(a)) /* "ull" */
+
+/*
+ * A native pointer is special. Although it can be convenient to display
+ * these from a common format (ull), compilers may flag the cast of a pointer
+ * to an integer as illegal. Casting these pointers to uintptr_t, the native
+ * pointer size, first suppresses any such compiler errors.
+ */
+#define EC_NATPTR(a) ((Elf64_Xword)(uintptr_t)(a)) /* "ull" */
+#else /* __lint */
+#define EC_ADDR(a) ((u_longlong_t)(a))
+#define EC_OFF(a) ((u_longlong_t)(a))
+#define EC_HALF(a) ((ushort_t)(a))
+#define EC_WORD(a) ((uint_t)(a))
+#define EC_SWORD(a) ((int)(a))
+#define EC_XWORD(a) ((u_longlong_t)(a))
+#define EC_SXWORD(a) ((longlong_t)(a))
+#define EC_LWORD(a) ((u_longlong_t)(a))
+
+#define EC_NATPTR(a) ((u_longlong_t)(a))
+#endif /* __lint */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MACHELF_H */
diff --git a/lib/libspl/include/sys/mhd.h b/lib/libspl/include/sys/mhd.h
new file mode 100644
index 000000000..fcc062d51
--- /dev/null
+++ b/lib/libspl/include/sys/mhd.h
@@ -0,0 +1,159 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_MHD_H
+#define _SYS_MHD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Definitions for multi-host device I/O control commands
+ */
+#define MHIOC ('M'<<8)
+#define MHIOCENFAILFAST (MHIOC|1)
+#define MHIOCTKOWN (MHIOC|2)
+#define MHIOCRELEASE (MHIOC|3)
+#define MHIOCSTATUS (MHIOC|4)
+#define MHIOCGRP_INKEYS (MHIOC|5)
+#define MHIOCGRP_INRESV (MHIOC|6)
+#define MHIOCGRP_REGISTER (MHIOC|7)
+#define MHIOCGRP_RESERVE (MHIOC|8)
+#define MHIOCGRP_PREEMPTANDABORT (MHIOC|9)
+#define MHIOCGRP_PREEMPT (MHIOC|10)
+#define MHIOCGRP_CLEAR (MHIOC|11)
+#define MHIOCGRP_REGISTERANDIGNOREKEY (MHIOC|14)
+#define MHIOCQRESERVE (MHIOC|12)
+#define MHIOCREREGISTERDEVID (MHIOC|13)
+
+/*
+ * Following is the structure to specify the delay parameters in
+ * milliseconds, via the MHIOCTKOWN ioctl.
+ */
+struct mhioctkown {
+ int reinstate_resv_delay;
+ int min_ownership_delay;
+ int max_ownership_delay;
+};
+
+#define MHIOC_RESV_KEY_SIZE 8
+typedef struct mhioc_resv_key {
+ uchar_t key[MHIOC_RESV_KEY_SIZE];
+} mhioc_resv_key_t;
+
+typedef struct mhioc_key_list {
+ uint32_t listsize;
+ uint32_t listlen;
+ mhioc_resv_key_t *list;
+} mhioc_key_list_t;
+
+typedef struct mhioc_inkeys {
+ uint32_t generation;
+ mhioc_key_list_t *li;
+} mhioc_inkeys_t;
+
+#if defined(_SYSCALL32)
+typedef struct mhioc_key_list32 { /* mhioc_key_list_t with a caddr32_t list */
+ uint32_t listsize;
+ uint32_t listlen;
+ caddr32_t list;
+} mhioc_key_list32_t;
+
+typedef struct mhioc_inkeys32 { /* mhioc_inkeys_t with a caddr32_t li */
+ uint32_t generation;
+ caddr32_t li;
+} mhioc_inkeys32_t;
+#endif
+
+typedef struct mhioc_resv_desc {
+ mhioc_resv_key_t key;
+ uint8_t type;
+ uint8_t scope;
+ uint32_t scope_specific_addr;
+} mhioc_resv_desc_t;
+
+typedef struct mhioc_resv_desc_list {
+ uint32_t listsize;
+ uint32_t listlen;
+ mhioc_resv_desc_t *list;
+} mhioc_resv_desc_list_t;
+
+typedef struct mhioc_inresvs {
+ uint32_t generation;
+ mhioc_resv_desc_list_t *li;
+} mhioc_inresvs_t;
+
+#if defined(_SYSCALL32)
+typedef struct mhioc_resv_desc_list32 { /* mhioc_resv_desc_list_t, caddr32_t list */
+ uint32_t listsize;
+ uint32_t listlen;
+ caddr32_t list;
+} mhioc_resv_desc_list32_t;
+
+typedef struct mhioc_inresvs32 {
+ uint32_t generation;
+ caddr32_t li;
+} mhioc_inresvs32_t;
+#endif
+
+typedef struct mhioc_register {
+ mhioc_resv_key_t oldkey;
+ mhioc_resv_key_t newkey;
+ boolean_t aptpl; /* True if persistent across power failures */
+} mhioc_register_t;
+
+typedef struct mhioc_preemptandabort {
+ mhioc_resv_desc_t resvdesc;
+ mhioc_resv_key_t victim_key;
+} mhioc_preemptandabort_t;
+
+typedef struct mhioc_registerandignorekey {
+ mhioc_resv_key_t newkey;
+ boolean_t aptpl; /* True if persistent across power failures */
+} mhioc_registerandignorekey_t;
+
+/*
+ * SCSI-3 PGR Reservation Type Codes. Codes with the _OBSOLETE suffix
+ * have been removed from the SCSI3 PGR standard.
+ */
+#define SCSI3_RESV_READSHARED_OBSOLETE 0
+#define SCSI3_RESV_WRITEEXCLUSIVE 1
+#define SCSI3_RESV_READEXCLUSIVE_OBSOLETE 2
+#define SCSI3_RESV_EXCLUSIVEACCESS 3
+#define SCSI3_RESV_SHAREDACCESS_OBSOLETE 4
+#define SCSI3_RESV_WRITEEXCLUSIVEREGISTRANTSONLY 5
+#define SCSI3_RESV_EXCLUSIVEACCESSREGISTRANTSONLY 6
+
+#define SCSI3_SCOPE_LOGICALUNIT 0
+#define SCSI3_SCOPE_EXTENT_OBSOLETE 1
+#define SCSI3_SCOPE_ELEMENT 2
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MHD_H */
diff --git a/lib/libspl/include/sys/mkdev.h b/lib/libspl/include/sys/mkdev.h
new file mode 100644
index 000000000..76e3a4fff
--- /dev/null
+++ b/lib/libspl/include/sys/mkdev.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_MKDEV_H
+#define _LIBSPL_SYS_MKDEV_H
+
+#endif /* _LIBSPL_SYS_MKDEV_H */
diff --git a/lib/libspl/include/sys/mntent.h b/lib/libspl/include/sys/mntent.h
new file mode 100644
index 000000000..c0594ca7b
--- /dev/null
+++ b/lib/libspl/include/sys/mntent.h
@@ -0,0 +1,142 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+ * All Rights Reserved
+ */
+
+#ifndef _SYS_MNTENT_H
+#define _SYS_MNTENT_H
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MNTTAB "/proc/mounts"
+#define VFSTAB "/etc/vfstab"
+#define MNTMAXSTR 128
+
+#define MNTTYPE_ZFS "zfs" /* ZFS file system */
+#define MNTTYPE_UFS "ufs" /* Unix file system */
+#define MNTTYPE_SMBFS "smbfs" /* SMBFS file system */
+#define MNTTYPE_NFS "nfs" /* NFS file system */
+#define MNTTYPE_NFS3 "nfs3" /* NFS Version 3 file system */
+#define MNTTYPE_NFS4 "nfs4" /* NFS Version 4 file system */
+#define MNTTYPE_CACHEFS "cachefs" /* Cache File System */
+#define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */
+#define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */
+#define MNTTYPE_LOFS "lofs" /* Loop back file system */
+#define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */
+#define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */
+#define MNTTYPE_SWAP "swap" /* Swap file system */
+#define MNTTYPE_TMPFS "tmpfs" /* Tmp volatile file system */
+#define MNTTYPE_AUTOFS "autofs" /* Automounter ``file'' system */
+#define MNTTYPE_MNTFS "mntfs" /* In-kernel mnttab */
+#define MNTTYPE_DEV "dev" /* /dev file system */
+#define MNTTYPE_CTFS "ctfs" /* Contract file system */
+#define MNTTYPE_OBJFS "objfs" /* Kernel object file system */
+#define MNTTYPE_SHAREFS "sharefs" /* Kernel sharetab file system */
+
+
+#define MNTOPT_RO "ro" /* Read only */
+#define MNTOPT_RW "rw" /* Read/write */
+#define MNTOPT_RQ "rq" /* Read/write with quotas */
+#define MNTOPT_QUOTA "quota" /* Check quotas */
+#define MNTOPT_NOQUOTA "noquota" /* Don't check quotas */
+#define MNTOPT_ONERROR "onerror" /* Action to take on error */
+#define MNTOPT_SOFT "soft" /* Soft mount */
+#define MNTOPT_SEMISOFT "semisoft" /* Partial soft, uncommitted interface */
+#define MNTOPT_HARD "hard" /* Hard mount */
+#define MNTOPT_SUID "suid" /* Both setuid and devices allowed */
+#define MNTOPT_NOSUID "nosuid" /* Neither setuid nor devices allowed */
+#define MNTOPT_DEVICES "devices" /* Device-special allowed */
+#define MNTOPT_NODEVICES "nodevices" /* Device-special disallowed */
+#define MNTOPT_SETUID "setuid" /* Set uid allowed */
+#define MNTOPT_NOSETUID "nosetuid" /* Set uid not allowed */
+#define MNTOPT_GRPID "grpid" /* SysV-compatible gid on create */
+#define MNTOPT_REMOUNT "remount" /* Change mount options */
+#define MNTOPT_NOSUB "nosub" /* Disallow mounts on subdirs */
+#define MNTOPT_MULTI "multi" /* Do multi-component lookup */
+#define MNTOPT_INTR "intr" /* Allow NFS ops to be interrupted */
+#define MNTOPT_NOINTR "nointr" /* Don't allow interrupted ops */
+#define MNTOPT_PORT "port" /* NFS server IP port number */
+#define MNTOPT_SECURE "secure" /* Secure (AUTH_DES) mounting */
+#define MNTOPT_RSIZE "rsize" /* Max NFS read size (bytes) */
+#define MNTOPT_WSIZE "wsize" /* Max NFS write size (bytes) */
+#define MNTOPT_TIMEO "timeo" /* NFS timeout (1/10 sec) */
+#define MNTOPT_RETRANS "retrans" /* Max retransmissions (soft mnts) */
+#define MNTOPT_ACTIMEO "actimeo" /* Attr cache timeout (sec) */
+#define MNTOPT_ACREGMIN "acregmin" /* Min attr cache timeout (files) */
+#define MNTOPT_ACREGMAX "acregmax" /* Max attr cache timeout (files) */
+#define MNTOPT_ACDIRMIN "acdirmin" /* Min attr cache timeout (dirs) */
+#define MNTOPT_ACDIRMAX "acdirmax" /* Max attr cache timeout (dirs) */
+#define MNTOPT_NOAC "noac" /* Don't cache attributes at all */
+#define MNTOPT_NOCTO "nocto" /* No close-to-open consistency */
+#define MNTOPT_BG "bg" /* Do mount retries in background */
+#define MNTOPT_FG "fg" /* Do mount retries in foreground */
+#define MNTOPT_RETRY "retry" /* Number of mount retries */
+#define MNTOPT_DEV "dev" /* Device id of mounted fs */
+#define MNTOPT_POSIX "posix" /* Get static pathconf for mount */
+#define MNTOPT_MAP "map" /* Automount map */
+#define MNTOPT_DIRECT "direct" /* Automount direct map mount */
+#define MNTOPT_INDIRECT "indirect" /* Automount indirect map mount */
+#define MNTOPT_LLOCK "llock" /* Local locking (no lock manager) */
+#define MNTOPT_IGNORE "ignore" /* Ignore this entry */
+#define MNTOPT_VERS "vers" /* protocol version number indicator */
+#define MNTOPT_PROTO "proto" /* protocol network_id indicator */
+#define MNTOPT_SEC "sec" /* Security flavor indicator */
+#define MNTOPT_SYNCDIR "syncdir" /* Synchronous local directory ops */
+#define MNTOPT_NOSETSEC "nosec" /* Do not allow setting sec attrs */
+#define MNTOPT_NOPRINT "noprint" /* Do not print messages */
+#define MNTOPT_LARGEFILES "largefiles" /* allow large files */
+#define MNTOPT_NOLARGEFILES "nolargefiles" /* don't allow large files */
+#define MNTOPT_FORCEDIRECTIO "forcedirectio" /* Force DirectIO on all files */
+#define MNTOPT_NOFORCEDIRECTIO "noforcedirectio" /* No Force DirectIO */
+#define MNTOPT_DISABLEDIRECTIO "disabledirectio" /* Disable DirectIO ioctls */
+#define MNTOPT_PUBLIC "public" /* Use NFS public file handle */
+#define MNTOPT_LOGGING "logging" /* enable logging */
+#define MNTOPT_NOLOGGING "nologging" /* disable logging */
+#define MNTOPT_ATIME "atime" /* update atime for files */
+#define MNTOPT_NOATIME "noatime" /* do not update atime for files */
+#define MNTOPT_GLOBAL "global" /* Cluster-wide global mount */
+#define MNTOPT_NOGLOBAL "noglobal" /* Mount local to single node */
+#define MNTOPT_DFRATIME "dfratime" /* Deferred access time updates */
+#define MNTOPT_NODFRATIME "nodfratime" /* No Deferred access time updates */
+#define MNTOPT_NBMAND "nbmand" /* allow non-blocking mandatory locks */
+#define MNTOPT_NONBMAND "nonbmand" /* deny non-blocking mandatory locks */
+#define MNTOPT_XATTR "xattr" /* enable extended attributes */
+#define MNTOPT_NOXATTR "noxattr" /* disable extended attributes */
+#define MNTOPT_EXEC "exec" /* enable executables */
+#define MNTOPT_NOEXEC "noexec" /* disable executables */
+#define MNTOPT_RESTRICT "restrict" /* restricted autofs mount */
+#define MNTOPT_BROWSE "browse" /* browsable autofs mount */
+#define MNTOPT_NOBROWSE "nobrowse" /* non-browsable autofs mount */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MNTENT_H */
diff --git a/lib/libspl/include/sys/mnttab.h b/lib/libspl/include/sys/mnttab.h
new file mode 100644
index 000000000..70f144967
--- /dev/null
+++ b/lib/libspl/include/sys/mnttab.h
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T*/
+/* All Rights Reserved */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2006 Ricardo Correia */
+
+#ifndef _SYS_MNTTAB_H
+#define _SYS_MNTTAB_H
+
+#include <stdio.h>
+#include <mntent.h>
+#include <sys/types.h>
+
+#ifdef MNTTAB
+#undef MNTTAB
+#endif
+
+#define MNTTAB "/proc/mounts"
+#define MNT_LINE_MAX 1024
+
+#define MNT_TOOLONG 1 /* entry exceeds MNT_LINE_MAX */
+#define MNT_TOOMANY 2 /* too many fields in line */
+#define MNT_TOOFEW 3 /* too few fields in line */
+
+struct mnttab {
+ char *mnt_special;
+ char *mnt_mountp;
+ char *mnt_fstype;
+ char *mnt_mntopts;
+};
+
+/*
+ * NOTE: fields in extmnttab should match struct mnttab till new fields
+ * are encountered, this allows hasmntopt to work properly when its arg is
+ * a pointer to an extmnttab struct cast to a mnttab struct pointer.
+ */
+
+struct extmnttab {
+ char *mnt_special;
+ char *mnt_mountp;
+ char *mnt_fstype;
+ char *mnt_mntopts;
+ uint_t mnt_major;
+ uint_t mnt_minor;
+};
+
+extern int getmntany(FILE *fp, struct mnttab *mp, struct mnttab *mpref);
+extern int _sol_getmntent(FILE *fp, struct mnttab *mp);
+extern int getextmntent(FILE *fp, struct extmnttab *mp, int len);
+/* struct mnttab front end for the hasmntopt() that takes a struct mntent. */
+static inline char *_sol_hasmntopt(struct mnttab *mnt, char *opt)
+{
+ struct mntent mnt_new;
+ /* Only mnt_opts is set; every other mnt_new field is left uninitialized. */
+ mnt_new.mnt_opts = mnt->mnt_mntopts;
+
+ return hasmntopt(&mnt_new, opt);
+}
+
+#define hasmntopt _sol_hasmntopt
+#define getmntent _sol_getmntent
+
+#endif
diff --git a/lib/libspl/include/sys/mount.h b/lib/libspl/include/sys/mount.h
new file mode 100644
index 000000000..144f915d2
--- /dev/null
+++ b/lib/libspl/include/sys/mount.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <sys/mount.h>
+
+#ifndef _LIBSPL_SYS_MOUNT_H
+#define _LIBSPL_SYS_MOUNT_H
+
+#include <sys/mntent.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+/*
+ * Some old glibc headers don't define BLKGETSIZE64
+ * and we don't want to require the kernel headers
+ */
+#if !defined(BLKGETSIZE64)
+#define BLKGETSIZE64 _IOR(0x12, 114, size_t)
+#endif
+
+#define MS_FORCE MNT_FORCE
+#define MS_OVERLAY 32768
+#define MS_NOMNTTAB 0 /* Not supported in Linux */
+#define MS_OPTIONSTR 0 /* Not necessary in Linux */
+
+#endif /* _LIBSPL_SYS_MOUNT_H */
diff --git a/lib/libspl/include/sys/note.h b/lib/libspl/include/sys/note.h
new file mode 100644
index 000000000..88e0eabcd
--- /dev/null
+++ b/lib/libspl/include/sys/note.h
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1994 by Sun Microsystems, Inc.
+ */
+
+/*
+ * sys/note.h: interface for annotating source with info for tools
+ *
+ * This is the underlying interface; NOTE (/usr/include/note.h) is the
+ * preferred interface, but all exported header files should include this
+ * file directly and use _NOTE so as not to take "NOTE" from the user's
+ * namespace. For consistency, *all* kernel source should use _NOTE.
+ *
+ * By default, annotations expand to nothing. This file implements
+ * that. Tools using annotations will interpose a different version
+ * of this file that will expand annotations as needed.
+ */
+
+#ifndef _SYS_NOTE_H
+#define _SYS_NOTE_H
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _NOTE
+#define _NOTE(s)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_NOTE_H */
diff --git a/lib/libspl/include/sys/param.h b/lib/libspl/include/sys/param.h
new file mode 100644
index 000000000..d9e7782e4
--- /dev/null
+++ b/lib/libspl/include/sys/param.h
@@ -0,0 +1,67 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_PARAM_H
+#define _LIBSPL_SYS_PARAM_H
+
+#include_next <sys/param.h>
+#include <unistd.h>
+
+/*
+ * File system parameters and macros.
+ *
+ * The file system is made out of blocks of at most MAXBSIZE units,
+ * with smaller units (fragments) only in the last direct block.
+ * MAXBSIZE primarily determines the size of buffers in the buffer
+ * pool. It may be made larger without any effect on existing
+ * file systems; however making it smaller may make some file
+ * systems unmountable.
+ *
+ * Note that the blocked devices are assumed to have DEV_BSIZE
+ * "sectors" and that fragments must be some multiple of this size.
+ */
+#define MAXBSIZE 8192
+#define DEV_BSIZE 512
+#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */
+
+#define MAXNAMELEN 256
+
+#ifdef _LP64
+#define MAXOFFSET_T 0x7fffffffffffffffl
+#else
+#define MAXOFFSET_T 0x7fffffffl
+#endif
+
+#define UID_NOBODY 60001 /* user ID no body */
+#define GID_NOBODY UID_NOBODY
+#define UID_NOACCESS 60002 /* user ID no access */
+
+#define MAXUID 2147483647 /* max user id */
+#define MAXPROJID MAXUID /* max project id */
+
+#define PAGESIZE (sysconf(_SC_PAGESIZE))
+
+#endif
diff --git a/lib/libspl/include/sys/priv.h b/lib/libspl/include/sys/priv.h
new file mode 100644
index 000000000..4a3ab9684
--- /dev/null
+++ b/lib/libspl/include/sys/priv.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_PRIV_H
+#define _LIBSPL_SYS_PRIV_H
+
+#endif
diff --git a/lib/libspl/include/sys/processor.h b/lib/libspl/include/sys/processor.h
new file mode 100644
index 000000000..ce80c88df
--- /dev/null
+++ b/lib/libspl/include/sys/processor.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_PROCESSOR_H
+#define _LIBSPL_SYS_PROCESSOR_H
+
+#define getcpuid() (-1) /* stub: always evaluates to -1 */
+
+#endif
diff --git a/lib/libspl/include/sys/sdt.h b/lib/libspl/include/sys/sdt.h
new file mode 100644
index 000000000..79733eef0
--- /dev/null
+++ b/lib/libspl/include/sys/sdt.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_SDT_H
+#define _LIBSPL_SYS_SDT_H
+/* Probe macros are stubs here: each one discards its arguments as ((void) 0). */
+#define DTRACE_PROBE(a) ((void) 0)
+#define DTRACE_PROBE1(a,b,c) ((void) 0)
+#define DTRACE_PROBE2(a,b,c,d,e) ((void) 0)
+#define DTRACE_PROBE3(a,b,c,d,e,f,g) ((void) 0)
+#define DTRACE_PROBE4(a,b,c,d,e,f,g,h,i) ((void) 0)
+
+#endif
diff --git a/lib/libspl/include/sys/stack.h b/lib/libspl/include/sys/stack.h
new file mode 100644
index 000000000..496605f95
--- /dev/null
+++ b/lib/libspl/include/sys/stack.h
@@ -0,0 +1,52 @@
+/*
+ * This header file distributed under the terms of the CDDL.
+ * Portions Copyright 2008 Sun Microsystems, Inc. All Rights reserved.
+ */
+#ifndef _SYS_STACK_H
+#define _SYS_STACK_H
+
+#include <pthread.h>
+
+#define STACK_BIAS 0
+
+#ifdef __USE_GNU
+/* Fill sp->ss_sp/ss_size for the calling thread; returns the pthread result. */
+static inline int
+stack_getbounds(stack_t *sp)
+{
+ pthread_attr_t attr;
+ int rc;
+
+ rc = pthread_getattr_np(pthread_self(), &attr);
+ if (rc)
+ return rc;
+
+ rc = pthread_attr_getstack(&attr, &sp->ss_sp, &sp->ss_size);
+ if (rc == 0)
+ sp->ss_flags = 0;
+ /* attr is destroyed whether or not the stack query above succeeded */
+ pthread_attr_destroy(&attr);
+
+ return rc;
+}
+/* Like stack_getbounds(), but sp->ss_sp is moved up by sp->ss_size. */
+static inline int
+thr_stksegment(stack_t *sp)
+{
+ int rc;
+
+ rc = stack_getbounds(sp);
+ if (rc)
+ return rc;
+
+ /* thr_stksegment() is expected to set sp->ss_sp to the high stack
+ * address, but the stack_getbounds() interface is expected to
+ * set sp->ss_sp to the low address. Adjust accordingly. */
+ sp->ss_sp = (void *)(((uintptr_t)sp->ss_sp) + sp->ss_size);
+ sp->ss_flags = 0; /* stack_getbounds() already stored 0 on success */
+
+ return rc;
+}
+
+#endif /* __USE_GNU */
+#endif /* _SYS_STACK_H */
diff --git a/lib/libspl/include/sys/stropts.h b/lib/libspl/include/sys/stropts.h
new file mode 100644
index 000000000..3c86957ba
--- /dev/null
+++ b/lib/libspl/include/sys/stropts.h
@@ -0,0 +1,4 @@
+#ifndef _LIBSPL_SYS_STROPTS_H
+#define _LIBSPL_SYS_STROPTS_H
+
+#endif /* _LIBSPL_SYS_STROPTS_H */
diff --git a/lib/libspl/include/sys/sunddi.h b/lib/libspl/include/sys/sunddi.h
new file mode 100644
index 000000000..ccd2b29b9
--- /dev/null
+++ b/lib/libspl/include/sys/sunddi.h
@@ -0,0 +1,29 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008 by Sun Microsystems, Inc.
+ */
+
+#ifndef _SYS_SUNDDI_H
+#define _SYS_SUNDDI_H
+
+#endif /* _SYS_SUNDDI_H */
diff --git a/lib/libspl/include/sys/sysevent.h b/lib/libspl/include/sys/sysevent.h
new file mode 100644
index 000000000..980d14541
--- /dev/null
+++ b/lib/libspl/include/sys/sysevent.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_SYSEVENT_H
+#define _LIBSPL_SYS_SYSEVENT_H
+
+#endif
diff --git a/lib/libspl/include/sys/sysevent/eventdefs.h b/lib/libspl/include/sys/sysevent/eventdefs.h
new file mode 100644
index 000000000..c4494f778
--- /dev/null
+++ b/lib/libspl/include/sys/sysevent/eventdefs.h
@@ -0,0 +1,235 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SYSEVENT_EVENTDEFS_H
+#define _SYS_SYSEVENT_EVENTDEFS_H
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * eventdefs.h contains public definitions for sysevent types (classes
+ * and subclasses). All additions/removal/changes are subject
+ * to PSARC approval.
+ */
+
+/* Sysevent Class definitions */
+#define EC_NONE "EC_none"
+#define EC_PRIV "EC_priv"
+#define EC_PLATFORM "EC_platform" /* events private to platform */
+#define EC_DR "EC_dr" /* Dynamic reconfiguration event class */
+#define EC_ENV "EC_env" /* Environmental monitor event class */
+#define EC_DOMAIN "EC_domain" /* Domain event class */
+#define EC_AP_DRIVER "EC_ap_driver" /* Alternate Pathing event class */
+#define EC_IPMP "EC_ipmp" /* IP Multipathing event class */
+#define EC_DEV_ADD "EC_dev_add" /* device add event class */
+#define EC_DEV_REMOVE "EC_dev_remove" /* device remove event class */
+#define EC_DEV_BRANCH "EC_dev_branch" /* device tree branch event class */
+#define EC_FM "EC_fm" /* FMA error report event */
+#define EC_ZFS "EC_zfs" /* ZFS event */
+
+/*
+ * The following event class is reserved for exclusive use
+ * by Sun Cluster software.
+ */
+#define EC_CLUSTER "EC_Cluster"
+
+/*
+ * The following classes are exclusively reserved for use by the
+ * Solaris Volume Manager (SVM)
+ */
+#define EC_SVM_CONFIG "EC_SVM_Config"
+#define EC_SVM_STATE "EC_SVM_State"
+
+/*
+ * EC_SVM_CONFIG subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/svm.h
+ */
+#define ESC_SVM_CREATE "ESC_SVM_Create"
+#define ESC_SVM_DELETE "ESC_SVM_Delete"
+#define ESC_SVM_ADD "ESC_SVM_Add"
+#define ESC_SVM_REMOVE "ESC_SVM_Remove"
+#define ESC_SVM_REPLACE "ESC_SVM_Replace"
+#define ESC_SVM_GROW "ESC_SVM_Grow"
+#define ESC_SVM_RENAME_SRC "ESC_SVM_Rename_Src"
+#define ESC_SVM_RENAME_DST "ESC_SVM_Rename_Dst"
+#define ESC_SVM_MEDIATOR_ADD "ESC_SVM_Mediator_Add"
+#define ESC_SVM_MEDIATOR_DELETE "ESC_SVM_Mediator_Delete"
+#define ESC_SVM_HOST_ADD "ESC_SVM_Host_Add"
+#define ESC_SVM_HOST_DELETE "ESC_SVM_Host_Delete"
+#define ESC_SVM_DRIVE_ADD "ESC_SVM_Drive_Add"
+#define ESC_SVM_DRIVE_DELETE "ESC_SVM_Drive_Delete"
+#define ESC_SVM_DETACH "ESC_SVM_Detach"
+#define ESC_SVM_DETACHING "ESC_SVM_Detaching"
+#define ESC_SVM_ATTACH "ESC_SVM_Attach"
+#define ESC_SVM_ATTACHING "ESC_SVM_Attaching"
+
+/*
+ * EC_SVM_STATE subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/svm.h
+ */
+#define ESC_SVM_INIT_START "ESC_SVM_Init_Start"
+#define ESC_SVM_INIT_FAILED "ESC_SVM_Init_Failed"
+#define ESC_SVM_INIT_FATAL "ESC_SVM_Init_Fatal"
+#define ESC_SVM_INIT_SUCCESS "ESC_SVM_Init_Success"
+#define ESC_SVM_IOERR "ESC_SVM_Ioerr"
+#define ESC_SVM_ERRED "ESC_SVM_Erred"
+#define ESC_SVM_LASTERRED "ESC_SVM_Lasterred"
+#define ESC_SVM_OK "ESC_SVM_Ok"
+#define ESC_SVM_ENABLE "ESC_SVM_Enable"
+#define ESC_SVM_RESYNC_START "ESC_SVM_Resync_Start"
+#define ESC_SVM_RESYNC_FAILED "ESC_SVM_Resync_Failed"
+#define ESC_SVM_RESYNC_SUCCESS "ESC_SVM_Resync_Success"
+#define ESC_SVM_RESYNC_DONE "ESC_SVM_Resync_Done"
+#define ESC_SVM_HOTSPARED "ESC_SVM_Hotspared"
+#define ESC_SVM_HS_FREED "ESC_SVM_HS_Freed"
+#define ESC_SVM_HS_CHANGED "ESC_SVM_HS_Changed"
+#define ESC_SVM_TAKEOVER "ESC_SVM_Takeover"
+#define ESC_SVM_RELEASE "ESC_SVM_Release"
+#define ESC_SVM_OPEN_FAIL "ESC_SVM_Open_Fail"
+#define ESC_SVM_OFFLINE "ESC_SVM_Offline"
+#define ESC_SVM_ONLINE "ESC_SVM_Online"
+#define ESC_SVM_CHANGE "ESC_SVM_Change"
+#define ESC_SVM_EXCHANGE "ESC_SVM_Exchange"
+#define ESC_SVM_REGEN_START "ESC_SVM_Regen_Start"
+#define ESC_SVM_REGEN_DONE "ESC_SVM_Regen_Done"
+#define ESC_SVM_REGEN_FAILED "ESC_SVM_Regen_Failed"
+
+/*
+ * EC_DR subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/dr.h
+ */
+
+/* Attachment point state change */
+#define ESC_DR_AP_STATE_CHANGE "ESC_dr_ap_state_change"
+#define ESC_DR_REQ "ESC_dr_req" /* Request DR */
+#define ESC_DR_TARGET_STATE_CHANGE "ESC_dr_target_state_change"
+
+/*
+ * EC_ENV subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/env.h
+ */
+#define ESC_ENV_TEMP "ESC_env_temp" /* Temperature change event subclass */
+#define ESC_ENV_FAN "ESC_env_fan" /* Fan status change event subclass */
+#define ESC_ENV_POWER "ESC_env_power" /* Power supply change event subclass */
+#define ESC_ENV_LED "ESC_env_led" /* LED change event subclass */
+
+/*
+ * EC_DOMAIN subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/domain.h
+ */
+
+/* Domain state change */
+#define ESC_DOMAIN_STATE_CHANGE "ESC_domain_state_change"
+/* Domain loghost name change */
+#define ESC_DOMAIN_LOGHOST_CHANGE "ESC_domain_loghost_change"
+
+/*
+ * EC_AP_DRIVER subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/ap_driver.h
+ */
+
+/* Alternate Pathing path switch */
+#define ESC_AP_DRIVER_PATHSWITCH "ESC_ap_driver_pathswitch"
+/* Alternate Pathing database commit */
+#define ESC_AP_DRIVER_COMMIT "ESC_ap_driver_commit"
+/* Alternate Pathing physical path status change */
+#define ESC_AP_DRIVER_PHYS_PATH_STATUS_CHANGE \
+ "ESC_ap_driver_phys_path_status_change"
+
+/*
+ * EC_IPMP subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/ipmp.h
+ */
+
+/* IPMP group has changed state */
+#define ESC_IPMP_GROUP_STATE "ESC_ipmp_group_state"
+
+/* IPMP group has been created or removed */
+#define ESC_IPMP_GROUP_CHANGE "ESC_ipmp_group_change"
+
+/* IPMP group has had an interface added or removed */
+#define ESC_IPMP_GROUP_MEMBER_CHANGE "ESC_ipmp_group_member_change"
+
+/* Interface within an IPMP group has changed state or type */
+#define ESC_IPMP_IF_CHANGE "ESC_ipmp_if_change"
+
+
+/*
+ * EC_DEV_ADD and EC_DEV_REMOVE subclass definitions - supporting attributes
+ * (name/value pairs) are found in sys/sysevent/dev.h
+ */
+#define ESC_DISK "disk" /* disk device */
+#define ESC_NETWORK "network" /* network interface */
+#define ESC_PRINTER "printer" /* printer device */
+#define ESC_LOFI "lofi" /* lofi device */
+
+/*
+ * EC_DEV_BRANCH subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/dev.h
+ */
+
+/* device tree branch added */
+#define ESC_DEV_BRANCH_ADD "ESC_dev_branch_add"
+
+/* device tree branch removed */
+#define ESC_DEV_BRANCH_REMOVE "ESC_dev_branch_remove"
+
+/* FMA Fault and Error event protocol subclass */
+#define ESC_FM_ERROR "ESC_FM_error"
+#define ESC_FM_ERROR_REPLAY "ESC_FM_error_replay"
+
+/* Service processor subclass definitions */
+#define ESC_PLATFORM_SP_RESET "ESC_platform_sp_reset"
+
+/*
+ * EC_ACPIEV subclass definitions
+ */
+#define EC_ACPIEV "EC_acpiev"
+#define ESC_ACPIEV_ADD "ESC_acpiev_add"
+#define ESC_ACPIEV_REMOVE "ESC_acpiev_remove"
+#define ESC_ACPIEV_WARN "ESC_acpiev_warn"
+#define ESC_ACPIEV_LOW "ESC_acpiev_low"
+#define ESC_ACPIEV_STATE_CHANGE "ESC_acpiev_state_change"
+
+/*
+ * ZFS subclass definitions. Supporting attributes (name/value pairs) are found
+ * in sys/fs/zfs.h
+ */
+#define ESC_ZFS_RESILVER_START "ESC_ZFS_resilver_start"
+#define ESC_ZFS_RESILVER_FINISH "ESC_ZFS_resilver_finish"
+#define ESC_ZFS_VDEV_REMOVE "ESC_ZFS_vdev_remove"
+#define ESC_ZFS_POOL_DESTROY "ESC_ZFS_pool_destroy"
+#define ESC_ZFS_VDEV_CLEAR "ESC_ZFS_vdev_clear"
+#define ESC_ZFS_VDEV_CHECK "ESC_ZFS_vdev_check"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SYSEVENT_EVENTDEFS_H */
diff --git a/lib/libspl/include/sys/sysmacros.h b/lib/libspl/include/sys/sysmacros.h
new file mode 100644
index 000000000..07ab8c934
--- /dev/null
+++ b/lib/libspl/include/sys/sysmacros.h
@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_SYSMACROS_H
+#define _LIBSPL_SYS_SYSMACROS_H
+
+#include_next <sys/sysmacros.h>
+
+/* common macros */
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) ((a) < (b) ? (b) : (a))
+#endif
+#ifndef ABS
+#define ABS(a) ((a) < 0 ? -(a) : (a))
+#endif
+
+#define makedevice(maj,min) makedev(maj,min)
+#define _sysconf(a) sysconf(a)
+#define __NORETURN __attribute__ ((noreturn))
+
+/*
+ * Compatibility macros/typedefs needed for Solaris -> Linux port
+ */
+#define P2ALIGN(x, align) ((x) & -(align))
+#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
+#define P2ROUNDUP(x, align) (-(-(x) & -(align)))
+#define P2ROUNDUP_TYPED(x, align, type) \
+ (-(-(type)(x) & -(type)(align)))
+#define P2BOUNDARY(off, len, align) \
+ (((off) ^ ((off) + (len) - 1)) > (align) - 1)
+#define P2PHASE(x, align) ((x) & ((align) - 1))
+#define P2NPHASE(x, align) (-(x) & ((align) - 1))
+#define P2NPHASE_TYPED(x, align, type) \
+ (-(type)(x) & ((type)(align) - 1))
+#define ISP2(x) (((x) & ((x) - 1)) == 0)
+#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
+
+/*
+ * Typed version of the P2* macros. These macros should be used to ensure
+ * that the result is correctly calculated based on the data type of (x),
+ * which is passed in as the last argument, regardless of the data
+ * type of the alignment. For example, if (x) is of type uint64_t,
+ * and we want to round it up to a page boundary using "PAGESIZE" as
+ * the alignment, we can do either
+ * P2ROUNDUP(x, (uint64_t)PAGESIZE)
+ * or
+ * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
+ */
+#define P2ALIGN_TYPED(x, align, type) \
+ ((type)(x) & -(type)(align))
+#define P2PHASE_TYPED(x, align, type) \
+ ((type)(x) & ((type)(align) - 1))
+#define P2NPHASE_TYPED(x, align, type) \
+ (-(type)(x) & ((type)(align) - 1))
+#define P2ROUNDUP_TYPED(x, align, type) \
+ (-(-(type)(x) & -(type)(align)))
+#define P2END_TYPED(x, align, type) \
+ (-(~(type)(x) & -(type)(align)))
+#define P2PHASEUP_TYPED(x, align, phase, type) \
+ ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
+#define P2CROSS_TYPED(x, y, align, type) \
+ (((type)(x) ^ (type)(y)) > (type)(align) - 1)
+#define P2SAMEHIGHBIT_TYPED(x, y, type) \
+ (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
+
+
+/* avoid any possibility of clashing with <stddef.h> version */
+#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
+#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
+#endif
+
+#endif /* _LIBSPL_SYS_SYSMACROS_H */
diff --git a/lib/libspl/include/sys/systeminfo.h b/lib/libspl/include/sys/systeminfo.h
new file mode 100644
index 000000000..9f561aaa8
--- /dev/null
+++ b/lib/libspl/include/sys/systeminfo.h
@@ -0,0 +1,37 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_SYSTEMINFO_H
+#define _LIBSPL_SYS_SYSTEMINFO_H
+
+#define HW_INVALID_HOSTID 0xFFFFFFFF /* an invalid hostid */
+#define HW_HOSTID_LEN 11 /* minimum buffer size needed */
+ /* to hold a decimal or hex */
+ /* hostid string */
+
+#define sysinfo(cmd,buf,cnt) (-1) /* stub: ignores its arguments, yields -1 */
+
+#endif /* _LIBSPL_SYS_SYSTEMINFO_H */
diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h
new file mode 100644
index 000000000..bb5af8c8b
--- /dev/null
+++ b/lib/libspl/include/sys/time.h
@@ -0,0 +1,39 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_TIME_H
+#define _LIBSPL_SYS_TIME_H
+
+#include_next <sys/time.h>
+#include <sys/types.h> /* for hrtime_t */
+
+#ifndef NANOSEC
+#define NANOSEC 1000000000 /* 10^9; presumably nanoseconds per second */
+#endif
+
+extern hrtime_t gethrtime(void); /* declared only; defined outside this header */
+
+#endif /* _LIBSPL_SYS_TIME_H */
diff --git a/lib/libspl/include/sys/types.h b/lib/libspl/include/sys/types.h
new file mode 100644
index 000000000..5fb49118e
--- /dev/null
+++ b/lib/libspl/include/sys/types.h
@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_TYPES_H
+#define _LIBSPL_SYS_TYPES_H
+
+#include <sys/isa_defs.h>
+#include <sys/feature_tests.h>
+#include_next <sys/types.h>
+#include <sys/param.h> /* for NBBY */
+#include <sys/types32.h>
+#include <sys/va_list.h>
+
+#ifndef HAVE_INTTYPES
+#include <inttypes.h>
+
+typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
+
+typedef unsigned char uchar_t;
+typedef unsigned short ushort_t;
+typedef unsigned int uint_t;
+typedef unsigned long ulong_t;
+
+typedef long long longlong_t;
+typedef unsigned long long u_longlong_t;
+#endif /* HAVE_INTTYPES */
+
+typedef longlong_t offset_t;
+typedef u_longlong_t u_offset_t;
+typedef u_longlong_t len_t;
+typedef longlong_t diskaddr_t;
+
+typedef ulong_t pfn_t; /* page frame number */
+typedef ulong_t pgcnt_t; /* number of pages */
+typedef long spgcnt_t; /* signed number of pages */
+
+typedef longlong_t hrtime_t;
+typedef struct timespec timestruc_t;
+
+typedef short pri_t;
+
+typedef int zoneid_t;
+typedef int projid_t;
+
+typedef int major_t;
+typedef int minor_t;
+
+typedef ushort_t o_mode_t; /* old file attribute type */
+
+/*
+ * Definitions remaining from previous partial support for 64-bit file
+ * offsets. This partial support for devices greater than 2gb requires
+ * compiler support for long long.
+ */
+#ifdef _LONG_LONG_LTOH /* layout with the lower 32-bit word first */
+typedef union {
+ offset_t _f; /* Full 64 bit offset value */
+ struct {
+ int32_t _l; /* lower 32 bits of offset value */
+ int32_t _u; /* upper 32 bits of offset value */
+ } _p;
+} lloff_t;
+#endif /* _LONG_LONG_LTOH */
+
+#ifdef _LONG_LONG_HTOL /* layout with the upper 32-bit word first */
+typedef union {
+ offset_t _f; /* Full 64 bit offset value */
+ struct {
+ int32_t _u; /* upper 32 bits of offset value */
+ int32_t _l; /* lower 32 bits of offset value */
+ } _p;
+} lloff_t;
+#endif /* _LONG_LONG_HTOL */
+
+#endif
diff --git a/lib/libspl/include/sys/types32.h b/lib/libspl/include/sys/types32.h
new file mode 100644
index 000000000..9ab3b0782
--- /dev/null
+++ b/lib/libspl/include/sys/types32.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_TYPES32_H
+#define _SYS_TYPES32_H
+
+
+
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Interoperability types for programs. Used for:
+ *
+ * Crossing between 32-bit and 64-bit domains.
+ *
+ * On disk data formats such as filesystem meta data
+ * and disk label.
+ *
+ * Note: Applications should never include this
+ * header file.
+ */
+typedef uint32_t caddr32_t;
+typedef int32_t daddr32_t;
+typedef int32_t off32_t;
+typedef uint32_t ino32_t;
+typedef int32_t blkcnt32_t;
+typedef uint32_t fsblkcnt32_t;
+typedef uint32_t fsfilcnt32_t;
+typedef int32_t id32_t;
+typedef uint32_t major32_t;
+typedef uint32_t minor32_t;
+typedef int32_t key32_t;
+typedef uint32_t mode32_t;
+typedef uint32_t uid32_t;
+typedef uint32_t gid32_t;
+typedef uint32_t nlink32_t;
+typedef uint32_t dev32_t;
+typedef int32_t pid32_t;
+typedef uint32_t size32_t;
+typedef int32_t ssize32_t;
+typedef int32_t time32_t;
+typedef int32_t clock32_t;
+
+struct timeval32 {
+ time32_t tv_sec; /* seconds */
+ int32_t tv_usec; /* and microseconds */
+};
+
+typedef struct timespec32 {
+ time32_t tv_sec; /* seconds */
+ int32_t tv_nsec; /* and nanoseconds */
+} timespec32_t;
+
+typedef struct timespec32 timestruc32_t;
+
+typedef struct itimerspec32 {
+ struct timespec32 it_interval;
+ struct timespec32 it_value;
+} itimerspec32_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_TYPES32_H */
diff --git a/lib/libspl/include/sys/tzfile.h b/lib/libspl/include/sys/tzfile.h
new file mode 100644
index 000000000..e30e75663
--- /dev/null
+++ b/lib/libspl/include/sys/tzfile.h
@@ -0,0 +1,164 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * from Arthur Olson's 6.1
+ */
+
+#ifndef _LIBSPL_SYS_TZFILE_H
+#define _LIBSPL_SYS_TZFILE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Information about time zone files.
+ */
+
+#define TZDIR "/usr/share/lib/zoneinfo" /* Time zone object file directory */
+
+#define TZDEFAULT (getenv("TZ"))
+
+#define TZDEFRULES "posixrules"
+
+/*
+ * Each file begins with. . .
+ */
+
+struct tzhead {
+ char tzh_reserved[24]; /* reserved for future use */
+ char tzh_ttisstdcnt[4]; /* coded number of trans. time flags */
+ char tzh_leapcnt[4]; /* coded number of leap seconds */
+ char tzh_timecnt[4]; /* coded number of transition times */
+ char tzh_typecnt[4]; /* coded number of local time types */
+ char tzh_charcnt[4]; /* coded number of abbr. chars */
+};
+
+/*
+ * . . .followed by. . .
+ *
+ * tzh_timecnt (char [4])s coded transition times a la time(2)
+ * tzh_timecnt (unsigned char)s types of local time starting at above
+ * tzh_typecnt repetitions of
+ * one (char [4]) coded GMT offset in seconds
+ * one (unsigned char) used to set tm_isdst
+ * one (unsigned char) that's an abbreviation list index
+ * tzh_charcnt (char)s '\0'-terminated zone abbreviations
+ * tzh_leapcnt repetitions of
+ * one (char [4]) coded leap second transition times
+ * one (char [4]) total correction after above
+ * tzh_ttisstdcnt (char)s indexed by type; if TRUE, transition
+ * time is standard time, if FALSE,
+ * transition time is wall clock time
+ * if absent, transition times are
+ * assumed to be wall clock time
+ */
+
+/*
+ * In the current implementation, "tzset()" refuses to deal with files that
+ * exceed any of the limits below.
+ */
+
+/*
+ * The TZ_MAX_TIMES value below is enough to handle a bit more than a
+ * year's worth of solar time (corrected daily to the nearest second) or
+ * 138 years of Pacific Presidential Election time
+ * (where there are three time zone transitions every fourth year).
+ */
+#define TZ_MAX_TIMES 370
+
+#define TZ_MAX_TYPES 256 /* Limited by what (unsigned char)'s can hold */
+
+#define TZ_MAX_CHARS 50 /* Maximum number of abbreviation characters */
+
+#define TZ_MAX_LEAPS 50 /* Maximum number of leap second corrections */
+
+#define SECSPERMIN 60
+#define MINSPERHOUR 60
+#define HOURSPERDAY 24
+#define DAYSPERWEEK 7
+#define DAYSPERNYEAR 365
+#define DAYSPERLYEAR 366
+#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR)
+#define SECSPERDAY ((long)SECSPERHOUR * HOURSPERDAY)
+#define MONSPERYEAR 12
+
+#define TM_SUNDAY 0
+#define TM_MONDAY 1
+#define TM_TUESDAY 2
+#define TM_WEDNESDAY 3
+#define TM_THURSDAY 4
+#define TM_FRIDAY 5
+#define TM_SATURDAY 6
+
+#define TM_JANUARY 0
+#define TM_FEBRUARY 1
+#define TM_MARCH 2
+#define TM_APRIL 3
+#define TM_MAY 4
+#define TM_JUNE 5
+#define TM_JULY 6
+#define TM_AUGUST 7
+#define TM_SEPTEMBER 8
+#define TM_OCTOBER 9
+#define TM_NOVEMBER 10
+#define TM_DECEMBER 11
+
+#define TM_YEAR_BASE 1900
+
+#define EPOCH_YEAR 1970
+#define EPOCH_WDAY TM_THURSDAY
+
+/*
+ * Gregorian leap-year test; (y) may be evaluated up to three times.
+ * Accurate only for the past couple of centuries; that will probably do.
+ */
+
+#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
+
+/*
+ * Use of the underscored variants may cause problems if you move your code to
+ * certain System-V-based systems; for maximum portability, use the
+ * underscore-free variants. The underscored variants are provided for
+ * backward compatibility only; they may disappear from future versions of
+ * this file.
+ */
+
+#define SECS_PER_MIN SECSPERMIN
+#define MINS_PER_HOUR MINSPERHOUR
+#define HOURS_PER_DAY HOURSPERDAY
+#define DAYS_PER_WEEK DAYSPERWEEK
+#define DAYS_PER_NYEAR DAYSPERNYEAR
+#define DAYS_PER_LYEAR DAYSPERLYEAR
+#define SECS_PER_HOUR SECSPERHOUR
+#define SECS_PER_DAY SECSPERDAY
+#define MONS_PER_YEAR MONSPERYEAR
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBSPL_SYS_TZFILE_H */
diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h
new file mode 100644
index 000000000..0aed91357
--- /dev/null
+++ b/lib/libspl/include/sys/uio.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _LIBSPL_SYS_UIO_H
+#define _LIBSPL_SYS_UIO_H
+
+/* struct iovec is defined in glibc's sys/uio.h */
+#include_next <sys/uio.h>
+
+typedef enum uio_rw { UIO_READ, UIO_WRITE } uio_rw_t;
+
+#define UIO_SYSSPACE 1
+
+#endif /* _LIBSPL_SYS_UIO_H */
diff --git a/lib/libspl/include/sys/utsname.h b/lib/libspl/include/sys/utsname.h
new file mode 100644
index 000000000..fd323b96d
--- /dev/null
+++ b/lib/libspl/include/sys/utsname.h
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_UTSNAME_H
+#define _LIBSPL_UTSNAME_H
+
+#include_next <sys/utsname.h>
+
+extern struct utsname utsname; /* declaration only; define it in exactly one .c file */
+
+#endif /* _LIBSPL_UTSNAME_H */
diff --git a/lib/libspl/include/sys/va_list.h b/lib/libspl/include/sys/va_list.h
new file mode 100644
index 000000000..cf6045451
--- /dev/null
+++ b/lib/libspl/include/sys/va_list.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_VA_LIST_H
+#define _SYS_VA_LIST_H
+
+#include <stdarg.h>
+
+#ifndef __va_list /* NOTE: only detects a macro; a typedef is invisible here */
+typedef __gnuc_va_list __va_list;
+#endif
+
+#endif /* _SYS_VA_LIST_H */
diff --git a/lib/libspl/include/sys/varargs.h b/lib/libspl/include/sys/varargs.h
new file mode 100644
index 000000000..b8a63d8d7
--- /dev/null
+++ b/lib/libspl/include/sys/varargs.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_VARARGS_H
+#define _LIBSPL_SYS_VARARGS_H
+
+#endif
diff --git a/lib/libspl/include/sys/vtoc.h b/lib/libspl/include/sys/vtoc.h
new file mode 100644
index 000000000..004b49097
--- /dev/null
+++ b/lib/libspl/include/sys/vtoc.h
@@ -0,0 +1,350 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+
+#ifndef _SYS_VTOC_H
+#define _SYS_VTOC_H
+
+#include <sys/dklabel.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Note: the VTOC is not implemented fully, nor in the manner
+ * that AT&T implements it. AT&T puts the vtoc structure
+ * into a sector, usually the second sector (pdsector is first).
+ *
+ * Sun incorporates the tag, flag, version, and volume vtoc fields into
+ * its Disk Label, which already has some vtoc-equivalent fields.
+ * Upon reading the vtoc with read_vtoc(), the following exceptions
+ * occur:
+ * v_bootinfo [all] returned as zero
+ * v_sanity returned as VTOC_SANE
+ * if Disk Label was sane
+ * v_sectorsz returned as 512
+ * v_reserved [all] returned as zero
+ * timestamp [all] returned as zero
+ *
+ * See dklabel.h, read_vtoc(), and write_vtoc().
+ */
+
+#define V_NUMPAR NDKMAP /* The number of partitions */
+ /* (from dkio.h) */
+
+#define VTOC_SANE 0x600DDEEE /* Indicates a sane VTOC */
+#define V_VERSION 0x01 /* layout version number */
+#define V_EXTVERSION V_VERSION /* extvtoc layout version number */
+
+/*
+ * Partition identification tags
+ */
+#define V_UNASSIGNED 0x00 /* unassigned partition */
+#define V_BOOT 0x01 /* Boot partition */
+#define V_ROOT 0x02 /* Root filesystem */
+#define V_SWAP 0x03 /* Swap filesystem */
+#define V_USR 0x04 /* Usr filesystem */
+#define V_BACKUP 0x05 /* full disk */
+#define V_STAND 0x06 /* Stand partition */
+#define V_VAR 0x07 /* Var partition */
+#define V_HOME 0x08 /* Home partition */
+#define V_ALTSCTR 0x09 /* Alternate sector partition */
+#define V_CACHE 0x0a /* Cache (cachefs) partition */
+#define V_RESERVED 0x0b /* SMI reserved data */
+
+/*
+ * Partition permission flags
+ */
+#define V_UNMNT 0x01 /* Unmountable partition */
+#define V_RONLY 0x10 /* Read only */
+
+/*
+ * error codes for reading & writing vtoc
+ */
+#define VT_ERROR (-2) /* errno supplies specific error */
+#define VT_EIO (-3) /* I/O error accessing vtoc */
+#define VT_EINVAL (-4) /* illegal value in vtoc or request */
+#define VT_ENOTSUP (-5) /* VTOC op. not supported */
+#define VT_ENOSPC (-6) /* requested space not found */
+#define VT_EOVERFLOW (-7) /* VTOC op. data struct limited */
+
+struct partition {
+ ushort_t p_tag; /* ID tag of partition */
+ ushort_t p_flag; /* permission flags */
+ daddr_t p_start; /* start sector no of partition */
+ long p_size; /* # of blocks in partition */
+};
+
+struct vtoc {
+ unsigned long v_bootinfo[3]; /* info needed by mboot (unsupported) */
+ unsigned long v_sanity; /* to verify vtoc sanity */
+ unsigned long v_version; /* layout version */
+ char v_volume[LEN_DKL_VVOL]; /* volume name */
+ ushort_t v_sectorsz; /* sector size in bytes */
+ ushort_t v_nparts; /* number of partitions */
+ unsigned long v_reserved[10]; /* free space */
+ struct partition v_part[V_NUMPAR]; /* partition headers */
+ time_t timestamp[V_NUMPAR]; /* partition timestamp (unsupported) */
+ char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
+};
+
+struct extpartition {
+ ushort_t p_tag; /* ID tag of partition */
+ ushort_t p_flag; /* permission flags */
+ ushort_t p_pad[2];
+ diskaddr_t p_start; /* start sector no of partition */
+ diskaddr_t p_size; /* # of blocks in partition */
+};
+
+
+struct extvtoc {
+ uint64_t v_bootinfo[3]; /* info needed by mboot (unsupported) */
+ uint64_t v_sanity; /* to verify vtoc sanity */
+ uint64_t v_version; /* layout version */
+ char v_volume[LEN_DKL_VVOL]; /* volume name */
+ ushort_t v_sectorsz; /* sector size in bytes */
+ ushort_t v_nparts; /* number of partitions */
+ ushort_t pad[2];
+ uint64_t v_reserved[10];
+ struct extpartition v_part[V_NUMPAR]; /* partition headers */
+ uint64_t timestamp[V_NUMPAR]; /* partition timestamp (unsupported) */
+ char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
+};
+
+#ifdef _KERNEL
+#define extvtoctovtoc(extv, v) \
+ { \
+ int i; \
+ v.v_bootinfo[0] = (unsigned long)extv.v_bootinfo[0]; \
+ v.v_bootinfo[1] = (unsigned long)extv.v_bootinfo[1]; \
+ v.v_bootinfo[2] = (unsigned long)extv.v_bootinfo[2]; \
+ v.v_sanity = (unsigned long)extv.v_sanity; \
+ v.v_version = (unsigned long)extv.v_version; \
+ bcopy(extv.v_volume, v.v_volume, LEN_DKL_VVOL); \
+ v.v_sectorsz = extv.v_sectorsz; \
+ v.v_nparts = extv.v_nparts; \
+ for (i = 0; i < 10; i++) \
+ v.v_reserved[i] = (unsigned long)extv.v_reserved[i]; \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ v.v_part[i].p_tag = extv.v_part[i].p_tag; \
+ v.v_part[i].p_flag = extv.v_part[i].p_flag; \
+ v.v_part[i].p_start = (daddr_t)extv.v_part[i].p_start; \
+ v.v_part[i].p_size = (long)extv.v_part[i].p_size; \
+ v.timestamp[i] = (time_t)extv.timestamp[i]; \
+ } \
+ bcopy(extv.v_asciilabel, v.v_asciilabel, LEN_DKL_ASCII); \
+ }
+
+#define vtoctoextvtoc(v, extv) \
+ { \
+ int i; \
+ extv.v_bootinfo[0] = (uint64_t)v.v_bootinfo[0]; \
+ extv.v_bootinfo[1] = (uint64_t)v.v_bootinfo[1]; \
+ extv.v_bootinfo[2] = (uint64_t)v.v_bootinfo[2]; \
+ extv.v_sanity = (uint64_t)v.v_sanity; \
+ extv.v_version = (uint64_t)v.v_version; \
+ bcopy(v.v_volume, extv.v_volume, LEN_DKL_VVOL); \
+ extv.v_sectorsz = v.v_sectorsz; \
+ extv.v_nparts = v.v_nparts; \
+ for (i = 0; i < 10; i++) \
+ extv.v_reserved[i] = (uint64_t)v.v_reserved[i]; \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ extv.v_part[i].p_tag = v.v_part[i].p_tag; \
+ extv.v_part[i].p_flag = v.v_part[i].p_flag; \
+ extv.v_part[i].p_start = \
+ (diskaddr_t)(unsigned long)v.v_part[i].p_start; \
+ extv.v_part[i].p_size = \
+ (diskaddr_t)(unsigned long)v.v_part[i].p_size; \
+ extv.timestamp[i] = (uint64_t)v.timestamp[i]; \
+ } \
+ bcopy(v.v_asciilabel, extv.v_asciilabel, LEN_DKL_ASCII); \
+ }
+#endif /* _KERNEL */
+
+#if defined(_SYSCALL32)
+struct partition32 {
+ uint16_t p_tag; /* ID tag of partition */
+ uint16_t p_flag; /* permission flags */
+ daddr32_t p_start; /* start sector no of partition */
+ int32_t p_size; /* # of blocks in partition */
+};
+
+struct vtoc32 {
+ uint32_t v_bootinfo[3]; /* info needed by mboot (unsupported) */
+ uint32_t v_sanity; /* to verify vtoc sanity */
+ uint32_t v_version; /* layout version */
+ char v_volume[LEN_DKL_VVOL]; /* volume name */
+ uint16_t v_sectorsz; /* sector size in bytes */
+ uint16_t v_nparts; /* number of partitions */
+ uint32_t v_reserved[10]; /* free space */
+ struct partition32 v_part[V_NUMPAR]; /* partition headers */
+ time32_t timestamp[V_NUMPAR]; /* partition timestamp (unsupported) */
+ char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
+};
+
+#define vtoc32tovtoc(v32, v) \
+ { \
+ int i; \
+ v.v_bootinfo[0] = v32.v_bootinfo[0]; \
+ v.v_bootinfo[1] = v32.v_bootinfo[1]; \
+ v.v_bootinfo[2] = v32.v_bootinfo[2]; \
+ v.v_sanity = v32.v_sanity; \
+ v.v_version = v32.v_version; \
+ bcopy(v32.v_volume, v.v_volume, LEN_DKL_VVOL); \
+ v.v_sectorsz = v32.v_sectorsz; \
+ v.v_nparts = v32.v_nparts; \
+ v.v_version = v32.v_version; \
+ for (i = 0; i < 10; i++) \
+ v.v_reserved[i] = v32.v_reserved[i]; \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ v.v_part[i].p_tag = (ushort_t)v32.v_part[i].p_tag; \
+ v.v_part[i].p_flag = (ushort_t)v32.v_part[i].p_flag; \
+ v.v_part[i].p_start = (unsigned)v32.v_part[i].p_start; \
+ v.v_part[i].p_size = (unsigned)v32.v_part[i].p_size; \
+ } \
+ for (i = 0; i < V_NUMPAR; i++) \
+ v.timestamp[i] = (time_t)v32.timestamp[i]; \
+ bcopy(v32.v_asciilabel, v.v_asciilabel, LEN_DKL_ASCII); \
+ }
+
+#define vtoc32toextvtoc(v32, extv) \
+ { \
+ int i; \
+ extv.v_bootinfo[0] = v32.v_bootinfo[0]; \
+ extv.v_bootinfo[1] = v32.v_bootinfo[1]; \
+ extv.v_bootinfo[2] = v32.v_bootinfo[2]; \
+ extv.v_sanity = v32.v_sanity; \
+ extv.v_version = v32.v_version; \
+ bcopy(v32.v_volume, extv.v_volume, LEN_DKL_VVOL); \
+ extv.v_sectorsz = v32.v_sectorsz; \
+ extv.v_nparts = v32.v_nparts; \
+ extv.v_version = v32.v_version; \
+ for (i = 0; i < 10; i++) \
+ extv.v_reserved[i] = v32.v_reserved[i]; \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ extv.v_part[i].p_tag = (ushort_t)v32.v_part[i].p_tag; \
+ extv.v_part[i].p_flag = (ushort_t)v32.v_part[i].p_flag; \
+ extv.v_part[i].p_start = (diskaddr_t)v32.v_part[i].p_start; \
+ extv.v_part[i].p_size = (diskaddr_t)v32.v_part[i].p_size; \
+ extv.timestamp[i] = (time_t)v32.timestamp[i]; \
+ } \
+ bcopy(v32.v_asciilabel, extv.v_asciilabel, LEN_DKL_ASCII); \
+ }
+
+
+#define vtoctovtoc32(v, v32) \
+ { \
+ int i; \
+ v32.v_bootinfo[0] = v.v_bootinfo[0]; \
+ v32.v_bootinfo[1] = v.v_bootinfo[1]; \
+ v32.v_bootinfo[2] = v.v_bootinfo[2]; \
+ v32.v_sanity = v.v_sanity; \
+ v32.v_version = v.v_version; \
+ bcopy(v.v_volume, v32.v_volume, LEN_DKL_VVOL); \
+ v32.v_sectorsz = v.v_sectorsz; \
+ v32.v_nparts = v.v_nparts; \
+ v32.v_version = v.v_version; \
+ for (i = 0; i < 10; i++) \
+ v32.v_reserved[i] = v.v_reserved[i]; \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ v32.v_part[i].p_tag = (ushort_t)v.v_part[i].p_tag; \
+ v32.v_part[i].p_flag = (ushort_t)v.v_part[i].p_flag; \
+ v32.v_part[i].p_start = (unsigned)v.v_part[i].p_start; \
+ v32.v_part[i].p_size = (unsigned)v.v_part[i].p_size; \
+ } \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ if (v.timestamp[i] > TIME32_MAX) \
+ v32.timestamp[i] = TIME32_MAX; \
+ else \
+ v32.timestamp[i] = (time32_t)v.timestamp[i]; \
+ } \
+ bcopy(v.v_asciilabel, v32.v_asciilabel, LEN_DKL_ASCII); \
+ }
+
+#define extvtoctovtoc32(extv, v32) \
+ { \
+ int i; \
+ v32.v_bootinfo[0] = extv.v_bootinfo[0]; \
+ v32.v_bootinfo[1] = extv.v_bootinfo[1]; \
+ v32.v_bootinfo[2] = extv.v_bootinfo[2]; \
+ v32.v_sanity = extv.v_sanity; \
+ v32.v_version = extv.v_version; \
+ bcopy(extv.v_volume, v32.v_volume, LEN_DKL_VVOL); \
+ v32.v_sectorsz = extv.v_sectorsz; \
+ v32.v_nparts = extv.v_nparts; \
+ v32.v_version = extv.v_version; \
+ for (i = 0; i < 10; i++) \
+ v32.v_reserved[i] = extv.v_reserved[i]; \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ v32.v_part[i].p_tag = (ushort_t)extv.v_part[i].p_tag; \
+ v32.v_part[i].p_flag = (ushort_t)extv.v_part[i].p_flag; \
+ v32.v_part[i].p_start = (unsigned)extv.v_part[i].p_start; \
+ v32.v_part[i].p_size = (unsigned)extv.v_part[i].p_size; \
+ } \
+ for (i = 0; i < V_NUMPAR; i++) { \
+ if (extv.timestamp[i] > TIME32_MAX) \
+ v32.timestamp[i] = TIME32_MAX; \
+ else \
+ v32.timestamp[i] = (time32_t)extv.timestamp[i]; \
+ } \
+ bcopy(extv.v_asciilabel, v32.v_asciilabel, LEN_DKL_ASCII); \
+ }
+
+
+#endif /* _SYSCALL32 */
+
+/*
+ * These defines are the mode parameter for the checksum routines.
+ */
+#define CK_CHECKSUM 0 /* check checksum */
+#define CK_MAKESUM 1 /* generate checksum */
+
+#if defined(__STDC__)
+
+extern int read_vtoc(int, struct vtoc *);
+extern int write_vtoc(int, struct vtoc *);
+extern int read_extvtoc(int, struct extvtoc *);
+extern int write_extvtoc(int, struct extvtoc *);
+
+#else
+
+extern int read_vtoc();
+extern int write_vtoc();
+extern int read_extvtoc();
+extern int write_extvtoc();
+
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VTOC_H */
diff --git a/lib/libspl/include/sys/zone.h b/lib/libspl/include/sys/zone.h
new file mode 100644
index 000000000..ea7c8bde3
--- /dev/null
+++ b/lib/libspl/include/sys/zone.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_ZONE_H
+#define _LIBSPL_SYS_ZONE_H
+
+#endif
diff --git a/lib/libspl/include/thread.h b/lib/libspl/include/thread.h
new file mode 100644
index 000000000..a72f6d2b1
--- /dev/null
+++ b/lib/libspl/include/thread.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_THREAD_H
+#define _LIBSPL_THREAD_H
+
+#endif /* _LIBSPL_THREAD_H */
diff --git a/lib/libspl/include/tsol/label.h b/lib/libspl/include/tsol/label.h
new file mode 100644
index 000000000..bfae8a126
--- /dev/null
+++ b/lib/libspl/include/tsol/label.h
@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOL_TSOL_LABEL_H
+#define _SOL_TSOL_LABEL_H
+
+#endif
diff --git a/lib/libspl/include/tzfile.h b/lib/libspl/include/tzfile.h
new file mode 100644
index 000000000..441b8cf8b
--- /dev/null
+++ b/lib/libspl/include/tzfile.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_TZFILE_H
+#define _LIBSPL_TZFILE_H
+
+#include <sys/tzfile.h>
+
+#endif /* _LIBSPL_TZFILE_H */
diff --git a/lib/libspl/include/ucred.h b/lib/libspl/include/ucred.h
new file mode 100644
index 000000000..4ca424ed3
--- /dev/null
+++ b/lib/libspl/include/ucred.h
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_UCRED_H
+#define _LIBSPL_UCRED_H
+
+typedef int ucred_t;
+
+#endif
diff --git a/lib/libspl/include/umem.h b/lib/libspl/include/umem.h
new file mode 100644
index 000000000..0ed55ae5a
--- /dev/null
+++ b/lib/libspl/include/umem.h
@@ -0,0 +1,169 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_UMEM_H
+#define _LIBSPL_UMEM_H
+
+/* XXX: We should use the real portable umem library if it is detected
+ * at configure time. However, if the library is not available we can
+ * use a trivial malloc based implementation. This obviously impacts
+ * performance but unless you are using a full userspace build of zpool for
+ * something other than ztest you're likely not going to notice or care.
+ *
+ * https://labs.omniti.com/trac/portableumem
+ */
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void vmem_t;
+
+/*
+ * Flags for umem_alloc/umem_free
+ */
+#define UMEM_DEFAULT 0x0000 /* normal -- may fail */
+#define UMEM_NOFAIL 0x0100 /* Never fails */
+
+/*
+ * Flags for umem_cache_create()
+ */
+#define UMC_NOTOUCH 0x00010000
+#define UMC_NODEBUG 0x00020000
+#define UMC_NOMAGAZINE 0x00040000
+#define UMC_NOHASH 0x00080000
+
+#define UMEM_CACHE_NAMELEN 31
+
+typedef int umem_nofail_callback_t(void);
+typedef int umem_constructor_t(void *, void *, int);
+typedef void umem_destructor_t(void *, void *);
+typedef void umem_reclaim_t(void *);
+
+typedef struct umem_cache {
+ char cache_name[UMEM_CACHE_NAMELEN + 1];
+ size_t cache_bufsize;
+ size_t cache_align;
+ umem_constructor_t *cache_constructor;
+ umem_destructor_t *cache_destructor;
+ umem_reclaim_t *cache_reclaim;
+ void *cache_private;
+ void *cache_arena;
+ int cache_cflags;
+} umem_cache_t;
+
+static inline void *
+umem_alloc(size_t size, int flags)
+{
+ void *ptr;
+
+ ptr = malloc(size);
+ while (ptr == NULL && (flags & UMEM_NOFAIL))
+ ptr = malloc(size);
+
+ return ptr;
+}
+
+static inline void *
+umem_zalloc(size_t size, int flags)
+{
+ void *ptr;
+
+ ptr = umem_alloc(size, flags);
+ if (ptr)
+ memset(ptr, 0, size);
+
+ return ptr;
+}
+
+static inline void
+umem_free(void *ptr, size_t size)
+{
+ free(ptr);
+}
+
+static inline void
+umem_nofail_callback(umem_nofail_callback_t *cb) {}
+
+static inline umem_cache_t *
+umem_cache_create(char *name, size_t bufsize, size_t align,
+ umem_constructor_t *constructor,
+ umem_destructor_t *destructor,
+ umem_reclaim_t *reclaim,
+ void *priv, void *vmp, int cflags)
+{
+ umem_cache_t *cp;
+ /* Zero-fill: strncpy() below may not NUL-terminate a long name. */
+ cp = umem_zalloc(sizeof(umem_cache_t), UMEM_DEFAULT);
+ if (cp) {
+ strncpy(cp->cache_name, name, UMEM_CACHE_NAMELEN);
+ cp->cache_bufsize = bufsize;
+ cp->cache_align = align;
+ cp->cache_constructor = constructor;
+ cp->cache_destructor = destructor;
+ cp->cache_reclaim = reclaim;
+ cp->cache_private = priv;
+ cp->cache_arena = vmp;
+ cp->cache_cflags = cflags;
+ }
+
+ return cp;
+}
+
+static inline void
+umem_cache_destroy(umem_cache_t *cp)
+{
+ umem_free(cp, sizeof(umem_cache_t));
+}
+
+static inline void *
+umem_cache_alloc(umem_cache_t *cp, int flags)
+{
+ void *ptr;
+
+ ptr = umem_alloc(cp->cache_bufsize, flags);
+ if (ptr && cp->cache_constructor)
+ cp->cache_constructor(ptr, cp->cache_private, UMEM_DEFAULT);
+
+ return ptr;
+}
+
+static inline void
+umem_cache_free(umem_cache_t *cp, void *ptr)
+{
+ if (cp->cache_destructor)
+ cp->cache_destructor(ptr, cp->cache_private);
+
+ umem_free(ptr, cp->cache_bufsize);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/libspl/include/unistd.h b/lib/libspl/include/unistd.h
new file mode 100644
index 000000000..493efa81b
--- /dev/null
+++ b/lib/libspl/include/unistd.h
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include_next <unistd.h>
+
+#ifndef _LIBSPL_UNISTD_H
+#define _LIBSPL_UNISTD_H
+
+#include <zfs_config.h>
+
+#if !defined(HAVE_IOCTL_IN_UNISTD_H)
+# if defined(HAVE_IOCTL_IN_SYS_IOCTL_H)
+# include <sys/ioctl.h>
+# elif defined(HAVE_IOCTL_IN_STROPTS_H)
+# include <stropts.h>
+# else
+# error "System call ioctl() unavailable"
+# endif
+#endif
+
+#if !defined(HAVE_ISSETUGID)
+# include <sys/types.h>
+# define issetugid() (geteuid() == 0 || getegid() == 0)
+#endif
+
+#if !defined(__sun__) && !defined(__sun)
+/* It seems Solaris only returns positive host ids */
+static inline long fake_gethostid(void)
+{
+ long id = gethostid();
+ return id >= 0 ? id : -id;
+}
+#define gethostid() fake_gethostid()
+#endif
+
+#endif /* _LIBSPL_UNISTD_H */
diff --git a/lib/libspl/include/zone.h b/lib/libspl/include/zone.h
new file mode 100644
index 000000000..366f95b14
--- /dev/null
+++ b/lib/libspl/include/zone.h
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_ZONE_H
+#define _LIBSPL_ZONE_H
+
+
+
+#include <sys/types.h>
+#include <sys/zone.h>
+#include <sys/priv.h>
+#include <tsol/label.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GLOBAL_ZONEID 0
+#define GLOBAL_ZONEID_NAME "global"
+
+/*
+ * Functions for mapping between id and name for active zones.
+ */
+extern zoneid_t getzoneid(void);
+extern zoneid_t getzoneidbyname(const char *);
+extern ssize_t getzonenamebyid(zoneid_t, char *, size_t);
+
+#if 0
+
+/*
+ * NOTE
+ *
+ * The remaining contents of this file are private to the implementation
+ * of Solaris and are subject to change at any time without notice,
+ * Applications using these interfaces may fail to run on future releases.
+ */
+
+extern int zonept(int, zoneid_t);
+extern int zone_get_id(const char *, zoneid_t *);
+
+/* System call API */
+extern zoneid_t zone_create(const char *, const char *,
+ const struct priv_set *, const char *, size_t, const char *, size_t, int *,
+ int, int, const bslabel_t *, int);
+extern int zone_boot(zoneid_t);
+extern int zone_destroy(zoneid_t);
+extern ssize_t zone_getattr(zoneid_t, int, void *, size_t);
+extern int zone_setattr(zoneid_t, int, void *, size_t);
+extern int zone_enter(zoneid_t);
+extern int zone_list(zoneid_t *, uint_t *);
+extern int zone_shutdown(zoneid_t);
+extern int zone_version(int *);
+extern int zone_add_datalink(zoneid_t, char *);
+extern int zone_remove_datalink(zoneid_t, char *);
+extern int zone_check_datalink(zoneid_t *, char *);
+extern int zone_list_datalink(zoneid_t, int *, char *);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBSPL_ZONE_H */
diff --git a/lib/libspl/list.c b/lib/libspl/list.c
new file mode 100644
index 000000000..b29dc8a87
--- /dev/null
+++ b/lib/libspl/list.c
@@ -0,0 +1,243 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Generic doubly-linked list implementation
+ */
+
+#include <sys/list.h>
+#include <sys/list_impl.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+
+#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
+#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
+#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+
+#define list_insert_after_node(list, node, object) { \
+ list_node_t *lnew = list_d2l(list, object); \
+ lnew->list_prev = (node); \
+ lnew->list_next = (node)->list_next; \
+ (node)->list_next->list_prev = lnew; \
+ (node)->list_next = lnew; \
+}
+
+#define list_insert_before_node(list, node, object) { \
+ list_node_t *lnew = list_d2l(list, object); \
+ lnew->list_next = (node); \
+ lnew->list_prev = (node)->list_prev; \
+ (node)->list_prev->list_next = lnew; \
+ (node)->list_prev = lnew; \
+}
+
+#define list_remove_node(node) \
+ (node)->list_prev->list_next = (node)->list_next; \
+ (node)->list_next->list_prev = (node)->list_prev; \
+ (node)->list_next = (node)->list_prev = NULL
+
+void
+list_create(list_t *list, size_t size, size_t offset)
+{
+ ASSERT(list);
+ ASSERT(size > 0);
+ ASSERT(size >= offset + sizeof (list_node_t));
+
+ list->list_size = size;
+ list->list_offset = offset;
+ list->list_head.list_next = list->list_head.list_prev =
+ &list->list_head;
+}
+
+void
+list_destroy(list_t *list)
+{
+ list_node_t *node = &list->list_head;
+
+ ASSERT(list);
+ ASSERT(list->list_head.list_next == node);
+ ASSERT(list->list_head.list_prev == node);
+
+ node->list_next = node->list_prev = NULL;
+}
+
+void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+ if (object == NULL) {
+ list_insert_head(list, nobject);
+ } else {
+ list_node_t *lold = list_d2l(list, object);
+ list_insert_after_node(list, lold, nobject);
+ }
+}
+
+void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+ if (object == NULL) {
+ list_insert_tail(list, nobject);
+ } else {
+ list_node_t *lold = list_d2l(list, object);
+ list_insert_before_node(list, lold, nobject);
+ }
+}
+
+void
+list_insert_head(list_t *list, void *object)
+{
+ list_node_t *lold = &list->list_head;
+ list_insert_after_node(list, lold, object);
+}
+
+void
+list_insert_tail(list_t *list, void *object)
+{
+ list_node_t *lold = &list->list_head;
+ list_insert_before_node(list, lold, object);
+}
+
+void
+list_remove(list_t *list, void *object)
+{
+ list_node_t *lold = list_d2l(list, object);
+ ASSERT(!list_empty(list));
+ ASSERT(lold->list_next != NULL);
+ list_remove_node(lold);
+}
+
+void *
+list_remove_head(list_t *list)
+{
+ list_node_t *head = list->list_head.list_next;
+ if (head == &list->list_head)
+ return (NULL);
+ list_remove_node(head);
+ return (list_object(list, head));
+}
+
+void *
+list_remove_tail(list_t *list)
+{
+ list_node_t *tail = list->list_head.list_prev;
+ if (tail == &list->list_head)
+ return (NULL);
+ list_remove_node(tail);
+ return (list_object(list, tail));
+}
+
+void *
+list_head(list_t *list)
+{
+ if (list_empty(list))
+ return (NULL);
+ return (list_object(list, list->list_head.list_next));
+}
+
+void *
+list_tail(list_t *list)
+{
+ if (list_empty(list))
+ return (NULL);
+ return (list_object(list, list->list_head.list_prev));
+}
+
+void *
+list_next(list_t *list, void *object)
+{
+ list_node_t *node = list_d2l(list, object);
+
+ if (node->list_next != &list->list_head)
+ return (list_object(list, node->list_next));
+
+ return (NULL);
+}
+
+void *
+list_prev(list_t *list, void *object)
+{
+ list_node_t *node = list_d2l(list, object);
+
+ if (node->list_prev != &list->list_head)
+ return (list_object(list, node->list_prev));
+
+ return (NULL);
+}
+
+/*
+ * Insert src list after dst list. Empty src list thereafter.
+ */
+void
+list_move_tail(list_t *dst, list_t *src)
+{
+ list_node_t *dstnode = &dst->list_head;
+ list_node_t *srcnode = &src->list_head;
+
+ ASSERT(dst->list_size == src->list_size);
+ ASSERT(dst->list_offset == src->list_offset);
+
+ if (list_empty(src))
+ return;
+
+ dstnode->list_prev->list_next = srcnode->list_next;
+ srcnode->list_next->list_prev = dstnode->list_prev;
+ dstnode->list_prev = srcnode->list_prev;
+ srcnode->list_prev->list_next = dstnode;
+
+ /* empty src list */
+ srcnode->list_next = srcnode->list_prev = srcnode;
+}
+
+void
+list_link_replace(list_node_t *lold, list_node_t *lnew)
+{
+ ASSERT(list_link_active(lold));
+ ASSERT(!list_link_active(lnew));
+
+ lnew->list_next = lold->list_next;
+ lnew->list_prev = lold->list_prev;
+ lold->list_prev->list_next = lnew;
+ lold->list_next->list_prev = lnew;
+ lold->list_next = lold->list_prev = NULL;
+}
+
+void
+list_link_init(list_node_t *ln)
+{
+ ln->list_next = NULL;
+ ln->list_prev = NULL;
+}
+
+int
+list_link_active(list_node_t *ln)
+{
+ return (ln->list_next != NULL);
+}
+
+int
+list_is_empty(list_t *list)
+{
+ return (list_empty(list));
+}
diff --git a/lib/libspl/mkdirp.c b/lib/libspl/mkdirp.c
new file mode 100644
index 000000000..f98e31e2d
--- /dev/null
+++ b/lib/libspl/mkdirp.c
@@ -0,0 +1,210 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1988 AT&T */
+/* All Rights Reserved */
+
+/*
+ * Creates directory and its parents if the parents do not
+ * exist yet.
+ *
+ * Returns -1 if fails for reasons other than non-existing
+ * parents.
+ * Does NOT simplify pathnames with . or .. in them.
+ */
+
+#include <sys/types.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+
+static char *simplify(const char *str);
+
+int
+mkdirp(const char *d, mode_t mode)
+{
+ char *endptr, *ptr, *slash, *str;
+
+ str = simplify(d);
+
+ /* If space couldn't be allocated for the simplified names, return. */
+
+ if (str == NULL)
+ return (-1);
+
+ /* Try to make the directory */
+
+ if (mkdir(str, mode) == 0) {
+ free(str);
+ return (0);
+ }
+ if (errno != ENOENT) {
+ free(str);
+ return (-1);
+ }
+ endptr = strrchr(str, '\0');
+ slash = strrchr(str, '/');
+
+ /* Search upward for the non-existing parent */
+
+ while (slash != NULL) {
+
+ ptr = slash;
+ *ptr = '\0';
+
+ /* If reached an existing parent, break */
+
+ if (access(str, F_OK) == 0)
+ break;
+
+ /* If non-existing parent */
+
+ else {
+ slash = strrchr(str, '/');
+
+ /* If under / or current directory, make it. */
+
+ if (slash == NULL || slash == str) {
+ if (mkdir(str, mode) != 0 && errno != EEXIST) {
+ free(str);
+ return (-1);
+ }
+ break;
+ }
+ }
+ }
+
+ /* Create directories starting from upmost non-existing parent */
+
+ while ((ptr = strchr(str, '\0')) != endptr) {
+ *ptr = '/';
+ if (mkdir(str, mode) != 0 && errno != EEXIST) {
+ /*
+ * If the mkdir fails because str already
+ * exists (EEXIST), then str has the form
+ * "existing-dir/..", and this is really
+ * ok. (Remember, this loop is creating the
+ * portion of the path that didn't exist)
+ */
+ free(str);
+ return (-1);
+ }
+ }
+ free(str);
+ return (0);
+}
+
+/*
+ * simplify - given a pathname, simplify that path by removing
+ * duplicate contiguous slashes.
+ *
+ * A simplified copy of the argument is returned to the
+ * caller, or NULL is returned on error.
+ *
+ * The caller should handle error reporting based upon the
+ * returned value, and should free the returned value,
+ * when appropriate.
+ */
+
+static char *
+simplify(const char *str)
+{
+ int i;
+ size_t mbPathlen; /* length of multi-byte path */
+ size_t wcPathlen; /* length of wide-character path */
+ wchar_t *wptr; /* scratch pointer */
+ wchar_t *wcPath; /* wide-character version of the path */
+ char *mbPath; /* The copy of the path to be returned */
+
+ /*
+ * bail out if there is nothing there.
+ */
+
+ if (!str)
+ return (NULL);
+
+ /*
+ * Get a copy of the argument.
+ */
+
+ if ((mbPath = strdup(str)) == NULL) {
+ return (NULL);
+ }
+
+ /*
+ * convert the multi-byte version of the path to a
+ * wide-character rendering, for doing our figuring.
+ */
+
+ mbPathlen = strlen(mbPath);
+
+ if ((wcPath = calloc(sizeof (wchar_t), mbPathlen+1)) == NULL) {
+ free(mbPath);
+ return (NULL);
+ }
+
+ if ((wcPathlen = mbstowcs(wcPath, mbPath, mbPathlen)) == (size_t)-1) {
+ free(mbPath);
+ free(wcPath);
+ return (NULL);
+ }
+
+ /*
+ * remove duplicate slashes first ("//../" -> "/")
+ */
+
+ for (wptr = wcPath, i = 0; i < wcPathlen; i++) {
+ *wptr++ = wcPath[i];
+
+ if (wcPath[i] == '/') {
+ i++;
+
+ while (wcPath[i] == '/') {
+ i++;
+ }
+
+ i--;
+ }
+ }
+
+ *wptr = '\0';
+
+ /*
+ * now convert back to the multi-byte format.
+ */
+
+ if (wcstombs(mbPath, wcPath, mbPathlen) == (size_t)-1) {
+ free(mbPath);
+ free(wcPath);
+ return (NULL);
+ }
+
+ free(wcPath);
+ return (mbPath);
+}
diff --git a/lib/libspl/strlcat.c b/lib/libspl/strlcat.c
new file mode 100644
index 000000000..a001df7b1
--- /dev/null
+++ b/lib/libspl/strlcat.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Appends src to the dstsize buffer at dst. The append will never
+ * overflow the destination buffer and the buffer will always be null
+ * terminated. Never reference beyond &dst[dstsize-1] when computing
+ * the length of the pre-existing string.
+ */
+
+size_t
+strlcat(char *dst, const char *src, size_t dstsize)
+{
+ char *df = dst;
+ size_t left = dstsize;
+ size_t l1;
+ size_t l2 = strlen(src);
+ size_t copied;
+
+ while (left-- != 0 && *df != '\0')
+ df++;
+ l1 = df - dst;
+ if (dstsize == l1)
+ return (l1 + l2);
+
+ copied = l1 + l2 >= dstsize ? dstsize - l1 - 1 : l2;
+ (void) memcpy(dst + l1, src, copied);
+ dst[l1+copied] = '\0';
+ return (l1 + l2);
+}
diff --git a/lib/libspl/strlcpy.c b/lib/libspl/strlcpy.c
new file mode 100644
index 000000000..2d0daae05
--- /dev/null
+++ b/lib/libspl/strlcpy.c
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Copies src to the dstsize buffer at dst. The copy will never
+ * overflow the destination buffer and the buffer will always be null
+ * terminated.
+ */
+
+size_t
+strlcpy(char *dst, const char *src, size_t len)
+{
+ size_t slen = strlen(src);
+ size_t copied;
+
+ if (len == 0)
+ return (slen);
+
+ if (slen >= len)
+ copied = len - 1;
+ else
+ copied = slen;
+ (void) memcpy(dst, src, copied);
+ dst[copied] = '\0';
+ return (slen);
+}
diff --git a/lib/libspl/strnlen.c b/lib/libspl/strnlen.c
new file mode 100644
index 000000000..9fb8227b9
--- /dev/null
+++ b/lib/libspl/strnlen.c
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.
+ * All rights reserved. Use is subject to license terms.
+ */
+
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Returns the number of non-NULL bytes in string argument,
+ * but not more than maxlen. Does not look past str + maxlen.
+ */
+size_t
+strnlen(const char *str, size_t maxlen)
+{
+ const char *ptr;
+
+ ptr = memchr(str, 0, maxlen);
+ if (ptr == NULL)
+ return (maxlen);
+
+ return (ptr - str);
+}
diff --git a/lib/libspl/xdr.c b/lib/libspl/xdr.c
new file mode 100644
index 000000000..288a338a1
--- /dev/null
+++ b/lib/libspl/xdr.c
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
+ * All Rights Reserved
+ *
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ */
+
+#include <rpc/xdr.h>
+
+/*
+ * As of glibc-2.5-25 there is no support for xdr_control(). The
+ * xdrmem implementation from OpenSolaris is used here.
+ *
+ * FIXME: Not well tested; it may not work as expected.
+ */
+bool_t
+xdr_control(XDR *xdrs, int request, void *info)
+{
+ xdr_bytesrec_t *xptr;
+ int32_t *int32p;
+ int len;
+
+ switch (request) {
+ case XDR_GET_BYTES_AVAIL:
+ xptr = (xdr_bytesrec_t *)info;
+ xptr->xc_is_last_record = TRUE;
+ xptr->xc_num_avail = xdrs->x_handy;
+ return (TRUE);
+
+ case XDR_PEEK:
+ /*
+ * Return the next 4 byte unit in the XDR stream.
+ */
+ if (xdrs->x_handy < sizeof (int32_t))
+ return (FALSE);
+ int32p = (int32_t *)info;
+ *int32p = (int32_t)ntohl((uint32_t)
+ (*((int32_t *)(xdrs->x_private))));
+ return (TRUE);
+
+ case XDR_SKIPBYTES:
+ /*
+ * Skip N bytes; x_handy may be unsigned, so check before subtracting.
+ */
+ int32p = (int32_t *)info;
+ len = RNDUP((int)(*int32p));
+ if (len < 0 || (unsigned int)len > xdrs->x_handy)
+ return (FALSE);
+ xdrs->x_handy -= len;
+ xdrs->x_private += len;
+ return (TRUE);
+ }
+ return (FALSE);
+}
diff --git a/lib/libspl/zone.c b/lib/libspl/zone.c
new file mode 100644
index 000000000..f4269a76c
--- /dev/null
+++ b/lib/libspl/zone.c
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Ricardo Correia. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <zone.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * Report the zone ID of the caller.  This implementation always
+ * answers with the global zone.
+ */
+zoneid_t
+getzoneid(void)
+{
+	return (GLOBAL_ZONEID);
+}
+
+/*
+ * Map a zone name to its zone ID.
+ *
+ * Only the global zone is known to this implementation: a NULL name or
+ * GLOBAL_ZONEID_NAME yields GLOBAL_ZONEID.  Any other name fails with
+ * -1 and errno set to EINVAL.  The error is reported out of band
+ * because EINVAL is a positive number and, if returned directly, could
+ * not be told apart from a valid zone ID.
+ */
+zoneid_t
+getzoneidbyname(const char *name)
+{
+	if (name == NULL || strcmp(name, GLOBAL_ZONEID_NAME) == 0)
+		return (GLOBAL_ZONEID);
+
+	errno = EINVAL;
+	return (-1);
+}
+
+/*
+ * Copy the name of zone 'id' into 'buf'.
+ *
+ * Only GLOBAL_ZONEID is known to this implementation; any other ID
+ * fails with -1 and errno set to EINVAL (a bare EINVAL return would
+ * look like a valid positive length).
+ *
+ * On success the return value is the size needed to hold the full
+ * name including its terminating NUL.  When 'buf' is NULL or 'buflen'
+ * is 0 nothing is copied and only that size is returned.  Otherwise at
+ * most 'buflen' bytes are written and the result is always
+ * NUL-terminated, so the name is truncated if the buffer is too small.
+ */
+ssize_t
+getzonenamebyid(zoneid_t id, char *buf, size_t buflen)
+{
+	ssize_t ret;
+
+	if (id != GLOBAL_ZONEID) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	ret = strlen(GLOBAL_ZONEID_NAME) + 1;
+
+	if (buf == NULL || buflen == 0)
+		return (ret);
+
+	(void) strncpy(buf, GLOBAL_ZONEID_NAME, buflen);
+	/* strncpy() does not terminate the copy when it truncates. */
+	buf[buflen - 1] = '\0';
+
+	return (ret);
+}
diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c
index 3bd5c3119..60f50832d 100644
--- a/lib/libuutil/uu_misc.c
+++ b/lib/libuutil/uu_misc.c
@@ -208,18 +208,6 @@ uu_panic(const char *format, ...)
(void) pause();
}
-int
-assfail(const char *astring, const char *file, int line)
-{
-#if defined(__STDC__) && __STDC_VERSION__ - 0 >= 199901L
- __assert_c99(astring, file, line, "unknown func");
-#else
- __assert(astring, file, line);
-#endif
- /*NOTREACHED*/
- return (0);
-}
-
static void
uu_lockup(void)
{
diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h
index f19e398f6..e3da385d2 100644
--- a/lib/libzfs/include/libzfs.h
+++ b/lib/libzfs/include/libzfs.h
@@ -50,6 +50,26 @@ extern "C" {
#define ZPOOL_MAXPROPLEN MAXPATHLEN
/*
+ * Default device paths
+ */
+
+/*
+ * Solaris layout: separate /dev/dsk and /dev/rdsk trees, with slices
+ * named by an "sN" suffix.
+ */
+#if defined(__sun__) || defined(__sun)
+#define DISK_ROOT "/dev/dsk"
+#define RDISK_ROOT "/dev/rdsk"
+#define UDISK_ROOT RDISK_ROOT
+#define FIRST_SLICE "s0"
+#define BACKUP_SLICE "s2"
+#endif
+
+/*
+ * Linux layout: a single /dev tree, so RDISK_ROOT is an alias for
+ * DISK_ROOT.  FIRST_SLICE is a bare partition number and BACKUP_SLICE
+ * is empty, so appending it to a disk name leaves the name unchanged.
+ * NOTE(review): UDISK_ROOT presumably names the udev-managed link
+ * directory -- confirm against its users.
+ */
+#ifdef __linux__
+#define DISK_ROOT "/dev"
+#define RDISK_ROOT DISK_ROOT
+#define UDISK_ROOT "/dev/disk"
+#define FIRST_SLICE "1"
+#define BACKUP_SLICE ""
+#endif
+
+/*
* libzfs errors
*/
enum {
@@ -235,6 +255,7 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
boolean_t *, boolean_t *);
extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
boolean_t *, boolean_t *, boolean_t *);
+extern int zpool_label_disk_wait(char *, int);
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
/*
@@ -550,15 +571,6 @@ extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
void *, void *, int, zfs_share_op_t);
/*
- * When dealing with nvlists, verify() is extremely useful
- */
-#ifdef NDEBUG
-#define verify(EX) ((void)(EX))
-#else
-#define verify(EX) assert(EX)
-#endif
-
-/*
* Utility function to convert a number to a human-readable form.
*/
extern void zfs_nicenum(uint64_t, char *, size_t);
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index 899ffdaae..a55e73880 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -45,9 +45,11 @@
#include <grp.h>
#include <stddef.h>
#include <ucred.h>
+#ifdef HAVE_IDMAP
#include <idmap.h>
#include <aclutils.h>
#include <directory.h>
+#endif /* HAVE_IDMAP */
#include <sys/spa.h>
#include <sys/zap.h>
@@ -1989,6 +1991,7 @@ zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
return (0);
}
+#ifdef HAVE_IDMAP
static int
idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser,
char **domainp, idmap_rid_t *ridp)
@@ -2023,6 +2026,7 @@ out:
(void) idmap_fini(idmap_hdl);
return (err);
}
+#endif /* HAVE_IDMAP */
/*
* convert the propname into parameters needed by kernel
@@ -2056,6 +2060,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
cp = strchr(propname, '@') + 1;
if (strchr(cp, '@')) {
+#ifdef HAVE_IDMAP
/*
* It's a SID name (eg "user@domain") that needs to be
* turned into S-1-domainID-RID.
@@ -2078,6 +2083,9 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
return (ENOENT);
cp = numericsid;
/* will be further decoded below */
+#else
+ return (ENOSYS);
+#endif /* HAVE_IDMAP */
}
if (strncmp(cp, "S-1-", 4) == 0) {
@@ -2116,6 +2124,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
*ridp = gr->gr_gid;
}
} else {
+#ifdef HAVE_IDMAP
/* It's a user/group ID (eg "12345"). */
uid_t id = strtoul(cp, &end, 10);
idmap_rid_t rid;
@@ -2133,6 +2142,9 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
} else {
*ridp = id;
}
+#else
+ return (ENOSYS);
+#endif /* HAVE_IDMAP */
}
ASSERT3P(numericsid, ==, NULL);
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 4a7634a63..b28f793bb 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -52,6 +52,9 @@
#include <fcntl.h>
#include <sys/vdev_impl.h>
+#ifdef HAVE_LIBBLKID
+#include <blkid/blkid.h>
+#endif
#include "libzfs.h"
#include "libzfs_impl.h"
@@ -777,6 +780,77 @@ zpool_read_label(int fd, nvlist_t **config)
return (0);
}
+#ifdef HAVE_LIBBLKID
+/*
+ * Use libblkid to quickly search for zfs devices.
+ *
+ * Every device the blkid cache reports with TYPE "zfs" is opened
+ * read-only and its label is read with zpool_read_label().  Any config
+ * found is added to 'pools' with add_config().  Devices that cannot be
+ * opened are silently skipped.
+ *
+ * Returns 0 on success and non-zero on any failure, so that the caller
+ * can fall back to scanning the device directory by hand.
+ */
+static int
+zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
+{
+	blkid_cache cache;
+	blkid_dev_iterate iter;
+	blkid_dev dev;
+	const char *devname;
+	nvlist_t *config;
+	int fd, err;
+
+	err = blkid_get_cache(&cache, NULL);
+	if (err != 0) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err);
+		goto err_blkid1;
+	}
+
+	err = blkid_probe_all(cache);
+	if (err != 0) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err);
+		goto err_blkid2;
+	}
+
+	iter = blkid_dev_iterate_begin(cache);
+	if (iter == NULL) {
+		/*
+		 * 'err' is still 0 from the successful probe above.  Set
+		 * it explicitly so this failure is not handed back to the
+		 * caller as success.
+		 */
+		err = ENOMEM;
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()"));
+		goto err_blkid2;
+	}
+
+	err = blkid_dev_set_search(iter, "TYPE", "zfs");
+	if (err != 0) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err);
+		goto err_blkid3;
+	}
+
+	while (blkid_dev_next(iter, &dev) == 0) {
+		devname = blkid_dev_devname(dev);
+		if ((fd = open64(devname, O_RDONLY)) < 0)
+			continue;
+
+		err = zpool_read_label(fd, &config);
+		(void) close(fd);
+
+		if (err != 0) {
+			(void) no_memory(hdl);
+			goto err_blkid3;
+		}
+
+		/* A NULL config means no label was found on this device. */
+		if (config != NULL) {
+			err = add_config(hdl, pools, devname, config);
+			if (err != 0)
+				goto err_blkid3;
+		}
+	}
+
+	/* Unwind in reverse order of acquisition. */
+err_blkid3:
+	blkid_dev_iterate_end(iter);
+err_blkid2:
+	blkid_put_cache(cache);
+err_blkid1:
+	return (err);
+}
+#endif /* HAVE_LIBBLKID */
+
/*
* Given a list of directories to search, find all pools stored on disk. This
* includes partial pools which are not available to import. If no args are
@@ -791,12 +865,12 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
int i;
DIR *dirp = NULL;
struct dirent64 *dp;
- char path[MAXPATHLEN];
+ char path[MAXPATHLEN], path2[MAXPATHLEN];
char *end;
size_t pathleft;
struct stat64 statbuf;
nvlist_t *ret = NULL, *config;
- static char *default_dir = "/dev/dsk";
+ static char *default_dir = DISK_ROOT;
int fd;
pool_list_t pools = { 0 };
pool_entry_t *pe, *penext;
@@ -807,6 +881,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
verify(poolname == NULL || guid == 0);
if (argc == 0) {
+#ifdef HAVE_LIBBLKID
+ /* Use libblkid to scan all devices for their type */
+ if (zpool_find_import_blkid(hdl, &pools) == 0)
+ goto skip_scanning;
+
+ (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "blkid failure falling back "
+ "to manual probing"));
+#endif /* HAVE_LIBBLKID */
argc = 1;
argv = &default_dir;
}
@@ -818,7 +901,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
*/
for (i = 0; i < argc; i++) {
char *rdsk;
- int dfd;
/* use realpath to normalize the path */
if (realpath(argv[i], path) == 0) {
@@ -842,8 +924,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
else
rdsk = path;
- if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
- (dirp = fdopendir(dfd)) == NULL) {
+ if ((dirp = opendir(rdsk)) == NULL) {
zfs_error_aux(hdl, strerror(errno));
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
dgettext(TEXT_DOMAIN, "cannot open '%s'"),
@@ -860,20 +941,19 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
(name[1] == 0 || (name[1] == '.' && name[2] == 0)))
continue;
- if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
- continue;
+ snprintf(path2, sizeof (path2), "%s%s", rdsk, name);
/*
* Ignore failed stats. We only want regular
* files, character devs and block devs.
*/
- if (fstat64(fd, &statbuf) != 0 ||
+ if (stat64(path2, &statbuf) != 0 ||
(!S_ISREG(statbuf.st_mode) &&
- !S_ISCHR(statbuf.st_mode) &&
- !S_ISBLK(statbuf.st_mode))) {
- (void) close(fd);
+ !S_ISBLK(statbuf.st_mode)))
+ continue;
+
+ if ((fd = open64(path2, O_RDONLY)) < 0)
continue;
- }
if ((zpool_read_label(fd, &config)) != 0) {
(void) close(fd);
@@ -906,9 +986,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
config = NULL;
continue;
}
- /* use the non-raw path for the config */
- (void) strlcpy(end, name, pathleft);
- if (add_config(hdl, &pools, path, config) != 0)
+ if (add_config(hdl, &pools, path2, config) != 0)
goto error;
}
}
@@ -917,6 +995,9 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
dirp = NULL;
}
+#ifdef HAVE_LIBBLKID
+skip_scanning:
+#endif
ret = get_configs(hdl, &pools, active_ok);
error:
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index b8989a026..38cc627fc 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -56,10 +56,6 @@ static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
#define BOOTCMD "installboot(1M)"
#endif
-#define DISK_ROOT "/dev/dsk"
-#define RDISK_ROOT "/dev/rdsk"
-#define BACKUP_SLICE "s2"
-
/*
* ====================================================================
* zpool property functions
@@ -638,9 +634,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
/*
* Don't start the slice at the default block of 34; many storage
- * devices will use a stripe width of 128k, so start there instead.
+ * devices will use a stripe width of 128k, other vendors prefer a 1m
+ * alignment. It is best to play it safe and ensure a 1m alignment
+ * given 512b blocks. When the block size is larger by a power of 2
+ * we will still be 1m aligned.
*/
-#define NEW_START_BLOCK 256
+#define NEW_START_BLOCK 2048
/*
* Validate the given pool name, optionally putting an extended error message in
@@ -933,10 +932,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
* This can happen if the user has specified the same
* device multiple times. We can't reliably detect this
* until we try to add it and see we already have a
- * label.
+ * label. This can also happen if the device is
+ * part of an active md or lvm device.
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more vdevs refer to the same device"));
+ "one or more vdevs refer to the same device, or one of\n"
+ "the devices is part of an active md or lvm device"));
return (zfs_error(hdl, EZFS_BADDEV, msg));
case EOVERFLOW:
@@ -1559,7 +1560,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
if (guid != 0 && *end == '\0') {
verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
} else if (path[0] != '/') {
- (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
+ (void) snprintf(buf, sizeof (buf), "%s/%s", DISK_ROOT, path);
verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
} else {
verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
@@ -1760,22 +1761,14 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
* the disk to use the new unallocated space.
*/
static int
-zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
{
- char path[MAXPATHLEN];
char errbuf[1024];
int fd, error;
- int (*_efi_use_whole_disk)(int);
-
- if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
- "efi_use_whole_disk")) == NULL)
- return (-1);
-
- (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
- if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+ if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
- "relabel '%s': unable to open device"), name);
+ "relabel '%s': unable to open device"), path);
return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
}
@@ -1784,11 +1777,11 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
* does not have any unallocated space left. If so, we simply
* ignore that error and continue on.
*/
- error = _efi_use_whole_disk(fd);
+ error = efi_use_whole_disk(fd);
(void) close(fd);
if (error && error != VT_ENOSPC) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
- "relabel '%s': unable to read disk capacity"), name);
+ "relabel '%s': unable to read disk capacity"), path);
return (zfs_error(hdl, EZFS_NOCAP, errbuf));
}
return (0);
@@ -1847,7 +1840,6 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
}
if (wholedisk) {
- pathname += strlen(DISK_ROOT) + 1;
(void) zpool_relabel_disk(zhp->zpool_hdl, pathname);
}
}
@@ -2611,7 +2603,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
char *
zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
{
- char *path, *devid;
+ char *path, *devid, *type;
uint64_t value;
char buf[64];
vdev_stat_t *vs;
@@ -2625,7 +2617,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
(u_longlong_t)value);
path = buf;
} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
-
/*
* If the device is dead (faulted, offline, etc) then don't
* bother opening it. Otherwise we may be forcing the user to
@@ -2664,9 +2655,19 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
devid_str_free(newdevid);
}
- if (strncmp(path, "/dev/dsk/", 9) == 0)
- path += 9;
+ /*
+ * For a block device only use the name.
+ */
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+ if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+ path = strrchr(path, '/');
+ path++;
+ }
+#if defined(__sun__) || defined(__sun)
+ /*
+ * The following code strips the slice from the device path.
+ */
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
&value) == 0 && value) {
char *tmp = zfs_strdup(hdl, path);
@@ -2675,6 +2676,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
tmp[strlen(path) - 2] = '\0';
return (tmp);
}
+#endif
} else {
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
@@ -3054,7 +3056,7 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb)
(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
strrchr(path, '/'));
- if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
+ if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
struct dk_gpt *vtoc;
if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
@@ -3100,6 +3102,54 @@ find_start_block(nvlist_t *config)
return (MAXOFFSET_T);
}
+/*
+ * Wait up to 'timeout' milliseconds for a newly created device to
+ * become available at 'path'.
+ *
+ * There is a small window when a /dev/ device will exist and the udev
+ * link will not, so we must wait for the symlink.  Depending on the
+ * udev rules this may take a few seconds.
+ *
+ * The path is polled once per millisecond, sleeping before each probe.
+ * Returns 0 as soon as the path can be stat'ed, or ENOENT if it never
+ * appeared within the timeout.
+ */
+int
+zpool_label_disk_wait(char *path, int timeout)
+{
+	struct stat64 sb;
+	int elapsed = 0;
+
+	while (elapsed < timeout) {
+		usleep(1000);
+		elapsed++;
+
+		errno = 0;
+		if (stat64(path, &sb) != 0 || errno != 0)
+			continue;
+
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Read the EFI label back from the device at 'path' and verify that
+ * the primary GPT is intact.
+ *
+ * Returns 0 when the label reads back cleanly, the errno from open(2)
+ * if the device cannot be opened, the negative value returned by
+ * efi_alloc_and_read() if the label cannot be read, or EIDRM when the
+ * label is flagged EFI_GPT_PRIMARY_CORRUPT.
+ */
+int
+zpool_label_disk_check(char *path)
+{
+	struct dk_gpt *vtoc;
+	int fd, err;
+
+	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
+		return (errno);
+
+	/*
+	 * Only a negative result is a failure.  A non-negative result is
+	 * success (read_efi_label() in this file relies on the same
+	 * convention) and leaves 'vtoc' allocated, so treating it as an
+	 * error would both misreport the label and leak the table.
+	 */
+	if ((err = efi_alloc_and_read(fd, &vtoc)) < 0) {
+		(void) close(fd);
+		return (err);
+	}
+
+	err = (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) ? EIDRM : 0;
+
+	efi_free(vtoc);
+	(void) close(fd);
+	return (err);
+}
+
/*
* Label an individual disk. The name provided is the short name,
* stripped of any leading /dev path.
@@ -3109,7 +3159,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
{
char path[MAXPATHLEN];
struct dk_gpt *vtoc;
- int fd;
+ int rval, fd;
size_t resv = EFI_MIN_RESV_SIZE;
uint64_t slice_size;
diskaddr_t start_block;
@@ -3145,13 +3195,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
BACKUP_SLICE);
- if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+ if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
/*
* This shouldn't happen. We've long since verified that this
* is a valid device.
*/
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "unable to open device"));
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "unable to open device '%s': %d"), path, errno);
return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
}
@@ -3194,7 +3244,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
vtoc->efi_parts[8].p_size = resv;
vtoc->efi_parts[8].p_tag = V_RESERVED;
- if (efi_write(fd, vtoc) != 0) {
+ if ((rval = efi_write(fd, vtoc)) != 0) {
/*
* Some block drivers (like pcata) may not support EFI
* GPT labels. Print out a helpful error message dir-
@@ -3204,14 +3254,36 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
(void) close(fd);
efi_free(vtoc);
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "try using fdisk(1M) and then provide a specific slice"));
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
+ "parted(8) and then provide a specific slice: %d"), rval);
return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
}
(void) close(fd);
efi_free(vtoc);
- return (0);
+
+ /* Wait for the first expected slice to appear. */
+ (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
+ isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
+ rval = zpool_label_disk_wait(path, 3000);
+ if (rval) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
+ "detect device partitions on '%s': %d"), path, rval);
+ return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+ }
+
+ /* We can't be too paranoid. Read the label back and verify it. */
+ (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+ rval = zpool_label_disk_check(path);
+ if (rval) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
+ "EFI label on '%s' is damaged. Ensure\nthis device "
+ "is not in in use, and is functioning properly: %d"),
+ path, rval);
+ return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+ }
+
+ return 0;
}
static boolean_t
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index be5b3949f..eb799901d 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -39,6 +39,7 @@
#include <sys/mntent.h>
#include <sys/mnttab.h>
#include <sys/avl.h>
+#include <sys/debug.h>
#include <stddef.h>
#include <libzfs.h>
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 8d0c47e30..71d0278a4 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -578,11 +578,17 @@ libzfs_init(void)
}
if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+ (void) fprintf(stderr, "Unable to open %s: (%d) %s\n",
+ ZFS_DEV, errno, strerror(errno));
free(hdl);
return (NULL);
}
+#ifdef HAVE_SETMNTENT
+ if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "r")) == NULL) {
+#else
if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
+#endif
(void) close(hdl->libzfs_fd);
free(hdl);
return (NULL);
@@ -602,7 +608,11 @@ libzfs_fini(libzfs_handle_t *hdl)
{
(void) close(hdl->libzfs_fd);
if (hdl->libzfs_mnttab)
+#ifdef HAVE_SETMNTENT
+ (void) endmntent(hdl->libzfs_mnttab);
+#else
(void) fclose(hdl->libzfs_mnttab);
+#endif
if (hdl->libzfs_sharetab)
(void) fclose(hdl->libzfs_sharetab);
zfs_uninit_libshare(hdl);
diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h
index 7c3c5e0d5..9377dab2f 100644
--- a/lib/libzpool/include/sys/zfs_context.h
+++ b/lib/libzpool/include/sys/zfs_context.h
@@ -58,7 +58,6 @@ extern "C" {
#include <atomic.h>
#include <dirent.h>
#include <time.h>
-#include <libsysevent.h>
#include <sys/note.h>
#include <sys/types.h>
#include <sys/cred.h>
@@ -72,8 +71,11 @@ extern "C" {
#include <sys/sdt.h>
#include <sys/kstat.h>
#include <sys/u8_textprep.h>
+#ifdef HAVE_SYSEVENT
+#include <libsysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
+#endif /* HAVE_SYSEVENT */
/*
* Stack
@@ -110,59 +112,12 @@ extern void vpanic(const char *, __va_list);
#define fm_panic panic
-/* This definition is copied from assert.h. */
-#ifndef verify
-#if defined(__STDC__)
-#if __STDC_VERSION__ - 0 >= 199901L
-#define verify(EX) (void)((EX) || \
- (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
-#else
-#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
-#endif /* __STDC_VERSION__ - 0 >= 199901L */
-#else
-#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
-#endif /* __STDC__ */
-#endif
-
-#undef VERIFY
-#undef ASSERT
-
-#define VERIFY verify
-#define ASSERT assert
-
-extern void __assert(const char *, const char *, int);
-
-#ifdef lint
-#define VERIFY3_IMPL(x, y, z, t) if (x == z) ((void)0)
-#else
-/* BEGIN CSTYLED */
-#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
- const TYPE __left = (TYPE)(LEFT); \
- const TYPE __right = (TYPE)(RIGHT); \
- if (!(__left OP __right)) { \
- char *__buf = alloca(256); \
- (void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
- #LEFT, #OP, #RIGHT, \
- (u_longlong_t)__left, #OP, (u_longlong_t)__right); \
- __assert(__buf, __FILE__, __LINE__); \
- } \
-_NOTE(CONSTCOND) } while (0)
-/* END CSTYLED */
-#endif /* lint */
-
-#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
-#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
-#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
-
-#ifdef NDEBUG
-#define ASSERT3S(x, y, z) ((void)0)
-#define ASSERT3U(x, y, z) ((void)0)
-#define ASSERT3P(x, y, z) ((void)0)
-#else
-#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
-#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
-#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
-#endif
+/*
+ * VERIFY/ASSERT
+ *
+ * The verify/assert support moved to libspl/include/assert.h so only
+ * one version of the code needs to be maintained for all of user space.
+ */
/*
* DTrace SDT probes have different signatures in userland than they do in
@@ -325,6 +280,9 @@ extern void kstat_delete(kstat_t *);
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
#define kmem_free(_b, _s) umem_free(_b, _s)
+#define vmem_alloc(_s, _f) kmem_alloc(_s, _f)
+#define vmem_zalloc(_s, _f) kmem_zalloc(_s, _f)
+#define vmem_free(_b, _s) kmem_free(_b, _s)
#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
#define kmem_cache_destroy(_c) umem_cache_destroy(_c)
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index 97e1bc9d3..ab97636ba 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -540,7 +540,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
* for its size. So -- gag -- we open the block device to get
* its size, and remember it for subsequent VOP_GETATTR().
*/
+#if defined(__sun__) || defined(__sun)
if (strncmp(path, "/dev/", 5) == 0) {
+#else
+ if (0) {
+#endif
char *dsk;
fd = open64(path, O_RDONLY);
if (fd == -1)
@@ -561,6 +565,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
return (errno);
}
+#ifdef __linux__
+ if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
+ flags |= O_DIRECT;
+ if (flags & FWRITE)
+ flags |= O_EXCL;
+ }
+#endif
+
if (flags & FCREAT)
old_umask = umask(0);
diff --git a/module/Makefile.in b/module/Makefile.in
index 13a5a8133..e32bc9fb6 100644
--- a/module/Makefile.in
+++ b/module/Makefile.in
@@ -3,6 +3,7 @@ subdir-m += nvpair
subdir-m += unicode
subdir-m += zcommon
subdir-m += zfs
+subdir-m += zpios
modules clean:
# Make the exported SPL symbols available to these modules.
diff --git a/module/avl/avl.c b/module/avl/avl.c
index a9634d701..728bd8723 100644
--- a/module/avl/avl.c
+++ b/module/avl/avl.c
@@ -1031,3 +1031,29 @@ done:
return (AVL_NODE2DATA(node, off));
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Passed to spl_module_init() below; does nothing and reports success. */
+static int
+avl_init(void)
+{
+	return (0);
+}
+
+/* Passed to spl_module_exit() below; does nothing and reports success. */
+static int
+avl_fini(void)
+{
+	return (0);
+}
+
+spl_module_init(avl_init);
+spl_module_exit(avl_fini);
+
+MODULE_AUTHOR("Sun Microsystems, Inc");
+MODULE_DESCRIPTION("Generic AVL tree implementation");
+MODULE_LICENSE("CDDL");
+
+EXPORT_SYMBOL(avl_create);
+EXPORT_SYMBOL(avl_find);
+EXPORT_SYMBOL(avl_insert);
+EXPORT_SYMBOL(avl_insert_here);
+EXPORT_SYMBOL(avl_walk);
+EXPORT_SYMBOL(avl_first);
+EXPORT_SYMBOL(avl_last);
+EXPORT_SYMBOL(avl_nearest);
+EXPORT_SYMBOL(avl_add);
+EXPORT_SYMBOL(avl_remove);
+EXPORT_SYMBOL(avl_numnodes);
+EXPORT_SYMBOL(avl_destroy_nodes);
+EXPORT_SYMBOL(avl_destroy);
+#endif
diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c
index f9d99b11f..5bee96429 100644
--- a/module/nvpair/nvpair.c
+++ b/module/nvpair/nvpair.c
@@ -3244,3 +3244,127 @@ nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
return (err);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Passed to spl_module_init() below; does nothing and reports success. */
+static int
+nvpair_init(void)
+{
+	return (0);
+}
+
+/* Passed to spl_module_exit() below; does nothing and reports success. */
+static int
+nvpair_fini(void)
+{
+	return (0);
+}
+
+spl_module_init(nvpair_init);
+spl_module_exit(nvpair_fini);
+
+MODULE_AUTHOR("Sun Microsystems, Inc");
+MODULE_DESCRIPTION("Generic name/value pair implementation");
+MODULE_LICENSE("CDDL");
+
+EXPORT_SYMBOL(nv_alloc_init);
+EXPORT_SYMBOL(nv_alloc_reset);
+EXPORT_SYMBOL(nv_alloc_fini);
+
+/* list management */
+EXPORT_SYMBOL(nvlist_alloc);
+EXPORT_SYMBOL(nvlist_free);
+EXPORT_SYMBOL(nvlist_size);
+EXPORT_SYMBOL(nvlist_pack);
+EXPORT_SYMBOL(nvlist_unpack);
+EXPORT_SYMBOL(nvlist_dup);
+EXPORT_SYMBOL(nvlist_merge);
+
+EXPORT_SYMBOL(nvlist_xalloc);
+EXPORT_SYMBOL(nvlist_xpack);
+EXPORT_SYMBOL(nvlist_xunpack);
+EXPORT_SYMBOL(nvlist_xdup);
+EXPORT_SYMBOL(nvlist_lookup_nv_alloc);
+
+EXPORT_SYMBOL(nvlist_add_nvpair);
+EXPORT_SYMBOL(nvlist_add_boolean);
+EXPORT_SYMBOL(nvlist_add_boolean_value);
+EXPORT_SYMBOL(nvlist_add_byte);
+EXPORT_SYMBOL(nvlist_add_int8);
+EXPORT_SYMBOL(nvlist_add_uint8);
+EXPORT_SYMBOL(nvlist_add_int16);
+EXPORT_SYMBOL(nvlist_add_uint16);
+EXPORT_SYMBOL(nvlist_add_int32);
+EXPORT_SYMBOL(nvlist_add_uint32);
+EXPORT_SYMBOL(nvlist_add_int64);
+EXPORT_SYMBOL(nvlist_add_uint64);
+EXPORT_SYMBOL(nvlist_add_string);
+EXPORT_SYMBOL(nvlist_add_nvlist);
+EXPORT_SYMBOL(nvlist_add_boolean_array);
+EXPORT_SYMBOL(nvlist_add_byte_array);
+EXPORT_SYMBOL(nvlist_add_int8_array);
+EXPORT_SYMBOL(nvlist_add_uint8_array);
+EXPORT_SYMBOL(nvlist_add_int16_array);
+EXPORT_SYMBOL(nvlist_add_uint16_array);
+EXPORT_SYMBOL(nvlist_add_int32_array);
+EXPORT_SYMBOL(nvlist_add_uint32_array);
+EXPORT_SYMBOL(nvlist_add_int64_array);
+EXPORT_SYMBOL(nvlist_add_uint64_array);
+EXPORT_SYMBOL(nvlist_add_string_array);
+EXPORT_SYMBOL(nvlist_add_nvlist_array);
+EXPORT_SYMBOL(nvlist_add_hrtime);
+
+EXPORT_SYMBOL(nvlist_remove);
+EXPORT_SYMBOL(nvlist_remove_all);
+
+EXPORT_SYMBOL(nvlist_lookup_boolean);
+EXPORT_SYMBOL(nvlist_lookup_boolean_value);
+EXPORT_SYMBOL(nvlist_lookup_byte);
+EXPORT_SYMBOL(nvlist_lookup_int8);
+EXPORT_SYMBOL(nvlist_lookup_uint8);
+EXPORT_SYMBOL(nvlist_lookup_int16);
+EXPORT_SYMBOL(nvlist_lookup_uint16);
+EXPORT_SYMBOL(nvlist_lookup_int32);
+EXPORT_SYMBOL(nvlist_lookup_uint32);
+EXPORT_SYMBOL(nvlist_lookup_int64);
+EXPORT_SYMBOL(nvlist_lookup_uint64);
+EXPORT_SYMBOL(nvlist_lookup_string);
+EXPORT_SYMBOL(nvlist_lookup_nvlist);
+EXPORT_SYMBOL(nvlist_lookup_boolean_array);
+EXPORT_SYMBOL(nvlist_lookup_byte_array);
+EXPORT_SYMBOL(nvlist_lookup_int8_array);
+EXPORT_SYMBOL(nvlist_lookup_uint8_array);
+EXPORT_SYMBOL(nvlist_lookup_int16_array);
+EXPORT_SYMBOL(nvlist_lookup_uint16_array);
+EXPORT_SYMBOL(nvlist_lookup_int32_array);
+EXPORT_SYMBOL(nvlist_lookup_uint32_array);
+EXPORT_SYMBOL(nvlist_lookup_int64_array);
+EXPORT_SYMBOL(nvlist_lookup_uint64_array);
+EXPORT_SYMBOL(nvlist_lookup_string_array);
+EXPORT_SYMBOL(nvlist_lookup_nvlist_array);
+EXPORT_SYMBOL(nvlist_lookup_hrtime);
+EXPORT_SYMBOL(nvlist_lookup_pairs);
+
+EXPORT_SYMBOL(nvlist_lookup_nvpair);
+EXPORT_SYMBOL(nvlist_exists);
+
+/* processing nvpair */
+EXPORT_SYMBOL(nvlist_next_nvpair);
+EXPORT_SYMBOL(nvpair_name);
+EXPORT_SYMBOL(nvpair_type);
+EXPORT_SYMBOL(nvpair_value_boolean_value);
+EXPORT_SYMBOL(nvpair_value_byte);
+EXPORT_SYMBOL(nvpair_value_int8);
+EXPORT_SYMBOL(nvpair_value_uint8);
+EXPORT_SYMBOL(nvpair_value_int16);
+EXPORT_SYMBOL(nvpair_value_uint16);
+EXPORT_SYMBOL(nvpair_value_int32);
+EXPORT_SYMBOL(nvpair_value_uint32);
+EXPORT_SYMBOL(nvpair_value_int64);
+EXPORT_SYMBOL(nvpair_value_uint64);
+EXPORT_SYMBOL(nvpair_value_string);
+EXPORT_SYMBOL(nvpair_value_nvlist);
+EXPORT_SYMBOL(nvpair_value_boolean_array);
+EXPORT_SYMBOL(nvpair_value_byte_array);
+EXPORT_SYMBOL(nvpair_value_int8_array);
+EXPORT_SYMBOL(nvpair_value_uint8_array);
+EXPORT_SYMBOL(nvpair_value_int16_array);
+EXPORT_SYMBOL(nvpair_value_uint16_array);
+EXPORT_SYMBOL(nvpair_value_int32_array);
+EXPORT_SYMBOL(nvpair_value_uint32_array);
+EXPORT_SYMBOL(nvpair_value_int64_array);
+EXPORT_SYMBOL(nvpair_value_uint64_array);
+EXPORT_SYMBOL(nvpair_value_string_array);
+EXPORT_SYMBOL(nvpair_value_nvlist_array);
+EXPORT_SYMBOL(nvpair_value_hrtime);
+
+#endif
diff --git a/module/nvpair/nvpair_alloc_spl.c b/module/nvpair/nvpair_alloc_spl.c
new file mode 100644
index 000000000..d26d26913
--- /dev/null
+++ b/module/nvpair/nvpair_alloc_spl.c
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/nvpair.h>
+#include <sys/kmem.h>
+
+/*
+ * nv_ao_alloc() callback of spl_sleep_ops_def: returns 'size' bytes
+ * obtained from kmem_alloc() with KM_SLEEP.  'nva' is not used.
+ */
+static void *
+nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size)
+{
+	void *buf = kmem_alloc(size, KM_SLEEP);
+
+	return (buf);
+}
+
+/*
+ * nv_ao_alloc() callback of spl_nosleep_ops_def: returns 'size' bytes
+ * obtained from kmem_alloc() with KM_NOSLEEP.  'nva' is not used.
+ */
+static void *
+nv_alloc_nosleep_spl(nv_alloc_t *nva, size_t size)
+{
+	void *buf = kmem_alloc(size, KM_NOSLEEP);
+
+	return (buf);
+}
+
+/*
+ * nv_ao_free() callback shared by both ops tables below: releases
+ * 'buf' with kmem_free(), passing along the caller-supplied 'size'.
+ * 'nva' is not used.
+ */
+static void
+nv_free_spl(nv_alloc_t *nva, void *buf, size_t size)
+{
+ kmem_free(buf, size);
+}
+
+/*
+ * Ops table whose allocations use KM_SLEEP (via nv_alloc_sleep_spl()).
+ * No init, fini or reset hooks are provided.
+ */
+const nv_alloc_ops_t spl_sleep_ops_def = {
+ NULL, /* nv_ao_init() */
+ NULL, /* nv_ao_fini() */
+ nv_alloc_sleep_spl, /* nv_ao_alloc() */
+ nv_free_spl, /* nv_ao_free() */
+ NULL /* nv_ao_reset() */
+};
+
+/*
+ * Ops table whose allocations use KM_NOSLEEP (via nv_alloc_nosleep_spl()).
+ * It shares nv_free_spl() with the sleeping table.
+ */
+const nv_alloc_ops_t spl_nosleep_ops_def = {
+ NULL, /* nv_ao_init() */
+ NULL, /* nv_ao_fini() */
+ nv_alloc_nosleep_spl, /* nv_ao_alloc() */
+ nv_free_spl, /* nv_ao_free() */
+ NULL /* nv_ao_reset() */
+};
+
+/* Allocator instance bound to the sleeping ops; second member is NULL. */
+nv_alloc_t nv_alloc_sleep_def = {
+ &spl_sleep_ops_def,
+ NULL
+};
+
+/* Allocator instance bound to the non-sleeping ops; second member is NULL. */
+nv_alloc_t nv_alloc_nosleep_def = {
+ &spl_nosleep_ops_def,
+ NULL
+};
+
+/* Global handles pointing at the two default allocator instances above. */
+nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def;
+nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;
diff --git a/module/unicode/u8_textprep.c b/module/unicode/u8_textprep.c
index 2532769c8..37fb2e5a4 100644
--- a/module/unicode/u8_textprep.c
+++ b/module/unicode/u8_textprep.c
@@ -2131,3 +2131,19 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
return (ret_val);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+static int unicode_init(void) { return 0; }
+static int unicode_fini(void) { return 0; }
+
+spl_module_init(unicode_init);
+spl_module_exit(unicode_fini);
+
+MODULE_AUTHOR("Sun Microsystems, Inc");
+MODULE_DESCRIPTION("Unicode implementation");
+MODULE_LICENSE("CDDL");
+
+EXPORT_SYMBOL(u8_validate);
+EXPORT_SYMBOL(u8_strcmp);
+EXPORT_SYMBOL(u8_textprep_str);
+#endif
diff --git a/module/unicode/uconv.c b/module/unicode/uconv.c
index b996e1f60..7a8278322 100644
--- a/module/unicode/uconv.c
+++ b/module/unicode/uconv.c
@@ -853,3 +853,12 @@ uconv_u8tou32(const uchar_t *u8s, size_t *utf8len,
return (0);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(uconv_u16tou32);
+EXPORT_SYMBOL(uconv_u16tou8);
+EXPORT_SYMBOL(uconv_u32tou16);
+EXPORT_SYMBOL(uconv_u32tou8);
+EXPORT_SYMBOL(uconv_u8tou16);
+EXPORT_SYMBOL(uconv_u8tou32);
+#endif
diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c
index 2b9869f18..2fa8d4244 100644
--- a/module/zcommon/zfs_comutil.c
+++ b/module/zcommon/zfs_comutil.c
@@ -63,3 +63,7 @@ zfs_allocatable_devs(nvlist_t *nv)
}
return (B_FALSE);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_allocatable_devs);
+#endif
diff --git a/module/zcommon/zfs_deleg.c b/module/zcommon/zfs_deleg.c
index ed65f955d..d30c1e0cc 100644
--- a/module/zcommon/zfs_deleg.c
+++ b/module/zcommon/zfs_deleg.c
@@ -235,3 +235,9 @@ zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type,
ASSERT(!"bad zfs_deleg_who_type_t");
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_deleg_verify_nvlist);
+EXPORT_SYMBOL(zfs_deleg_whokey);
+EXPORT_SYMBOL(zfs_deleg_canonicalize_perm);
+#endif
diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c
index 5cfafea47..706968f51 100644
--- a/module/zcommon/zfs_namecheck.c
+++ b/module/zcommon/zfs_namecheck.c
@@ -343,3 +343,9 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
return (0);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(snapshot_namecheck);
+EXPORT_SYMBOL(pool_namecheck);
+EXPORT_SYMBOL(dataset_namecheck);
+#endif
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 2e8f5a77f..45943602c 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -532,3 +532,37 @@ zfs_prop_align_right(zfs_prop_t prop)
}
#endif
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+static int zcommon_init(void) { return 0; }
+static int zcommon_fini(void) { return 0; }
+
+spl_module_init(zcommon_init);
+spl_module_exit(zcommon_fini);
+
+MODULE_AUTHOR("Sun Microsystems, Inc");
+MODULE_DESCRIPTION("Generic ZFS support");
+MODULE_LICENSE("CDDL");
+
+/* zfs dataset property functions */
+EXPORT_SYMBOL(zfs_userquota_prop_prefixes);
+EXPORT_SYMBOL(zfs_prop_init);
+EXPORT_SYMBOL(zfs_prop_get_type);
+EXPORT_SYMBOL(zfs_prop_get_table);
+EXPORT_SYMBOL(zfs_prop_delegatable);
+
+/* Dataset property functions shared between libzfs and kernel. */
+EXPORT_SYMBOL(zfs_prop_default_string);
+EXPORT_SYMBOL(zfs_prop_default_numeric);
+EXPORT_SYMBOL(zfs_prop_readonly);
+EXPORT_SYMBOL(zfs_prop_inheritable);
+EXPORT_SYMBOL(zfs_prop_setonce);
+EXPORT_SYMBOL(zfs_prop_to_name);
+EXPORT_SYMBOL(zfs_name_to_prop);
+EXPORT_SYMBOL(zfs_prop_user);
+EXPORT_SYMBOL(zfs_prop_userquota);
+EXPORT_SYMBOL(zfs_prop_index_to_string);
+EXPORT_SYMBOL(zfs_prop_string_to_index);
+EXPORT_SYMBOL(zfs_prop_valid_for_type);
+
+#endif
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c
index fd24c34d4..a873be5d7 100644
--- a/module/zcommon/zpool_prop.c
+++ b/module/zcommon/zpool_prop.c
@@ -186,3 +186,19 @@ zpool_prop_align_right(zpool_prop_t prop)
return (zpool_prop_table[prop].pd_rightalign);
}
#endif
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* zpool property functions */
+EXPORT_SYMBOL(zpool_prop_init);
+EXPORT_SYMBOL(zpool_prop_get_type);
+EXPORT_SYMBOL(zpool_prop_get_table);
+
+/* Pool property functions shared between libzfs and kernel. */
+EXPORT_SYMBOL(zpool_name_to_prop);
+EXPORT_SYMBOL(zpool_prop_to_name);
+EXPORT_SYMBOL(zpool_prop_default_string);
+EXPORT_SYMBOL(zpool_prop_default_numeric);
+EXPORT_SYMBOL(zpool_prop_readonly);
+EXPORT_SYMBOL(zpool_prop_index_to_string);
+EXPORT_SYMBOL(zpool_prop_string_to_index);
+#endif
diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c
index 5f968e695..329a278f2 100644
--- a/module/zcommon/zprop_common.c
+++ b/module/zcommon/zprop_common.c
@@ -399,3 +399,20 @@ zprop_width(int prop, boolean_t *fixed, zfs_type_t type)
}
#endif
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Common routines to initialize property tables */
+EXPORT_SYMBOL(register_impl);
+EXPORT_SYMBOL(register_string);
+EXPORT_SYMBOL(register_number);
+EXPORT_SYMBOL(register_index);
+EXPORT_SYMBOL(register_hidden);
+
+/* Common routines for zfs and zpool property management */
+EXPORT_SYMBOL(zprop_iter_common);
+EXPORT_SYMBOL(zprop_name_to_prop);
+EXPORT_SYMBOL(zprop_string_to_index);
+EXPORT_SYMBOL(zprop_index_to_string);
+EXPORT_SYMBOL(zprop_values);
+EXPORT_SYMBOL(zprop_valid_for_type);
+#endif
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index 2bde8a89a..f39280097 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -47,6 +47,7 @@ ${MODULE}-objs += uberblock.o
${MODULE}-objs += unique.o
${MODULE}-objs += vdev.o
${MODULE}-objs += vdev_cache.o
+${MODULE}-objs += vdev_disk.o
${MODULE}-objs += vdev_file.o
${MODULE}-objs += vdev_label.o
${MODULE}-objs += vdev_mirror.o
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index a319b217d..880d73711 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -175,9 +175,9 @@ static boolean_t arc_warm;
/*
* These tunables are for performance analysis.
*/
-uint64_t zfs_arc_max;
-uint64_t zfs_arc_min;
-uint64_t zfs_arc_meta_limit = 0;
+unsigned long zfs_arc_max = 0;
+unsigned long zfs_arc_min = 0;
+unsigned long zfs_arc_meta_limit = 0;
int zfs_mdcomp_disable = 0;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
@@ -517,7 +517,7 @@ static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes);
* Hash table routines
*/
-#define HT_LOCK_PAD 64
+#define HT_LOCK_PAD 256
struct ht_lock {
kmutex_t ht_lock;
@@ -757,8 +757,15 @@ buf_fini(void)
{
int i;
+#if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Large allocations which do not require contiguous pages
+ * should be using vmem_free() in the linux kernel */
+ vmem_free(buf_hash_table.ht_table,
+ (buf_hash_table.ht_mask + 1) * sizeof (void *));
+#else
kmem_free(buf_hash_table.ht_table,
(buf_hash_table.ht_mask + 1) * sizeof (void *));
+#endif
for (i = 0; i < BUF_LOCKS; i++)
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
kmem_cache_destroy(hdr_cache);
@@ -857,8 +864,15 @@ buf_init(void)
hsize <<= 1;
retry:
buf_hash_table.ht_mask = hsize - 1;
+#if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Large allocations which do not require contiguous pages
+ * should be using vmem_zalloc() in the linux kernel */
+ buf_hash_table.ht_table =
+ vmem_zalloc(hsize * sizeof (void*), KM_SLEEP);
+#else
buf_hash_table.ht_table =
kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP);
+#endif
if (buf_hash_table.ht_table == NULL) {
ASSERT(hsize > (1ULL << 8));
hsize >>= 1;
@@ -4688,3 +4702,21 @@ l2arc_stop(void)
cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock);
mutex_exit(&l2arc_feed_thr_lock);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(arc_read);
+EXPORT_SYMBOL(arc_buf_remove_ref);
+EXPORT_SYMBOL(arc_getbuf_func);
+
+module_param(zfs_arc_min, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_min, "Minimum arc size");
+
+module_param(zfs_arc_max, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_max, "Maximum arc size");
+
+module_param(zfs_arc_meta_limit, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
+
+module_param(zfs_mdcomp_disable, int, 0644);
+MODULE_PARM_DESC(zfs_mdcomp_disable, "Meta compression disable");
+#endif
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 0c8b8aaee..cd8c4e20e 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -24,6 +24,7 @@
*/
#include <sys/zfs_context.h>
+#include <sys/arc.h>
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
@@ -255,7 +256,13 @@ dbuf_init(void)
retry:
h->hash_table_mask = hsize - 1;
+#if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Large allocations which do not require contiguous pages
+ * should be using vmem_zalloc() in the linux kernel */
+ h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP);
+#else
h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
+#endif
if (h->hash_table == NULL) {
/* XXX - we should really return an error instead of assert */
ASSERT(hsize > (1ULL << 10));
@@ -279,7 +286,13 @@ dbuf_fini(void)
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
+#if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Large allocations which do not require contiguous pages
+ * should be using vmem_free() in the linux kernel */
+ vmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
+#else
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
+#endif
kmem_cache_destroy(dbuf_cache);
}
@@ -2436,3 +2449,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
dbuf_rele(db, (void *)(uintptr_t)txg);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_buf_rele);
+EXPORT_SYMBOL(dmu_buf_will_dirty);
+#endif
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 8ca5c9d7d..a5d37bf33 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -584,7 +584,8 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
bufoff = offset - db->db_offset;
tocpy = (int)MIN(db->db_size - bufoff, size);
- bcopy((char *)db->db_data + bufoff, buf, tocpy);
+ if (!(flags & DMU_READ_ZEROCOPY))
+ bcopy((char *)db->db_data + bufoff, buf, tocpy);
offset += tocpy;
size -= tocpy;
@@ -597,8 +598,8 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
}
void
-dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx)
+dmu_write_impl(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+ const void *buf, dmu_tx_t *tx, int flags)
{
dmu_buf_t **dbp;
int numbufs, i;
@@ -626,7 +627,8 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
else
dmu_buf_will_dirty(db, tx);
- bcopy(buf, (char *)db->db_data + bufoff, tocpy);
+ if (!(flags & DMU_WRITE_ZEROCOPY))
+ bcopy(buf, (char *)db->db_data + bufoff, tocpy);
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
@@ -639,6 +641,13 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
}
void
+dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+ const void *buf, dmu_tx_t *tx)
+{
+ dmu_write_impl(os, object, offset, size, buf, tx, 0);
+}
+
+void
dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx)
{
@@ -659,7 +668,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
-#ifdef _KERNEL
+#if defined(_KERNEL) && defined(HAVE_UIO_RW)
int
dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
{
@@ -1274,3 +1283,23 @@ dmu_fini(void)
dbuf_fini();
l2arc_fini();
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_bonus_hold);
+EXPORT_SYMBOL(dmu_free_range);
+EXPORT_SYMBOL(dmu_read);
+EXPORT_SYMBOL(dmu_write_impl);
+EXPORT_SYMBOL(dmu_write);
+
+/* Get information on a DMU object. */
+EXPORT_SYMBOL(dmu_object_info);
+EXPORT_SYMBOL(dmu_object_info_from_dnode);
+EXPORT_SYMBOL(dmu_object_info_from_db);
+EXPORT_SYMBOL(dmu_object_size_from_db);
+
+EXPORT_SYMBOL(dmu_object_set_blocksize);
+EXPORT_SYMBOL(dmu_object_set_checksum);
+EXPORT_SYMBOL(dmu_object_set_compress);
+
+EXPORT_SYMBOL(dmu_ot);
+#endif
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index 1f91fc1ad..c0031e155 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -192,3 +192,11 @@ dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
return (error);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_object_alloc);
+EXPORT_SYMBOL(dmu_object_claim);
+EXPORT_SYMBOL(dmu_object_reclaim);
+EXPORT_SYMBOL(dmu_object_free);
+EXPORT_SYMBOL(dmu_object_next);
+#endif
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 01792600b..30ef576bd 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -781,9 +781,11 @@ dmu_objset_snapshot_one(char *name, void *arg)
* doing a recursive snapshot. The permission checks for the starting
* dataset have already been performed in zfs_secpolicy_snapshot()
*/
+#ifdef HAVE_ZPL
if (sn->checkperms == B_TRUE &&
(err = zfs_secpolicy_snapshot_perms(name, CRED())))
return (err);
+#endif
err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os);
if (err != 0)
@@ -1479,3 +1481,37 @@ dmu_objset_get_user(objset_t *os)
ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
return (os->os->os_user_ptr);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_objset_spa);
+EXPORT_SYMBOL(dmu_objset_zil);
+EXPORT_SYMBOL(dmu_objset_pool);
+EXPORT_SYMBOL(dmu_objset_ds);
+EXPORT_SYMBOL(dmu_objset_name);
+EXPORT_SYMBOL(dmu_objset_type);
+EXPORT_SYMBOL(dmu_objset_id);
+EXPORT_SYMBOL(dmu_snapshot_list_next);
+EXPORT_SYMBOL(dmu_dir_list_next);
+EXPORT_SYMBOL(dmu_objset_set_user);
+EXPORT_SYMBOL(dmu_objset_get_user);
+
+/* Public routines to create, destroy, open, and close objsets. */
+EXPORT_SYMBOL(dmu_objset_open);
+EXPORT_SYMBOL(dmu_objset_open_ds);
+EXPORT_SYMBOL(dmu_objset_close);
+EXPORT_SYMBOL(dmu_objset_evict_dbufs);
+EXPORT_SYMBOL(dmu_objset_create);
+EXPORT_SYMBOL(dmu_objset_create_impl);
+EXPORT_SYMBOL(dmu_objset_destroy);
+EXPORT_SYMBOL(dmu_snapshots_destroy);
+EXPORT_SYMBOL(dmu_objset_rollback);
+EXPORT_SYMBOL(dmu_objset_snapshot);
+EXPORT_SYMBOL(dmu_objset_rename);
+EXPORT_SYMBOL(dmu_objset_find);
+EXPORT_SYMBOL(dmu_objset_byteswap);
+
+/* Get stats on a dataset. */
+EXPORT_SYMBOL(dmu_objset_fast_stat);
+EXPORT_SYMBOL(dmu_objset_stats);
+EXPORT_SYMBOL(dmu_objset_space);
+#endif
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index b977a2ff7..c9a1647fa 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -952,7 +952,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp)
ra.vp = vp;
ra.voff = *voffp;
ra.bufsize = 1<<20;
- ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
+ ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP);
/* these were verified in dmu_recv_begin */
ASSERT(drc->drc_drrb->drr_version == DMU_BACKUP_STREAM_VERSION);
@@ -1048,7 +1048,7 @@ out:
}
}
- kmem_free(ra.buf, ra.bufsize);
+ vmem_free(ra.buf, ra.bufsize);
*voffp = ra.voff;
return (ra.err);
}
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index 569bf0a3c..d7977dfd2 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -421,3 +421,8 @@ traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg)
err = 0;
return (err);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(traverse_dataset);
+EXPORT_SYMBOL(traverse_pool);
+#endif
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index a3c9e5241..261d1cf5f 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -1201,3 +1201,17 @@ dmu_tx_callback(list_t *cb_list, int error)
kmem_free(dcb, sizeof (dmu_tx_callback_t));
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dmu_tx_create);
+EXPORT_SYMBOL(dmu_tx_hold_write);
+EXPORT_SYMBOL(dmu_tx_hold_free);
+EXPORT_SYMBOL(dmu_tx_hold_zap);
+EXPORT_SYMBOL(dmu_tx_hold_bonus);
+EXPORT_SYMBOL(dmu_tx_abort);
+EXPORT_SYMBOL(dmu_tx_assign);
+EXPORT_SYMBOL(dmu_tx_wait);
+EXPORT_SYMBOL(dmu_tx_commit);
+EXPORT_SYMBOL(dmu_tx_get_txg);
+EXPORT_SYMBOL(dmu_tx_callback_register);
+#endif
diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c
index fc3d23b87..3e33527c4 100644
--- a/module/zfs/dmu_zfetch.c
+++ b/module/zfs/dmu_zfetch.c
@@ -656,3 +656,9 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
}
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(zfs_prefetch_disable, int, 0644);
+MODULE_PARM_DESC(zfs_prefetch_disable, "Disable all ZFS prefetching");
+#endif
+
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 926034836..dd74ad510 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -2391,6 +2391,7 @@ dsl_snapshot_rename_one(char *name, void *arg)
* For recursive snapshot renames the parent won't be changing
* so we just pass name for both the to/from argument.
*/
+#ifdef HAVE_ZPL
err = zfs_secpolicy_rename_perms(name, name, CRED());
if (err == ENOENT) {
return (0);
@@ -2398,8 +2399,10 @@ dsl_snapshot_rename_one(char *name, void *arg)
(void) strcpy(ra->failed, name);
return (err);
}
+#endif
-#ifdef _KERNEL
+/* XXX: Ignore for SPL version until mounting the FS is supported */
+#if defined(_KERNEL) && !defined(HAVE_SPL)
/*
* For all filesystems undergoing rename, we'll need to unmount it.
*/
@@ -3823,3 +3826,49 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
dsl_dataset_rele(ds, FTAG);
return (0);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dsl_dataset_hold);
+EXPORT_SYMBOL(dsl_dataset_hold_obj);
+EXPORT_SYMBOL(dsl_dataset_own);
+EXPORT_SYMBOL(dsl_dataset_own_obj);
+EXPORT_SYMBOL(dsl_dataset_name);
+EXPORT_SYMBOL(dsl_dataset_rele);
+EXPORT_SYMBOL(dsl_dataset_disown);
+EXPORT_SYMBOL(dsl_dataset_drop_ref);
+EXPORT_SYMBOL(dsl_dataset_tryown);
+EXPORT_SYMBOL(dsl_dataset_make_exclusive);
+EXPORT_SYMBOL(dsl_dataset_create_sync);
+EXPORT_SYMBOL(dsl_dataset_create_sync_dd);
+EXPORT_SYMBOL(dsl_dataset_destroy);
+EXPORT_SYMBOL(dsl_snapshots_destroy);
+EXPORT_SYMBOL(dsl_dataset_destroy_check);
+EXPORT_SYMBOL(dsl_dataset_destroy_sync);
+EXPORT_SYMBOL(dsl_dataset_snapshot_check);
+EXPORT_SYMBOL(dsl_dataset_snapshot_sync);
+EXPORT_SYMBOL(dsl_dataset_rollback);
+EXPORT_SYMBOL(dsl_dataset_rename);
+EXPORT_SYMBOL(dsl_dataset_promote);
+EXPORT_SYMBOL(dsl_dataset_clone_swap);
+EXPORT_SYMBOL(dsl_dataset_set_user_ptr);
+EXPORT_SYMBOL(dsl_dataset_get_user_ptr);
+EXPORT_SYMBOL(dsl_dataset_get_blkptr);
+EXPORT_SYMBOL(dsl_dataset_set_blkptr);
+EXPORT_SYMBOL(dsl_dataset_get_spa);
+EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap);
+EXPORT_SYMBOL(dsl_dataset_sync);
+EXPORT_SYMBOL(dsl_dataset_block_born);
+EXPORT_SYMBOL(dsl_dataset_block_kill);
+EXPORT_SYMBOL(dsl_dataset_block_freeable);
+EXPORT_SYMBOL(dsl_dataset_prev_snap_txg);
+EXPORT_SYMBOL(dsl_dataset_dirty);
+EXPORT_SYMBOL(dsl_dataset_stats);
+EXPORT_SYMBOL(dsl_dataset_fast_stat);
+EXPORT_SYMBOL(dsl_dataset_space);
+EXPORT_SYMBOL(dsl_dataset_fsid_guid);
+EXPORT_SYMBOL(dsl_dsobj_to_dsname);
+EXPORT_SYMBOL(dsl_dataset_check_quota);
+EXPORT_SYMBOL(dsl_dataset_set_quota);
+EXPORT_SYMBOL(dsl_dataset_set_quota_sync);
+EXPORT_SYMBOL(dsl_dataset_set_reservation);
+#endif
diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c
index 22605053e..9201f9573 100644
--- a/module/zfs/dsl_deleg.c
+++ b/module/zfs/dsl_deleg.c
@@ -741,3 +741,8 @@ dsl_delegation_on(objset_t *os)
{
return (os->os->os_spa->spa_delegation);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dsl_deleg_get);
+EXPORT_SYMBOL(dsl_deleg_set);
+#endif
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index eb2767fe1..2f83709bc 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -1317,3 +1317,10 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
return (0);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dsl_dir_set_quota);
+EXPORT_SYMBOL(dsl_dir_set_reservation);
+EXPORT_SYMBOL(dsl_dir_open);
+EXPORT_SYMBOL(dsl_dir_close);
+#endif
diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c
index 4708565b1..45c81630e 100644
--- a/module/zfs/dsl_prop.c
+++ b/module/zfs/dsl_prop.c
@@ -675,3 +675,10 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(prop), propval) == 0);
nvlist_free(propval);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dsl_prop_set);
+EXPORT_SYMBOL(dsl_prop_get_all);
+EXPORT_SYMBOL(dsl_prop_nvlist_add_uint64);
+EXPORT_SYMBOL(dsl_prop_get_integer);
+#endif
diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c
index 828911170..4391aa21b 100644
--- a/module/zfs/dsl_synctask.c
+++ b/module/zfs/dsl_synctask.c
@@ -223,3 +223,8 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp,
arg1, arg2, blocks_modified);
dsl_sync_task_group_nowait(dstg, tx);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dsl_sync_task_do);
+EXPORT_SYMBOL(dsl_sync_task_do_nowait);
+#endif
diff --git a/module/zfs/fletcher.c b/module/zfs/fletcher.c
index 54247d724..e0d062236 100644
--- a/module/zfs/fletcher.c
+++ b/module/zfs/fletcher.c
@@ -243,3 +243,12 @@ fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
ZIO_SET_CHECKSUM(zcp, a, b, c, d);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(fletcher_2_native);
+EXPORT_SYMBOL(fletcher_2_byteswap);
+EXPORT_SYMBOL(fletcher_4_native);
+EXPORT_SYMBOL(fletcher_4_byteswap);
+EXPORT_SYMBOL(fletcher_4_incremental_native);
+EXPORT_SYMBOL(fletcher_4_incremental_byteswap);
+#endif
diff --git a/module/zfs/include/sys/dmu.h b/module/zfs/include/sys/dmu.h
index b15da8391..e3c69ee3e 100644
--- a/module/zfs/include/sys/dmu.h
+++ b/module/zfs/include/sys/dmu.h
@@ -157,8 +157,8 @@ void zfs_znode_byteswap(void *buf, size_t size);
* The maximum number of bytes that can be accessed as part of one
* operation, including metadata.
*/
-#define DMU_MAX_ACCESS (10<<20) /* 10MB */
-#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
+#define DMU_MAX_ACCESS (10<<20) /* 10MB */
+#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
#define DMU_USERUSED_OBJECT (-1ULL)
#define DMU_GROUPUSED_OBJECT (-2ULL)
@@ -480,17 +480,23 @@ int dmu_free_object(objset_t *os, uint64_t object);
*/
#define DMU_READ_PREFETCH 0 /* prefetch */
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
+#define DMU_READ_ZEROCOPY 2 /* zerocopy on read (test) */
+#define DMU_WRITE_ZEROCOPY 4 /* zerocopy on write (test) */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags);
+void dmu_write_impl(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t size, const void *buf, dmu_tx_t *tx, int flags);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx);
+#if defined(_KERNEL) && defined(HAVE_UIO_RW)
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
+#endif
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
diff --git a/module/zfs/include/sys/spa.h b/module/zfs/include/sys/spa.h
index 30554ae0e..14c3db86f 100644
--- a/module/zfs/include/sys/spa.h
+++ b/module/zfs/include/sys/spa.h
@@ -535,7 +535,11 @@ extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
/* asynchronous event notification */
+#ifdef HAVE_SYSEVENT
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
+#else
+#define spa_event_notify(s,v,n) ((void)0)
+#endif
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
diff --git a/module/zfs/include/sys/spa_impl.h b/module/zfs/include/sys/spa_impl.h
index 84da68488..8a931a053 100644
--- a/module/zfs/include/sys/spa_impl.h
+++ b/module/zfs/include/sys/spa_impl.h
@@ -181,7 +181,7 @@ struct spa {
refcount_t spa_refcount; /* number of opens */
};
-extern const char *spa_config_path;
+extern char *spa_config_path;
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
diff --git a/module/zfs/include/sys/vdev_disk.h b/module/zfs/include/sys/vdev_disk.h
new file mode 100644
index 000000000..544036bbc
--- /dev/null
+++ b/module/zfs/include/sys/vdev_disk.h
@@ -0,0 +1,71 @@
+#ifndef _SYS_VDEV_DISK_H
+#define _SYS_VDEV_DISK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+#include <sys/vdev.h>
+#include <sys/ddi.h>
+#include <sys/sunldi.h>
+#include <sys/sunddi.h>
+#include <zfs_config.h>
+
+typedef struct vdev_disk {
+ ddi_devid_t vd_devid;
+ char *vd_minor;
+ struct block_device *vd_bdev;
+} vdev_disk_t;
+
+extern int vdev_disk_physio(struct block_device *, caddr_t,
+ size_t, uint64_t, int);
+extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
+
+/* 2.6.24 API change */
+#ifdef HAVE_2ARGS_BIO_END_IO_T
+# define BIO_END_IO_PROTO(fn, x, y, z) static void fn(struct bio *x, int z)
+# define BIO_END_IO_RETURN(rc) return
+#else
+# define BIO_END_IO_PROTO(fn, x, y, z) static int fn(struct bio *x, \
+ unsigned int y, int z)
+# define BIO_END_IO_RETURN(rc) return rc
+#endif /* HAVE_2ARGS_BIO_END_IO_T */
+
+/* 2.6.29 API change */
+#ifdef HAVE_BIO_RW_SYNCIO
+# define DIO_RW_SYNCIO BIO_RW_SYNCIO
+#else
+# define DIO_RW_SYNCIO BIO_RW_SYNC
+#endif /* HAVE_BIO_RW_SYNCIO */
+
+/* 2.6.28 API change */
+#ifdef HAVE_OPEN_BDEV_EXCLUSIVE
+# define vdev_bdev_open(path, md, hld) open_bdev_exclusive(path, md, hld)
+# define vdev_bdev_close(bdev, md) close_bdev_exclusive(bdev, md)
+#else
+# define vdev_bdev_open(path, md, hld) open_bdev_excl(path, md, hld)
+# define vdev_bdev_close(bdev, md) close_bdev_excl(bdev)
+#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
+
+/* 2.6.22 API change */
+#ifdef HAVE_1ARG_INVALIDATE_BDEV
+# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev)
+#else
+# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev, 1)
+#endif /* HAVE_1ARG_INVALIDATE_BDEV */
+
+/* 2.6.30 API change */
+#ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE
+# define vdev_bdev_block_size(bdev) bdev_logical_block_size(bdev)
+#else
+# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev)
+#endif
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_DISK_H */
diff --git a/module/zfs/include/sys/zfs_context.h b/module/zfs/include/sys/zfs_context.h
index 40de32084..29b88a37f 100644
--- a/module/zfs/include/sys/zfs_context.h
+++ b/module/zfs/include/sys/zfs_context.h
@@ -58,13 +58,13 @@ extern "C" {
#include <sys/zone.h>
#include <sys/uio.h>
#include <sys/zfs_debug.h>
+#ifdef HAVE_SYSEVENT
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
+#endif /* HAVE_SYSEVENT */
#include <sys/fm/util.h>
-#define CPU_SEQID (CPU->cpu_seqid)
-
#ifdef __cplusplus
}
#endif
diff --git a/module/zfs/include/sys/zfs_debug.h b/module/zfs/include/sys/zfs_debug.h
index 02d9da131..a8f0f186f 100644
--- a/module/zfs/include/sys/zfs_debug.h
+++ b/module/zfs/include/sys/zfs_debug.h
@@ -57,11 +57,16 @@ extern int zfs_flags;
#define ZFS_DEBUG_MODIFY 0x0010
#ifdef ZFS_DEBUG
+#if defined(_KERNEL) && defined(HAVE_SPL)
+#include <sys/debug.h>
+#define dprintf(...) CDEBUG_LIMIT(D_DPRINTF, __VA_ARGS__)
+#else
extern void __dprintf(const char *file, const char *func,
int line, const char *fmt, ...);
#define dprintf(...) \
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__)
+#endif /* _KERNEL && HAVE_SPL */
#else
#define dprintf(...) ((void)0)
#endif /* ZFS_DEBUG */
diff --git a/module/zfs/include/sys/zfs_znode.h b/module/zfs/include/sys/zfs_znode.h
index f5ee2fc7b..d141c0302 100644
--- a/module/zfs/include/sys/zfs_znode.h
+++ b/module/zfs/include/sys/zfs_znode.h
@@ -118,9 +118,12 @@ extern "C" {
/*
* Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
- * the directory entries.
+ * the directory entries. On Linux systems this value is already
+ * defined correctly as part of the /usr/include/dirent.h header file.
*/
+#ifndef IFTODT
#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
+#endif
/*
* The directory entry has the type (currently unused on Solaris) in the
@@ -342,8 +345,10 @@ extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+#if defined(HAVE_UIO_RW)
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
extern void zfs_unmap_page(page_t *, caddr_t);
+#endif /* HAVE_UIO_RW */
extern zil_get_data_t zfs_get_data;
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index d147b8e91..391290a76 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -41,6 +41,7 @@
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
+#include <sys/vdev_disk.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
@@ -200,9 +201,11 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
zap_cursor_t zc;
zap_attribute_t za;
objset_t *mos = spa->spa_meta_objset;
- int err;
+ int err = 0;
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
+ if (err)
+ return err;
mutex_enter(&spa->spa_props_lock);
@@ -214,7 +217,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
/* If no pool property object, no more prop to get. */
if (spa->spa_pool_props_object == 0) {
mutex_exit(&spa->spa_props_lock);
- return (0);
+ goto out;
}
/*
@@ -3718,9 +3721,11 @@ spa_async_probe(spa_t *spa, vdev_t *vd)
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
+#ifdef HAVE_SYSEVENT
sysevent_id_t eid;
nvlist_t *attr;
char *physpath;
+#endif
int c;
if (!spa->spa_autoexpand)
@@ -3734,6 +3739,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
return;
+#ifdef HAVE_SYSEVENT
physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
@@ -3745,6 +3751,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
nvlist_free(attr);
kmem_free(physpath, MAXPATHLEN);
+#endif
}
static void
@@ -4514,10 +4521,10 @@ spa_has_active_shared_spare(spa_t *spa)
* in the userland libzpool, as we don't want consumers to misinterpret ztest
* or zdb as real changes.
*/
+#ifdef HAVE_SYSEVENT
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
-#ifdef _KERNEL
sysevent_t *ev;
sysevent_attr_list_t *attr = NULL;
sysevent_value_t value;
@@ -4562,5 +4569,60 @@ done:
if (attr)
sysevent_free_attr(attr);
sysevent_free(ev);
-#endif
}
+#endif /* HAVE_SYSEVENT */
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* state manipulation functions */
+EXPORT_SYMBOL(spa_open);
+EXPORT_SYMBOL(spa_get_stats);
+EXPORT_SYMBOL(spa_create);
+EXPORT_SYMBOL(spa_import);
+EXPORT_SYMBOL(spa_tryimport);
+EXPORT_SYMBOL(spa_destroy);
+EXPORT_SYMBOL(spa_export);
+EXPORT_SYMBOL(spa_reset);
+EXPORT_SYMBOL(spa_async_request);
+EXPORT_SYMBOL(spa_async_suspend);
+EXPORT_SYMBOL(spa_async_resume);
+EXPORT_SYMBOL(spa_inject_addref);
+EXPORT_SYMBOL(spa_inject_delref);
+
+/* device manipulation */
+EXPORT_SYMBOL(spa_vdev_add);
+EXPORT_SYMBOL(spa_vdev_attach);
+EXPORT_SYMBOL(spa_vdev_detach);
+EXPORT_SYMBOL(spa_vdev_remove);
+EXPORT_SYMBOL(spa_vdev_setpath);
+
+/* spare state (which is global across all pools) */
+EXPORT_SYMBOL(spa_spare_add);
+EXPORT_SYMBOL(spa_spare_remove);
+EXPORT_SYMBOL(spa_spare_exists);
+EXPORT_SYMBOL(spa_spare_activate);
+
+/* L2ARC state (which is global across all pools) */
+EXPORT_SYMBOL(spa_l2cache_add);
+EXPORT_SYMBOL(spa_l2cache_remove);
+EXPORT_SYMBOL(spa_l2cache_exists);
+EXPORT_SYMBOL(spa_l2cache_activate);
+EXPORT_SYMBOL(spa_l2cache_drop);
+EXPORT_SYMBOL(spa_l2cache_space_update);
+
+/* scrubbing */
+EXPORT_SYMBOL(spa_scrub);
+
+/* spa syncing */
+EXPORT_SYMBOL(spa_sync); /* only for DMU use */
+EXPORT_SYMBOL(spa_sync_allpools);
+
+/* properties */
+EXPORT_SYMBOL(spa_prop_set);
+EXPORT_SYMBOL(spa_prop_get);
+EXPORT_SYMBOL(spa_prop_clear_bootfs);
+
+#if defined(HAVE_SYSEVENT)
+/* asynchronous event notification */
+EXPORT_SYMBOL(spa_event_notify);
+#endif
+#endif
diff --git a/module/zfs/spa_boot.c b/module/zfs/spa_boot.c
index 053903cac..aa276835a 100644
--- a/module/zfs/spa_boot.c
+++ b/module/zfs/spa_boot.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
-
+#ifdef _KERNEL
#include <sys/spa.h>
#include <sys/sunddi.h>
@@ -45,3 +45,5 @@ spa_free_bootprop(char *value)
{
ddi_prop_free(value);
}
+
+#endif /* _KERNEL */
diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c
index b2063bba1..19dca52c1 100644
--- a/module/zfs/spa_config.c
+++ b/module/zfs/spa_config.c
@@ -62,7 +62,7 @@ static uint64_t spa_config_generation = 1;
* This can be overridden in userland to preserve an alternate namespace for
* userland pools when doing testing.
*/
-const char *spa_config_path = ZPOOL_CACHE;
+char *spa_config_path = ZPOOL_CACHE;
/*
* Called when the module is first loaded, this routine loads the configuration
@@ -442,3 +442,15 @@ spa_config_update(spa_t *spa, int what)
if (what == SPA_CONFIG_UPDATE_POOL)
spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(spa_config_sync);
+EXPORT_SYMBOL(spa_config_load);
+EXPORT_SYMBOL(spa_all_configs);
+EXPORT_SYMBOL(spa_config_set);
+EXPORT_SYMBOL(spa_config_generate);
+EXPORT_SYMBOL(spa_config_update);
+
+module_param(spa_config_path, charp, 0444);
+MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)");
+#endif
diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c
index ac0a20aaf..480ea9c86 100644
--- a/module/zfs/spa_errlog.c
+++ b/module/zfs/spa_errlog.c
@@ -434,3 +434,17 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
mutex_exit(&spa->spa_errlog_lock);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* error handling */
+EXPORT_SYMBOL(spa_log_error);
+EXPORT_SYMBOL(zfs_ereport_post);
+EXPORT_SYMBOL(zfs_post_remove);
+EXPORT_SYMBOL(zfs_post_autoreplace);
+EXPORT_SYMBOL(spa_get_errlog_size);
+EXPORT_SYMBOL(spa_get_errlog);
+EXPORT_SYMBOL(spa_errlog_rotate);
+EXPORT_SYMBOL(spa_errlog_drain);
+EXPORT_SYMBOL(spa_errlog_sync);
+EXPORT_SYMBOL(spa_get_errlists);
+#endif
diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c
index b719f7b80..7f1ac22ac 100644
--- a/module/zfs/spa_history.c
+++ b/module/zfs/spa_history.c
@@ -177,7 +177,11 @@ static char *
spa_history_zone(void)
{
#ifdef _KERNEL
+#ifdef HAVE_SPL
+ return ("linux");
+#else
return (curproc->p_zone->zone_name);
+#endif
#else
return ("global");
#endif
@@ -279,7 +283,7 @@ spa_history_log_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dmu_buf_rele(dbp, FTAG);
if (hap->ha_log_type == LOG_INTERNAL) {
- kmem_free((void*)hap->ha_history_str, HIS_MAX_RECORD_LEN);
+ vmem_free((void*)hap->ha_history_str, HIS_MAX_RECORD_LEN);
kmem_free(hap, sizeof (history_arg_t));
}
}
@@ -409,7 +413,7 @@ log_internal(history_internal_events_t event, spa_t *spa,
return;
hap = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
- str = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
+ str = vmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
(void) vsnprintf(str, HIS_MAX_RECORD_LEN, fmt, adx);
@@ -471,3 +475,10 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event)
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(spa_history_create_obj);
+EXPORT_SYMBOL(spa_history_get);
+EXPORT_SYMBOL(spa_history_log);
+EXPORT_SYMBOL(spa_history_internal_log);
+#endif
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index ef74a443d..3b6d33834 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -233,8 +233,13 @@ kmem_cache_t *spa_buffer_pool;
int spa_mode_global;
#ifdef ZFS_DEBUG
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* All filtering done by the SPL */
+int zfs_flags = ~0;
+#else
/* Everything except dprintf is on by default in debug builds */
int zfs_flags = ~ZFS_DEBUG_DPRINTF;
+#endif
#else
int zfs_flags = 0;
#endif
@@ -1441,3 +1446,71 @@ spa_mode(spa_t *spa)
{
return (spa->spa_mode);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Namespace manipulation */
+EXPORT_SYMBOL(spa_lookup);
+EXPORT_SYMBOL(spa_add);
+EXPORT_SYMBOL(spa_remove);
+EXPORT_SYMBOL(spa_next);
+
+/* Refcount functions */
+EXPORT_SYMBOL(spa_open_ref);
+EXPORT_SYMBOL(spa_close);
+EXPORT_SYMBOL(spa_refcount_zero);
+
+/* Pool configuration lock */
+EXPORT_SYMBOL(spa_config_tryenter);
+EXPORT_SYMBOL(spa_config_enter);
+EXPORT_SYMBOL(spa_config_exit);
+EXPORT_SYMBOL(spa_config_held);
+
+/* Pool vdev add/remove lock */
+EXPORT_SYMBOL(spa_vdev_enter);
+EXPORT_SYMBOL(spa_vdev_exit);
+
+/* Pool vdev state change lock */
+EXPORT_SYMBOL(spa_vdev_state_enter);
+EXPORT_SYMBOL(spa_vdev_state_exit);
+
+/* Accessor functions */
+EXPORT_SYMBOL(spa_shutting_down);
+EXPORT_SYMBOL(spa_get_dsl);
+EXPORT_SYMBOL(spa_get_rootblkptr);
+EXPORT_SYMBOL(spa_set_rootblkptr);
+EXPORT_SYMBOL(spa_altroot);
+EXPORT_SYMBOL(spa_sync_pass);
+EXPORT_SYMBOL(spa_name);
+EXPORT_SYMBOL(spa_guid);
+EXPORT_SYMBOL(spa_last_synced_txg);
+EXPORT_SYMBOL(spa_first_txg);
+EXPORT_SYMBOL(spa_version);
+EXPORT_SYMBOL(spa_state);
+EXPORT_SYMBOL(spa_freeze_txg);
+EXPORT_SYMBOL(spa_get_alloc);
+EXPORT_SYMBOL(spa_get_space);
+EXPORT_SYMBOL(spa_get_dspace);
+EXPORT_SYMBOL(spa_get_asize);
+EXPORT_SYMBOL(spa_max_replication);
+EXPORT_SYMBOL(spa_busy);
+EXPORT_SYMBOL(spa_get_failmode);
+EXPORT_SYMBOL(spa_suspended);
+
+/* Miscellaneous support routines */
+EXPORT_SYMBOL(spa_rename);
+EXPORT_SYMBOL(spa_guid_exists);
+EXPORT_SYMBOL(spa_strdup);
+EXPORT_SYMBOL(spa_strfree);
+EXPORT_SYMBOL(spa_get_random);
+EXPORT_SYMBOL(sprintf_blkptr);
+EXPORT_SYMBOL(spa_freeze);
+EXPORT_SYMBOL(spa_upgrade);
+EXPORT_SYMBOL(spa_evict_all);
+EXPORT_SYMBOL(spa_lookup_by_guid);
+EXPORT_SYMBOL(spa_has_spare);
+EXPORT_SYMBOL(bp_get_dasize);
+EXPORT_SYMBOL(spa_has_slogs);
+EXPORT_SYMBOL(spa_is_root);
+
+EXPORT_SYMBOL(spa_namespace_lock);
+#endif
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index fb95361f8..3d82990f5 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -49,7 +49,7 @@ txg_init(dsl_pool_t *dp, uint64_t txg)
int c;
bzero(tx, sizeof (tx_state_t));
- tx->tx_cpu = kmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
+ tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
for (c = 0; c < max_ncpus; c++) {
int i;
@@ -109,7 +109,7 @@ txg_fini(dsl_pool_t *dp)
if (tx->tx_commit_cb_taskq != NULL)
taskq_destroy(tx->tx_commit_cb_taskq);
- kmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
+ vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
bzero(tx, sizeof (tx_state_t));
}
@@ -735,3 +735,21 @@ txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(txg_init);
+EXPORT_SYMBOL(txg_fini);
+EXPORT_SYMBOL(txg_sync_start);
+EXPORT_SYMBOL(txg_sync_stop);
+EXPORT_SYMBOL(txg_hold_open);
+EXPORT_SYMBOL(txg_rele_to_quiesce);
+EXPORT_SYMBOL(txg_rele_to_sync);
+EXPORT_SYMBOL(txg_register_callbacks);
+EXPORT_SYMBOL(txg_suspend);
+EXPORT_SYMBOL(txg_resume);
+EXPORT_SYMBOL(txg_delay);
+EXPORT_SYMBOL(txg_wait_synced);
+EXPORT_SYMBOL(txg_wait_open);
+EXPORT_SYMBOL(txg_stalled);
+EXPORT_SYMBOL(txg_sync_waiting);
+#endif
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 57869b6e6..dc9416ee4 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2782,3 +2782,11 @@ vdev_expand(vdev_t *vd, uint64_t txg)
vdev_config_dirty(vd);
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(vdev_fault);
+EXPORT_SYMBOL(vdev_degrade);
+EXPORT_SYMBOL(vdev_online);
+EXPORT_SYMBOL(vdev_offline);
+EXPORT_SYMBOL(vdev_clear);
+#endif
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
new file mode 100644
index 000000000..c1e0aa779
--- /dev/null
+++ b/module/zfs/vdev_disk.c
@@ -0,0 +1,623 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/vdev_disk.h>
+#include <sys/vdev_impl.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <sys/sunldi.h>
+
+/*
+ * Virtual device vector for disks.
+ */
+/*
+ * Per-request bookkeeping shared by every bio issued for one call to
+ * __vdev_disk_physio(). dr_bio is a zero-length trailing array; the
+ * allocation made by vdev_disk_dio_alloc() is extended by dr_bio_count
+ * pointers so the array can be indexed [0, dr_bio_count).
+ */
+typedef struct dio_request {
+ struct completion dr_comp; /* Completion for sync IO */
+ atomic_t dr_ref; /* References */
+ zio_t *dr_zio; /* Parent ZIO */
+ int dr_rw; /* Read/Write */
+ int dr_error; /* Bio error */
+ int dr_bio_count; /* Count of bio's */
+ struct bio *dr_bio[0]; /* Attached bio's */
+} dio_request_t;
+
+
+#ifdef HAVE_OPEN_BDEV_EXCLUSIVE
+/*
+ * Translate the FREAD/FWRITE bits of 'smode' into the matching
+ * FMODE_READ/FMODE_WRITE bits. At least one of the two must be set.
+ */
+static fmode_t
+vdev_bdev_mode(int smode)
+{
+	fmode_t bmode;
+
+	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
+
+	bmode = (smode & FREAD) ? FMODE_READ : 0;
+	bmode |= (smode & FWRITE) ? FMODE_WRITE : 0;
+
+	return (bmode);
+}
+#else
+/*
+ * Translate the FREAD/FWRITE bits of 'smode' into open flags for the
+ * non-exclusive open interface: MS_RDONLY when only read access is
+ * requested, otherwise 0. At least one of the two bits must be set.
+ */
+static int
+vdev_bdev_mode(int smode)
+{
+	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
+
+	/* Any request for write access rules out a read-only open */
+	if (smode & FWRITE)
+		return (0);
+
+	return ((smode & FREAD) ? MS_RDONLY : 0);
+}
+#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
+
+/*
+ * Return the capacity of 'bdev' as reported by the kernel: the
+ * partition's nr_sects when the block device refers to a partition,
+ * otherwise get_capacity() of the underlying disk.
+ */
+static uint64_t
+bdev_capacity(struct block_device *bdev)
+{
+	/* A partition is attached, use its own size */
+	if (bdev->bd_part != NULL)
+		return (bdev->bd_part->nr_sects);
+
+	/* No partition, fall back to the full device capacity */
+	return (get_capacity(bdev->bd_disk));
+}
+
+/*
+ * Open the block device named by v->vdev_path and attach it to the vdev.
+ *
+ * On success a newly allocated vdev_disk_t holding the block device is
+ * stored in v->vdev_tsd, *psize receives the device size and *ashift the
+ * log2 of the larger of the device block size and SPA_MINBLOCKSIZE.
+ * Returns 0 on success, EINVAL for a missing or relative path, ENOMEM if
+ * the vdev_disk_t cannot be allocated, or the (positive) error from
+ * vdev_bdev_open().
+ */
+static int
+vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
+{
+ struct block_device *bdev;
+ vdev_disk_t *vd;
+ int mode, block_size;
+
+ /* Must have a pathname and it must be absolute. */
+ if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
+ v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+ return EINVAL;
+ }
+
+ vd = kmem_zalloc(sizeof(vdev_disk_t), KM_SLEEP);
+ if (vd == NULL)
+ return ENOMEM;
+
+ /*
+ * Devices are always opened by the path provided at configuration
+ * time. This means that if the provided path is a udev by-id path
+ * then drives may be recabled without an issue. If the provided
+ * path is a udev by-path path then the physical location information
+ * will be preserved. This can be critical for more complicated
+ * configurations where drives are located in specific physical
+ * locations to maximize the system's tolerance to component failure.
+ * Alternately you can provide your own udev rule to flexibly map
+ * the drives as you see fit. It is not advised that you use the
+ * /dev/[hd]d devices which may be reordered due to probing order.
+ * Devices in the wrong locations will be detected by the higher
+ * level vdev validation.
+ */
+ mode = spa_mode(v->vdev_spa);
+ bdev = vdev_bdev_open(v->vdev_path, vdev_bdev_mode(mode), vd);
+ if (IS_ERR(bdev)) {
+ /* Nothing was attached to the vdev yet, just drop our state */
+ kmem_free(vd, sizeof(vdev_disk_t));
+ return -PTR_ERR(bdev);
+ }
+
+ v->vdev_tsd = vd;
+ vd->vd_bdev = bdev;
+ block_size = vdev_bdev_block_size(bdev);
+
+ /* Check if this is a whole device. When bdev->bd_contains ==
+ * bdev we have a whole device and not simply a partition. */
+ v->vdev_wholedisk = !!(bdev->bd_contains == bdev);
+
+ /* Clear the nowritecache bit, causes vdev_reopen() to try again. */
+ v->vdev_nowritecache = B_FALSE;
+
+ /*
+ * Physical volume size in bytes.
+ * NOTE(review): this assumes bdev_capacity() counts units of
+ * block_size bytes; if the kernel reports capacity in fixed 512-byte
+ * sectors this over-reports on devices with larger blocks -- confirm.
+ */
+ *psize = bdev_capacity(bdev) * block_size;
+
+ /* Based on the minimum sector size set the block size */
+ *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
+
+ return 0;
+}
+
+/*
+ * Release the per-vdev disk state: close the block device if one is
+ * open, free the vdev_disk_t and clear v->vdev_tsd. A vdev with no
+ * attached state is left untouched.
+ */
+static void
+vdev_disk_close(vdev_t *v)
+{
+	vdev_disk_t *vd = v->vdev_tsd;
+
+	if (vd != NULL) {
+		/* Close with the same mode the device was opened with */
+		if (vd->vd_bdev != NULL)
+			vdev_bdev_close(vd->vd_bdev,
+			    vdev_bdev_mode(spa_mode(v->vdev_spa)));
+
+		kmem_free(vd, sizeof(vdev_disk_t));
+		v->vdev_tsd = NULL;
+	}
+}
+
+/*
+ * Allocate a zeroed dio_request_t with room for 'bio_count' bio
+ * pointers. The completion is initialized, the reference count starts
+ * at zero and every bio slot is NULL. Returns NULL if the allocation
+ * fails.
+ */
+static dio_request_t *
+vdev_disk_dio_alloc(int bio_count)
+{
+	dio_request_t *dr;
+	int slot;
+
+	dr = kmem_zalloc(sizeof(dio_request_t) +
+	    sizeof(struct bio *) * bio_count, KM_SLEEP);
+	if (dr == NULL)
+		return (NULL);
+
+	init_completion(&dr->dr_comp);
+	atomic_set(&dr->dr_ref, 0);
+	dr->dr_bio_count = bio_count;
+	dr->dr_error = 0;
+
+	for (slot = 0; slot < dr->dr_bio_count; slot++)
+		dr->dr_bio[slot] = NULL;
+
+	return (dr);
+}
+
+/*
+ * Drop the reference held on every attached bio and free the
+ * dio_request_t, including its trailing array of bio pointers.
+ */
+static void
+vdev_disk_dio_free(dio_request_t *dr)
+{
+	const int count = dr->dr_bio_count;
+	int slot;
+
+	for (slot = 0; slot < count; slot++) {
+		struct bio *bio = dr->dr_bio[slot];
+
+		/* Slots past the last constructed bio are still NULL */
+		if (bio != NULL)
+			bio_put(bio);
+	}
+
+	kmem_free(dr, sizeof(dio_request_t) + sizeof(struct bio *) * count);
+}
+
+/*
+ * Take one reference on the dio_request_t. Each reference is released
+ * with vdev_disk_dio_put().
+ */
+static void
+vdev_disk_dio_get(dio_request_t *dr)
+{
+ atomic_inc(&dr->dr_ref);
+}
+
+/*
+ * Release one reference on the dio_request_t and return the number of
+ * references remaining. When the count reaches zero the request is
+ * freed and, if a parent zio is attached, the recorded error is stored
+ * in zio->io_error and the zio is passed to zio_interrupt(). The caller
+ * must not touch 'dr' after a return value of 0.
+ */
+static int
+vdev_disk_dio_put(dio_request_t *dr)
+{
+ int rc = atomic_dec_return(&dr->dr_ref);
+
+ /*
+ * Free the dio_request when the last reference is dropped and
+ * ensure zio_interrupt is called only once with the correct zio
+ */
+ if (rc == 0) {
+ /* Copy out what is needed before 'dr' is freed below */
+ zio_t *zio = dr->dr_zio;
+ int error = dr->dr_error;
+
+ vdev_disk_dio_free(dr);
+
+ if (zio) {
+ zio->io_error = error;
+ zio_interrupt(zio);
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * Completion callback for every bio submitted by __vdev_disk_physio().
+ *
+ * Records the first error seen for the request as a positive errno,
+ * drops the reference taken for this bio and, for synchronous requests,
+ * wakes the waiter once only the submitter's reference remains.
+ */
+BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, error)
+{
+	dio_request_t *dr = bio->bi_private;
+	int rc, sync;
+
+	/* Fatal error but print some useful debugging before asserting */
+	if (dr == NULL) {
+		printk("FATAL: bio->bi_private == NULL\n"
+		    "bi_next: %p, bi_flags: %lx, bi_rw: %lu, bi_vcnt: %d\n"
+		    "bi_idx: %d, bi_size: %d, bi_end_io: %p, bi_cnt: %d\n",
+		    bio->bi_next, bio->bi_flags, bio->bi_rw, bio->bi_vcnt,
+		    bio->bi_idx, bio->bi_size, bio->bi_end_io,
+		    atomic_read(&bio->bi_cnt));
+		SBUG();
+	}
+
+#ifndef HAVE_2ARGS_BIO_END_IO_T
+	/* Partial completion, wait until the whole bio has finished */
+	if (bio->bi_size)
+		return 1;
+#endif /* HAVE_2ARGS_BIO_END_IO_T */
+
+	/*
+	 * The block layer reports failures as negative errno values (the
+	 * flush completion in this file negates its code for the same
+	 * reason), while zio->io_error expects positive values. Normalize
+	 * here so dr_error is always a positive errno.
+	 */
+	if (error < 0)
+		error = -error;
+
+	if (error == 0 && !test_bit(BIO_UPTODATE, &bio->bi_flags))
+		error = EIO;
+
+	/* Only the first error for the request is kept */
+	if (dr->dr_error == 0)
+		dr->dr_error = error;
+
+	/*
+	 * Sample the sync flag before dropping our reference. For
+	 * asynchronous requests the submitter may drop the final reference
+	 * and free 'dr' as soon as our put returns, so 'dr' must not be
+	 * dereferenced afterwards unless a synchronous waiter is known to
+	 * be holding it.
+	 */
+	sync = dr->dr_rw & (1 << DIO_RW_SYNCIO);
+
+	/* Drop reference acquired by __vdev_disk_physio */
+	rc = vdev_disk_dio_put(dr);
+
+	/*
+	 * Wake up the synchronous waiter when this was the last
+	 * outstanding bio; the waiter still holds the remaining reference.
+	 */
+	if (sync && (rc == 1))
+		complete(&dr->dr_comp);
+
+	BIO_END_IO_RETURN(0);
+}
+
+/*
+ * Number of pages touched by the buffer [bio_ptr, bio_ptr + bio_size),
+ * accounting for a start address that is not page aligned.
+ */
+static inline unsigned long
+bio_nr_pages(void *bio_ptr, unsigned int bio_size)
+{
+	unsigned long start = (unsigned long)bio_ptr;
+	unsigned long first_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = (start + bio_size + PAGE_SIZE - 1) >>
+	    PAGE_SHIFT;
+
+	return (end_pfn - first_pfn);
+}
+
+/*
+ * Attach the pages backing [bio_ptr, bio_ptr + bio_size) to 'bio', one
+ * page-sized (or smaller) segment at a time. Stops when the buffer is
+ * exhausted, when bi_max_vecs segments have been tried, or when
+ * bio_add_page() does not accept a full segment. Returns the number of
+ * bytes which were NOT mapped.
+ */
+static unsigned int
+bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
+{
+	unsigned int page_off = offset_in_page(bio_ptr);
+	unsigned int vec;
+
+	for (vec = 0; vec < bio->bi_max_vecs && bio_size != 0; vec++) {
+		unsigned int chunk = PAGE_SIZE - page_off;
+		struct page *page;
+
+		/* Never map beyond the end of the buffer */
+		if (chunk > bio_size)
+			chunk = bio_size;
+
+		/* The page lookup differs for kmem_virt() addresses */
+		if (kmem_virt(bio_ptr))
+			page = vmalloc_to_page(bio_ptr);
+		else
+			page = virt_to_page(bio_ptr);
+
+		if (bio_add_page(bio, page, chunk, page_off) != chunk)
+			break;
+
+		/* Only the first segment can start mid-page */
+		bio_ptr += chunk;
+		bio_size -= chunk;
+		page_off = 0;
+	}
+
+	return (bio_size);
+}
+
+/*
+ * Issue the I/O described by (kbuf_ptr, kbuf_size, kbuf_offset) to 'bdev'
+ * as one or more bio's tracked by a single dio_request_t.
+ *
+ * 'zio' may be NULL; when set it is completed through zio_interrupt() by
+ * vdev_disk_dio_put() once the last reference is dropped. 'flags' is
+ * used as the bio rw value; when it has the DIO_RW_SYNCIO bit set the
+ * call blocks until every bio has completed.
+ *
+ * Returns ENOMEM if the dio_request_t or a bio cannot be allocated. For
+ * synchronous requests the recorded dr_error is returned; otherwise 0.
+ */
+static int
+__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
+ size_t kbuf_size, uint64_t kbuf_offset, int flags)
+{
+ dio_request_t *dr;
+ caddr_t bio_ptr;
+ uint64_t bio_offset;
+ int bio_size, bio_count = 16;
+ int i = 0, error = 0, block_size;
+
+retry:
+ dr = vdev_disk_dio_alloc(bio_count);
+ if (dr == NULL)
+ return ENOMEM;
+
+ dr->dr_zio = zio;
+ dr->dr_rw = flags;
+ block_size = vdev_bdev_block_size(bdev);
+
+#ifdef BIO_RW_FAILFAST
+ /* dr_rw was just set from flags, so this OR leaves it unchanged */
+ if (flags & (1 << BIO_RW_FAILFAST))
+ dr->dr_rw |= 1 << BIO_RW_FAILFAST;
+#endif /* BIO_RW_FAILFAST */
+
+ /*
+ * When the IO size exceeds the maximum bio size for the request
+ * queue we are forced to break the IO in multiple bio's and wait
+ * for them all to complete. Ideally, all pool users will set
+ * their volume block size to match the maximum request size and
+ * the common case will be one bio per vdev IO request.
+ */
+ bio_ptr = kbuf_ptr;
+ bio_offset = kbuf_offset;
+ bio_size = kbuf_size;
+ /* '<=' is deliberate: i == dr_bio_count triggers the resize below */
+ for (i = 0; i <= dr->dr_bio_count; i++) {
+
+ /* Finished constructing bio's for given buffer */
+ if (bio_size <= 0)
+ break;
+
+ /*
+ * By default only 'bio_count' bio's per dio are allowed.
+ * However, if we find ourselves in a situation where more
+ * are needed we allocate a larger dio and warn the user.
+ */
+ if (dr->dr_bio_count == i) {
+ vdev_disk_dio_free(dr);
+ bio_count *= 2;
+ printk("WARNING: Resized bio's/dio to %d\n",bio_count);
+ goto retry;
+ }
+
+ dr->dr_bio[i] = bio_alloc(GFP_NOIO,
+ bio_nr_pages(bio_ptr, bio_size));
+ if (dr->dr_bio[i] == NULL) {
+ /* Nothing has been submitted yet, free directly */
+ vdev_disk_dio_free(dr);
+ return ENOMEM;
+ }
+
+ /* Matching put called by vdev_disk_physio_completion */
+ vdev_disk_dio_get(dr);
+
+ /*
+ * NOTE(review): bi_sector is derived using the device block
+ * size; if the block layer expects fixed 512-byte sector
+ * units this is wrong for larger block sizes -- confirm.
+ */
+ dr->dr_bio[i]->bi_bdev = bdev;
+ dr->dr_bio[i]->bi_sector = bio_offset / block_size;
+ dr->dr_bio[i]->bi_rw = dr->dr_rw;
+ dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
+ dr->dr_bio[i]->bi_private = dr;
+
+ /* Remaining size is returned to become the new size */
+ bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size);
+
+ /* Advance in buffer and construct another bio if needed */
+ bio_ptr += dr->dr_bio[i]->bi_size;
+ bio_offset += dr->dr_bio[i]->bi_size;
+ }
+
+ /* Extra reference to protect dio_request during submit_bio */
+ vdev_disk_dio_get(dr);
+
+ /* Submit all bio's associated with this dio */
+ for (i = 0; i < dr->dr_bio_count; i++)
+ if (dr->dr_bio[i])
+ submit_bio(dr->dr_rw, dr->dr_bio[i]);
+
+ /*
+ * On synchronous blocking requests we wait for all of the bio
+ * completion callbacks to run. We will be woken when the last
+ * callback runs for this dio. We are then responsible for putting
+ * the last dio_request reference, which will in turn put back the
+ * last bio references. The only synchronous consumer is
+ * vdev_disk_read_rootlabel(); all other IO originating from
+ * vdev_disk_io_start() is asynchronous.
+ *
+ * NOTE(review): if no bio was constructed (kbuf_size == 0) nothing
+ * will ever signal dr_comp, so a synchronous zero-length request
+ * would appear to block here forever -- confirm callers never do
+ * this.
+ */
+ if (dr->dr_rw & (1 << DIO_RW_SYNCIO)) {
+ wait_for_completion(&dr->dr_comp);
+ error = dr->dr_error;
+ ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
+ }
+
+ /* Drop the submit reference; this may free 'dr' */
+ (void)vdev_disk_dio_put(dr);
+
+ return error;
+}
+
+/*
+ * Perform disk I/O that is not associated with a zio. This simply calls
+ * __vdev_disk_physio() with a NULL zio; see that function for the
+ * meaning of the arguments and the return value.
+ */
+int
+vdev_disk_physio(struct block_device *bdev, caddr_t kbuf,
+ size_t size, uint64_t offset, int flags)
+{
+ return __vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags);
+}
+
+/* 2.6.24 API change */
+#ifdef HAVE_BIO_EMPTY_BARRIER
+/*
+ * Completion callback for the empty barrier bio issued by
+ * vdev_disk_io_flush(). Stores the negated result code in the zio,
+ * marks the vdev as having no write cache when the result is
+ * -EOPNOTSUPP, releases the bio and hands the zio to zio_interrupt().
+ */
+BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc)
+{
+	zio_t *zio = bio->bi_private;
+
+	zio->io_error = -rc;
+	if (rc == -EOPNOTSUPP)
+		zio->io_vd->vdev_nowritecache = B_TRUE;
+
+	bio_put(bio);
+	zio_interrupt(zio);
+
+	BIO_END_IO_RETURN(0);
+}
+
+/*
+ * Submit an empty WRITE_BARRIER bio to 'bdev' on behalf of 'zio'.
+ * Returns 0 once the bio has been submitted (the zio then completes
+ * from vdev_disk_io_flush_completion), ENXIO if the device has no
+ * request queue, or ENOMEM if the bio cannot be allocated.
+ */
+static int
+vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
+{
+	struct bio *barrier;
+
+	if (bdev_get_queue(bdev) == NULL)
+		return (ENXIO);
+
+	barrier = bio_alloc(GFP_KERNEL, 0);
+	if (barrier == NULL)
+		return (ENOMEM);
+
+	barrier->bi_end_io = vdev_disk_io_flush_completion;
+	barrier->bi_private = zio;
+	barrier->bi_bdev = bdev;
+	submit_bio(WRITE_BARRIER, barrier);
+
+	return (0);
+}
+#else
+/*
+ * Without empty barrier support no flush can be issued; always
+ * reports ENOTSUP.
+ */
+static int
+vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
+{
+	return (ENOTSUP);
+}
+#endif /* HAVE_BIO_EMPTY_BARRIER */
+
+/*
+ * Start the I/O described by 'zio' on its disk vdev.
+ *
+ * Returns ZIO_PIPELINE_STOP when the request was handed to the block
+ * device (the zio is then completed from a bio completion callback), or
+ * ZIO_PIPELINE_CONTINUE when the zio was finished inline, in which case
+ * zio->io_error holds any failure.
+ */
+static int
+vdev_disk_io_start(zio_t *zio)
+{
+ vdev_t *v = zio->io_vd;
+ vdev_disk_t *vd = v->vdev_tsd;
+ int flags, error;
+
+ switch (zio->io_type) {
+ case ZIO_TYPE_IOCTL:
+
+ if (!vdev_readable(v)) {
+ zio->io_error = ENXIO;
+ return ZIO_PIPELINE_CONTINUE;
+ }
+
+ switch (zio->io_cmd) {
+ case DKIOCFLUSHWRITECACHE:
+
+ /* Flushing disabled: complete the zio without an error */
+ if (zfs_nocacheflush)
+ break;
+
+ /* A previous flush already reported lack of support */
+ if (v->vdev_nowritecache) {
+ zio->io_error = ENOTSUP;
+ break;
+ }
+
+ /* On success the flush completion finishes the zio */
+ error = vdev_disk_io_flush(vd->vd_bdev, zio);
+ if (error == 0)
+ return ZIO_PIPELINE_STOP;
+
+ zio->io_error = error;
+ if (error == ENOTSUP)
+ v->vdev_nowritecache = B_TRUE;
+
+ break;
+
+ default:
+ zio->io_error = ENOTSUP;
+ }
+
+ return ZIO_PIPELINE_CONTINUE;
+
+ case ZIO_TYPE_WRITE:
+ flags = WRITE;
+ break;
+
+ case ZIO_TYPE_READ:
+ flags = READ;
+ break;
+
+ default:
+ zio->io_error = ENOTSUP;
+ return ZIO_PIPELINE_CONTINUE;
+ }
+
+#ifdef BIO_RW_FAILFAST
+ /*
+ * NOTE(review): failfast is requested for zios flagged as a retry
+ * or try-hard; one might expect the opposite -- confirm intent.
+ */
+ if (zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))
+ flags |= (1 << BIO_RW_FAILFAST);
+#endif /* BIO_RW_FAILFAST */
+
+ error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
+ zio->io_size, zio->io_offset, flags);
+ if (error) {
+ zio->io_error = error;
+ return ZIO_PIPELINE_CONTINUE;
+ }
+
+ return ZIO_PIPELINE_STOP;
+}
+
+/*
+ * Post-I/O hook. When the zio failed with EIO the media is revalidated
+ * with check_disk_change(); if it reports a change the block device is
+ * invalidated, the vdev is flagged for removal and an asynchronous
+ * SPA_ASYNC_REMOVE request is posted. Any other result is ignored.
+ */
+static void
+vdev_disk_io_done(zio_t *zio)
+{
+	vdev_t *v;
+	vdev_disk_t *vd;
+
+	if (zio->io_error != EIO)
+		return;
+
+	v = zio->io_vd;
+	vd = v->vdev_tsd;
+
+	/* Media unchanged, leave the error for the caller to handle */
+	if (!check_disk_change(vd->vd_bdev))
+		return;
+
+	vdev_bdev_invalidate(vd->vd_bdev);
+	v->vdev_remove_wanted = B_TRUE;
+	spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
+}
+
+/*
+ * Operations vector for disk vdevs. The initializer is positional, so
+ * the order of entries must match the member order of vdev_ops_t.
+ */
+vdev_ops_t vdev_disk_ops = {
+ vdev_disk_open,
+ vdev_disk_close,
+ vdev_default_asize,
+ vdev_disk_io_start,
+ vdev_disk_io_done,
+ NULL, /* NOTE(review): unused slot; see vdev_ops_t for its meaning */
+ VDEV_TYPE_DISK, /* name of this vdev type */
+ B_TRUE /* leaf vdev */
+};
+
+/*
+ * Given the root disk device pathname, read the label from the device
+ * and construct a configuration nvlist.
+ *
+ * Each of the VDEV_LABELS label copies is tried in turn; the first one
+ * which unpacks and carries a pool state below POOL_STATE_DESTROYED and
+ * a non-zero txg is returned in *config. 'devid' is currently unused.
+ *
+ * Returns 0 with *config set on success. On failure *config is NULL and
+ * the return value is the (positive) open error, EIO if the device
+ * reports zero size, or EIDRM if no usable label was found.
+ */
+int
+vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
+{
+	struct block_device *bdev;
+	vdev_label_t *label;
+	uint64_t s, size;
+	int i;
+
+	bdev = vdev_bdev_open(devpath, vdev_bdev_mode(FREAD), NULL);
+	if (IS_ERR(bdev))
+		return -PTR_ERR(bdev);
+
+	s = bdev_capacity(bdev) * vdev_bdev_block_size(bdev);
+	if (s == 0) {
+		vdev_bdev_close(bdev, vdev_bdev_mode(FREAD));
+		return EIO;
+	}
+
+	size = P2ALIGN_TYPED(s, sizeof(vdev_label_t), uint64_t);
+	label = vmem_alloc(sizeof(vdev_label_t), KM_SLEEP);
+
+	/*
+	 * Start from a known state: if every label read fails the loop
+	 * below never writes *config, and the caller must not be handed
+	 * whatever value it happened to contain.
+	 */
+	*config = NULL;
+
+	for (i = 0; i < VDEV_LABELS; i++) {
+		uint64_t offset, state, txg = 0;
+
+		/* read vdev label */
+		offset = vdev_label_offset(size, i, 0);
+		if (vdev_disk_physio(bdev, (caddr_t)label,
+		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, READ_SYNC) != 0)
+			continue;
+
+		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
+		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
+			*config = NULL;
+			continue;
+		}
+
+		/* Skip labels with a missing or destroyed pool state */
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
+			nvlist_free(*config);
+			*config = NULL;
+			continue;
+		}
+
+		/* Skip labels with a missing or zero txg */
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0) {
+			nvlist_free(*config);
+			*config = NULL;
+			continue;
+		}
+
+		break;
+	}
+
+	vmem_free(label, sizeof(vdev_label_t));
+	vdev_bdev_close(bdev, vdev_bdev_mode(FREAD));
+
+	/* Report failure rather than success with no configuration */
+	if (*config == NULL)
+		return EIDRM;
+
+	return 0;
+}
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 9958a0e78..ab54d52b5 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -405,3 +405,14 @@ vdev_queue_io_done(zio_t *zio)
mutex_exit(&vq->vq_lock);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(zfs_vdev_max_pending, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_max_pending, "Maximum pending VDEV IO");
+
+module_param(zfs_vdev_min_pending, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_min_pending, "Minimum pending VDEV IO");
+
+module_param(zfs_vdev_aggregation_limit, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_aggregation_limit, "Maximum VDEV IO aggregation");
+#endif
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
index 7e278dd2b..f5164ced6 100644
--- a/module/zfs/zap_micro.c
+++ b/module/zfs/zap_micro.c
@@ -1180,3 +1180,19 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
zap_unlockdir(zap);
return (err);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zap_add);
+EXPORT_SYMBOL(zap_create);
+EXPORT_SYMBOL(zap_cursor_advance);
+EXPORT_SYMBOL(zap_cursor_fini);
+EXPORT_SYMBOL(zap_cursor_init);
+EXPORT_SYMBOL(zap_cursor_init_serialized);
+EXPORT_SYMBOL(zap_cursor_move_to_key);
+EXPORT_SYMBOL(zap_cursor_retrieve);
+EXPORT_SYMBOL(zap_cursor_serialize);
+EXPORT_SYMBOL(zap_lookup);
+EXPORT_SYMBOL(zap_lookup_norm);
+EXPORT_SYMBOL(zap_remove);
+EXPORT_SYMBOL(zap_update);
+#endif
diff --git a/module/zfs/zfs_byteswap.c b/module/zfs/zfs_byteswap.c
index d5f3013df..f08135e88 100644
--- a/module/zfs/zfs_byteswap.c
+++ b/module/zfs/zfs_byteswap.c
@@ -196,3 +196,9 @@ zfs_znode_byteswap(void *buf, size_t size)
ACE_SLOT_CNT);
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_oldacl_byteswap);
+EXPORT_SYMBOL(zfs_acl_byteswap);
+EXPORT_SYMBOL(zfs_znode_byteswap);
+#endif
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index 8b7785fa8..9809aa5d0 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -92,7 +92,7 @@ void
zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
uint64_t stateoroffset, uint64_t size)
{
-#ifdef _KERNEL
+#if defined(_KERNEL) && defined(HAVE_FM)
nvlist_t *ereport, *detector;
uint64_t ena;
char class[64];
@@ -334,7 +334,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
static void
zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
{
-#ifdef _KERNEL
+#if defined(_KERNEL) && defined(HAVE_FM)
nvlist_t *resource;
char class[64];
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index b039414db..ac85a8ec5 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -67,6 +67,7 @@
#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "zfs_deleg.h"
+#include "zfs_config.h"
extern struct modlfs zfs_modlfs;
@@ -146,7 +147,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
static void
history_str_free(char *buf)
{
- kmem_free(buf, HIS_MAX_RECORD_LEN);
+ vmem_free(buf, HIS_MAX_RECORD_LEN);
}
static char *
@@ -157,7 +158,7 @@ history_str_get(zfs_cmd_t *zc)
if (zc->zc_history == 0)
return (NULL);
- buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
+ buf = vmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
if (copyinstr((void *)(uintptr_t)zc->zc_history,
buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
history_str_free(buf);
@@ -824,12 +825,12 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
if (size > zc->zc_nvlist_dst_size) {
error = ENOMEM;
} else {
- packed = kmem_alloc(size, KM_SLEEP);
+ packed = vmem_alloc(size, KM_SLEEP);
VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
KM_SLEEP) == 0);
error = ddi_copyout(packed,
(void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags);
- kmem_free(packed, size);
+ vmem_free(packed, size);
}
zc->zc_nvlist_dst_size = size;
@@ -3648,7 +3649,7 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
return (EINVAL);
- zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+ zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
@@ -3694,7 +3695,7 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
zfs_log_history(zc);
}
- kmem_free(zc, sizeof (zfs_cmd_t));
+ vmem_free(zc, sizeof (zfs_cmd_t));
return (error);
}
@@ -3795,15 +3796,27 @@ static struct dev_ops zfs_dev_ops = {
};
static struct modldrv zfs_modldrv = {
+#ifdef HAVE_SPL
+ NULL,
+#else
&mod_driverops,
+#endif /* HAVE_SPL */
"ZFS storage pool",
&zfs_dev_ops
};
static struct modlinkage modlinkage = {
MODREV_1,
+#ifdef HAVE_ZPL
(void *)&zfs_modlfs,
+#else
+ NULL,
+#endif /* HAVE_ZPL */
(void *)&zfs_modldrv,
+#ifdef HAVE_SPL
+ ZFS_MAJOR,
+ ZFS_MINORS,
+#endif /* HAVE_SPL */
NULL
};
@@ -3834,6 +3847,8 @@ _init(void)
ASSERT(error == 0);
mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
+ printk(KERN_INFO "ZFS: Loaded ZFS Filesystem v%s\n", ZFS_META_VERSION);
+
return (0);
}
@@ -3866,8 +3881,17 @@ _fini(void)
return (error);
}
+#ifdef HAVE_SPL
+spl_module_init(_init);
+spl_module_exit(_fini);
+
+MODULE_AUTHOR("Sun Microsystems, Inc");
+MODULE_DESCRIPTION("ZFS");
+MODULE_LICENSE("CDDL");
+#else
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
+#endif /* HAVE_SPL */
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 8eb4665ae..afecb374b 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -318,6 +318,7 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
return (ENOTTY);
}
+#if defined(_KERNEL) && defined(HAVE_UIO_RW)
/*
* Utility functions to map and unmap a single physical page. These
* are used to manage the mappable copies of ZFS file data, and therefore
@@ -342,6 +343,7 @@ zfs_unmap_page(page_t *pp, caddr_t addr)
ppmapout(addr);
}
}
+#endif /* _KERNEL && HAVE_UIO_RW */
/*
* When a file is memory mapped, we must keep the IO data synchronized
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index ee27195a4..e6ec7bcf6 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1683,3 +1683,8 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
(void) memmove(buf, path, buf + len - path);
return (error);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_create_fs);
+EXPORT_SYMBOL(zfs_obj_to_path);
+#endif
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index db3822f5a..f5cbf15dd 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -1588,7 +1588,10 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
zr.zr_replay = replay_func;
zr.zr_arg = arg;
zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log);
- zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);
+ /* XXX: Changed to use vmem_alloc instead of kmem_alloc for
+ * large allocation size (I think this is safe here).
+ */
+ zr.zr_lrbuf = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);
/*
* Wait for in-progress removes to sync before starting replay.
@@ -1600,7 +1603,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
ASSERT(zilog->zl_replay_blks == 0);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
zh->zh_claim_txg);
- kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE);
+ vmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE);
zil_destroy(zilog, B_FALSE);
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 703abb17f..3cf22dd85 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -72,6 +72,7 @@ kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
+int zio_bulk_flags = 0;
#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
@@ -124,12 +125,13 @@ zio_init(void)
char name[36];
(void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, NULL, KMC_NODEBUG);
+ align, NULL, NULL, NULL, NULL, NULL,
+ KMC_NODEBUG | zio_bulk_flags);
(void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size,
align, NULL, NULL, NULL, NULL, data_alloc_arena,
- KMC_NODEBUG);
+ KMC_NODEBUG | zio_bulk_flags);
}
}
@@ -2361,3 +2363,19 @@ static zio_pipe_stage_t *zio_pipeline[ZIO_STAGES] = {
zio_checksum_verify,
zio_done
};
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Fault injection */
+EXPORT_SYMBOL(zio_injection_enabled);
+EXPORT_SYMBOL(zio_inject_fault);
+EXPORT_SYMBOL(zio_inject_list_next);
+EXPORT_SYMBOL(zio_clear_fault);
+EXPORT_SYMBOL(zio_handle_fault_injection);
+EXPORT_SYMBOL(zio_handle_device_injection);
+EXPORT_SYMBOL(zio_handle_label_injection);
+EXPORT_SYMBOL(zio_priority_table);
+EXPORT_SYMBOL(zio_type_name);
+
+module_param(zio_bulk_flags, int, 0644);
+MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers");
+#endif
diff --git a/module/zpios/Makefile.in b/module/zpios/Makefile.in
new file mode 100644
index 000000000..4924082a1
--- /dev/null
+++ b/module/zpios/Makefile.in
@@ -0,0 +1,11 @@
+MODULE := zpios
+
+EXTRA_CFLAGS = -I@MODDIR@/zfs/include
+EXTRA_CFLAGS += -I@MODDIR@/zcommon/include
+EXTRA_CFLAGS += -I@MODDIR@/avl/include
+EXTRA_CFLAGS += -I@MODDIR@/nvpair/include
+EXTRA_CFLAGS += -I@MODDIR@/unicode/include
+EXTRA_CFLAGS += -I@MODDIR@/zpios/include
+EXTRA_CFLAGS += @KERNELCPPFLAGS@
+
+obj-m := ${MODULE}.o
diff --git a/module/zpios/include/zpios-ctl.h b/module/zpios/include/zpios-ctl.h
new file mode 100644
index 000000000..3880f7794
--- /dev/null
+++ b/module/zpios/include/zpios-ctl.h
@@ -0,0 +1,197 @@
+/*
+ * This file is part of the ZFS Linux port.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
+ * LLNL-CODE-403049
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _ZPIOS_CTL_H
+#define _ZPIOS_CTL_H
+
+/* Contains shared definitions which both the userspace
+ * and kernelspace portions of zpios must agree on.
+ */
+#ifndef _KERNEL
+#include <stdint.h>
+#endif
+
+#define ZPIOS_MAJOR 232 /* XXX - Arbitrary */
+#define ZPIOS_MINORS 1
+#define ZPIOS_NAME "zpios"
+#define ZPIOS_DEV "/dev/zpios"
+
+#define DMU_IO 0x01
+
+#define DMU_WRITE 0x0001
+#define DMU_READ 0x0002
+#define DMU_VERIFY 0x0004
+#define DMU_REMOVE 0x0008
+#define DMU_FPP 0x0010
+#define DMU_WRITE_ZC 0x0020 /* Incompatible w/DMU_VERIFY */
+#define DMU_READ_ZC 0x0040 /* Incompatible w/DMU_VERIFY */
+#define DMU_WRITE_NOWAIT 0x0080
+#define DMU_READ_NOPF 0x0100
+
+#define ZPIOS_NAME_SIZE 16
+#define ZPIOS_PATH_SIZE 128
+
+#define PHASE_PRE_RUN "pre-run"
+#define PHASE_PRE_CREATE "pre-create"
+#define PHASE_PRE_WRITE "pre-write"
+#define PHASE_PRE_READ "pre-read"
+#define PHASE_PRE_REMOVE "pre-remove"
+#define PHASE_POST_RUN "post-run"
+#define PHASE_POST_CREATE "post-create"
+#define PHASE_POST_WRITE "post-write"
+#define PHASE_POST_READ "post-read"
+#define PHASE_POST_REMOVE "post-remove"
+
+/* Argument block carried by the ZPIOS_CFG ioctl; cfg_cmd takes one of the
+ * ZPIOS_CFG_* configuration command values.
+ */
+#define ZPIOS_CFG_MAGIC 0x87237190U
+typedef struct zpios_cfg {
+ uint32_t cfg_magic; /* Unique magic */
+ int32_t cfg_cmd; /* Config command */
+ int32_t cfg_arg1; /* Config command arg 1 */
+ int32_t cfg_rc1; /* Config response 1 */
+} zpios_cfg_t;
+
+/* Fixed-width seconds/nanoseconds pair shared by user and kernel space.
+ * Both fields are unsigned 32-bit values.
+ */
+typedef struct zpios_timespec {
+ uint32_t ts_sec;
+ uint32_t ts_nsec;
+} zpios_timespec_t;
+
+/* One timed interval: start and stop stamps plus a delta timespec. */
+typedef struct zpios_time {
+ zpios_timespec_t start;
+ zpios_timespec_t stop;
+ zpios_timespec_t delta;
+} zpios_time_t;
+
+/* Timing and byte/chunk counters; kept per run, per thread and per region. */
+typedef struct zpios_stats {
+ zpios_time_t total_time;
+ zpios_time_t cr_time; /* stamped by zpios_dmu_setup() */
+ zpios_time_t rm_time; /* stamped by zpios_remove_objects() */
+ zpios_time_t wr_time;
+ zpios_time_t rd_time;
+ uint64_t wr_data; /* bytes written */
+ uint64_t wr_chunks; /* chunks written */
+ uint64_t rd_data; /* bytes read */
+ uint64_t rd_chunks; /* chunks read */
+} zpios_stats_t;
+
+/* Argument block carried by the ZPIOS_CMD ioctl describing one test run.
+ * The structure ends in a zero-length array, so any opaque data follows
+ * the fixed fields directly.
+ * NOTE(review): presumably cmd_data_size is the byte length of
+ * cmd_data_str -- confirm against the ioctl handler.
+ */
+#define ZPIOS_CMD_MAGIC 0x49715385U
+typedef struct zpios_cmd {
+ uint32_t cmd_magic; /* Unique magic */
+ uint32_t cmd_id; /* Run ID */
+ char cmd_pool[ZPIOS_NAME_SIZE]; /* Pool name */
+ uint64_t cmd_chunk_size; /* Chunk size */
+ uint32_t cmd_thread_count; /* Thread count */
+ uint32_t cmd_region_count; /* Region count */
+ uint64_t cmd_region_size; /* Region size */
+ uint64_t cmd_offset; /* Region offset */
+ uint32_t cmd_region_noise; /* Region noise */
+ uint32_t cmd_chunk_noise; /* Chunk noise */
+ uint32_t cmd_thread_delay; /* Thread delay */
+ uint32_t cmd_flags; /* Test flags */
+ char cmd_pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */
+ char cmd_post[ZPIOS_PATH_SIZE]; /* Post-exec hook */
+ char cmd_log[ZPIOS_PATH_SIZE]; /* Requested log dir */
+ uint64_t cmd_data_size; /* Opaque data size */
+ char cmd_data_str[0]; /* Opaque data region */
+} zpios_cmd_t;
+
+/* Valid ioctls */
+#define ZPIOS_CFG _IOWR('f', 101, zpios_cfg_t)
+#define ZPIOS_CMD _IOWR('f', 102, zpios_cmd_t)
+
+/* Valid configuration commands */
+#define ZPIOS_CFG_BUFFER_CLEAR 0x001 /* Clear text buffer */
+#define ZPIOS_CFG_BUFFER_SIZE 0x002 /* Resize text buffer */
+
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC 1000000000L
+#endif
+
+/*
+ * Store sec/nsec in *ts with ts_nsec brought into [0, NSEC_PER_SEC).
+ *
+ * Callers hand in unsigned sums and differences of two timespecs, so a
+ * "negative" nanosecond count arrives as a wrapped uint32_t.  It is
+ * reinterpreted as a signed value here so the borrow loop can actually
+ * run: with an unsigned nsec the (nsec < 0) test never fired and a
+ * wrapped difference was carried *into* sec several times instead.
+ * A sum of two normalized nanosecond fields (< 2 * NSEC_PER_SEC) and a
+ * difference of two (> -NSEC_PER_SEC) both fit in an int32_t.
+ */
+static inline
+void zpios_timespec_normalize(zpios_timespec_t *ts, uint32_t sec, uint32_t nsec)
+{
+	int32_t ns = (int32_t)nsec;
+
+	/* Carry whole seconds out of the nanosecond field */
+	while (ns >= NSEC_PER_SEC) {
+		ns -= NSEC_PER_SEC;
+		sec++;
+	}
+	/* Borrow from the seconds field while nanoseconds are negative */
+	while (ns < 0) {
+		ns += NSEC_PER_SEC;
+		sec--;
+	}
+	ts->ts_sec = sec;
+	ts->ts_nsec = (uint32_t)ns;
+}
+
+/* Return the field-wise sum of lhs and rhs, passed through
+ * zpios_timespec_normalize().
+ */
+static inline
+zpios_timespec_t zpios_timespec_add(zpios_timespec_t lhs, zpios_timespec_t rhs)
+{
+	zpios_timespec_t sum;
+	uint32_t sec = lhs.ts_sec + rhs.ts_sec;
+	uint32_t nsec = lhs.ts_nsec + rhs.ts_nsec;
+
+	zpios_timespec_normalize(&sum, sec, nsec);
+
+	return sum;
+}
+
+/* Return the field-wise difference lhs - rhs, passed through
+ * zpios_timespec_normalize().
+ */
+static inline
+zpios_timespec_t zpios_timespec_sub(zpios_timespec_t lhs, zpios_timespec_t rhs)
+{
+	zpios_timespec_t diff;
+	uint32_t sec = lhs.ts_sec - rhs.ts_sec;
+	uint32_t nsec = lhs.ts_nsec - rhs.ts_nsec;
+
+	zpios_timespec_normalize(&diff, sec, nsec);
+
+	return diff;
+}
+
+#ifdef _KERNEL
+
+/* Sample current_kernel_time() and repackage it as a zpios_timespec_t. */
+static inline
+zpios_timespec_t zpios_timespec_now(void)
+{
+	struct timespec now = current_kernel_time();
+	zpios_timespec_t zts;
+
+	zts.ts_sec = now.tv_sec;
+	zts.ts_nsec = now.tv_nsec;
+
+	return zts;
+}
+
+#else
+
+/* Convert a zpios_timespec_t to seconds expressed as a double. */
+static inline
+double zpios_timespec_to_double(zpios_timespec_t ts)
+{
+	double whole = (double)(ts.ts_sec);
+	double frac = (double)(ts.ts_nsec) / (double)(NSEC_PER_SEC);
+
+	return (whole + frac);
+}
+
+#endif /* _KERNEL */
+
+#endif /* _ZPIOS_CTL_H */
diff --git a/module/zpios/include/zpios-internal.h b/module/zpios/include/zpios-internal.h
new file mode 100644
index 000000000..cafe8b7d0
--- /dev/null
+++ b/module/zpios/include/zpios-internal.h
@@ -0,0 +1,137 @@
+/*
+ * This file is part of the ZFS Linux port.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
+ * LLNL-CODE-403049
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _ZPIOS_INTERNAL_H
+#define _ZPIOS_INTERNAL_H
+
+#include "zpios-ctl.h"
+
+#define OBJ_SIZE 64
+
+struct run_args;
+
+/* An object id paired with the objset it lives in. */
+typedef struct dmu_obj {
+ objset_t *os;
+ uint64_t obj;
+} dmu_obj_t;
+
+/* thread doing the IO data */
+typedef struct thread_data {
+ struct run_args *run_args; /* run this thread belongs to */
+ int thread_no; /* index assigned at creation */
+ int rc; /* result, read/written under lock */
+ zpios_stats_t stats;
+ kmutex_t lock;
+} thread_data_t;
+
+/* region for IO data */
+typedef struct zpios_region {
+ __u64 wr_offset; /* next offset to write */
+ __u64 rd_offset; /* next offset to read */
+ __u64 init_offset; /* first offset of the region */
+ __u64 max_offset; /* end of the region (exclusive) */
+ dmu_obj_t obj;
+ zpios_stats_t stats;
+ kmutex_t lock;
+} zpios_region_t;
+
+/* arguments for one run
+ *
+ * Allocated by zpios_setup_run() with room for region_count trailing
+ * zpios_region_t entries, which is why regions[] must stay last.
+ */
+typedef struct run_args {
+ /* Config args */
+ int id;
+ char pool[ZPIOS_NAME_SIZE];
+ __u64 chunk_size;
+ __u32 thread_count;
+ __u32 region_count;
+ __u64 region_size;
+ __u64 offset;
+ __u32 region_noise;
+ __u32 chunk_noise;
+ __u32 thread_delay;
+ __u32 flags;
+ char pre[ZPIOS_PATH_SIZE];
+ char post[ZPIOS_PATH_SIZE];
+ char log[ZPIOS_PATH_SIZE];
+
+ /* Control data */
+ objset_t *os;
+ wait_queue_head_t waitq; /* controller waits here for a phase to end */
+ volatile uint64_t threads_done; /* bumped under lock_ctl by each thread */
+ kmutex_t lock_work; /* held while handing out work items */
+ kmutex_t lock_ctl;
+ __u32 region_next; /* region the next work item search starts at */
+
+ /* Results data */
+ struct file *file; /* open file whose text buffer receives messages */
+ zpios_stats_t stats;
+
+ thread_data_t **threads;
+ zpios_region_t regions[0]; /* Must be last element */
+} run_args_t;
+
+/* Size of the text buffer allocated at open time, and the tail portion of
+ * it in which zpios_print() refuses to start a new message.
+ */
+#define ZPIOS_INFO_BUFFER_SIZE 65536
+#define ZPIOS_INFO_BUFFER_REDZONE 1024
+
+/* Per-open message buffer, stored in file->private_data. */
+typedef struct zpios_info {
+ spinlock_t info_lock; /* taken by zpios_print() around each append */
+ int info_size; /* size of info_buffer in bytes */
+ char *info_buffer; /* start of the text buffer */
+ char *info_head; /* Internal kernel use only */
+} zpios_info_t;
+
+#define zpios_print(file, format, args...) \
+({ zpios_info_t *_info_ = (zpios_info_t *)file->private_data; \
+ int _rc_; \
+ \
+ ASSERT(_info_); \
+ ASSERT(_info_->info_buffer); \
+ \
+ spin_lock(&_info_->info_lock); \
+ \
+ /* Don't allow the kernel to start a write in the red zone */ \
+ if ((int)(_info_->info_head - _info_->info_buffer) > \
+ (_info_->info_size - ZPIOS_INFO_BUFFER_REDZONE)) { \
+ _rc_ = -EOVERFLOW; \
+ } else { \
+ _rc_ = sprintf(_info_->info_head, format, args); \
+ if (_rc_ >= 0) \
+ _info_->info_head += _rc_; \
+ } \
+ \
+ spin_unlock(&_info_->info_lock); \
+ _rc_; \
+})
+
+#define zpios_vprint(file, test, format, args...) \
+ zpios_print(file, "%*s: " format, ZPIOS_NAME_SIZE, test, args)
+
+#endif /* _ZPIOS_INTERNAL_H */
diff --git a/module/zpios/zpios.c b/module/zpios/zpios.c
new file mode 100644
index 000000000..f198946d2
--- /dev/null
+++ b/module/zpios/zpios.c
@@ -0,0 +1,1297 @@
+/*
+ * This file is part of the ZFS Linux port.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
+ * LLNL-CODE-403049
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Kernel PIOS DMU implementation originally derived from PIOS test code.
+ * Character control interface derived from SPL code.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/dmu.h>
+#include <sys/txg.h>
+#include <linux/cdev.h>
+#include "zpios-internal.h"
+
+
+static spl_class *zpios_class;
+static spl_device *zpios_device;
+
+
+/*
+ * Run the user space helper 'path' for test phase 'phase', passing the
+ * run configuration and the result code 'rc' as string arguments.
+ *
+ * Returns -ENOENT when no helper path is set, otherwise the value
+ * returned by call_usermodehelper().
+ */
+static
+int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
+{
+	/* This is stack heavy but it should be OK since we are only
+	 * making the upcall between tests when the stack is shallow.
+	 * The 64-bit values get 24 byte buffers so that all 20 decimal
+	 * digits fit; every buffer is formatted against its own size.
+	 */
+	char id[16], chunk_size[24], region_size[24], thread_count[16];
+	char region_count[16], offset[24], region_noise[16], chunk_noise[16];
+	char thread_delay[16], flags[16], result[16];
+	char *argv[16], *envp[4];
+
+	if ((path == NULL) || (strlen(path) == 0))
+		return -ENOENT;
+
+	snprintf(id, sizeof(id), "%d", run_args->id);
+	/* Format as unsigned long long so 32-bit builds keep all 64 bits */
+	snprintf(chunk_size, sizeof(chunk_size), "%llu",
+	    (unsigned long long)run_args->chunk_size);
+	snprintf(region_size, sizeof(region_size), "%llu",
+	    (unsigned long long)run_args->region_size);
+	snprintf(thread_count, sizeof(thread_count), "%u",
+	    run_args->thread_count);
+	snprintf(region_count, sizeof(region_count), "%u",
+	    run_args->region_count);
+	snprintf(offset, sizeof(offset), "%llu",
+	    (unsigned long long)run_args->offset);
+	snprintf(region_noise, sizeof(region_noise), "%u",
+	    run_args->region_noise);
+	snprintf(chunk_noise, sizeof(chunk_noise), "%u",
+	    run_args->chunk_noise);
+	snprintf(thread_delay, sizeof(thread_delay), "%u",
+	    run_args->thread_delay);
+	snprintf(flags, sizeof(flags), "0x%x", run_args->flags);
+	snprintf(result, sizeof(result), "%d", rc);
+
+	/* Passing 15 args to registered pre/post upcall */
+	argv[0] = path;
+	argv[1] = phase;
+	argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
+	argv[3] = id;
+	argv[4] = run_args->pool;
+	argv[5] = chunk_size;
+	argv[6] = region_size;
+	argv[7] = thread_count;
+	argv[8] = region_count;
+	argv[9] = offset;
+	argv[10] = region_noise;
+	argv[11] = chunk_noise;
+	argv[12] = thread_delay;
+	argv[13] = flags;
+	argv[14] = result;
+	argv[15] = NULL;
+
+	/* Passing environment for user space upcall */
+	envp[0] = "HOME=/";
+	envp[1] = "TERM=linux";
+	envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
+	envp[3] = NULL;
+
+	return call_usermodehelper(path, argv, envp, 1);
+}
+
+/*
+ * Allocate a new DMU_OT_UINT64_OTHER object in 'os' and request a 128K
+ * block size for it.
+ *
+ * Returns the new object id, or 0 when the transaction could not be
+ * assigned.  A block size failure is logged but the object id is still
+ * returned, as before.
+ */
+static uint64_t
+zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
+{
+	struct dmu_tx *tx;
+	uint64_t obj = 0ULL;
+	int rc;
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
+	rc = dmu_tx_assign(tx, TXG_WAIT);
+	if (rc) {
+		zpios_print(run_args->file,
+		    "dmu_tx_assign() failed: %d\n", rc);
+		/* Never assigned, so abort is the right way out */
+		dmu_tx_abort(tx);
+		return obj;
+	}
+
+	obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+	    DMU_OT_NONE, 0, tx);
+	rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
+	if (rc)
+		zpios_print(run_args->file,
+		    "dmu_object_set_blocksize() failed: %d\n", rc);
+
+	/* Once dmu_tx_assign() has succeeded the tx has to be committed;
+	 * dmu_tx_abort() is only valid for a tx that was never assigned.
+	 */
+	dmu_tx_commit(tx);
+
+	return obj;
+}
+
+/*
+ * Free object 'obj' in 'os' inside its own transaction.
+ *
+ * Returns 0 on success, or the error from dmu_tx_assign() or
+ * dmu_object_free(); either failure is also logged to the run's text
+ * buffer.
+ */
+static int
+zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
+{
+	struct dmu_tx *tx;
+	int rc;
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
+	rc = dmu_tx_assign(tx, TXG_WAIT);
+	if (rc) {
+		zpios_print(run_args->file,
+		    "dmu_tx_assign() failed: %d\n", rc);
+		/* Never assigned, so abort is the right way out */
+		dmu_tx_abort(tx);
+		return rc;
+	}
+
+	rc = dmu_object_free(os, obj, tx);
+	if (rc)
+		zpios_print(run_args->file,
+		    "dmu_object_free() failed: %d\n", rc);
+
+	/* Once dmu_tx_assign() has succeeded the tx has to be committed;
+	 * dmu_tx_abort() is only valid for a tx that was never assigned.
+	 */
+	dmu_tx_commit(tx);
+
+	return rc;
+}
+
+/*
+ * Open the pool's objset and lay out the regions for one run, timing the
+ * work in stats.cr_time and bracketing it with the pre/post create
+ * upcalls.
+ *
+ * In file-per-process mode (DMU_FPP) every region gets its own object and
+ * covers [offset, offset + region_size).  Otherwise all regions share one
+ * object and region i covers [offset * i, offset * i + region_size).
+ *
+ * Returns 0 on success with run_args->os set; on failure returns the
+ * error and leaves no objset open.
+ */
+static int
+zpios_dmu_setup(run_args_t *run_args)
+{
+	zpios_time_t *t = &(run_args->stats.cr_time);
+	objset_t *os;
+	uint64_t obj = 0ULL;
+	int i, rc = 0;
+
+	(void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
+	t->start = zpios_timespec_now();
+
+	rc = dmu_objset_open(run_args->pool, DMU_OST_ZFS, DS_MODE_USER, &os);
+	if (rc) {
+		zpios_print(run_args->file, "Error dmu_objset_open() "
+		    "failed: %d\n", rc);
+		goto out;
+	}
+
+	if (!(run_args->flags & DMU_FPP)) {
+		obj = zpios_dmu_object_create(run_args, os);
+		if (obj == 0) {
+			rc = -EBADF;
+			zpios_print(run_args->file, "Error zpios_dmu_"
+			    "object_create() failed, %d\n", rc);
+			/* The caller frees run_args on error and never
+			 * sees 'os', so close it here or it is leaked.
+			 */
+			dmu_objset_close(os);
+			goto out;
+		}
+	}
+
+	for (i = 0; i < run_args->region_count; i++) {
+		zpios_region_t *region;
+
+		region = &run_args->regions[i];
+		mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);
+
+		if (run_args->flags & DMU_FPP) {
+			/* File per process */
+			region->obj.os = os;
+			region->obj.obj = zpios_dmu_object_create(run_args, os);
+			ASSERT(region->obj.obj > 0); /* XXX - Handle this */
+			region->wr_offset = run_args->offset;
+			region->rd_offset = run_args->offset;
+			region->init_offset = run_args->offset;
+			region->max_offset = run_args->offset +
+			    run_args->region_size;
+		} else {
+			/* Single shared file */
+			region->obj.os = os;
+			region->obj.obj = obj;
+			region->wr_offset = run_args->offset * i;
+			region->rd_offset = run_args->offset * i;
+			region->init_offset = run_args->offset * i;
+			region->max_offset = run_args->offset *
+			    i + run_args->region_size;
+		}
+	}
+
+	run_args->os = os;
+out:
+	t->stop = zpios_timespec_now();
+	t->delta = zpios_timespec_sub(t->stop, t->start);
+	(void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);
+
+	return rc;
+}
+
+/*
+ * Build the run_args_t for one run from the command 'kcmd'.
+ *
+ * Allocates a zeroed run_args_t followed by cmd_region_count regions,
+ * copies the command fields in, initializes the two control mutexes,
+ * makes the pre-run upcall and then calls zpios_dmu_setup().
+ *
+ * On success returns 0 with *run_args pointing at the new structure.
+ * On failure returns non-zero; if zpios_dmu_setup() failed the
+ * structure is torn down again and *run_args is set to NULL.
+ */
+static int
+zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
+{
+ run_args_t *ra;
+ int rc, size;
+
+ /* NOTE(review): size is an int while cmd_region_count is a 32-bit
+ * value from the command; confirm large counts cannot overflow it.
+ */
+ size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t);
+
+ ra = vmem_zalloc(size, KM_SLEEP);
+ if (ra == NULL) {
+ zpios_print(file, "Unable to vmem_zalloc() %d bytes "
+ "for regions\n", size);
+ return -ENOMEM;
+ }
+
+ *run_args = ra;
+ /* Copy at most size - 1 bytes; the zeroed allocation supplies the
+ * terminating NUL.
+ */
+ strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
+ strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
+ strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
+ strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
+ ra->id = kcmd->cmd_id;
+ ra->chunk_size = kcmd->cmd_chunk_size;
+ ra->thread_count = kcmd->cmd_thread_count;
+ ra->region_count = kcmd->cmd_region_count;
+ ra->region_size = kcmd->cmd_region_size;
+ ra->offset = kcmd->cmd_offset;
+ ra->region_noise = kcmd->cmd_region_noise;
+ ra->chunk_noise = kcmd->cmd_chunk_noise;
+ ra->thread_delay = kcmd->cmd_thread_delay;
+ ra->flags = kcmd->cmd_flags;
+ ra->stats.wr_data = 0;
+ ra->stats.wr_chunks = 0;
+ ra->stats.rd_data = 0;
+ ra->stats.rd_chunks = 0;
+ ra->region_next = 0;
+ ra->file = file;
+ mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);
+
+ (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);
+
+ rc = zpios_dmu_setup(ra);
+ if (rc) {
+ /* Undo everything done above so the caller has nothing to free */
+ mutex_destroy(&ra->lock_ctl);
+ mutex_destroy(&ra->lock_work);
+ vmem_free(ra, size);
+ *run_args = NULL;
+ }
+
+ return rc;
+}
+
+/*
+ * Hand the calling thread its next chunk of work.
+ *
+ * Starting at run_args->region_next, regions are scanned in order
+ * (wrapping around) under lock_work for one whose write offset, or read
+ * offset when DMU_WRITE is not set in 'flags', still has room for
+ * *chunk_size bytes.  On success *region, *obj and *offset describe the
+ * chunk, the region's offset is advanced past it and region_next is
+ * moved on.
+ *
+ * Returns 1 when a work item was handed out, 0 when every region is
+ * exhausted.
+ */
+static int
+zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
+ __u32 *chunk_size, zpios_region_t **region, __u32 flags)
+{
+ int i, j, count = 0;
+ unsigned int random_int;
+
+ /* NOTE(review): this value looks unused; random_int is refilled
+ * below before its only use.
+ */
+ get_random_bytes(&random_int, sizeof(unsigned int));
+
+ mutex_enter(&run_args->lock_work);
+ i = run_args->region_next;
+
+ /* XXX: I don't much care for this chunk selection mechanism
+ * there's the potential to burn a lot of time here doing nothing
+ * useful while holding the global lock. This could give some
+ * misleading performance results. I'll fix it later.
+ */
+ while (count < run_args->region_count) {
+ __u64 *rw_offset;
+ zpios_time_t *rw_time;
+
+ j = i % run_args->region_count;
+ *region = &(run_args->regions[j]);
+
+ if (flags & DMU_WRITE) {
+ rw_offset = &((*region)->wr_offset);
+ rw_time = &((*region)->stats.wr_time);
+ } else {
+ rw_offset = &((*region)->rd_offset);
+ rw_time = &((*region)->stats.rd_time);
+ }
+
+ /* test if region is fully written */
+ if (*rw_offset + *chunk_size > (*region)->max_offset) {
+ i++;
+ count++;
+
+ /* Stamp the stop time the first time the region is
+ * found to be full.
+ */
+ if (unlikely(rw_time->stop.ts_sec == 0) &&
+ unlikely(rw_time->stop.ts_nsec == 0))
+ rw_time->stop = zpios_timespec_now();
+
+ continue;
+ }
+
+ *offset = *rw_offset;
+ *obj = (*region)->obj;
+ *rw_offset += *chunk_size;
+
+ /* update ctl structure */
+ if (run_args->region_noise) {
+ get_random_bytes(&random_int, sizeof(unsigned int));
+ run_args->region_next += random_int % run_args->region_noise;
+ } else {
+ run_args->region_next++;
+ }
+
+ mutex_exit(&run_args->lock_work);
+ return 1;
+ }
+
+ /* nothing left to do */
+ mutex_exit(&run_args->lock_work);
+
+ return 0;
+}
+
+/*
+ * Remove the run's objects when DMU_REMOVE is set, then close the objset.
+ * The work is timed in stats.rm_time and bracketed by the pre/post remove
+ * upcalls; the post upcall receives the result of the last free attempted.
+ */
+static void
+zpios_remove_objects(run_args_t *run_args)
+{
+	zpios_time_t *t = &(run_args->stats.rm_time);
+	__u32 nobjs = 0;
+	int rc = 0, i;
+
+	(void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
+	t->start = zpios_timespec_now();
+
+	/* File per process has one object per region, otherwise the single
+	 * shared object is reachable through region 0.
+	 */
+	if (run_args->flags & DMU_REMOVE)
+		nobjs = (run_args->flags & DMU_FPP) ?
+		    run_args->region_count : 1;
+
+	for (i = 0; i < nobjs; i++) {
+		zpios_region_t *region = &run_args->regions[i];
+
+		rc = zpios_dmu_object_free(run_args, region->obj.os,
+		    region->obj.obj);
+		if (rc)
+			zpios_print(run_args->file,
+			    "Error removing object %d, %d\n",
+			    (int)region->obj.obj, rc);
+	}
+
+	dmu_objset_close(run_args->os);
+
+	t->stop = zpios_timespec_now();
+	t->delta = zpios_timespec_sub(t->stop, t->start);
+	(void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
+}
+
+/*
+ * Release everything hanging off a run_args_t and then the structure
+ * itself: per-thread data, the thread pointer array, the region and
+ * control mutexes.  A NULL run_args is ignored.
+ */
+static void
+zpios_cleanup_run(run_args_t *run_args)
+{
+	thread_data_t *thr;
+	int i, size = 0;
+
+	if (run_args == NULL)
+		return;
+
+	if (run_args->threads != NULL) {
+		for (i = 0; i < run_args->thread_count; i++) {
+			thr = run_args->threads[i];
+			if (!thr)
+				continue;
+
+			mutex_destroy(&thr->lock);
+			kmem_free(thr, sizeof(thread_data_t));
+		}
+
+		kmem_free(run_args->threads,
+		    sizeof(thread_data_t *) * run_args->thread_count);
+	}
+
+	for (i = 0; i < run_args->region_count; i++)
+		mutex_destroy(&run_args->regions[i].lock);
+
+	mutex_destroy(&run_args->lock_work);
+	mutex_destroy(&run_args->lock_ctl);
+
+	/* Same trailing-array size that was added at allocation time */
+	size = run_args->region_count * sizeof(zpios_region_t);
+	vmem_free(run_args, sizeof(*run_args) + size);
+}
+
+/*
+ * Write 'size' bytes from 'buf' to 'object' at 'offset' in one
+ * transaction.
+ *
+ * With DMU_WRITE_NOWAIT the transaction is assigned with TXG_NOWAIT and,
+ * on ERESTART, waited for, aborted and retried from scratch.  Any other
+ * assignment failure is logged and returned.  DMU_WRITE_ZC selects the
+ * zero-copy write flag.
+ *
+ * Returns 0 on success or the dmu_tx_assign() error.
+ */
+static int
+zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
+    uint64_t offset, uint64_t size, const void *buf)
+{
+	struct dmu_tx *tx;
+	int rc, how = TXG_WAIT;
+	int flags = 0;
+
+	if (run_args->flags & DMU_WRITE_NOWAIT)
+		how = TXG_NOWAIT;
+
+	while (1) {
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_write(tx, object, offset, size);
+		rc = dmu_tx_assign(tx, how);
+
+		if (rc) {
+			if (rc == ERESTART && how == TXG_NOWAIT) {
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
+				continue;
+			}
+			/* Newline terminated like every other message so
+			 * it does not run into the next buffer entry.
+			 */
+			zpios_print(run_args->file,
+			    "Error in dmu_tx_assign(), %d\n", rc);
+			dmu_tx_abort(tx);
+			return rc;
+		}
+		break;
+	}
+
+	if (run_args->flags & DMU_WRITE_ZC)
+		flags |= DMU_WRITE_ZEROCOPY;
+
+	dmu_write_impl(os, object, offset, size, buf, tx, flags);
+	dmu_tx_commit(tx);
+
+	return 0;
+}
+
+/*
+ * Read 'size' bytes of 'object' at 'offset' into 'buf', translating the
+ * run's DMU_READ_ZC and DMU_READ_NOPF flags into dmu_read() flags.
+ * Returns whatever dmu_read() returns.
+ */
+static int
+zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
+    uint64_t offset, uint64_t size, void *buf)
+{
+	int zc = (run_args->flags & DMU_READ_ZC) ? DMU_READ_ZEROCOPY : 0;
+	int nopf = (run_args->flags & DMU_READ_NOPF) ?
+	    DMU_READ_NO_PREFETCH : 0;
+
+	return dmu_read(os, object, offset, size, buf, zc | nopf);
+}
+
+/*
+ * Body of each IO kthread created by zpios_threads_run().
+ *
+ * The thread pulls work items and writes them until none are left,
+ * reports completion through threads_done/waitq and then sleeps until
+ * it is woken again.  If its rc was set non-zero in the meantime it
+ * exits; otherwise it repeats the same loop for the read phase.  Per
+ * thread and per region statistics are updated under their own locks.
+ * The thread always leaves through do_exit(0).
+ */
+static int
+zpios_thread_main(void *data)
+{
+ thread_data_t *thr = (thread_data_t *)data;
+ run_args_t *run_args = thr->run_args;
+ zpios_time_t t;
+ dmu_obj_t obj;
+ __u64 offset;
+ __u32 chunk_size;
+ zpios_region_t *region;
+ char *buf;
+ unsigned int random_int;
+ int chunk_noise = run_args->chunk_noise;
+ int chunk_noise_tmp = 0;
+ int thread_delay = run_args->thread_delay;
+ int thread_delay_tmp = 0;
+ int i, rc = 0;
+
+ /* Pick a per-thread adjustment to the chunk size.
+ * NOTE(review): the subtraction is done in unsigned arithmetic and
+ * relies on the conversion back to int to yield a negative value.
+ */
+ if (chunk_noise) {
+ get_random_bytes(&random_int, sizeof(unsigned int));
+ chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise;
+ }
+
+ /* It's OK to vmem_alloc() this memory because it will be copied
+ * in to the slab and pointers to the slab copy will be setup in
+ * the bio when the IO is submitted. This of course is not ideal
+ * since we want a zero-copy IO path if possible. It would be nice
+ * to have direct access to those slab entries.
+ */
+ chunk_size = run_args->chunk_size + chunk_noise_tmp;
+ buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
+ ASSERT(buf);
+
+ /* Trivial data verification pattern for now. */
+ if (run_args->flags & DMU_VERIFY)
+ memset(buf, 'z', chunk_size);
+
+ /* Write phase */
+ mutex_enter(&thr->lock);
+ thr->stats.wr_time.start = zpios_timespec_now();
+ mutex_exit(&thr->lock);
+
+ while (zpios_get_work_item(run_args, &obj, &offset,
+ &chunk_size, &region, DMU_WRITE)) {
+ if (thread_delay) {
+ get_random_bytes(&random_int, sizeof(unsigned int));
+ thread_delay_tmp = random_int % thread_delay;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(thread_delay_tmp); /* In jiffies */
+ }
+
+ t.start = zpios_timespec_now();
+ rc = zpios_dmu_write(run_args, obj.os, obj.obj,
+ offset, chunk_size, buf);
+ t.stop = zpios_timespec_now();
+ t.delta = zpios_timespec_sub(t.stop, t.start);
+
+ if (rc) {
+ zpios_print(run_args->file, "IO error while doing "
+ "dmu_write(): %d\n", rc);
+ break;
+ }
+
+ mutex_enter(&thr->lock);
+ thr->stats.wr_data += chunk_size;
+ thr->stats.wr_chunks++;
+ thr->stats.wr_time.delta = zpios_timespec_add(
+ thr->stats.wr_time.delta, t.delta);
+ mutex_exit(&thr->lock);
+
+ mutex_enter(&region->lock);
+ region->stats.wr_data += chunk_size;
+ region->stats.wr_chunks++;
+ region->stats.wr_time.delta = zpios_timespec_add(
+ region->stats.wr_time.delta, t.delta);
+
+ /* First time region was accessed */
+ if (region->init_offset == offset)
+ region->stats.wr_time.start = t.start;
+
+ mutex_exit(&region->lock);
+ }
+
+ /* Report this thread as finished with the write phase */
+ mutex_enter(&run_args->lock_ctl);
+ run_args->threads_done++;
+ mutex_exit(&run_args->lock_ctl);
+
+ mutex_enter(&thr->lock);
+ thr->rc = rc;
+ thr->stats.wr_time.stop = zpios_timespec_now();
+ mutex_exit(&thr->lock);
+ wake_up(&run_args->waitq);
+
+ /* Sleep until the controller wakes this thread for the next phase.
+ * NOTE(review): the task state is only set after threads_done was
+ * bumped and the waitq woken; if the controller's
+ * wake_up_process() runs before this point the wakeup may be
+ * missed -- confirm.
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+
+ /* Check if we should exit */
+ mutex_enter(&thr->lock);
+ rc = thr->rc;
+ mutex_exit(&thr->lock);
+ if (rc)
+ goto out;
+
+ /* Read phase */
+ mutex_enter(&thr->lock);
+ thr->stats.rd_time.start = zpios_timespec_now();
+ mutex_exit(&thr->lock);
+
+ while (zpios_get_work_item(run_args, &obj, &offset,
+ &chunk_size, &region, DMU_READ)) {
+ if (thread_delay) {
+ get_random_bytes(&random_int, sizeof(unsigned int));
+ thread_delay_tmp = random_int % thread_delay;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(thread_delay_tmp); /* In jiffies */
+ }
+
+ /* Clear the buffer so stale pattern bytes cannot pass the check */
+ if (run_args->flags & DMU_VERIFY)
+ memset(buf, 0, chunk_size);
+
+ t.start = zpios_timespec_now();
+ rc = zpios_dmu_read(run_args, obj.os, obj.obj,
+ offset, chunk_size, buf);
+ t.stop = zpios_timespec_now();
+ t.delta = zpios_timespec_sub(t.stop, t.start);
+
+ if (rc) {
+ zpios_print(run_args->file, "IO error while doing "
+ "dmu_read(): %d\n", rc);
+ break;
+ }
+
+ /* Trivial data verification, expensive! */
+ if (run_args->flags & DMU_VERIFY) {
+ for (i = 0; i < chunk_size; i++) {
+ if (buf[i] != 'z') {
+ zpios_print(run_args->file,
+ "IO verify error: %d/%d/%d\n",
+ (int)obj.obj, (int)offset,
+ (int)chunk_size);
+ /* Only ends the byte scan; rc is unchanged */
+ break;
+ }
+ }
+ }
+
+ mutex_enter(&thr->lock);
+ thr->stats.rd_data += chunk_size;
+ thr->stats.rd_chunks++;
+ thr->stats.rd_time.delta = zpios_timespec_add(
+ thr->stats.rd_time.delta, t.delta);
+ mutex_exit(&thr->lock);
+
+ mutex_enter(&region->lock);
+ region->stats.rd_data += chunk_size;
+ region->stats.rd_chunks++;
+ region->stats.rd_time.delta = zpios_timespec_add(
+ region->stats.rd_time.delta, t.delta);
+
+ /* First time region was accessed */
+ if (region->init_offset == offset)
+ region->stats.rd_time.start = t.start;
+
+ mutex_exit(&region->lock);
+ }
+
+ /* Report this thread as finished with the read phase */
+ mutex_enter(&run_args->lock_ctl);
+ run_args->threads_done++;
+ mutex_exit(&run_args->lock_ctl);
+
+ mutex_enter(&thr->lock);
+ thr->rc = rc;
+ thr->stats.rd_time.stop = zpios_timespec_now();
+ mutex_exit(&thr->lock);
+ wake_up(&run_args->waitq);
+
+out:
+ vmem_free(buf, chunk_size);
+ do_exit(0);
+
+ return rc; /* Unreachable, due to do_exit() */
+}
+
+/* wait_event() condition: non-zero once every thread has reported in. */
+static int
+zpios_thread_done(run_args_t *run_args)
+{
+	int all_done;
+
+	ASSERT(run_args->threads_done <= run_args->thread_count);
+	all_done = (run_args->threads_done == run_args->thread_count);
+
+	return all_done;
+}
+
+/*
+ * Create one kthread per requested thread, drive them through the write
+ * and read phases and fold their statistics into run_args->stats.
+ *
+ * The threads are woken with wake_up_process() at the start of each
+ * phase, and this function waits on run_args->waitq until threads_done
+ * reaches thread_count.  If any thread reported an error in the write
+ * phase, every thread is handed that error and woken so that it exits
+ * instead of starting the read phase.
+ *
+ * Returns 0 on success, -ENOMEM or -EINVAL if the threads could not be
+ * set up, or the first non-zero thread result encountered.
+ */
+static int
+zpios_threads_run(run_args_t *run_args)
+{
+	struct task_struct *tsk, **tsks;
+	thread_data_t *thr = NULL;
+	zpios_time_t *tt = &(run_args->stats.total_time);
+	zpios_time_t *tw = &(run_args->stats.wr_time);
+	zpios_time_t *tr = &(run_args->stats.rd_time);
+	int i, rc = 0, tc = run_args->thread_count;
+
+	tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP);
+	if (tsks == NULL) {
+		rc = -ENOMEM;
+		goto cleanup2;
+	}
+
+	run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP);
+	if (run_args->threads == NULL) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	init_waitqueue_head(&run_args->waitq);
+	run_args->threads_done = 0;
+
+	/* Create all the needed threads which will sleep until awoken */
+	for (i = 0; i < tc; i++) {
+		thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP);
+		if (thr == NULL) {
+			rc = -ENOMEM;
+			goto taskerr;
+		}
+
+		thr->thread_no = i;
+		thr->run_args = run_args;
+		thr->rc = 0;
+		mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
+		run_args->threads[i] = thr;
+
+		tsk = kthread_create(zpios_thread_main, (void *)thr,
+		    "%s/%d", "zpios_io", i);
+		if (IS_ERR(tsk)) {
+			rc = -EINVAL;
+			goto taskerr;
+		}
+
+		tsks[i] = tsk;
+	}
+
+	tt->start = zpios_timespec_now();
+
+	/* Wake up all threads for write phase */
+	(void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
+	for (i = 0; i < tc; i++)
+		wake_up_process(tsks[i]);
+
+	/* Wait for write phase to complete */
+	tw->start = zpios_timespec_now();
+	wait_event(run_args->waitq, zpios_thread_done(run_args));
+	tw->stop = zpios_timespec_now();
+	(void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
+
+	for (i = 0; i < tc; i++) {
+		thr = run_args->threads[i];
+
+		mutex_enter(&thr->lock);
+
+		if (!rc && thr->rc)
+			rc = thr->rc;
+
+		run_args->stats.wr_data += thr->stats.wr_data;
+		run_args->stats.wr_chunks += thr->stats.wr_chunks;
+		mutex_exit(&thr->lock);
+	}
+
+	if (rc) {
+		/* Wake up all threads and tell them to exit */
+		for (i = 0; i < tc; i++) {
+			/* Each thread checks its own thr->rc on wakeup, so
+			 * the error must be stored in every thread's data
+			 * and not just the one left over from the loop above.
+			 */
+			thr = run_args->threads[i];
+
+			mutex_enter(&thr->lock);
+			thr->rc = rc;
+			mutex_exit(&thr->lock);
+
+			wake_up_process(tsks[i]);
+		}
+		goto out;
+	}
+
+	mutex_enter(&run_args->lock_ctl);
+	ASSERT(run_args->threads_done == run_args->thread_count);
+	run_args->threads_done = 0;
+	mutex_exit(&run_args->lock_ctl);
+
+	/* Wake up all threads for read phase */
+	(void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
+	for (i = 0; i < tc; i++)
+		wake_up_process(tsks[i]);
+
+	/* Wait for read phase to complete */
+	tr->start = zpios_timespec_now();
+	wait_event(run_args->waitq, zpios_thread_done(run_args));
+	tr->stop = zpios_timespec_now();
+	(void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
+
+	for (i = 0; i < tc; i++) {
+		thr = run_args->threads[i];
+
+		mutex_enter(&thr->lock);
+
+		if (!rc && thr->rc)
+			rc = thr->rc;
+
+		run_args->stats.rd_data += thr->stats.rd_data;
+		run_args->stats.rd_chunks += thr->stats.rd_chunks;
+		mutex_exit(&thr->lock);
+	}
+out:
+	tt->stop = zpios_timespec_now();
+	tt->delta = zpios_timespec_sub(tt->stop, tt->start);
+	tw->delta = zpios_timespec_sub(tw->stop, tw->start);
+	tr->delta = zpios_timespec_sub(tr->stop, tr->start);
+
+cleanup:
+	/* Only the task pointer array is released here; the per-thread
+	 * data stays attached to run_args for zpios_cleanup_run().
+	 */
+	kmem_free(tsks, sizeof(struct task_struct *) * tc);
+cleanup2:
+	/* Returns first encountered thread error (if any) */
+	return rc;
+
+taskerr:
+	/* Destroy all threads that were created successfully */
+	for (i = 0; i < tc; i++)
+		if (tsks[i] != NULL)
+			(void) kthread_stop(tsks[i]);
+
+	goto cleanup;
+}
+
+/*
+ * Validate a run command, execute it and copy the resulting statistics
+ * into the caller's 'data' buffer.
+ *
+ * Returns -EINVAL for a zero chunk size, region size, thread count or
+ * region count, for flags missing DMU_WRITE or DMU_READ, or for a
+ * zero-copy flag combined with DMU_VERIFY; -ENOSPC when 'data_size' is
+ * too small for the statistics; otherwise the result of setting up or
+ * running the test.
+ */
+static int
+zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
+    int data_size, void *data)
+{
+	run_args_t *run_args = NULL;
+	zpios_stats_t *stats = (zpios_stats_t *)data;
+	int i, n, m, size, rc;
+
+	if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
+	    (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
+		zpios_print(file, "Invalid chunk_size, region_size, "
+		    "thread_count, or region_count, %d\n", -EINVAL);
+		return -EINVAL;
+	}
+
+	if (!(kcmd->cmd_flags & DMU_WRITE) ||
+	    !(kcmd->cmd_flags & DMU_READ)) {
+		zpios_print(file, "Invalid flags, minimally DMU_WRITE "
+		    "and DMU_READ must be set, %d\n", -EINVAL);
+		return -EINVAL;
+	}
+
+	if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
+	    (kcmd->cmd_flags & DMU_VERIFY)) {
+		zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
+		    "with DMU_VERIFY, used for performance analysis "
+		    "only, %d\n", -EINVAL);
+		return -EINVAL;
+	}
+
+	/* Opaque data on return contains structs of the following form:
+	 *
+	 * zpios_stats_t stats[];
+	 * stats[0]     = run_args->stats;
+	 * stats[1-N]   = threads[N]->stats;
+	 * stats[N+1-M] = regions[M]->stats;
+	 *
+	 * Where N is the number of threads, and M is the number of regions.
+	 */
+	size = (sizeof(zpios_stats_t) +
+	    (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) +
+	    (kcmd->cmd_region_count * sizeof(zpios_stats_t)));
+	if (data_size < size) {
+		zpios_print(file, "Invalid size, command data buffer "
+		    "size too small, (%d < %d)\n", data_size, size);
+		return -ENOSPC;
+	}
+
+	rc = zpios_setup_run(&run_args, kcmd, file);
+	if (rc)
+		return rc;
+
+	rc = zpios_threads_run(run_args);
+	zpios_remove_objects(run_args);
+	if (rc)
+		goto cleanup;
+
+	if (stats) {
+		n = 1;
+		m = 1 + kcmd->cmd_thread_count;
+		stats[0] = run_args->stats;
+
+		for (i = 0; i < kcmd->cmd_thread_count; i++)
+			stats[n+i] = run_args->threads[i]->stats;
+
+		for (i = 0; i < kcmd->cmd_region_count; i++)
+			stats[m+i] = run_args->regions[i].stats;
+	}
+
+cleanup:
+	/* The upcall formats fields of run_args, so it has to run before
+	 * zpios_cleanup_run() frees the structure.
+	 */
+	(void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
+
+	zpios_cleanup_run(run_args);
+
+	return rc;
+}
+
+/*
+ * Open handler.  Allocates the per-open zpios_info_t together with its
+ * ZPIOS_INFO_BUFFER_SIZE byte message buffer and hangs it off
+ * file->private_data.  Returns 0, -ENXIO for an out of range minor, or
+ * -ENOMEM if either allocation fails.
+ */
+static int
+zpios_open(struct inode *inode, struct file *file)
+{
+	zpios_info_t *zi;
+
+	if (iminor(inode) >= ZPIOS_MINORS)
+		return -ENXIO;
+
+	zi = (zpios_info_t *)kmem_alloc(sizeof(*zi), KM_SLEEP);
+	if (!zi)
+		return -ENOMEM;
+
+	spin_lock_init(&zi->info_lock);
+	zi->info_size = ZPIOS_INFO_BUFFER_SIZE;
+	zi->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
+	if (!zi->info_buffer) {
+		/* Undo the first allocation before failing the open */
+		kmem_free(zi, sizeof(*zi));
+		return -ENOMEM;
+	}
+
+	/* The head pointer starts at the beginning of the buffer */
+	zi->info_head = zi->info_buffer;
+	file->private_data = (void *)zi;
+
+	return 0;
+}
+
+/*
+ * Release handler.  Frees the message buffer and the zpios_info_t that
+ * are attached to file->private_data.  Returns 0, or -ENXIO for an out
+ * of range minor.
+ */
+static int
+zpios_release(struct inode *inode, struct file *file)
+{
+	zpios_info_t *info;
+
+	if (iminor(inode) >= ZPIOS_MINORS)
+		return -ENXIO;
+
+	info = (zpios_info_t *)file->private_data;
+	ASSERT(info);
+	ASSERT(info->info_buffer);
+
+	/* Buffer first, then the structure that points at it */
+	vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
+	kmem_free(info, sizeof(*info));
+
+	return 0;
+}
+
+/*
+ * ZPIOS_CFG_BUFFER_CLEAR: zero the info buffer and rewind its head
+ * pointer, both under info_lock.  kcfg and arg are unused.  Always
+ * returns 0.
+ */
+static int
+zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
+{
+	zpios_info_t *info = (zpios_info_t *)file->private_data;
+
+	ASSERT(info);
+	ASSERT(info->info_buffer);
+
+	spin_lock(&info->info_lock);
+	/* Rewind first; the head then names the region to wipe */
+	info->info_head = info->info_buffer;
+	memset(info->info_head, 0, info->info_size);
+	spin_unlock(&info->info_lock);
+
+	return 0;
+}
+
+/*
+ * ZPIOS_CFG_BUFFER_SIZE: query or resize the per-open info buffer.
+ *
+ * When kcfg->cfg_arg1 is greater than zero the buffer is replaced by a
+ * zero filled buffer of that size; as much of the old contents as fits
+ * is preserved and the head pointer is reset to the start.  In all cases
+ * the current size is stored in kcfg->cfg_rc1 and kcfg is copied back to
+ * the user address in arg.
+ *
+ * Returns 0 on success, -ENOMEM if the new buffer cannot be allocated,
+ * or -EFAULT if the result cannot be copied to user space.
+ */
+static int
+zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
+{
+	zpios_info_t *info = (zpios_info_t *)file->private_data;
+	char *buf = NULL, *old_buf = NULL;
+	int min, size = 0, old_size = 0;
+
+	ASSERT(info);
+	ASSERT(info->info_buffer);
+
+	/*
+	 * A KM_SLEEP allocation, vmem_free() and copy_to_user() may all
+	 * block, which is not allowed while a spinlock is held.  Allocate
+	 * before taking info_lock and free / copy out after dropping it.
+	 */
+	if (kcfg->cfg_arg1 > 0) {
+		size = kcfg->cfg_arg1;
+		buf = (char *)vmem_alloc(size, KM_SLEEP);
+		if (buf == NULL)
+			return -ENOMEM;
+
+		memset(buf, 0, size);
+	}
+
+	spin_lock(&info->info_lock);
+	if (buf != NULL) {
+		/* Zero fill and truncate contents when copying buffer */
+		min = ((size < info->info_size) ? size : info->info_size);
+		memcpy(buf, info->info_buffer, min);
+
+		old_buf = info->info_buffer;
+		old_size = info->info_size;
+		info->info_size = size;
+		info->info_buffer = buf;
+		info->info_head = info->info_buffer;
+	}
+
+	kcfg->cfg_rc1 = info->info_size;
+	spin_unlock(&info->info_lock);
+
+	if (old_buf != NULL)
+		vmem_free(old_buf, old_size);
+
+	if (copy_to_user((void __user *)arg, kcfg, sizeof(*kcfg)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Handle a ZPIOS_CFG ioctl: copy the zpios_cfg_t in from user space,
+ * check its magic and dispatch on cfg_cmd.  Returns the handler's
+ * result, -EFAULT if the copy in fails, or -EINVAL for a bad magic or
+ * an unknown command.
+ */
+static int
+zpios_ioctl_cfg(struct file *file, unsigned long arg)
+{
+	zpios_cfg_t kcfg;
+
+	if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg)))
+		return -EFAULT;
+
+	if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
+		zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
+		    kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
+		return -EINVAL;
+	}
+
+	switch (kcfg.cfg_cmd) {
+	case ZPIOS_CFG_BUFFER_CLEAR:
+		/* cfg_arg1 - Unused
+		 * cfg_rc1  - Unused
+		 */
+		return zpios_buffer_clear(file, &kcfg, arg);
+	case ZPIOS_CFG_BUFFER_SIZE:
+		/* cfg_arg1 - 0 - query size; >0 resize
+		 * cfg_rc1  - Set to current buffer size
+		 */
+		return zpios_buffer_size(file, &kcfg, arg);
+	default:
+		break;
+	}
+
+	/* Only unrecognised commands reach this point */
+	zpios_print(file, "Bad config command %d\n",
+	    kcfg.cfg_cmd);
+
+	return -EINVAL;
+}
+
+/*
+ * Handle a ZPIOS_CMD ioctl.
+ *
+ * Copies the zpios_cmd_t in from user space, checks its magic, copies in
+ * the optional cmd_data_size bytes of opaque data that follow the
+ * command at cmd_data_str, runs the test and, if it succeeded, copies
+ * the data buffer back out to the same user location.
+ *
+ * Returns the result of zpios_do_one_run(), -EFAULT if a user copy
+ * fails, -EINVAL for a bad magic, or -ENOMEM if the data buffer cannot
+ * be allocated.
+ */
+static int
+zpios_ioctl_cmd(struct file *file, unsigned long arg)
+{
+	zpios_cmd_t kcmd;
+	int rc = -EINVAL;
+	void *data = NULL;
+
+	rc = copy_from_user(&kcmd, (zpios_cmd_t *)arg, sizeof(kcmd));
+	if (rc) {
+		zpios_print(file, "Unable to copy command structure "
+		    "from user to kernel memory, %d\n", rc);
+		return -EFAULT;
+	}
+
+	if (kcmd.cmd_magic != ZPIOS_CMD_MAGIC) {
+		/* Report the magic that was actually expected */
+		zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
+		    kcmd.cmd_magic, ZPIOS_CMD_MAGIC);
+		return -EINVAL;
+	}
+
+	/* Allocate memory for any opaque data the caller needed to pass on */
+	if (kcmd.cmd_data_size > 0) {
+		data = (void *)vmem_alloc(kcmd.cmd_data_size, KM_SLEEP);
+		if (data == NULL) {
+			zpios_print(file, "Unable to vmem_alloc() %ld "
+			    "bytes for data buffer\n",
+			    (long)kcmd.cmd_data_size);
+			return -ENOMEM;
+		}
+
+		rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
+		    cmd_data_str)), kcmd.cmd_data_size);
+		if (rc) {
+			zpios_print(file, "Unable to copy data buffer "
+			    "from user to kernel memory, %d\n", rc);
+			vmem_free(data, kcmd.cmd_data_size);
+			return -EFAULT;
+		}
+	}
+
+	rc = zpios_do_one_run(file, &kcmd, kcmd.cmd_data_size, data);
+
+	/* If the test failed do not copy the stats back out */
+	if (data != NULL && rc == 0) {
+		rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
+		    cmd_data_str)), data, kcmd.cmd_data_size);
+		if (rc) {
+			zpios_print(file, "Unable to copy data buffer "
+			    "from kernel to user memory, %d\n", rc);
+			rc = -EFAULT;
+		}
+	}
+
+	if (data != NULL)
+		vmem_free(data, kcmd.cmd_data_size);
+
+	return rc;
+}
+
+/*
+ * Top level ioctl entry point.  tty ioctls are refused with -ENOTTY and
+ * out of range minors with -ENXIO; ZPIOS_CFG and ZPIOS_CMD are handed
+ * to their handlers and anything else is logged and fails with -EINVAL.
+ */
+static int
+zpios_ioctl(struct inode *inode, struct file *file,
+    unsigned int cmd, unsigned long arg)
+{
+	/* Ignore tty ioctls */
+	if ((cmd & 0xffffff00) == ((int)'T') << 8)
+		return -ENOTTY;
+
+	if (iminor(inode) >= ZPIOS_MINORS)
+		return -ENXIO;
+
+	if (cmd == ZPIOS_CFG)
+		return zpios_ioctl_cfg(file, arg);
+
+	if (cmd == ZPIOS_CMD)
+		return zpios_ioctl_cmd(file, arg);
+
+	zpios_print(file, "Bad ioctl command %d\n", cmd);
+
+	return -EINVAL;
+}
+
+#ifdef CONFIG_COMPAT
+/* Compatibility handler for ioctls from 32-bit ELF binaries */
+static long
+zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return zpios_ioctl(file->f_dentry->d_inode, file, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
+/* I'm not sure why you would want to write in to this buffer from
+ * user space since its principal use is to pass test status info
+ * back to the user space, but I don't see any reason to prevent it.
+ */
+/*
+ * Write handler for the info buffer.
+ *
+ * Copies up to count bytes from user space into the info buffer at
+ * offset *ppos, truncating the request at the end of the buffer, and
+ * advances *ppos by the amount written.
+ *
+ * Returns the number of bytes written, -ENXIO for an out of range
+ * minor, -EFBIG when *ppos is already at or past the end of the buffer,
+ * or -EFAULT if the user copy fails.
+ */
+static ssize_t
+zpios_write(struct file *file, const char __user *buf,
+    size_t count, loff_t *ppos)
+{
+	unsigned int minor = iminor(file->f_dentry->d_inode);
+	zpios_info_t *info = (zpios_info_t *)file->private_data;
+	int rc = 0;
+
+	if (minor >= ZPIOS_MINORS)
+		return -ENXIO;
+
+	ASSERT(info);
+	ASSERT(info->info_buffer);
+
+	/*
+	 * NOTE(review): copy_from_user() can fault and sleep, yet it is
+	 * called with the info_lock spinlock held -- confirm whether a
+	 * bounce buffer is needed here.
+	 */
+	spin_lock(&info->info_lock);
+
+	/* Write beyond EOF */
+	if (*ppos >= info->info_size) {
+		rc = -EFBIG;
+		goto out;
+	}
+
+	/* Resize count if beyond EOF */
+	if (*ppos + count > info->info_size)
+		count = info->info_size - *ppos;
+
+	/* Honour the file offset, matching the bounds checks above */
+	if (copy_from_user(info->info_buffer + *ppos, buf, count)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	*ppos += count;
+	rc = count;
+out:
+	spin_unlock(&info->info_lock);
+	return rc;
+}
+
+/*
+ * Read handler for the info buffer.  Copies up to count bytes starting
+ * at offset *ppos to user space, truncating at the end of the buffer,
+ * and advances *ppos.  Returns the number of bytes read (0 at or past
+ * the end of the buffer), -ENXIO for an out of range minor, or -EFAULT
+ * if the user copy fails.
+ */
+static ssize_t
+zpios_read(struct file *file, char __user *buf,
+    size_t count, loff_t *ppos)
+{
+	zpios_info_t *info = (zpios_info_t *)file->private_data;
+	int nread = 0;
+
+	if (iminor(file->f_dentry->d_inode) >= ZPIOS_MINORS)
+		return -ENXIO;
+
+	ASSERT(info);
+	ASSERT(info->info_buffer);
+
+	spin_lock(&info->info_lock);
+
+	/* Nothing to do for a read beyond EOF */
+	if (*ppos < info->info_size) {
+		/* Resize count if beyond EOF */
+		if (*ppos + count > info->info_size)
+			count = info->info_size - *ppos;
+
+		if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
+			nread = -EFAULT;
+		} else {
+			*ppos += count;
+			nread = count;
+		}
+	}
+
+	spin_unlock(&info->info_lock);
+	return nread;
+}
+
+/*
+ * llseek handler for the info buffer.
+ *
+ * origin 0 (SEEK_SET) uses offset as given, 1 (SEEK_CUR) is relative to
+ * file->f_pos and 2 (SEEK_END) is relative to the current buffer size.
+ *
+ * Returns the new file position, -ENXIO for an out of range minor, or
+ * -EINVAL when origin is not one of the three values above or the
+ * resulting position would be negative.
+ */
+static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
+{
+	unsigned int minor = iminor(file->f_dentry->d_inode);
+	zpios_info_t *info = (zpios_info_t *)file->private_data;
+	/* loff_t so that large positions are not truncated on return */
+	loff_t rc = -EINVAL;
+
+	if (minor >= ZPIOS_MINORS)
+		return -ENXIO;
+
+	ASSERT(info);
+	ASSERT(info->info_buffer);
+
+	spin_lock(&info->info_lock);
+
+	switch (origin) {
+	case 0: /* SEEK_SET - No-op just do it */
+		break;
+	case 1: /* SEEK_CUR - Seek from current */
+		offset = file->f_pos + offset;
+		break;
+	case 2: /* SEEK_END - Seek from end */
+		offset = info->info_size + offset;
+		break;
+	default: /* Unknown origin - reject instead of acting as SEEK_SET */
+		offset = -1;
+		break;
+	}
+
+	if (offset >= 0) {
+		file->f_pos = offset;
+		file->f_version = 0;
+		rc = offset;
+	}
+
+	spin_unlock(&info->info_lock);
+
+	return rc;
+}
+
+static struct cdev zpios_cdev;
+static struct file_operations zpios_fops = {
+ .owner = THIS_MODULE,
+ .open = zpios_open,
+ .release = zpios_release,
+ .ioctl = zpios_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = zpios_compat_ioctl,
+#endif
+ .read = zpios_read,
+ .write = zpios_write,
+ .llseek = zpios_seek,
+};
+ /*
+ * Module initialisation.  Reserves ZPIOS_MINORS minors starting at
+ * (ZPIOS_MAJOR, 0), registers the zpios cdev with zpios_fops, then
+ * creates the class and the device through the spl_* wrappers.
+ *
+ * Returns 0 on success or the error code of the step that failed; the
+ * cdev_add() and class creation failure paths undo the steps that had
+ * already completed before returning.
+ */
+static int
+zpios_init(void)
+{
+ dev_t dev;
+ int rc;
+
+ dev = MKDEV(ZPIOS_MAJOR, 0);
+ if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME)))
+ goto error;
+
+ /* Support for registering a character driver */
+ cdev_init(&zpios_cdev, &zpios_fops);
+ zpios_cdev.owner = THIS_MODULE;
+ kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME);
+ if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) {
+ printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc);
+ kobject_put(&zpios_cdev.kobj);
+ unregister_chrdev_region(dev, ZPIOS_MINORS);
+ goto error;
+ }
+
+ /* Support for udev make driver info available in sysfs */
+ zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME);
+ if (IS_ERR(zpios_class)) {
+ rc = PTR_ERR(zpios_class);
+ printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc);
+ cdev_del(&zpios_cdev);
+ unregister_chrdev_region(dev, ZPIOS_MINORS);
+ goto error;
+ }
+
+ zpios_device = spl_device_create(zpios_class, NULL,
+ dev, NULL, ZPIOS_NAME); /* NOTE(review): result is stored but never checked for an error */
+ return 0;
+error:
+ printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc);
+ return rc;
+}
+
+static int
+zpios_fini(void)
+{
+ dev_t dev = MKDEV(ZPIOS_MAJOR, 0);
+
+ spl_device_destroy(zpios_class, zpios_device, dev);
+ spl_class_destroy(zpios_class);
+ cdev_del(&zpios_cdev);
+ unregister_chrdev_region(dev, ZPIOS_MINORS);
+
+ return 0;
+}
+
+spl_module_init(zpios_init);
+spl_module_exit(zpios_fini);
+
+MODULE_AUTHOR("LLNL / Sun");
+MODULE_DESCRIPTION("Kernel PIOS implementation");
+MODULE_LICENSE("GPL");
diff --git a/scripts/Makefile.am b/scripts/Makefile.am
index a1dfc3871..ed6e5028f 100644
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -4,11 +4,17 @@ nobase_pkglibexec_SCRIPTS += zconfig.sh
nobase_pkglibexec_SCRIPTS += zfs.sh
nobase_pkglibexec_SCRIPTS += zpool-create.sh
nobase_pkglibexec_SCRIPTS += zpool-config/*
+nobase_pkglibexec_SCRIPTS += zpios.sh
+nobase_pkglibexec_SCRIPTS += zpios-sanity.sh
+nobase_pkglibexec_SCRIPTS += zpios-survey.sh
+nobase_pkglibexec_SCRIPTS += zpios-test/*
+nobase_pkglibexec_SCRIPTS += zpios-profile/*
EXTRA_DIST = zfs-update.sh $(nobase_pkglibexec_SCRIPTS)
ZFS=${top_srcdir}/scripts/zfs.sh
ZCONFIG=${top_srcdir}/scripts/zconfig.sh
ZTEST=${top_builddir}/cmd/ztest/ztest
+ZPIOS_SANITY=${top_srcdir}/scripts/zpios-sanity.sh
check:
@echo
@@ -27,3 +33,11 @@ check:
@echo
@$(ZCONFIG)
@echo
+ @echo -n "===================================="
+ @echo -n " ZPIOS "
+ @echo "===================================="
+ @echo
+ @$(ZFS)
+ @$(ZPIOS_SANITY)
+ @$(ZFS) -u
+ @echo
diff --git a/scripts/common.sh b/scripts/common.sh
index a5cfb5380..a840befc1 100755
--- a/scripts/common.sh
+++ b/scripts/common.sh
@@ -27,16 +27,21 @@ RAIDZ2S=()
UDEVDIR=${UDEVDIR:-/usr/libexec/zfs/udev-rules}
ZPOOLDIR=${ZPOOLDIR:-/usr/libexec/zfs/zpool-config}
+ZPIOSDIR=${ZPIOSDIR:-/usr/libexec/zfs/zpios-test}
+ZPIOSPROFILEDIR=${ZPIOSPROFILEDIR:-/usr/libexec/zfs/zpios-profile}
ZDB=${ZDB:-/usr/sbin/zdb}
ZFS=${ZFS:-/usr/sbin/zfs}
ZINJECT=${ZINJECT:-/usr/sbin/zinject}
ZPOOL=${ZPOOL:-/usr/sbin/zpool}
ZTEST=${ZTEST:-/usr/sbin/ztest}
+ZPIOS=${ZPIOS:-/usr/sbin/zpios}
COMMON_SH=${COMMON_SH:-/usr/libexec/zfs/common.sh}
ZFS_SH=${ZFS_SH:-/usr/libexec/zfs/zfs.sh}
ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-/usr/libexec/zfs/zpool-create.sh}
+ZPIOS_SH=${ZPIOS_SH:-/usr/libexec/zfs/zpios.sh}
+ZPIOS_SURVEY_SH=${ZPIOS_SURVEY_SH:-/usr/libexec/zfs/zpios-survey.sh}
LDMOD=${LDMOD:-/sbin/modprobe}
LSMOD=${LSMOD:-/sbin/lsmod}
diff --git a/scripts/zpios-profile/zpios-profile-disk.sh b/scripts/zpios-profile/zpios-profile-disk.sh
new file mode 100755
index 000000000..b56ee1ee4
--- /dev/null
+++ b/scripts/zpios-profile/zpios-profile-disk.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+#
+# /proc/diskstats <after skipping major/minor>
+# Field 1 -- device name
+# Field 2 -- # of reads issued
+# Field 3 -- # of reads merged
+# Field 4 -- # of sectors read
+# Field 5 -- # of milliseconds spent reading
+# Field 6 -- # of writes completed
+# Field 7 -- # of writes merged
+# Field 8 -- # of sectors written
+# Field 9 -- # of milliseconds spent writing
+# Field 10 -- # of I/Os currently in progress
+# Field 11 -- # of milliseconds spent doing I/Os
+# Field 12 -- weighted # of milliseconds spent doing I/Os
+
+PROG=zpios-profile-disk.sh
+
+RUN_PIDS=${0}
+RUN_LOG_DIR=${1}
+RUN_ID=${2}
+
+create_table() {
+ local FIELD=$1
+ local ROW_M=()
+ local ROW_N=()
+ local HEADER=1
+ local STEP=1
+
+ for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do
+ ROW_M=( ${ROW_N[@]} )
+ ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` )
+
+ if [ $HEADER -eq 1 ]; then
+ echo -n "step, "
+ cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", "
+ echo "total"
+ HEADER=0
+ fi
+
+ if [ ${#ROW_M[@]} -eq 0 ]; then
+ continue
+ fi
+
+ if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+ echo "Badly formatted profile data in ${DISK_FILE}"
+ break
+ fi
+
+ TOTAL=0
+ echo -n "${STEP}, "
+ for (( i=0; i<${#ROW_N[@]}; i++ )); do
+ DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
+ let TOTAL=${TOTAL}+${DELTA}
+ echo -n "${DELTA}, "
+ done
+ echo "${TOTAL}, "
+
+ let STEP=${STEP}+1
+ done
+}
+
+create_table_mbs() {
+ local FIELD=$1
+ local TIME=$2
+ local ROW_M=()
+ local ROW_N=()
+ local HEADER=1
+ local STEP=1
+
+ for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do
+ ROW_M=( ${ROW_N[@]} )
+ ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` )
+
+ if [ $HEADER -eq 1 ]; then
+ echo -n "step, "
+ cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", "
+ echo "total"
+ HEADER=0
+ fi
+
+ if [ ${#ROW_M[@]} -eq 0 ]; then
+ continue
+ fi
+
+ if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+ echo "Badly formatted profile data in ${DISK_FILE}"
+ break
+ fi
+
+ TOTAL=0
+ echo -n "${STEP}, "
+ for (( i=0; i<${#ROW_N[@]}; i++ )); do
+ DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
+ MBS=`echo "scale=2; ((${DELTA}*512)/${TIME})/(1024*1024)" | bc`
+ TOTAL=`echo "scale=2; ${TOTAL}+${MBS}" | bc`
+ echo -n "${MBS}, "
+ done
+ echo "${TOTAL}, "
+
+ let STEP=${STEP}+1
+ done
+}
+
+echo
+echo "Reads issued per device"
+create_table 2
+echo
+echo "Reads merged per device"
+create_table 3
+echo
+echo "Sectors read per device"
+create_table 4
+echo "MB/s per device"
+create_table_mbs 4 3
+
+echo
+echo "Writes issued per device"
+create_table 6
+echo
+echo "Writes merged per device"
+create_table 7
+echo
+echo "Sectors written per device"
+create_table 8
+echo "MB/s per device"
+create_table_mbs 8 3
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile-pids.sh b/scripts/zpios-profile/zpios-profile-pids.sh
new file mode 100755
index 000000000..3514b38e2
--- /dev/null
+++ b/scripts/zpios-profile/zpios-profile-pids.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+PROG=zpios-profile-pids.sh
+
+RUN_PIDS=${0}
+RUN_LOG_DIR=${1}
+RUN_ID=${2}
+
+ROW_M=()
+ROW_N=()
+ROW_N_SCHED=()
+ROW_N_WAIT=()
+
+HEADER=1
+STEP=1
+
+for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
+ ROW_M=( ${ROW_N[@]} )
+ ROW_N=( 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 )
+ ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
+ ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
+ ROW_N_NAMES=( `cat ${PID_FILE} | cut -f2 -d' ' | cut -f2 -d'(' |
+ cut -f1 -d')' | cut -f1 -d'/' | tr "\n" "\t"` )
+
+ for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
+ SUM=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
+
+ case ${ROW_N_NAMES[${i}]} in
+ zio_taskq) IDX=0;;
+ zio_req_nul) IDX=1;;
+ zio_irq_nul) IDX=2;;
+ zio_req_rd) IDX=3;;
+ zio_irq_rd) IDX=4;;
+ zio_req_wr) IDX=5;;
+ zio_irq_wr) IDX=6;;
+ zio_req_fr) IDX=7;;
+ zio_irq_fr) IDX=8;;
+ zio_req_cm) IDX=9;;
+ zio_irq_cm) IDX=10;;
+ zio_req_ctl) IDX=11;;
+ zio_irq_ctl) IDX=12;;
+ txg_quiesce) IDX=13;;
+ txg_sync) IDX=14;;
+ txg_timelimit) IDX=15;;
+ arc_reclaim) IDX=16;;
+ l2arc_feed) IDX=17;;
+ zpios_io) IDX=18;;
+ *) continue;;
+ esac
+
+ let ROW_N[${IDX}]=${ROW_N[${IDX}]}+${SUM}
+ done
+
+ if [ $HEADER -eq 1 ]; then
+ echo "step, zio_taskq, zio_req_nul, zio_irq_nul, " \
+ "zio_req_rd, zio_irq_rd, zio_req_wr, zio_irq_wr, " \
+ "zio_req_fr, zio_irq_fr, zio_req_cm, zio_irq_cm, " \
+ "zio_req_ctl, zio_irq_ctl, txg_quiesce, txg_sync, " \
+ "txg_timelimit, arc_reclaim, l2arc_feed, zpios_io, " \
+ "idle"
+ HEADER=0
+ fi
+
+ if [ ${#ROW_M[@]} -eq 0 ]; then
+ continue
+ fi
+
+ if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+ echo "Badly formatted profile data in ${PID_FILE}"
+ break
+ fi
+
+ # Original values are in jiffies and we expect HZ to be 1000
+ # on most 2.6 systems thus we divide by 10 to get a percentage.
+ IDLE=1000
+ echo -n "${STEP}, "
+ for (( i=0; i<${#ROW_N[@]}; i++ )); do
+ DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
+ DELTA_PERCENT=`echo "scale=1; ${DELTA}/10" | bc`
+ let IDLE=${IDLE}-${DELTA}
+ echo -n "${DELTA_PERCENT}, "
+ done
+ ILDE_PERCENT=`echo "scale=1; ${IDLE}/10" | bc`
+ echo "${ILDE_PERCENT}"
+
+ let STEP=${STEP}+1
+done
+
+exit # NOTE(review): unconditional exit; the "Percent of total system time per pid" table below is never reached
+
+echo
+echo "Percent of total system time per pid"
+for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
+ ROW_M=( ${ROW_N[@]} )
+ ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
+ ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
+
+ for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
+ ROW_N[${i}]=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
+ done
+
+ if [ $HEADER -eq 1 ]; then
+ echo -n "step, "
+ cat ${PID_FILE} | cut -f2 -d' ' | tr "\n" ", "
+ echo
+ HEADER=0
+ fi
+
+ if [ ${#ROW_M[@]} -eq 0 ]; then
+ continue
+ fi
+
+ if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+ echo "Badly formatted profile data in ${PID_FILE}"
+ break
+ fi
+
+ # Original values are in jiffies and we expect HZ to be 1000
+ # on most 2.6 systems thus we divide by 10 to get a percentage.
+ echo -n "${STEP}, "
+ for (( i=0; i<${#ROW_N[@]}; i++ )); do
+ DELTA=`echo "scale=1; (${ROW_N[${i}]}-${ROW_M[${i}]})/10" | bc`
+ echo -n "${DELTA}, "
+ done
+
+ echo
+ let STEP=${STEP}+1
+done
+
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile-post.sh b/scripts/zpios-profile/zpios-profile-post.sh
new file mode 100755
index 000000000..3a454ba04
--- /dev/null
+++ b/scripts/zpios-profile/zpios-profile-post.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+PROG=zpios-profile-post.sh
+
+RUN_POST=${0}
+RUN_PHASE=${1}
+RUN_DIR=${2}
+RUN_ID=${3}
+RUN_POOL=${4}
+RUN_CHUNK_SIZE=${5}
+RUN_REGION_SIZE=${6}
+RUN_THRD_COUNT=${7}
+RUN_REGION_COUNT=${8}
+RUN_OFFSET=${9}
+RUN_REGION_NOISE=${10}
+RUN_CHUNK_NOISE=${11}
+RUN_THRD_DELAY=${12}
+RUN_FLAGS=${13}
+RUN_RESULT=${14}
+
+# Summarize system time per process
+zpios_profile_post_pids() {
+ ${PROFILE_PIDS} ${PROFILE_RUN_CR_PIDS_LOG} >${PROFILE_RUN_CR_PIDS_CSV}
+ ${PROFILE_PIDS} ${PROFILE_RUN_WR_PIDS_LOG} >${PROFILE_RUN_WR_PIDS_CSV}
+ ${PROFILE_PIDS} ${PROFILE_RUN_RD_PIDS_LOG} >${PROFILE_RUN_RD_PIDS_CSV}
+ ${PROFILE_PIDS} ${PROFILE_RUN_RM_PIDS_LOG} >${PROFILE_RUN_RM_PIDS_CSV}
+}
+
+zpios_profile_post_disk() {
+ ${PROFILE_DISK} ${PROFILE_RUN_CR_DISK_LOG} >${PROFILE_RUN_CR_DISK_CSV}
+ ${PROFILE_DISK} ${PROFILE_RUN_WR_DISK_LOG} >${PROFILE_RUN_WR_DISK_CSV}
+ ${PROFILE_DISK} ${PROFILE_RUN_RD_DISK_LOG} >${PROFILE_RUN_RD_DISK_CSV}
+ ${PROFILE_DISK} ${PROFILE_RUN_RM_DISK_LOG} >${PROFILE_RUN_RM_DISK_CSV}
+}
+
+# Summarize per device performance
+
+# Stop a user defined profiling script which is gathering additional data
+zpios_profile_post_stop() {
+ local PROFILE_PID=$1
+
+ kill -s SIGHUP `cat ${PROFILE_PID}`
+
+
+ # Sleep waiting for profile script to exit
+ while [ -f ${PROFILE_PID} ]; do
+ sleep 0.01
+ done
+}
+
+zpios_profile_post_proc_stop() {
+ local PROC_DIR=$1
+
+ if [ -f ${PROFILE_ARC_PROC} ]; then
+ cat ${PROFILE_ARC_PROC} >${PROC_DIR}/arcstats.txt
+ fi
+
+ if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then
+ cat ${PROFILE_VDEV_CACHE_PROC} >${PROC_DIR}/vdev_cache_stats.txt
+ fi
+}
+
+zpios_profile_post_oprofile_stop() {
+ local OPROFILE_LOG=$1
+ local OPROFILE_ARGS="-a -g -l -p ${OPROFILE_KERNEL_DIR},${OPROFILE_SPL_DIR},${OPROFILE_ZFS_DIR}"
+
+ /usr/bin/opcontrol --stop >>${OPROFILE_LOG} 2>&1
+ /usr/bin/opcontrol --dump >>${OPROFILE_LOG} 2>&1
+ /usr/bin/opreport ${OPROFILE_ARGS} >${OPROFILE_LOG} 2>&1
+ /usr/bin/oparchive
+}
+
+zpios_profile_post_create() {
+ zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_OPROFILE_LOG}
+ zpios_profile_post_proc_stop ${PROFILE_RUN_CR_DIR}
+ zpios_profile_post_stop ${PROFILE_RUN_CR_PID}
+}
+
+zpios_profile_post_write() {
+ zpios_profile_post_oprofile_stop ${PROFILE_RUN_WR_OPROFILE_LOG}
+ zpios_profile_post_proc_stop ${PROFILE_RUN_WR_DIR}
+ zpios_profile_post_stop ${PROFILE_RUN_WR_PID}
+}
+
+zpios_profile_post_read() {
+ zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_RD_LOG}
+ zpios_profile_post_proc_stop ${PROFILE_RUN_RD_DIR}
+ zpios_profile_post_stop ${PROFILE_RUN_RD_PID}
+}
+
+zpios_profile_post_remove() {
+ zpios_profile_post_oprofile_stop ${PROFILE_RUN_RM_OPROFILE_LOG}
+ zpios_profile_post_proc_stop ${PROFILE_RUN_RM_DIR}
+ zpios_profile_post_stop ${PROFILE_RUN_RM_PID}
+}
+
+# Source global zpios test configuration
+if [ -f ${RUN_DIR}/zpios-config.sh ]; then
+ . ${RUN_DIR}/zpios-config.sh
+fi
+
+# Source global per-run test configuration
+if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then
+ . ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh
+fi
+
+case "${RUN_PHASE}" in
+ post-run)
+ zpios_profile_post_pids
+ zpios_profile_post_disk
+ ;;
+ post-create)
+ zpios_profile_post_create
+ ;;
+ post-write)
+ zpios_profile_post_write
+ ;;
+ post-read)
+ zpios_profile_post_read
+ ;;
+ post-remove)
+ zpios_profile_post_remove
+ ;;
+ *)
+ echo "Usage: ${PROG} {post-run|post-create|post-write|post-read|post-remove}"
+ exit 1
+esac
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile-pre.sh b/scripts/zpios-profile/zpios-profile-pre.sh
new file mode 100755
index 000000000..a2a885798
--- /dev/null
+++ b/scripts/zpios-profile/zpios-profile-pre.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+PROG=zpios-profile-pre.sh
+
+PROFILE_RDY=0
+trap "PROFILE_RDY=1" SIGHUP
+
+RUN_PRE=${0}
+RUN_PHASE=${1}
+RUN_DIR=${2}
+RUN_ID=${3}
+RUN_POOL=${4}
+RUN_CHUNK_SIZE=${5}
+RUN_REGION_SIZE=${6}
+RUN_THRD_COUNT=${7}
+RUN_REGION_COUNT=${8}
+RUN_OFFSET=${9}
+RUN_REGION_NOISE=${10}
+RUN_CHUNK_NOISE=${11}
+RUN_THRD_DELAY=${12}
+RUN_FLAGS=${13}
+RUN_RESULT=${14}
+
+zpios_profile_pre_run_cfg() {
+cat > ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh << EOF
+#
+# Zpios Profiling Configuration for Run ${RUN_ID}
+#
+
+PROFILE_RUN_DIR=${RUN_DIR}/${RUN_ID}
+
+PROFILE_RUN_CR_DIR=${RUN_DIR}/${RUN_ID}/create
+PROFILE_RUN_CR_PID=${RUN_DIR}/${RUN_ID}/create/profile.pid
+PROFILE_RUN_CR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/create/oprofile.txt
+PROFILE_RUN_CR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/create/pids.txt
+PROFILE_RUN_CR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/create/pids.csv
+PROFILE_RUN_CR_DISK_LOG=${RUN_DIR}/${RUN_ID}/create/disk.txt
+PROFILE_RUN_CR_DISK_CSV=${RUN_DIR}/${RUN_ID}/create/disk.csv
+
+PROFILE_RUN_WR_DIR=${RUN_DIR}/${RUN_ID}/write
+PROFILE_RUN_WR_PID=${RUN_DIR}/${RUN_ID}/write/profile.pid
+PROFILE_RUN_WR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/write/oprofile.txt
+PROFILE_RUN_WR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/write/pids.txt
+PROFILE_RUN_WR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/write/pids.csv
+PROFILE_RUN_WR_DISK_LOG=${RUN_DIR}/${RUN_ID}/write/disk.txt
+PROFILE_RUN_WR_DISK_CSV=${RUN_DIR}/${RUN_ID}/write/disk.csv
+
+PROFILE_RUN_RD_DIR=${RUN_DIR}/${RUN_ID}/read
+PROFILE_RUN_RD_PID=${RUN_DIR}/${RUN_ID}/read/profile.pid
+PROFILE_RUN_RD_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/read/oprofile.txt
+PROFILE_RUN_RD_PIDS_LOG=${RUN_DIR}/${RUN_ID}/read/pids.txt
+PROFILE_RUN_RD_PIDS_CSV=${RUN_DIR}/${RUN_ID}/read/pids.csv
+PROFILE_RUN_RD_DISK_LOG=${RUN_DIR}/${RUN_ID}/read/disk.txt
+PROFILE_RUN_RD_DISK_CSV=${RUN_DIR}/${RUN_ID}/read/disk.csv
+
+PROFILE_RUN_RM_DIR=${RUN_DIR}/${RUN_ID}/remove
+PROFILE_RUN_RM_PID=${RUN_DIR}/${RUN_ID}/remove/profile.pid
+PROFILE_RUN_RM_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/remove/oprofile.txt
+PROFILE_RUN_RM_PIDS_LOG=${RUN_DIR}/${RUN_ID}/remove/pids.txt
+PROFILE_RUN_RM_PIDS_CSV=${RUN_DIR}/${RUN_ID}/remove/pids.csv
+PROFILE_RUN_RM_DISK_LOG=${RUN_DIR}/${RUN_ID}/remove/disk.txt
+PROFILE_RUN_RM_DISK_CSV=${RUN_DIR}/${RUN_ID}/remove/disk.csv
+
+# PROFILE_PIDS_LOG=${RUN_DIR}/${RUN_ID}/pids-summary.csv
+# PROFILE_DISK_LOG=${RUN_DIR}/${RUN_ID}/disk-summary.csv
+EOF
+}
+
+zpios_profile_pre_run_args() {
+cat > ${RUN_DIR}/${RUN_ID}/zpios-args.txt << EOF
+#
+# Zpios Arguments for Run ${RUN_ID}
+#
+
+DIR=${RUN_DIR}
+ID=${RUN_ID}
+POOL=${RUN_POOL}
+CHUNK_SIZE=${RUN_CHUNK_SIZE}
+REGION_SIZE=${RUN_REGION_SIZE}
+THRD_COUNT=${RUN_THRD_COUNT}
+REGION_COUNT=${RUN_REGION_COUNT}
+OFFSET=${RUN_OFFSET}
+REGION_NOISE=${RUN_REGION_NOISE}
+CHUNK_NOISE=${RUN_CHUNK_NOISE}
+THRD_DELAY=${RUN_THRD_DELAY}
+FLAGS=${RUN_FLAGS}
+RESULT=${RUN_RESULT}
+EOF
+}
+
+# Spawn a user defined profiling script to gather additional data
+zpios_profile_pre_start() {
+ local PROFILE_PID=$1
+
+ ${PROFILE_USER} ${RUN_PHASE} ${RUN_DIR} ${RUN_ID} &
+ echo "$!" >${PROFILE_PID}
+
+ # Sleep waiting for profile script to be ready, it will
+ # signal us via SIGHUP when it is ready to start profiling.
+ while [ ${PROFILE_RDY} -eq 0 ]; do
+ sleep 0.01
+ done
+}
+
+zpios_profile_post_proc_start() {
+
+ if [ -f ${PROFILE_ARC_PROC} ]; then
+ echo 0 >${PROFILE_ARC_PROC}
+ fi
+
+ if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then
+ echo 0 >${PROFILE_VDEV_CACHE_PROC}
+ fi
+}
+
+zpios_profile_pre_oprofile_start() {
+ local OPROFILE_LOG=$1
+
+ /usr/bin/opcontrol --reset >>${OPROFILE_LOG} 2>&1
+ /usr/bin/opcontrol --start >>${OPROFILE_LOG} 2>&1
+}
+
+zpios_profile_pre_create() {
+ mkdir ${PROFILE_RUN_CR_DIR}
+ zpios_profile_pre_start ${PROFILE_RUN_CR_PID}
+ zpios_profile_post_proc_start
+ zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_OPROFILE_LOG}
+}
+
+zpios_profile_pre_write() {
+ mkdir ${PROFILE_RUN_WR_DIR}
+ zpios_profile_pre_start ${PROFILE_RUN_WR_PID}
+ zpios_profile_post_proc_start
+ zpios_profile_pre_oprofile_start ${PROFILE_RUN_WR_OPROFILE_LOG}
+}
+
+# Start profiling for the read phase: create the phase directory, spawn
+# the user profiling script, reset the proc statistics and start
+# oprofile.  The oprofile log is PROFILE_RUN_RD_OPROFILE_LOG, the name
+# written to zpios-config-run.sh for this phase.
+zpios_profile_pre_read() {
+	mkdir ${PROFILE_RUN_RD_DIR}
+	zpios_profile_pre_start ${PROFILE_RUN_RD_PID}
+	zpios_profile_post_proc_start
+	zpios_profile_pre_oprofile_start ${PROFILE_RUN_RD_OPROFILE_LOG}
+}
+
+zpios_profile_pre_remove() {
+ mkdir ${PROFILE_RUN_RM_DIR}
+ zpios_profile_pre_start ${PROFILE_RUN_RM_PID}
+ zpios_profile_post_proc_start
+ zpios_profile_pre_oprofile_start ${PROFILE_RUN_RM_OPROFILE_LOG}
+}
+
+# Source global zpios test configuration
+if [ -f ${RUN_DIR}/zpios-config.sh ]; then
+ . ${RUN_DIR}/zpios-config.sh
+fi
+
+# Source global per-run test configuration
+if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then
+ . ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh
+fi
+
+case "${RUN_PHASE}" in
+ pre-run)
+ mkdir -p ${RUN_DIR}/${RUN_ID}/
+ zpios_profile_pre_run_cfg
+ zpios_profile_pre_run_args
+ ;;
+ pre-create)
+ zpios_profile_pre_create
+ ;;
+ pre-write)
+ zpios_profile_pre_write
+ ;;
+ pre-read)
+ zpios_profile_pre_read
+ ;;
+ pre-remove)
+ zpios_profile_pre_remove
+ ;;
+ *)
+ echo "Usage: ${PROG} {pre-run|pre-create|pre-write|pre-read|pre-remove}"
+ exit 1
+esac
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile.sh b/scripts/zpios-profile/zpios-profile.sh
new file mode 100755
index 000000000..f4f0ee97f
--- /dev/null
+++ b/scripts/zpios-profile/zpios-profile.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+
+
+PROG=zpios-profile.sh
+
+trap "RUN_DONE=1" SIGHUP
+
+RUN_PHASE=${1}
+RUN_LOG_DIR=${2}
+RUN_ID=${3}
+RUN_DONE=0
+
+POLL_INTERVAL=2.99
+
+# Log these pids, the exact pid numbers will vary from system to system
+# so I harvest pid for all the following type of processes from /proc/<pid>/
+#
+# zio_taskq/#
+# spa_zio_issue/#
+# spa_zio_intr/#
+# txg_quiesce_thr
+# txg_sync_thread
+# txg_timelimit_t
+# arc_reclaim_thr
+# l2arc_feed_thre
+# zpios_io/#
+
+ZIO_TASKQ_PIDS=()
+ZIO_REQ_NUL_PIDS=()
+ZIO_IRQ_NUL_PIDS=()
+ZIO_REQ_RD_PIDS=()
+ZIO_IRQ_RD_PIDS=()
+ZIO_REQ_WR_PIDS=()
+ZIO_IRQ_WR_PIDS=()
+ZIO_REQ_FR_PIDS=()
+ZIO_IRQ_FR_PIDS=()
+ZIO_REQ_CM_PIDS=()
+ZIO_IRQ_CM_PIDS=()
+ZIO_REQ_CTL_PIDS=()
+ZIO_IRQ_CTL_PIDS=()
+
+TXG_QUIESCE_PIDS=()
+TXG_SYNC_PIDS=()
+TXG_TIMELIMIT_PIDS=()
+
+ARC_RECLAIM_PIDS=()
+L2ARC_FEED_PIDS=()
+
+ZPIOS_IO_PIDS=()
+
+show_pids() {
+ echo "* zio_taskq: { ${ZIO_TASKQ_PIDS[@]} } = ${#ZIO_TASKQ_PIDS[@]}"
+ echo "* zio_req_nul: { ${ZIO_REQ_NUL_PIDS[@]} } = ${#ZIO_REQ_NUL_PIDS[@]}"
+ echo "* zio_irq_nul: { ${ZIO_IRQ_NUL_PIDS[@]} } = ${#ZIO_IRQ_NUL_PIDS[@]}"
+ echo "* zio_req_rd: { ${ZIO_REQ_RD_PIDS[@]} } = ${#ZIO_REQ_RD_PIDS[@]}"
+ echo "* zio_irq_rd: { ${ZIO_IRQ_RD_PIDS[@]} } = ${#ZIO_IRQ_RD_PIDS[@]}"
+ echo "* zio_req_wr: { ${ZIO_REQ_WR_PIDS[@]} } = ${#ZIO_REQ_WR_PIDS[@]}"
+ echo "* zio_irq_wr: { ${ZIO_IRQ_WR_PIDS[@]} } = ${#ZIO_IRQ_WR_PIDS[@]}"
+ echo "* zio_req_fr: { ${ZIO_REQ_FR_PIDS[@]} } = ${#ZIO_REQ_FR_PIDS[@]}"
+ echo "* zio_irq_fr: { ${ZIO_IRQ_FR_PIDS[@]} } = ${#ZIO_IRQ_FR_PIDS[@]}"
+ echo "* zio_req_cm: { ${ZIO_REQ_CM_PIDS[@]} } = ${#ZIO_REQ_CM_PIDS[@]}"
+ echo "* zio_irq_cm: { ${ZIO_IRQ_CM_PIDS[@]} } = ${#ZIO_IRQ_CM_PIDS[@]}"
+ echo "* zio_req_ctl: { ${ZIO_REQ_CTL_PIDS[@]} } = ${#ZIO_REQ_CTL_PIDS[@]}"
+ echo "* zio_irq_ctl: { ${ZIO_IRQ_CTL_PIDS[@]} } = ${#ZIO_IRQ_CTL_PIDS[@]}"
+ echo "* txg_quiesce: { ${TXG_QUIESCE_PIDS[@]} } = ${#TXG_QUIESCE_PIDS[@]}"
+ echo "* txg_sync: { ${TXG_SYNC_PIDS[@]} } = ${#TXG_SYNC_PIDS[@]}"
+ echo "* txg_timelimit: { ${TXG_TIMELIMIT_PIDS[@]} } = ${#TXG_TIMELIMIT_PIDS[@]}"
+ echo "* arc_reclaim: { ${ARC_RECLAIM_PIDS[@]} } = ${#ARC_RECLAIM_PIDS[@]}"
+ echo "* l2arc_feed: { ${L2ARC_FEED_PIDS[@]} } = ${#L2ARC_FEED_PIDS[@]}"
+ echo "* zpios_io: { ${ZPIOS_IO_PIDS[@]} } = ${#ZPIOS_IO_PIDS[@]}"
+}
+
+# Add PID to a list of pids when the process name is of the wanted type.
+#
+# Arguments:
+#   $1 - pid being examined
+#   $2 - process name, optionally followed by "/<number>"
+#   $3 - thread type to match against the name with any "/..." removed
+#   $4 - the pids collected so far, as one space separated string
+# Outputs:
+#   The (possibly extended) space separated pid list on stdout.
+#
+# The pid is always appended.  Indexing the list by the thread number
+# let thread 0, or a name with no "/<number>" part, overwrite every pid
+# collected before it.
+check_pid() {
+	local PID=$1
+	local NAME=$2
+	local TYPE=$3
+	# Deliberately unquoted: split the string back into one pid per element
+	local PIDS=( $4 )
+	local NAME_STRING=${NAME%%/*}
+
+	if [ "${NAME_STRING}" == "${TYPE}" ]; then
+		PIDS[${#PIDS[@]}]=${PID}
+	fi
+
+	echo "${PIDS[@]}"
+}
+
+# NOTE: This whole process is crazy slow but it will do for now
+aquire_pids() {
+ echo "--- Aquiring ZFS pids ---"
+
+ for PID in `ls /proc/ | grep [0-9] | sort -n -u`; do
+ if [ ! -e /proc/${PID}/status ]; then
+ continue
+ fi
+
+ NAME=`cat /proc/${PID}/status | head -n1 | cut -f2`
+
+ ZIO_TASKQ_PIDS=( `check_pid ${PID} ${NAME} "zio_taskq" \
+ "$(echo "${ZIO_TASKQ_PIDS[@]}")"` )
+
+ ZIO_REQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_nul" \
+ "$(echo "${ZIO_REQ_NUL_PIDS[@]}")"` )
+
+ ZIO_IRQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_nul" \
+ "$(echo "${ZIO_IRQ_NUL_PIDS[@]}")"` )
+
+ ZIO_REQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_req_rd" \
+ "$(echo "${ZIO_REQ_RD_PIDS[@]}")"` )
+
+ ZIO_IRQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_rd" \
+ "$(echo "${ZIO_IRQ_RD_PIDS[@]}")"` )
+
+ ZIO_REQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_wr" \
+ "$(echo "${ZIO_REQ_WR_PIDS[@]}")"` )
+
+ ZIO_IRQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_wr" \
+ "$(echo "${ZIO_IRQ_WR_PIDS[@]}")"` )
+
+ ZIO_REQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_fr" \
+ "$(echo "${ZIO_REQ_FR_PIDS[@]}")"` )
+
+ ZIO_IRQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_fr" \
+ "$(echo "${ZIO_IRQ_FR_PIDS[@]}")"` )
+
+ ZIO_REQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_req_cm" \
+ "$(echo "${ZIO_REQ_CM_PIDS[@]}")"` )
+
+ ZIO_IRQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_cm" \
+ "$(echo "${ZIO_IRQ_CM_PIDS[@]}")"` )
+
+ ZIO_REQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_ctl" \
+ "$(echo "${ZIO_REQ_CTL_PIDS[@]}")"` )
+
+ ZIO_IRQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_ctl" \
+ "$(echo "${ZIO_IRQ_CTL_PIDS[@]}")"` )
+
+ TXG_QUIESCE_PIDS=( `check_pid ${PID} ${NAME} "txg_quiesce" \
+ "$(echo "${TXG_QUIESCE_PIDS[@]}")"` )
+
+ TXG_SYNC_PIDS=( `check_pid ${PID} ${NAME} "txg_sync" \
+ "$(echo "${TXG_SYNC_PIDS[@]}")"` )
+
+ TXG_TIMELIMIT_PIDS=( `check_pid ${PID} ${NAME} "txg_timelimit" \
+ "$(echo "${TXG_TIMELIMIT_PIDS[@]}")"` )
+
+ ARC_RECLAIM_PIDS=( `check_pid ${PID} ${NAME} "arc_reclaim" \
+ "$(echo "${ARC_RECLAIM_PIDS[@]}")"` )
+
+ L2ARC_FEED_PIDS=( `check_pid ${PID} ${NAME} "l2arc_feed" \
+ "$(echo "${L2ARC_FEED_PIDS[@]}")"` )
+ done
+
+ # Wait for zpios_io threads to start
+ kill -s SIGHUP ${PPID}
+ echo "* Waiting for zpios_io threads to start"
+ while [ ${RUN_DONE} -eq 0 ]; do
+ ZPIOS_IO_PIDS=( `ps ax | grep zpios_io | grep -v grep | \
+ sed 's/^ *//g' | cut -f1 -d' '` )
+ if [ ${#ZPIOS_IO_PIDS[@]} -gt 0 ]; then
+ break;
+ fi
+ sleep 0.1
+ done
+
+ echo "`show_pids`" >${RUN_LOG_DIR}/${RUN_ID}/pids.txt
+}
+
+# Sample /proc/<pid>/stat for every harvested pid, plus /proc/diskstats,
+# roughly every POLL_INTERVAL seconds until RUN_DONE is set by the
+# SIGHUP trap.  Each sample is written to pids-<timestamp> and
+# disk-<timestamp> under ${RUN_LOG_DIR}/${RUN_ID}/.
+log_pids() {
+	echo "--- Logging ZFS profile to ${RUN_LOG_DIR}/${RUN_ID}/ ---"
+	ALL_PIDS=( ${ZIO_TASKQ_PIDS[@]}   \
+	           ${ZIO_REQ_NUL_PIDS[@]} \
+	           ${ZIO_IRQ_NUL_PIDS[@]} \
+	           ${ZIO_REQ_RD_PIDS[@]}  \
+	           ${ZIO_IRQ_RD_PIDS[@]}  \
+	           ${ZIO_REQ_WR_PIDS[@]}  \
+	           ${ZIO_IRQ_WR_PIDS[@]}  \
+	           ${ZIO_REQ_FR_PIDS[@]}  \
+	           ${ZIO_IRQ_FR_PIDS[@]}  \
+	           ${ZIO_REQ_CM_PIDS[@]}  \
+	           ${ZIO_IRQ_CM_PIDS[@]}  \
+	           ${ZIO_REQ_CTL_PIDS[@]} \
+	           ${ZIO_IRQ_CTL_PIDS[@]} \
+	           ${TXG_QUIESCE_PIDS[@]} \
+	           ${TXG_SYNC_PIDS[@]}    \
+	           ${TXG_TIMELIMIT_PIDS[@]} \
+	           ${ARC_RECLAIM_PIDS[@]} \
+	           ${L2ARC_FEED_PIDS[@]}  \
+	           ${ZPIOS_IO_PIDS[@]} )
+
+	while [ ${RUN_DONE} -eq 0 ]; do
+		NOW=`date +%s.%N`
+		LOG_PIDS="${RUN_LOG_DIR}/${RUN_ID}/pids-${NOW}"
+		LOG_DISK="${RUN_LOG_DIR}/${RUN_ID}/disk-${NOW}"
+
+		for PID in "${ALL_PIDS[@]}"; do
+			if [ -z "${PID}" ]; then
+				continue;
+			fi
+
+			if [ -e /proc/${PID}/stat ]; then
+				cat /proc/${PID}/stat | head -n1 >>${LOG_PIDS}
+			else
+				echo "<${PID} exited>" >>${LOG_PIDS}
+			fi
+		done
+
+		cat /proc/diskstats >${LOG_DISK}
+
+		NOW2=`date +%s.%N`
+		DELTA=`echo "${POLL_INTERVAL}-(${NOW2}-${NOW})" | bc`
+
+		# A sample that took longer than the interval leaves a
+		# negative remainder; sample again at once instead of
+		# handing sleep an invalid argument.
+		case "${DELTA}" in
+		-*)
+			;;
+		*)
+			sleep ${DELTA}
+			;;
+		esac
+	done
+}
+
+aquire_pids
+log_pids
+
+# rm ${PROFILE_PID}
+
+exit 0
diff --git a/scripts/zpios-sanity.sh b/scripts/zpios-sanity.sh
new file mode 100755
index 000000000..d27a5b8ca
--- /dev/null
+++ b/scripts/zpios-sanity.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+#
+# ZFS/ZPOOL configuration test script.
+
+SCRIPT_COMMON=common.sh
+if [ -f ./${SCRIPT_COMMON} ]; then
+. ./${SCRIPT_COMMON}
+elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then
+. /usr/libexec/zfs/${SCRIPT_COMMON}
+else
+echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
+fi
+
+PROG=zpios-sanity.sh
+HEADER=
+
+usage() {
+cat << EOF
+USAGE:
+$0 [hv]
+
+DESCRIPTION:
+ ZPIOS sanity tests
+
+OPTIONS:
+ -h Show this message
+ -v Verbose
+ -x Destructive hd/sd/md/dm/ram tests
+ -f Don't prompt due to -x
+
+EOF
+}
+
+# Translate the command line flags into the VERBOSE, DANGEROUS and FORCE
+# switches.  -h prints the help and exits with status 1; an option
+# getopts does not recognise prints the help and exits with the status
+# of usage.
+while getopts 'hvxf' OPTION; do
+	case $OPTION in
+	h) usage; exit 1 ;;
+	v) VERBOSE=1 ;;
+	x) DANGEROUS=1 ;;
+	f) FORCE=1 ;;
+	?) usage; exit ;;
+	esac
+done
+
+# Everything below needs root.
+if [ "$(id -u)" -ne 0 ]; then
+	die "Must run as root"
+fi
+
+zpios_test() {
+ CONFIG=$1
+ TEST=$2
+ LOG=`mktemp`
+
+ ${ZPIOS_SH} -f -c ${CONFIG} -t ${TEST} &>${LOG}
+ if [ $? -ne 0 ]; then
+ if [ ${VERBOSE} ]; then
+ printf "FAIL: %-13s\n" ${CONFIG}
+ cat ${LOG}
+ else
+ if [ ! ${HEADER} ]; then
+ head -2 ${LOG}
+ HEADER=1
+ fi
+
+ printf "FAIL: %-13s" ${CONFIG}
+ tail -1 ${LOG}
+ fi
+ else
+ if [ ${VERBOSE} ]; then
+ cat ${LOG}
+ else
+ if [ ! ${HEADER} ]; then
+ head -2 ${LOG}
+ HEADER=1
+ fi
+
+ tail -1 ${LOG}
+ fi
+ fi
+
+ rm -f ${LOG}
+}
+
+if [ ${DANGEROUS} ] && [ ! ${FORCE} ]; then
+ cat << EOF
+The -x option was passed which will result in UNRECOVERABLE DATA LOSS
+on on the following block devices:
+
+ /dev/sd[abcd]
+ /dev/hda
+ /dev/ram0
+ /dev/md0
+ /dev/dm-0
+
+To continue please confirm by entering YES:
+EOF
+ read CONFIRM
+ if [ ${CONFIRM} != "YES" ] && [ ${CONFIRM} != "yes" ]; then
+ exit 0;
+ fi
+fi
+
+#
+# These configurations are all safe and pose no risk to any data on
+# the system which runs them. They will confine all their IO to a
+# file in /tmp or a loopback device configured to use a file in /tmp.
+#
+SAFE_CONFIGS=( \
+ file-raid0 file-raid10 file-raidz file-raidz2 \
+ lo-raid0 lo-raid10 lo-raidz lo-raidz2 \
+)
+
+#
+# These configurations are down right dangerous. They will attempt
+# to use various real block devices on your system which may contain
+# data you care about. You are STRONGLY advised not to run this unless
+# you are certain there is no data on the system you care about.
+#
+DANGEROUS_CONFIGS=( \
+ hda-raid0 \
+ sda-raid0 \
+ ram0-raid0 \
+ md0-raid10 md0-raid5 \
+ dm0-raid0 \
+)
+
+# The safe configurations are always exercised with the 'tiny' test.
+for CONFIG in "${SAFE_CONFIGS[@]}"; do
+	zpios_test ${CONFIG} tiny
+done
+
+# The dangerous configurations only run when -x was given.
+if [ ${DANGEROUS} ]; then
+	for CONFIG in "${DANGEROUS_CONFIGS[@]}"; do
+		zpios_test ${CONFIG} tiny
+	done
+fi
+
+exit 0
diff --git a/scripts/zpios-survey.sh b/scripts/zpios-survey.sh
new file mode 100755
index 000000000..0b16d1bb8
--- /dev/null
+++ b/scripts/zpios-survey.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+#
+# Wrapper script for easily running a survey of zpios based tests
+#
+
+SCRIPT_COMMON=common.sh
+if [ -f ./${SCRIPT_COMMON} ]; then
+. ./${SCRIPT_COMMON}
+elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then
+. /usr/libexec/zfs/${SCRIPT_COMMON}
+else
+echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
+fi
+
+PROG=zpios-survey.sh
+
+# Print the command line help for zpios-survey.sh on stdout.  The
+# synopsis includes the optional -l flag described under OPTIONS.
+usage() {
+cat << EOF
+USAGE:
+$0 [hvp] <-c config> <-t test> [-l log]
+
+DESCRIPTION:
+ Helper script for easy zpios survey benchmarking.
+
+OPTIONS:
+ -h Show this message
+ -v Verbose
+ -p Enable profiling
+ -c Zpool configuration
+ -t Zpios test
+ -l Zpios survey log
+
+EOF
+}
+
+print_header() {
+tee -a ${ZPIOS_SURVEY_LOG} << EOF
+
+================================================================
+Test: $1
+EOF
+}
+
+# Baseline: an out of the box configuration with no manual tuning.
+# Ideally everything is tuned automatically for the system and this
+# already performs reasonably well.
+#
+# Runs ${ZFS_SH}, then the zpios test, then ${ZFS_SH} -u; all of their
+# output goes to stdout and is appended to ${ZPIOS_SURVEY_LOG}.
+zpios_survey_base() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+baseline"
+	print_header ${TEST_NAME}
+
+	{
+		${ZFS_SH} ${VERBOSE_FLAG}
+		${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST}
+		${ZFS_SH} -u ${VERBOSE_FLAG}
+	} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Disable ZFS's prefetching.  For some reason still not clear to me the
+# current prefetching policy is quite bad for a random workload.
+# Allowing the algorithm to detect a random workload and not do
+# anything may be the way to address this issue.
+#
+# Runs ${ZFS_SH}, then the zpios test with --noprefetch, then
+# ${ZFS_SH} -u; each stage's output goes to stdout and is appended to
+# ${ZPIOS_SURVEY_LOG}.
+zpios_survey_prefetch() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+prefetch"
+	print_header ${TEST_NAME}
+
+	# The pipe is required here: without it "tee -a <log>" is handed
+	# to ${ZFS_SH} as arguments and this stage is never logged.
+	${ZFS_SH} ${VERBOSE_FLAG} | \
+		tee -a ${ZPIOS_SURVEY_LOG}
+	${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+		-o "--noprefetch" | \
+		tee -a ${ZPIOS_SURVEY_LOG}
+	${ZFS_SH} -u ${VERBOSE_FLAG} | \
+		tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Simulating a zerocopy IO path should improve performance by freeing up
+# lots of CPU which is wasted moving data between buffers.
+#
+# Runs ${ZFS_SH}, then the zpios test with --zerocopy, then ${ZFS_SH} -u;
+# all output goes to stdout and is appended to ${ZPIOS_SURVEY_LOG}.
+zpios_survey_zerocopy() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+zerocopy"
+	print_header ${TEST_NAME}
+
+	{
+		${ZFS_SH} ${VERBOSE_FLAG}
+		${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+			-o "--zerocopy"
+		${ZFS_SH} -u ${VERBOSE_FLAG}
+	} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Disabling checksumming should show some (if small) improvement
+# simply due to freeing up a modest amount of CPU.
+#
+# Runs ${ZFS_SH}, then the zpios test with -s "set checksum=off", then
+# ${ZFS_SH} -u; all output goes to stdout and is appended to
+# ${ZPIOS_SURVEY_LOG}.
+zpios_survey_checksum() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+checksum"
+	print_header ${TEST_NAME}
+
+	{
+		${ZFS_SH} ${VERBOSE_FLAG}
+		${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+			-s "set checksum=off"
+		${ZFS_SH} -u ${VERBOSE_FLAG}
+	} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Increasing the pending IO depth also seems to improve things, likely
+# at the expense of latency.  This should be explored more because I'm
+# seeing a much bigger impact there than I would have expected.  There
+# may be some low hanging fruit to be found here.
+#
+# Runs ${ZFS_SH} with zfs_vdev_max_pending=1024, then the zpios test,
+# then ${ZFS_SH} -u; all output goes to stdout and is appended to
+# ${ZPIOS_SURVEY_LOG}.
+zpios_survey_pending() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+pending"
+	print_header ${TEST_NAME}
+
+	{
+		${ZFS_SH} ${VERBOSE_FLAG} zfs="zfs_vdev_max_pending=1024"
+		${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST}
+		${ZFS_SH} -u ${VERBOSE_FLAG}
+	} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# To avoid memory fragmentation issues our slab implementation can be
+# based on a virtual address space.  Interestingly, we take a pretty
+# substantial performance penalty for this somewhere in the low level
+# IO drivers.  If we back the slab with kmem pages we see far better
+# read performance numbers at the cost of memory fragmentation and
+# general system instability due to large allocations.  This may be
+# because of an optimization in the low level drivers due to the
+# contiguous kmem based memory.  This needs to be explained.  The good
+# news here is that with zerocopy interfaces added at the DMU layer we
+# could guarantee kmem based memory for a pool of pages.
+#
+# 0x100 = KMC_KMEM - Force kmem_* based slab
+# 0x200 = KMC_VMEM - Force vmem_* based slab
+#
+# Runs ${ZFS_SH} with zio_bulk_flags=0x100, then the zpios test, then
+# ${ZFS_SH} -u; all output goes to stdout and is appended to
+# ${ZPIOS_SURVEY_LOG}.
+zpios_survey_kmem() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+kmem"
+	print_header ${TEST_NAME}
+
+	{
+		${ZFS_SH} ${VERBOSE_FLAG} zfs="zio_bulk_flags=0x100"
+		${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST}
+		${ZFS_SH} -u ${VERBOSE_FLAG}
+	} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Apply all possible tuning concurrently to get a best case number.
+#
+# Combines both module options, both zpios options and checksum=off;
+# all output goes to stdout and is appended to ${ZPIOS_SURVEY_LOG}.
+zpios_survey_all() {
+	TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+all"
+	print_header ${TEST_NAME}
+
+	{
+		${ZFS_SH} ${VERBOSE_FLAG} \
+			zfs="zfs_vdev_max_pending=1024" \
+			zfs="zio_bulk_flags=0x100"
+		${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+			-o "--noprefetch --zerocopy" \
+			-s "set checksum=off"
+		${ZFS_SH} -u ${VERBOSE_FLAG}
+	} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+
+# Defaults, overridable from the command line below.
+PROFILE=
+ZPOOL_NAME=zpios-survey
+ZPOOL_CONFIG=zpool-config.sh
+ZPIOS_TEST=zpios-test.sh
+ZPIOS_SURVEY_LOG=/dev/null
+
+# -h prints the help and exits with status 1; an option getopts does not
+# recognise prints the help and exits with the status of usage.
+while getopts 'hvpc:t:l:' OPTION; do
+	case $OPTION in
+	h) usage; exit 1 ;;
+	v) VERBOSE=1; VERBOSE_FLAG="-v" ;;
+	p) PROFILE=1; PROFILE_FLAG="-p" ;;
+	c) ZPOOL_CONFIG=${OPTARG} ;;
+	t) ZPIOS_TEST=${OPTARG} ;;
+	l) ZPIOS_SURVEY_LOG=${OPTARG} ;;
+	?) usage; exit ;;
+	esac
+done
+
+if [ "$(id -u)" -ne 0 ]; then
+	die "Must run as root"
+fi
+
+# Run every survey variant in turn, baseline first.
+for SURVEY in base prefetch zerocopy checksum pending kmem all; do
+	zpios_survey_${SURVEY}
+done
+
+exit 0
diff --git a/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh
new file mode 100755
index 000000000..cbd9c697a
--- /dev/null
+++ b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+# --threadcount -t =values
+# --threadcount_low -l =value
+# --threadcount_high -h =value
+# --threadcount_incr -e =value
+# --regioncount -n =values
+# --regioncount_low -i =value
+# --regioncount_high -j =value
+# --regioncount_incr -k =value
+# --offset -o =values
+# --offset_low -m =value
+# --offset_high -q =value
+# --offset_incr -r =value
+# --chunksize -c =values
+# --chunksize_low -a =value
+# --chunksize_high -b =value
+# --chunksize_incr -g =value
+# --regionsize -s =values
+# --regionsize_low -A =value
+# --regionsize_high -B =value
+# --regionsize_incr -C =value
+# --load -L =dmuio|ssf|fpp
+# --pool -p =pool name
+# --name -M =test name
+# --cleanup -x
+# --prerun -P =pre-command
+# --postrun -R =post-command
+# --log -G =log directory
+# --regionnoise -I =shift
+# --chunknoise -N =bytes
+# --threaddelay -T =jiffies
+# --verify -V
+# --zerocopy -z
+# --nowait -O
+# --human-readable -H
+# --verbose -v =increase verbosity
+# --help -? =this help
+
+ZPIOS_CMD="${ZPIOS} \
+ --load=dmuio \
+ --pool=${ZPOOL_NAME} \
+ --name=${ZPOOL_CONFIG} \
+ --threadcount=16 \
+ --regioncount=8192 \
+ --regionsize=4M \
+ --chunksize=1M \
+ --offset=4M \
+ --cleanup \
+ --human-readable \
+ ${ZPIOS_OPTIONS}"
+
+# Run the command held in ${ZPIOS_CMD}.  With ${VERBOSE} set, --verbose
+# is appended to the command and the command line is echoed first.  If
+# the command fails the shell exits with status 1.
+zpios_start() {
+	if [ ${VERBOSE} ]; then
+		ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+		echo ${ZPIOS_CMD}
+	fi
+
+	if ! ${ZPIOS_CMD}; then
+		exit 1
+	fi
+}
+
+# Print a blank line after the run, but only when ${VERBOSE} is set.
+# When it is not set the function returns the failed test's status.
+zpios_stop() {
+	test ${VERBOSE} && echo
+}
diff --git a/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh
new file mode 100755
index 000000000..cd3c50b77
--- /dev/null
+++ b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#
+# Usage: zpios
+# --threadcount -t =values
+# --threadcount_low -l =value
+# --threadcount_high -h =value
+# --threadcount_incr -e =value
+# --regioncount -n =values
+# --regioncount_low -i =value
+# --regioncount_high -j =value
+# --regioncount_incr -k =value
+# --offset -o =values
+# --offset_low -m =value
+# --offset_high -q =value
+# --offset_incr -r =value
+# --chunksize -c =values
+# --chunksize_low -a =value
+# --chunksize_high -b =value
+# --chunksize_incr -g =value
+# --regionsize -s =values
+# --regionsize_low -A =value
+# --regionsize_high -B =value
+# --regionsize_incr -C =value
+# --load -L =dmuio|ssf|fpp
+# --pool -p =pool name
+# --name -M =test name
+# --cleanup -x
+# --prerun -P =pre-command
+# --postrun -R =post-command
+# --log -G =log directory
+# --regionnoise -I =shift
+# --chunknoise -N =bytes
+# --threaddelay -T =jiffies
+# --verify -V
+# --zerocopy -z
+# --nowait -O
+# --human-readable -H
+# --verbose -v =increase verbosity
+# --help -? =this help
+
+
+ZPIOS_CMD="${ZPIOS} \
+ --load=dmuio \
+ --pool=${ZPOOL_NAME} \
+ --name=${ZPOOL_CONFIG} \
+ --threadcount=1 \
+ --regioncount=16 \
+ --regionsize=4M \
+ --chunksize=1M \
+ --offset=4M \
+ --cleanup \
+ --human-readable \
+ ${ZPIOS_OPTIONS}"
+
+# Run the command held in ${ZPIOS_CMD}.  With ${VERBOSE} set, --verbose
+# is appended to the command and the command line is echoed first.  If
+# the command fails the shell exits with status 1.
+zpios_start() {
+	if [ ${VERBOSE} ]; then
+		ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+		echo ${ZPIOS_CMD}
+	fi
+
+	if ! ${ZPIOS_CMD}; then
+		exit 1
+	fi
+}
+
+# Print a blank line after the run, but only when ${VERBOSE} is set.
+# When it is not set the function returns the failed test's status.
+zpios_stop() {
+	test ${VERBOSE} && echo
+}
diff --git a/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh
new file mode 100755
index 000000000..743e97b64
--- /dev/null
+++ b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+# --threadcount -t =values
+# --threadcount_low -l =value
+# --threadcount_high -h =value
+# --threadcount_incr -e =value
+# --regioncount -n =values
+# --regioncount_low -i =value
+# --regioncount_high -j =value
+# --regioncount_incr -k =value
+# --offset -o =values
+# --offset_low -m =value
+# --offset_high -q =value
+# --offset_incr -r =value
+# --chunksize -c =values
+# --chunksize_low -a =value
+# --chunksize_high -b =value
+# --chunksize_incr -g =value
+# --regionsize -s =values
+# --regionsize_low -A =value
+# --regionsize_high -B =value
+# --regionsize_incr -C =value
+# --load -L =dmuio|ssf|fpp
+# --pool -p =pool name
+# --name -M =test name
+# --cleanup -x
+# --prerun -P =pre-command
+# --postrun -R =post-command
+# --log -G =log directory
+# --regionnoise -I =shift
+# --chunknoise -N =bytes
+# --threaddelay -T =jiffies
+# --verify -V
+# --zerocopy -z
+# --nowait -O
+# --human-readable -H
+# --verbose -v =increase verbosity
+# --help -? =this help
+
+ZPIOS_CMD="${ZPIOS} \
+ --load=dmuio \
+ --pool=${ZPOOL_NAME} \
+ --name=${ZPOOL_CONFIG} \
+ --threadcount=1,2,4,8,16,32,64,128,256 \
+ --regioncount=65536 \
+ --regionsize=4M \
+ --chunksize=1M \
+ --offset=4M \
+ --cleanup \
+ --human-readable \
+ ${ZPIOS_OPTIONS}"
+
+# Run the command held in ${ZPIOS_CMD}.  With ${VERBOSE} set, --verbose
+# is appended to the command and the command line is echoed first.  If
+# the command fails the shell exits with status 1.
+zpios_start() {
+	if [ ${VERBOSE} ]; then
+		ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+		echo ${ZPIOS_CMD}
+	fi
+
+	if ! ${ZPIOS_CMD}; then
+		exit 1
+	fi
+}
+
+# Print a blank line after the run, but only when ${VERBOSE} is set.
+# When it is not set the function returns the failed test's status.
+zpios_stop() {
+	test ${VERBOSE} && echo
+}
diff --git a/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh
new file mode 100755
index 000000000..92a3b77b4
--- /dev/null
+++ b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+# --threadcount -t =values
+# --threadcount_low -l =value
+# --threadcount_high -h =value
+# --threadcount_incr -e =value
+# --regioncount -n =values
+# --regioncount_low -i =value
+# --regioncount_high -j =value
+# --regioncount_incr -k =value
+# --offset -o =values
+# --offset_low -m =value
+# --offset_high -q =value
+# --offset_incr -r =value
+# --chunksize -c =values
+# --chunksize_low -a =value
+# --chunksize_high -b =value
+# --chunksize_incr -g =value
+# --regionsize -s =values
+# --regionsize_low -A =value
+# --regionsize_high -B =value
+# --regionsize_incr -C =value
+# --load -L =dmuio|ssf|fpp
+# --pool -p =pool name
+# --name -M =test name
+# --cleanup -x
+# --prerun -P =pre-command
+# --postrun -R =post-command
+# --log -G =log directory
+# --regionnoise -I =shift
+# --chunknoise -N =bytes
+# --threaddelay -T =jiffies
+# --verify -V
+# --zerocopy -z
+# --nowait -O
+# --human-readable -H
+# --verbose -v =increase verbosity
+# --help -? =this help
+
+ZPIOS_CMD="${ZPIOS} \
+ --load=dmuio \
+ --pool=${ZPOOL_NAME} \
+ --name=${ZPOOL_CONFIG} \
+ --threadcount=256 \
+ --regioncount=65536 \
+ --regionsize=4M \
+ --chunksize=1M \
+ --offset=4M \
+ --cleanup \
+ --human-readable \
+ ${ZPIOS_OPTIONS}"
+
+# Run the command held in ${ZPIOS_CMD}.  With ${VERBOSE} set, --verbose
+# is appended to the command and the command line is echoed first.  If
+# the command fails the shell exits with status 1.
+zpios_start() {
+	if [ ${VERBOSE} ]; then
+		ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+		echo ${ZPIOS_CMD}
+	fi
+
+	if ! ${ZPIOS_CMD}; then
+		exit 1
+	fi
+}
+
+# Print a blank line after the run, but only when ${VERBOSE} is set.
+# When it is not set the function returns the failed test's status.
+zpios_stop() {
+	test ${VERBOSE} && echo
+}
diff --git a/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh
new file mode 100755
index 000000000..0db952cd6
--- /dev/null
+++ b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+# --threadcount -t =values
+# --threadcount_low -l =value
+# --threadcount_high -h =value
+# --threadcount_incr -e =value
+# --regioncount -n =values
+# --regioncount_low -i =value
+# --regioncount_high -j =value
+# --regioncount_incr -k =value
+# --offset -o =values
+# --offset_low -m =value
+# --offset_high -q =value
+# --offset_incr -r =value
+# --chunksize -c =values
+# --chunksize_low -a =value
+# --chunksize_high -b =value
+# --chunksize_incr -g =value
+# --regionsize -s =values
+# --regionsize_low -A =value
+# --regionsize_high -B =value
+# --regionsize_incr -C =value
+# --load -L =dmuio|ssf|fpp
+# --pool -p =pool name
+# --name -M =test name
+# --cleanup -x
+# --prerun -P =pre-command
+# --postrun -R =post-command
+# --log -G =log directory
+# --regionnoise -I =shift
+# --chunknoise -N =bytes
+# --threaddelay -T =jiffies
+# --verify -V
+# --zerocopy -z
+# --nowait -O
+# --human-readable -H
+# --verbose -v =increase verbosity
+# --help -? =this help
+
+ZPIOS_CMD="${ZPIOS} \
+ --load=dmuio \
+ --pool=${ZPOOL_NAME} \
+ --name=${ZPOOL_CONFIG} \
+ --threadcount=4 \
+ --regioncount=1024 \
+ --regionsize=4M \
+ --chunksize=1M \
+ --offset=4M \
+ --cleanup \
+ --human-readable \
+ ${ZPIOS_OPTIONS}"
+
+# Run the command held in ${ZPIOS_CMD}.  With ${VERBOSE} set, --verbose
+# is appended to the command and the command line is echoed first.  If
+# the command fails the shell exits with status 1.
+zpios_start() {
+	if [ ${VERBOSE} ]; then
+		ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+		echo ${ZPIOS_CMD}
+	fi
+
+	if ! ${ZPIOS_CMD}; then
+		exit 1
+	fi
+}
+
+# Print a blank line after the run, but only when ${VERBOSE} is set.
+# When it is not set the function returns the failed test's status.
+zpios_stop() {
+	test ${VERBOSE} && echo
+}
diff --git a/scripts/zpios-test/large-thread-survey.sh b/scripts/zpios-test/large-thread-survey.sh
new file mode 120000
index 000000000..90b6e3c47
--- /dev/null
+++ b/scripts/zpios-test/large-thread-survey.sh
@@ -0,0 +1 @@
+1x256th-65536rc-4rs-1cs-4off.sh \ No newline at end of file
diff --git a/scripts/zpios-test/large.sh b/scripts/zpios-test/large.sh
new file mode 120000
index 000000000..b8e22bf54
--- /dev/null
+++ b/scripts/zpios-test/large.sh
@@ -0,0 +1 @@
+256th-65536rc-4rs-1cs-4off.sh \ No newline at end of file
diff --git a/scripts/zpios-test/medium.sh b/scripts/zpios-test/medium.sh
new file mode 120000
index 000000000..d81027b73
--- /dev/null
+++ b/scripts/zpios-test/medium.sh
@@ -0,0 +1 @@
+16th-8192rc-4rs-1cs-4off.sh \ No newline at end of file
diff --git a/scripts/zpios-test/small.sh b/scripts/zpios-test/small.sh
new file mode 120000
index 000000000..cbf03b5ce
--- /dev/null
+++ b/scripts/zpios-test/small.sh
@@ -0,0 +1 @@
+4th-1024rc-4rs-1cs-4off.sh \ No newline at end of file
diff --git a/scripts/zpios-test/tiny.sh b/scripts/zpios-test/tiny.sh
new file mode 120000
index 000000000..ba8b7cd0c
--- /dev/null
+++ b/scripts/zpios-test/tiny.sh
@@ -0,0 +1 @@
+1th-16rc-4rs-1cs-4off.sh \ No newline at end of file
diff --git a/scripts/zpios.sh b/scripts/zpios.sh
new file mode 100755
index 000000000..6e9b3f50d
--- /dev/null
+++ b/scripts/zpios.sh
@@ -0,0 +1,266 @@
+#!/bin/bash
+#
+# Wrapper script for easily running zpios based tests
+#
+
+SCRIPT_COMMON=common.sh
+if [ -f ./${SCRIPT_COMMON} ]; then
+. ./${SCRIPT_COMMON}
+elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then
+. /usr/libexec/zfs/${SCRIPT_COMMON}
+else
+echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
+fi
+
+PROG=zpios.sh
+DATE=`date +%Y%m%d-%H%M%S`
+if [ "${ZPIOS_MODULES}" ]; then
+ MODULES=(${ZPIOS_MODULES[*]})
+else
+ MODULES=(zpios)
+fi
+
+# Print the command line help for zpios.sh on stdout.  The synopsis
+# includes the optional -o, -l and -s flags described under OPTIONS.
+usage() {
+cat << EOF
+USAGE:
+$0 [hvp] <-c config> <-t test> [-o options] [-l options] [-s options]
+
+DESCRIPTION:
+ Helper script for easy zpios benchmarking.
+
+OPTIONS:
+ -h Show this message
+ -v Verbose
+ -p Enable profiling
+ -c Zpool configuration
+ -t Zpios test
+ -o Additional zpios options
+ -l Additional zpool options
+ -s Additional zfs options
+
+EOF
+}
+
+# Print the results banner: current date, running kernel release and the
+# most recent SPL / ZFS "Loaded ..." lines found in dmesg.
+print_header() {
+	echo "--------------------- ZPIOS RESULTS ----------------------------"
+	echo "Date: $(date)"
+	echo "Kernel: $(uname -r)"
+	dmesg | grep "Loaded Solaris Porting Layer" | tail -n1
+	dmesg | grep "Loaded ZFS Filesystem" | tail -n1
+	echo
+}
+
+print_spl_info() {
+ echo --------------------- SPL Tunings ------------------------------
+ ${SYSCTL} -A | grep spl
+
+ if [ -d /sys/module/spl/parameters ]; then
+ grep [0-9] /sys/module/spl/parameters/*
+ else
+ grep [0-9] /sys/module/spl/*
+ fi
+
+ echo
+}
+
+print_zfs_info() {
+ echo --------------------- ZFS Tunings ------------------------------
+ ${SYSCTL} -A | grep zfs
+
+ if [ -d /sys/module/zfs/parameters ]; then
+ grep [0-9] /sys/module/zfs/parameters/*
+ else
+ grep [0-9] /sys/module/zfs/*
+ fi
+
+ echo
+}
+
+# Print post-run statistics: the SPL stack_max sysctl entries, then the
+# ARC, VDEV cache and SPL slab proc files, each only when present.
+print_stats() {
+	local KSTAT=/proc/spl/kstat
+
+	echo ---------------------- Statistics -------------------------------
+	${SYSCTL} -A | grep spl | grep stack_max
+
+	if [ -d ${KSTAT}/ ] && [ -f ${KSTAT}/zfs/arcstats ]; then
+		echo "* ARC"
+		cat ${KSTAT}/zfs/arcstats
+		echo
+	fi
+
+	if [ -d ${KSTAT}/ ] && [ -f ${KSTAT}/zfs/vdev_cache_stats ]; then
+		echo "* VDEV Cache"
+		cat ${KSTAT}/zfs/vdev_cache_stats
+		echo
+	fi
+
+	if [ -f /proc/spl/kmem/slab ]; then
+		echo "* SPL SLAB"
+		cat /proc/spl/kmem/slab
+		echo
+	fi
+
+	echo
+}
+
+check_test() {
+
+ if [ ! -f ${ZPIOS_TEST} ]; then
+ local NAME=`basename ${ZPIOS_TEST} .sh`
+ ERROR="Unknown test '${NAME}', available tests are:\n"
+
+ for TST in `ls ${ZPIOSDIR}/ | grep ".sh"`; do
+ local NAME=`basename ${TST} .sh`
+ ERROR="${ERROR}${NAME}\n"
+ done
+
+ return 1
+ fi
+
+ return 0
+}
+
+# Generate ${PROFILE_DIR}/zpios-config.sh, the settings file for the
+# profiling helpers.  The here-document is unquoted, so every variable
+# and the kernel release are expanded now, while the file is written.
+zpios_profile_config() {
+cat << EOF > ${PROFILE_DIR}/zpios-config.sh
+#
+# Zpios Profiling Configuration
+#
+
+PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE}
+PROFILE_PRE=${ZPIOSPROFILEDIR}/zpios-profile-pre.sh
+PROFILE_POST=${ZPIOSPROFILEDIR}/zpios-profile-post.sh
+PROFILE_USER=${ZPIOSPROFILEDIR}/zpios-profile.sh
+PROFILE_PIDS=${ZPIOSPROFILEDIR}/zpios-profile-pids.sh
+PROFILE_DISK=${ZPIOSPROFILEDIR}/zpios-profile-disk.sh
+PROFILE_ARC_PROC=/proc/spl/kstat/zfs/arcstats
+PROFILE_VDEV_CACHE_PROC=/proc/spl/kstat/zfs/vdev_cache_stats
+
+OPROFILE_KERNEL="/boot/vmlinux-$(uname -r)"
+OPROFILE_KERNEL_DIR="/lib/modules/$(uname -r)/kernel/"
+OPROFILE_SPL_DIR=${SPLBUILD}/module/
+OPROFILE_ZFS_DIR=${MODDIR}
+
+EOF
+}
+
+zpios_profile_start() {
+ PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE}
+
+ mkdir -p ${PROFILE_DIR}
+ zpios_profile_config
+ . ${PROFILE_DIR}/zpios-config.sh
+
+ ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --log=${PROFILE_DIR}"
+ ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --prerun=${PROFILE_PRE}"
+ ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --postrun=${PROFILE_POST}"
+
+ /usr/bin/opcontrol --init
+ /usr/bin/opcontrol --setup --vmlinux=${OPROFILE_KERNEL}
+}
+
+zpios_profile_stop() {
+ /usr/bin/opcontrol --shutdown
+ /usr/bin/opcontrol --deinit
+}
+
+PROFILE=
+ZPOOL_CONFIG=zpool-config.sh
+ZPIOS_TEST=zpios-test.sh
+ZPOOL_NAME=zpios
+ZPIOS_OPTIONS=
+ZPOOL_OPTIONS=""
+ZFS_OPTIONS=""
+
+while getopts 'hvpc:t:o:l:s:' OPTION; do
+ case $OPTION in
+ h)
+ usage
+ exit 1
+ ;;
+ v)
+ VERBOSE=1
+ VERBOSE_FLAG="-v"
+ ;;
+ p)
+ PROFILE=1
+ ;;
+ c)
+ ZPOOL_CONFIG=${OPTARG}
+ ;;
+ t)
+ ZPIOS_TEST_ARG=${OPTARG}
+ ZPIOS_TEST=${ZPIOSDIR}/${OPTARG}.sh
+ ;;
+ o)
+ ZPIOS_OPTIONS=${OPTARG}
+ ;;
+ l) # Passed through to zpool-create.sh
+ ZPOOL_OPTIONS=${OPTARG}
+ ;;
+ s) # Passed through to zpool-create.sh
+ ZFS_OPTIONS=${OPTARG}
+ ;;
+ ?)
+ usage
+ exit
+ ;;
+ esac
+done
+
+if [ $(id -u) != 0 ]; then
+ die "Must run as root"
+fi
+
+# Validate and source your test config
+check_test || die "${ERROR}"
+. ${ZPIOS_TEST}
+
+# Pull in the zpios test module if it is not loaded.  If this fails it is
+# likely because the full module stack was not yet loaded with zfs.sh
+if check_modules && ! load_modules; then
+	die "Run 'zfs.sh' to ensure the full module stack is loaded"
+fi
+
+# Wait for device creation
+until [ -c /dev/zpios ]; do
+	sleep 1
+done
+
+if [ ${VERBOSE} ]; then
+	print_header
+	print_spl_info
+	print_zfs_info
+fi
+
+# Create the zpool configuration
+if ! ${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} -p ${ZPOOL_NAME} \
+    -c ${ZPOOL_CONFIG} -l "${ZPOOL_OPTIONS}" -s "${ZFS_OPTIONS}"; then
+	exit 1
+fi
+
+# Run the test, bracketed by profiling setup/teardown when -p was given
+if [ ${PROFILE} ]; then
+	zpios_profile_start
+fi
+
+zpios_start
+zpios_stop
+
+if [ ${PROFILE} ]; then
+	zpios_profile_stop
+fi
+
+if [ ${VERBOSE} ]; then
+	print_stats
+fi
+
+# Destroy the zpool configuration
+if ! ${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} -p ${ZPOOL_NAME} \
+    -c ${ZPOOL_CONFIG} -d; then
+	exit 1
+fi
+
+# Unload the test module stack and wait for device removal
+unload_modules
+until [ ! -c /dev/zpios ]; do
+	sleep 1
+done
+
+exit 0