diff options
198 files changed, 16876 insertions, 491 deletions
@@ -1 +1 @@ -zfs-branch +linux-debug-zerocopy @@ -1,6 +1,6 @@ From: Brian Behlendorf <[email protected]> -Subject: [PATCH] linux docs +Subject: [PATCH] linux arc -Documentation branch used simply to track historical results. +Linux VM arc integration. Signed-off-by: Brian Behlendorf <[email protected]> @@ -333,7 +333,7 @@ - Minor build system improvements - Minor script improvements - Create a full copy and not a link tree with quilt - - KPIOS_MAJOR changed from 231 to 232 + - ZPIOS_MAJOR changed from 231 to 232 - BIO_RW_BARRIER flag removed from IO request 2008-06-30 Brian Behlendorf <[email protected]> diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 86ec885bc..42e6d9c3c 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1 +1 @@ -SUBDIRS = zfs zpool zdb zinject ztest +SUBDIRS = zfs zpool zdb zinject ztest zpios diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index ce8434958..2cab6da0f 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2395,7 +2395,7 @@ main(int argc, char **argv) kernel_init(FREAD); g_zfs = libzfs_init(); - ASSERT(g_zfs != NULL); + VERIFY(g_zfs != NULL); for (c = 0; c < 256; c++) { if (dump_all && c != 'l' && c != 'R') diff --git a/cmd/zpios/Makefile.am b/cmd/zpios/Makefile.am new file mode 100644 index 000000000..4e13a76c9 --- /dev/null +++ b/cmd/zpios/Makefile.am @@ -0,0 +1,12 @@ +include $(top_srcdir)/config/Rules.am + +DEFAULT_INCLUDES += \ + -I${top_srcdir}/module/zpios/include + +sbin_PROGRAMS = zpios + +zpios_SOURCES = \ + $(top_srcdir)/cmd/zpios/zpios_main.c \ + $(top_srcdir)/cmd/zpios/zpios_util.c \ + $(top_srcdir)/cmd/zpios/zpios.h + diff --git a/cmd/zpios/zpios.h b/cmd/zpios/zpios.h new file mode 100644 index 000000000..d88af0b04 --- /dev/null +++ b/cmd/zpios/zpios.h @@ -0,0 +1,120 @@ +/* + * This file is part of the ZFS Linux port. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * LLNL-CODE-403049 + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _ZPIOS_H +#define _ZPIOS_H + +#include <zpios-ctl.h> + +#define VERSION_SIZE 64 + +/* Regular expressions */ +#define REGEX_NUMBERS "^[0-9]*[0-9]$" +#define REGEX_NUMBERS_COMMA "^([0-9]+,)*[0-9]+$" +#define REGEX_SIZE "^[0-9][0-9]*[kmgt]$" +#define REGEX_SIZE_COMMA "^([0-9][0-9]*[kmgt]+,)*[0-9][0-9]*[kmgt]$" + +/* Flags for low, high, incr */ +#define FLAG_SET 0x01 +#define FLAG_LOW 0x02 +#define FLAG_HIGH 0x04 +#define FLAG_INCR 0x08 + +#define TRUE 1 +#define FALSE 0 + +#define KB (1024) +#define MB (KB * 1024) +#define GB (MB * 1024) +#define TB (GB * 1024) + +#define KMGT_SIZE 16 + +/* All offsets, sizes and counts can be passed to the application in + * multiple ways. + * 1. a value (stored in val[0], val_count will be 1) + * 2. a comma separated list of values (stored in val[], using val_count) + * 3. 
a range and block sizes, low, high, factor (val_count must be 0) + */ +typedef struct pios_range_repeat { + uint64_t val[32]; /* Comma sep array, or low, high, inc */ + uint64_t val_count; /* Num of values */ + uint64_t val_low; + uint64_t val_high; + uint64_t val_inc_perc; + uint64_t next_val; /* Used for multiple runs in get_next() */ +} range_repeat_t; + +typedef struct cmd_args { + range_repeat_t T; /* Thread count */ + range_repeat_t N; /* Region count */ + range_repeat_t O; /* Offset count */ + range_repeat_t C; /* Chunksize */ + range_repeat_t S; /* Regionsize */ + + const char *pool; /* Pool */ + const char *name; /* Name */ + uint32_t flags; /* Flags */ + uint32_t io_type; /* DMUIO only */ + uint32_t verbose; /* Verbose */ + uint32_t human_readable; /* Human readable output */ + + uint64_t regionnoise; /* Region noise */ + uint64_t chunknoise; /* Chunk noise */ + uint64_t thread_delay; /* Thread delay */ + + char pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */ + char post[ZPIOS_PATH_SIZE]; /* Post-exec hook */ + char log[ZPIOS_PATH_SIZE]; /* Requested log dir */ + + /* Control */ + int current_id; + uint64_t current_T; + uint64_t current_N; + uint64_t current_C; + uint64_t current_S; + uint64_t current_O; + + uint32_t rc; +} cmd_args_t; + +int set_count(char *pattern1, char *pattern2, range_repeat_t *range, + char *optarg, uint32_t *flags, char *arg); +int set_lhi(char *pattern, range_repeat_t *range, char *optarg, + int flag, uint32_t *flag_thread, char *arg); +int set_noise(uint64_t *noise, char *optarg, char *arg); +int set_load_params(cmd_args_t *args, char *optarg); +int check_mutual_exclusive_command_lines(uint32_t flag, char *arg); +void print_stats_header(cmd_args_t *args); +void print_stats(cmd_args_t *args, zpios_cmd_t *cmd); + +#endif /* _ZPIOS_H */ diff --git a/cmd/zpios/zpios_main.c b/cmd/zpios/zpios_main.c new file mode 100644 index 000000000..f774682ac --- /dev/null +++ b/cmd/zpios/zpios_main.c @@ -0,0 +1,631 @@ +/* + * This file is part of the 
ZFS Linux port. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * LLNL-CODE-403049 + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Kernel PIOS DMU implementation originally derived from PIOS test code. + * Character control interface derived from SPL code. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <getopt.h> +#include <assert.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include "zpios.h" + +static const char short_opt[] = "t:l:h:e:n:i:j:k:o:m:q:r:c:a:b:g:s:A:B:C:" + "L:p:M:xP:R:G:I:N:T:VzOfHv?"; +static const struct option long_opt[] = { + {"threadcount", required_argument, 0, 't' }, + {"threadcount_low", required_argument, 0, 'l' }, + {"threadcount_high", required_argument, 0, 'h' }, + {"threadcount_incr", required_argument, 0, 'e' }, + {"regioncount", required_argument, 0, 'n' }, + {"regioncount_low", required_argument, 0, 'i' }, + {"regioncount_high", required_argument, 0, 'j' }, + {"regioncount_incr", required_argument, 0, 'k' }, + {"offset", required_argument, 0, 'o' }, + {"offset_low", required_argument, 0, 'm' }, + {"offset_high", required_argument, 0, 'q' }, + {"offset_incr", required_argument, 0, 'r' }, + {"chunksize", required_argument, 0, 'c' }, + {"chunksize_low", required_argument, 0, 'a' }, + {"chunksize_high", required_argument, 0, 'b' }, + {"chunksize_incr", required_argument, 0, 'g' }, + {"regionsize", required_argument, 0, 's' }, + {"regionsize_low", required_argument, 0, 'A' }, + {"regionsize_high", required_argument, 0, 'B' }, + {"regionsize_incr", required_argument, 0, 'C' }, + {"load", required_argument, 0, 'L' }, + {"pool", required_argument, 0, 'p' }, + {"name", required_argument, 0, 'M' }, + {"cleanup", no_argument, 0, 'x' }, + {"prerun", required_argument, 0, 'P' }, + {"postrun", required_argument, 0, 'R' }, + {"log", required_argument, 0, 'G' }, + {"regionnoise", required_argument, 0, 'I' }, + {"chunknoise", required_argument, 0, 'N' }, + {"threaddelay", required_argument, 0, 'T' }, + {"verify", no_argument, 0, 'V' }, + {"zerocopy", no_argument, 0, 'z' }, + {"nowait", no_argument, 0, 'O' }, + {"noprefetch", no_argument, 0, 'f' }, + {"human-readable", no_argument, 0, 'H' }, + {"verbose", no_argument, 0, 'v' }, 
+ {"help", no_argument, 0, '?' }, + { 0, 0, 0, 0 }, +}; + +static int zpiosctl_fd; /* Control file descriptor */ +static char zpios_version[VERSION_SIZE]; /* Kernel version string */ +static char *zpios_buffer = NULL; /* Scratch space area */ +static int zpios_buffer_size = 0; /* Scratch space size */ + +static int +usage(void) +{ + fprintf(stderr, "Usage: zpios\n"); + fprintf(stderr, + " --threadcount -t =values\n" + " --threadcount_low -l =value\n" + " --threadcount_high -h =value\n" + " --threadcount_incr -e =value\n" + " --regioncount -n =values\n" + " --regioncount_low -i =value\n" + " --regioncount_high -j =value\n" + " --regioncount_incr -k =value\n" + " --offset -o =values\n" + " --offset_low -m =value\n" + " --offset_high -q =value\n" + " --offset_incr -r =value\n" + " --chunksize -c =values\n" + " --chunksize_low -a =value\n" + " --chunksize_high -b =value\n" + " --chunksize_incr -g =value\n" + " --regionsize -s =values\n" + " --regionsize_low -A =value\n" + " --regionsize_high -B =value\n" + " --regionsize_incr -C =value\n" + " --load -L =dmuio|ssf|fpp\n" + " --pool -p =pool name\n" + " --name -M =test name\n" + " --cleanup -x\n" + " --prerun -P =pre-command\n" + " --postrun -R =post-command\n" + " --log -G =log directory\n" + " --regionnoise -I =shift\n" + " --chunknoise -N =bytes\n" + " --threaddelay -T =jiffies\n" + " --verify -V\n" + " --zerocopy -z\n" + " --nowait -O\n" + " --noprefetch -f\n" + " --human-readable -H\n" + " --verbose -v =increase verbosity\n" + " --help -? 
=this help\n\n"); + + return 0; +} + +static void args_fini(cmd_args_t *args) +{ + assert(args != NULL); + free(args); +} + +static cmd_args_t * +args_init(int argc, char **argv) +{ + cmd_args_t *args; + uint32_t fl_th = 0; + uint32_t fl_rc = 0; + uint32_t fl_of = 0; + uint32_t fl_rs = 0; + uint32_t fl_cs = 0; + int c, rc; + + if (argc == 1) { + usage(); + return (cmd_args_t *)NULL; + } + + /* Configure and populate the args structures */ + args = malloc(sizeof(*args)); + if (args == NULL) + return NULL; + + memset(args, 0, sizeof(*args)); + + while ((c=getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1) { + rc = 0; + + switch (c) { + case 't': /* --thread count */ + rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA, + &args->T, optarg, &fl_th, "threadcount"); + break; + case 'l': /* --threadcount_low */ + rc = set_lhi(REGEX_NUMBERS, &args->T, optarg, + FLAG_LOW, &fl_th, "threadcount_low"); + break; + case 'h': /* --threadcount_high */ + rc = set_lhi(REGEX_NUMBERS, &args->T, optarg, + FLAG_HIGH, &fl_th, "threadcount_high"); + break; + case 'e': /* --threadcount_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->T, optarg, + FLAG_INCR, &fl_th, "threadcount_incr"); + break; + case 'n': /* --regioncount */ + rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA, + &args->N, optarg, &fl_rc, "regioncount"); + break; + case 'i': /* --regioncount_low */ + rc = set_lhi(REGEX_NUMBERS, &args->N, optarg, + FLAG_LOW, &fl_rc, "regioncount_low"); + break; + case 'j': /* --regioncount_high */ + rc = set_lhi(REGEX_NUMBERS, &args->N, optarg, + FLAG_HIGH, &fl_rc, "regioncount_high"); + break; + case 'k': /* --regioncount_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->N, optarg, + FLAG_INCR, &fl_rc, "regioncount_incr"); + break; + case 'o': /* --offset */ + rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA, + &args->O, optarg, &fl_of, "offset"); + break; + case 'm': /* --offset_low */ + rc = set_lhi(REGEX_SIZE, &args->O, optarg, + FLAG_LOW, &fl_of, "offset_low"); + break; + case 'q': /* 
--offset_high */ + rc = set_lhi(REGEX_SIZE, &args->O, optarg, + FLAG_HIGH, &fl_of, "offset_high"); + break; + case 'r': /* --offset_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->O, optarg, + FLAG_INCR, &fl_of, "offset_incr"); + break; + case 'c': /* --chunksize */ + rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA, + &args->C, optarg, &fl_cs, "chunksize"); + break; + case 'a': /* --chunksize_low */ + rc = set_lhi(REGEX_SIZE, &args->C, optarg, + FLAG_LOW, &fl_cs, "chunksize_low"); + break; + case 'b': /* --chunksize_high */ + rc = set_lhi(REGEX_SIZE, &args->C, optarg, + FLAG_HIGH, &fl_cs, "chunksize_high"); + break; + case 'g': /* --chunksize_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->C, optarg, + FLAG_INCR, &fl_cs, "chunksize_incr"); + break; + case 's': /* --regionsize */ + rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA, + &args->S, optarg, &fl_rs, "regionsize"); + break; + case 'A': /* --regionsize_low */ + rc = set_lhi(REGEX_SIZE, &args->S, optarg, + FLAG_LOW, &fl_rs, "regionsize_low"); + break; + case 'B': /* --regionsize_high */ + rc = set_lhi(REGEX_SIZE, &args->S, optarg, + FLAG_HIGH, &fl_rs, "regionsize_high"); + break; + case 'C': /* --regionsize_inc */ + rc = set_lhi(REGEX_NUMBERS, &args->S, optarg, + FLAG_INCR, &fl_rs, "regionsize_incr"); + break; + case 'L': /* --load */ + rc = set_load_params(args, optarg); + break; + case 'p': /* --pool */ + args->pool = optarg; + break; + case 'M': + args->name = optarg; + break; + case 'x': /* --cleanup */ + args->flags |= DMU_REMOVE; + break; + case 'P': /* --prerun */ + strncpy(args->pre, optarg, ZPIOS_PATH_SIZE - 1); + break; + case 'R': /* --postrun */ + strncpy(args->post, optarg, ZPIOS_PATH_SIZE - 1); + break; + case 'G': /* --log */ + strncpy(args->log, optarg, ZPIOS_PATH_SIZE - 1); + break; + case 'I': /* --regionnoise */ + rc = set_noise(&args->regionnoise, optarg, "regionnoise"); + break; + case 'N': /* --chunknoise */ + rc = set_noise(&args->chunknoise, optarg, "chunknoise"); + break; + case 'T': /* --threaddelay 
*/ + rc = set_noise(&args->thread_delay, optarg, "threaddelay"); + break; + case 'V': /* --verify */ + args->flags |= DMU_VERIFY; + break; + case 'z': /* --zerocopy */ + args->flags |= (DMU_WRITE_ZC | DMU_READ_ZC); + break; + case 'O': /* --nowait */ + args->flags |= DMU_WRITE_NOWAIT; + break; + case 'f': /* --noprefetch */ + args->flags |= DMU_READ_NOPF; + break; + case 'H': /* --human-readable */ + args->human_readable = 1; + break; + case 'v': /* --verbose */ + args->verbose++; + break; + case '?': + rc = 1; + break; + default: + fprintf(stderr,"Unknown option '%s'\n",argv[optind-1]); + rc = EINVAL; + break; + } + + if (rc) { + usage(); + args_fini(args); + return NULL; + } + } + + check_mutual_exclusive_command_lines(fl_th, "threadcount"); + check_mutual_exclusive_command_lines(fl_rc, "regioncount"); + check_mutual_exclusive_command_lines(fl_of, "offset"); + check_mutual_exclusive_command_lines(fl_rs, "regionsize"); + check_mutual_exclusive_command_lines(fl_cs, "chunksize"); + + if (args->pool == NULL) { + fprintf(stderr, "Error: Pool not specificed\n"); + usage(); + args_fini(args); + return NULL; + } + + if ((args->flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && + (args->flags & DMU_VERIFY)) { + fprintf(stderr, "Error, --zerocopy incompatible --verify, " + "used for performance analysis only\n"); + usage(); + args_fini(args); + return NULL; + } + + return args; +} + +static int +dev_clear(void) +{ + zpios_cfg_t cfg; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cfg_magic = ZPIOS_CFG_MAGIC; + cfg.cfg_cmd = ZPIOS_CFG_BUFFER_CLEAR; + cfg.cfg_arg1 = 0; + + rc = ioctl(zpiosctl_fd, ZPIOS_CFG, &cfg); + if (rc) + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) ZPIOS_CFG, cfg.cfg_cmd, errno); + + lseek(zpiosctl_fd, 0, SEEK_SET); + + return rc; +} + +/* Passing a size of zero simply results in querying the current size */ +static int +dev_size(int size) +{ + zpios_cfg_t cfg; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cfg_magic = ZPIOS_CFG_MAGIC; + 
cfg.cfg_cmd = ZPIOS_CFG_BUFFER_SIZE; + cfg.cfg_arg1 = size; + + rc = ioctl(zpiosctl_fd, ZPIOS_CFG, &cfg); + if (rc) { + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) ZPIOS_CFG, cfg.cfg_cmd, errno); + return rc; + } + + return cfg.cfg_rc1; +} + +static void +dev_fini(void) +{ + if (zpios_buffer) + free(zpios_buffer); + + if (zpiosctl_fd != -1) { + if (close(zpiosctl_fd) == -1) { + fprintf(stderr, "Unable to close %s: %d\n", + ZPIOS_DEV, errno); + } + } +} + +static int +dev_init(void) +{ + int rc; + + zpiosctl_fd = open(ZPIOS_DEV, O_RDONLY); + if (zpiosctl_fd == -1) { + fprintf(stderr, "Unable to open %s: %d\n" + "Is the zpios module loaded?\n", ZPIOS_DEV, errno); + rc = errno; + goto error; + } + + if ((rc = dev_clear())) + goto error; + + if ((rc = dev_size(0)) < 0) + goto error; + + zpios_buffer_size = rc; + zpios_buffer = (char *)malloc(zpios_buffer_size); + if (zpios_buffer == NULL) { + rc = ENOMEM; + goto error; + } + + memset(zpios_buffer, 0, zpios_buffer_size); + return 0; +error: + if (zpiosctl_fd != -1) { + if (close(zpiosctl_fd) == -1) { + fprintf(stderr, "Unable to close %s: %d\n", + ZPIOS_DEV, errno); + } + } + + return rc; +} + +static int +get_next(uint64_t *val, range_repeat_t *range) +{ + /* if low, incr, high is given */ + if (range->val_count == 0) { + *val = (range->val_low) + + (range->val_low * range->next_val / 100); + + if (*val > range->val_high) + return 0; /* No more values, limit exceeded */ + + if (!range->next_val) + range->next_val = range->val_inc_perc; + else + range->next_val = range->next_val+range->val_inc_perc; + + return 1; /* more values to come */ + + /* if only one val is given */ + } else if (range->val_count == 1) { + if (range->next_val) + return 0; /* No more values, we only have one */ + + *val = range->val[0]; + range->next_val = 1; + return 1; /* more values to come */ + + /* if comma separated values are given */ + } else if (range->val_count > 1) { + if (range->next_val > range->val_count - 1) + 
return 0; /* No more values, limit exceeded */ + + *val = range->val[range->next_val]; + range->next_val++; + return 1; /* more values to come */ + } + + return 0; +} + +static int +run_one(cmd_args_t *args, uint32_t id, uint32_t T, uint32_t N, + uint64_t C, uint64_t S, uint64_t O) +{ + zpios_cmd_t *cmd; + int rc, rc2, cmd_size; + + dev_clear(); + + cmd_size = sizeof(zpios_cmd_t) + ((T + N + 1) * sizeof(zpios_stats_t)); + cmd = (zpios_cmd_t *)malloc(cmd_size); + if (cmd == NULL) + return ENOMEM; + + memset(cmd, 0, cmd_size); + cmd->cmd_magic = ZPIOS_CMD_MAGIC; + strncpy(cmd->cmd_pool, args->pool, ZPIOS_NAME_SIZE - 1); + strncpy(cmd->cmd_pre, args->pre, ZPIOS_PATH_SIZE - 1); + strncpy(cmd->cmd_post, args->post, ZPIOS_PATH_SIZE - 1); + strncpy(cmd->cmd_log, args->log, ZPIOS_PATH_SIZE - 1); + cmd->cmd_id = id; + cmd->cmd_chunk_size = C; + cmd->cmd_thread_count = T; + cmd->cmd_region_count = N; + cmd->cmd_region_size = S; + cmd->cmd_offset = O; + cmd->cmd_region_noise = args->regionnoise; + cmd->cmd_chunk_noise = args->chunknoise; + cmd->cmd_thread_delay = args->thread_delay; + cmd->cmd_flags = args->flags; + cmd->cmd_data_size = (T + N + 1) * sizeof(zpios_stats_t); + + rc = ioctl(zpiosctl_fd, ZPIOS_CMD, cmd); + if (rc) + args->rc = errno; + + print_stats(args, cmd); + + if (args->verbose) { + rc2 = read(zpiosctl_fd, zpios_buffer, zpios_buffer_size - 1); + if (rc2 < 0) { + fprintf(stdout, "Error reading results: %d\n", rc2); + } else if ((rc2 > 0) && (strlen(zpios_buffer) > 0)) { + fprintf(stdout, "\n%s\n", zpios_buffer); + fflush(stdout); + } + } + + free(cmd); + + return rc; +} + +static int +run_offsets(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next(&args->current_O, &args->O)) { + rc = run_one(args, args->current_id, + args->current_T, args->current_N, args->current_C, + args->current_S, args->current_O); + args->current_id++; + } + + args->O.next_val = 0; + return rc; +} + +static int +run_region_counts(cmd_args_t *args) +{ + int rc = 0; + + 
while (rc == 0 && get_next((uint64_t *)&args->current_N, &args->N)) + rc = run_offsets(args); + + args->N.next_val = 0; + return rc; +} + +static int +run_region_sizes(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next(&args->current_S, &args->S)) { + if (args->current_S < args->current_C) { + fprintf(stderr, "Error: in any run chunksize can " + "not be smaller than regionsize.\n"); + return EINVAL; + } + + rc = run_region_counts(args); + } + + args->S.next_val = 0; + return rc; +} + +static int +run_chunk_sizes(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next(&args->current_C, &args->C)) { + rc = run_region_sizes(args); + } + + args->C.next_val = 0; + return rc; +} + +static int +run_thread_counts(cmd_args_t *args) +{ + int rc = 0; + + while (rc == 0 && get_next((uint64_t *)&args->current_T, &args->T)) + rc = run_chunk_sizes(args); + + return rc; +} + +int +main(int argc, char **argv) +{ + cmd_args_t *args; + int rc = 0; + + /* Argument init and parsing */ + if ((args = args_init(argc, argv)) == NULL) { + rc = -1; + goto out; + } + + /* Device specific init */ + if ((rc = dev_init())) + goto out; + + /* Generic kernel version string */ + if (args->verbose) + fprintf(stdout, "%s", zpios_version); + + print_stats_header(args); + rc = run_thread_counts(args); +out: + if (args != NULL) + args_fini(args); + + dev_fini(); + return rc; +} diff --git a/cmd/zpios/zpios_util.c b/cmd/zpios/zpios_util.c new file mode 100644 index 000000000..e42d06f11 --- /dev/null +++ b/cmd/zpios/zpios_util.c @@ -0,0 +1,456 @@ +/* + * This file is part of the ZFS Linux port. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * LLNL-CODE-403049 + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Kernel PIOS DMU implementation originally derived from PIOS test code. + * Character control interface derived from SPL code. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <regex.h> +#include "zpios.h" + +/* extracts an unsigned int (64) and K,M,G,T from the string */ +/* and returns a 64 bit value converted to the proper units */ +static int +kmgt_to_uint64(const char *str, uint64_t *val) +{ + char *endptr; + int rc = 0; + + *val = strtoll(str, &endptr, 0); + if ((str == endptr) && (*val == 0)) + return EINVAL; + + switch (endptr[0]) { + case 'k': case 'K': + *val = (*val) << 10; + break; + case 'm': case 'M': + *val = (*val) << 20; + break; + case 'g': case 'G': + *val = (*val) << 30; + break; + case 't': case 'T': + *val = (*val) << 40; + break; + case '\0': + break; + default: + rc = EINVAL; + } + + return rc; +} + +static char * +uint64_to_kmgt(char *str, uint64_t val) +{ + char postfix[] = "kmgt"; + int i = -1; + + while ((val >= KB) && (i < 4)) { + val = (val >> 10); + i++; + } + + if (i >= 4) + (void)snprintf(str, KMGT_SIZE-1, "inf"); + else + (void)snprintf(str, KMGT_SIZE-1, "%lu%c", (unsigned long)val, + (i == -1) ? '\0' : postfix[i]); + + return str; +} + +static char * +kmgt_per_sec(char *str, uint64_t v, double t) +{ + char postfix[] = "kmgt"; + double val = ((double)v) / t; + int i = -1; + + while ((val >= (double)KB) && (i < 4)) { + val /= (double)KB; + i++; + } + + if (i >= 4) + (void)snprintf(str, KMGT_SIZE-1, "inf"); + else + (void)snprintf(str, KMGT_SIZE-1, "%.2f%c", val, + (i == -1) ? '\0' : postfix[i]); + + return str; +} + +static char * +print_flags(char *str, uint32_t flags) +{ + str[0] = (flags & DMU_WRITE) ? 'w' : '-'; + str[1] = (flags & DMU_READ) ? 'r' : '-'; + str[2] = (flags & DMU_VERIFY) ? 'v' : '-'; + str[3] = (flags & DMU_REMOVE) ? 'c' : '-'; + str[4] = (flags & DMU_FPP) ? 'p' : 's'; + str[5] = (flags & (DMU_WRITE_ZC | DMU_READ_ZC)) ? 'z' : '-'; + str[6] = (flags & DMU_WRITE_NOWAIT) ? 
'O' : '-'; + str[7] = '\0'; + + return str; +} + +static int +regex_match(const char *string, char *pattern) +{ + regex_t re = { 0 }; + int rc; + + rc = regcomp(&re, pattern, REG_EXTENDED | REG_NOSUB | REG_ICASE); + if (rc) { + fprintf(stderr, "Error: Couldn't do regcomp, %d\n", rc); + return rc; + } + + rc = regexec(&re, string, (size_t) 0, NULL, 0); + regfree(&re); + + return rc; +} + +/* fills the pios_range_repeat structure of comma separated values */ +static int +split_string(const char *optarg, char *pattern, range_repeat_t *range) +{ + const char comma[] = ","; + char *cp, *token[32]; + int rc, i = 0; + + if ((rc = regex_match(optarg, pattern))) + return rc; + + cp = strdup(optarg); + if (cp == NULL) + return ENOMEM; + + do { + /* STRTOK(3) Each subsequent call, with a null pointer as the + * value of the * first argument, starts searching from the + * saved pointer and behaves as described above. + */ + token[i] = strtok(cp, comma); + cp = NULL; + } while ((token[i++] != NULL) && (i < 32)); + + range->val_count = i - 1; + + for (i = 0; i < range->val_count; i++) + kmgt_to_uint64(token[i], &range->val[i]); + + free(cp); + return 0; +} + +int +set_count(char *pattern1, char *pattern2, range_repeat_t *range, + char *optarg, uint32_t *flags, char *arg) +{ + if (flags) + *flags |= FLAG_SET; + + range->next_val = 0; + + if (regex_match(optarg, pattern1) == 0) { + kmgt_to_uint64(optarg, &range->val[0]); + range->val_count = 1; + } else if (split_string(optarg, pattern2, range) < 0) { + fprintf(stderr, "Error: Incorrect pattern for %s, '%s'\n", + arg, optarg); + return EINVAL; + } + + return 0; +} + +/* validates the value with regular expression and sets low, high, incr + * according to value at which flag will be set. Sets the flag after. 
*/ +int +set_lhi(char *pattern, range_repeat_t *range, char *optarg, + int flag, uint32_t *flag_thread, char *arg) +{ + int rc; + + if ((rc = regex_match(optarg, pattern))) { + fprintf(stderr, "Error: Wrong pattern in %s, '%s'\n", + arg, optarg); + return rc; + } + + switch (flag) { + case FLAG_LOW: + kmgt_to_uint64(optarg, &range->val_low); + break; + case FLAG_HIGH: + kmgt_to_uint64(optarg, &range->val_high); + break; + case FLAG_INCR: + kmgt_to_uint64(optarg, &range->val_inc_perc); + break; + default: + assert(0); + } + + *flag_thread |= flag; + + return 0; +} + +int +set_noise(uint64_t *noise, char *optarg, char *arg) +{ + if (regex_match(optarg, REGEX_NUMBERS) == 0) { + kmgt_to_uint64(optarg, noise); + } else { + fprintf(stderr, "Error: Incorrect pattern for %s\n", arg); + return EINVAL; + } + + return 0; +} + +int +set_load_params(cmd_args_t *args, char *optarg) +{ + char *param, *search, comma[] = ","; + int rc = 0; + + search = strdup(optarg); + if (search == NULL) + return ENOMEM; + + while ((param = strtok(search, comma)) != NULL) { + search = NULL; + + if (strcmp("fpp", param) == 0) { + args->flags |= DMU_FPP; /* File Per Process/Thread */ + } else if (strcmp("ssf", param) == 0) { + args->flags &= ~DMU_FPP; /* Single Shared File */ + } else if (strcmp("dmuio", param) == 0) { + args->io_type |= DMU_IO; + args->flags |= (DMU_WRITE | DMU_READ); + } else { + fprintf(stderr, "Invalid load: %s\n", param); + rc = EINVAL; + } + } + + free(search); + + return rc; +} + + +/* checks the low, high, increment values against the single value for + * mutual exclusion, for e.g threadcount is mutually exclusive to + * threadcount_low, ..._high, ..._incr */ +int +check_mutual_exclusive_command_lines(uint32_t flag, char *arg) +{ + if ((flag & FLAG_SET) && (flag & (FLAG_LOW | FLAG_HIGH | FLAG_INCR))) { + fprintf(stderr, "Error: --%s can not be given with --%s_low, " + "--%s_high or --%s_incr.\n", arg, arg, arg, arg); + return 0; + } + + if ((flag & (FLAG_LOW | FLAG_HIGH | 
FLAG_INCR)) && !(flag & FLAG_SET)){ + if (flag != (FLAG_LOW | FLAG_HIGH | FLAG_INCR)) { + fprintf(stderr, "Error: One or more values missing " + "from --%s_low, --%s_high, --%s_incr.\n", + arg, arg, arg); + return 0; + } + } + + return 1; +} + +void +print_stats_header(cmd_args_t *args) +{ + if (args->verbose) { + printf("status name id\tth-cnt\trg-cnt\trg-sz\t" + "ch-sz\toffset\trg-no\tch-no\tth-dly\tflags\ttime\t" + "cr-time\trm-time\twr-time\trd-time\twr-data\twr-ch\t" + "wr-bw\trd-data\trd-ch\trd-bw\n"); + printf("------------------------------------------------" + "------------------------------------------------" + "------------------------------------------------" + "----------------------------------------------\n"); + } else { + printf("status name id\t" + "wr-data\twr-ch\twr-bw\t" + "rd-data\trd-ch\trd-bw\n"); + printf("-----------------------------------------" + "--------------------------------------\n"); + } +} + +static void +print_stats_human_readable(cmd_args_t *args, zpios_cmd_t *cmd) +{ + zpios_stats_t *summary_stats; + double t_time, wr_time, rd_time, cr_time, rm_time; + char str[KMGT_SIZE]; + + if (args->rc) + printf("FAIL: %3d ", args->rc); + else + printf("PASS: "); + + printf("%-12s", args->name ? 
args->name : ZPIOS_NAME); + printf("%2u\t", cmd->cmd_id); + + if (args->verbose) { + printf("%u\t", cmd->cmd_thread_count); + printf("%u\t", cmd->cmd_region_count); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_region_size)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_chunk_size)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_offset)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_region_noise)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_chunk_noise)); + printf("%s\t", uint64_to_kmgt(str, cmd->cmd_thread_delay)); + printf("%s\t", print_flags(str, cmd->cmd_flags)); + } + + if (args->rc) { + printf("\n"); + return; + } + + summary_stats = (zpios_stats_t *)cmd->cmd_data_str; + t_time = zpios_timespec_to_double(summary_stats->total_time.delta); + wr_time = zpios_timespec_to_double(summary_stats->wr_time.delta); + rd_time = zpios_timespec_to_double(summary_stats->rd_time.delta); + cr_time = zpios_timespec_to_double(summary_stats->cr_time.delta); + rm_time = zpios_timespec_to_double(summary_stats->rm_time.delta); + + if (args->verbose) { + printf("%.2f\t", t_time); + printf("%.3f\t", cr_time); + printf("%.3f\t", rm_time); + printf("%.2f\t", wr_time); + printf("%.2f\t", rd_time); + } + + printf("%s\t", uint64_to_kmgt(str, summary_stats->wr_data)); + printf("%s\t", uint64_to_kmgt(str, summary_stats->wr_chunks)); + printf("%s\t", kmgt_per_sec(str, summary_stats->wr_data, wr_time)); + + printf("%s\t", uint64_to_kmgt(str, summary_stats->rd_data)); + printf("%s\t", uint64_to_kmgt(str, summary_stats->rd_chunks)); + printf("%s\n", kmgt_per_sec(str, summary_stats->rd_data, rd_time)); + fflush(stdout); +} + +static void +print_stats_table(cmd_args_t *args, zpios_cmd_t *cmd) +{ + zpios_stats_t *summary_stats; + double wr_time, rd_time; + + if (args->rc) + printf("FAIL: %3d ", args->rc); + else + printf("PASS: "); + + printf("%-12s", args->name ? 
args->name : ZPIOS_NAME); + printf("%2u\t", cmd->cmd_id); + + if (args->verbose) { + printf("%u\t", cmd->cmd_thread_count); + printf("%u\t", cmd->cmd_region_count); + printf("%llu\t", (long long unsigned)cmd->cmd_region_size); + printf("%llu\t", (long long unsigned)cmd->cmd_chunk_size); + printf("%llu\t", (long long unsigned)cmd->cmd_offset); + printf("%u\t", cmd->cmd_region_noise); + printf("%u\t", cmd->cmd_chunk_noise); + printf("%u\t", cmd->cmd_thread_delay); + printf("0x%x\t", cmd->cmd_flags); + } + + if (args->rc) { + printf("\n"); + return; + } + + summary_stats = (zpios_stats_t *)cmd->cmd_data_str; + wr_time = zpios_timespec_to_double(summary_stats->wr_time.delta); + rd_time = zpios_timespec_to_double(summary_stats->rd_time.delta); + + if (args->verbose) { + printf("%ld.%02ld\t", + (long)summary_stats->total_time.delta.ts_sec, + (long)summary_stats->total_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->cr_time.delta.ts_sec, + (long)summary_stats->cr_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->rm_time.delta.ts_sec, + (long)summary_stats->rm_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->wr_time.delta.ts_sec, + (long)summary_stats->wr_time.delta.ts_nsec); + printf("%ld.%02ld\t", + (long)summary_stats->rd_time.delta.ts_sec, + (long)summary_stats->rd_time.delta.ts_nsec); + } + + printf("%lld\t", (long long unsigned)summary_stats->wr_data); + printf("%lld\t", (long long unsigned)summary_stats->wr_chunks); + printf("%.4f\t", (double)summary_stats->wr_data / wr_time); + + printf("%lld\t", (long long unsigned)summary_stats->rd_data); + printf("%lld\t", (long long unsigned)summary_stats->rd_chunks); + printf("%.4f\n", (double)summary_stats->rd_data / rd_time); + fflush(stdout); +} + +void +print_stats(cmd_args_t *args, zpios_cmd_t *cmd) +{ + if (args->human_readable) + print_stats_human_readable(args, cmd); + else + print_stats_table(args, cmd); +} diff --git a/cmd/zpool/zpool_main.c 
b/cmd/zpool/zpool_main.c index 3cdc269b0..b6c454d24 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -1674,12 +1674,6 @@ zpool_do_import(int argc, char **argv) usage(B_FALSE); } - if (searchdirs == NULL) { - searchdirs = safe_malloc(sizeof (char *)); - searchdirs[0] = "/dev/dsk"; - nsearch = 1; - } - /* check argument count */ if (do_all) { if (argc != 0) { @@ -1700,7 +1694,8 @@ zpool_do_import(int argc, char **argv) if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { (void) fprintf(stderr, gettext("cannot " "discover pools: permission denied\n")); - free(searchdirs); + if (searchdirs != NULL) + free(searchdirs); return (1); } } @@ -1747,7 +1742,8 @@ zpool_do_import(int argc, char **argv) (void) fprintf(stderr, gettext("cannot import '%s': " "no such pool available\n"), argv[0]); } - free(searchdirs); + if (searchdirs != NULL) + free(searchdirs); return (1); } @@ -1842,7 +1838,8 @@ zpool_do_import(int argc, char **argv) error: nvlist_free(props); nvlist_free(pools); - free(searchdirs); + if (searchdirs != NULL) + free(searchdirs); return (err ? 1 : 0); } diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 7ff368415..5f540ac71 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -51,7 +51,7 @@ * * 1. Construct the vdev specification. Performs syntax validation and * makes sure each device is valid. - * 2. Check for devices in use. Using libdiskmgt, makes sure that no + * 2. Check for devices in use. Using libblkid to make sure that no * devices are also in use. Some can be overridden using the 'force' * flag, others cannot. * 3. Check for replication errors if the 'force' flag is not specified. 
@@ -61,10 +61,10 @@ */ #include <assert.h> +#include <ctype.h> #include <devid.h> #include <errno.h> #include <fcntl.h> -#include <libdiskmgt.h> #include <libintl.h> #include <libnvpair.h> #include <limits.h> @@ -75,13 +75,13 @@ #include <sys/stat.h> #include <sys/vtoc.h> #include <sys/mntent.h> +#include <uuid/uuid.h> +#ifdef HAVE_LIBBLKID +#include <blkid/blkid.h> +#endif #include "zpool_util.h" -#define DISK_ROOT "/dev/dsk" -#define RDISK_ROOT "/dev/rdsk" -#define BACKUP_SLICE "s2" - /* * For any given vdev specification, we can have multiple errors. The * vdev_error() function keeps track of whether we have seen an error yet, and @@ -112,168 +112,6 @@ vdev_error(const char *fmt, ...) va_end(ap); } -static void -libdiskmgt_error(int error) -{ - /* - * ENXIO/ENODEV is a valid error message if the device doesn't live in - * /dev/dsk. Don't bother printing an error message in this case. - */ - if (error == ENXIO || error == ENODEV) - return; - - (void) fprintf(stderr, gettext("warning: device in use checking " - "failed: %s\n"), strerror(error)); -} - -/* - * Validate a device, passing the bulk of the work off to libdiskmgt. - */ -static int -check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) -{ - char *msg; - int error = 0; - dm_who_type_t who; - - if (force) - who = DM_WHO_ZPOOL_FORCE; - else if (isspare) - who = DM_WHO_ZPOOL_SPARE; - else - who = DM_WHO_ZPOOL; - - if (dm_inuse((char *)path, &msg, who, &error) || error) { - if (error != 0) { - libdiskmgt_error(error); - return (0); - } else { - vdev_error("%s", msg); - free(msg); - return (-1); - } - } - - /* - * If we're given a whole disk, ignore overlapping slices since we're - * about to label it anyway. 
- */ - error = 0; - if (!wholedisk && !force && - (dm_isoverlapping((char *)path, &msg, &error) || error)) { - if (error == 0) { - /* dm_isoverlapping returned -1 */ - vdev_error(gettext("%s overlaps with %s\n"), path, msg); - free(msg); - return (-1); - } else if (error != ENODEV) { - /* libdiskmgt's devcache only handles physical drives */ - libdiskmgt_error(error); - return (0); - } - } - - return (0); -} - - -/* - * Validate a whole disk. Iterate over all slices on the disk and make sure - * that none is in use by calling check_slice(). - */ -static int -check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) -{ - dm_descriptor_t *drive, *media, *slice; - int err = 0; - int i; - int ret; - - /* - * Get the drive associated with this disk. This should never fail, - * because we already have an alias handle open for the device. - */ - if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, - &err)) == NULL || *drive == NULL) { - if (err) - libdiskmgt_error(err); - return (0); - } - - if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, - &err)) == NULL) { - dm_free_descriptors(drive); - if (err) - libdiskmgt_error(err); - return (0); - } - - dm_free_descriptors(drive); - - /* - * It is possible that the user has specified a removable media drive, - * and the media is not present. - */ - if (*media == NULL) { - dm_free_descriptors(media); - vdev_error(gettext("'%s' has no media in drive\n"), name); - return (-1); - } - - if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, - &err)) == NULL) { - dm_free_descriptors(media); - if (err) - libdiskmgt_error(err); - return (0); - } - - dm_free_descriptors(media); - - ret = 0; - - /* - * Iterate over all slices and report any errors. We don't care about - * overlapping slices because we are using the whole disk. 
- */ - for (i = 0; slice[i] != NULL; i++) { - char *name = dm_get_name(slice[i], &err); - - if (check_slice(name, force, B_TRUE, isspare) != 0) - ret = -1; - - dm_free_name(name); - } - - dm_free_descriptors(slice); - return (ret); -} - -/* - * Validate a device. - */ -static int -check_device(const char *path, boolean_t force, boolean_t isspare) -{ - dm_descriptor_t desc; - int err; - char *dev; - - /* - * For whole disks, libdiskmgt does not include the leading dev path. - */ - dev = strrchr(path, '/'); - assert(dev != NULL); - dev++; - if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { - err = check_disk(path, desc, force, isspare); - dm_free_descriptor(desc); - return (err); - } - - return (check_slice(path, force, B_FALSE, isspare)); -} - /* * Check that a file is valid. All we can do in this case is check that it's * not in use by another pool, and not in use by swap. @@ -284,20 +122,10 @@ check_file(const char *file, boolean_t force, boolean_t isspare) char *name; int fd; int ret = 0; - int err; pool_state_t state; boolean_t inuse; - if (dm_inuse_swap(file, &err)) { - if (err) - libdiskmgt_error(err); - else - vdev_error(gettext("%s is currently used by swap. 
" - "Please see swap(1M).\n"), file); - return (-1); - } - - if ((fd = open(file, O_RDONLY)) < 0) + if ((fd = open(file, O_RDONLY|O_EXCL)) < 0) return (0); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { @@ -349,6 +177,177 @@ check_file(const char *file, boolean_t force, boolean_t isspare) return (ret); } +#ifdef HAVE_LIBBLKID +static void +check_error(int err) +{ + (void) fprintf(stderr, gettext("warning: device in use checking " + "failed: %s\n"), strerror(err)); +} + +static int +check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare) +{ + struct stat64 statbuf; + char *value; + int err; + + if (stat64(path, &statbuf) != 0) { + vdev_error(gettext("cannot stat %s: %s\n"), + path, strerror(errno)); + return (-1); + } + + /* No valid type detected device is safe to use */ + value = blkid_get_tag_value(cache, "TYPE", path); + if (value == NULL) + return (0); + + /* + * If libblkid detects a ZFS device, we check the device + * using check_file() to see if it's safe. The one safe + * case is a spare device shared between multiple pools. + */ + if (strcmp(value, "zfs") == 0) { + err = check_file(path, force, isspare); + } else { + if (force) { + err = 0; + } else { + err = -1; + vdev_error(gettext("%s contains a filesystem of " + "type '%s'\n"), path, value); + } + } + + free(value); + + return (err); +} + +/* + * Validate a whole disk. Iterate over all slices on the disk and make sure + * that none is in use by calling check_slice(). + */ +static int +check_disk(const char *path, blkid_cache cache, int force, + boolean_t isspare, boolean_t iswholedisk) +{ + struct dk_gpt *vtoc; + char slice_path[MAXPATHLEN]; + int err = 0; + int fd, i; + + /* This is not a wholedisk we only check the given partition */ + if (!iswholedisk) + return check_slice(path, cache, force, isspare); + + /* + * When the device is a whole disk try to read the efi partition + * label. If this is successful we safely check the all of the + * partitions. 
However, when it fails it may simply be because + * the disk is partitioned via the MBR. Since we currently can + * not easily decode the MBR return a failure and prompt to the + * user to use force option since we cannot check the partitions. + */ + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { + check_error(errno); + return -1; + } + + if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { + (void) close(fd); + + if (force) { + return 0; + } else { + vdev_error(gettext("%s does not contain an EFI " + "label but it may contain partition\n" + "information in the MBR.\n"), path); + return -1; + } + } + + /* + * The primary efi partition label is damaged however the secondary + * label at the end of the device is intact. Rather than use this + * label we should play it safe and treat this as a non efi device. + */ + if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { + efi_free(vtoc); + (void) close(fd); + + if (force) { + /* Partitions will no be created using the backup */ + return 0; + } else { + vdev_error(gettext("%s contains a corrupt primary " + "EFI label.\n"), path); + return -1; + } + } + + for (i = 0; i < vtoc->efi_nparts; i++) { + + if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED || + uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) + continue; + + if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) + (void) snprintf(slice_path, sizeof (slice_path), + "%s%s%d", path, "-part", i+1); + else + (void) snprintf(slice_path, sizeof (slice_path), + "%s%s%d", path, isdigit(path[strlen(path)-1]) ? + "p" : "", i+1); + + err = check_slice(slice_path, cache, force, isspare); + if (err) + break; + } + + efi_free(vtoc); + (void) close(fd); + + return (err); +} + +static int +check_device(const char *path, boolean_t force, + boolean_t isspare, boolean_t iswholedisk) +{ + static blkid_cache cache = NULL; + int err; + + /* + * There is no easy way to add a correct blkid_put_cache() call, + * memory will be reclaimed when the command exits. 
+ */ + if (cache == NULL) { + if ((err = blkid_get_cache(&cache, NULL)) != 0) { + check_error(err); + return -1; + } + + if ((err = blkid_probe_all(cache)) != 0) { + blkid_put_cache(cache); + check_error(err); + return -1; + } + } + + return check_disk(path, cache, force, isspare, iswholedisk); +} + +#else /* HAVE_LIBBLKID */ + +static int +check_device(const char *path, boolean_t force, + boolean_t isspare, boolean_t iswholedisk) +{ + return check_file(path, force, isspare); +} +#endif /* HAVE_LIBBLKID */ /* * By "whole disk" we mean an entire physical disk (something we can @@ -367,7 +366,7 @@ is_whole_disk(const char *arg) (void) snprintf(path, sizeof (path), "%s%s%s", RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) return (B_FALSE); if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { (void) close(fd); @@ -404,17 +403,28 @@ make_leaf_vdev(const char *arg, uint64_t is_log) if (arg[0] == '/') { /* * Complete device or file path. Exact type is determined by - * examining the file descriptor afterwards. + * examining the file descriptor afterwards. Symbolic links + * are resolved to their real paths for the is_whole_disk() + * and S_ISBLK/S_ISREG type checks. However, we are careful + * to store the given path as ZPOOL_CONFIG_PATH to ensure we + * can leverage udev's persistent device labels. 
*/ - wholedisk = is_whole_disk(arg); - if (!wholedisk && (stat64(arg, &statbuf) != 0)) { + if (realpath(arg, path) == NULL) { + (void) fprintf(stderr, + gettext("cannot resolve path '%s'\n"), arg); + return (NULL); + } + + wholedisk = is_whole_disk(path); + if (!wholedisk && (stat64(path, &statbuf) != 0)) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), - arg, strerror(errno)); + path, strerror(errno)); return (NULL); } - (void) strlcpy(path, arg, sizeof (path)); + /* After is_whole_disk() check restore original passed path */ + strlcpy(path, arg, MAXPATHLEN); } else { /* * This may be a short path for a device, or it could be total @@ -476,6 +486,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, (uint64_t)wholedisk) == 0); +#if defined(__sun__) || defined(__sun) /* * For a whole disk, defer getting its devid until after labeling it. */ @@ -487,7 +498,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) ddi_devid_t devid; char *minor = NULL, *devid_str = NULL; - if ((fd = open(path, O_RDONLY)) < 0) { + if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': " "%s\n"), path, strerror(errno)); nvlist_free(vdev); @@ -510,6 +521,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) (void) close(fd); } +#endif return (vdev); } @@ -872,6 +884,39 @@ check_replication(nvlist_t *config, nvlist_t *newroot) return (ret); } +static int +zero_label(char *path) +{ + const int size = 4096; + char buf[size]; + int err, fd; + + if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) { + (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (-1); + } + + memset(buf, 0, size); + err = write(fd, buf, size); + (void) fdatasync(fd); + (void) close(fd); + + if (err == -1) { + (void) fprintf(stderr, gettext("cannot zero first %d bytes " + "of '%s': %s\n"), size, path, strerror(errno)); + return (-1); + } + + if (err != size) { + (void) fprintf(stderr, 
gettext("could only zero %d/%d bytes " + "of '%s'\n"), err, size, path); + return (-1); + } + + return 0; +} + /* * Go through and find any whole disks in the vdev specification, labelling them * as appropriate. When constructing the vdev spec, we were unable to open this @@ -890,10 +935,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) char *type, *path, *diskname; char buf[MAXPATHLEN]; uint64_t wholedisk; - int fd; int ret; - ddi_devid_t devid; - char *minor = NULL, *devid_str = NULL; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); @@ -904,55 +946,66 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) return (0); /* - * We have a disk device. Get the path to the device - * and see if it's a whole disk by appending the backup - * slice and stat()ing the device. + * We have a disk device. If this is a whole disk write + * out the efi partition table, otherwise write zero's to + * the first 4k of the partition. This is to ensure that + * libblkid will not misidentify the partition due to a + * magic value left by the previous filesystem. */ - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) != 0 || !wholedisk) - return (0); + verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path)); + verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk)); - diskname = strrchr(path, '/'); + if (!wholedisk) { + ret = zero_label(path); + return (ret); + } + + if (realpath(path, buf) == NULL) { + ret = errno; + (void) fprintf(stderr, + gettext("cannot resolve path '%s'\n"), path); + return (ret); + } + + diskname = strrchr(buf, '/'); assert(diskname != NULL); diskname++; if (zpool_label_disk(g_zfs, zhp, diskname) == -1) return (-1); /* - * Fill in the devid, now that we've labeled the disk. + * Now that we've labeled the disk and the partitions have + * been created, we still need to wait for udev to create + * the symlinks to those partitions. 
If we are accessing + * the devices via a udev disk path, /dev/disk, then wait + * for *-part# to be created. Otherwise just use the normal + * syntax for devices in /dev. */ - (void) snprintf(buf, sizeof (buf), "%ss0", path); - if ((fd = open(buf, O_RDONLY)) < 0) { + if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) + (void) snprintf(buf, sizeof (buf), + "%s%s%s", path, "-part", FIRST_SLICE); + else + (void) snprintf(buf, sizeof (buf), + "%s%s%s", path, isdigit(path[strlen(path)-1]) ? + "p" : "", FIRST_SLICE); + + if ((ret = zpool_label_disk_wait(buf, 1000)) != 0) { (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - buf, strerror(errno)); + gettext( "cannot resolve path '%s'\n"), buf); return (-1); } - if (devid_get(fd, &devid) == 0) { - if (devid_get_minor_name(fd, &minor) == 0 && - (devid_str = devid_str_encode(devid, minor)) != - NULL) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_DEVID, devid_str) == 0); - } - if (devid_str != NULL) - devid_str_free(devid_str); - if (minor != NULL) - devid_str_free(minor); - devid_free(devid); - } - /* - * Update the path to refer to the 's0' slice. The presence of + * Update the path to refer to FIRST_SLICE. The presence of * the 'whole_disk' field indicates to the CLI that we should * chop off the slice number when displaying the device in * future output. */ verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); - (void) close(fd); + /* Just in case this partition already existed. 
*/ + (void) zero_label(buf); return (0); } @@ -992,7 +1045,7 @@ is_spare(nvlist_t *config, const char *path) uint_t i, nspares; boolean_t inuse; - if ((fd = open(path, O_RDONLY)) < 0) + if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) return (B_FALSE); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || @@ -1035,25 +1088,27 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing, nvlist_t **child; uint_t c, children; char *type, *path; - int ret; + int ret = 0; char buf[MAXPATHLEN]; - uint64_t wholedisk; + uint64_t wholedisk = B_FALSE; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path)); + if (strcmp(type, VDEV_TYPE_DISK) == 0) + verify(!nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_WHOLE_DISK, &wholedisk)); /* * As a generic check, we look to see if this is a replace of a * hot spare within the same pool. If so, we allow it - * regardless of what libdiskmgt or zpool_in_use() says. + * regardless of what libblkid or zpool_in_use() says. 
*/ if (isreplacing) { - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) == 0 && wholedisk) + if (wholedisk) (void) snprintf(buf, sizeof (buf), "%ss0", path); else @@ -1063,7 +1118,7 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing, } if (strcmp(type, VDEV_TYPE_DISK) == 0) - ret = check_device(path, force, isspare); + ret = check_device(path, force, isspare, wholedisk); if (strcmp(type, VDEV_TYPE_FILE) == 0) ret = check_file(path, force, isspare); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index e10649919..a10bd5ed1 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -3642,31 +3642,21 @@ static void ztest_verify_blocks(char *pool) { int status; + char bin[MAXPATHLEN + MAXNAMELEN + 20]; char zdb[MAXPATHLEN + MAXNAMELEN + 20]; char zbuf[1024]; - char *bin; - char *ztest; - char *isa; - int isalen; FILE *fp; - (void) realpath(getexecname(), zdb); + /* Designed to be run exclusively in the development tree */ + VERIFY(realpath(getexecname(), bin) != NULL); + strstr(bin, "/ztest/")[0] = '\0'; - /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ - bin = strstr(zdb, "/usr/bin/"); - ztest = strstr(bin, "/ztest"); - isa = bin + 8; - isalen = ztest - isa; - isa = strdup(isa); - /* LINTED */ - (void) sprintf(bin, - "/usr/sbin%.*s/zdb -bcc%s%s -U /tmp/zpool.cache %s", - isalen, - isa, + (void) sprintf(zdb, + "%s/zdb/zdb -bcc%s%s -U /tmp/zpool.cache %s", + bin, zopt_verbose >= 3 ? "s" : "", zopt_verbose >= 4 ? 
"v" : "", pool); - free(isa); if (zopt_verbose >= 5) (void) printf("Executing %s\n", strstr(zdb, "zdb ")); diff --git a/config/user-arch.m4 b/config/user-arch.m4 index 1ffa28b0e..fcc566fc5 100644 --- a/config/user-arch.m4 +++ b/config/user-arch.m4 @@ -2,20 +2,18 @@ dnl # dnl # Set the target arch for libspl atomic implementation dnl # AC_DEFUN([ZFS_AC_CONFIG_USER_ARCH], [ - AC_MSG_CHECKING(for target arch) + AC_MSG_CHECKING(for target asm dir) TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/` - TARGET_ARCH_DIR=asm-$TARGET_ARCH - AC_MSG_RESULT([$TARGET_ARCH]) case $TARGET_ARCH in - i386|x86_64|powerpc64) - AC_SUBST([TARGET_ARCH]) - AC_SUBST([TARGET_ARCH_DIR]) + i386|x86_64) + TARGET_ASM_DIR=asm-${TARGET_ARCH} ;; *) - AC_MSG_ERROR([ - *** Unsupported architecture $TARGET_ARCH - *** Available architectures: x86, x86_64, powerpc64]) + TARGET_ASM_DIR=asm-generic ;; esac + + AC_SUBST([TARGET_ASM_DIR]) + AC_MSG_RESULT([$TARGET_ASM_DIR]) ]) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index ccfd2eda2..955793be7 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -54,16 +54,21 @@ MODDIR=${MODDIR} SCRIPTDIR=${SCRIPTDIR} UDEVDIR=\${TOPDIR}/scripts/udev-rules ZPOOLDIR=\${TOPDIR}/scripts/zpool-config +ZPIOSDIR=\${TOPDIR}/scripts/zpios-test +ZPIOSPROFILEDIR=\${TOPDIR}/scripts/zpios-profile ZDB=\${CMDDIR}/zdb/zdb ZFS=\${CMDDIR}/zfs/zfs ZINJECT=\${CMDDIR}/zinject/zinject ZPOOL=\${CMDDIR}/zpool/zpool ZTEST=\${CMDDIR}/ztest/ztest +ZPIOS=\${CMDDIR}/zpios/zpios COMMON_SH=\${SCRIPTDIR}/common.sh ZFS_SH=\${SCRIPTDIR}/zfs.sh ZPOOL_CREATE_SH=\${SCRIPTDIR}/zpool-create.sh +ZPIOS_SH=\${SCRIPTDIR}/zpios.sh +ZPIOS_SURVEY_SH=\${SCRIPTDIR}/zpios-survey.sh LDMOD=/sbin/insmod @@ -83,6 +88,10 @@ ZFS_MODULES=( \\ \${MODDIR}/zfs/zfs.ko \\ ) +ZPIOS_MODULES=( \\ + \${MODDIR}/zpios/zpios.ko \\ +) + MODULES=( \\ \${KERNEL_MODULES[[*]]} \\ \${SPL_MODULES[[*]]} \\ diff --git a/configure.ac b/configure.ac index 97ab13729..26c389037 100644 --- a/configure.ac +++ b/configure.ac @@ 
-54,6 +54,11 @@ AC_CONFIG_FILES([ config/Makefile doc/Makefile lib/Makefile + lib/libspl/Makefile + lib/libspl/asm-generic/Makefile + lib/libspl/asm-i386/Makefile + lib/libspl/asm-x86_64/Makefile + lib/libspl/include/Makefile lib/libavl/Makefile lib/libefi/Makefile lib/libnvpair/Makefile @@ -68,12 +73,14 @@ AC_CONFIG_FILES([ cmd/zinject/Makefile cmd/zpool/Makefile cmd/ztest/Makefile + cmd/zpios/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile module/unicode/Makefile module/zcommon/Makefile module/zfs/Makefile + module/zpios/Makefile scripts/Makefile zfs.spec zfs-modules.spec diff --git a/lib/Makefile.am b/lib/Makefile.am index 2de022787..042656813 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1 +1 @@ -SUBDIRS = libavl libefi libnvpair libunicode libuutil libzfs libzpool +SUBDIRS = libspl libavl libefi libnvpair libunicode libuutil libzfs libzpool diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 31eb3d3f6..7c0f5b478 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -30,6 +30,7 @@ #include <strings.h> #include <unistd.h> #include <uuid/uuid.h> +#include <zlib.h> #include <libintl.h> #include <sys/types.h> #include <sys/dkio.h> @@ -39,7 +40,9 @@ #include <sys/dktp/fdisk.h> #include <sys/efi_partition.h> #include <sys/byteorder.h> -#include <sys/ddi.h> +#if defined(__linux__) +#include <linux/fs.h> +#endif static struct uuid_to_ptag { struct uuid uuid; @@ -50,11 +53,11 @@ static struct uuid_to_ptag { { EFI_SWAP }, { EFI_USR }, { EFI_BACKUP }, - { 0 }, /* STAND is never used */ + { EFI_UNUSED }, /* STAND is never used */ { EFI_VAR }, { EFI_HOME }, { EFI_ALTSCTR }, - { 0 }, /* CACHE (cachefs) is never used */ + { EFI_UNUSED }, /* CACHE (cachefs) is never used */ { EFI_RESERVED }, { EFI_SYSTEM }, { EFI_LEGACY_MBR }, @@ -108,19 +111,134 @@ int efi_debug = 1; int efi_debug = 0; #endif -extern unsigned int efi_crc32(const unsigned char *, unsigned int); -static int efi_read(int, struct dk_gpt *); +static int 
efi_read(int, struct dk_gpt *); + +/* + * Return a 32-bit CRC of the contents of the buffer. Pre-and-post + * one's conditioning will be handled by crc32() internally. + */ +static uint32_t +efi_crc32(const unsigned char *buf, unsigned int size) +{ + uint32_t crc = crc32(0, Z_NULL, 0); + + crc = crc32(crc, buf, size); + + return (crc); +} static int read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) { - struct dk_minfo disk_info; + int sector_size; + unsigned long long capacity_size; + + if (ioctl(fd, BLKSSZGET, &sector_size) < 0) + return (-1); + + if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) + return (-1); + + *lbsize = (uint_t)sector_size; + *capacity = (diskaddr_t)(capacity_size / sector_size); + + return (0); +} - if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1) - return (errno); - *capacity = disk_info.dki_capacity; - *lbsize = disk_info.dki_lbsize; +static int +efi_get_info(int fd, struct dk_cinfo *dki_info) +{ +#if defined(__linux__) + char path[PATH_MAX]; + char *dev_path; + int rval = 0; + + /* + * The simplest way to get the partition number under linux is + * to parse it out of the /dev/<disk><partition> block device name. + * The kernel creates this using the partition number when it + * populates /dev/ so it may be trusted. The tricky bit here is + * that the naming convention is based on the block device type. + * So we need to take this in to account when parsing out the + * partition information. Another issue is that the libefi API + * only provides the open fd and not the file path. To handle + * this realpath(3) is used to resolve the block device name from + * /proc/self/fd/<fd>. Aside from the partition number we collect + * some additional device info. 
+ */ + memset(dki_info, 0, sizeof(*dki_info)); + (void) sprintf(path, "/proc/self/fd/%d", fd); + if ((dev_path = realpath(path, NULL)) == NULL) + goto error; + + if ((strncmp(dev_path, "/dev/sd", 7) == 0)) { + strcpy(dki_info->dki_cname, "sd"); + dki_info->dki_ctype = DKC_SCSI_CCS; + rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) { + strcpy(dki_info->dki_cname, "hd"); + dki_info->dki_ctype = DKC_DIRECT; + rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_MD; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_VBD; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9-]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_PCMCIA_MEM; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_VBD; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else { + strcpy(dki_info->dki_dname, "unknown"); + strcpy(dki_info->dki_cname, "unknown"); + dki_info->dki_ctype = DKC_UNKNOWN; + } + + switch (rval) { + case 0: + errno = EINVAL; + goto error; + case 1: + dki_info->dki_partition = 0; + } + + free(dev_path); +#else + if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1) + goto error; +#endif return (0); +error: + if (efi_debug) + (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); + + switch 
(errno) { + case EIO: + return (VT_EIO); + case EINVAL: + return (VT_EINVAL); + default: + return (VT_ERROR); + } } /* @@ -136,12 +254,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) int efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) { - diskaddr_t capacity; - uint_t lbsize; + diskaddr_t capacity = 0; + uint_t lbsize = 0; uint_t nblocks; size_t length; struct dk_gpt *vptr; struct uuid uuid; + struct dk_cinfo dki_info; if (read_disk_info(fd, &capacity, &lbsize) != 0) { if (efi_debug) @@ -149,6 +268,22 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) "couldn't read disk information\n"); return (-1); } +#if defined(__linux__) + if (efi_get_info(fd, &dki_info) != 0) { + if (efi_debug) + (void) fprintf(stderr, + "couldn't read disk information\n"); + return (-1); + } + + if (dki_info.dki_partition != 0) + return (-1); + + if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || + (dki_info.dki_ctype == DKC_VBD) || + (dki_info.dki_ctype == DKC_UNKNOWN)) + return (-1); +#endif nblocks = NBLOCKS(nparts, lbsize); if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { @@ -244,14 +379,138 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) { void *data = dk_ioc->dki_data; int error; +#if defined(__linux__) + diskaddr_t capacity; + uint_t lbsize; + + /* + * When the IO is not being performed in kernel as an ioctl we need + * to know the sector size so we can seek to the proper byte offset. 
+ */ + if (read_disk_info(fd, &capacity, &lbsize) == -1) { + if (efi_debug) + fprintf(stderr,"unable to read disk info: %d",errno); + + errno = EIO; + return -1; + } + + switch (cmd) { + case DKIOCGETEFI: + if (lbsize == 0) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI assuming " + "LBA %d bytes\n", DEV_BSIZE); + + lbsize = DEV_BSIZE; + } + + error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI lseek " + "error: %d\n", errno); + return error; + } + + error = read(fd, data, dk_ioc->dki_length); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI read " + "error: %d\n", errno); + return error; + } + + if (error != dk_ioc->dki_length) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI short " + "read of %d bytes\n", error); + errno = EIO; + return -1; + } + error = 0; + break; + + case DKIOCSETEFI: + if (lbsize == 0) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI unknown " + "LBA size\n"); + errno = EIO; + return -1; + } + + error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI lseek " + "error: %d\n", errno); + return error; + } + + error = write(fd, data, dk_ioc->dki_length); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI write " + "error: %d\n", errno); + return error; + } + + if (error != dk_ioc->dki_length) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI short " + "write of %d bytes\n", error); + errno = EIO; + return -1; + } + /* Sync the new EFI table to disk */ + error = fsync(fd); + if (error == -1) + return error; + + /* Ensure any local disk cache is also flushed */ + if (ioctl(fd, BLKFLSBUF, 0) == -1) + return error; + + error = 0; + break; + + default: + if (efi_debug) + (void) fprintf(stderr, "unsupported ioctl()\n"); + + errno = EIO; + return -1; + } +#else dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data; error = 
ioctl(fd, cmd, (void *)dk_ioc); dk_ioc->dki_data = data; - +#endif return (error); } +#if defined(__linux__) +static int +efi_rescan(int fd) +{ + int retry = 5; + int error; + + /* Notify the kernel a devices partition table has been updated */ + while ((error = ioctl(fd, BLKRRPART)) != 0) { + if (--retry == 0) { + (void) fprintf(stderr, "the kernel failed to rescan " + "the partition table: %d\n", errno); + return (-1); + } + } + + return (0); +} +#endif + static int check_label(int fd, dk_efi_t *dk_ioc) { @@ -306,6 +565,8 @@ efi_read(int fd, struct dk_gpt *vtoc) int rval = 0; int md_flag = 0; int vdc_flag = 0; + diskaddr_t capacity = 0; + uint_t lbsize = 0; struct dk_minfo disk_info; dk_efi_t dk_ioc; efi_gpt_t *efi; @@ -317,19 +578,9 @@ efi_read(int fd, struct dk_gpt *vtoc) /* * get the partition number for this file descriptor. */ - if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) { - if (efi_debug) { - (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); - } - switch (errno) { - case EIO: - return (VT_EIO); - case EINVAL: - return (VT_EINVAL); - default: - return (VT_ERROR); - } - } + if ((rval = efi_get_info(fd, &dki_info)) != 0) + return rval; + if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && (strncmp(dki_info.dki_dname, "md", 3) == 0)) { md_flag++; @@ -343,14 +594,18 @@ efi_read(int fd, struct dk_gpt *vtoc) } /* get the LBA size */ - if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) { + if (read_disk_info(fd, &capacity, &lbsize) == -1) { if (efi_debug) { (void) fprintf(stderr, - "assuming LBA 512 bytes %d\n", - errno); + "unable to read disk info: %d", + errno); } - disk_info.dki_lbsize = DEV_BSIZE; + return (VT_EINVAL); } + + disk_info.dki_lbsize = lbsize; + disk_info.dki_capacity = capacity; + if (disk_info.dki_lbsize == 0) { if (efi_debug) { (void) fprintf(stderr, @@ -375,9 +630,11 @@ efi_read(int fd, struct dk_gpt *vtoc) } } - if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL) + if (posix_memalign((void **)&dk_ioc.dki_data, + 
disk_info.dki_lbsize, label_len)) return (VT_ERROR); + memset(dk_ioc.dki_data, 0, label_len); dk_ioc.dki_length = disk_info.dki_lbsize; user_length = vtoc->efi_nparts; efi = dk_ioc.dki_data; @@ -573,12 +830,14 @@ write_pmbr(int fd, struct dk_gpt *vtoc) int len; len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize; - buf = calloc(len, 1); + if (posix_memalign((void **)&buf, len, len)) + return (VT_ERROR); /* * Preserve any boot code and disk signature if the first block is * already an MBR. */ + memset(buf, 0, len); dk_ioc.dki_lba = 0; dk_ioc.dki_length = len; /* LINTED -- always longlong aligned */ @@ -664,10 +923,9 @@ check_input(struct dk_gpt *vtoc) if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && (vtoc->efi_parts[i].p_size != 0)) { if (efi_debug) { - (void) fprintf(stderr, -"partition %d is \"unassigned\" but has a size of %llu", - i, - vtoc->efi_parts[i].p_size); + (void) fprintf(stderr, "partition %d is " + "\"unassigned\" but has a size of %llu", + i, vtoc->efi_parts[i].p_size); } return (VT_EINVAL); } @@ -680,9 +938,9 @@ check_input(struct dk_gpt *vtoc) if (vtoc->efi_parts[i].p_tag == V_RESERVED) { if (resv_part != -1) { if (efi_debug) { - (void) fprintf(stderr, -"found duplicate reserved partition at %d\n", - i); + (void) fprintf(stderr, "found " + "duplicate reserved partition " + "at %d\n", i); } return (VT_EINVAL); } @@ -733,8 +991,8 @@ check_input(struct dk_gpt *vtoc) (istart <= endsect)) { if (efi_debug) { (void) fprintf(stderr, -"Partition %d overlaps partition %d.", - i, j); + "Partition %d overlaps " + "partition %d.", i, j); } return (VT_EINVAL); } @@ -840,22 +1098,13 @@ efi_write(int fd, struct dk_gpt *vtoc) efi_gpe_t *efi_parts; int i, j; struct dk_cinfo dki_info; + int rval; int md_flag = 0; int nblocks; diskaddr_t lba_backup_gpt_hdr; - if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) { - if (efi_debug) - (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); - switch (errno) { - case EIO: - return (VT_EIO); - case EINVAL: - 
return (VT_EINVAL); - default: - return (VT_ERROR); - } - } + if ((rval = efi_get_info(fd, &dki_info)) != 0) + return rval; /* check if we are dealing wih a metadevice */ if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && @@ -893,9 +1142,11 @@ efi_write(int fd, struct dk_gpt *vtoc) * for backup GPT header. */ lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks; - if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL) + if (posix_memalign((void **)&dk_ioc.dki_data, + vtoc->efi_lbasize, dk_ioc.dki_length)) return (VT_ERROR); + memset(dk_ioc.dki_data, 0, dk_ioc.dki_length); efi = dk_ioc.dki_data; /* stuff user's input into EFI struct */ @@ -942,6 +1193,10 @@ efi_write(int fd, struct dk_gpt *vtoc) return (VT_EINVAL); } + /* Zero's should be written for empty partitions */ + if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) + continue; + efi_parts[i].efi_gpe_StartingLBA = LE_64(vtoc->efi_parts[i].p_start); efi_parts[i].efi_gpe_EndingLBA = @@ -1033,6 +1288,13 @@ efi_write(int fd, struct dk_gpt *vtoc) /* write the PMBR */ (void) write_pmbr(fd, vtoc); free(dk_ioc.dki_data); + +#if defined(__linux__) + rval = efi_rescan(fd); + if (rval) + return (VT_ERROR); +#endif + return (0); } @@ -1050,6 +1312,7 @@ efi_free(struct dk_gpt *ptr) int efi_type(int fd) { +#if 0 struct vtoc vtoc; struct extvtoc extvtoc; @@ -1063,6 +1326,9 @@ efi_type(int fd) } } return (0); +#else + return (ENOSYS); +#endif } void @@ -1176,7 +1442,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc) return (-1); } - for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) { + for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) { (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag; (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag; (*vtoc)->efi_parts[i].p_start = 0; diff --git a/lib/libnvpair/nvpair_alloc_system.c b/lib/libnvpair/nvpair_alloc_system.c index f45dc5f0b..af30c1f40 100644 --- a/lib/libnvpair/nvpair_alloc_system.c +++ b/lib/libnvpair/nvpair_alloc_system.c @@ -26,7 +26,7 @@ 
-#include <rpc/types.h> +#include <sys/kmem.h> #include <sys/nvpair.h> static void * diff --git a/lib/libspl/Makefile.am b/lib/libspl/Makefile.am new file mode 100644 index 000000000..ed8550c22 --- /dev/null +++ b/lib/libspl/Makefile.am @@ -0,0 +1,27 @@ +include $(top_srcdir)/config/Rules.am + +SUBDIRS = include $(TARGET_ASM_DIR) +DIST_SUBDIRS = include asm-generic asm-i386 asm-x86_64 + +DEFAULT_INCLUDES += \ + -I${top_srcdir}/lib/libspl/include + +AM_CCASFLAGS = \ + -I${top_srcdir}/lib/libspl/include + +lib_LTLIBRARIES = libspl.la + +libspl_la_SOURCES = \ + ${top_srcdir}/lib/libspl/getexecname.c \ + ${top_srcdir}/lib/libspl/gethrtime.c \ + ${top_srcdir}/lib/libspl/getmntany.c \ + ${top_srcdir}/lib/libspl/list.c \ + ${top_srcdir}/lib/libspl/mkdirp.c \ + ${top_srcdir}/lib/libspl/strlcat.c \ + ${top_srcdir}/lib/libspl/strlcpy.c \ + ${top_srcdir}/lib/libspl/strnlen.c \ + ${top_srcdir}/lib/libspl/zone.c \ + ${top_srcdir}/lib/libspl/xdr.c \ + ${top_srcdir}/lib/libspl/${TARGET_ASM_DIR}/atomic.S \ + ${top_srcdir}/lib/libspl/include/sys/list.h \ + ${top_srcdir}/lib/libspl/include/sys/list_impl.h diff --git a/lib/libspl/asm-generic/Makefile.am b/lib/libspl/asm-generic/Makefile.am new file mode 100644 index 000000000..4f5032f73 --- /dev/null +++ b/lib/libspl/asm-generic/Makefile.am @@ -0,0 +1,18 @@ +include $(top_srcdir)/config/Rules.am + +DEFAULT_INCLUDES += \ + -I${top_srcdir}/lib/libspl/include + +atomic_SOURCE = atomic.c +atomic_ASM = atomic.S + +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -fPIC +EXTRA_DIST = ${atomic_SOURCE} + +# Generates assembly to simplify inclusion in ../Makefile.am +all-am: + $(COMPILE) -c -S ${atomic_SOURCE} -o ${atomic_ASM} + +clean-generic: + $(RM) ${atomic_ASM} diff --git a/lib/libspl/asm-generic/atomic.S b/lib/libspl/asm-generic/atomic.S new file mode 100644 index 000000000..7550fd92b --- /dev/null +++ b/lib/libspl/asm-generic/atomic.S @@ -0,0 +1,6 @@ +Stub file for 'make dist' 
distdir rule. + +This file is directly referenced by ../Makefile.am as a source +file and thus will be expected by 'make dist'. To avoid this +being a problem this stub file was added. It will be overwritten +at build time based on assembly generated from atomic.c. diff --git a/lib/libspl/asm-generic/atomic.c b/lib/libspl/asm-generic/atomic.c new file mode 100644 index 000000000..de4430f9f --- /dev/null +++ b/lib/libspl/asm-generic/atomic.c @@ -0,0 +1,424 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2009 by Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <atomic.h> +#include <assert.h> +#include <pthread.h> + +/* + * All operations are implemented by serializing them through a global + * pthread mutex. This provides a correct generic implementation. + * However all supported architectures are encouraged to provide a + * native implementation in assembly for performance reasons. 
+ */ +pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER; + +/* + * These are the void returning variants + */ + +#define ATOMIC_INC(name, type) \ + void atomic_inc_##name(volatile type *target) \ + { \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + (*target)++; \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + } + +ATOMIC_INC(long, unsigned long) +ATOMIC_INC(8, uint8_t) +ATOMIC_INC(uchar, uchar_t) +ATOMIC_INC(16, uint16_t) +ATOMIC_INC(ushort, ushort_t) +ATOMIC_INC(32, uint32_t) +ATOMIC_INC(uint, uint_t) +ATOMIC_INC(ulong, ulong_t) +ATOMIC_INC(64, uint64_t) + + +#define ATOMIC_DEC(name, type) \ + void atomic_dec_##name(volatile type *target) \ + { \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + (*target)--; \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + } + +ATOMIC_DEC(long, unsigned long) +ATOMIC_DEC(8, uint8_t) +ATOMIC_DEC(uchar, uchar_t) +ATOMIC_DEC(16, uint16_t) +ATOMIC_DEC(ushort, ushort_t) +ATOMIC_DEC(32, uint32_t) +ATOMIC_DEC(uint, uint_t) +ATOMIC_DEC(ulong, ulong_t) +ATOMIC_DEC(64, uint64_t) + + +#define ATOMIC_ADD(name, type1, type2) \ + void atomic_add_##name(volatile type1 *target, type2 bits) \ + { \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + *target += bits; \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + } + +ATOMIC_ADD(8, uint8_t, int8_t) +ATOMIC_ADD(char, uchar_t, signed char) +ATOMIC_ADD(16, uint16_t, int16_t) +ATOMIC_ADD(short, ushort_t, short) +ATOMIC_ADD(32, uint32_t, int32_t) +ATOMIC_ADD(int, uint_t, int) +ATOMIC_ADD(long, ulong_t, long) +ATOMIC_ADD(64, uint64_t, int64_t) + +void atomic_add_ptr(volatile void *target, ssize_t bits) +{ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); + *(caddr_t *)target += bits; + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); +} + + +#define ATOMIC_OR(name, type) \ + void atomic_or_##name(volatile type *target, type bits) \ + { \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + *target |= bits; \ + 
VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + } + +ATOMIC_OR(8, uint8_t) +ATOMIC_OR(uchar, uchar_t) +ATOMIC_OR(16, uint16_t) +ATOMIC_OR(ushort, ushort_t) +ATOMIC_OR(32, uint32_t) +ATOMIC_OR(uint, uint_t) +ATOMIC_OR(ulong, ulong_t) +ATOMIC_OR(64, uint64_t) + + +#define ATOMIC_AND(name, type) \ + void atomic_and_##name(volatile type *target, type bits) \ + { \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + *target &= bits; \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + } + +ATOMIC_AND(8, uint8_t) +ATOMIC_AND(uchar, uchar_t) +ATOMIC_AND(16, uint16_t) +ATOMIC_AND(ushort, ushort_t) +ATOMIC_AND(32, uint32_t) +ATOMIC_AND(uint, uint_t) +ATOMIC_AND(ulong, ulong_t) +ATOMIC_AND(64, uint64_t) + + +/* + * New value returning variants + */ + +#define ATOMIC_INC_NV(name, type) \ + type atomic_inc_##name##_nv(volatile type *target) \ + { \ + type rc; \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + rc = (++(*target)); \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return rc; \ + } + +ATOMIC_INC_NV(long, unsigned long) +ATOMIC_INC_NV(8, uint8_t) +ATOMIC_INC_NV(uchar, uchar_t) +ATOMIC_INC_NV(16, uint16_t) +ATOMIC_INC_NV(ushort, ushort_t) +ATOMIC_INC_NV(32, uint32_t) +ATOMIC_INC_NV(uint, uint_t) +ATOMIC_INC_NV(ulong, ulong_t) +ATOMIC_INC_NV(64, uint64_t) + + +#define ATOMIC_DEC_NV(name, type) \ + type atomic_dec_##name##_nv(volatile type *target) \ + { \ + type rc; \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + rc = (--(*target)); \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return rc; \ + } + +ATOMIC_DEC_NV(long, unsigned long) +ATOMIC_DEC_NV(8, uint8_t) +ATOMIC_DEC_NV(uchar, uchar_t) +ATOMIC_DEC_NV(16, uint16_t) +ATOMIC_DEC_NV(ushort, ushort_t) +ATOMIC_DEC_NV(32, uint32_t) +ATOMIC_DEC_NV(uint, uint_t) +ATOMIC_DEC_NV(ulong, ulong_t) +ATOMIC_DEC_NV(64, uint64_t) + + +#define ATOMIC_ADD_NV(name, type1, type2) \ + type1 atomic_add_##name##_nv(volatile type1 *target, type2 bits)\ + { \ + type1 rc; \ + 
VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + rc = (*target += bits); \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return rc; \ + } + +ATOMIC_ADD_NV(8, uint8_t, int8_t) +ATOMIC_ADD_NV(char, uchar_t, signed char) +ATOMIC_ADD_NV(16, uint16_t, int16_t) +ATOMIC_ADD_NV(short, ushort_t, short) +ATOMIC_ADD_NV(32, uint32_t, int32_t) +ATOMIC_ADD_NV(int, uint_t, int) +ATOMIC_ADD_NV(long, ulong_t, long) +ATOMIC_ADD_NV(64, uint64_t, int64_t) + +void *atomic_add_ptr_nv(volatile void *target, ssize_t bits) +{ + void *ptr; + + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); + ptr = (*(caddr_t *)target += bits); + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + + return ptr; +} + + +#define ATOMIC_OR_NV(name, type) \ + type atomic_or_##name##_nv(volatile type *target, type bits) \ + { \ + type rc; \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + rc = (*target |= bits); \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return rc; \ + } + +ATOMIC_OR_NV(long, unsigned long) +ATOMIC_OR_NV(8, uint8_t) +ATOMIC_OR_NV(uchar, uchar_t) +ATOMIC_OR_NV(16, uint16_t) +ATOMIC_OR_NV(ushort, ushort_t) +ATOMIC_OR_NV(32, uint32_t) +ATOMIC_OR_NV(uint, uint_t) +ATOMIC_OR_NV(ulong, ulong_t) +ATOMIC_OR_NV(64, uint64_t) + + +#define ATOMIC_AND_NV(name, type) \ + type atomic_and_##name##_nv(volatile type *target, type bits) \ + { \ + type rc; \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + rc = (*target &= bits); \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return rc; \ + } + +ATOMIC_AND_NV(long, unsigned long) +ATOMIC_AND_NV(8, uint8_t) +ATOMIC_AND_NV(uchar, uchar_t) +ATOMIC_AND_NV(16, uint16_t) +ATOMIC_AND_NV(ushort, ushort_t) +ATOMIC_AND_NV(32, uint32_t) +ATOMIC_AND_NV(uint, uint_t) +ATOMIC_AND_NV(ulong, ulong_t) +ATOMIC_AND_NV(64, uint64_t) + + +/* + * If *target == arg1, set *target = arg2; return old value + */ + +#define ATOMIC_CAS(name, type) \ + type atomic_cas_##name(volatile type *target, type arg1, type arg2) \ + { 
\ + type old; \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + old = *target; \ + if (old == arg1) \ + *target = arg2; \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return old; \ + } + +ATOMIC_CAS(8, uint8_t) +ATOMIC_CAS(uchar, uchar_t) +ATOMIC_CAS(16, uint16_t) +ATOMIC_CAS(ushort, ushort_t) +ATOMIC_CAS(32, uint32_t) +ATOMIC_CAS(uint, uint_t) +ATOMIC_CAS(ulong, ulong_t) +ATOMIC_CAS(64, uint64_t) + +void *atomic_cas_ptr(volatile void *target, void *arg1, void *arg2) +{ + void *old; + + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); + old = *(void **)target; + if (old == arg1) + *(void **)target = arg2; + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + + return old; +} + + +/* + * Swap target and return old value + */ + +#define ATOMIC_SWAP(name, type) \ + type atomic_swap_##name(volatile type *target, type bits) \ + { \ + type old; \ + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); \ + old = *target; \ + *target = bits; \ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); \ + return old; \ + } + +ATOMIC_SWAP(8, uint8_t) +ATOMIC_SWAP(uchar, uchar_t) +ATOMIC_SWAP(16, uint16_t) +ATOMIC_SWAP(ushort, ushort_t) +ATOMIC_SWAP(32, uint32_t) +ATOMIC_SWAP(uint, uint_t) +ATOMIC_SWAP(ulong, ulong_t) +ATOMIC_SWAP(64, uint64_t) + +void *atomic_swap_ptr(volatile void *target, void *bits) +{ + void *old; + + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); + old = *(void **)target; + *(void **)target = bits; + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + + return old; +} + + +int atomic_set_long_excl(volatile ulong_t *target, uint_t value) +{ + ulong_t bit; + + VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); + bit = (1UL << value); + if ((*target & bit) != 0) { + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + return -1; + } + *target |= bit; + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + + return 0; +} + +int atomic_clear_long_excl(volatile ulong_t *target, uint_t value) +{ + ulong_t bit; + + 
VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0); + bit = (1UL << value); + if ((*target & bit) == 0) { /* bit not set: nothing to clear, report failure */ + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + return -1; + } + *target &= ~bit; + VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0); + + return 0; +} + +void membar_enter(void) +{ + /* XXX - Implement me */ +} + +void membar_exit(void) +{ + /* XXX - Implement me */ +} + +void membar_producer(void) +{ + /* XXX - Implement me */ +} + +void membar_consumer(void) +{ + /* XXX - Implement me */ +} + +/* Legacy kernel interfaces; they will go away (eventually). */ + +uint8_t cas8(uint8_t *target, uint8_t arg1, uint8_t arg2) +{ + return atomic_cas_8(target, arg1, arg2); +} + +uint32_t cas32(uint32_t *target, uint32_t arg1, uint32_t arg2) +{ + return atomic_cas_32(target, arg1, arg2); +} + +uint64_t cas64(uint64_t *target, uint64_t arg1, uint64_t arg2) +{ + return atomic_cas_64(target, arg1, arg2); +} + +ulong_t caslong(ulong_t *target, ulong_t arg1, ulong_t arg2) +{ + return atomic_cas_ulong(target, arg1, arg2); +} + +void *casptr(void *target, void *arg1, void *arg2) +{ + return atomic_cas_ptr(target, arg1, arg2); +} + +void atomic_and_long(ulong_t *target, ulong_t bits) +{ + atomic_and_ulong(target, bits); +} + +void atomic_or_long(ulong_t *target, ulong_t bits) +{ + atomic_or_ulong(target, bits); +} diff --git a/lib/libspl/asm-i386/Makefile.am b/lib/libspl/asm-i386/Makefile.am new file mode 100644 index 000000000..02403eceb --- /dev/null +++ b/lib/libspl/asm-i386/Makefile.am @@ -0,0 +1 @@ +noinst_HEADERS = *.S diff --git a/lib/libspl/asm-i386/atomic.S b/lib/libspl/asm-i386/atomic.S new file mode 100644 index 000000000..93c04bfb8 --- /dev/null +++ b/lib/libspl/asm-i386/atomic.S @@ -0,0 +1,730 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "%Z%%M% %I% %E% SMI" + + .file "%M%" + +#define _ASM +#include <ia32/sys/asm_linkage.h> + + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_uchar) + movl 4(%esp), %eax + lock + incb (%eax) + ret + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8) + + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_ushort) + movl 4(%esp), %eax + lock + incw (%eax) + ret + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16) + + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_uint) + ALTENTRY(atomic_inc_ulong) + movl 4(%esp), %eax + lock + incl (%eax) + ret + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32) + + ENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar_nv) + movl 4(%esp), %edx + movb (%edx), %al +1: + leal 1(%eax), %ecx + lock + cmpxchgb %cl, (%edx) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_8_nv) + + ENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort_nv) + movl 4(%esp), %edx + movw (%edx), %ax +1: + leal 1(%eax), %ecx + lock + cmpxchgw %cx, (%edx) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_16_nv) + + ENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint_nv) + ALTENTRY(atomic_inc_ulong_nv) + movl 4(%esp), %edx + movl (%edx), %eax +1: + leal 
1(%eax), %ecx + lock + cmpxchgl %ecx, (%edx) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_32_nv) + + /* + * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever + * separated, you need to also edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_64_nv. + */ + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi + movl (%edi), %eax + movl 4(%edi), %edx +1: + xorl %ebx, %ebx + xorl %ecx, %ecx + incl %ebx + addl %eax, %ebx + adcl %edx, %ecx + lock + cmpxchg8b (%edi) + jne 1b + movl %ebx, %eax + movl %ecx, %edx + popl %ebx + popl %edi + ret + SET_SIZE(atomic_inc_64_nv) + SET_SIZE(atomic_inc_64) + + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_uchar) + movl 4(%esp), %eax + lock + decb (%eax) + ret + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8) + + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_ushort) + movl 4(%esp), %eax + lock + decw (%eax) + ret + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16) + + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_uint) + ALTENTRY(atomic_dec_ulong) + movl 4(%esp), %eax + lock + decl (%eax) + ret + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32) + + ENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar_nv) + movl 4(%esp), %edx + movb (%edx), %al +1: + leal -1(%eax), %ecx + lock + cmpxchgb %cl, (%edx) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_8_nv) + + ENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort_nv) + movl 4(%esp), %edx + movw (%edx), %ax +1: + leal -1(%eax), %ecx + lock + cmpxchgw %cx, (%edx) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_16_nv) + + ENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint_nv) + ALTENTRY(atomic_dec_ulong_nv) + movl 4(%esp), %edx + movl (%edx), %eax +1: + leal -1(%eax), %ecx + lock + cmpxchgl %ecx, (%edx) + jne 1b + 
movl %ecx, %eax + ret + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_32_nv) + + /* + * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_64_nv. + */ + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi + movl (%edi), %eax + movl 4(%edi), %edx +1: + xorl %ebx, %ebx + xorl %ecx, %ecx + not %ecx + not %ebx + addl %eax, %ebx + adcl %edx, %ecx + lock + cmpxchg8b (%edi) + jne 1b + movl %ebx, %eax + movl %ecx, %edx + popl %ebx + popl %edi + ret + SET_SIZE(atomic_dec_64_nv) + SET_SIZE(atomic_dec_64) + + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_char) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addb %cl, (%eax) + ret + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8) + + ENTRY(atomic_add_16) + ALTENTRY(atomic_add_short) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addw %cx, (%eax) + ret + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16) + + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_int) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_long) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addl %ecx, (%eax) + ret + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32) + + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_uchar) + movl 4(%esp), %eax + movb 8(%esp), %cl + lock + orb %cl, (%eax) + ret + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8) + + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_ushort) + movl 4(%esp), %eax + movw 8(%esp), %cx + lock + orw %cx, (%eax) + ret + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16) + + ENTRY(atomic_or_32) + ALTENTRY(atomic_or_uint) + ALTENTRY(atomic_or_ulong) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + orl %ecx, (%eax) + ret + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32) + + ENTRY(atomic_and_8) + 
ALTENTRY(atomic_and_uchar) + movl 4(%esp), %eax + movb 8(%esp), %cl + lock + andb %cl, (%eax) + ret + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8) + + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_ushort) + movl 4(%esp), %eax + movw 8(%esp), %cx + lock + andw %cx, (%eax) + ret + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16) + + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_uint) + ALTENTRY(atomic_and_ulong) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + andl %ecx, (%eax) + ret + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32) + + ENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char_nv) + movl 4(%esp), %edx + movb (%edx), %al +1: + movl 8(%esp), %ecx + addb %al, %cl + lock + cmpxchgb %cl, (%edx) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_8_nv) + + ENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short_nv) + movl 4(%esp), %edx + movw (%edx), %ax +1: + movl 8(%esp), %ecx + addw %ax, %cx + lock + cmpxchgw %cx, (%edx) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_16_nv) + + ENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int_nv) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long_nv) + movl 4(%esp), %edx + movl (%edx), %eax +1: + movl 8(%esp), %ecx + addl %eax, %ecx + lock + cmpxchgl %ecx, (%edx) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_32_nv) + + /* + * NOTE: If atomic_add_64 and atomic_add_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_64_nv. 
+ */ + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi + movl (%edi), %eax + movl 4(%edi), %edx +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx + addl %eax, %ebx + adcl %edx, %ecx + lock + cmpxchg8b (%edi) + jne 1b + movl %ebx, %eax + movl %ecx, %edx + popl %ebx + popl %edi + ret + SET_SIZE(atomic_add_64_nv) + SET_SIZE(atomic_add_64) + + ENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar_nv) + movl 4(%esp), %edx + movb (%edx), %al +1: + movl 8(%esp), %ecx + orb %al, %cl + lock + cmpxchgb %cl, (%edx) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_8_nv) + + ENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort_nv) + movl 4(%esp), %edx + movw (%edx), %ax +1: + movl 8(%esp), %ecx + orw %ax, %cx + lock + cmpxchgw %cx, (%edx) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_16_nv) + + ENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint_nv) + ALTENTRY(atomic_or_ulong_nv) + movl 4(%esp), %edx + movl (%edx), %eax +1: + movl 8(%esp), %ecx + orl %eax, %ecx + lock + cmpxchgl %ecx, (%edx) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_32_nv) + + /* + * NOTE: If atomic_or_64 and atomic_or_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_64_nv. 
+ */ + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi + movl (%edi), %eax + movl 4(%edi), %edx +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx + orl %eax, %ebx + orl %edx, %ecx + lock + cmpxchg8b (%edi) + jne 1b + movl %ebx, %eax + movl %ecx, %edx + popl %ebx + popl %edi + ret + SET_SIZE(atomic_or_64_nv) + SET_SIZE(atomic_or_64) + + ENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar_nv) + movl 4(%esp), %edx + movb (%edx), %al +1: + movl 8(%esp), %ecx + andb %al, %cl + lock + cmpxchgb %cl, (%edx) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort_nv) + movl 4(%esp), %edx + movw (%edx), %ax +1: + movl 8(%esp), %ecx + andw %ax, %cx + lock + cmpxchgw %cx, (%edx) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_16_nv) + + ENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint_nv) + ALTENTRY(atomic_and_ulong_nv) + movl 4(%esp), %edx + movl (%edx), %eax +1: + movl 8(%esp), %ecx + andl %eax, %ecx + lock + cmpxchgl %ecx, (%edx) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_32_nv) + + /* + * NOTE: If atomic_and_64 and atomic_and_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_64_nv. 
+ */ + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi + movl (%edi), %eax + movl 4(%edi), %edx +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx + andl %eax, %ebx + andl %edx, %ecx + lock + cmpxchg8b (%edi) + jne 1b + movl %ebx, %eax + movl %ecx, %edx + popl %ebx + popl %edi + ret + SET_SIZE(atomic_and_64_nv) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + movl 4(%esp), %edx + movzbl 8(%esp), %eax + movb 12(%esp), %cl + lock + cmpxchgb %cl, (%edx) + ret + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + movl 4(%esp), %edx + movzwl 8(%esp), %eax + movw 12(%esp), %cx + lock + cmpxchgw %cx, (%edx) + ret + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + ALTENTRY(atomic_cas_ulong) + ALTENTRY(atomic_cas_ptr) + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + lock + cmpxchgl %ecx, (%edx) + ret + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + pushl %ebx + pushl %esi + movl 12(%esp), %esi + movl 16(%esp), %eax + movl 20(%esp), %edx + movl 24(%esp), %ebx + movl 28(%esp), %ecx + lock + cmpxchg8b (%esi) + popl %esi + popl %ebx + ret + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + movl 4(%esp), %edx + movzbl 8(%esp), %eax + lock + xchgb %al, (%edx) + ret + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + movl 4(%esp), %edx + movzwl 8(%esp), %eax + lock + xchgw %ax, (%edx) + ret + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + ALTENTRY(atomic_swap_ptr) + ALTENTRY(atomic_swap_ulong) + movl 4(%esp), %edx + movl 8(%esp), %eax + lock + xchgl %eax, (%edx) + ret + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_ptr) + 
SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + pushl %esi + pushl %ebx + movl 12(%esp), %esi + movl 16(%esp), %ebx + movl 20(%esp), %ecx + movl (%esi), %eax + movl 4(%esi), %edx +1: + lock + cmpxchg8b (%esi) + jne 1b + popl %ebx + popl %esi + ret + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + movl 4(%esp), %edx + movl 8(%esp), %ecx + xorl %eax, %eax + lock + btsl %ecx, (%edx) + jnc 1f + decl %eax +1: + ret + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + movl 4(%esp), %edx + movl 8(%esp), %ecx + xorl %eax, %eax + lock + btrl %ecx, (%edx) + jc 1f + decl %eax +1: + ret + SET_SIZE(atomic_clear_long_excl) + + /* + * NOTE: membar_enter, membar_exit, membar_producer, and + * membar_consumer are all identical routines. We define them + * separately, instead of using ALTENTRY definitions to alias them + * together, so that DTrace and debuggers will see a unique address + * for them, allowing more accurate tracing. + */ + + + ENTRY(membar_enter) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_consumer) + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif diff --git a/lib/libspl/asm-x86_64/Makefile.am b/lib/libspl/asm-x86_64/Makefile.am new file mode 100644 index 000000000..02403eceb --- /dev/null +++ b/lib/libspl/asm-x86_64/Makefile.am @@ -0,0 +1 @@ +noinst_HEADERS = *.S diff --git a/lib/libspl/asm-x86_64/atomic.S b/lib/libspl/asm-x86_64/atomic.S new file mode 100644 index 000000000..e321bf732 --- /dev/null +++ b/lib/libspl/asm-x86_64/atomic.S @@ -0,0 +1,595 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "%Z%%M% %I% %E% SMI" + + .file "%M%" + +#define _ASM +#include <ia32/sys/asm_linkage.h> + + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_uchar) + lock + incb (%rdi) + ret + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8) + + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_ushort) + lock + incw (%rdi) + ret + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16) + + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_uint) + lock + incl (%rdi) + ret + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32) + + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_ulong) + lock + incq (%rdi) + ret + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_64) + + ENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar_nv) + movb (%rdi), %al +1: + leaq 1(%rax), %rcx + lock + cmpxchgb %cl, (%rdi) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_8_nv) + + ENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort_nv) + movw (%rdi), %ax +1: + leaq 1(%rax), %rcx + lock + cmpxchgw %cx, (%rdi) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_16_nv) + + ENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint_nv) + movl (%rdi), %eax +1: + leaq 1(%rax), %rcx + 
lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_32_nv) + + ENTRY(atomic_inc_64_nv) + ALTENTRY(atomic_inc_ulong_nv) + movq (%rdi), %rax +1: + leaq 1(%rax), %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_64_nv) + + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_uchar) + lock + decb (%rdi) + ret + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8) + + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_ushort) + lock + decw (%rdi) + ret + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16) + + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_uint) + lock + decl (%rdi) + ret + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32) + + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_ulong) + lock + decq (%rdi) + ret + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_64) + + ENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar_nv) + movb (%rdi), %al +1: + leaq -1(%rax), %rcx + lock + cmpxchgb %cl, (%rdi) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_8_nv) + + ENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort_nv) + movw (%rdi), %ax +1: + leaq -1(%rax), %rcx + lock + cmpxchgw %cx, (%rdi) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_16_nv) + + ENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint_nv) + movl (%rdi), %eax +1: + leaq -1(%rax), %rcx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_32_nv) + + ENTRY(atomic_dec_64_nv) + ALTENTRY(atomic_dec_ulong_nv) + movq (%rdi), %rax +1: + leaq -1(%rax), %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_64_nv) + + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_char) + lock + addb %sil, (%rdi) + ret + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8) + + ENTRY(atomic_add_16) + 
ALTENTRY(atomic_add_short) + lock + addw %si, (%rdi) + ret + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16) + + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_int) + lock + addl %esi, (%rdi) + ret + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32) + + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_long) + lock + addq %rsi, (%rdi) + ret + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_64) + + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_uchar) + lock + orb %sil, (%rdi) + ret + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8) + + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_ushort) + lock + orw %si, (%rdi) + ret + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16) + + ENTRY(atomic_or_32) + ALTENTRY(atomic_or_uint) + lock + orl %esi, (%rdi) + ret + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32) + + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_ulong) + lock + orq %rsi, (%rdi) + ret + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_64) + + ENTRY(atomic_and_8) + ALTENTRY(atomic_and_uchar) + lock + andb %sil, (%rdi) + ret + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8) + + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_ushort) + lock + andw %si, (%rdi) + ret + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16) + + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_uint) + lock + andl %esi, (%rdi) + ret + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32) + + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_ulong) + lock + andq %rsi, (%rdi) + ret + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char_nv) + movb (%rdi), %al +1: + movb %sil, %cl + addb %al, %cl + lock + cmpxchgb %cl, (%rdi) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_8_nv) + + ENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short_nv) + movw (%rdi), %ax +1: + movw %si, %cx + addw %ax, %cx + lock + cmpxchgw %cx, (%rdi) + jne 1b + movzwl %cx, %eax + ret + 
SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_16_nv) + + ENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + addl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_32_nv) + + ENTRY(atomic_add_64_nv) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + addq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_64_nv) + + ENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar_nv) + movb (%rdi), %al +1: + movb %sil, %cl + andb %al, %cl + lock + cmpxchgb %cl, (%rdi) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort_nv) + movw (%rdi), %ax +1: + movw %si, %cx + andw %ax, %cx + lock + cmpxchgw %cx, (%rdi) + jne 1b + movzwl %cx, %eax + ret + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_16_nv) + + ENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + andl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_32_nv) + + ENTRY(atomic_and_64_nv) + ALTENTRY(atomic_and_ulong_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + andq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_64_nv) + + ENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar_nv) + movb (%rdi), %al +1: + movb %sil, %cl + orb %al, %cl + lock + cmpxchgb %cl, (%rdi) + jne 1b + movzbl %cl, %eax + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort_nv) + movw (%rdi), %ax +1: + movw %si, %cx + orw %ax, %cx + lock + cmpxchgw %cx, (%rdi) + jne 1b + movzwl %cx, %eax + ret + 
SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_16_nv) + + ENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + orl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_32_nv) + + ENTRY(atomic_or_64_nv) + ALTENTRY(atomic_or_ulong_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + orq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_64_nv) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + movzbl %sil, %eax + lock + cmpxchgb %dl, (%rdi) + ret + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + movzwl %si, %eax + lock + cmpxchgw %dx, (%rdi) + ret + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + movl %esi, %eax + lock + cmpxchgl %edx, (%rdi) + ret + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + ALTENTRY(atomic_cas_ulong) + ALTENTRY(atomic_cas_ptr) + movq %rsi, %rax + lock + cmpxchgq %rdx, (%rdi) + ret + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + movzbl %sil, %eax + lock + xchgb %al, (%rdi) + ret + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + movzwl %si, %eax + lock + xchgw %ax, (%rdi) + ret + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + movl %esi, %eax + lock + xchgl %eax, (%rdi) + ret + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + ALTENTRY(atomic_swap_ulong) + ALTENTRY(atomic_swap_ptr) + movq %rsi, %rax + lock + xchgq %rax, (%rdi) + ret + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + xorl 
%eax, %eax + lock + btsq %rsi, (%rdi) + jnc 1f + decl %eax +1: + ret + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + xorl %eax, %eax + lock + btrq %rsi, (%rdi) + jc 1f + decl %eax +1: + ret + SET_SIZE(atomic_clear_long_excl) + + /* + * NOTE: membar_enter, and membar_exit are identical routines. + * We define them separately, instead of using an ALTENTRY + * definitions to alias them together, so that DTrace and + * debuggers will see a unique address for them, allowing + * more accurate tracing. + */ + + ENTRY(membar_enter) + mfence + ret + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + mfence + ret + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + sfence + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lfence + ret + SET_SIZE(membar_consumer) + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif diff --git a/lib/libspl/getexecname.c b/lib/libspl/getexecname.c new file mode 100644 index 000000000..43bf39ae3 --- /dev/null +++ b/lib/libspl/getexecname.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <limits.h>

/* <limits.h> is not required to define PATH_MAX; use the usual Linux value. */
#ifndef PATH_MAX
#define	PATH_MAX	4096
#endif

/*
 * Return the path of the running executable, or NULL if it cannot be
 * determined.
 *
 * The path is obtained by reading the /proc/self/exe symlink and is cached
 * in a static buffer; later calls return the cached string.  A failed
 * lookup is not cached, so the next call tries again.  The returned storage
 * belongs to this function and must not be modified or freed by the caller.
 */
const char *
getexecname(void)
{
	static char execname[PATH_MAX + 1] = "";
	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
	const char *ptr = NULL;
	ssize_t rc;

	/* Serialize the one-time fill of the static buffer. */
	pthread_mutex_lock(&mtx);

	if (execname[0] == '\0') {
		/* readlink() does not NUL-terminate; keep one byte spare. */
		rc = readlink("/proc/self/exe", execname, sizeof(execname) - 1);
		if (rc == -1) {
			execname[0] = '\0';
		} else {
			execname[rc] = '\0';
			ptr = execname;
		}
	} else {
		/*
		 * Already resolved by an earlier call.  Previously this path
		 * fell through with ptr still NULL, so only the very first
		 * call ever returned the name.
		 */
		ptr = execname;
	}

	pthread_mutex_unlock(&mtx);
	return ptr;
}
+ */ + +#include <time.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdio.h> + +hrtime_t +gethrtime(void) +{ + struct timespec ts; + int rc; + + rc = clock_gettime(CLOCK_MONOTONIC, &ts); + if (rc) { + fprintf(stderr, "Error: clock_gettime() = %d\n", rc); + abort(); + } + + return (((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec; +} diff --git a/lib/libspl/getmntany.c b/lib/libspl/getmntany.c new file mode 100644 index 000000000..f0b1cda4b --- /dev/null +++ b/lib/libspl/getmntany.c @@ -0,0 +1,99 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Ricardo Correia. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +#include <stdio.h> +#include <string.h> +#include <mntent.h> +#include <sys/mnttab.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#define BUFSIZE (MNT_LINE_MAX + 2) + +__thread char buf[BUFSIZE]; + +#define DIFF(xx) ((mrefp->xx != NULL) && \ + (mgetp->xx == NULL || strcmp(mrefp->xx, mgetp->xx) != 0)) + +int +getmntany(FILE *fp, struct mnttab *mgetp, struct mnttab *mrefp) +{ + int ret; + + while (((ret = _sol_getmntent(fp, mgetp)) == 0) && + (DIFF(mnt_special) || DIFF(mnt_mountp) || + DIFF(mnt_fstype) || DIFF(mnt_mntopts))); + + return ret; +} + +int +_sol_getmntent(FILE *fp, struct mnttab *mgetp) +{ + struct mntent mntbuf; + struct mntent *ret; + + ret = getmntent_r(fp, &mntbuf, buf, BUFSIZE); + + if (ret != NULL) { + mgetp->mnt_special = mntbuf.mnt_fsname; + mgetp->mnt_mountp = mntbuf.mnt_dir; + mgetp->mnt_fstype = mntbuf.mnt_type; + mgetp->mnt_mntopts = mntbuf.mnt_opts; + return 0; + } + + if (feof(fp)) + return -1; + + return MNT_TOOLONG; +} + +int +getextmntent(FILE *fp, struct extmnttab *mp, int len) +{ + int ret; + struct stat64 st; + + ret = _sol_getmntent(fp, (struct mnttab *) mp); + if (ret == 0) { + if (stat64(mp->mnt_mountp, &st) != 0) { + mp->mnt_major = 0; + mp->mnt_minor = 0; + return ret; + } + mp->mnt_major = major(st.st_dev); + mp->mnt_minor = minor(st.st_dev); + } + + return ret; +} diff --git a/lib/libspl/include/Makefile.am b/lib/libspl/include/Makefile.am new file mode 100644 index 000000000..b47fad239 --- /dev/null +++ b/lib/libspl/include/Makefile.am @@ -0,0 +1,8 @@ +nobase_pkginclude_HEADERS = *.h +nobase_pkginclude_HEADERS += ia32/sys/*.h +nobase_pkginclude_HEADERS += rpc/*.h +nobase_pkginclude_HEADERS += sys/*.h +nobase_pkginclude_HEADERS += sys/fm/*.h +nobase_pkginclude_HEADERS += sys/dktp/*.h +nobase_pkginclude_HEADERS += sys/sysevent/*.h +nobase_pkginclude_HEADERS += tsol/*.h diff --git a/lib/libspl/include/assert.h 
b/lib/libspl/include/assert.h new file mode 100644 index 000000000..7f145b89a --- /dev/null +++ b/lib/libspl/include/assert.h @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * Pull in the next <assert.h> on the include path first.  This sits
 * outside the include guard below, so it is repeated on every inclusion
 * of this header.
 */
#include_next <assert.h>

#ifndef _LIBSPL_ASSERT_H
#define	_LIBSPL_ASSERT_H

#include <stdio.h>
#include <stdlib.h>

/*
 * Called with the failed expression text, file name and line number by
 * the pre-C99 verify() and by VERIFY3_IMPL().  Defined outside this header.
 */
extern void __assert(const char *, const char *, int);

#ifndef __assert_c99
/* Print "file:line: func: Assertion `expr` failed." to stderr and abort. */
static inline void
__assert_c99(const char *expr, const char *file, int line, const char *func)
{
	fprintf(stderr, "%s:%i: %s: Assertion `%s` failed.\n",
	    file, line, func, expr);
	abort();
}
#endif  /* __assert_c99 */

/*
 * verify(EX): evaluate EX and report a failure when it is false.  Unlike
 * the ASSERT3x macros below, its definition does not depend on NDEBUG.
 */
#ifndef verify
#if defined(__STDC__)
#if __STDC_VERSION__ - 0 >= 199901L
#define	verify(EX) (void)((EX) || \
	(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
#else
#define	verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
#endif /* __STDC_VERSION__ - 0 >= 199901L */
#else
#define	verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
#endif	/* __STDC__ */
#endif	/* verify */

#undef VERIFY
#undef ASSERT

#define	VERIFY	verify
#define	ASSERT	assert

/*
 * Compare LEFT and RIGHT after converting both to TYPE.  Each operand is
 * evaluated exactly once.  On failure the message holds the source text of
 * the comparison followed by both values in hexadecimal.
 *
 * The message is built in a fixed local array rather than alloca() memory,
 * so the header does not rely on alloca() having been declared.
 */
/* BEGIN CSTYLED */
#define	VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do {			\
	const TYPE __left = (TYPE)(LEFT);				\
	const TYPE __right = (TYPE)(RIGHT);				\
	if (!(__left OP __right)) {					\
		char __buf[256];					\
		(void) snprintf(__buf, sizeof (__buf),			\
		    "%s %s %s (0x%llx %s 0x%llx)",			\
		    #LEFT, #OP, #RIGHT,					\
		    (u_longlong_t)__left, #OP, (u_longlong_t)__right);	\
		__assert(__buf, __FILE__, __LINE__);			\
	}								\
} while (0)
/* END CSTYLED */

/* Signed, unsigned and pointer flavours of the three-argument check. */
#define	VERIFY3S(x, y, z)	VERIFY3_IMPL(x, y, z, int64_t)
#define	VERIFY3U(x, y, z)	VERIFY3_IMPL(x, y, z, uint64_t)
#define	VERIFY3P(x, y, z)	VERIFY3_IMPL(x, y, z, uintptr_t)

/*
 * ASSERT3x mirror VERIFY3x but compile to nothing under NDEBUG.
 * ASSERTV(x) emits x verbatim in debug builds and nothing under NDEBUG.
 */
#ifdef NDEBUG
#define	ASSERT3S(x, y, z)	((void)0)
#define	ASSERT3U(x, y, z)	((void)0)
#define	ASSERT3P(x, y, z)	((void)0)
#define	ASSERTV(x)
#else
#define	ASSERT3S(x, y, z)	VERIFY3S(x, y, z)
#define	ASSERT3U(x, y, z)	VERIFY3U(x, y, z)
#define	ASSERT3P(x, y, z)	VERIFY3P(x, y, z)
#define	ASSERTV(x)		x
#endif  /* NDEBUG */

#endif  /* _LIBSPL_ASSERT_H */
file mode 100644 index 000000000..508000152 --- /dev/null +++ b/lib/libspl/include/atomic.h @@ -0,0 +1,266 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ATOMIC_H +#define _SYS_ATOMIC_H + +#include <sys/types.h> +#include <sys/inttypes.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__STDC__) +/* + * Increment target. 
+ */ +extern void atomic_inc_8(volatile uint8_t *); +extern void atomic_inc_uchar(volatile uchar_t *); +extern void atomic_inc_16(volatile uint16_t *); +extern void atomic_inc_ushort(volatile ushort_t *); +extern void atomic_inc_32(volatile uint32_t *); +extern void atomic_inc_uint(volatile uint_t *); +extern void atomic_inc_ulong(volatile ulong_t *); +#if defined(_INT64_TYPE) +extern void atomic_inc_64(volatile uint64_t *); +#endif + +/* + * Decrement target + */ +extern void atomic_dec_8(volatile uint8_t *); +extern void atomic_dec_uchar(volatile uchar_t *); +extern void atomic_dec_16(volatile uint16_t *); +extern void atomic_dec_ushort(volatile ushort_t *); +extern void atomic_dec_32(volatile uint32_t *); +extern void atomic_dec_uint(volatile uint_t *); +extern void atomic_dec_ulong(volatile ulong_t *); +#if defined(_INT64_TYPE) +extern void atomic_dec_64(volatile uint64_t *); +#endif + +/* + * Add delta to target + */ +extern void atomic_add_8(volatile uint8_t *, int8_t); +extern void atomic_add_char(volatile uchar_t *, signed char); +extern void atomic_add_16(volatile uint16_t *, int16_t); +extern void atomic_add_short(volatile ushort_t *, short); +extern void atomic_add_32(volatile uint32_t *, int32_t); +extern void atomic_add_int(volatile uint_t *, int); +extern void atomic_add_ptr(volatile void *, ssize_t); +extern void atomic_add_long(volatile ulong_t *, long); +#if defined(_INT64_TYPE) +extern void atomic_add_64(volatile uint64_t *, int64_t); +#endif + +/* + * logical OR bits with target + */ +extern void atomic_or_8(volatile uint8_t *, uint8_t); +extern void atomic_or_uchar(volatile uchar_t *, uchar_t); +extern void atomic_or_16(volatile uint16_t *, uint16_t); +extern void atomic_or_ushort(volatile ushort_t *, ushort_t); +extern void atomic_or_32(volatile uint32_t *, uint32_t); +extern void atomic_or_uint(volatile uint_t *, uint_t); +extern void atomic_or_ulong(volatile ulong_t *, ulong_t); +#if defined(_INT64_TYPE) +extern void atomic_or_64(volatile 
uint64_t *, uint64_t); +#endif + +/* + * logical AND bits with target + */ +extern void atomic_and_8(volatile uint8_t *, uint8_t); +extern void atomic_and_uchar(volatile uchar_t *, uchar_t); +extern void atomic_and_16(volatile uint16_t *, uint16_t); +extern void atomic_and_ushort(volatile ushort_t *, ushort_t); +extern void atomic_and_32(volatile uint32_t *, uint32_t); +extern void atomic_and_uint(volatile uint_t *, uint_t); +extern void atomic_and_ulong(volatile ulong_t *, ulong_t); +#if defined(_INT64_TYPE) +extern void atomic_and_64(volatile uint64_t *, uint64_t); +#endif + +/* + * As above, but return the new value. Note that these _nv() variants are + * substantially more expensive on some platforms than the no-return-value + * versions above, so don't use them unless you really need to know the + * new value *atomically* (e.g. when decrementing a reference count and + * checking whether it went to zero). + */ + +/* + * Increment target and return new value. + */ +extern uint8_t atomic_inc_8_nv(volatile uint8_t *); +extern uchar_t atomic_inc_uchar_nv(volatile uchar_t *); +extern uint16_t atomic_inc_16_nv(volatile uint16_t *); +extern ushort_t atomic_inc_ushort_nv(volatile ushort_t *); +extern uint32_t atomic_inc_32_nv(volatile uint32_t *); +extern uint_t atomic_inc_uint_nv(volatile uint_t *); +extern ulong_t atomic_inc_ulong_nv(volatile ulong_t *); +#if defined(_INT64_TYPE) +extern uint64_t atomic_inc_64_nv(volatile uint64_t *); +#endif + +/* + * Decrement target and return new value. 
+ */ +extern uint8_t atomic_dec_8_nv(volatile uint8_t *); +extern uchar_t atomic_dec_uchar_nv(volatile uchar_t *); +extern uint16_t atomic_dec_16_nv(volatile uint16_t *); +extern ushort_t atomic_dec_ushort_nv(volatile ushort_t *); +extern uint32_t atomic_dec_32_nv(volatile uint32_t *); +extern uint_t atomic_dec_uint_nv(volatile uint_t *); +extern ulong_t atomic_dec_ulong_nv(volatile ulong_t *); +#if defined(_INT64_TYPE) +extern uint64_t atomic_dec_64_nv(volatile uint64_t *); +#endif + +/* + * Add delta to target + */ +extern uint8_t atomic_add_8_nv(volatile uint8_t *, int8_t); +extern uchar_t atomic_add_char_nv(volatile uchar_t *, signed char); +extern uint16_t atomic_add_16_nv(volatile uint16_t *, int16_t); +extern ushort_t atomic_add_short_nv(volatile ushort_t *, short); +extern uint32_t atomic_add_32_nv(volatile uint32_t *, int32_t); +extern uint_t atomic_add_int_nv(volatile uint_t *, int); +extern void *atomic_add_ptr_nv(volatile void *, ssize_t); +extern ulong_t atomic_add_long_nv(volatile ulong_t *, long); +#if defined(_INT64_TYPE) +extern uint64_t atomic_add_64_nv(volatile uint64_t *, int64_t); +#endif + +/* + * logical OR bits with target and return new value. + */ +extern uint8_t atomic_or_8_nv(volatile uint8_t *, uint8_t); +extern uchar_t atomic_or_uchar_nv(volatile uchar_t *, uchar_t); +extern uint16_t atomic_or_16_nv(volatile uint16_t *, uint16_t); +extern ushort_t atomic_or_ushort_nv(volatile ushort_t *, ushort_t); +extern uint32_t atomic_or_32_nv(volatile uint32_t *, uint32_t); +extern uint_t atomic_or_uint_nv(volatile uint_t *, uint_t); +extern ulong_t atomic_or_ulong_nv(volatile ulong_t *, ulong_t); +#if defined(_INT64_TYPE) +extern uint64_t atomic_or_64_nv(volatile uint64_t *, uint64_t); +#endif + +/* + * logical AND bits with target and return new value. 
+ */ +extern uint8_t atomic_and_8_nv(volatile uint8_t *, uint8_t); +extern uchar_t atomic_and_uchar_nv(volatile uchar_t *, uchar_t); +extern uint16_t atomic_and_16_nv(volatile uint16_t *, uint16_t); +extern ushort_t atomic_and_ushort_nv(volatile ushort_t *, ushort_t); +extern uint32_t atomic_and_32_nv(volatile uint32_t *, uint32_t); +extern uint_t atomic_and_uint_nv(volatile uint_t *, uint_t); +extern ulong_t atomic_and_ulong_nv(volatile ulong_t *, ulong_t); +#if defined(_INT64_TYPE) +extern uint64_t atomic_and_64_nv(volatile uint64_t *, uint64_t); +#endif + +/* + * If *arg1 == arg2, set *arg1 = arg3; return old value + */ +extern uint8_t atomic_cas_8(volatile uint8_t *, uint8_t, uint8_t); +extern uchar_t atomic_cas_uchar(volatile uchar_t *, uchar_t, uchar_t); +extern uint16_t atomic_cas_16(volatile uint16_t *, uint16_t, uint16_t); +extern ushort_t atomic_cas_ushort(volatile ushort_t *, ushort_t, ushort_t); +extern uint32_t atomic_cas_32(volatile uint32_t *, uint32_t, uint32_t); +extern uint_t atomic_cas_uint(volatile uint_t *, uint_t, uint_t); +extern void *atomic_cas_ptr(volatile void *, void *, void *); +extern ulong_t atomic_cas_ulong(volatile ulong_t *, ulong_t, ulong_t); +#if defined(_INT64_TYPE) +extern uint64_t atomic_cas_64(volatile uint64_t *, uint64_t, uint64_t); +#endif + +/* + * Swap target and return old value + */ +extern uint8_t atomic_swap_8(volatile uint8_t *, uint8_t); +extern uchar_t atomic_swap_uchar(volatile uchar_t *, uchar_t); +extern uint16_t atomic_swap_16(volatile uint16_t *, uint16_t); +extern ushort_t atomic_swap_ushort(volatile ushort_t *, ushort_t); +extern uint32_t atomic_swap_32(volatile uint32_t *, uint32_t); +extern uint_t atomic_swap_uint(volatile uint_t *, uint_t); +extern void *atomic_swap_ptr(volatile void *, void *); +extern ulong_t atomic_swap_ulong(volatile ulong_t *, ulong_t); +#if defined(_INT64_TYPE) +extern uint64_t atomic_swap_64(volatile uint64_t *, uint64_t); +#endif + +/* + * Perform an exclusive atomic bit 
set/clear on a target. + * Returns 0 if bit was successfully set/cleared, or -1 + * if the bit was already set/cleared. + */ +extern int atomic_set_long_excl(volatile ulong_t *, uint_t); +extern int atomic_clear_long_excl(volatile ulong_t *, uint_t); + +/* + * Generic memory barrier used during lock entry, placed after the + * memory operation that acquires the lock to guarantee that the lock + * protects its data. No stores from after the memory barrier will + * reach visibility, and no loads from after the barrier will be + * resolved, before the lock acquisition reaches global visibility. + */ +extern void membar_enter(void); + +/* + * Generic memory barrier used during lock exit, placed before the + * memory operation that releases the lock to guarantee that the lock + * protects its data. All loads and stores issued before the barrier + * will be resolved before the subsequent lock update reaches visibility. + */ +extern void membar_exit(void); + +/* + * Arrange that all stores issued before this point in the code reach + * global visibility before any stores that follow; useful in producer + * modules that update a data item, then set a flag that it is available. + * The memory barrier guarantees that the available flag is not visible + * earlier than the updated data, i.e. it imposes store ordering. + */ +extern void membar_producer(void); + +/* + * Arrange that all loads issued before this point in the code are + * completed before any subsequent loads; useful in consumer modules + * that check to see if data is available and read the data. + * The memory barrier guarantees that the data is not sampled until + * after the available flag has been seen, i.e. it imposes load ordering.
+ */ +extern void membar_consumer(void); +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ATOMIC_H */ diff --git a/lib/libspl/include/devid.h b/lib/libspl/include/devid.h new file mode 100644 index 000000000..9dfdae84b --- /dev/null +++ b/lib/libspl/include/devid.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
#ifndef _LIBSPL_DEVID_H
#define	_LIBSPL_DEVID_H

#include <stdlib.h>
#include <sys/types.h>	/* dev_t */

/*
 * Placeholder device-id interface.  Apart from devid_get(), which always
 * reports failure, every function below calls abort() when invoked.
 */

typedef int ddi_devid_t;

typedef struct devid_nmlist {
	char *devname;
	dev_t dev;
} devid_nmlist_t;

/* Not implemented: aborts. */
static inline int
devid_str_decode(char *devidstr, ddi_devid_t *retdevid, char **retminor_name)
{
	abort();
}

/* Not implemented: aborts. */
static inline int
devid_deviceid_to_nmlist(char *search_path, ddi_devid_t devid,
    char *minor_name, devid_nmlist_t **retlist)
{
	abort();
}

/* Not implemented: aborts. */
static inline void
devid_str_free(char *str)
{
	abort();
}

/* Not implemented: aborts. */
static inline void
devid_free(ddi_devid_t devid)
{
	abort();
}

/* Not implemented: aborts. */
static inline void
devid_free_nmlist(devid_nmlist_t *list)
{
	abort();
}

/* Always fails with -1 and leaves *retdevid untouched. */
static inline int
devid_get(int fd, ddi_devid_t *retdevid)
{
	return -1;
}

/* Not implemented: aborts. */
static inline int
devid_get_minor_name(int fd, char **retminor_name)
{
	abort();
}

/* Not implemented: aborts. */
static inline char *
devid_str_encode(ddi_devid_t devid, char *minor_name)
{
	abort();
}

#endif	/* _LIBSPL_DEVID_H */
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _IA32_SYS_ASM_LINKAGE_H +#define _IA32_SYS_ASM_LINKAGE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _ASM /* The remainder of this file is only for assembly files */ + +/* + * make annoying differences in assembler syntax go away + */ + +/* + * D16 and A16 are used to insert instructions prefixes; the + * macros help the assembler code be slightly more portable. + */ +#if !defined(__GNUC_AS__) +/* + * /usr/ccs/bin/as prefixes are parsed as separate instructions + */ +#define D16 data16; +#define A16 addr16; + +/* + * (There are some weird constructs in constant expressions) + */ +#define _CONST(const) [const] +#define _BITNOT(const) -1!_CONST(const) +#define _MUL(a, b) _CONST(a \* b) + +#else +/* + * Why not use the 'data16' and 'addr16' prefixes .. well, the + * assembler doesn't quite believe in real mode, and thus argues with + * us about what we're trying to do. + */ +#define D16 .byte 0x66; +#define A16 .byte 0x67; + +#define _CONST(const) (const) +#define _BITNOT(const) ~_CONST(const) +#define _MUL(a, b) _CONST(a * b) + +#endif + +/* + * C pointers are different sizes between i386 and amd64. + * These constants can be used to compute offsets into pointer arrays. + */ +#if defined(__amd64) +#define CLONGSHIFT 3 +#define CLONGSIZE 8 +#define CLONGMASK 7 +#elif defined(__i386) +#define CLONGSHIFT 2 +#define CLONGSIZE 4 +#define CLONGMASK 3 +#endif + +/* + * Since we know we're either ILP32 or LP64 .. 
+ */ +#define CPTRSHIFT CLONGSHIFT +#define CPTRSIZE CLONGSIZE +#define CPTRMASK CLONGMASK + +#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) +#error "inconsistent shift constants" +#endif + +#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) +#error "inconsistent mask constants" +#endif + +#define ASM_ENTRY_ALIGN 16 + +/* + * SSE register alignment and save areas + */ + +#define XMM_SIZE 16 +#define XMM_ALIGN 16 + +#if defined(__amd64) + +#define SAVE_XMM_PROLOG(sreg, nreg) \ + subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \ + movq %rsp, sreg + +#define RSTOR_XMM_EPILOG(sreg, nreg) \ + addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp + +#elif defined(__i386) + +#define SAVE_XMM_PROLOG(sreg, nreg) \ + subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \ + movl %esp, sreg; \ + addl $XMM_ALIGN, sreg; \ + andl $_BITNOT(XMM_ALIGN-1), sreg + +#define RSTOR_XMM_EPILOG(sreg, nreg) \ + addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; + +#endif /* __i386 */ + +/* + * profiling causes definitions of the MCOUNT and RTMCOUNT + * particular to the type + */ +#ifdef GPROF + +#define MCOUNT(x) \ + pushl %ebp; \ + movl %esp, %ebp; \ + call _mcount; \ + popl %ebp + +#endif /* GPROF */ + +#ifdef PROF + +#define MCOUNT(x) \ +/* CSTYLED */ \ + .lcomm .L_/**/x/**/1, 4, 4; \ + pushl %ebp; \ + movl %esp, %ebp; \ +/* CSTYLED */ \ + movl $.L_/**/x/**/1, %edx; \ + call _mcount; \ + popl %ebp + +#endif /* PROF */ + +/* + * if we are not profiling, MCOUNT should be defined to nothing + */ +#if !defined(PROF) && !defined(GPROF) +#define MCOUNT(x) +#endif /* !defined(PROF) && !defined(GPROF) */ + +#define RTMCOUNT(x) MCOUNT(x) + +/* + * Macro to define weak symbol aliases. These are similar to the ANSI-C + * #pragma weak name = _name + * except a compiler can determine type. The assembler must be told. Hence, + * the second parameter must be the type of the symbol (i.e.: function,...) 
+ */ +#define ANSI_PRAGMA_WEAK(sym, stype) \ + .weak sym; \ + .type sym, @stype; \ +/* CSTYLED */ \ +sym = _/**/sym + +/* + * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in: + * #pragma weak sym1 = sym2 + */ +#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \ + .weak sym1; \ + .type sym1, @stype; \ +sym1 = sym2 + +/* + * ENTRY provides the standard procedure entry code and an easy way to + * insert the calls to mcount for profiling. ENTRY_NP is identical, but + * never calls mcount. + */ +#define ENTRY(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x; \ + .type x, @function; \ +x: MCOUNT(x) + +#define ENTRY_NP(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x; \ + .type x, @function; \ +x: + +#define RTENTRY(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x; \ + .type x, @function; \ +x: RTMCOUNT(x) + +/* + * ENTRY2 is identical to ENTRY but provides two labels for the entry point. + */ +#define ENTRY2(x, y) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x, y; \ + .type x, @function; \ + .type y, @function; \ +/* CSTYLED */ \ +x: ; \ +y: MCOUNT(x) + +#define ENTRY_NP2(x, y) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x, y; \ + .type x, @function; \ + .type y, @function; \ +/* CSTYLED */ \ +x: ; \ +y: + + +/* + * ALTENTRY provides for additional entry points. + */ +#define ALTENTRY(x) \ + .globl x; \ + .type x, @function; \ +x: + +/* + * DGDEF and DGDEF2 provide global data declarations. + * + * DGDEF provides a word aligned word of storage. + * + * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This + * implies this macro is best used for byte arrays. + * + * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. 
+ */ +#define DGDEF2(name, sz) \ + .data; \ + .globl name; \ + .type name, @object; \ + .size name, sz; \ +name: + +#define DGDEF3(name, sz, algn) \ + .data; \ + .align algn; \ + .globl name; \ + .type name, @object; \ + .size name, sz; \ +name: + +#define DGDEF(name) DGDEF3(name, 4, 4) + +/* + * SET_SIZE trails a function and sets the size for the ELF symbol table. + */ +#define SET_SIZE(x) \ + .size x, [.-x] + +/* + * NWORD provides native word value. + */ +#if defined(__amd64) + +/*CSTYLED*/ +#define NWORD quad + +#elif defined(__i386) + +#define NWORD long + +#endif /* __i386 */ + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/lib/libspl/include/libdevinfo.h b/lib/libspl/include/libdevinfo.h new file mode 100644 index 000000000..f0f9d7e8e --- /dev/null +++ b/lib/libspl/include/libdevinfo.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#ifndef _LIBSPL_LIBDEVINFO_H +#define _LIBSPL_LIBDEVINFO_H + +#endif /* _LIBSPL_LIBDEVINFO_H */ diff --git a/lib/libspl/include/libshare.h b/lib/libspl/include/libshare.h new file mode 100644 index 000000000..afbdf5b05 --- /dev/null +++ b/lib/libspl/include/libshare.h @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * basic API declarations for share management + */ + +#ifndef _LIBSPL_LIBSHARE_H +#define _LIBSPL_LIBSHARE_H + +#endif /* _LIBSPL_LIBSHARE_H */ diff --git a/lib/libspl/include/limits.h b/lib/libspl/include/limits.h new file mode 100644 index 000000000..341a2eba9 --- /dev/null +++ b/lib/libspl/include/limits.h @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include_next <limits.h> + +#ifndef _LIBSPL_LIMITS_H +#define _LIBSPL_LIMITS_H + +#define DBL_DIG 15 +#define DBL_MAX 1.7976931348623157081452E+308 +#define DBL_MIN 2.2250738585072013830903E-308 + +#define FLT_DIG 6 +#define FLT_MAX 3.4028234663852885981170E+38F +#define FLT_MIN 1.1754943508222875079688E-38F + +#endif /* _LIBSPL_LIMITS_H */ diff --git a/lib/libspl/include/locale.h b/lib/libspl/include/locale.h new file mode 100644 index 000000000..98ca330c3 --- /dev/null +++ b/lib/libspl/include/locale.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include_next <locale.h> + +#ifndef _LIBSPL_LOCALE_H +#define _LIBSPL_LOCALE_H + +#include <time.h> +#include <sys/time.h> + +#endif diff --git a/lib/libspl/include/priv.h b/lib/libspl/include/priv.h new file mode 100644 index 000000000..6c9a2c0e6 --- /dev/null +++ b/lib/libspl/include/priv.h @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_PRIV_H +#define _LIBSPL_PRIV_H + +#include <sys/types.h> + +/* Couldn't find this definition in OpenGrok */ +#define PRIV_SYS_CONFIG "sys_config" + +static inline boolean_t priv_ineffect(const char *priv) { return B_TRUE; } + +#endif diff --git a/lib/libspl/include/rpc/xdr.h b/lib/libspl/include/rpc/xdr.h new file mode 100644 index 000000000..cd6680f57 --- /dev/null +++ b/lib/libspl/include/rpc/xdr.h @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T + * All Rights Reserved + * + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#ifndef LIBSPL_RPC_XDR_H +#define LIBSPL_RPC_XDR_H + +#include_next <rpc/xdr.h> + +/* + * These are XDR control operators + */ + +#define XDR_GET_BYTES_AVAIL 1 + +typedef struct xdr_bytesrec { + bool_t xc_is_last_record; + size_t xc_num_avail; +} xdr_bytesrec_t; + +/* + * These are the request arguments to XDR_CONTROL. 
+ * + * XDR_PEEK - returns the contents of the next XDR unit on the XDR stream. + * XDR_SKIPBYTES - skips the next N bytes in the XDR stream. + * XDR_RDMAGET - for xdr implementation over RDMA, gets private flags from + * the XDR stream being moved over RDMA. + * XDR_RDMASET - for xdr implementation over RDMA, sets private flags in + * the XDR stream moving over RDMA. + */ +#define XDR_PEEK 2 +#define XDR_SKIPBYTES 3 +#define XDR_RDMAGET 4 +#define XDR_RDMASET 5 + +extern bool_t xdr_control(XDR *xdrs, int request, void *info); + +#endif diff --git a/lib/libspl/include/stdio.h b/lib/libspl/include/stdio.h new file mode 100644 index 000000000..f80fdc009 --- /dev/null +++ b/lib/libspl/include/stdio.h @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#include_next <stdio.h> + +#ifndef _LIBSPL_STDIO_H +#define _LIBSPL_STDIO_H + +#define enable_extended_FILE_stdio(fd, sig) ((void) 0) + +#endif diff --git a/lib/libspl/include/stdlib.h b/lib/libspl/include/stdlib.h new file mode 100644 index 000000000..67d6e96e0 --- /dev/null +++ b/lib/libspl/include/stdlib.h @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include_next <stdlib.h> + +#ifndef _LIBSPL_STDLIB_H +#define _LIBSPL_STDLIB_H + +extern const char *getexecname(void); + +#endif diff --git a/lib/libspl/include/string.h b/lib/libspl/include/string.h new file mode 100644 index 000000000..213977d0e --- /dev/null +++ b/lib/libspl/include/string.h @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_STRING_H +#define _LIBSPL_STRING_H + +#include_next <string.h> + +extern size_t strlcat(char *dst, const char *src, size_t dstsize); +extern size_t strlcpy(char *dst, const char *src, size_t len); +extern size_t strnlen(const char *str, size_t maxlen); + +#endif diff --git a/lib/libspl/include/strings.h b/lib/libspl/include/strings.h new file mode 100644 index 000000000..48944e142 --- /dev/null +++ b/lib/libspl/include/strings.h @@ -0,0 +1,33 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#ifndef _LIBSPL_STRINGS_H +#define _LIBSPL_STRINGS_H + +#include <string.h> +#include_next <strings.h> + +#endif diff --git a/lib/libspl/include/synch.h b/lib/libspl/include/synch.h new file mode 100644 index 000000000..2da270a42 --- /dev/null +++ b/lib/libspl/include/synch.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYNCH_H +#define _LIBSPL_SYNCH_H + +#endif diff --git a/lib/libspl/include/sys/acl.h b/lib/libspl/include/sys/acl.h new file mode 100644 index 000000000..e6df864f8 --- /dev/null +++ b/lib/libspl/include/sys/acl.h @@ -0,0 +1,287 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ACL_H +#define _SYS_ACL_H + +#include <sys/types.h> +#include <sys/acl_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_ACL_ENTRIES (1024) /* max entries of each type */ +typedef struct acl { + int a_type; /* the type of ACL entry */ + uid_t a_id; /* the entry in -uid or gid */ + o_mode_t a_perm; /* the permission field */ +} aclent_t; + +typedef struct ace { + uid_t a_who; /* uid or gid */ + uint32_t a_access_mask; /* read,write,... */ + uint16_t a_flags; /* see below */ + uint16_t a_type; /* allow or deny */ +} ace_t; + +typedef struct acl_info acl_t; + +/* + * The following are Defined types for an aclent_t. 
+ */ +#define USER_OBJ (0x01) /* object owner */ +#define USER (0x02) /* additional users */ +#define GROUP_OBJ (0x04) /* owning group of the object */ +#define GROUP (0x08) /* additional groups */ +#define CLASS_OBJ (0x10) /* file group class and mask entry */ +#define OTHER_OBJ (0x20) /* other entry for the object */ +#define ACL_DEFAULT (0x1000) /* default flag */ +/* default object owner */ +#define DEF_USER_OBJ (ACL_DEFAULT | USER_OBJ) +/* default additional users */ +#define DEF_USER (ACL_DEFAULT | USER) +/* default owning group */ +#define DEF_GROUP_OBJ (ACL_DEFAULT | GROUP_OBJ) +/* default additional groups */ +#define DEF_GROUP (ACL_DEFAULT | GROUP) +/* default mask entry */ +#define DEF_CLASS_OBJ (ACL_DEFAULT | CLASS_OBJ) +/* default other entry */ +#define DEF_OTHER_OBJ (ACL_DEFAULT | OTHER_OBJ) + +/* + * The following are defined for ace_t. + */ +#define ACE_READ_DATA 0x00000001 +#define ACE_LIST_DIRECTORY 0x00000001 +#define ACE_WRITE_DATA 0x00000002 +#define ACE_ADD_FILE 0x00000002 +#define ACE_APPEND_DATA 0x00000004 +#define ACE_ADD_SUBDIRECTORY 0x00000004 +#define ACE_READ_NAMED_ATTRS 0x00000008 +#define ACE_WRITE_NAMED_ATTRS 0x00000010 +#define ACE_EXECUTE 0x00000020 +#define ACE_DELETE_CHILD 0x00000040 +#define ACE_READ_ATTRIBUTES 0x00000080 +#define ACE_WRITE_ATTRIBUTES 0x00000100 +#define ACE_DELETE 0x00010000 +#define ACE_READ_ACL 0x00020000 +#define ACE_WRITE_ACL 0x00040000 +#define ACE_WRITE_OWNER 0x00080000 +#define ACE_SYNCHRONIZE 0x00100000 + +#define ACE_FILE_INHERIT_ACE 0x0001 +#define ACE_DIRECTORY_INHERIT_ACE 0x0002 +#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004 +#define ACE_INHERIT_ONLY_ACE 0x0008 +#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010 +#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020 +#define ACE_IDENTIFIER_GROUP 0x0040 +#define ACE_INHERITED_ACE 0x0080 +#define ACE_OWNER 0x1000 +#define ACE_GROUP 0x2000 +#define ACE_EVERYONE 0x4000 + +#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000 +#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001 +#define 
ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002 +#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003 + +#define ACL_AUTO_INHERIT 0x0001 +#define ACL_PROTECTED 0x0002 +#define ACL_DEFAULTED 0x0004 +#define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED| \ + ACL_DEFAULTED) + +#ifdef _KERNEL + +/* + * These are only applicable in a CIFS context. + */ +#define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 +#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 +#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 +#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 +#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 +#define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 +#define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A +#define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B +#define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C +#define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D +#define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E +#define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F +#define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 + +#define ACE_ALL_TYPES 0x001F + +typedef struct ace_object { + uid_t a_who; /* uid or gid */ + uint32_t a_access_mask; /* read,write,... */ + uint16_t a_flags; /* see below */ + uint16_t a_type; /* allow or deny */ + uint8_t a_obj_type[16]; /* obj type */ + uint8_t a_inherit_obj_type[16]; /* inherit obj */ +} ace_object_t; + +#endif + +#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \ + ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \ + ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \ + ACE_WRITE_OWNER|ACE_SYNCHRONIZE) + +/* + * The following flags are supported by both NFSv4 ACLs and ace_t. 
+ */ +#define ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \ + ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE | \ + ACE_INHERIT_ONLY_ACE | \ + ACE_IDENTIFIER_GROUP) + +#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE| \ + ACE_IDENTIFIER_GROUP) +#define ACE_INHERIT_FLAGS (ACE_FILE_INHERIT_ACE| \ + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE) + +/* cmd args to acl(2) for aclent_t */ +#define GETACL 1 +#define SETACL 2 +#define GETACLCNT 3 + +/* cmd's to manipulate ace acls. */ +#define ACE_GETACL 4 +#define ACE_SETACL 5 +#define ACE_GETACLCNT 6 + +/* minimal acl entries from GETACLCNT */ +#define MIN_ACL_ENTRIES 4 + +#if !defined(_KERNEL) + +/* acl check errors */ +#define GRP_ERROR 1 +#define USER_ERROR 2 +#define OTHER_ERROR 3 +#define CLASS_ERROR 4 +#define DUPLICATE_ERROR 5 +#define MISS_ERROR 6 +#define MEM_ERROR 7 +#define ENTRY_ERROR 8 + + +/* + * similar to ufs_acl.h: changed to char type for user commands (tar, cpio) + * Attribute types + */ +#define UFSD_FREE ('0') /* Free entry */ +#define UFSD_ACL ('1') /* Access Control Lists */ +#define UFSD_DFACL ('2') /* reserved for future use */ +#define ACE_ACL ('3') /* ace_t style acls */ + +/* + * flag to [f]acl_get() + * controls whether a trivial acl should be returned. 
+ */ +#define ACL_NO_TRIVIAL 0x2 + + +/* + * Flags to control acl_totext() + */ + +#define ACL_APPEND_ID 0x1 /* append uid/gid to user/group entries */ +#define ACL_COMPACT_FMT 0x2 /* build ACL in ls -V format */ +#define ACL_NORESOLVE 0x4 /* don't do name service lookups */ + +/* + * Legacy aclcheck errors for aclent_t ACLs + */ +#define EACL_GRP_ERROR GRP_ERROR +#define EACL_USER_ERROR USER_ERROR +#define EACL_OTHER_ERROR OTHER_ERROR +#define EACL_CLASS_ERROR CLASS_ERROR +#define EACL_DUPLICATE_ERROR DUPLICATE_ERROR +#define EACL_MISS_ERROR MISS_ERROR +#define EACL_MEM_ERROR MEM_ERROR +#define EACL_ENTRY_ERROR ENTRY_ERROR + +#define EACL_INHERIT_ERROR 9 /* invalid inherit flags */ +#define EACL_FLAGS_ERROR 10 /* unknown flag value */ +#define EACL_PERM_MASK_ERROR 11 /* unknown permission */ +#define EACL_COUNT_ERROR 12 /* invalid acl count */ + +#define EACL_INVALID_SLOT 13 /* invalid acl slot */ +#define EACL_NO_ACL_ENTRY 14 /* Entry doesn't exist */ +#define EACL_DIFF_TYPE 15 /* acls aren't same type */ + +#define EACL_INVALID_USER_GROUP 16 /* need user/group name */ +#define EACL_INVALID_STR 17 /* invalid acl string */ +#define EACL_FIELD_NOT_BLANK 18 /* can't have blank field */ +#define EACL_INVALID_ACCESS_TYPE 19 /* invalid access type */ +#define EACL_UNKNOWN_DATA 20 /* Unrecognized data in ACL */ +#define EACL_MISSING_FIELDS 21 /* missing fields in acl */ + +#define EACL_INHERIT_NOTDIR 22 /* Need dir for inheritance */ + +extern int aclcheck(aclent_t *, int, int *); +extern int acltomode(aclent_t *, int, mode_t *); +extern int aclfrommode(aclent_t *, int, mode_t *); +extern int aclsort(int, int, aclent_t *); +extern char *acltotext(aclent_t *, int); +extern aclent_t *aclfromtext(char *, int *); +extern void acl_free(acl_t *); +extern int acl_get(const char *, int, acl_t **); +extern int facl_get(int, int, acl_t **); +extern int acl_set(const char *, acl_t *acl); +extern int facl_set(int, acl_t *acl); +extern int acl_strip(const char *, uid_t, gid_t, 
mode_t); +extern int acl_trivial(const char *); +extern char *acl_totext(acl_t *, int); +extern int acl_fromtext(const char *, acl_t **); +extern int acl_check(acl_t *, int); + +#else /* !defined(_KERNEL) */ + +extern void ksort(caddr_t, int, int, int (*)(void *, void *)); +extern int cmp2acls(void *, void *); + +#endif /* !defined(_KERNEL) */ + +#if defined(__STDC__) +extern int acl(const char *path, int cmd, int cnt, void *buf); +extern int facl(int fd, int cmd, int cnt, void *buf); +#else /* !__STDC__ */ +extern int acl(); +extern int facl(); +#endif /* defined(__STDC__) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ACL_H */ diff --git a/lib/libspl/include/sys/acl_impl.h b/lib/libspl/include/sys/acl_impl.h new file mode 100644 index 000000000..717334906 --- /dev/null +++ b/lib/libspl/include/sys/acl_impl.h @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_ACL_IMPL_H +#define _SYS_ACL_IMPL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * acl flags + * + * ACL_AUTO_INHERIT, ACL_PROTECTED and ACL_DEFAULTED + * flags can also be stored in this field. + */ +#define ACL_IS_TRIVIAL 0x10000 +#define ACL_IS_DIR 0x20000 + +typedef enum acl_type { + ACLENT_T = 0, + ACE_T = 1 +} acl_type_t; + +struct acl_info { + acl_type_t acl_type; /* style of acl */ + int acl_cnt; /* number of acl entries */ + int acl_entry_size; /* sizeof acl entry */ + int acl_flags; /* special flags about acl */ + void *acl_aclp; /* the acl */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ACL_IMPL_H */ diff --git a/lib/libspl/include/sys/bitmap.h b/lib/libspl/include/sys/bitmap.h new file mode 100644 index 000000000..8fef7fcfe --- /dev/null +++ b/lib/libspl/include/sys/bitmap.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_BITMAP_H +#define _LIBSPL_SYS_BITMAP_H + +#endif diff --git a/lib/libspl/include/sys/byteorder.h b/lib/libspl/include/sys/byteorder.h new file mode 100644 index 000000000..528d2d208 --- /dev/null +++ b/lib/libspl/include/sys/byteorder.h @@ -0,0 +1,199 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. 
+ */ + +#ifndef _SYS_BYTEORDER_H +#define _SYS_BYTEORDER_H + + + +#include <sys/isa_defs.h> +#include <sys/int_types.h> + +#if defined(__GNUC__) && defined(_ASM_INLINES) && \ + (defined(__i386) || defined(__amd64)) +#include <asm/byteorder.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * macros for conversion between host and (internet) network byte order + */ + +#if defined(_BIG_ENDIAN) && !defined(ntohl) && !defined(__lint) +/* big-endian */ +#define ntohl(x) (x) +#define ntohs(x) (x) +#define htonl(x) (x) +#define htons(x) (x) + +#elif !defined(ntohl) /* little-endian */ + +#ifndef _IN_PORT_T +#define _IN_PORT_T +typedef uint16_t in_port_t; +#endif + +#ifndef _IN_ADDR_T +#define _IN_ADDR_T +typedef uint32_t in_addr_t; +#endif + +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) +extern uint32_t htonl(uint32_t); +extern uint16_t htons(uint16_t); +extern uint32_t ntohl(uint32_t); +extern uint16_t ntohs(uint16_t); +#else +extern in_addr_t htonl(in_addr_t); +extern in_port_t htons(in_port_t); +extern in_addr_t ntohl(in_addr_t); +extern in_port_t ntohs(in_port_t); +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) */ +#endif + +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) + +/* + * Macros to reverse byte order + */ +#define BSWAP_8(x) ((x) & 0xff) +#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) +#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) +#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) + +#define BMASK_8(x) ((x) & 0xff) +#define BMASK_16(x) ((x) & 0xffff) +#define BMASK_32(x) ((x) & 0xffffffff) +#define BMASK_64(x) (x) + +/* + * Macros to convert from a specific byte order to/from native byte order + */ +#ifdef _BIG_ENDIAN +#define BE_8(x) BMASK_8(x) +#define BE_16(x) BMASK_16(x) +#define BE_32(x) BMASK_32(x) +#define BE_64(x) BMASK_64(x) +#define LE_8(x) BSWAP_8(x) +#define LE_16(x) BSWAP_16(x) +#define LE_32(x) BSWAP_32(x) +#define LE_64(x) BSWAP_64(x) 
+#else +#define LE_8(x) BMASK_8(x) +#define LE_16(x) BMASK_16(x) +#define LE_32(x) BMASK_32(x) +#define LE_64(x) BMASK_64(x) +#define BE_8(x) BSWAP_8(x) +#define BE_16(x) BSWAP_16(x) +#define BE_32(x) BSWAP_32(x) +#define BE_64(x) BSWAP_64(x) +#endif + +/* + * Macros to read unaligned values from a specific byte order to + * native byte order + */ + +#define BE_IN8(xa) \ + *((uint8_t *)(xa)) + +#define BE_IN16(xa) \ + (((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1)) + +#define BE_IN32(xa) \ + (((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2)) + +#define BE_IN64(xa) \ + (((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4)) + +#define LE_IN8(xa) \ + *((uint8_t *)(xa)) + +#define LE_IN16(xa) \ + (((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa)) + +#define LE_IN32(xa) \ + (((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa)) + +#define LE_IN64(xa) \ + (((uint64_t)LE_IN32((uint8_t *)(xa) + 4) << 32) | LE_IN32(xa)) + +/* + * Macros to write unaligned values from native byte order to a specific byte + * order. 
+ */ + +#define BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv); + +#define BE_OUT16(xa, yv) \ + BE_OUT8((uint8_t *)(xa) + 1, yv); \ + BE_OUT8((uint8_t *)(xa), (yv) >> 8); + +#define BE_OUT32(xa, yv) \ + BE_OUT16((uint8_t *)(xa) + 2, yv); \ + BE_OUT16((uint8_t *)(xa), (yv) >> 16); + +#define BE_OUT64(xa, yv) \ + BE_OUT32((uint8_t *)(xa) + 4, yv); \ + BE_OUT32((uint8_t *)(xa), (yv) >> 32); + +#define LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv); + +#define LE_OUT16(xa, yv) \ + LE_OUT8((uint8_t *)(xa), yv); \ + LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8); + +#define LE_OUT32(xa, yv) \ + LE_OUT16((uint8_t *)(xa), yv); \ + LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16); + +#define LE_OUT64(xa, yv) \ + LE_OUT32((uint8_t *)(xa), yv); \ + LE_OUT32((uint8_t *)(xa) + 4, (yv) >> 32); + +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_BYTEORDER_H */ diff --git a/lib/libspl/include/sys/callb.h b/lib/libspl/include/sys/callb.h new file mode 100644 index 000000000..29a6a6777 --- /dev/null +++ b/lib/libspl/include/sys/callb.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_CALLB_H +#define _SYS_CALLB_H + +#endif diff --git a/lib/libspl/include/sys/cmn_err.h b/lib/libspl/include/sys/cmn_err.h new file mode 100644 index 000000000..d199361d7 --- /dev/null +++ b/lib/libspl/include/sys/cmn_err.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_CMN_ERR_H +#define _LIBSPL_SYS_CMN_ERR_H + +#endif diff --git a/lib/libspl/include/sys/compress.h b/lib/libspl/include/sys/compress.h new file mode 100644 index 000000000..6e03e73a3 --- /dev/null +++ b/lib/libspl/include/sys/compress.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_COMPRESS_H +#define _LIBSPL_SYS_COMPRESS_H + +#endif /* _LIBSPL_SYS_COMPRESS_H */ diff --git a/lib/libspl/include/sys/cred.h b/lib/libspl/include/sys/cred.h new file mode 100644 index 000000000..6a58315d4 --- /dev/null +++ b/lib/libspl/include/sys/cred.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_CRED_H +#define _LIBSPL_SYS_CRED_H + +typedef struct cred cred_t; + +#endif diff --git a/lib/libspl/include/sys/debug.h b/lib/libspl/include/sys/debug.h new file mode 100644 index 000000000..006962055 --- /dev/null +++ b/lib/libspl/include/sys/debug.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_DEBUG_H +#define _LIBSPL_SYS_DEBUG_H + +#include <assert.h> + +#endif diff --git a/lib/libspl/include/sys/dkio.h b/lib/libspl/include/sys/dkio.h new file mode 100644 index 000000000..32f786565 --- /dev/null +++ b/lib/libspl/include/sys/dkio.h @@ -0,0 +1,484 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DKIO_H +#define _SYS_DKIO_H + + + +#include <sys/dklabel.h> /* Needed for NDKMAP define */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Structures and definitions for disk io control commands + */ + +/* + * Structures used as data by ioctl calls. 
+ */ + +#define DK_DEVLEN 16 /* device name max length, including */ + /* unit # & NULL (ie - "xyc1") */ + +/* + * Used for controller info + */ +struct dk_cinfo { + char dki_cname[DK_DEVLEN]; /* controller name (no unit #) */ + ushort_t dki_ctype; /* controller type */ + ushort_t dki_flags; /* flags */ + ushort_t dki_cnum; /* controller number */ + uint_t dki_addr; /* controller address */ + uint_t dki_space; /* controller bus type */ + uint_t dki_prio; /* interrupt priority */ + uint_t dki_vec; /* interrupt vector */ + char dki_dname[DK_DEVLEN]; /* drive name (no unit #) */ + uint_t dki_unit; /* unit number */ + uint_t dki_slave; /* slave number */ + ushort_t dki_partition; /* partition number */ + ushort_t dki_maxtransfer; /* max. transfer size in DEV_BSIZE */ +}; + +/* + * Controller types + */ +#define DKC_UNKNOWN 0 +#define DKC_CDROM 1 /* CD-ROM, SCSI or otherwise */ +#define DKC_WDC2880 2 +#define DKC_XXX_0 3 /* unassigned */ +#define DKC_XXX_1 4 /* unassigned */ +#define DKC_DSD5215 5 +#define DKC_ACB4000 7 +#define DKC_MD21 8 +#define DKC_XXX_2 9 /* unassigned */ +#define DKC_NCRFLOPPY 10 +#define DKC_SMSFLOPPY 12 +#define DKC_SCSI_CCS 13 /* SCSI CCS compatible */ +#define DKC_INTEL82072 14 /* native floppy chip */ +#define DKC_MD 16 /* meta-disk (virtual-disk) driver */ +#define DKC_INTEL82077 19 /* 82077 floppy disk controller */ +#define DKC_DIRECT 20 /* Intel direct attached device i.e. 
IDE */ +#define DKC_PCMCIA_MEM 21 /* PCMCIA memory disk-like type */ +#define DKC_PCMCIA_ATA 22 /* PCMCIA AT Attached type */ +#define DKC_VBD 23 /* virtual block device */ + +/* + * Sun reserves up through 1023 + */ + +#define DKC_CUSTOMER_BASE 1024 + +/* + * Flags + */ +#define DKI_BAD144 0x01 /* use DEC std 144 bad sector fwding */ +#define DKI_MAPTRK 0x02 /* controller does track mapping */ +#define DKI_FMTTRK 0x04 /* formats only full track at a time */ +#define DKI_FMTVOL 0x08 /* formats only full volume at a time */ +#define DKI_FMTCYL 0x10 /* formats only full cylinders at a time */ +#define DKI_HEXUNIT 0x20 /* unit number is printed as 3 hex digits */ +#define DKI_PCMCIA_PFD 0x40 /* PCMCIA pseudo-floppy memory card */ + +/* + * Used for all partitions + */ +struct dk_allmap { + struct dk_map dka_map[NDKMAP]; +}; + +#if defined(_SYSCALL32) +struct dk_allmap32 { + struct dk_map32 dka_map[NDKMAP]; +}; +#endif /* _SYSCALL32 */ + +/* + * Definition of a disk's geometry + */ +struct dk_geom { + unsigned short dkg_ncyl; /* # of data cylinders */ + unsigned short dkg_acyl; /* # of alternate cylinders */ + unsigned short dkg_bcyl; /* cyl offset (for fixed head area) */ + unsigned short dkg_nhead; /* # of heads */ + unsigned short dkg_obs1; /* obsolete */ + unsigned short dkg_nsect; /* # of data sectors per track */ + unsigned short dkg_intrlv; /* interleave factor */ + unsigned short dkg_obs2; /* obsolete */ + unsigned short dkg_obs3; /* obsolete */ + unsigned short dkg_apc; /* alternates per cyl (SCSI only) */ + unsigned short dkg_rpm; /* revolutions per minute */ + unsigned short dkg_pcyl; /* # of physical cylinders */ + unsigned short dkg_write_reinstruct; /* # sectors to skip, writes */ + unsigned short dkg_read_reinstruct; /* # sectors to skip, reads */ + unsigned short dkg_extra[7]; /* for compatible expansion */ +}; + +/* + * These defines are for historic compatibility with old drivers. 
+ */ +#define dkg_bhead dkg_obs1 /* used to be head offset */ +#define dkg_gap1 dkg_obs2 /* used to be gap1 */ +#define dkg_gap2 dkg_obs3 /* used to be gap2 */ + +/* + * Disk io control commands + * Warning: some other ioctls with the DIOC prefix exist elsewhere. + * The Generic DKIOC numbers are from 0 - 50. + * The Floppy Driver uses 51 - 100. + * The Hard Disk (except SCSI) 101 - 106. (these are obsolete) + * The CDROM Driver 151 - 200. + * The USCSI ioctl 201 - 250. + */ +#define DKIOC (0x04 << 8) + +/* + * The following ioctls are generic in nature and need to be + * supported as appropriate by all disk drivers + */ +#define DKIOCGGEOM (DKIOC|1) /* Get geometry */ +#define DKIOCINFO (DKIOC|3) /* Get info */ +#define DKIOCEJECT (DKIOC|6) /* Generic 'eject' */ +#define DKIOCGVTOC (DKIOC|11) /* Get VTOC */ +#define DKIOCSVTOC (DKIOC|12) /* Set VTOC & Write to Disk */ + +/* + * Disk Cache Controls. These ioctls should be supported by + * all disk drivers. + * + * DKIOCFLUSHWRITECACHE when used from user-mode ignores the ioctl + * argument, but it should be passed as NULL to allow for future + * reinterpretation. From user-mode, this ioctl request is synchronous. + * + * When invoked from within the kernel, the arg can be NULL to indicate + * a synchronous request or can be the address of a struct dk_callback + * to request an asynchronous callback when the flush request is complete. + * In this case, the flag to the ioctl must include FKIOCTL and the + * dkc_callback field of the pointed to struct must be non-null or the + * request is made synchronously. + * + * In the callback case: if the ioctl returns 0, a callback WILL be performed. + * If the ioctl returns non-zero, a callback will NOT be performed. + * NOTE: In some cases, the callback may be done BEFORE the ioctl call + * returns. The caller's locking strategy should be prepared for this case.
+ */ +#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */ + +struct dk_callback { + void (*dkc_callback)(void *dkc_cookie, int error); + void *dkc_cookie; + int dkc_flag; +}; + +/* bit flag definitions for dkc_flag */ +#define FLUSH_VOLATILE 0x1 /* Bit 0: if set, only flush */ + /* volatile cache; otherwise, flush */ + /* volatile and non-volatile cache */ + +#define DKIOCGETWCE (DKIOC|36) /* Get current write cache */ + /* enablement status */ +#define DKIOCSETWCE (DKIOC|37) /* Enable/Disable write cache */ + +/* + * The following ioctls are used by Sun drivers to communicate + * with their associated format routines. Support of these ioctls + * is not required of foreign drivers + */ +#define DKIOCSGEOM (DKIOC|2) /* Set geometry */ +#define DKIOCSAPART (DKIOC|4) /* Set all partitions */ +#define DKIOCGAPART (DKIOC|5) /* Get all partitions */ +#define DKIOCG_PHYGEOM (DKIOC|32) /* get physical geometry */ +#define DKIOCG_VIRTGEOM (DKIOC|33) /* get virtual geometry */ + +/* + * The following ioctl's are removable media support + */ +#define DKIOCLOCK (DKIOC|7) /* Generic 'lock' */ +#define DKIOCUNLOCK (DKIOC|8) /* Generic 'unlock' */ +#define DKIOCSTATE (DKIOC|13) /* Inquire insert/eject state */ +#define DKIOCREMOVABLE (DKIOC|16) /* is media removable */ + + +/* + * ioctl for hotpluggable devices + */ +#define DKIOCHOTPLUGGABLE (DKIOC|35) /* is hotpluggable */ + +/* + * Ioctl to force driver to re-read the alternate partition and rebuild + * the internal defect map. + */ +#define DKIOCADDBAD (DKIOC|20) /* Re-read the alternate map (IDE) */ +#define DKIOCGETDEF (DKIOC|21) /* read defect list (IDE) */ + +/* + * Used by applications to get disk defect information from IDE + * drives. 
+ */ +#ifdef _SYSCALL32 +struct defect_header32 { + int head; + caddr32_t buffer; +}; +#endif /* _SYSCALL32 */ + +struct defect_header { + int head; + caddr_t buffer; +}; + +#define DKIOCPARTINFO (DKIOC|22) /* Get partition or slice parameters */ + +/* + * Used by applications to get partition or slice information + */ +#ifdef _SYSCALL32 +struct part_info32 { + daddr32_t p_start; + int p_length; +}; +#endif /* _SYSCALL32 */ + +struct part_info { + daddr_t p_start; + int p_length; +}; + +/* The following ioctls are for Optical Memory Device */ +#define DKIOC_EBP_ENABLE (DKIOC|40) /* enable by pass erase on write */ +#define DKIOC_EBP_DISABLE (DKIOC|41) /* disable by pass erase on write */ + +/* + * This state enum is the argument passed to the DKIOCSTATE ioctl. + */ +enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE }; + +#define DKIOCGMEDIAINFO (DKIOC|42) /* get information about the media */ + +/* + * ioctls to read/write mboot info. + */ +#define DKIOCGMBOOT (DKIOC|43) /* get mboot info */ +#define DKIOCSMBOOT (DKIOC|44) /* set mboot info */ + +/* + * ioctl to get the device temperature. + */ +#define DKIOCGTEMPERATURE (DKIOC|45) /* get temperature */ + +/* + * Used for providing the temperature. + */ + +struct dk_temperature { + uint_t dkt_flags; /* Flags */ + short dkt_cur_temp; /* Current disk temperature */ + short dkt_ref_temp; /* reference disk temperature */ +}; + +#define DKT_BYPASS_PM 0x1 +#define DKT_INVALID_TEMP 0xFFFF + + +/* + * Used for Media info or the current profile info + */ +struct dk_minfo { + uint_t dki_media_type; /* Media type or profile info */ + uint_t dki_lbsize; /* Logical blocksize of media */ + diskaddr_t dki_capacity; /* Capacity as # of dki_lbsize blks */ +}; + +/* + * Media types or profiles known + */ +#define DK_UNKNOWN 0x00 /* Media inserted - type unknown */ + + +/* + * SFF 8090 Specification Version 3, media types 0x01 - 0xfffe are retained to + * maintain compatibility with SFF8090. 
The following define the + * optical media type. + */ +#define DK_REMOVABLE_DISK 0x02 /* Removable Disk */ +#define DK_MO_ERASABLE 0x03 /* MO Erasable */ +#define DK_MO_WRITEONCE 0x04 /* MO Write once */ +#define DK_AS_MO 0x05 /* AS MO */ +#define DK_CDROM 0x08 /* CDROM */ +#define DK_CDR 0x09 /* CD-R */ +#define DK_CDRW 0x0A /* CD-RW */ +#define DK_DVDROM 0x10 /* DVD-ROM */ +#define DK_DVDR 0x11 /* DVD-R */ +#define DK_DVDRAM 0x12 /* DVD_RAM or DVD-RW */ + +/* + * Media types for other rewritable magnetic media + */ +#define DK_FIXED_DISK 0x10001 /* Fixed disk SCSI or otherwise */ +#define DK_FLOPPY 0x10002 /* Floppy media */ +#define DK_ZIP 0x10003 /* IOMEGA ZIP media */ +#define DK_JAZ 0x10004 /* IOMEGA JAZ media */ + +#define DKIOCSETEFI (DKIOC|17) /* Set EFI info */ +#define DKIOCGETEFI (DKIOC|18) /* Get EFI info */ + +#define DKIOCPARTITION (DKIOC|9) /* Get partition info */ + +/* + * Ioctls to get/set volume capabilities related to Logical Volume Managers. + * They include the ability to get/set capabilities and to issue a read to a + * specific underlying device of a replicated device. 
+ */ + +#define DKIOCGETVOLCAP (DKIOC | 25) /* Get volume capabilities */ +#define DKIOCSETVOLCAP (DKIOC | 26) /* Set volume capabilities */ +#define DKIOCDMR (DKIOC | 27) /* Issue a directed read */ + +typedef uint_t volcapinfo_t; + +typedef uint_t volcapset_t; + +#define DKV_ABR_CAP 0x00000001 /* Support Appl.Based Recovery */ +#define DKV_DMR_CAP 0x00000002 /* Support Directed Mirror Read */ + +typedef struct volcap { + volcapinfo_t vc_info; /* Capabilities available */ + volcapset_t vc_set; /* Capabilities set */ +} volcap_t; + +#define VOL_SIDENAME 256 + +typedef struct vol_directed_rd { + int vdr_flags; + offset_t vdr_offset; + size_t vdr_nbytes; + size_t vdr_bytesread; + void *vdr_data; + int vdr_side; + char vdr_side_name[VOL_SIDENAME]; +} vol_directed_rd_t; + +#define DKV_SIDE_INIT (-1) +#define DKV_DMR_NEXT_SIDE 0x00000001 +#define DKV_DMR_DONE 0x00000002 +#define DKV_DMR_ERROR 0x00000004 +#define DKV_DMR_SUCCESS 0x00000008 +#define DKV_DMR_SHORT 0x00000010 + +#ifdef _MULTI_DATAMODEL +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack(4) +#endif +typedef struct vol_directed_rd32 { + int32_t vdr_flags; + offset_t vdr_offset; /* 64-bit element on 32-bit alignment */ + size32_t vdr_nbytes; + size32_t vdr_bytesread; + caddr32_t vdr_data; + int32_t vdr_side; + char vdr_side_name[VOL_SIDENAME]; +} vol_directed_rd32_t; +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack() +#endif +#endif /* _MULTI_DATAMODEL */ + +/* + * The ioctl is used to fetch disk's device type, vendor ID, + * model number/product ID, firmware revision and serial number together. + * + * Currently there are two device types - DKD_ATA_TYPE which means the + * disk is driven by cmdk/ata or dad/uata driver, and DKD_SCSI_TYPE + * which means the disk is driven by sd/scsi hba driver. 
+ */ +#define DKIOC_GETDISKID (DKIOC|46) + +/* These two labels are for dkd_dtype of dk_disk_id_t */ +#define DKD_ATA_TYPE 0x01 /* ATA disk or legacy mode SATA disk */ +#define DKD_SCSI_TYPE 0x02 /* SCSI disk or native mode SATA disk */ + +#define DKD_ATA_MODEL 40 /* model number length */ +#define DKD_ATA_FWVER 8 /* firmware revision length */ +#define DKD_ATA_SERIAL 20 /* serial number length */ + +#define DKD_SCSI_VENDOR 8 /* vendor ID length */ +#define DKD_SCSI_PRODUCT 16 /* product ID length */ +#define DKD_SCSI_REVLEVEL 4 /* revision level length */ +#define DKD_SCSI_SERIAL 12 /* serial number length */ + +/* + * The argument type for DKIOC_GETDISKID ioctl. + */ +typedef struct dk_disk_id { + uint_t dkd_dtype; + union { + struct { + char dkd_amodel[DKD_ATA_MODEL]; /* 40 bytes */ + char dkd_afwver[DKD_ATA_FWVER]; /* 8 bytes */ + char dkd_aserial[DKD_ATA_SERIAL]; /* 20 bytes */ + } ata_disk_id; + struct { + char dkd_svendor[DKD_SCSI_VENDOR]; /* 8 bytes */ + char dkd_sproduct[DKD_SCSI_PRODUCT]; /* 16 bytes */ + char dkd_sfwver[DKD_SCSI_REVLEVEL]; /* 4 bytes */ + char dkd_sserial[DKD_SCSI_SERIAL]; /* 12 bytes */ + } scsi_disk_id; + } disk_id; +} dk_disk_id_t; + +/* + * The ioctl is used to update the firmware of device. 
+ */ +#define DKIOC_UPDATEFW (DKIOC|47) + +/* The argument type for DKIOC_UPDATEFW ioctl */ +typedef struct dk_updatefw { + caddr_t dku_ptrbuf; /* pointer to firmware buf */ + uint_t dku_size; /* firmware buf length */ + uint8_t dku_type; /* firmware update type */ +} dk_updatefw_t; + +#ifdef _SYSCALL32 +typedef struct dk_updatefw_32 { + caddr32_t dku_ptrbuf; /* pointer to firmware buf */ + uint_t dku_size; /* firmware buf length */ + uint8_t dku_type; /* firmware update type */ +} dk_updatefw_32_t; +#endif /* _SYSCALL32 */ + +/* + * firmware update type - temporary or permanent use + */ +#define FW_TYPE_TEMP 0x0 /* temporary use */ +#define FW_TYPE_PERM 0x1 /* permanent use */ + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DKIO_H */ diff --git a/lib/libspl/include/sys/dklabel.h b/lib/libspl/include/sys/dklabel.h new file mode 100644 index 000000000..77d5da10e --- /dev/null +++ b/lib/libspl/include/sys/dklabel.h @@ -0,0 +1,268 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1990-2002 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_DKLABEL_H +#define _SYS_DKLABEL_H + + + +#include <sys/isa_defs.h> +#include <sys/types32.h> +#include <sys/isa_defs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Miscellaneous defines + */ +#define DKL_MAGIC 0xDABE /* magic number */ +#define FKL_MAGIC 0xff /* magic number for DOS floppies */ + +#if defined(_SUNOS_VTOC_16) +#define NDKMAP 16 /* # of logical partitions */ +#define DK_LABEL_LOC 1 /* location of disk label */ +#elif defined(_SUNOS_VTOC_8) +#define NDKMAP 8 /* # of logical partitions */ +#define DK_LABEL_LOC 0 /* location of disk label */ +#else +#error "No VTOC format defined." +#endif + +#define LEN_DKL_ASCII 128 /* length of dkl_asciilabel */ +#define LEN_DKL_VVOL 8 /* length of v_volume */ +#define DK_LABEL_SIZE 512 /* size of disk label */ +#define DK_MAX_BLOCKS 0x7fffffff /* max # of blocks handled */ + +/* + * Reserve two cylinders on SCSI disks. + * One is for the backup disk label and the other is for the deviceid. + * + * IPI disks only reserve one cylinder, but they will go away soon. + * CDROMs do not reserve any cylinders. + */ +#define DK_ACYL 2 + +/* + * Format of a Sun disk label. + * Resides in cylinder 0, head 0, sector 0. + * + * sizeof (struct dk_label) should be 512 (the current sector size), + * but should the sector size increase, this structure should remain + * at the beginning of the sector. + */ + +/* + * partition headers: section 1 + * Returned in struct dk_allmap by ioctl DKIOC[SG]APART (dkio(7I)) + */ +struct dk_map { + daddr_t dkl_cylno; /* starting cylinder */ + daddr_t dkl_nblk; /* number of blocks; if == 0, */ + /* partition is undefined */ +}; + +/* + * partition headers: section 1 + * Fixed size for on-disk dk_label + */ +struct dk_map32 { + daddr32_t dkl_cylno; /* starting cylinder */ + daddr32_t dkl_nblk; /* number of blocks; if == 0, */ + /* partition is undefined */ +}; + +/* + * partition headers: section 2, + * brought over from AT&T SVr4 vtoc structure. 
+ */ +struct dk_map2 { + uint16_t p_tag; /* ID tag of partition */ + uint16_t p_flag; /* permission flag */ +}; + +struct dkl_partition { + uint16_t p_tag; /* ID tag of partition */ + uint16_t p_flag; /* permission flags */ + daddr32_t p_start; /* start sector no of partition */ + int32_t p_size; /* # of blocks in partition */ +}; + + +/* + * VTOC inclusions from AT&T SVr4 + * Fixed sized types for on-disk VTOC + */ + +struct dk_vtoc { +#if defined(_SUNOS_VTOC_16) + uint32_t v_bootinfo[3]; /* info for mboot (unsupported) */ + uint32_t v_sanity; /* to verify vtoc sanity */ + uint32_t v_version; /* layout version */ + char v_volume[LEN_DKL_VVOL]; /* volume name */ + uint16_t v_sectorsz; /* sector size in bytes */ + uint16_t v_nparts; /* number of partitions */ + uint32_t v_reserved[10]; /* free space */ + struct dkl_partition v_part[NDKMAP]; /* partition headers */ + time32_t timestamp[NDKMAP]; /* partition timestamp (unsupported) */ + char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ +#elif defined(_SUNOS_VTOC_8) + uint32_t v_version; /* layout version */ + char v_volume[LEN_DKL_VVOL]; /* volume name */ + uint16_t v_nparts; /* number of partitions */ + struct dk_map2 v_part[NDKMAP]; /* partition hdrs, sec 2 */ + uint32_t v_bootinfo[3]; /* info needed by mboot */ + uint32_t v_sanity; /* to verify vtoc sanity */ + uint32_t v_reserved[10]; /* free space */ + time32_t v_timestamp[NDKMAP]; /* partition timestamp */ +#else +#error "No VTOC format defined." +#endif +}; + +/* + * define the amount of disk label padding needed to make + * the entire structure occupy 512 bytes.
+ */ +#if defined(_SUNOS_VTOC_16) +#define LEN_DKL_PAD (DK_LABEL_SIZE - \ + ((sizeof (struct dk_vtoc) + \ + (4 * sizeof (uint32_t)) + \ + (12 * sizeof (uint16_t)) + \ + (2 * (sizeof (uint16_t)))))) +#elif defined(_SUNOS_VTOC_8) +#define LEN_DKL_PAD (DK_LABEL_SIZE \ + - ((LEN_DKL_ASCII) + \ + (sizeof (struct dk_vtoc)) + \ + (sizeof (struct dk_map32) * NDKMAP) + \ + (14 * (sizeof (uint16_t))) + \ + (2 * (sizeof (uint16_t))))) +#else +#error "No VTOC format defined." +#endif + + +struct dk_label { +#if defined(_SUNOS_VTOC_16) + struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */ + uint32_t dkl_pcyl; /* # of physical cylinders */ + uint32_t dkl_ncyl; /* # of data cylinders */ + uint16_t dkl_acyl; /* # of alternate cylinders */ + uint16_t dkl_bcyl; /* cyl offset (for fixed head area) */ + uint32_t dkl_nhead; /* # of heads */ + uint32_t dkl_nsect; /* # of data sectors per track */ + uint16_t dkl_intrlv; /* interleave factor */ + uint16_t dkl_skew; /* skew factor */ + uint16_t dkl_apc; /* alternates per cyl (SCSI only) */ + uint16_t dkl_rpm; /* revolutions per minute */ + uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */ + uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */ + uint16_t dkl_extra[4]; /* for compatible expansion */ + char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */ +#elif defined(_SUNOS_VTOC_8) + char dkl_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ + struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */ + uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */ + uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */ + char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */ + uint16_t dkl_rpm; /* rotations per minute */ + uint16_t dkl_pcyl; /* # physical cylinders */ + uint16_t dkl_apc; /* alternates per cylinder */ + uint16_t dkl_obs1; /* obsolete */ + uint16_t dkl_obs2; /* obsolete */ + uint16_t dkl_intrlv; /* interleave factor */ + uint16_t dkl_ncyl; /* # of data cylinders */ + uint16_t 
dkl_acyl; /* # of alternate cylinders */ + uint16_t dkl_nhead; /* # of heads in this partition */ + uint16_t dkl_nsect; /* # of 512 byte sectors per track */ + uint16_t dkl_obs3; /* obsolete */ + uint16_t dkl_obs4; /* obsolete */ + struct dk_map32 dkl_map[NDKMAP]; /* logical partition headers */ +#else +#error "No VTOC format defined." +#endif + uint16_t dkl_magic; /* identifies this label format */ + uint16_t dkl_cksum; /* xor checksum of sector */ +}; + +#if defined(_SUNOS_VTOC_16) +#define dkl_asciilabel dkl_vtoc.v_asciilabel +#define v_timestamp timestamp + +#elif defined(_SUNOS_VTOC_8) + +/* + * These defines are for historic compatibility with old drivers. + */ +#define dkl_gap1 dkl_obs1 /* used to be gap1 */ +#define dkl_gap2 dkl_obs2 /* used to be gap2 */ +#define dkl_bhead dkl_obs3 /* used to be label head offset */ +#define dkl_ppart dkl_obs4 /* used to be physical partition */ +#else +#error "No VTOC format defined." +#endif + +struct fk_label { /* DOS floppy label */ + uchar_t fkl_type; + uchar_t fkl_magich; + uchar_t fkl_magicl; + uchar_t filler; +}; + +/* + * Layout of stored fabricated device id (on-disk) + */ +#define DK_DEVID_BLKSIZE (512) +#define DK_DEVID_SIZE (DK_DEVID_BLKSIZE - ((sizeof (uchar_t) * 7))) +#define DK_DEVID_REV_MSB (0) +#define DK_DEVID_REV_LSB (1) + +struct dk_devid { + uchar_t dkd_rev_hi; /* revision (MSB) */ + uchar_t dkd_rev_lo; /* revision (LSB) */ + uchar_t dkd_flags; /* flags (not used yet) */ + uchar_t dkd_devid[DK_DEVID_SIZE]; /* devid stored here */ + uchar_t dkd_checksum3; /* checksum (MSB) */ + uchar_t dkd_checksum2; + uchar_t dkd_checksum1; + uchar_t dkd_checksum0; /* checksum (LSB) */ +}; + +/* + * Reassemble the 32-bit devid checksum from its four stored bytes. + * Each byte is widened to uint32_t before shifting so the MSB cannot be + * shifted into the sign bit of a promoted int, and the whole expansion is + * parenthesized so it composes safely inside larger expressions. + */ +#define DKD_GETCHKSUM(dkd) (((uint32_t)(dkd)->dkd_checksum3 << 24) + \ + ((uint32_t)(dkd)->dkd_checksum2 << 16) + \ + ((uint32_t)(dkd)->dkd_checksum1 << 8) + \ + ((uint32_t)(dkd)->dkd_checksum0)) + +#define DKD_FORMCHKSUM(c, dkd) (dkd)->dkd_checksum3 = hibyte(hiword((c))); \ + (dkd)->dkd_checksum2 = lobyte(hiword((c))); \ + (dkd)->dkd_checksum1 = hibyte(loword((c))); \ + 
(dkd)->dkd_checksum0 = lobyte(loword((c))); +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DKLABEL_H */ diff --git a/lib/libspl/include/sys/dktp/fdisk.h b/lib/libspl/include/sys/dktp/fdisk.h new file mode 100644 index 000000000..e90135f36 --- /dev/null +++ b/lib/libspl/include/sys/dktp/fdisk.h @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* Copyright (c) 1984, 1986, 1987, 1988 AT&T */ +/* All Rights Reserved */ + + +#ifndef _SYS_DKTP_FDISK_H +#define _SYS_DKTP_FDISK_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * fdisk.h + * This file defines the structure of physical disk sector 0 for use on + * AT386 systems. The format of this sector is constrained by the ROM + * BIOS and MS-DOS conventions. + * Note that this block does not define the partitions used by the unix + * driver. The unix partitions are obtained from the VTOC. 
+ */ + +/* + * The MAX values are the maximum usable values for BIOS CHS values. + * The MAX_CYL value of 1022 is the maximum usable value; + * the value of 1023 is a fence value, + * indicating no CHS geometry exists for the corresponding LBA value. + * HEAD range [ 0 .. MAX_HEAD ], so number of heads is (MAX_HEAD + 1) + * SECT range [ 1 .. MAX_SECT ], so number of sectors is (MAX_SECT) + */ +#define MAX_SECT (63) +#define MAX_CYL (1022) +#define MAX_HEAD (254) + +/* + * BOOTSZ was reduced from 446 to 440 bytes to NOT overwrite the Windows + * Vista DISKID. Otherwise Vista won't boot from Solaris GRUB in a dual-boot + * setup. + * The actual size of mboot code is 425 bytes while that of GRUB stage1 is + * 423 bytes, so this change does not harm them. + */ +#define BOOTSZ 440 /* size of boot code in master boot block */ +#define FD_NUMPART 4 /* number of 'partitions' in fdisk table */ +#define MBB_MAGIC 0xAA55 /* magic number for mboot.signature */ +#define DEFAULT_INTLV 4 /* default interleave for testing tracks */ +#define MINPSIZE 4 /* minimum number of cylinders in a partition */ +#define TSTPAT 0xE5 /* test pattern for verifying disk */ + +/* + * Structure to hold one entry of the fdisk partition table. + */ +struct ipart { + unsigned char bootid; /* bootable or not */ + unsigned char beghead; /* beginning head, sector, cylinder */ + unsigned char begsect; /* begcyl is a 10-bit number. High 2 bits */ + unsigned char begcyl; /* are in begsect. */ + unsigned char systid; /* OS type */ + unsigned char endhead; /* ending head, sector, cylinder */ + unsigned char endsect; /* endcyl is a 10-bit number. High 2 bits */ + unsigned char endcyl; /* are in endsect. */ + uint32_t relsect; /* first sector relative to start of disk */ + uint32_t numsect; /* number of sectors in partition */ +}; +/* + * Values for bootid. + */ +#define NOTACTIVE 0 +#define ACTIVE 128 +/* + * Values for systid. 
+ */ +#define UNUSED 0 /* Empty Partition */ +#define DOSOS12 1 /* DOS partition, 12-bit FAT */ +#define PCIXOS 2 /* PC/IX partition */ +#define DOSOS16 4 /* DOS partition, 16-bit FAT */ +#define EXTDOS 5 /* EXT-DOS partition */ +#define DOSHUGE 6 /* Huge DOS partition > 32MB */ +#define FDISK_IFS 7 /* Installable File System (IFS): HPFS & NTFS */ +#define FDISK_AIXBOOT 8 /* AIX Boot */ +#define FDISK_AIXDATA 9 /* AIX Data */ +#define FDISK_OS2BOOT 10 /* OS/2 Boot Manager */ +#define FDISK_WINDOWS 11 /* Windows 95 FAT32 (up to 2047GB) */ +#define FDISK_EXT_WIN 12 /* Windows 95 FAT32 (extended-INT13) */ +#define FDISK_FAT95 14 /* DOS 16-bit FAT, LBA-mapped */ +#define FDISK_EXTLBA 15 /* Extended partition, LBA-mapped */ +#define DIAGPART 18 /* Diagnostic boot partition (OS independent) */ +#define FDISK_LINUX 65 /* Linux */ +#define FDISK_LINUXDSWAP 66 /* Linux swap (sharing disk w/ DRDOS) */ +#define FDISK_LINUXDNAT 67 /* Linux native (sharing disk with DRDOS) */ +#define FDISK_CPM 82 /* CP/M */ +#define DOSDATA 86 /* DOS data partition */ +#define OTHEROS 98 /* part. type for appl. (DB?) needs */ + /* raw partition. 
ID was 0 but conflicted */ + /* with DOS 3.3 fdisk */ +#define UNIXOS 99 /* UNIX V.x partition */ +#define FDISK_NOVELL2 100 /* Novell Netware 286 */ +#define FDISK_NOVELL3 101 /* Novell Netware 3.x and later */ +#define FDISK_QNX4 119 /* QNX 4.x */ +#define FDISK_QNX42 120 /* QNX 4.x 2nd part */ +#define FDISK_QNX43 121 /* QNX 4.x 3rd part */ +#define SUNIXOS 130 /* Solaris UNIX partition */ +#define FDISK_LINUXNAT 131 /* Linux native */ +#define FDISK_NTFSVOL1 134 /* NTFS volume set 1 */ +#define FDISK_NTFSVOL2 135 /* NTFS volume set 2 */ +#define FDISK_BSD 165 /* BSD/386, 386BSD, NetBSD, FreeBSD, OpenBSD */ +#define FDISK_NEXTSTEP 167 /* NeXTSTEP */ +#define FDISK_BSDIFS 183 /* BSDI file system */ +#define FDISK_BSDISWAP 184 /* BSDI swap */ +#define X86BOOT 190 /* x86 Solaris boot partition */ +#define SUNIXOS2 191 /* Solaris UNIX partition */ +#define EFI_PMBR 238 /* EFI PMBR */ +#define EFI_FS 239 /* EFI File System (System Partition) */ +#define MAXDOS 65535L /* max size (sectors) for DOS partition */ + +/* + * structure to hold master boot block in physical sector 0 of the disk. + * Note that partitions stuff can't be directly included in the structure + * because of lameo '386 compiler alignment design. + * Alignment issues also force us to have 2 16bit entities for a single + * 32bit win_volserno. It is not used anywhere anyway. 
+ */ + +struct mboot { /* master boot block */ + char bootinst[BOOTSZ]; + uint16_t win_volserno_lo; + uint16_t win_volserno_hi; + uint16_t reserved; + char parts[FD_NUMPART * sizeof (struct ipart)]; + ushort_t signature; +}; + +#if defined(__i386) || defined(__amd64) + +/* Byte offset of the start of the partition table within the sector */ +#define FDISK_PART_TABLE_START 446 + +/* Maximum number of valid partitions assumed as 32 */ +#define MAX_EXT_PARTS 32 + +#else + +#define MAX_EXT_PARTS 0 + +#endif /* if defined(__i386) || defined(__amd64) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DKTP_FDISK_H */ diff --git a/lib/libspl/include/sys/feature_tests.h b/lib/libspl/include/sys/feature_tests.h new file mode 100644 index 000000000..96f627172 --- /dev/null +++ b/lib/libspl/include/sys/feature_tests.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FEATURE_TESTS_H +#define _SYS_FEATURE_TESTS_H + +#define __NORETURN __attribute__((__noreturn__)) + +#endif diff --git a/lib/libspl/include/sys/file.h b/lib/libspl/include/sys/file.h new file mode 100644 index 000000000..9aaba35be --- /dev/null +++ b/lib/libspl/include/sys/file.h @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_FILE_H +#define _LIBSPL_SYS_FILE_H + +#include_next <sys/file.h> + +#include <sys/user.h> + +#define FREAD 1 +#define FWRITE 2 +//#define FAPPEND 8 + +#define FCREAT O_CREAT +#define FTRUNC O_TRUNC +#define FOFFMAX O_LARGEFILE +#define FSYNC O_SYNC +#define FDSYNC O_DSYNC +#define FRSYNC O_RSYNC +#define FEXCL O_EXCL + +#define FNODSYNC 0x10000 /* fsync pseudo flag */ +#define FNOFOLLOW 0x20000 /* don't follow symlinks */ +#define FIGNORECASE 0x80000 /* request case-insensitive lookups */ + +#endif diff --git a/lib/libspl/include/sys/fm/protocol.h b/lib/libspl/include/sys/fm/protocol.h new file mode 100644 index 000000000..4e146d8b9 --- /dev/null +++ b/lib/libspl/include/sys/fm/protocol.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_FM_PROTOCOL_H +#define _LIBSPL_SYS_FM_PROTOCOL_H + +#endif diff --git a/lib/libspl/include/sys/fm/util.h b/lib/libspl/include/sys/fm/util.h new file mode 100644 index 000000000..564d0b5b5 --- /dev/null +++ b/lib/libspl/include/sys/fm/util.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_FM_UTIL_H +#define _LIBSPL_SYS_FM_UTIL_H + +#endif diff --git a/lib/libspl/include/sys/frame.h b/lib/libspl/include/sys/frame.h new file mode 100644 index 000000000..f936ab825 --- /dev/null +++ b/lib/libspl/include/sys/frame.h @@ -0,0 +1,131 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FRAME_H +#define _SYS_FRAME_H + +#include <sys/types.h> + +#if defined(_LP64) || defined(_I32LPx) +typedef long greg_t; +#else +typedef int greg_t; +#endif + +struct frame { + greg_t fr_savfp; /* saved frame pointer */ + greg_t fr_savpc; /* saved program counter */ +}; + + +/* + * In the x86 world, a stack frame looks like this: + * + * |--------------------------| + * 4n+8(%ebp) ->| argument word n | + * | ... | (Previous frame) + * 8(%ebp) ->| argument word 0 | + * |--------------------------|-------------------- + * 4(%ebp) ->| return address | + * |--------------------------| + * 0(%ebp) ->| previous %ebp (optional) | + * |--------------------------| + * -4(%ebp) ->| unspecified | (Current frame) + * | ... | + * 0(%esp) ->| variable size | + * |--------------------------| + */ + +/* + * Stack alignment macros. + */ + +#define STACK_ALIGN32 4 +#define STACK_ENTRY_ALIGN32 4 +#define STACK_BIAS32 0 +#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1)) +#define STACK_RESERVE32 0 +#define MINFRAME32 0 + +#if defined(__amd64) + +/* + * In the amd64 world, a stack frame looks like this: + * + * |--------------------------| + * 8n+16(%rbp)->| argument word n | + * | ... 
| (Previous frame) + * 16(%rbp) ->| argument word 0 | + * |--------------------------|-------------------- + * 8(%rbp) ->| return address | + * |--------------------------| + * 0(%rbp) ->| previous %rbp | + * |--------------------------| + * -8(%rbp) ->| unspecified | (Current frame) + * | ... | + * 0(%rsp) ->| variable size | + * |--------------------------| + * -128(%rsp) ->| reserved for function | + * |--------------------------| + * + * The end of the input argument area must be aligned on a 16-byte + * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry. + * + * The 128-byte location beyond %rsp is considered to be reserved for + * functions and is NOT modified by signal handlers. It can be used + * to store temporary data that is not needed across function calls. + */ + +/* + * Stack alignment macros. + */ + +#define STACK_ALIGN64 16 +#define STACK_ENTRY_ALIGN64 8 +#define STACK_BIAS64 0 +#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1)) +#define STACK_RESERVE64 128 +#define MINFRAME64 0 + +#define STACK_ALIGN STACK_ALIGN64 +#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64 +#define STACK_BIAS STACK_BIAS64 +#define SA(x) SA64(x) +#define STACK_RESERVE STACK_RESERVE64 +#define MINFRAME MINFRAME64 + +#elif defined(__i386) + +#define STACK_ALIGN STACK_ALIGN32 +#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32 +#define STACK_BIAS STACK_BIAS32 +#define SA(x) SA32(x) +#define STACK_RESERVE STACK_RESERVE32 +#define MINFRAME MINFRAME32 + +#endif /* __i386 */ + +#endif /* _SYS_FRAME_H */ diff --git a/lib/libspl/include/sys/int_limits.h b/lib/libspl/include/sys/int_limits.h new file mode 100644 index 000000000..2b50ddd1b --- /dev/null +++ b/lib/libspl/include/sys/int_limits.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_INT_LIMITS_H +#define _LIBSPL_SYS_INT_LIMITS_H + +#endif diff --git a/lib/libspl/include/sys/int_types.h b/lib/libspl/include/sys/int_types.h new file mode 100644 index 000000000..b32512282 --- /dev/null +++ b/lib/libspl/include/sys/int_types.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SOL_SYS_INT_TYPES_H +#define _SOL_SYS_INT_TYPES_H + +#include <inttypes.h> + +#endif diff --git a/lib/libspl/include/sys/inttypes.h b/lib/libspl/include/sys/inttypes.h new file mode 100644 index 000000000..7630f2d4c --- /dev/null +++ b/lib/libspl/include/sys/inttypes.h @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SOL_SYS_INTTYPES_H +#define _SOL_SYS_INTTYPES_H + +#include <inttypes.h> + +#define _INT64_TYPE + +#endif diff --git a/lib/libspl/include/sys/isa_defs.h b/lib/libspl/include/sys/isa_defs.h new file mode 100644 index 000000000..677e4fda0 --- /dev/null +++ b/lib/libspl/include/sys/isa_defs.h @@ -0,0 +1,125 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ISA_DEFS_H +#define _SYS_ISA_DEFS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* x86_64 arch specific defines */ +#if defined(__x86_64) || defined(__x86_64__) + +#if !defined(__x86_64) +#define __x86_64 +#endif + +#if !defined(__amd64) +#define __amd64 +#endif + +#if !defined(__x86) +#define __x86 +#endif + +#if !defined(_LP64) +#define _LP64 +#endif + +#if !defined(_LITTLE_ENDIAN) +#define _LITTLE_ENDIAN +#endif + +#define _SUNOS_VTOC_16 + +/* i386 arch specific defines */ +#elif defined(__i386) || defined(__i386__) + +#if !defined(__i386) +#define __i386 +#endif + +#if !defined(__x86) +#define __x86 +#endif + +#if !defined(_ILP32) +#define _ILP32 +#endif + +#if !defined(_LITTLE_ENDIAN) +#define _LITTLE_ENDIAN +#endif + +#define _SUNOS_VTOC_16 + +/* powerpc arch specific defines */ +#elif defined(__powerpc) || defined(__powerpc__) + +#if !defined(__powerpc) +#define __powerpc +#endif + +#if !defined(__powerpc__) +#define __powerpc__ +#endif + +/* + * Select the data model from the word size. The 32-bit model macro is + * _ILP32 (as in the i386 branch above and the sanity check below), not + * _LP32; each define is guarded so a pre-existing definition is kept. + */ +#if defined(__powerpc64__) +#if !defined(_LP64) +#define _LP64 +#endif +#else +#if !defined(_ILP32) +#define _ILP32 +#endif +#endif + +#if !defined(_BIG_ENDIAN) +#define _BIG_ENDIAN +#endif + +#define _SUNOS_VTOC_16 + +#else /* Currently only x86_64, i386, and powerpc arches supported */ +#error "Unsupported ISA type" +#endif + +#if defined(_ILP32) && defined(_LP64) +#error "Both _ILP32 and _LP64 are defined" +#endif + +#if defined(_LITTLE_ENDIAN) && 
defined(_BIG_ENDIAN) +#error "Both _LITTLE_ENDIAN and _BIG_ENDIAN are defined" +#endif + +#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +#error "Neither _LITTLE_ENDIAN nor _BIG_ENDIAN are defined" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ISA_DEFS_H */ diff --git a/lib/libspl/include/sys/kmem.h b/lib/libspl/include/sys/kmem.h new file mode 100644 index 000000000..401e04072 --- /dev/null +++ b/lib/libspl/include/sys/kmem.h @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_KMEM_H +#define _SYS_KMEM_H + +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define KM_SLEEP 0x00000000 /* same as KM_SLEEP */ +#define KM_NOSLEEP 0x00000001 /* same as KM_NOSLEEP */ + +#define kmem_alloc(size, flags) malloc(size) +#define kmem_free(ptr, size) free(ptr) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_KMEM_H */ diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h new file mode 100644 index 000000000..fcd3ed98b --- /dev/null +++ b/lib/libspl/include/sys/kstat.h @@ -0,0 +1,820 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_KSTAT_H +#define _SYS_KSTAT_H + + + +/* + * Definition of general kernel statistics structures and /dev/kstat ioctls + */ + +#include <sys/types.h> +#include <sys/time.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int kid_t; /* unique kstat id */ + +/* + * Kernel statistics driver (/dev/kstat) ioctls + * + * Each command value is fully parenthesized so that it can be used in + * comparisons and other expressions without precedence surprises. + */ + +#define KSTAT_IOC_BASE ('K' << 8) + +#define KSTAT_IOC_CHAIN_ID (KSTAT_IOC_BASE | 0x01) +#define KSTAT_IOC_READ (KSTAT_IOC_BASE | 0x02) +#define KSTAT_IOC_WRITE (KSTAT_IOC_BASE | 0x03) + +/* + * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor): + * + * kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL); + * kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *); + * kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *); + */ + +#define KSTAT_STRLEN 31 /* 30 chars + NULL; must be 16 * n - 1 */ + +/* + * The generic kstat header + */ + +typedef struct kstat { + /* + * Fields relevant to both kernel and user + */ + hrtime_t ks_crtime; /* creation time (from gethrtime()) */ + struct kstat *ks_next; /* kstat chain linkage */ + kid_t ks_kid; /* unique kstat ID */ + char ks_module[KSTAT_STRLEN]; /* provider module name */ + uchar_t ks_resv; /* reserved, currently just padding */ + int ks_instance; /* provider module's instance */ + char ks_name[KSTAT_STRLEN]; /* kstat name */ + uchar_t ks_type; /* kstat data type */ + char ks_class[KSTAT_STRLEN]; /* kstat class */ + uchar_t ks_flags; /* kstat flags */ + void *ks_data; /* kstat type-specific data */ + uint_t ks_ndata; /* # of type-specific data records */ + size_t ks_data_size; /* total size of kstat data section */ + hrtime_t ks_snaptime; /* time of last data snapshot */ + /* + * Fields relevant to kernel only + */ + int (*ks_update)(struct kstat *, int); /* dynamic update */ + void *ks_private; /* arbitrary provider-private data */ + int (*ks_snapshot)(struct kstat *, void *, int); + void *ks_lock; /* protects this kstat's data */ +} kstat_t; + +#ifdef _SYSCALL32 + +typedef int32_t kid32_t; + +typedef struct 
kstat32 { + /* + * Fields relevant to both kernel and user + */ + hrtime_t ks_crtime; + caddr32_t ks_next; /* struct kstat pointer */ + kid32_t ks_kid; + char ks_module[KSTAT_STRLEN]; + uint8_t ks_resv; + int32_t ks_instance; + char ks_name[KSTAT_STRLEN]; + uint8_t ks_type; + char ks_class[KSTAT_STRLEN]; + uint8_t ks_flags; + caddr32_t ks_data; /* type-specific data */ + uint32_t ks_ndata; + size32_t ks_data_size; + hrtime_t ks_snaptime; + /* + * Fields relevant to kernel only (only needed here for padding) + */ + int32_t _ks_update; + caddr32_t _ks_private; + int32_t _ks_snapshot; + caddr32_t _ks_lock; +} kstat32_t; + +#endif /* _SYSCALL32 */ + +/* + * kstat structure and locking strategy + * + * Each kstat consists of a header section (a kstat_t) and a data section. + * The system maintains a set of kstats, protected by kstat_chain_lock. + * kstat_chain_lock protects all additions to/deletions from this set, + * as well as all changes to kstat headers. kstat data sections are + * *optionally* protected by the per-kstat ks_lock. If ks_lock is non-NULL, + * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their + * operations on that kstat. It is up to the kstat provider to decide whether + * guaranteeing consistent data to kstat clients is sufficiently important + * to justify the locking cost. Note, however, that most statistic updates + * already occur under one of the provider's mutexes, so if the provider sets + * ks_lock to point to that mutex, then kstat data locking is free. + * + * NOTE: variable-size kstats MUST employ kstat data locking, to prevent + * data-size races with kstat clients. + * + * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *) + * in the kstat header so that users don't have to be exposed to all of the + * kernel's lock-related data structures. 
+ */ + +#if defined(_KERNEL) + +/* + * Take/drop the per-kstat data lock when one is set; a NULL ks_lock is + * skipped. The bodies are wrapped in do/while (0) so each macro expands + * to exactly one statement and stays correct in an unbraced if/else. + */ +#define KSTAT_ENTER(k) \ + do { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); } while (0) + +#define KSTAT_EXIT(k) \ + do { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); } while (0) + +#define KSTAT_UPDATE(k, rw) (*(k)->ks_update)((k), (rw)) + +#define KSTAT_SNAPSHOT(k, buf, rw) (*(k)->ks_snapshot)((k), (buf), (rw)) + +#endif /* defined(_KERNEL) */ + +/* + * kstat time + * + * All times associated with kstats (e.g. creation time, snapshot time, + * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values, + * as returned by gethrtime(). The accuracy of these timestamps is machine + * dependent, but the precision (units) is the same across all platforms. + */ + +/* + * kstat identity (KID) + * + * Each kstat is assigned a unique KID (kstat ID) when it is added to the + * global kstat chain. The KID is used as a cookie by /dev/kstat to + * request information about the corresponding kstat. There is also + * an identity associated with the entire kstat chain, kstat_chain_id, + * which is bumped each time a kstat is added or deleted. /dev/kstat uses + * the chain ID to detect changes in the kstat chain (e.g., a new disk + * coming online) between ioctl()s. + */ + +/* + * kstat module, kstat instance + * + * ks_module and ks_instance contain the name and instance of the module + * that created the kstat. In cases where there can only be one instance, + * ks_instance is 0. The kernel proper (/kernel/unix) uses "unix" as its + * module name. + */ + +/* + * kstat name + * + * ks_name gives a meaningful name to a kstat. The full kstat namespace + * is module.instance.name, so the name only need be unique within a + * module. kstat_create() will fail if you try to create a kstat with + * an already-used (ks_module, ks_instance, ks_name) triplet. Spaces are + * allowed in kstat names, but strongly discouraged, since they hinder + * awk-style processing at user level. 
+ */ + +/* + * kstat type + * + * The kstat mechanism provides several flavors of kstat data, defined + * below. The "raw" kstat type is just treated as an array of bytes; you + * can use this to export any kind of data you want. + * + * Some kstat types allow multiple data structures per kstat, e.g. + * KSTAT_TYPE_NAMED; others do not. This is part of the spec for each + * kstat data type. + * + * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES. To + * get this information, read out the standard system kstat "kstat_types". + */ + +#define KSTAT_TYPE_RAW 0 /* can be anything */ + /* ks_ndata >= 1 */ +#define KSTAT_TYPE_NAMED 1 /* name/value pair */ + /* ks_ndata >= 1 */ +#define KSTAT_TYPE_INTR 2 /* interrupt statistics */ + /* ks_ndata == 1 */ +#define KSTAT_TYPE_IO 3 /* I/O statistics */ + /* ks_ndata == 1 */ +#define KSTAT_TYPE_TIMER 4 /* event timer */ + /* ks_ndata >= 1 */ + +#define KSTAT_NUM_TYPES 5 + +/* + * kstat class + * + * Each kstat can be characterized as belonging to some broad class + * of statistics, e.g. disk, tape, net, vm, streams, etc. This field + * can be used as a filter to extract related kstats. The following + * values are currently in use: disk, tape, net, controller, vm, kvm, + * hat, streams, kstat, and misc. (The kstat class encompasses things + * like kstat_types.) + */ + +/* + * kstat flags + * + * Any of the following flags may be passed to kstat_create(). They are + * all zero by default. + * + * KSTAT_FLAG_VIRTUAL: + * + * Tells kstat_create() not to allocate memory for the + * kstat data section; instead, you will set the ks_data + * field to point to the data you wish to export. This + * provides a convenient way to export existing data + * structures. + * + * KSTAT_FLAG_VAR_SIZE: + * + * The size of the kstat you are creating will vary over time. + * For example, you may want to use the kstat mechanism to + * export a linked list. 
NOTE: The kstat framework does not + * manage the data section, so all variable-size kstats must be + * virtual kstats. Moreover, variable-size kstats MUST employ + * kstat data locking to prevent data-size races with kstat + * clients. See the section on "kstat snapshot" for details. + * + * KSTAT_FLAG_WRITABLE: + * + * Makes the kstat's data section writable by root. + * The ks_snapshot routine (see below) does not need to check for + * this; permission checking is handled in the kstat driver. + * + * KSTAT_FLAG_PERSISTENT: + * + * Indicates that this kstat is to be persistent over time. + * For persistent kstats, kstat_delete() simply marks the + * kstat as dormant; a subsequent kstat_create() reactivates + * the kstat. This feature is provided so that statistics + * are not lost across driver close/open (e.g., raw disk I/O + * on a disk with no mounted partitions.) + * NOTE: Persistent kstats cannot be virtual, since ks_data + * points to garbage as soon as the driver goes away. + * + * The following flags are maintained by the kstat framework: + * + * KSTAT_FLAG_DORMANT: + * + * For persistent kstats, indicates that the kstat is in the + * dormant state (e.g., the corresponding device is closed). + * + * KSTAT_FLAG_INVALID: + * + * This flag is set when a kstat is in a transitional state, + * e.g. between kstat_create() and kstat_install(). + * kstat clients must not attempt to access the kstat's data + * if this flag is set. + */ + +#define KSTAT_FLAG_VIRTUAL 0x01 +#define KSTAT_FLAG_VAR_SIZE 0x02 +#define KSTAT_FLAG_WRITABLE 0x04 +#define KSTAT_FLAG_PERSISTENT 0x08 +#define KSTAT_FLAG_DORMANT 0x10 +#define KSTAT_FLAG_INVALID 0x20 + +/* + * Dynamic update support + * + * The kstat mechanism allows for an optional ks_update function to update + * kstat data. This is useful for drivers where the underlying device + * keeps cheap hardware stats, but extraction is expensive. 
Instead of + * constantly keeping the kstat data section up to date, you can supply a + * ks_update function which updates the kstat's data section on demand. + * To take advantage of this feature, simply set the ks_update field before + * calling kstat_install(). + * + * The ks_update function, if supplied, must have the following structure: + * + * int + * foo_kstat_update(kstat_t *ksp, int rw) + * { + * if (rw == KSTAT_WRITE) { + * ... update the native stats from ksp->ks_data; + * return EACCES if you don't support this + * } else { + * ... update ksp->ks_data from the native stats + * } + * } + * + * The ks_update return codes are: 0 for success, EACCES if you don't allow + * KSTAT_WRITE, and EIO for any other type of error. + * + * In general, the ks_update function may need to refer to provider-private + * data; for example, it may need a pointer to the provider's raw statistics. + * The ks_private field is available for this purpose. Its use is entirely + * at the provider's discretion. + * + * All variable-size kstats MUST supply a ks_update routine, which computes + * and sets ks_data_size (and ks_ndata if that is meaningful), since these + * are needed to perform kstat snapshots (see below). + * + * No kstat locking should be done inside the ks_update routine. The caller + * will already be holding the kstat's ks_lock (to ensure consistent data). + */ + +#define KSTAT_READ 0 +#define KSTAT_WRITE 1 + +/* + * Kstat snapshot + * + * In order to get a consistent view of a kstat's data, clients must obey + * the kstat's locking strategy. However, these clients may need to perform + * operations on the data which could cause a fault (e.g. copyout()), or + * operations which are simply expensive. Doing so could cause deadlock + * (e.g. if you're holding a disk's kstat lock which is ultimately required + * to resolve a copyout() fault), performance degradation (since the providers' + * activity is serialized at the kstat lock), device timing problems, etc. 
+ * + * To avoid these problems, kstat data is provided via snapshots. Taking + * a snapshot is a simple process: allocate a wired-down kernel buffer, + * acquire the kstat's data lock, copy the data into the buffer ("take the + * snapshot"), and release the lock. This ensures that the kstat's data lock + * will be held as briefly as possible, and that no faults will occur while + * the lock is held. + * + * Normally, the snapshot is taken by default_kstat_snapshot(), which + * timestamps the data (sets ks_snaptime), copies it, and does a little + * massaging to deal with incomplete transactions on i/o kstats. However, + * this routine only works for kstats with contiguous data (the typical case). + * If you create a kstat whose data is, say, a linked list, you must provide + * your own ks_snapshot routine. The routine you supply must have the + * following prototype (replace "foo" with something appropriate): + * + * int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw); + * + * The minimal snapshot routine -- one which copies contiguous data that + * doesn't need any massaging -- would be this: + * + * ksp->ks_snaptime = gethrtime(); + * if (rw == KSTAT_WRITE) + * bcopy(buf, ksp->ks_data, ksp->ks_data_size); + * else + * bcopy(ksp->ks_data, buf, ksp->ks_data_size); + * return (0); + * + * A more illuminating example is taking a snapshot of a linked list: + * + * ksp->ks_snaptime = gethrtime(); + * if (rw == KSTAT_WRITE) + * return (EACCES); ... See below ... + * for (foo = first_foo; foo; foo = foo->next) { + * bcopy((char *) foo, (char *) buf, sizeof (struct foo)); + * buf = ((struct foo *) buf) + 1; + * } + * return (0); + * + * In the example above, we have decided that we don't want to allow + * KSTAT_WRITE access, so we return EACCES if this is attempted. + * + * The key points are: + * + * (1) ks_snaptime must be set (via gethrtime()) to timestamp the data. 
+ * (2) Data gets copied from the kstat to the buffer on KSTAT_READ, + * and from the buffer to the kstat on KSTAT_WRITE. + * (3) ks_snapshot return values are: 0 for success, EACCES if you + * don't allow KSTAT_WRITE, and EIO for any other type of error. + * + * Named kstats (see section on "Named statistics" below) containing long + * strings (KSTAT_DATA_STRING) need special handling. The kstat driver + * assumes that all strings are copied into the buffer after the array of + * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point + * into the copy within the buffer. The default snapshot routine does this, + * but overriding routines should contain at least the following: + * + * if (rw == KSTAT_READ) { + * kstat_named_t *knp = buf; + * char *end = (char *)(knp + ksp->ks_ndata); + * uint_t i; + * + * ... Do the regular copy ... + * bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata); + * + * for (i = 0; i < ksp->ks_ndata; i++, knp++) { + * if (knp->data_type == KSTAT_DATA_STRING && + * KSTAT_NAMED_STR_PTR(knp) != NULL) { + * bcopy(KSTAT_NAMED_STR_PTR(knp), end, + * KSTAT_NAMED_STR_BUFLEN(knp)); + * KSTAT_NAMED_STR_PTR(knp) = end; + * end += KSTAT_NAMED_STR_BUFLEN(knp); + * } + * } + */ + +/* + * Named statistics. + * + * List of arbitrary name=value statistics. + */ + +typedef struct kstat_named { + char name[KSTAT_STRLEN]; /* name of counter */ + uchar_t data_type; /* data type */ + union { + char c[16]; /* enough for 128-bit ints */ + int32_t i32; + uint32_t ui32; + struct { + union { + char *ptr; /* NULL-term string */ +#if defined(_KERNEL) && defined(_MULTI_DATAMODEL) + caddr32_t ptr32; +#endif + char __pad[8]; /* 64-bit padding */ + } addr; + uint32_t len; /* # bytes for strlen + '\0' */ + } str; +/* + * The int64_t and uint64_t types are not valid for a maximally conformant + * 32-bit compilation environment (cc -Xc) using compilers prior to the + * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
+ * In these cases, the visibility of i64 and ui64 is only permitted for + * 64-bit compilation environments or 32-bit non-maximally conformant + * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the + * C99 ANSI C compilation environment, the long long type is supported. + * The _INT64_TYPE is defined by the implementation (see sys/int_types.h). + */ +#if defined(_INT64_TYPE) + int64_t i64; + uint64_t ui64; +#endif + long l; + ulong_t ul; + + /* These structure members are obsolete */ + + longlong_t ll; + u_longlong_t ull; + float f; + double d; + } value; /* value of counter */ +} kstat_named_t; + +#define KSTAT_DATA_CHAR 0 +#define KSTAT_DATA_INT32 1 +#define KSTAT_DATA_UINT32 2 +#define KSTAT_DATA_INT64 3 +#define KSTAT_DATA_UINT64 4 + +#if !defined(_LP64) +#define KSTAT_DATA_LONG KSTAT_DATA_INT32 +#define KSTAT_DATA_ULONG KSTAT_DATA_UINT32 +#else +#if !defined(_KERNEL) +#define KSTAT_DATA_LONG KSTAT_DATA_INT64 +#define KSTAT_DATA_ULONG KSTAT_DATA_UINT64 +#else +#define KSTAT_DATA_LONG 7 /* only visible to the kernel */ +#define KSTAT_DATA_ULONG 8 /* only visible to the kernel */ +#endif /* !_KERNEL */ +#endif /* !_LP64 */ + +/* + * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING) + * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof + * (kstat_named_t)). ks_data_size in these cases is equal to the sum of the + * amount of space required to store the strings (ie, the sum of + * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the + * space required to store the kstat_named_t's. + * + * The default update routine will update ks_data_size automatically for + * variable-length kstats containing long strings (using the default update + * routine only makes sense if the string is the only thing that is changing + * in size, and ks_ndata is constant). 
Fixed-length kstats containing long + * strings must explicitly change ks_data_size (after creation but before + * initialization) to reflect the correct amount of space required for the + * long strings and the kstat_named_t's. + */ +#define KSTAT_DATA_STRING 9 + +/* These types are obsolete */ + +#define KSTAT_DATA_LONGLONG KSTAT_DATA_INT64 +#define KSTAT_DATA_ULONGLONG KSTAT_DATA_UINT64 +#define KSTAT_DATA_FLOAT 5 +#define KSTAT_DATA_DOUBLE 6 + +#define KSTAT_NAMED_PTR(kptr) ((kstat_named_t *)(kptr)->ks_data) + +/* + * Retrieve the pointer of the string contained in the given named kstat. + */ +#define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr) + +/* + * Retrieve the length of the buffer required to store the string in the given + * named kstat. + */ +#define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len) + +/* + * Interrupt statistics. + * + * An interrupt is a hard interrupt (sourced from the hardware device + * itself), a soft interrupt (induced by the system via the use of + * some system interrupt source), a watchdog interrupt (induced by + * a periodic timer call), spurious (an interrupt entry point was + * entered but there was no interrupt condition to service), + * or multiple service (an interrupt condition was detected and + * serviced just prior to returning from any of the other types). + * + * Measurement of the spurious class of interrupts is useful for + * autovectored devices in order to pinpoint any interrupt latency + * problems in a particular system configuration. + * + * Devices that have more than one interrupt of the same + * type should use multiple structures. 
+ */ + +#define KSTAT_INTR_HARD 0 +#define KSTAT_INTR_SOFT 1 +#define KSTAT_INTR_WATCHDOG 2 +#define KSTAT_INTR_SPURIOUS 3 +#define KSTAT_INTR_MULTSVC 4 + +#define KSTAT_NUM_INTRS 5 + +typedef struct kstat_intr { + uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */ +} kstat_intr_t; + +#define KSTAT_INTR_PTR(kptr) ((kstat_intr_t *)(kptr)->ks_data) + +/* + * I/O statistics. + */ + +typedef struct kstat_io { + + /* + * Basic counters. + * + * The counters should be updated at the end of service + * (e.g., just prior to calling biodone()). + */ + + u_longlong_t nread; /* number of bytes read */ + u_longlong_t nwritten; /* number of bytes written */ + uint_t reads; /* number of read operations */ + uint_t writes; /* number of write operations */ + + /* + * Accumulated time and queue length statistics. + * + * Accumulated time statistics are kept as a running sum + * of "active" time. Queue length statistics are kept as a + * running sum of the product of queue length and elapsed time + * at that length -- i.e., a Riemann sum for queue length + * integrated against time. (You can also think of the active time + * as a Riemann sum, for the boolean function (queue_length > 0) + * integrated against time, or you can think of it as the + * Lebesgue measure of the set on which queue_length > 0.) + * + * ^ + * | _________ + * 8 | i4 | + * | | | + * Queue 6 | | + * Length | _________ | | + * 4 | i2 |_______| | + * | | i3 | + * 2_______| | + * | i1 | + * |_______________________________| + * Time-> t1 t2 t3 t4 + * + * At each change of state (entry or exit from the queue), + * we add the elapsed time (since the previous state change) + * to the active time if the queue length was non-zero during + * that interval; and we add the product of the elapsed time + * times the queue length to the running length*time sum. + * + * This method is generalizable to measuring residency + * in any defined system: instead of queue lengths, think + * of "outstanding RPC calls to server X". 
+ * + * A large number of I/O subsystems have at least two basic + * "lists" of transactions they manage: one for transactions + * that have been accepted for processing but for which processing + * has yet to begin, and one for transactions which are actively + * being processed (but not done). For this reason, two cumulative + * time statistics are defined here: wait (pre-service) time, + * and run (service) time. + * + * All times are 64-bit nanoseconds (hrtime_t), as returned by + * gethrtime(). + * + * The units of cumulative busy time are accumulated nanoseconds. + * The units of cumulative length*time products are elapsed time + * times queue length. + * + * Updates to the fields below are performed implicitly by calls to + * these six functions: + * + * kstat_waitq_enter() + * kstat_waitq_exit() + * kstat_runq_enter() + * kstat_runq_exit() + * + * kstat_waitq_to_runq() (see below) + * kstat_runq_back_to_waitq() (see below) + * + * Since kstat_waitq_exit() is typically followed immediately + * by kstat_runq_enter(), there is a single kstat_waitq_to_runq() + * function which performs both operations. This is a performance + * win since only one timestamp is required. + * + * In some instances, it may be necessary to move a request from + * the run queue back to the wait queue, e.g. for write throttling. + * For these situations, call kstat_runq_back_to_waitq(). + * + * These fields should never be updated by any other means.
+ */ + + hrtime_t wtime; /* cumulative wait (pre-service) time */ + hrtime_t wlentime; /* cumulative wait length*time product */ + hrtime_t wlastupdate; /* last time wait queue changed */ + hrtime_t rtime; /* cumulative run (service) time */ + hrtime_t rlentime; /* cumulative run length*time product */ + hrtime_t rlastupdate; /* last time run queue changed */ + + uint_t wcnt; /* count of elements in wait state */ + uint_t rcnt; /* count of elements in run state */ + +} kstat_io_t; + +#define KSTAT_IO_PTR(kptr) ((kstat_io_t *)(kptr)->ks_data) + +/* + * Event timer statistics - cumulative elapsed time and number of events. + * + * Updates to these fields are performed implicitly by calls to + * kstat_timer_start() and kstat_timer_stop(). + */ + +typedef struct kstat_timer { + char name[KSTAT_STRLEN]; /* event name */ + uchar_t resv; /* reserved */ + u_longlong_t num_events; /* number of events */ + hrtime_t elapsed_time; /* cumulative elapsed time */ + hrtime_t min_time; /* shortest event duration */ + hrtime_t max_time; /* longest event duration */ + hrtime_t start_time; /* previous event start time */ + hrtime_t stop_time; /* previous event stop time */ +} kstat_timer_t; + +#define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data) + +#if defined(_KERNEL) + +#include <sys/t_lock.h> + +extern kid_t kstat_chain_id; /* bumped at each state change */ +extern void kstat_init(void); /* initialize kstat framework */ + +/* + * Adding and deleting kstats. + * + * The typical sequence to add a kstat is: + * + * ksp = kstat_create(module, instance, name, class, type, ndata, flags); + * if (ksp) { + * ... provider initialization, if necessary + * kstat_install(ksp); + * } + * + * There are three logically distinct steps here: + * + * Step 1: System Initialization (kstat_create) + * + * kstat_create() performs system initialization. 
kstat_create() + * allocates memory for the entire kstat (header plus data), initializes + * all header fields, initializes the data section to all zeroes, assigns + * a unique KID, and puts the kstat onto the system's kstat chain. + * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set), + * because the provider (caller) has not yet had a chance to initialize + * the data section. + * + * By default, kstats are exported to all zones on the system. A kstat may be + * created via kstat_create_zone() to specify a zone to which the statistics + * should be exported. kstat_zone_add() may be used to specify additional + * zones to which the statistics are to be exported. + * + * Step 2: Provider Initialization + * + * The provider performs any necessary initialization of the data section, + * e.g. setting the name fields in a KSTAT_TYPE_NAMED. Virtual kstats set + * the ks_data field at this time. The provider may also set the ks_update, + * ks_snapshot, ks_private, and ks_lock fields if necessary. + * + * Step 3: Installation (kstat_install) + * + * Once the kstat is completely initialized, kstat_install() clears the + * INVALID flag, thus making the kstat accessible to the outside world. + * kstat_install() also clears the DORMANT flag for persistent kstats. + * + * Removing a kstat from the system + * + * kstat_delete(ksp) removes ksp from the kstat chain and frees all + * associated system resources. NOTE: When you call kstat_delete(), + * you must NOT be holding that kstat's ks_lock. Otherwise, you may + * deadlock with a kstat reader. + * + * Persistent kstats + * + * From the provider's point of view, persistence is transparent. The only + * difference between ephemeral (normal) kstats and persistent kstats + * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create(). Magically, + * this has the effect of making your data visible even when you're + * not home. 
Persistence is important to tools like iostat, which want + * to get a meaningful picture of disk activity. Without persistence, + * raw disk i/o statistics could never accumulate: they would come and + * go with each open/close of the raw device. + * + * The magic of persistence works by slightly altering the behavior of + * kstat_create() and kstat_delete(). The first call to kstat_create() + * creates a new kstat, as usual. However, kstat_delete() does not + * actually delete the kstat: it performs one final update of the data + * (i.e., calls the ks_update routine), marks the kstat as dormant, and + * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back + * to their default values (since they might otherwise point to garbage, + * e.g. if the provider is going away). kstat clients can still access + * the dormant kstat just like a live kstat; they just continue to see + * the final data values as long as the kstat remains dormant. + * All subsequent kstat_create() calls simply find the already-existing, + * dormant kstat and return a pointer to it, without altering any fields. + * The provider then performs its usual initialization sequence, and + * calls kstat_install(). kstat_install() uses the old data values to + * initialize the native data (i.e., ks_update is called with KSTAT_WRITE), + * thus making it seem like you were never gone. 
+ */ + +extern kstat_t *kstat_create(const char *, int, const char *, const char *, + uchar_t, uint_t, uchar_t); +extern kstat_t *kstat_create_zone(const char *, int, const char *, + const char *, uchar_t, uint_t, uchar_t, zoneid_t); +extern void kstat_install(kstat_t *); +extern void kstat_delete(kstat_t *); +extern void kstat_named_setstr(kstat_named_t *knp, const char *src); +extern void kstat_set_string(char *, const char *); +extern void kstat_delete_byname(const char *, int, const char *); +extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t); +extern void kstat_named_init(kstat_named_t *, const char *, uchar_t); +extern void kstat_timer_init(kstat_timer_t *, const char *); +extern void kstat_waitq_enter(kstat_io_t *); +extern void kstat_waitq_exit(kstat_io_t *); +extern void kstat_runq_enter(kstat_io_t *); +extern void kstat_runq_exit(kstat_io_t *); +extern void kstat_waitq_to_runq(kstat_io_t *); +extern void kstat_runq_back_to_waitq(kstat_io_t *); +extern void kstat_timer_start(kstat_timer_t *); +extern void kstat_timer_stop(kstat_timer_t *); + +extern void kstat_zone_add(kstat_t *, zoneid_t); +extern void kstat_zone_remove(kstat_t *, zoneid_t); +extern int kstat_zone_find(kstat_t *, zoneid_t); + +extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t); +extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t); +extern void kstat_rele(kstat_t *); + +#endif /* defined(_KERNEL) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_KSTAT_H */ diff --git a/lib/libspl/include/sys/list.h b/lib/libspl/include/sys/list.h new file mode 100644 index 000000000..6db92ed42 --- /dev/null +++ b/lib/libspl/include/sys/list.h @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LIST_H +#define _SYS_LIST_H + +#include <sys/list_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct list_node list_node_t; +typedef struct list list_t; + +void list_create(list_t *, size_t, size_t); +void list_destroy(list_t *); + +void list_insert_after(list_t *, void *, void *); +void list_insert_before(list_t *, void *, void *); +void list_insert_head(list_t *, void *); +void list_insert_tail(list_t *, void *); +void list_remove(list_t *, void *); +void *list_remove_head(list_t *); +void *list_remove_tail(list_t *); +void list_move_tail(list_t *, list_t *); + +void *list_head(list_t *); +void *list_tail(list_t *); +void *list_next(list_t *, void *); +void *list_prev(list_t *, void *); +int list_is_empty(list_t *); + +void list_link_init(list_node_t *); +void list_link_replace(list_node_t *, list_node_t *); + +int list_link_active(list_node_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LIST_H */ diff --git a/lib/libspl/include/sys/list_impl.h b/lib/libspl/include/sys/list_impl.h new file mode 100644 index 000000000..a6614f9a3 --- /dev/null +++ b/lib/libspl/include/sys/list_impl.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LIST_IMPL_H +#define _SYS_LIST_IMPL_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct list_node { + struct list_node *list_next; + struct list_node *list_prev; +}; + +struct list { + size_t list_size; + size_t list_offset; + struct list_node list_head; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LIST_IMPL_H */ diff --git a/lib/libspl/include/sys/machelf.h b/lib/libspl/include/sys/machelf.h new file mode 100644 index 000000000..f4c5c356f --- /dev/null +++ b/lib/libspl/include/sys/machelf.h @@ -0,0 +1,180 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_MACHELF_H +#define _SYS_MACHELF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__amd64) +#include <sys/elf_amd64.h> +#elif defined(__i386) +#include <sys/elf_386.h> +#elif defined(__sparc) +#include <sys/elf_SPARC.h> +#endif +#ifndef _ASM +#include <sys/types.h> +#include <sys/elf.h> +#include <sys/link.h> /* for Elf*_Dyn */ +#endif /* _ASM */ + +/* + * Make machine class dependent data types transparent to the common code + */ +#if defined(_ELF64) && !defined(_ELF32_COMPAT) + +#ifndef _ASM +typedef Elf64_Xword Xword; +typedef Elf64_Lword Lword; +typedef Elf64_Sxword Sxword; +typedef Elf64_Word Word; +typedef Elf64_Sword Sword; +typedef Elf64_Half Half; +typedef Elf64_Addr Addr; +typedef Elf64_Off Off; +typedef uchar_t Byte; +#endif /* _ASM */ + +#if defined(_KERNEL) +#define ELF_R_TYPE ELF64_R_TYPE +#define ELF_R_SYM ELF64_R_SYM +#define ELF_R_TYPE_DATA ELF64_R_TYPE_DATA +#define ELF_R_INFO ELF64_R_INFO +#define ELF_ST_BIND ELF64_ST_BIND +#define ELF_ST_TYPE ELF64_ST_TYPE +#define ELF_M_SYM ELF64_M_SYM +#define ELF_M_SIZE ELF64_M_SIZE +#endif + +#ifndef _ASM +typedef Elf64_Ehdr Ehdr; +typedef Elf64_Shdr Shdr; +typedef Elf64_Sym Sym; +typedef Elf64_Syminfo Syminfo; +typedef Elf64_Rela Rela; +typedef Elf64_Rel Rel; +typedef Elf64_Nhdr Nhdr; +typedef Elf64_Phdr Phdr; +typedef Elf64_Dyn Dyn; +typedef Elf64_Boot Boot; +typedef Elf64_Verdef Verdef; +typedef Elf64_Verdaux Verdaux; +typedef Elf64_Verneed Verneed; +typedef Elf64_Vernaux Vernaux; +typedef 
Elf64_Versym Versym; +typedef Elf64_Move Move; +typedef Elf64_Cap Cap; +#endif /* _ASM */ + +#else /* _ILP32 */ + +#ifndef _ASM +typedef Elf32_Word Xword; /* Xword/Sxword are 32-bits in Elf32 */ +typedef Elf32_Lword Lword; +typedef Elf32_Sword Sxword; +typedef Elf32_Word Word; +typedef Elf32_Sword Sword; +typedef Elf32_Half Half; +typedef Elf32_Addr Addr; +typedef Elf32_Off Off; +typedef uchar_t Byte; +#endif /* _ASM */ + +#if defined(_KERNEL) +#define ELF_R_TYPE ELF32_R_TYPE +#define ELF_R_SYM ELF32_R_SYM +#define ELF_R_TYPE_DATA(x) (0) +#define ELF_R_INFO ELF32_R_INFO +#define ELF_ST_BIND ELF32_ST_BIND +#define ELF_ST_TYPE ELF32_ST_TYPE +#define ELF_M_SYM ELF32_M_SYM +#define ELF_M_SIZE ELF32_M_SIZE +#endif + +#ifndef _ASM +typedef Elf32_Ehdr Ehdr; +typedef Elf32_Shdr Shdr; +typedef Elf32_Sym Sym; +typedef Elf32_Syminfo Syminfo; +typedef Elf32_Rela Rela; +typedef Elf32_Rel Rel; +typedef Elf32_Nhdr Nhdr; +typedef Elf32_Phdr Phdr; +typedef Elf32_Dyn Dyn; +typedef Elf32_Boot Boot; +typedef Elf32_Verdef Verdef; +typedef Elf32_Verdaux Verdaux; +typedef Elf32_Verneed Verneed; +typedef Elf32_Vernaux Vernaux; +typedef Elf32_Versym Versym; +typedef Elf32_Move Move; +typedef Elf32_Cap Cap; +#endif /* _ASM */ + +#endif /* _ILP32 */ + +/* + * Elf `printf' type-cast macros. These force arguments to be a fixed size + * so that Elf32 and Elf64 can share common format strings. + */ +#ifndef __lint +#define EC_ADDR(a) ((Elf64_Addr)(a)) /* "ull" */ +#define EC_OFF(a) ((Elf64_Off)(a)) /* "ull" */ +#define EC_HALF(a) ((Elf64_Half)(a)) /* "d" */ +#define EC_WORD(a) ((Elf64_Word)(a)) /* "u" */ +#define EC_SWORD(a) ((Elf64_Sword)(a)) /* "d" */ +#define EC_XWORD(a) ((Elf64_Xword)(a)) /* "ull" */ +#define EC_SXWORD(a) ((Elf64_Sxword)(a)) /* "ll" */ +#define EC_LWORD(a) ((Elf64_Lword)(a)) /* "ull" */ + +/* + * A native pointer is special. Although it can be convenient to display + * these from a common format (ull), compilers may flag the cast of a pointer + * to an integer as illegal. 
Casting these pointers to the native pointer + * size, suppresses any compiler errors. + */ +#define EC_NATPTR(a) ((Elf64_Xword)(uintptr_t)(a)) /* "ull" */ +#else +#define EC_ADDR(a) ((u_longlong_t)(a)) +#define EC_OFF(a) ((u_longlong_t)(a)) +#define EC_HALF(a) ((ushort_t)(a)) +#define EC_WORD(a) ((uint_t)(a)) +#define EC_SWORD(a) ((int)(a)) +#define EC_XWORD(a) ((u_longlong_t)(a)) +#define EC_SXWORD(a) ((longlong_t)(a)) +#define EC_LWORD(a) ((u_longlong_t)(a)) + +#define EC_NATPTR(a) ((u_longlong_t)(a)) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MACHELF_H */ diff --git a/lib/libspl/include/sys/mhd.h b/lib/libspl/include/sys/mhd.h new file mode 100644 index 000000000..fcc062d51 --- /dev/null +++ b/lib/libspl/include/sys/mhd.h @@ -0,0 +1,159 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_MHD_H +#define _SYS_MHD_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Definitions for multi-host device I/O control commands + */ +#define MHIOC ('M'<<8) +#define MHIOCENFAILFAST (MHIOC|1) +#define MHIOCTKOWN (MHIOC|2) +#define MHIOCRELEASE (MHIOC|3) +#define MHIOCSTATUS (MHIOC|4) +#define MHIOCGRP_INKEYS (MHIOC|5) +#define MHIOCGRP_INRESV (MHIOC|6) +#define MHIOCGRP_REGISTER (MHIOC|7) +#define MHIOCGRP_RESERVE (MHIOC|8) +#define MHIOCGRP_PREEMPTANDABORT (MHIOC|9) +#define MHIOCGRP_PREEMPT (MHIOC|10) +#define MHIOCGRP_CLEAR (MHIOC|11) +#define MHIOCGRP_REGISTERANDIGNOREKEY (MHIOC|14) +#define MHIOCQRESERVE (MHIOC|12) +#define MHIOCREREGISTERDEVID (MHIOC|13) + +/* + * Following is the structure to specify the delay parameters in + * milliseconds, via the MHIOCTKOWN ioctl. + */ +struct mhioctkown { + int reinstate_resv_delay; + int min_ownership_delay; + int max_ownership_delay; +}; + +#define MHIOC_RESV_KEY_SIZE 8 +typedef struct mhioc_resv_key { + uchar_t key[MHIOC_RESV_KEY_SIZE]; +} mhioc_resv_key_t; + +typedef struct mhioc_key_list { + uint32_t listsize; + uint32_t listlen; + mhioc_resv_key_t *list; +} mhioc_key_list_t; + +typedef struct mhioc_inkeys { + uint32_t generation; + mhioc_key_list_t *li; +} mhioc_inkeys_t; + +#if defined(_SYSCALL32) +struct mhioc_key_list32 { + uint32_t listsize; + uint32_t listlen; + caddr32_t list; +} mhioc_key_list32_t; + +struct mhioc_inkeys32 { + uint32_t generation; + caddr32_t li; +} mhioc_inkeys32_t; +#endif + +typedef struct mhioc_resv_desc { + mhioc_resv_key_t key; + uint8_t type; + uint8_t scope; + uint32_t scope_specific_addr; +} mhioc_resv_desc_t; + +typedef struct mhioc_resv_desc_list { + uint32_t listsize; + uint32_t listlen; + mhioc_resv_desc_t *list; +} mhioc_resv_desc_list_t; + +typedef struct mhioc_inresvs { + uint32_t generation; + mhioc_resv_desc_list_t *li; +} mhioc_inresvs_t; + +#if defined(_SYSCALL32) +struct mhioc_resv_desc_list32 { + uint32_t listsize; + uint32_t listlen; + 
caddr32_t list; +} mhioc_resv_desc_list32_t; + +typedef struct mhioc_inresvs32 { + uint32_t generation; + caddr32_t li; +} mhioc_inresvs32_t; +#endif + +typedef struct mhioc_register { + mhioc_resv_key_t oldkey; + mhioc_resv_key_t newkey; + boolean_t aptpl; /* True if persistent across power failures */ +} mhioc_register_t; + +typedef struct mhioc_preemptandabort { + mhioc_resv_desc_t resvdesc; + mhioc_resv_key_t victim_key; +} mhioc_preemptandabort_t; + +typedef struct mhioc_registerandignorekey { + mhioc_resv_key_t newkey; + boolean_t aptpl; /* True if persistent across power failures */ +} mhioc_registerandignorekey_t; + +/* + * SCSI-3 PGR Reservation Type Codes. Codes with the _OBSOLETE suffix + * have been removed from the SCSI3 PGR standard. + */ +#define SCSI3_RESV_READSHARED_OBSOLETE 0 +#define SCSI3_RESV_WRITEEXCLUSIVE 1 +#define SCSI3_RESV_READEXCLUSIVE_OBSOLETE 2 +#define SCSI3_RESV_EXCLUSIVEACCESS 3 +#define SCSI3_RESV_SHAREDACCESS_OBSOLETE 4 +#define SCSI3_RESV_WRITEEXCLUSIVEREGISTRANTSONLY 5 +#define SCSI3_RESV_EXCLUSIVEACCESSREGISTRANTSONLY 6 + +#define SCSI3_SCOPE_LOGICALUNIT 0 +#define SCSI3_SCOPE_EXTENT_OBSOLETE 1 +#define SCSI3_SCOPE_ELEMENT 2 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MHD_H */ diff --git a/lib/libspl/include/sys/mkdev.h b/lib/libspl/include/sys/mkdev.h new file mode 100644 index 000000000..76e3a4fff --- /dev/null +++ b/lib/libspl/include/sys/mkdev.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_MKDEV_H +#define _LIBSPL_SYS_MKDEV_H + +#endif diff --git a/lib/libspl/include/sys/mntent.h b/lib/libspl/include/sys/mntent.h new file mode 100644 index 000000000..c0594ca7b --- /dev/null +++ b/lib/libspl/include/sys/mntent.h @@ -0,0 +1,142 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ * + * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T + * All Rights Reserved + */ + +#ifndef _SYS_MNTENT_H +#define _SYS_MNTENT_H + + + +#ifdef __cplusplus +extern "C" { +#endif + +#define MNTTAB "/proc/mounts" +#define VFSTAB "/etc/vfstab" +#define MNTMAXSTR 128 + +#define MNTTYPE_ZFS "zfs" /* ZFS file system */ +#define MNTTYPE_UFS "ufs" /* Unix file system */ +#define MNTTYPE_SMBFS "smbfs" /* SMBFS file system */ +#define MNTTYPE_NFS "nfs" /* NFS file system */ +#define MNTTYPE_NFS3 "nfs3" /* NFS Version 3 file system */ +#define MNTTYPE_NFS4 "nfs4" /* NFS Version 4 file system */ +#define MNTTYPE_CACHEFS "cachefs" /* Cache File System */ +#define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */ +#define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */ +#define MNTTYPE_LOFS "lofs" /* Loop back file system */ +#define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */ +#define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */ +#define MNTTYPE_SWAP "swap" /* Swap file system */ +#define MNTTYPE_TMPFS "tmpfs" /* Tmp volatile file system */ +#define MNTTYPE_AUTOFS "autofs" /* Automounter ``file'' system */ +#define MNTTYPE_MNTFS "mntfs" /* In-kernel mnttab */ +#define MNTTYPE_DEV "dev" /* /dev file system */ +#define MNTTYPE_CTFS "ctfs" /* Contract file system */ +#define MNTTYPE_OBJFS "objfs" /* Kernel object file system */ +#define MNTTYPE_SHAREFS "sharefs" /* Kernel sharetab file system */ + + +#define MNTOPT_RO "ro" /* Read only */ +#define MNTOPT_RW "rw" /* Read/write */ +#define MNTOPT_RQ "rq" /* Read/write with quotas */ +#define MNTOPT_QUOTA "quota" /* Check quotas */ +#define MNTOPT_NOQUOTA "noquota" /* Don't check quotas */ +#define MNTOPT_ONERROR "onerror" /* action to taken on error */ +#define MNTOPT_SOFT "soft" /* Soft mount */ +#define MNTOPT_SEMISOFT "semisoft" /* partial soft, uncommited interface */ +#define MNTOPT_HARD "hard" /* Hard mount */ +#define MNTOPT_SUID "suid" /* Both setuid and devices allowed */ 
+#define MNTOPT_NOSUID "nosuid" /* Neither setuid nor devices allowed */ +#define MNTOPT_DEVICES "devices" /* Device-special allowed */ +#define MNTOPT_NODEVICES "nodevices" /* Device-special disallowed */ +#define MNTOPT_SETUID "setuid" /* Set uid allowed */ +#define MNTOPT_NOSETUID "nosetuid" /* Set uid not allowed */ +#define MNTOPT_GRPID "grpid" /* SysV-compatible gid on create */ +#define MNTOPT_REMOUNT "remount" /* Change mount options */ +#define MNTOPT_NOSUB "nosub" /* Disallow mounts on subdirs */ +#define MNTOPT_MULTI "multi" /* Do multi-component lookup */ +#define MNTOPT_INTR "intr" /* Allow NFS ops to be interrupted */ +#define MNTOPT_NOINTR "nointr" /* Don't allow interrupted ops */ +#define MNTOPT_PORT "port" /* NFS server IP port number */ +#define MNTOPT_SECURE "secure" /* Secure (AUTH_DES) mounting */ +#define MNTOPT_RSIZE "rsize" /* Max NFS read size (bytes) */ +#define MNTOPT_WSIZE "wsize" /* Max NFS write size (bytes) */ +#define MNTOPT_TIMEO "timeo" /* NFS timeout (1/10 sec) */ +#define MNTOPT_RETRANS "retrans" /* Max retransmissions (soft mnts) */ +#define MNTOPT_ACTIMEO "actimeo" /* Attr cache timeout (sec) */ +#define MNTOPT_ACREGMIN "acregmin" /* Min attr cache timeout (files) */ +#define MNTOPT_ACREGMAX "acregmax" /* Max attr cache timeout (files) */ +#define MNTOPT_ACDIRMIN "acdirmin" /* Min attr cache timeout (dirs) */ +#define MNTOPT_ACDIRMAX "acdirmax" /* Max attr cache timeout (dirs) */ +#define MNTOPT_NOAC "noac" /* Don't cache attributes at all */ +#define MNTOPT_NOCTO "nocto" /* No close-to-open consistency */ +#define MNTOPT_BG "bg" /* Do mount retries in background */ +#define MNTOPT_FG "fg" /* Do mount retries in foreground */ +#define MNTOPT_RETRY "retry" /* Number of mount retries */ +#define MNTOPT_DEV "dev" /* Device id of mounted fs */ +#define MNTOPT_POSIX "posix" /* Get static pathconf for mount */ +#define MNTOPT_MAP "map" /* Automount map */ +#define MNTOPT_DIRECT "direct" /* Automount direct map mount */ +#define 
MNTOPT_INDIRECT "indirect" /* Automount indirect map mount */ +#define MNTOPT_LLOCK "llock" /* Local locking (no lock manager) */ +#define MNTOPT_IGNORE "ignore" /* Ignore this entry */ +#define MNTOPT_VERS "vers" /* protocol version number indicator */ +#define MNTOPT_PROTO "proto" /* protocol network_id indicator */ +#define MNTOPT_SEC "sec" /* Security flavor indicator */ +#define MNTOPT_SYNCDIR "syncdir" /* Synchronous local directory ops */ +#define MNTOPT_NOSETSEC "nosec" /* Do no allow setting sec attrs */ +#define MNTOPT_NOPRINT "noprint" /* Do not print messages */ +#define MNTOPT_LARGEFILES "largefiles" /* allow large files */ +#define MNTOPT_NOLARGEFILES "nolargefiles" /* don't allow large files */ +#define MNTOPT_FORCEDIRECTIO "forcedirectio" /* Force DirectIO on all files */ +#define MNTOPT_NOFORCEDIRECTIO "noforcedirectio" /* No Force DirectIO */ +#define MNTOPT_DISABLEDIRECTIO "disabledirectio" /* Disable DirectIO ioctls */ +#define MNTOPT_PUBLIC "public" /* Use NFS public file handlee */ +#define MNTOPT_LOGGING "logging" /* enable logging */ +#define MNTOPT_NOLOGGING "nologging" /* disable logging */ +#define MNTOPT_ATIME "atime" /* update atime for files */ +#define MNTOPT_NOATIME "noatime" /* do not update atime for files */ +#define MNTOPT_GLOBAL "global" /* Cluster-wide global mount */ +#define MNTOPT_NOGLOBAL "noglobal" /* Mount local to single node */ +#define MNTOPT_DFRATIME "dfratime" /* Deferred access time updates */ +#define MNTOPT_NODFRATIME "nodfratime" /* No Deferred access time updates */ +#define MNTOPT_NBMAND "nbmand" /* allow non-blocking mandatory locks */ +#define MNTOPT_NONBMAND "nonbmand" /* deny non-blocking mandatory locks */ +#define MNTOPT_XATTR "xattr" /* enable extended attributes */ +#define MNTOPT_NOXATTR "noxattr" /* disable extended attributes */ +#define MNTOPT_EXEC "exec" /* enable executables */ +#define MNTOPT_NOEXEC "noexec" /* disable executables */ +#define MNTOPT_RESTRICT "restrict" /* restricted autofs mount 
*/ +#define MNTOPT_BROWSE "browse" /* browsable autofs mount */ +#define MNTOPT_NOBROWSE "nobrowse" /* non-browsable autofs mount */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MNTENT_H */ diff --git a/lib/libspl/include/sys/mnttab.h b/lib/libspl/include/sys/mnttab.h new file mode 100644 index 000000000..70f144967 --- /dev/null +++ b/lib/libspl/include/sys/mnttab.h @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T*/ +/* All Rights Reserved */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +/* Copyright 2006 Ricardo Correia */ + +#ifndef _SYS_MNTTAB_H +#define _SYS_MNTTAB_H + +#include <stdio.h> +#include <mntent.h> +#include <sys/types.h> + +#ifdef MNTTAB +#undef MNTTAB +#endif + +#define MNTTAB "/proc/mounts" +#define MNT_LINE_MAX 1024 + +#define MNT_TOOLONG 1 /* entry exceeds MNT_LINE_MAX */ +#define MNT_TOOMANY 2 /* too many fields in line */ +#define MNT_TOOFEW 3 /* too few fields in line */ + +struct mnttab { + char *mnt_special; + char *mnt_mountp; + char *mnt_fstype; + char *mnt_mntopts; +}; + +/* + * NOTE: fields in extmnttab should match struct mnttab till new fields + * are encountered, this allows hasmntopt to work properly when its arg is + * a pointer to an extmnttab struct cast to a mnttab struct pointer. + */ + +struct extmnttab { + char *mnt_special; + char *mnt_mountp; + char *mnt_fstype; + char *mnt_mntopts; + uint_t mnt_major; + uint_t mnt_minor; +}; + +extern int getmntany(FILE *fp, struct mnttab *mp, struct mnttab *mpref); +extern int _sol_getmntent(FILE *fp, struct mnttab *mp); +extern int getextmntent(FILE *fp, struct extmnttab *mp, int len); + +static inline char *_sol_hasmntopt(struct mnttab *mnt, char *opt) +{ + struct mntent mnt_new; + + mnt_new.mnt_opts = mnt->mnt_mntopts; + + return hasmntopt(&mnt_new, opt); +} + +#define hasmntopt _sol_hasmntopt +#define getmntent _sol_getmntent + +#endif diff --git a/lib/libspl/include/sys/mount.h b/lib/libspl/include/sys/mount.h new file mode 100644 index 000000000..144f915d2 --- /dev/null +++ b/lib/libspl/include/sys/mount.h @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include_next <sys/mount.h> + +#ifndef _LIBSPL_SYS_MOUNT_H +#define _LIBSPL_SYS_MOUNT_H + +#include <sys/mntent.h> +#include <assert.h> +#include <string.h> +#include <stdlib.h> + +/* + * Some old glibc headers don't define BLKGETSIZE64 + * and we don't want to require the kernel headers + */ +#if !defined(BLKGETSIZE64) +#define BLKGETSIZE64 _IOR(0x12, 114, size_t) +#endif + +#define MS_FORCE MNT_FORCE +#define MS_OVERLAY 32768 +#define MS_NOMNTTAB 0 /* Not supported in Linux */ +#define MS_OPTIONSTR 0 /* Not necessary in Linux */ + +#endif /* _LIBSPL_SYS_MOUNT_H */ diff --git a/lib/libspl/include/sys/note.h b/lib/libspl/include/sys/note.h new file mode 100644 index 000000000..88e0eabcd --- /dev/null +++ b/lib/libspl/include/sys/note.h @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994 by Sun Microsystems, Inc. + */ + +/* + * sys/note.h: interface for annotating source with info for tools + * + * This is the underlying interface; NOTE (/usr/include/note.h) is the + * preferred interface, but all exported header files should include this + * file directly and use _NOTE so as not to take "NOTE" from the user's + * namespace. For consistency, *all* kernel source should use _NOTE. + * + * By default, annotations expand to nothing. This file implements + * that. Tools using annotations will interpose a different version + * of this file that will expand annotations as needed. + */ + +#ifndef _SYS_NOTE_H +#define _SYS_NOTE_H + + + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _NOTE +#define _NOTE(s) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_NOTE_H */ diff --git a/lib/libspl/include/sys/param.h b/lib/libspl/include/sys/param.h new file mode 100644 index 000000000..d9e7782e4 --- /dev/null +++ b/lib/libspl/include/sys/param.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_PARAM_H +#define _LIBSPL_SYS_PARAM_H + +#include_next <sys/param.h> +#include <unistd.h> + +/* + * File system parameters and macros. + * + * The file system is made out of blocks of at most MAXBSIZE units, + * with smaller units (fragments) only in the last direct block. + * MAXBSIZE primarily determines the size of buffers in the buffer + * pool. It may be made larger without any effect on existing + * file systems; however making it smaller make make some file + * systems unmountable. + * + * Note that the blocked devices are assumed to have DEV_BSIZE + * "sectors" and that fragments must be some multiple of this size. + */ +#define MAXBSIZE 8192 +#define DEV_BSIZE 512 +#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ + +#define MAXNAMELEN 256 + +#ifdef _LP64 +#define MAXOFFSET_T 0x7fffffffffffffffl +#else +#define MAXOFFSET_T 0x7fffffffl +#endif + +#define UID_NOBODY 60001 /* user ID no body */ +#define GID_NOBODY UID_NOBODY +#define UID_NOACCESS 60002 /* user ID no access */ + +#define MAXUID 2147483647 /* max user id */ +#define MAXPROJID MAXUID /* max project id */ + +#define PAGESIZE (sysconf(_SC_PAGESIZE)) + +#endif diff --git a/lib/libspl/include/sys/priv.h b/lib/libspl/include/sys/priv.h new file mode 100644 index 000000000..4a3ab9684 --- /dev/null +++ b/lib/libspl/include/sys/priv.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_PRIV_H +#define _LIBSPL_SYS_PRIV_H + +#endif diff --git a/lib/libspl/include/sys/processor.h b/lib/libspl/include/sys/processor.h new file mode 100644 index 000000000..ce80c88df --- /dev/null +++ b/lib/libspl/include/sys/processor.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_PROCESSOR_H +#define _LIBSPL_SYS_PROCESSOR_H + +#define getcpuid() (-1) + +#endif diff --git a/lib/libspl/include/sys/sdt.h b/lib/libspl/include/sys/sdt.h new file mode 100644 index 000000000..79733eef0 --- /dev/null +++ b/lib/libspl/include/sys/sdt.h @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_SDT_H +#define _LIBSPL_SYS_SDT_H + +#define DTRACE_PROBE(a) ((void) 0) +#define DTRACE_PROBE1(a,b,c) ((void) 0) +#define DTRACE_PROBE2(a,b,c,d,e) ((void) 0) +#define DTRACE_PROBE3(a,b,c,d,e,f,g) ((void) 0) +#define DTRACE_PROBE4(a,b,c,d,e,f,g,h,i) ((void) 0) + +#endif diff --git a/lib/libspl/include/sys/stack.h b/lib/libspl/include/sys/stack.h new file mode 100644 index 000000000..496605f95 --- /dev/null +++ b/lib/libspl/include/sys/stack.h @@ -0,0 +1,52 @@ +/* + * This header file distributed under the terms of the CDDL. + * Portions Copyright 2008 Sun Microsystems, Inc. All Rights reserved. 
+ */ +#ifndef _SYS_STACK_H +#define _SYS_STACK_H + +#include <pthread.h> + +#define STACK_BIAS 0 + +#ifdef __USE_GNU + +static inline int +stack_getbounds(stack_t *sp) +{ + pthread_attr_t attr; + int rc; + + rc = pthread_getattr_np(pthread_self(), &attr); + if (rc) + return rc; + + rc = pthread_attr_getstack(&attr, &sp->ss_sp, &sp->ss_size); + if (rc == 0) + sp->ss_flags = 0; + + pthread_attr_destroy(&attr); + + return rc; +} + +static inline int +thr_stksegment(stack_t *sp) +{ + int rc; + + rc = stack_getbounds(sp); + if (rc) + return rc; + + /* thr_stksegment() is expected to set sp.ss_sp to the high stack + * address, but the stack_getbounds() interface is expected to + * set sp.ss_sp to the low address. Adjust accordingly. */ + sp->ss_sp = (void *)(((uintptr_t)sp->ss_sp) + sp->ss_size); + sp->ss_flags = 0; + + return rc; +} + +#endif /* __USE_GNU */ +#endif /* _SYS_STACK_H */ diff --git a/lib/libspl/include/sys/stropts.h b/lib/libspl/include/sys/stropts.h new file mode 100644 index 000000000..3c86957ba --- /dev/null +++ b/lib/libspl/include/sys/stropts.h @@ -0,0 +1,4 @@ +#ifndef _LIBSPL_SYS_STROPTS_H +#define _LIBSPL_SYS_STROPTS_H + +#endif /* _LIBSPL_SYS_STROPTS_H */ diff --git a/lib/libspl/include/sys/sunddi.h b/lib/libspl/include/sys/sunddi.h new file mode 100644 index 000000000..ccd2b29b9 --- /dev/null +++ b/lib/libspl/include/sys/sunddi.h @@ -0,0 +1,29 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008 by Sun Microsystems, Inc. + */ + +#ifndef _SYS_SUNDDI_H +#define _SYS_SUNDDI_H + +#endif /* _SYS_SUNDDI_H */ diff --git a/lib/libspl/include/sys/sysevent.h b/lib/libspl/include/sys/sysevent.h new file mode 100644 index 000000000..980d14541 --- /dev/null +++ b/lib/libspl/include/sys/sysevent.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_SYSEVENT_H +#define _LIBSPL_SYS_SYSEVENT_H + +#endif diff --git a/lib/libspl/include/sys/sysevent/eventdefs.h b/lib/libspl/include/sys/sysevent/eventdefs.h new file mode 100644 index 000000000..c4494f778 --- /dev/null +++ b/lib/libspl/include/sys/sysevent/eventdefs.h @@ -0,0 +1,235 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_SYSEVENT_EVENTDEFS_H +#define _SYS_SYSEVENT_EVENTDEFS_H + + + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * eventdefs.h contains public definitions for sysevent types (classes + * and subclasses). All additions/removal/changes are subject + * to PSARC approval. 
+ */ + +/* Sysevent Class definitions */ +#define EC_NONE "EC_none" +#define EC_PRIV "EC_priv" +#define EC_PLATFORM "EC_platform" /* events private to platform */ +#define EC_DR "EC_dr" /* Dynamic reconfiguration event class */ +#define EC_ENV "EC_env" /* Environmental monitor event class */ +#define EC_DOMAIN "EC_domain" /* Domain event class */ +#define EC_AP_DRIVER "EC_ap_driver" /* Alternate Pathing event class */ +#define EC_IPMP "EC_ipmp" /* IP Multipathing event class */ +#define EC_DEV_ADD "EC_dev_add" /* device add event class */ +#define EC_DEV_REMOVE "EC_dev_remove" /* device remove event class */ +#define EC_DEV_BRANCH "EC_dev_branch" /* device tree branch event class */ +#define EC_FM "EC_fm" /* FMA error report event */ +#define EC_ZFS "EC_zfs" /* ZFS event */ + +/* + * The following event class is reserved for exclusive use + * by Sun Cluster software. + */ +#define EC_CLUSTER "EC_Cluster" + +/* + * The following classes are exclusively reserved for use by the + * Solaris Volume Manager (SVM) + */ +#define EC_SVM_CONFIG "EC_SVM_Config" +#define EC_SVM_STATE "EC_SVM_State" + +/* + * EC_SVM_CONFIG subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/svm.h + */ +#define ESC_SVM_CREATE "ESC_SVM_Create" +#define ESC_SVM_DELETE "ESC_SVM_Delete" +#define ESC_SVM_ADD "ESC_SVM_Add" +#define ESC_SVM_REMOVE "ESC_SVM_Remove" +#define ESC_SVM_REPLACE "ESC_SVM_Replace" +#define ESC_SVM_GROW "ESC_SVM_Grow" +#define ESC_SVM_RENAME_SRC "ESC_SVM_Rename_Src" +#define ESC_SVM_RENAME_DST "ESC_SVM_Rename_Dst" +#define ESC_SVM_MEDIATOR_ADD "ESC_SVM_Mediator_Add" +#define ESC_SVM_MEDIATOR_DELETE "ESC_SVM_Mediator_Delete" +#define ESC_SVM_HOST_ADD "ESC_SVM_Host_Add" +#define ESC_SVM_HOST_DELETE "ESC_SVM_Host_Delete" +#define ESC_SVM_DRIVE_ADD "ESC_SVM_Drive_Add" +#define ESC_SVM_DRIVE_DELETE "ESC_SVM_Drive_Delete" +#define ESC_SVM_DETACH "ESC_SVM_Detach" +#define ESC_SVM_DETACHING "ESC_SVM_Detaching" +#define ESC_SVM_ATTACH 
"ESC_SVM_Attach" +#define ESC_SVM_ATTACHING "ESC_SVM_Attaching" + +/* + * EC_SVM_STATE subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/svm.h + */ +#define ESC_SVM_INIT_START "ESC_SVM_Init_Start" +#define ESC_SVM_INIT_FAILED "ESC_SVM_Init_Failed" +#define ESC_SVM_INIT_FATAL "ESC_SVM_Init_Fatal" +#define ESC_SVM_INIT_SUCCESS "ESC_SVM_Init_Success" +#define ESC_SVM_IOERR "ESC_SVM_Ioerr" +#define ESC_SVM_ERRED "ESC_SVM_Erred" +#define ESC_SVM_LASTERRED "ESC_SVM_Lasterred" +#define ESC_SVM_OK "ESC_SVM_Ok" +#define ESC_SVM_ENABLE "ESC_SVM_Enable" +#define ESC_SVM_RESYNC_START "ESC_SVM_Resync_Start" +#define ESC_SVM_RESYNC_FAILED "ESC_SVM_Resync_Failed" +#define ESC_SVM_RESYNC_SUCCESS "ESC_SVM_Resync_Success" +#define ESC_SVM_RESYNC_DONE "ESC_SVM_Resync_Done" +#define ESC_SVM_HOTSPARED "ESC_SVM_Hotspared" +#define ESC_SVM_HS_FREED "ESC_SVM_HS_Freed" +#define ESC_SVM_HS_CHANGED "ESC_SVM_HS_Changed" +#define ESC_SVM_TAKEOVER "ESC_SVM_Takeover" +#define ESC_SVM_RELEASE "ESC_SVM_Release" +#define ESC_SVM_OPEN_FAIL "ESC_SVM_Open_Fail" +#define ESC_SVM_OFFLINE "ESC_SVM_Offline" +#define ESC_SVM_ONLINE "ESC_SVM_Online" +#define ESC_SVM_CHANGE "ESC_SVM_Change" +#define ESC_SVM_EXCHANGE "ESC_SVM_Exchange" +#define ESC_SVM_REGEN_START "ESC_SVM_Regen_Start" +#define ESC_SVM_REGEN_DONE "ESC_SVM_Regen_Done" +#define ESC_SVM_REGEN_FAILED "ESC_SVM_Regen_Failed" + +/* + * EC_DR subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/dr.h + */ + +/* Attachment point state change */ +#define ESC_DR_AP_STATE_CHANGE "ESC_dr_ap_state_change" +#define ESC_DR_REQ "ESC_dr_req" /* Request DR */ +#define ESC_DR_TARGET_STATE_CHANGE "ESC_dr_target_state_change" + +/* + * EC_ENV subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/env.h + */ +#define ESC_ENV_TEMP "ESC_env_temp" /* Temperature change event subclass */ +#define ESC_ENV_FAN "ESC_env_fan" /* Fan status change event 
subclass */ +#define ESC_ENV_POWER "ESC_env_power" /* Power supply change event subclass */ +#define ESC_ENV_LED "ESC_env_led" /* LED change event subclass */ + +/* + * EC_DOMAIN subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/domain.h + */ + +/* Domain state change */ +#define ESC_DOMAIN_STATE_CHANGE "ESC_domain_state_change" +/* Domain loghost name change */ +#define ESC_DOMAIN_LOGHOST_CHANGE "ESC_domain_loghost_change" + +/* + * EC_AP_DRIVER subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/ap_driver.h + */ + +/* Alternate Pathing path switch */ +#define ESC_AP_DRIVER_PATHSWITCH "ESC_ap_driver_pathswitch" +/* Alternate Pathing database commit */ +#define ESC_AP_DRIVER_COMMIT "ESC_ap_driver_commit" +/* Alternate Pathing physical path status change */ +#define ESC_AP_DRIVER_PHYS_PATH_STATUS_CHANGE \ + "ESC_ap_driver_phys_path_status_change" + +/* + * EC_IPMP subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/ipmp.h + */ + +/* IPMP group has changed state */ +#define ESC_IPMP_GROUP_STATE "ESC_ipmp_group_state" + +/* IPMP group has been created or removed */ +#define ESC_IPMP_GROUP_CHANGE "ESC_ipmp_group_change" + +/* IPMP group has had an interface added or removed */ +#define ESC_IPMP_GROUP_MEMBER_CHANGE "ESC_ipmp_group_member_change" + +/* Interface within an IPMP group has changed state or type */ +#define ESC_IPMP_IF_CHANGE "ESC_ipmp_if_change" + + +/* + * EC_DEV_ADD and EC_DEV_REMOVE subclass definitions - supporting attributes + * (name/value pairs) are found in sys/sysevent/dev.h + */ +#define ESC_DISK "disk" /* disk device */ +#define ESC_NETWORK "network" /* network interface */ +#define ESC_PRINTER "printer" /* printer device */ +#define ESC_LOFI "lofi" /* lofi device */ + +/* + * EC_DEV_BRANCH subclass definitions - supporting attributes (name/value pairs) + * are found in sys/sysevent/dev.h + */ + +/* device tree branch 
added */ +#define ESC_DEV_BRANCH_ADD "ESC_dev_branch_add" + +/* device tree branch removed */ +#define ESC_DEV_BRANCH_REMOVE "ESC_dev_branch_remove" + +/* FMA Fault and Error event protocol subclass */ +#define ESC_FM_ERROR "ESC_FM_error" +#define ESC_FM_ERROR_REPLAY "ESC_FM_error_replay" + +/* Service processor subclass definitions */ +#define ESC_PLATFORM_SP_RESET "ESC_platform_sp_reset" + +/* + * EC_ACPIEV subclass definitions + */ +#define EC_ACPIEV "EC_acpiev" +#define ESC_ACPIEV_ADD "ESC_acpiev_add" +#define ESC_ACPIEV_REMOVE "ESC_acpiev_remove" +#define ESC_ACPIEV_WARN "ESC_acpiev_warn" +#define ESC_ACPIEV_LOW "ESC_acpiev_low" +#define ESC_ACPIEV_STATE_CHANGE "ESC_acpiev_state_change" + +/* + * ZFS subclass definitions. supporting attributes (name/value pairs) are found + * in sys/fs/zfs.h + */ +#define ESC_ZFS_RESILVER_START "ESC_ZFS_resilver_start" +#define ESC_ZFS_RESILVER_FINISH "ESC_ZFS_resilver_finish" +#define ESC_ZFS_VDEV_REMOVE "ESC_ZFS_vdev_remove" +#define ESC_ZFS_POOL_DESTROY "ESC_ZFS_pool_destroy" +#define ESC_ZFS_VDEV_CLEAR "ESC_ZFS_vdev_clear" +#define ESC_ZFS_VDEV_CHECK "ESC_ZFS_vdev_check" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSEVENT_EVENTDEFS_H */ diff --git a/lib/libspl/include/sys/sysmacros.h b/lib/libspl/include/sys/sysmacros.h new file mode 100644 index 000000000..07ab8c934 --- /dev/null +++ b/lib/libspl/include/sys/sysmacros.h @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_SYSMACROS_H +#define _LIBSPL_SYS_SYSMACROS_H + +#include_next <sys/sysmacros.h> + +/* common macros */ +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) ((a) < (b) ? (b) : (a)) +#endif +#ifndef ABS +#define ABS(a) ((a) < 0 ? -(a) : (a)) +#endif + +#define makedevice(maj,min) makedev(maj,min) +#define _sysconf(a) sysconf(a) +#define __NORETURN __attribute__ ((noreturn)) + +/* + * Compatibility macros/typedefs needed for Solaris -> Linux port + */ +#define P2ALIGN(x, align) ((x) & -(align)) +#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) +#define P2ROUNDUP_TYPED(x, align, type) \ + (-(-(type)(x) & -(type)(align))) +#define P2BOUNDARY(off, len, align) \ + (((off) ^ ((off) + (len) - 1)) > (align) - 1) +#define P2PHASE(x, align) ((x) & ((align) - 1)) +#define P2NPHASE(x, align) (-(x) & ((align) - 1)) +#define P2NPHASE_TYPED(x, align, type) \ + (-(type)(x) & ((type)(align) - 1)) +#define ISP2(x) (((x) & ((x) - 1)) == 0) +#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) + +/* + * Typed version of the P2* macros. These macros should be used to ensure + * that the result is correctly calculated based on the data type of (x), + * which is passed in as the last argument, regardless of the data + * type of the alignment. 
For example, if (x) is of type uint64_t, + * and we want to round it up to a page boundary using "PAGESIZE" as + * the alignment, we can do either + * P2ROUNDUP(x, (uint64_t)PAGESIZE) + * or + * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t) + */ +#define P2ALIGN_TYPED(x, align, type) \ + ((type)(x) & -(type)(align)) +#define P2PHASE_TYPED(x, align, type) \ + ((type)(x) & ((type)(align) - 1)) +#define P2NPHASE_TYPED(x, align, type) \ + (-(type)(x) & ((type)(align) - 1)) +#define P2ROUNDUP_TYPED(x, align, type) \ + (-(-(type)(x) & -(type)(align))) +#define P2END_TYPED(x, align, type) \ + (-(~(type)(x) & -(type)(align))) +#define P2PHASEUP_TYPED(x, align, phase, type) \ + ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align))) +#define P2CROSS_TYPED(x, y, align, type) \ + (((type)(x) ^ (type)(y)) > (type)(align) - 1) +#define P2SAMEHIGHBIT_TYPED(x, y, type) \ + (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y))) + + +/* avoid any possibility of clashing with <stddef.h> version */ +#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) +#endif + +#endif /* _LIBSPL_SYS_SYSMACROS_H */ diff --git a/lib/libspl/include/sys/systeminfo.h b/lib/libspl/include/sys/systeminfo.h new file mode 100644 index 000000000..9f561aaa8 --- /dev/null +++ b/lib/libspl/include/sys/systeminfo.h @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_SYSTEMINFO_H +#define _LIBSPL_SYS_SYSTEMINFO_H + +#define HW_INVALID_HOSTID 0xFFFFFFFF /* an invalid hostid */ +#define HW_HOSTID_LEN 11 /* minimum buffer size needed */ + /* to hold a decimal or hex */ + /* hostid string */ + +#define sysinfo(cmd,buf,cnt) (-1) + +#endif diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h new file mode 100644 index 000000000..bb5af8c8b --- /dev/null +++ b/lib/libspl/include/sys/time.h @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_TIME_H +#define _LIBSPL_SYS_TIME_H + +#include_next <sys/time.h> +#include <sys/types.h> + +#ifndef NANOSEC +#define NANOSEC 1000000000 +#endif + +extern hrtime_t gethrtime(void); + +#endif diff --git a/lib/libspl/include/sys/types.h b/lib/libspl/include/sys/types.h new file mode 100644 index 000000000..5fb49118e --- /dev/null +++ b/lib/libspl/include/sys/types.h @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_TYPES_H +#define _LIBSPL_SYS_TYPES_H + +#include <sys/isa_defs.h> +#include <sys/feature_tests.h> +#include_next <sys/types.h> +#include <sys/param.h> /* for NBBY */ +#include <sys/types32.h> +#include <sys/va_list.h> + +#ifndef HAVE_INTTYPES +#include <inttypes.h> + +typedef enum boolean { B_FALSE, B_TRUE } boolean_t; + +typedef unsigned char uchar_t; +typedef unsigned short ushort_t; +typedef unsigned int uint_t; +typedef unsigned long ulong_t; + +typedef long long longlong_t; +typedef unsigned long long u_longlong_t; +#endif /* HAVE_INTTYPES */ + +typedef longlong_t offset_t; +typedef u_longlong_t u_offset_t; +typedef u_longlong_t len_t; +typedef longlong_t diskaddr_t; + +typedef ulong_t pfn_t; /* page frame number */ +typedef ulong_t pgcnt_t; /* number of pages */ +typedef long spgcnt_t; /* signed number of pages */ + +typedef longlong_t hrtime_t; +typedef struct timespec timestruc_t; + +typedef short pri_t; + +typedef int zoneid_t; +typedef int projid_t; + +typedef int major_t; +typedef int minor_t; + +typedef ushort_t o_mode_t; /* old file attribute type */ + +/* + * Definitions remaining from previous partial support for 64-bit file + * offsets. This partial support for devices greater than 2gb requires + * compiler support for long long. 
+ */ +#ifdef _LONG_LONG_LTOH +typedef union { + offset_t _f; /* Full 64 bit offset value */ + struct { + int32_t _l; /* lower 32 bits of offset value */ + int32_t _u; /* upper 32 bits of offset value */ + } _p; +} lloff_t; +#endif + +#ifdef _LONG_LONG_HTOL +typedef union { + offset_t _f; /* Full 64 bit offset value */ + struct { + int32_t _u; /* upper 32 bits of offset value */ + int32_t _l; /* lower 32 bits of offset value */ + } _p; +} lloff_t; +#endif + +#endif diff --git a/lib/libspl/include/sys/types32.h b/lib/libspl/include/sys/types32.h new file mode 100644 index 000000000..9ab3b0782 --- /dev/null +++ b/lib/libspl/include/sys/types32.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_TYPES32_H +#define _SYS_TYPES32_H + + + +#include <sys/inttypes.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Interoperability types for programs. Used for: + * + * Crossing between 32-bit and 64-bit domains. + * + * On disk data formats such as filesystem meta data + * and disk label. 
+ * + * Note: Applications should never include this + * header file. + */ +typedef uint32_t caddr32_t; +typedef int32_t daddr32_t; +typedef int32_t off32_t; +typedef uint32_t ino32_t; +typedef int32_t blkcnt32_t; +typedef uint32_t fsblkcnt32_t; +typedef uint32_t fsfilcnt32_t; +typedef int32_t id32_t; +typedef uint32_t major32_t; +typedef uint32_t minor32_t; +typedef int32_t key32_t; +typedef uint32_t mode32_t; +typedef uint32_t uid32_t; +typedef uint32_t gid32_t; +typedef uint32_t nlink32_t; +typedef uint32_t dev32_t; +typedef int32_t pid32_t; +typedef uint32_t size32_t; +typedef int32_t ssize32_t; +typedef int32_t time32_t; +typedef int32_t clock32_t; + +struct timeval32 { + time32_t tv_sec; /* seconds */ + int32_t tv_usec; /* and microseconds */ +}; + +typedef struct timespec32 { + time32_t tv_sec; /* seconds */ + int32_t tv_nsec; /* and nanoseconds */ +} timespec32_t; + +typedef struct timespec32 timestruc32_t; + +typedef struct itimerspec32 { + struct timespec32 it_interval; + struct timespec32 it_value; +} itimerspec32_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TYPES32_H */ diff --git a/lib/libspl/include/sys/tzfile.h b/lib/libspl/include/sys/tzfile.h new file mode 100644 index 000000000..e30e75663 --- /dev/null +++ b/lib/libspl/include/sys/tzfile.h @@ -0,0 +1,164 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * from Arthur Olson's 6.1 + */ + +#ifndef _LIBSPL_SYS_TZFILE_H +#define _LIBSPL_SYS_TZFILE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Information about time zone files. + */ + +#define TZDIR "/usr/share/lib/zoneinfo" /* Time zone object file directory */ + +#define TZDEFAULT (getenv("TZ")) + +#define TZDEFRULES "posixrules" + +/* + * Each file begins with. . . + */ + +struct tzhead { + char tzh_reserved[24]; /* reserved for future use */ + char tzh_ttisstdcnt[4]; /* coded number of trans. time flags */ + char tzh_leapcnt[4]; /* coded number of leap seconds */ + char tzh_timecnt[4]; /* coded number of transition times */ + char tzh_typecnt[4]; /* coded number of local time types */ + char tzh_charcnt[4]; /* coded number of abbr. chars */ +}; + +/* + * . . .followed by. . . + * + * tzh_timecnt (char [4])s coded transition times a la time(2) + * tzh_timecnt (unsigned char)s types of local time starting at above + * tzh_typecnt repetitions of + * one (char [4]) coded GMT offset in seconds + * one (unsigned char) used to set tm_isdst + * one (unsigned char) that's an abbreviation list index + * tzh_charcnt (char)s '\0'-terminated zone abbreviations + * tzh_leapcnt repetitions of + * one (char [4]) coded leap second transition times + * one (char [4]) total correction after above + * tzh_ttisstdcnt (char)s indexed by type; if TRUE, transition + * time is standard time, if FALSE, + * transition time is wall clock time + * if absent, transition times are + * assumed to be wall clock time + */ + +/* + * In the current implementation, "tzset()" refuses to deal with files that + * exceed any of the limits below. 
+ */ + +/* + * The TZ_MAX_TIMES value below is enough to handle a bit more than a + * year's worth of solar time (corrected daily to the nearest second) or + * 138 years of Pacific Presidential Election time + * (where there are three time zone transitions every fourth year). + */ +#define TZ_MAX_TIMES 370 + +#define TZ_MAX_TYPES 256 /* Limited by what (unsigned char)'s can hold */ + +#define TZ_MAX_CHARS 50 /* Maximum number of abbreviation characters */ + +#define TZ_MAX_LEAPS 50 /* Maximum number of leap second corrections */ + +#define SECSPERMIN 60 +#define MINSPERHOUR 60 +#define HOURSPERDAY 24 +#define DAYSPERWEEK 7 +#define DAYSPERNYEAR 365 +#define DAYSPERLYEAR 366 +#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR) +#define SECSPERDAY ((long)SECSPERHOUR * HOURSPERDAY) +#define MONSPERYEAR 12 + +#define TM_SUNDAY 0 +#define TM_MONDAY 1 +#define TM_TUESDAY 2 +#define TM_WEDNESDAY 3 +#define TM_THURSDAY 4 +#define TM_FRIDAY 5 +#define TM_SATURDAY 6 + +#define TM_JANUARY 0 +#define TM_FEBRUARY 1 +#define TM_MARCH 2 +#define TM_APRIL 3 +#define TM_MAY 4 +#define TM_JUNE 5 +#define TM_JULY 6 +#define TM_AUGUST 7 +#define TM_SEPTEMBER 8 +#define TM_OCTOBER 9 +#define TM_NOVEMBER 10 +#define TM_DECEMBER 11 + +#define TM_YEAR_BASE 1900 + +#define EPOCH_YEAR 1970 +#define EPOCH_WDAY TM_THURSDAY + +/* + * Accurate only for the past couple of centuries; + * that will probably do. + */ + +#define isleap(y) (((y) % 4) == 0 && ((y) % 100) != 0 || ((y) % 400) == 0) + +/* + * Use of the underscored variants may cause problems if you move your code to + * certain System-V-based systems; for maximum portability, use the + * underscore-free variants. The underscored variants are provided for + * backward compatibility only; they may disappear from future versions of + * this file. 
+ */ + +#define SECS_PER_MIN SECSPERMIN +#define MINS_PER_HOUR MINSPERHOUR +#define HOURS_PER_DAY HOURSPERDAY +#define DAYS_PER_WEEK DAYSPERWEEK +#define DAYS_PER_NYEAR DAYSPERNYEAR +#define DAYS_PER_LYEAR DAYSPERLYEAR +#define SECS_PER_HOUR SECSPERHOUR +#define SECS_PER_DAY SECSPERDAY +#define MONS_PER_YEAR MONSPERYEAR + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBSPL_SYS_TZFILE_H */ diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h new file mode 100644 index 000000000..0aed91357 --- /dev/null +++ b/lib/libspl/include/sys/uio.h @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. 
+ */ + +#ifndef _LIBSPL_SYS_UIO_H +#define _LIBSPL_SYS_UIO_H + +/* struct iovec is defined in glibc's sys/uio.h */ +#include_next <sys/uio.h> + +typedef enum uio_rw { UIO_READ, UIO_WRITE } uio_rw_t; + +#define UIO_SYSSPACE 1 + +#endif /* _LIBSPL_SYS_UIO_H */ diff --git a/lib/libspl/include/sys/utsname.h b/lib/libspl/include/sys/utsname.h new file mode 100644 index 000000000..fd323b96d --- /dev/null +++ b/lib/libspl/include/sys/utsname.h @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_UTSNAME_H +#define _LIBSPL_UTSNAME_H + +#include_next <sys/utsname.h> + +extern struct utsname utsname; /* declaration only; define it in exactly one .c file */ + +#endif /* _LIBSPL_UTSNAME_H */ diff --git a/lib/libspl/include/sys/va_list.h b/lib/libspl/include/sys/va_list.h new file mode 100644 index 000000000..cf6045451 --- /dev/null +++ b/lib/libspl/include/sys/va_list.h @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_VA_LIST_H +#define _SYS_VA_LIST_H + +#include <stdarg.h> + +#ifndef __va_list +typedef __gnuc_va_list __va_list; +#endif + +#endif diff --git a/lib/libspl/include/sys/varargs.h b/lib/libspl/include/sys/varargs.h new file mode 100644 index 000000000..b8a63d8d7 --- /dev/null +++ b/lib/libspl/include/sys/varargs.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_VARARGS_H +#define _LIBSPL_SYS_VARARGS_H + +#endif diff --git a/lib/libspl/include/sys/vtoc.h b/lib/libspl/include/sys/vtoc.h new file mode 100644 index 000000000..004b49097 --- /dev/null +++ b/lib/libspl/include/sys/vtoc.h @@ -0,0 +1,350 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +#ifndef _SYS_VTOC_H +#define _SYS_VTOC_H + +#include <sys/dklabel.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Note: the VTOC is not implemented fully, nor in the manner + * that AT&T implements it. AT&T puts the vtoc structure + * into a sector, usually the second sector (pdsector is first). 
+ * + * Sun incorporates the tag, flag, version, and volume vtoc fields into + * its Disk Label, which already has some vtoc-equivalent fields. + * Upon reading the vtoc with read_vtoc(), the following exceptions + * occur: + * v_bootinfo [all] returned as zero + * v_sanity returned as VTOC_SANE + * if Disk Label was sane + * v_sectorsz returned as 512 + * v_reserved [all] returned as zero + * timestamp [all] returned as zero + * + * See dklabel.h, read_vtoc(), and write_vtoc(). + */ + +#define V_NUMPAR NDKMAP /* The number of partitions */ + /* (from dkio.h) */ + +#define VTOC_SANE 0x600DDEEE /* Indicates a sane VTOC */ +#define V_VERSION 0x01 /* layout version number */ +#define V_EXTVERSION V_VERSION /* extvtoc layout version number */ + +/* + * Partition identification tags + */ +#define V_UNASSIGNED 0x00 /* unassigned partition */ +#define V_BOOT 0x01 /* Boot partition */ +#define V_ROOT 0x02 /* Root filesystem */ +#define V_SWAP 0x03 /* Swap filesystem */ +#define V_USR 0x04 /* Usr filesystem */ +#define V_BACKUP 0x05 /* full disk */ +#define V_STAND 0x06 /* Stand partition */ +#define V_VAR 0x07 /* Var partition */ +#define V_HOME 0x08 /* Home partition */ +#define V_ALTSCTR 0x09 /* Alternate sector partition */ +#define V_CACHE 0x0a /* Cache (cachefs) partition */ +#define V_RESERVED 0x0b /* SMI reserved data */ + +/* + * Partition permission flags + */ +#define V_UNMNT 0x01 /* Unmountable partition */ +#define V_RONLY 0x10 /* Read only */ + +/* + * error codes for reading & writing vtoc + */ +#define VT_ERROR (-2) /* errno supplies specific error */ +#define VT_EIO (-3) /* I/O error accessing vtoc */ +#define VT_EINVAL (-4) /* illegal value in vtoc or request */ +#define VT_ENOTSUP (-5) /* VTOC op. not supported */ +#define VT_ENOSPC (-6) /* requested space not found */ +#define VT_EOVERFLOW (-7) /* VTOC op. 
data struct limited */ + +struct partition { + ushort_t p_tag; /* ID tag of partition */ + ushort_t p_flag; /* permission flags */ + daddr_t p_start; /* start sector no of partition */ + long p_size; /* # of blocks in partition */ +}; + +struct vtoc { + unsigned long v_bootinfo[3]; /* info needed by mboot (unsupported) */ + unsigned long v_sanity; /* to verify vtoc sanity */ + unsigned long v_version; /* layout version */ + char v_volume[LEN_DKL_VVOL]; /* volume name */ + ushort_t v_sectorsz; /* sector size in bytes */ + ushort_t v_nparts; /* number of partitions */ + unsigned long v_reserved[10]; /* free space */ + struct partition v_part[V_NUMPAR]; /* partition headers */ + time_t timestamp[V_NUMPAR]; /* partition timestamp (unsupported) */ + char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ +}; + +struct extpartition { + ushort_t p_tag; /* ID tag of partition */ + ushort_t p_flag; /* permission flags */ + ushort_t p_pad[2]; + diskaddr_t p_start; /* start sector no of partition */ + diskaddr_t p_size; /* # of blocks in partition */ +}; + + +struct extvtoc { + uint64_t v_bootinfo[3]; /* info needed by mboot (unsupported) */ + uint64_t v_sanity; /* to verify vtoc sanity */ + uint64_t v_version; /* layout version */ + char v_volume[LEN_DKL_VVOL]; /* volume name */ + ushort_t v_sectorsz; /* sector size in bytes */ + ushort_t v_nparts; /* number of partitions */ + ushort_t pad[2]; + uint64_t v_reserved[10]; + struct extpartition v_part[V_NUMPAR]; /* partition headers */ + uint64_t timestamp[V_NUMPAR]; /* partition timestamp (unsupported) */ + char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ +}; + +#ifdef _KERNEL +#define extvtoctovtoc(extv, v) \ + { \ + int i; \ + v.v_bootinfo[0] = (unsigned long)extv.v_bootinfo[0]; \ + v.v_bootinfo[1] = (unsigned long)extv.v_bootinfo[1]; \ + v.v_bootinfo[2] = (unsigned long)extv.v_bootinfo[2]; \ + v.v_sanity = (unsigned long)extv.v_sanity; \ + v.v_version = (unsigned long)extv.v_version; \ + bcopy(extv.v_volume, 
v.v_volume, LEN_DKL_VVOL); \ + v.v_sectorsz = extv.v_sectorsz; \ + v.v_nparts = extv.v_nparts; \ + for (i = 0; i < 10; i++) \ + v.v_reserved[i] = (unsigned long)extv.v_reserved[i]; \ + for (i = 0; i < V_NUMPAR; i++) { \ + v.v_part[i].p_tag = extv.v_part[i].p_tag; \ + v.v_part[i].p_flag = extv.v_part[i].p_flag; \ + v.v_part[i].p_start = (daddr_t)extv.v_part[i].p_start; \ + v.v_part[i].p_size = (long)extv.v_part[i].p_size; \ + v.timestamp[i] = (time_t)extv.timestamp[i]; \ + } \ + bcopy(extv.v_asciilabel, v.v_asciilabel, LEN_DKL_ASCII); \ + } + +#define vtoctoextvtoc(v, extv) \ + { \ + int i; \ + extv.v_bootinfo[0] = (uint64_t)v.v_bootinfo[0]; \ + extv.v_bootinfo[1] = (uint64_t)v.v_bootinfo[1]; \ + extv.v_bootinfo[2] = (uint64_t)v.v_bootinfo[2]; \ + extv.v_sanity = (uint64_t)v.v_sanity; \ + extv.v_version = (uint64_t)v.v_version; \ + bcopy(v.v_volume, extv.v_volume, LEN_DKL_VVOL); \ + extv.v_sectorsz = v.v_sectorsz; \ + extv.v_nparts = v.v_nparts; \ + for (i = 0; i < 10; i++) \ + extv.v_reserved[i] = (uint64_t)v.v_reserved[i]; \ + for (i = 0; i < V_NUMPAR; i++) { \ + extv.v_part[i].p_tag = v.v_part[i].p_tag; \ + extv.v_part[i].p_flag = v.v_part[i].p_flag; \ + extv.v_part[i].p_start = \ + (diskaddr_t)(unsigned long)v.v_part[i].p_start; \ + extv.v_part[i].p_size = \ + (diskaddr_t)(unsigned long)v.v_part[i].p_size; \ + extv.timestamp[i] = (uint64_t)v.timestamp[i]; \ + } \ + bcopy(v.v_asciilabel, extv.v_asciilabel, LEN_DKL_ASCII); \ + } +#endif /* _KERNEL */ + +#if defined(_SYSCALL32) +struct partition32 { + uint16_t p_tag; /* ID tag of partition */ + uint16_t p_flag; /* permission flags */ + daddr32_t p_start; /* start sector no of partition */ + int32_t p_size; /* # of blocks in partition */ +}; + +struct vtoc32 { + uint32_t v_bootinfo[3]; /* info needed by mboot (unsupported) */ + uint32_t v_sanity; /* to verify vtoc sanity */ + uint32_t v_version; /* layout version */ + char v_volume[LEN_DKL_VVOL]; /* volume name */ + uint16_t v_sectorsz; /* sector size in bytes */ + 
uint16_t v_nparts; /* number of partitions */ + uint32_t v_reserved[10]; /* free space */ + struct partition32 v_part[V_NUMPAR]; /* partition headers */ + time32_t timestamp[V_NUMPAR]; /* partition timestamp (unsupported) */ + char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ +}; + +#define vtoc32tovtoc(v32, v) \ + { \ + int i; \ + v.v_bootinfo[0] = v32.v_bootinfo[0]; \ + v.v_bootinfo[1] = v32.v_bootinfo[1]; \ + v.v_bootinfo[2] = v32.v_bootinfo[2]; \ + v.v_sanity = v32.v_sanity; \ + v.v_version = v32.v_version; \ + bcopy(v32.v_volume, v.v_volume, LEN_DKL_VVOL); \ + v.v_sectorsz = v32.v_sectorsz; \ + v.v_nparts = v32.v_nparts; \ + v.v_version = v32.v_version; \ + for (i = 0; i < 10; i++) \ + v.v_reserved[i] = v32.v_reserved[i]; \ + for (i = 0; i < V_NUMPAR; i++) { \ + v.v_part[i].p_tag = (ushort_t)v32.v_part[i].p_tag; \ + v.v_part[i].p_flag = (ushort_t)v32.v_part[i].p_flag; \ + v.v_part[i].p_start = (unsigned)v32.v_part[i].p_start; \ + v.v_part[i].p_size = (unsigned)v32.v_part[i].p_size; \ + } \ + for (i = 0; i < V_NUMPAR; i++) \ + v.timestamp[i] = (time_t)v32.timestamp[i]; \ + bcopy(v32.v_asciilabel, v.v_asciilabel, LEN_DKL_ASCII); \ + } + +#define vtoc32toextvtoc(v32, extv) \ + { \ + int i; \ + extv.v_bootinfo[0] = v32.v_bootinfo[0]; \ + extv.v_bootinfo[1] = v32.v_bootinfo[1]; \ + extv.v_bootinfo[2] = v32.v_bootinfo[2]; \ + extv.v_sanity = v32.v_sanity; \ + extv.v_version = v32.v_version; \ + bcopy(v32.v_volume, extv.v_volume, LEN_DKL_VVOL); \ + extv.v_sectorsz = v32.v_sectorsz; \ + extv.v_nparts = v32.v_nparts; \ + extv.v_version = v32.v_version; \ + for (i = 0; i < 10; i++) \ + extv.v_reserved[i] = v32.v_reserved[i]; \ + for (i = 0; i < V_NUMPAR; i++) { \ + extv.v_part[i].p_tag = (ushort_t)v32.v_part[i].p_tag; \ + extv.v_part[i].p_flag = (ushort_t)v32.v_part[i].p_flag; \ + extv.v_part[i].p_start = (diskaddr_t)v32.v_part[i].p_start; \ + extv.v_part[i].p_size = (diskaddr_t)v32.v_part[i].p_size; \ + extv.timestamp[i] = (time_t)v32.timestamp[i]; \ + } \ + 
bcopy(v32.v_asciilabel, extv.v_asciilabel, LEN_DKL_ASCII); \ + } + + +#define vtoctovtoc32(v, v32) \ + { \ + int i; \ + v32.v_bootinfo[0] = v.v_bootinfo[0]; \ + v32.v_bootinfo[1] = v.v_bootinfo[1]; \ + v32.v_bootinfo[2] = v.v_bootinfo[2]; \ + v32.v_sanity = v.v_sanity; \ + v32.v_version = v.v_version; \ + bcopy(v.v_volume, v32.v_volume, LEN_DKL_VVOL); \ + v32.v_sectorsz = v.v_sectorsz; \ + v32.v_nparts = v.v_nparts; \ + v32.v_version = v.v_version; \ + for (i = 0; i < 10; i++) \ + v32.v_reserved[i] = v.v_reserved[i]; \ + for (i = 0; i < V_NUMPAR; i++) { \ + v32.v_part[i].p_tag = (ushort_t)v.v_part[i].p_tag; \ + v32.v_part[i].p_flag = (ushort_t)v.v_part[i].p_flag; \ + v32.v_part[i].p_start = (unsigned)v.v_part[i].p_start; \ + v32.v_part[i].p_size = (unsigned)v.v_part[i].p_size; \ + } \ + for (i = 0; i < V_NUMPAR; i++) { \ + if (v.timestamp[i] > TIME32_MAX) \ + v32.timestamp[i] = TIME32_MAX; \ + else \ + v32.timestamp[i] = (time32_t)v.timestamp[i]; \ + } \ + bcopy(v.v_asciilabel, v32.v_asciilabel, LEN_DKL_ASCII); \ + } + +#define extvtoctovtoc32(extv, v32) \ + { \ + int i; \ + v32.v_bootinfo[0] = extv.v_bootinfo[0]; \ + v32.v_bootinfo[1] = extv.v_bootinfo[1]; \ + v32.v_bootinfo[2] = extv.v_bootinfo[2]; \ + v32.v_sanity = extv.v_sanity; \ + v32.v_version = extv.v_version; \ + bcopy(extv.v_volume, v32.v_volume, LEN_DKL_VVOL); \ + v32.v_sectorsz = extv.v_sectorsz; \ + v32.v_nparts = extv.v_nparts; \ + v32.v_version = extv.v_version; \ + for (i = 0; i < 10; i++) \ + v32.v_reserved[i] = extv.v_reserved[i]; \ + for (i = 0; i < V_NUMPAR; i++) { \ + v32.v_part[i].p_tag = (ushort_t)extv.v_part[i].p_tag; \ + v32.v_part[i].p_flag = (ushort_t)extv.v_part[i].p_flag; \ + v32.v_part[i].p_start = (unsigned)extv.v_part[i].p_start; \ + v32.v_part[i].p_size = (unsigned)extv.v_part[i].p_size; \ + } \ + for (i = 0; i < V_NUMPAR; i++) { \ + if (extv.timestamp[i] > TIME32_MAX) \ + v32.timestamp[i] = TIME32_MAX; \ + else \ + v32.timestamp[i] = (time32_t)extv.timestamp[i]; \ + } \ + 
bcopy(extv.v_asciilabel, v32.v_asciilabel, LEN_DKL_ASCII); \ + } + + +#endif /* _SYSCALL32 */ + +/* + * These defines are the mode parameter for the checksum routines. + */ +#define CK_CHECKSUM 0 /* check checksum */ +#define CK_MAKESUM 1 /* generate checksum */ + +#if defined(__STDC__) + +extern int read_vtoc(int, struct vtoc *); +extern int write_vtoc(int, struct vtoc *); +extern int read_extvtoc(int, struct extvtoc *); +extern int write_extvtoc(int, struct extvtoc *); + +#else + +extern int read_vtoc(); +extern int write_vtoc(); +extern int read_extvtoc(); +extern int write_extvtoc(); + +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VTOC_H */ diff --git a/lib/libspl/include/sys/zone.h b/lib/libspl/include/sys/zone.h new file mode 100644 index 000000000..ea7c8bde3 --- /dev/null +++ b/lib/libspl/include/sys/zone.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_ZONE_H +#define _LIBSPL_SYS_ZONE_H + +#endif diff --git a/lib/libspl/include/thread.h b/lib/libspl/include/thread.h new file mode 100644 index 000000000..a72f6d2b1 --- /dev/null +++ b/lib/libspl/include/thread.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_THREAD_H +#define _LIBSPL_THREAD_H + +#endif /* _LIBSPL_THREAD_H */ diff --git a/lib/libspl/include/tsol/label.h b/lib/libspl/include/tsol/label.h new file mode 100644 index 000000000..bfae8a126 --- /dev/null +++ b/lib/libspl/include/tsol/label.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SOL_TSOL_LABEL_H +#define _SOL_TSOL_LABEL_H + +#endif diff --git a/lib/libspl/include/tzfile.h b/lib/libspl/include/tzfile.h new file mode 100644 index 000000000..441b8cf8b --- /dev/null +++ b/lib/libspl/include/tzfile.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_TZFILE_H +#define _LIBSPL_TZFILE_H + +#include <sys/tzfile.h> + +#endif /* _LIBSPL_TZFILE_H */ diff --git a/lib/libspl/include/ucred.h b/lib/libspl/include/ucred.h new file mode 100644 index 000000000..4ca424ed3 --- /dev/null +++ b/lib/libspl/include/ucred.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_UCRED_H +#define _LIBSPL_UCRED_H + +typedef int ucred_t; + +#endif diff --git a/lib/libspl/include/umem.h b/lib/libspl/include/umem.h new file mode 100644 index 000000000..0ed55ae5a --- /dev/null +++ b/lib/libspl/include/umem.h @@ -0,0 +1,169 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _LIBSPL_UMEM_H
#define _LIBSPL_UMEM_H

/*
 * XXX: We should use the real portable umem library if it is detected
 * at configure time.  However, if the library is not available we can
 * use a trivial malloc based implementation.  This obviously impacts
 * performance, but unless you are using a full userspace build of zpool
 * for something other than ztest you are not likely to notice or care.
 *
 * https://labs.omniti.com/trac/portableumem
 */

#include <stdlib.h>
#include <string.h>	/* memset(), strncpy() */

#ifdef __cplusplus
extern "C" {
#endif

typedef void vmem_t;

/*
 * Flags for umem_alloc/umem_free
 */
#define UMEM_DEFAULT		0x0000	/* normal -- may fail */
#define UMEM_NOFAIL		0x0100	/* Never fails */

/*
 * Flags for umem_cache_create()
 */
#define UMC_NOTOUCH		0x00010000
#define UMC_NODEBUG		0x00020000
#define UMC_NOMAGAZINE		0x00040000
#define UMC_NOHASH		0x00080000

/* Longest cache name stored, not counting the terminating NUL. */
#define UMEM_CACHE_NAMELEN	31

typedef int umem_nofail_callback_t(void);
typedef int umem_constructor_t(void *, void *, int);
typedef void umem_destructor_t(void *, void *);
typedef void umem_reclaim_t(void *);

/*
 * Bookkeeping for one object cache.  Only cache_bufsize, the
 * constructor/destructor and cache_private are consulted by the
 * routines below; the remaining fields are merely recorded.
 */
typedef struct umem_cache {
	char			cache_name[UMEM_CACHE_NAMELEN + 1];
	size_t			cache_bufsize;
	size_t			cache_align;
	umem_constructor_t	*cache_constructor;
	umem_destructor_t	*cache_destructor;
	umem_reclaim_t		*cache_reclaim;
	void			*cache_private;
	void			*cache_arena;
	int			cache_cflags;
} umem_cache_t;

/*
 * Allocate size bytes with malloc().  With UMEM_NOFAIL set the call
 * retries malloc() until it succeeds; otherwise it may return NULL.
 */
static inline void *
umem_alloc(size_t size, int flags)
{
	void *ptr;

	ptr = malloc(size);
	while (ptr == NULL && (flags & UMEM_NOFAIL))
		ptr = malloc(size);

	return ptr;
}

/*
 * Same as umem_alloc(), but a successful allocation is zero filled.
 */
static inline void *
umem_zalloc(size_t size, int flags)
{
	void *ptr;

	ptr = umem_alloc(size, flags);
	if (ptr)
		memset(ptr, 0, size);

	return ptr;
}

/*
 * Release a buffer obtained from umem_alloc()/umem_zalloc().  The size
 * argument exists for API compatibility and is not used.
 */
static inline void
umem_free(void *ptr, size_t size)
{
	(void) size;
	free(ptr);
}

/* Accepted for API compatibility; the callback is never invoked. */
static inline void
umem_nofail_callback(umem_nofail_callback_t *cb)
{
	(void) cb;
}

/*
 * Create a cache descriptor.  Returns NULL if the descriptor cannot be
 * allocated.  The name is truncated to UMEM_CACHE_NAMELEN characters.
 */
static inline umem_cache_t *
umem_cache_create(char *name, size_t bufsize, size_t align,
		  umem_constructor_t *constructor,
		  umem_destructor_t *destructor,
		  umem_reclaim_t *reclaim,
		  void *priv, void *vmp, int cflags)
{
	umem_cache_t *cp;

	cp = umem_alloc(sizeof(umem_cache_t), UMEM_DEFAULT);
	if (cp) {
		/*
		 * strncpy() does not terminate the destination when the
		 * source fills it, and the descriptor is not zeroed, so
		 * terminate explicitly.
		 */
		strncpy(cp->cache_name, name, UMEM_CACHE_NAMELEN);
		cp->cache_name[UMEM_CACHE_NAMELEN] = '\0';
		cp->cache_bufsize = bufsize;
		cp->cache_align = align;
		cp->cache_constructor = constructor;
		cp->cache_destructor = destructor;
		cp->cache_reclaim = reclaim;
		cp->cache_private = priv;
		cp->cache_arena = vmp;
		cp->cache_cflags = cflags;
	}

	return cp;
}

/*
 * Free the cache descriptor itself.  Objects handed out by
 * umem_cache_alloc() are not tracked and are not released here.
 */
static inline void
umem_cache_destroy(umem_cache_t *cp)
{
	umem_free(cp, sizeof(umem_cache_t));
}

/*
 * Allocate one cache_bufsize object and run the constructor on it.
 * NOTE(review): the constructor's return value is ignored and it is
 * always passed UMEM_DEFAULT rather than the caller's flags -- confirm
 * no constructor relies on either.
 */
static inline void *
umem_cache_alloc(umem_cache_t *cp, int flags)
{
	void *ptr;

	ptr = umem_alloc(cp->cache_bufsize, flags);
	if (ptr && cp->cache_constructor)
		cp->cache_constructor(ptr, cp->cache_private, UMEM_DEFAULT);

	return ptr;
}

/*
 * Run the destructor (if any) on the object, then free it.
 */
static inline void
umem_cache_free(umem_cache_t *cp, void *ptr)
{
	if (cp->cache_destructor)
		cp->cache_destructor(ptr, cp->cache_private);

	umem_free(ptr, cp->cache_bufsize);
}

#ifdef __cplusplus
}
#endif

#endif /* _LIBSPL_UMEM_H */
+ */ + +#include_next <unistd.h> + +#ifndef _LIBSPL_UNISTD_H +#define _LIBSPL_UNISTD_H + +#include <zfs_config.h> + +#if !defined(HAVE_IOCTL_IN_UNISTD_H) +# if defined(HAVE_IOCTL_IN_SYS_IOCTL_H) +# include <sys/ioctl.h> +# elif defined(HAVE_IOCTL_IN_STROPTS_H) +# include <stropts.h> +# else +# error "System call ioctl() unavailable" +# endif +#endif + +#if !defined(HAVE_ISSETUGID) +# include <sys/types.h> +# define issetugid() (geteuid() == 0 || getegid() == 0) +#endif + +#if !defined(__sun__) && !defined(__sun) +/* It seems Solaris only returns positive host ids */ +static inline long fake_gethostid(void) +{ + long id = gethostid(); + return id >= 0 ? id : -id; +} +#define gethostid() fake_gethostid() +#endif + +#endif /* _LIBSPL_UNISTD_H */ diff --git a/lib/libspl/include/zone.h b/lib/libspl/include/zone.h new file mode 100644 index 000000000..366f95b14 --- /dev/null +++ b/lib/libspl/include/zone.h @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_ZONE_H +#define _LIBSPL_ZONE_H + + + +#include <sys/types.h> +#include <sys/zone.h> +#include <sys/priv.h> +#include <tsol/label.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define GLOBAL_ZONEID 0 +#define GLOBAL_ZONEID_NAME "global" + +/* + * Functions for mapping between id and name for active zones. + */ +extern zoneid_t getzoneid(void); +extern zoneid_t getzoneidbyname(const char *); +extern ssize_t getzonenamebyid(zoneid_t, char *, size_t); + +#if 0 + +/* + * NOTE + * + * The remaining contents of this file are private to the implementation + * of Solaris and are subject to change at any time without notice, + * Applications using these interfaces may fail to run on future releases. + */ + +extern int zonept(int, zoneid_t); +extern int zone_get_id(const char *, zoneid_t *); + +/* System call API */ +extern zoneid_t zone_create(const char *, const char *, + const struct priv_set *, const char *, size_t, const char *, size_t, int *, + int, int, const bslabel_t *, int); +extern int zone_boot(zoneid_t); +extern int zone_destroy(zoneid_t); +extern ssize_t zone_getattr(zoneid_t, int, void *, size_t); +extern int zone_setattr(zoneid_t, int, void *, size_t); +extern int zone_enter(zoneid_t); +extern int zone_list(zoneid_t *, uint_t *); +extern int zone_shutdown(zoneid_t); +extern int zone_version(int *); +extern int zone_add_datalink(zoneid_t, char *); +extern int zone_remove_datalink(zoneid_t, char *); +extern int zone_check_datalink(zoneid_t *, char *); +extern int zone_list_datalink(zoneid_t, int *, char *); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBSPL_ZONE_H */ diff --git a/lib/libspl/list.c b/lib/libspl/list.c new file mode 100644 index 000000000..b29dc8a87 --- /dev/null +++ b/lib/libspl/list.c @@ -0,0 +1,243 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Generic doubly-linked list implementation + */ + +#include <sys/list.h> +#include <sys/list_impl.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> + +#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head) + +#define list_insert_after_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_prev = (node); \ + lnew->list_next = (node)->list_next; \ + (node)->list_next->list_prev = lnew; \ + (node)->list_next = lnew; \ +} + +#define list_insert_before_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_next = (node); \ + lnew->list_prev = (node)->list_prev; \ + (node)->list_prev->list_next = lnew; \ + (node)->list_prev = lnew; \ +} + +#define list_remove_node(node) \ + (node)->list_prev->list_next = (node)->list_next; \ + (node)->list_next->list_prev = (node)->list_prev; \ + (node)->list_next = (node)->list_prev = NULL + +void +list_create(list_t *list, size_t size, size_t offset) +{ + ASSERT(list); + ASSERT(size > 0); 
+ ASSERT(size >= offset + sizeof (list_node_t)); + + list->list_size = size; + list->list_offset = offset; + list->list_head.list_next = list->list_head.list_prev = + &list->list_head; +} + +void +list_destroy(list_t *list) +{ + list_node_t *node = &list->list_head; + + ASSERT(list); + ASSERT(list->list_head.list_next == node); + ASSERT(list->list_head.list_prev == node); + + node->list_next = node->list_prev = NULL; +} + +void +list_insert_after(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_head(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_after_node(list, lold, nobject); + } +} + +void +list_insert_before(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_tail(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_before_node(list, lold, nobject); + } +} + +void +list_insert_head(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_after_node(list, lold, object); +} + +void +list_insert_tail(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_before_node(list, lold, object); +} + +void +list_remove(list_t *list, void *object) +{ + list_node_t *lold = list_d2l(list, object); + ASSERT(!list_empty(list)); + ASSERT(lold->list_next != NULL); + list_remove_node(lold); +} + +void * +list_remove_head(list_t *list) +{ + list_node_t *head = list->list_head.list_next; + if (head == &list->list_head) + return (NULL); + list_remove_node(head); + return (list_object(list, head)); +} + +void * +list_remove_tail(list_t *list) +{ + list_node_t *tail = list->list_head.list_prev; + if (tail == &list->list_head) + return (NULL); + list_remove_node(tail); + return (list_object(list, tail)); +} + +void * +list_head(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_next)); +} + +void * +list_tail(list_t *list) +{ + if 
(list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_prev)); +} + +void * +list_next(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_next != &list->list_head) + return (list_object(list, node->list_next)); + + return (NULL); +} + +void * +list_prev(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_prev != &list->list_head) + return (list_object(list, node->list_prev)); + + return (NULL); +} + +/* + * Insert src list after dst list. Empty src list thereafter. + */ +void +list_move_tail(list_t *dst, list_t *src) +{ + list_node_t *dstnode = &dst->list_head; + list_node_t *srcnode = &src->list_head; + + ASSERT(dst->list_size == src->list_size); + ASSERT(dst->list_offset == src->list_offset); + + if (list_empty(src)) + return; + + dstnode->list_prev->list_next = srcnode->list_next; + srcnode->list_next->list_prev = dstnode->list_prev; + dstnode->list_prev = srcnode->list_prev; + srcnode->list_prev->list_next = dstnode; + + /* empty src list */ + srcnode->list_next = srcnode->list_prev = srcnode; +} + +void +list_link_replace(list_node_t *lold, list_node_t *lnew) +{ + ASSERT(list_link_active(lold)); + ASSERT(!list_link_active(lnew)); + + lnew->list_next = lold->list_next; + lnew->list_prev = lold->list_prev; + lold->list_prev->list_next = lnew; + lold->list_next->list_prev = lnew; + lold->list_next = lold->list_prev = NULL; +} + +void +list_link_init(list_node_t *ln) +{ + ln->list_next = NULL; + ln->list_prev = NULL; +} + +int +list_link_active(list_node_t *ln) +{ + return (ln->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return (list_empty(list)); +} diff --git a/lib/libspl/mkdirp.c b/lib/libspl/mkdirp.c new file mode 100644 index 000000000..f98e31e2d --- /dev/null +++ b/lib/libspl/mkdirp.c @@ -0,0 +1,210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1988 AT&T	*/
/*	  All Rights Reserved  	*/

/*
 * Creates directory and its parents if the parents do not
 * exist yet.
 *
 * Returns -1 if fails for reasons other than non-existing
 * parents.
 * Does NOT simplify pathnames with . or .. in them.
 */

#include <sys/types.h>
#include <libgen.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/stat.h>

static char *simplify(const char *str);

/*
 * Common failure exit for mkdirp(): release the working copy of the
 * path while keeping the errno of the call that failed, since free()
 * is allowed to modify errno on some C libraries.
 */
static int
mkdirp_fail(char *str)
{
	int saved_errno = errno;

	free(str);
	errno = saved_errno;
	return (-1);
}

/*
 * mkdirp - create directory d with the given mode, creating any
 * missing parent directories (also with mode) along the way.
 *
 * Returns 0 on success and -1 on failure; on failure errno is the
 * value left by the mkdir() or allocation that failed.
 */
int
mkdirp(const char *d, mode_t mode)
{
	char  *endptr, *ptr, *slash, *str;

	str = simplify(d);

	/* If space couldn't be allocated for the simplified names, return. */

	if (str == NULL)
		return (-1);

	/* Try to make the directory */

	if (mkdir(str, mode) == 0) {
		free(str);
		return (0);
	}
	if (errno != ENOENT)
		return (mkdirp_fail(str));

	/* endptr marks the end of the full path so it can be rebuilt. */
	endptr = strrchr(str, '\0');
	slash = strrchr(str, '/');

	/* Search upward for the non-existing parent */

	while (slash != NULL) {

		/* Truncate the path at the last slash. */
		ptr = slash;
		*ptr = '\0';

		/* If reached an existing parent, break */

		if (access(str, F_OK) == 0)
			break;

		/* If non-existing parent */

		slash = strrchr(str, '/');

		/* If under / or current directory, make it. */

		if (slash == NULL || slash == str) {
			if (mkdir(str, mode) != 0 && errno != EEXIST)
				return (mkdirp_fail(str));
			break;
		}
	}

	/* Create directories starting from upmost non-existing parent */

	while ((ptr = strchr(str, '\0')) != endptr) {
		/* Restore the slash removed during the upward search. */
		*ptr = '/';
		if (mkdir(str, mode) != 0 && errno != EEXIST) {
			/*
			 *  If the mkdir fails because str already
			 *  exists (EEXIST), then str has the form
			 *  "existing-dir/..", and this is really
			 *  ok. (Remember, this loop is creating the
			 *  portion of the path that didn't exist)
			 */
			return (mkdirp_fail(str));
		}
	}
	free(str);
	return (0);
}

/*
 *	simplify - given a pathname, simplify that path by removing
 *		   duplicate contiguous slashes.
 *
 *		   A simplified copy of the argument is returned to the
 *		   caller, or NULL is returned on error.
 *
 *		   The caller should handle error reporting based upon the
 *		   returned value, and should free the returned value,
 *		   when appropriate.
 */

static char *
simplify(const char *str)
{
	size_t i;
	size_t mbPathlen;	/* length of multi-byte path */
	size_t wcPathlen;	/* length of wide-character path */
	wchar_t *wptr;		/* scratch pointer */
	wchar_t *wcPath = NULL;	/* wide-character version of the path */
	char *mbPath;		/* The copy of the path to be returned */
	int saved_errno;

	/*
	 *  bail out if there is nothing there.
	 */

	if (!str)
		return (NULL);

	/*
	 *  Get a copy of the argument.  The length is needed below
	 *  anyway, so measure once and copy by hand.
	 */

	mbPathlen = strlen(str);

	if ((mbPath = malloc(mbPathlen + 1)) == NULL)
		return (NULL);
	(void) memcpy(mbPath, str, mbPathlen + 1);

	/*
	 *  convert the multi-byte version of the path to a
	 *  wide-character rendering, for doing our figuring.
	 *  The buffer is zero filled, so it stays terminated even
	 *  when the conversion fills all mbPathlen characters.
	 */

	if ((wcPath = calloc(mbPathlen + 1, sizeof (wchar_t))) == NULL)
		goto fail;

	if ((wcPathlen = mbstowcs(wcPath, mbPath, mbPathlen)) == (size_t)-1)
		goto fail;

	/*
	 *  remove duplicate slashes first ("//../" -> "/")
	 */

	for (wptr = wcPath, i = 0; i < wcPathlen; i++) {
		*wptr++ = wcPath[i];

		if (wcPath[i] == L'/') {
			/* Skip the rest of this run of slashes. */
			while (wcPath[i + 1] == L'/')
				i++;
		}
	}

	*wptr = L'\0';

	/*
	 *  now convert back to the multi-byte format.  The result is
	 *  never longer than the original, whose terminator is still
	 *  in place at mbPath[mbPathlen].
	 */

	if (wcstombs(mbPath, wcPath, mbPathlen) == (size_t)-1)
		goto fail;

	free(wcPath);
	return (mbPath);

fail:
	/* Keep the errno of the failed call across the cleanup. */
	saved_errno = errno;
	free(wcPath);
	free(mbPath);
	errno = saved_errno;
	return (NULL);
}
#include <string.h>
#include <sys/types.h>

/*
 * Bounded string append.  At most dstsize - 1 bytes end up in dst and
 * the result is NUL terminated whenever a terminator was found inside
 * the first dstsize bytes of dst.  The scan for the existing string
 * never reads beyond &dst[dstsize-1].
 *
 * Returns the length the concatenation would have had without
 * truncation: (length of dst found within dstsize) + strlen(src).
 */

size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t ncopy;

	/* Length of the existing string, capped at the buffer size. */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* No terminator inside the buffer: nothing can be appended. */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* Room left for payload once the terminator is accounted for. */
	ncopy = dstsize - dstlen - 1;
	if (srclen < ncopy)
		ncopy = srclen;

	(void) memcpy(dst + dstlen, src, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
The copy will never + * overflow the destination buffer and the buffer will always be null + * terminated. + */ + +size_t +strlcpy(char *dst, const char *src, size_t len) +{ + size_t slen = strlen(src); + size_t copied; + + if (len == 0) + return (slen); + + if (slen >= len) + copied = len - 1; + else + copied = slen; + (void) memcpy(dst, src, copied); + dst[copied] = '\0'; + return (slen); +} diff --git a/lib/libspl/strnlen.c b/lib/libspl/strnlen.c new file mode 100644 index 000000000..9fb8227b9 --- /dev/null +++ b/lib/libspl/strnlen.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. + * All rights reserved. Use is subject to license terms. + */ + +#include <string.h> +#include <sys/types.h> + +/* + * Returns the number of non-NULL bytes in string argument, + * but not more than maxlen. Does not look past str + maxlen. 
+ */ +size_t +strnlen(const char *str, size_t maxlen) +{ + const char *ptr; + + ptr = memchr(str, 0, maxlen); + if (ptr == NULL) + return (maxlen); + + return (ptr - str); +} diff --git a/lib/libspl/xdr.c b/lib/libspl/xdr.c new file mode 100644 index 000000000..288a338a1 --- /dev/null +++ b/lib/libspl/xdr.c @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T + * All Rights Reserved + * + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#include <rpc/xdr.h> + +/* + * As of glibc-2.5-25 there is not support for xdr_control(). The + * xdrmem implementation from OpenSolaris is used here. + * + * FIXME: Not well tested it may not work as expected. 
+ */ +bool_t +xdr_control(XDR *xdrs, int request, void *info) +{ + xdr_bytesrec_t *xptr; + int32_t *int32p; + int len; + + switch (request) { + case XDR_GET_BYTES_AVAIL: + xptr = (xdr_bytesrec_t *)info; + xptr->xc_is_last_record = TRUE; + xptr->xc_num_avail = xdrs->x_handy; + return (TRUE); + + case XDR_PEEK: + /* + * Return the next 4 byte unit in the XDR stream. + */ + if (xdrs->x_handy < sizeof (int32_t)) + return (FALSE); + int32p = (int32_t *)info; + *int32p = (int32_t)ntohl((uint32_t) + (*((int32_t *)(xdrs->x_private)))); + return (TRUE); + + case XDR_SKIPBYTES: + /* + * Skip the next N bytes in the XDR stream. + */ + int32p = (int32_t *)info; + len = RNDUP((int)(*int32p)); + if ((xdrs->x_handy -= len) < 0) + return (FALSE); + xdrs->x_private += len; + return (TRUE); + + } + return (FALSE); +} diff --git a/lib/libspl/zone.c b/lib/libspl/zone.c new file mode 100644 index 000000000..f4269a76c --- /dev/null +++ b/lib/libspl/zone.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Ricardo Correia. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <zone.h> +#include <string.h> +#include <errno.h> + +zoneid_t getzoneid() +{ + return GLOBAL_ZONEID; +} + +zoneid_t getzoneidbyname(const char *name) +{ + if(name == NULL) + return GLOBAL_ZONEID; + + if(strcmp(name, GLOBAL_ZONEID_NAME) == 0) + return GLOBAL_ZONEID; + + return EINVAL; +} + +ssize_t getzonenamebyid(zoneid_t id, char *buf, size_t buflen) +{ + if(id != GLOBAL_ZONEID) + return EINVAL; + + ssize_t ret = strlen(GLOBAL_ZONEID_NAME) + 1; + + if(buf == NULL || buflen == 0) + return ret; + + strncpy(buf, GLOBAL_ZONEID_NAME, buflen); + buf[buflen - 1] = '\0'; + + return ret; +} diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c index 3bd5c3119..60f50832d 100644 --- a/lib/libuutil/uu_misc.c +++ b/lib/libuutil/uu_misc.c @@ -208,18 +208,6 @@ uu_panic(const char *format, ...) (void) pause(); } -int -assfail(const char *astring, const char *file, int line) -{ -#if defined(__STDC__) && __STDC_VERSION__ - 0 >= 199901L - __assert_c99(astring, file, line, "unknown func"); -#else - __assert(astring, file, line); -#endif - /*NOTREACHED*/ - return (0); -} - static void uu_lockup(void) { diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index f19e398f6..e3da385d2 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -50,6 +50,26 @@ extern "C" { #define ZPOOL_MAXPROPLEN MAXPATHLEN /* + * Default device paths + */ + +#if defined(__sun__) || defined(__sun) +#define DISK_ROOT "/dev/dsk" +#define RDISK_ROOT "/dev/rdsk" +#define UDISK_ROOT RDISK_ROOT +#define FIRST_SLICE "s0" +#define BACKUP_SLICE "s2" +#endif + +#ifdef __linux__ +#define DISK_ROOT "/dev" +#define RDISK_ROOT DISK_ROOT +#define UDISK_ROOT "/dev/disk" +#define FIRST_SLICE "1" +#define BACKUP_SLICE "" +#endif + +/* * libzfs errors */ enum { @@ -235,6 +255,7 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, 
boolean_t *, boolean_t *, boolean_t *); +extern int zpool_label_disk_wait(char *, int); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); /* @@ -550,15 +571,6 @@ extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); /* - * When dealing with nvlists, verify() is extremely useful - */ -#ifdef NDEBUG -#define verify(EX) ((void)(EX)) -#else -#define verify(EX) assert(EX) -#endif - -/* * Utility function to convert a number to a human-readable form. */ extern void zfs_nicenum(uint64_t, char *, size_t); diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 899ffdaae..a55e73880 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -45,9 +45,11 @@ #include <grp.h> #include <stddef.h> #include <ucred.h> +#ifdef HAVE_IDMAP #include <idmap.h> #include <aclutils.h> #include <directory.h> +#endif /* HAVE_IDMAP */ #include <sys/spa.h> #include <sys/zap.h> @@ -1989,6 +1991,7 @@ zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, return (0); } +#ifdef HAVE_IDMAP static int idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, char **domainp, idmap_rid_t *ridp) @@ -2023,6 +2026,7 @@ out: (void) idmap_fini(idmap_hdl); return (err); } +#endif /* HAVE_IDMAP */ /* * convert the propname into parameters needed by kernel @@ -2056,6 +2060,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned, cp = strchr(propname, '@') + 1; if (strchr(cp, '@')) { +#ifdef HAVE_IDMAP /* * It's a SID name (eg "user@domain") that needs to be * turned into S-1-domainID-RID. 
@@ -2078,6 +2083,9 @@ userquota_propname_decode(const char *propname, boolean_t zoned, return (ENOENT); cp = numericsid; /* will be further decoded below */ +#else + return (ENOSYS); +#endif /* HAVE_IDMAP */ } if (strncmp(cp, "S-1-", 4) == 0) { @@ -2116,6 +2124,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned, *ridp = gr->gr_gid; } } else { +#ifdef HAVE_IDMAP /* It's a user/group ID (eg "12345"). */ uid_t id = strtoul(cp, &end, 10); idmap_rid_t rid; @@ -2133,6 +2142,9 @@ userquota_propname_decode(const char *propname, boolean_t zoned, } else { *ridp = id; } +#else + return (ENOSYS); +#endif /* HAVE_IDMAP */ } ASSERT3P(numericsid, ==, NULL); diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 4a7634a63..b28f793bb 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -52,6 +52,9 @@ #include <fcntl.h> #include <sys/vdev_impl.h> +#ifdef HAVE_LIBBLKID +#include <blkid/blkid.h> +#endif #include "libzfs.h" #include "libzfs_impl.h" @@ -777,6 +780,77 @@ zpool_read_label(int fd, nvlist_t **config) return (0); } +#ifdef HAVE_LIBBLKID +/* + * Use libblkid to quickly search for zfs devices + */ +static int +zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools) +{ + blkid_cache cache; + blkid_dev_iterate iter; + blkid_dev dev; + const char *devname; + nvlist_t *config; + int fd, err; + + err = blkid_get_cache(&cache, NULL); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err); + goto err_blkid1; + } + + err = blkid_probe_all(cache); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err); + goto err_blkid2; + } + + iter = blkid_dev_iterate_begin(cache); + if (iter == NULL) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()")); + goto err_blkid2; + } + + err = blkid_dev_set_search(iter, "TYPE", "zfs"); + if (err != 0) { + (void) 
zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err); + goto err_blkid3; + } + + while (blkid_dev_next(iter, &dev) == 0) { + devname = blkid_dev_devname(dev); + if ((fd = open64(devname, O_RDONLY)) < 0) + continue; + + err = zpool_read_label(fd, &config); + (void) close(fd); + + if (err != 0) { + (void) no_memory(hdl); + goto err_blkid3; + } + + if (config != NULL) { + err = add_config(hdl, pools, devname, config); + if (err != 0) + goto err_blkid3; + } + } + +err_blkid3: + blkid_dev_iterate_end(iter); +err_blkid2: + blkid_put_cache(cache); +err_blkid1: + return err; +} +#endif /* HAVE_LIBBLKID */ + /* * Given a list of directories to search, find all pools stored on disk. This * includes partial pools which are not available to import. If no args are @@ -791,12 +865,12 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, int i; DIR *dirp = NULL; struct dirent64 *dp; - char path[MAXPATHLEN]; + char path[MAXPATHLEN], path2[MAXPATHLEN]; char *end; size_t pathleft; struct stat64 statbuf; nvlist_t *ret = NULL, *config; - static char *default_dir = "/dev/dsk"; + static char *default_dir = DISK_ROOT; int fd; pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; @@ -807,6 +881,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, verify(poolname == NULL || guid == 0); if (argc == 0) { +#ifdef HAVE_LIBBLKID + /* Use libblkid to scan all device for their type */ + if (zpool_find_import_blkid(hdl, &pools) == 0) + goto skip_scanning; + + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid failure falling back " + "to manual probing")); +#endif /* HAVE_LIBBLKID */ argc = 1; argv = &default_dir; } @@ -818,7 +901,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, */ for (i = 0; i < argc; i++) { char *rdsk; - int dfd; /* use realpath to normalize the path */ if (realpath(argv[i], path) == 0) { @@ -842,8 +924,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int 
argc, char **argv, else rdsk = path; - if ((dfd = open64(rdsk, O_RDONLY)) < 0 || - (dirp = fdopendir(dfd)) == NULL) { + if ((dirp = opendir(rdsk)) == NULL) { zfs_error_aux(hdl, strerror(errno)); (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), @@ -860,20 +941,19 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, (name[1] == 0 || (name[1] == '.' && name[2] == 0))) continue; - if ((fd = openat64(dfd, name, O_RDONLY)) < 0) - continue; + snprintf(path2, sizeof (path2), "%s%s", rdsk, name); /* * Ignore failed stats. We only want regular * files, character devs and block devs. */ - if (fstat64(fd, &statbuf) != 0 || + if (stat64(path2, &statbuf) != 0 || (!S_ISREG(statbuf.st_mode) && - !S_ISCHR(statbuf.st_mode) && - !S_ISBLK(statbuf.st_mode))) { - (void) close(fd); + !S_ISBLK(statbuf.st_mode))) + continue; + + if ((fd = open64(path2, O_RDONLY)) < 0) continue; - } if ((zpool_read_label(fd, &config)) != 0) { (void) close(fd); @@ -906,9 +986,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, config = NULL; continue; } - /* use the non-raw path for the config */ - (void) strlcpy(end, name, pathleft); - if (add_config(hdl, &pools, path, config) != 0) + if (add_config(hdl, &pools, path2, config) != 0) goto error; } } @@ -917,6 +995,9 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, dirp = NULL; } +#ifdef HAVE_LIBBLKID +skip_scanning: +#endif ret = get_configs(hdl, &pools, active_ok); error: diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index b8989a026..38cc627fc 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -56,10 +56,6 @@ static int read_efi_label(nvlist_t *config, diskaddr_t *sb); #define BOOTCMD "installboot(1M)" #endif -#define DISK_ROOT "/dev/dsk" -#define RDISK_ROOT "/dev/rdsk" -#define BACKUP_SLICE "s2" - /* * ==================================================================== * zpool property functions @@ -638,9 +634,12 @@ 
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) /* * Don't start the slice at the default block of 34; many storage - * devices will use a stripe width of 128k, so start there instead. + * devices will use a stripe width of 128k, other vendors prefer a 1m + * alignment. It is best to play it safe and ensure a 1m alignment + * give 512b blocks. When the block size is larger by a power of 2 + * we will still be 1m aligned. */ -#define NEW_START_BLOCK 256 +#define NEW_START_BLOCK 2048 /* * Validate the given pool name, optionally putting an extended error message in @@ -933,10 +932,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, * This can happen if the user has specified the same * device multiple times. We can't reliably detect this * until we try to add it and see we already have a - * label. + * label. This can also happen under if the device is + * part of an active md or lvm device. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more vdevs refer to the same device")); + "one or more vdevs refer to the same device, or one of\n" + "the devices is part of an active md or lvm device")); return (zfs_error(hdl, EZFS_BADDEV, msg)); case EOVERFLOW: @@ -1559,7 +1560,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); } else if (path[0] != '/') { - (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path); + (void) snprintf(buf, sizeof (buf), "%s/%s", DISK_ROOT, path); verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); } else { verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); @@ -1760,22 +1761,14 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type) * the disk to use the new unallocated space. 
*/ static int -zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) +zpool_relabel_disk(libzfs_handle_t *hdl, const char *path) { - char path[MAXPATHLEN]; char errbuf[1024]; int fd, error; - int (*_efi_use_whole_disk)(int); - - if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT, - "efi_use_whole_disk")) == NULL) - return (-1); - - (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name); - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " - "relabel '%s': unable to open device"), name); + "relabel '%s': unable to open device"), path); return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); } @@ -1784,11 +1777,11 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) * does not have any unallocated space left. If so, we simply * ignore that error and continue on. */ - error = _efi_use_whole_disk(fd); + error = efi_use_whole_disk(fd); (void) close(fd); if (error && error != VT_ENOSPC) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " - "relabel '%s': unable to read disk capacity"), name); + "relabel '%s': unable to read disk capacity"), path); return (zfs_error(hdl, EZFS_NOCAP, errbuf)); } return (0); @@ -1847,7 +1840,6 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, } if (wholedisk) { - pathname += strlen(DISK_ROOT) + 1; (void) zpool_relabel_disk(zhp->zpool_hdl, pathname); } } @@ -2611,7 +2603,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) char * zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) { - char *path, *devid; + char *path, *devid, *type; uint64_t value; char buf[64]; vdev_stat_t *vs; @@ -2625,7 +2617,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) (u_longlong_t)value); path = buf; } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { - /* * If the device is dead (faulted, offline, etc) then don't * bother opening it. 
Otherwise we may be forcing the user to @@ -2664,9 +2655,19 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) devid_str_free(newdevid); } - if (strncmp(path, "/dev/dsk/", 9) == 0) - path += 9; + /* + * For a block device only use the name. + */ + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + path = strrchr(path, '/'); + path++; + } +#if defined(__sun__) || defined(__sun) + /* + * The following code strips the slice from the device path. + */ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) == 0 && value) { char *tmp = zfs_strdup(hdl, path); @@ -2675,6 +2676,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) tmp[strlen(path) - 2] = '\0'; return (tmp); } +#endif } else { verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0); @@ -3054,7 +3056,7 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb) (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT, strrchr(path, '/')); - if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) { + if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) { struct dk_gpt *vtoc; if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { @@ -3100,6 +3102,54 @@ find_start_block(nvlist_t *config) return (MAXOFFSET_T); } +int +zpool_label_disk_wait(char *path, int timeout) +{ + struct stat64 statbuf; + int i; + + /* + * Wait timeout miliseconds for a newly created device to be available + * from the given path. There is a small window when a /dev/ device + * will exist and the udev link will not, so we must wait for the + * symlink. Depending on the udev rules this may take a few seconds. 
+ */ + for (i = 0; i < timeout; i++) { + usleep(1000); + + errno = 0; + if ((stat64(path, &statbuf) == 0) && (errno == 0)) + return (0); + } + + return (ENOENT); +} + +int +zpool_label_disk_check(char *path) +{ + struct dk_gpt *vtoc; + int fd, err; + + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) + return errno; + + if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { + (void) close(fd); + return err; + } + + if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { + efi_free(vtoc); + (void) close(fd); + return EIDRM; + } + + efi_free(vtoc); + (void) close(fd); + return 0; +} + /* * Label an individual disk. The name provided is the short name, * stripped of any leading /dev path. @@ -3109,7 +3159,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) { char path[MAXPATHLEN]; struct dk_gpt *vtoc; - int fd; + int rval, fd; size_t resv = EFI_MIN_RESV_SIZE; uint64_t slice_size; diskaddr_t start_block; @@ -3145,13 +3195,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name, BACKUP_SLICE); - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { /* * This shouldn't happen. We've long since verified that this * is a valid device. */ - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "unable to open device")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unable to open device '%s': %d"), path, errno); return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); } @@ -3194,7 +3244,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) vtoc->efi_parts[8].p_size = resv; vtoc->efi_parts[8].p_tag = V_RESERVED; - if (efi_write(fd, vtoc) != 0) { + if ((rval = efi_write(fd, vtoc)) != 0) { /* * Some block drivers (like pcata) may not support EFI * GPT labels. 
Print out a helpful error message dir- @@ -3204,14 +3254,36 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) (void) close(fd); efi_free(vtoc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "try using fdisk(1M) and then provide a specific slice")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using " + "parted(8) and then provide a specific slice: %d"), rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } (void) close(fd); efi_free(vtoc); - return (0); + + /* Wait for the first expected slice to appear. */ + (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name, + isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE); + rval = zpool_label_disk_wait(path, 3000); + if (rval) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to " + "detect device partitions on '%s': %d"), path, rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + + /* We can't be to paranoid. Read the label back and verify it. */ + (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); + rval = zpool_label_disk_check(path); + if (rval) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " + "EFI label on '%s' is damaged. 
Ensure\nthis device " + "is not in in use, and is functioning properly: %d"), + path, rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + + return 0; } static boolean_t diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index be5b3949f..eb799901d 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -39,6 +39,7 @@ #include <sys/mntent.h> #include <sys/mnttab.h> #include <sys/avl.h> +#include <sys/debug.h> #include <stddef.h> #include <libzfs.h> diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 8d0c47e30..71d0278a4 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -578,11 +578,17 @@ libzfs_init(void) } if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { + (void) fprintf(stderr, "Unable to open %s: (%d) %s\n", + ZFS_DEV, errno, strerror(errno)); free(hdl); return (NULL); } +#ifdef HAVE_SETMNTENT + if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "r")) == NULL) { +#else if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) { +#endif (void) close(hdl->libzfs_fd); free(hdl); return (NULL); @@ -602,7 +608,11 @@ libzfs_fini(libzfs_handle_t *hdl) { (void) close(hdl->libzfs_fd); if (hdl->libzfs_mnttab) +#ifdef HAVE_SETMNTENT + (void) endmntent(hdl->libzfs_mnttab); +#else (void) fclose(hdl->libzfs_mnttab); +#endif if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); zfs_uninit_libshare(hdl); diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index 7c3c5e0d5..9377dab2f 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -58,7 +58,6 @@ extern "C" { #include <atomic.h> #include <dirent.h> #include <time.h> -#include <libsysevent.h> #include <sys/note.h> #include <sys/types.h> #include <sys/cred.h> @@ -72,8 +71,11 @@ extern "C" { #include <sys/sdt.h> #include <sys/kstat.h> #include <sys/u8_textprep.h> +#ifdef HAVE_SYSEVENT +#include <libsysevent.h> #include 
<sys/sysevent/eventdefs.h> #include <sys/sysevent/dev.h> +#endif /* HAVE_SYSEVENT */ /* * Stack @@ -110,59 +112,12 @@ extern void vpanic(const char *, __va_list); #define fm_panic panic -/* This definition is copied from assert.h. */ -#ifndef verify -#if defined(__STDC__) -#if __STDC_VERSION__ - 0 >= 199901L -#define verify(EX) (void)((EX) || \ - (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0)) -#else -#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0)) -#endif /* __STDC_VERSION__ - 0 >= 199901L */ -#else -#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0)) -#endif /* __STDC__ */ -#endif - -#undef VERIFY -#undef ASSERT - -#define VERIFY verify -#define ASSERT assert - -extern void __assert(const char *, const char *, int); - -#ifdef lint -#define VERIFY3_IMPL(x, y, z, t) if (x == z) ((void)0) -#else -/* BEGIN CSTYLED */ -#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \ - const TYPE __left = (TYPE)(LEFT); \ - const TYPE __right = (TYPE)(RIGHT); \ - if (!(__left OP __right)) { \ - char *__buf = alloca(256); \ - (void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \ - #LEFT, #OP, #RIGHT, \ - (u_longlong_t)__left, #OP, (u_longlong_t)__right); \ - __assert(__buf, __FILE__, __LINE__); \ - } \ -_NOTE(CONSTCOND) } while (0) -/* END CSTYLED */ -#endif /* lint */ - -#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t) -#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t) -#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t) - -#ifdef NDEBUG -#define ASSERT3S(x, y, z) ((void)0) -#define ASSERT3U(x, y, z) ((void)0) -#define ASSERT3P(x, y, z) ((void)0) -#else -#define ASSERT3S(x, y, z) VERIFY3S(x, y, z) -#define ASSERT3U(x, y, z) VERIFY3U(x, y, z) -#define ASSERT3P(x, y, z) VERIFY3P(x, y, z) -#endif +/* + * VERIFY/ASSERT + * + * The verify/assert support moved to libspl/include/assert.h so only + * one version of the code needs to be maintained for all of user space. 
+ */ /* * DTrace SDT probes have different signatures in userland than they do in @@ -325,6 +280,9 @@ extern void kstat_delete(kstat_t *); #define kmem_alloc(_s, _f) umem_alloc(_s, _f) #define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) #define kmem_free(_b, _s) umem_free(_b, _s) +#define vmem_alloc(_s, _f) kmem_alloc(_s, _f) +#define vmem_zalloc(_s, _f) kmem_zalloc(_s, _f) +#define vmem_free(_b, _s) kmem_free(_b, _s) #define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) #define kmem_cache_destroy(_c) umem_cache_destroy(_c) diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 97e1bc9d3..ab97636ba 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -540,7 +540,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * for its size. So -- gag -- we open the block device to get * its size, and remember it for subsequent VOP_GETATTR(). */ +#if defined(__sun__) || defined(__sun) if (strncmp(path, "/dev/", 5) == 0) { +#else + if (0) { +#endif char *dsk; fd = open64(path, O_RDONLY); if (fd == -1) @@ -561,6 +565,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) return (errno); } +#ifdef __linux__ + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { + flags |= O_DIRECT; + if (flags & FWRITE) + flags |= O_EXCL; + } +#endif + if (flags & FCREAT) old_umask = umask(0); diff --git a/module/Makefile.in b/module/Makefile.in index 13a5a8133..e32bc9fb6 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -3,6 +3,7 @@ subdir-m += nvpair subdir-m += unicode subdir-m += zcommon subdir-m += zfs +subdir-m += zpios modules clean: # Make the exported SPL symbols available to these modules. 
diff --git a/module/avl/avl.c b/module/avl/avl.c index a9634d701..728bd8723 100644 --- a/module/avl/avl.c +++ b/module/avl/avl.c @@ -1031,3 +1031,29 @@ done: return (AVL_NODE2DATA(node, off)); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +static int avl_init(void) { return 0; } +static int avl_fini(void) { return 0; } + +spl_module_init(avl_init); +spl_module_exit(avl_fini); + +MODULE_AUTHOR("Sun Microsystems, Inc"); +MODULE_DESCRIPTION("Generic AVL tree implementation"); +MODULE_LICENSE("CDDL"); + +EXPORT_SYMBOL(avl_create); +EXPORT_SYMBOL(avl_find); +EXPORT_SYMBOL(avl_insert); +EXPORT_SYMBOL(avl_insert_here); +EXPORT_SYMBOL(avl_walk); +EXPORT_SYMBOL(avl_first); +EXPORT_SYMBOL(avl_last); +EXPORT_SYMBOL(avl_nearest); +EXPORT_SYMBOL(avl_add); +EXPORT_SYMBOL(avl_remove); +EXPORT_SYMBOL(avl_numnodes); +EXPORT_SYMBOL(avl_destroy_nodes); +EXPORT_SYMBOL(avl_destroy); +#endif diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c index f9d99b11f..5bee96429 100644 --- a/module/nvpair/nvpair.c +++ b/module/nvpair/nvpair.c @@ -3244,3 +3244,127 @@ nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) return (err); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +static int nvpair_init(void) { return 0; } +static int nvpair_fini(void) { return 0; } + +spl_module_init(nvpair_init); +spl_module_exit(nvpair_fini); + +MODULE_AUTHOR("Sun Microsystems, Inc"); +MODULE_DESCRIPTION("Generic name/value pair implementation"); +MODULE_LICENSE("CDDL"); + +EXPORT_SYMBOL(nv_alloc_init); +EXPORT_SYMBOL(nv_alloc_reset); +EXPORT_SYMBOL(nv_alloc_fini); + +/* list management */ +EXPORT_SYMBOL(nvlist_alloc); +EXPORT_SYMBOL(nvlist_free); +EXPORT_SYMBOL(nvlist_size); +EXPORT_SYMBOL(nvlist_pack); +EXPORT_SYMBOL(nvlist_unpack); +EXPORT_SYMBOL(nvlist_dup); +EXPORT_SYMBOL(nvlist_merge); + +EXPORT_SYMBOL(nvlist_xalloc); +EXPORT_SYMBOL(nvlist_xpack); +EXPORT_SYMBOL(nvlist_xunpack); +EXPORT_SYMBOL(nvlist_xdup); +EXPORT_SYMBOL(nvlist_lookup_nv_alloc); + +EXPORT_SYMBOL(nvlist_add_nvpair); 
+EXPORT_SYMBOL(nvlist_add_boolean); +EXPORT_SYMBOL(nvlist_add_boolean_value); +EXPORT_SYMBOL(nvlist_add_byte); +EXPORT_SYMBOL(nvlist_add_int8); +EXPORT_SYMBOL(nvlist_add_uint8); +EXPORT_SYMBOL(nvlist_add_int16); +EXPORT_SYMBOL(nvlist_add_uint16); +EXPORT_SYMBOL(nvlist_add_int32); +EXPORT_SYMBOL(nvlist_add_uint32); +EXPORT_SYMBOL(nvlist_add_int64); +EXPORT_SYMBOL(nvlist_add_uint64); +EXPORT_SYMBOL(nvlist_add_string); +EXPORT_SYMBOL(nvlist_add_nvlist); +EXPORT_SYMBOL(nvlist_add_boolean_array); +EXPORT_SYMBOL(nvlist_add_byte_array); +EXPORT_SYMBOL(nvlist_add_int8_array); +EXPORT_SYMBOL(nvlist_add_uint8_array); +EXPORT_SYMBOL(nvlist_add_int16_array); +EXPORT_SYMBOL(nvlist_add_uint16_array); +EXPORT_SYMBOL(nvlist_add_int32_array); +EXPORT_SYMBOL(nvlist_add_uint32_array); +EXPORT_SYMBOL(nvlist_add_int64_array); +EXPORT_SYMBOL(nvlist_add_uint64_array); +EXPORT_SYMBOL(nvlist_add_string_array); +EXPORT_SYMBOL(nvlist_add_nvlist_array); +EXPORT_SYMBOL(nvlist_add_hrtime); + +EXPORT_SYMBOL(nvlist_remove); +EXPORT_SYMBOL(nvlist_remove_all); + +EXPORT_SYMBOL(nvlist_lookup_boolean); +EXPORT_SYMBOL(nvlist_lookup_boolean_value); +EXPORT_SYMBOL(nvlist_lookup_byte); +EXPORT_SYMBOL(nvlist_lookup_int8); +EXPORT_SYMBOL(nvlist_lookup_uint8); +EXPORT_SYMBOL(nvlist_lookup_int16); +EXPORT_SYMBOL(nvlist_lookup_uint16); +EXPORT_SYMBOL(nvlist_lookup_int32); +EXPORT_SYMBOL(nvlist_lookup_uint32); +EXPORT_SYMBOL(nvlist_lookup_int64); +EXPORT_SYMBOL(nvlist_lookup_uint64); +EXPORT_SYMBOL(nvlist_lookup_string); +EXPORT_SYMBOL(nvlist_lookup_nvlist); +EXPORT_SYMBOL(nvlist_lookup_boolean_array); +EXPORT_SYMBOL(nvlist_lookup_byte_array); +EXPORT_SYMBOL(nvlist_lookup_int8_array); +EXPORT_SYMBOL(nvlist_lookup_uint8_array); +EXPORT_SYMBOL(nvlist_lookup_int16_array); +EXPORT_SYMBOL(nvlist_lookup_uint16_array); +EXPORT_SYMBOL(nvlist_lookup_int32_array); +EXPORT_SYMBOL(nvlist_lookup_uint32_array); +EXPORT_SYMBOL(nvlist_lookup_int64_array); +EXPORT_SYMBOL(nvlist_lookup_uint64_array); 
+EXPORT_SYMBOL(nvlist_lookup_string_array); +EXPORT_SYMBOL(nvlist_lookup_nvlist_array); +EXPORT_SYMBOL(nvlist_lookup_hrtime); +EXPORT_SYMBOL(nvlist_lookup_pairs); + +EXPORT_SYMBOL(nvlist_lookup_nvpair); +EXPORT_SYMBOL(nvlist_exists); + +/* processing nvpair */ +EXPORT_SYMBOL(nvlist_next_nvpair); +EXPORT_SYMBOL(nvpair_name); +EXPORT_SYMBOL(nvpair_type); +EXPORT_SYMBOL(nvpair_value_boolean_value); +EXPORT_SYMBOL(nvpair_value_byte); +EXPORT_SYMBOL(nvpair_value_int8); +EXPORT_SYMBOL(nvpair_value_uint8); +EXPORT_SYMBOL(nvpair_value_int16); +EXPORT_SYMBOL(nvpair_value_uint16); +EXPORT_SYMBOL(nvpair_value_int32); +EXPORT_SYMBOL(nvpair_value_uint32); +EXPORT_SYMBOL(nvpair_value_int64); +EXPORT_SYMBOL(nvpair_value_uint64); +EXPORT_SYMBOL(nvpair_value_string); +EXPORT_SYMBOL(nvpair_value_nvlist); +EXPORT_SYMBOL(nvpair_value_boolean_array); +EXPORT_SYMBOL(nvpair_value_byte_array); +EXPORT_SYMBOL(nvpair_value_int8_array); +EXPORT_SYMBOL(nvpair_value_uint8_array); +EXPORT_SYMBOL(nvpair_value_int16_array); +EXPORT_SYMBOL(nvpair_value_uint16_array); +EXPORT_SYMBOL(nvpair_value_int32_array); +EXPORT_SYMBOL(nvpair_value_uint32_array); +EXPORT_SYMBOL(nvpair_value_int64_array); +EXPORT_SYMBOL(nvpair_value_uint64_array); +EXPORT_SYMBOL(nvpair_value_string_array); +EXPORT_SYMBOL(nvpair_value_nvlist_array); +EXPORT_SYMBOL(nvpair_value_hrtime); + +#endif diff --git a/module/nvpair/nvpair_alloc_spl.c b/module/nvpair/nvpair_alloc_spl.c new file mode 100644 index 000000000..d26d26913 --- /dev/null +++ b/module/nvpair/nvpair_alloc_spl.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at * usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/nvpair.h> +#include <sys/kmem.h> + +static void * +nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size) +{ + return (kmem_alloc(size, KM_SLEEP)); +} + +static void * +nv_alloc_nosleep_spl(nv_alloc_t *nva, size_t size) +{ + return (kmem_alloc(size, KM_NOSLEEP)); +} + +static void +nv_free_spl(nv_alloc_t *nva, void *buf, size_t size) +{ + kmem_free(buf, size); +} + +const nv_alloc_ops_t spl_sleep_ops_def = { + NULL, /* nv_ao_init() */ + NULL, /* nv_ao_fini() */ + nv_alloc_sleep_spl, /* nv_ao_alloc() */ + nv_free_spl, /* nv_ao_free() */ + NULL /* nv_ao_reset() */ +}; + +const nv_alloc_ops_t spl_nosleep_ops_def = { + NULL, /* nv_ao_init() */ + NULL, /* nv_ao_fini() */ + nv_alloc_nosleep_spl, /* nv_ao_alloc() */ + nv_free_spl, /* nv_ao_free() */ + NULL /* nv_ao_reset() */ +}; + +nv_alloc_t nv_alloc_sleep_def = { + &spl_sleep_ops_def, + NULL +}; + +nv_alloc_t nv_alloc_nosleep_def = { + &spl_nosleep_ops_def, + NULL +}; + +nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def; +nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def; diff --git a/module/unicode/u8_textprep.c b/module/unicode/u8_textprep.c index 2532769c8..37fb2e5a4 100644 --- a/module/unicode/u8_textprep.c +++ b/module/unicode/u8_textprep.c @@ -2131,3 +2131,19 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, return (ret_val); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +static 
int unicode_init(void) { return 0; } +static int unicode_fini(void) { return 0; } + +spl_module_init(unicode_init); +spl_module_exit(unicode_fini); + +MODULE_AUTHOR("Sun Microsystems, Inc"); +MODULE_DESCRIPTION("Unicode implementation"); +MODULE_LICENSE("CDDL"); + +EXPORT_SYMBOL(u8_validate); +EXPORT_SYMBOL(u8_strcmp); +EXPORT_SYMBOL(u8_textprep_str); +#endif diff --git a/module/unicode/uconv.c b/module/unicode/uconv.c index b996e1f60..7a8278322 100644 --- a/module/unicode/uconv.c +++ b/module/unicode/uconv.c @@ -853,3 +853,12 @@ uconv_u8tou32(const uchar_t *u8s, size_t *utf8len, return (0); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(uconv_u16tou32); +EXPORT_SYMBOL(uconv_u16tou8); +EXPORT_SYMBOL(uconv_u32tou16); +EXPORT_SYMBOL(uconv_u32tou8); +EXPORT_SYMBOL(uconv_u8tou16); +EXPORT_SYMBOL(uconv_u8tou32); +#endif diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c index 2b9869f18..2fa8d4244 100644 --- a/module/zcommon/zfs_comutil.c +++ b/module/zcommon/zfs_comutil.c @@ -63,3 +63,7 @@ zfs_allocatable_devs(nvlist_t *nv) } return (B_FALSE); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_allocatable_devs); +#endif diff --git a/module/zcommon/zfs_deleg.c b/module/zcommon/zfs_deleg.c index ed65f955d..d30c1e0cc 100644 --- a/module/zcommon/zfs_deleg.c +++ b/module/zcommon/zfs_deleg.c @@ -235,3 +235,9 @@ zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, ASSERT(!"bad zfs_deleg_who_type_t"); } } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_deleg_verify_nvlist); +EXPORT_SYMBOL(zfs_deleg_whokey); +EXPORT_SYMBOL(zfs_deleg_canonicalize_perm); +#endif diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index 5cfafea47..706968f51 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -343,3 +343,9 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) return (0); } + +#if defined(_KERNEL) && defined(HAVE_SPL) 
+EXPORT_SYMBOL(snapshot_namecheck); +EXPORT_SYMBOL(pool_namecheck); +EXPORT_SYMBOL(dataset_namecheck); +#endif diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 2e8f5a77f..45943602c 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -532,3 +532,37 @@ zfs_prop_align_right(zfs_prop_t prop) } #endif + +#if defined(_KERNEL) && defined(HAVE_SPL) +static int zcommon_init(void) { return 0; } +static int zcommon_fini(void) { return 0; } + +spl_module_init(zcommon_init); +spl_module_exit(zcommon_fini); + +MODULE_AUTHOR("Sun Microsystems, Inc"); +MODULE_DESCRIPTION("Generic ZFS support"); +MODULE_LICENSE("CDDL"); + +/* zfs dataset property functions */ +EXPORT_SYMBOL(zfs_userquota_prop_prefixes); +EXPORT_SYMBOL(zfs_prop_init); +EXPORT_SYMBOL(zfs_prop_get_type); +EXPORT_SYMBOL(zfs_prop_get_table); +EXPORT_SYMBOL(zfs_prop_delegatable); + +/* Dataset property functions shared between libzfs and kernel. */ +EXPORT_SYMBOL(zfs_prop_default_string); +EXPORT_SYMBOL(zfs_prop_default_numeric); +EXPORT_SYMBOL(zfs_prop_readonly); +EXPORT_SYMBOL(zfs_prop_inheritable); +EXPORT_SYMBOL(zfs_prop_setonce); +EXPORT_SYMBOL(zfs_prop_to_name); +EXPORT_SYMBOL(zfs_name_to_prop); +EXPORT_SYMBOL(zfs_prop_user); +EXPORT_SYMBOL(zfs_prop_userquota); +EXPORT_SYMBOL(zfs_prop_index_to_string); +EXPORT_SYMBOL(zfs_prop_string_to_index); +EXPORT_SYMBOL(zfs_prop_valid_for_type); + +#endif diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index fd24c34d4..a873be5d7 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -186,3 +186,19 @@ zpool_prop_align_right(zpool_prop_t prop) return (zpool_prop_table[prop].pd_rightalign); } #endif + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* zpool property functions */ +EXPORT_SYMBOL(zpool_prop_init); +EXPORT_SYMBOL(zpool_prop_get_type); +EXPORT_SYMBOL(zpool_prop_get_table); + +/* Pool property functions shared between libzfs and kernel. 
*/ +EXPORT_SYMBOL(zpool_name_to_prop); +EXPORT_SYMBOL(zpool_prop_to_name); +EXPORT_SYMBOL(zpool_prop_default_string); +EXPORT_SYMBOL(zpool_prop_default_numeric); +EXPORT_SYMBOL(zpool_prop_readonly); +EXPORT_SYMBOL(zpool_prop_index_to_string); +EXPORT_SYMBOL(zpool_prop_string_to_index); +#endif diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c index 5f968e695..329a278f2 100644 --- a/module/zcommon/zprop_common.c +++ b/module/zcommon/zprop_common.c @@ -399,3 +399,20 @@ zprop_width(int prop, boolean_t *fixed, zfs_type_t type) } #endif + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* Common routines to initialize property tables */ +EXPORT_SYMBOL(register_impl); +EXPORT_SYMBOL(register_string); +EXPORT_SYMBOL(register_number); +EXPORT_SYMBOL(register_index); +EXPORT_SYMBOL(register_hidden); + +/* Common routines for zfs and zpool property management */ +EXPORT_SYMBOL(zprop_iter_common); +EXPORT_SYMBOL(zprop_name_to_prop); +EXPORT_SYMBOL(zprop_string_to_index); +EXPORT_SYMBOL(zprop_index_to_string); +EXPORT_SYMBOL(zprop_values); +EXPORT_SYMBOL(zprop_valid_for_type); +#endif diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 2bde8a89a..f39280097 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -47,6 +47,7 @@ ${MODULE}-objs += uberblock.o ${MODULE}-objs += unique.o ${MODULE}-objs += vdev.o ${MODULE}-objs += vdev_cache.o +${MODULE}-objs += vdev_disk.o ${MODULE}-objs += vdev_file.o ${MODULE}-objs += vdev_label.o ${MODULE}-objs += vdev_mirror.o diff --git a/module/zfs/arc.c b/module/zfs/arc.c index a319b217d..880d73711 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -175,9 +175,9 @@ static boolean_t arc_warm; /* * These tunables are for performance analysis. 
*/ -uint64_t zfs_arc_max; -uint64_t zfs_arc_min; -uint64_t zfs_arc_meta_limit = 0; +unsigned long zfs_arc_max = 0; +unsigned long zfs_arc_min = 0; +unsigned long zfs_arc_meta_limit = 0; int zfs_mdcomp_disable = 0; int zfs_arc_grow_retry = 0; int zfs_arc_shrink_shift = 0; @@ -517,7 +517,7 @@ static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes); * Hash table routines */ -#define HT_LOCK_PAD 64 +#define HT_LOCK_PAD 256 struct ht_lock { kmutex_t ht_lock; @@ -757,8 +757,15 @@ buf_fini(void) { int i; +#if defined(_KERNEL) && defined(HAVE_SPL) + /* Large allocations which do not require contiguous pages + * should be using vmem_free() in the linux kernel */ + vmem_free(buf_hash_table.ht_table, + (buf_hash_table.ht_mask + 1) * sizeof (void *)); +#else kmem_free(buf_hash_table.ht_table, (buf_hash_table.ht_mask + 1) * sizeof (void *)); +#endif for (i = 0; i < BUF_LOCKS; i++) mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); kmem_cache_destroy(hdr_cache); @@ -857,8 +864,15 @@ buf_init(void) hsize <<= 1; retry: buf_hash_table.ht_mask = hsize - 1; +#if defined(_KERNEL) && defined(HAVE_SPL) + /* Large allocations which do not require contiguous pages + * should be using vmem_alloc() in the linux kernel */ + buf_hash_table.ht_table = + vmem_zalloc(hsize * sizeof (void*), KM_SLEEP); +#else buf_hash_table.ht_table = kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP); +#endif if (buf_hash_table.ht_table == NULL) { ASSERT(hsize > (1ULL << 8)); hsize >>= 1; @@ -4688,3 +4702,21 @@ l2arc_stop(void) cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock); mutex_exit(&l2arc_feed_thr_lock); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(arc_read); +EXPORT_SYMBOL(arc_buf_remove_ref); +EXPORT_SYMBOL(arc_getbuf_func); + +module_param(zfs_arc_min, ulong, 0644); +MODULE_PARM_DESC(zfs_arc_min, "Minimum arc size"); + +module_param(zfs_arc_max, ulong, 0644); +MODULE_PARM_DESC(zfs_arc_max, "Maximum arc size"); + +module_param(zfs_arc_meta_limit, ulong, 0644); 
+MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); + +module_param(zfs_mdcomp_disable, int, 0644); +MODULE_PARM_DESC(zfs_mdcomp_disable, "Meta compression disable"); +#endif diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 0c8b8aaee..cd8c4e20e 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -24,6 +24,7 @@ */ #include <sys/zfs_context.h> +#include <sys/arc.h> #include <sys/dmu.h> #include <sys/dmu_impl.h> #include <sys/dbuf.h> @@ -255,7 +256,13 @@ dbuf_init(void) retry: h->hash_table_mask = hsize - 1; +#if defined(_KERNEL) && defined(HAVE_SPL) + /* Large allocations which do not require contiguous pages + * should be using vmem_alloc() in the linux kernel */ + h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP); +#else h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); +#endif if (h->hash_table == NULL) { /* XXX - we should really return an error instead of assert */ ASSERT(hsize > (1ULL << 10)); @@ -279,7 +286,13 @@ dbuf_fini(void) for (i = 0; i < DBUF_MUTEXES; i++) mutex_destroy(&h->hash_mutexes[i]); +#if defined(_KERNEL) && defined(HAVE_SPL) + /* Large allocations which do not require contiguous pages + * should be using vmem_free() in the linux kernel */ + vmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); +#else kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); +#endif kmem_cache_destroy(dbuf_cache); } @@ -2436,3 +2449,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) dbuf_rele(db, (void *)(uintptr_t)txg); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dmu_buf_rele); +EXPORT_SYMBOL(dmu_buf_will_dirty); +#endif diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 8ca5c9d7d..a5d37bf33 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -584,7 +584,8 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, bufoff = offset - db->db_offset; tocpy = (int)MIN(db->db_size - bufoff, size); - bcopy((char *)db->db_data + 
bufoff, buf, tocpy); + if (!(flags & DMU_READ_ZEROCOPY)) + bcopy((char *)db->db_data + bufoff, buf, tocpy); offset += tocpy; size -= tocpy; @@ -597,8 +598,8 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } void -dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - const void *buf, dmu_tx_t *tx) +dmu_write_impl(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx, int flags) { dmu_buf_t **dbp; int numbufs, i; @@ -626,7 +627,8 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, else dmu_buf_will_dirty(db, tx); - bcopy(buf, (char *)db->db_data + bufoff, tocpy); + if (!(flags & DMU_WRITE_ZEROCOPY)) + bcopy(buf, (char *)db->db_data + bufoff, tocpy); if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); @@ -639,6 +641,13 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } void +dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx) +{ + dmu_write_impl(os, object, offset, size, buf, tx, 0); +} + +void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx) { @@ -659,7 +668,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_buf_rele_array(dbp, numbufs, FTAG); } -#ifdef _KERNEL +#if defined(_KERNEL) && defined(HAVE_UIO_RW) int dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) { @@ -1274,3 +1283,23 @@ dmu_fini(void) dbuf_fini(); l2arc_fini(); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dmu_bonus_hold); +EXPORT_SYMBOL(dmu_free_range); +EXPORT_SYMBOL(dmu_read); +EXPORT_SYMBOL(dmu_write_impl); +EXPORT_SYMBOL(dmu_write); + +/* Get information on a DMU object. 
*/ +EXPORT_SYMBOL(dmu_object_info); +EXPORT_SYMBOL(dmu_object_info_from_dnode); +EXPORT_SYMBOL(dmu_object_info_from_db); +EXPORT_SYMBOL(dmu_object_size_from_db); + +EXPORT_SYMBOL(dmu_object_set_blocksize); +EXPORT_SYMBOL(dmu_object_set_checksum); +EXPORT_SYMBOL(dmu_object_set_compress); + +EXPORT_SYMBOL(dmu_ot); +#endif diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index 1f91fc1ad..c0031e155 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -192,3 +192,11 @@ dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg) return (error); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dmu_object_alloc); +EXPORT_SYMBOL(dmu_object_claim); +EXPORT_SYMBOL(dmu_object_reclaim); +EXPORT_SYMBOL(dmu_object_free); +EXPORT_SYMBOL(dmu_object_next); +#endif diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 01792600b..30ef576bd 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -781,9 +781,11 @@ dmu_objset_snapshot_one(char *name, void *arg) * doing a recursive snapshot. 
The permission checks for the starting * dataset have already been performed in zfs_secpolicy_snapshot() */ +#ifdef HAVE_ZPL if (sn->checkperms == B_TRUE && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) return (err); +#endif err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os); if (err != 0) @@ -1479,3 +1481,37 @@ dmu_objset_get_user(objset_t *os) ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock)); return (os->os->os_user_ptr); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dmu_objset_spa); +EXPORT_SYMBOL(dmu_objset_zil); +EXPORT_SYMBOL(dmu_objset_pool); +EXPORT_SYMBOL(dmu_objset_ds); +EXPORT_SYMBOL(dmu_objset_name); +EXPORT_SYMBOL(dmu_objset_type); +EXPORT_SYMBOL(dmu_objset_id); +EXPORT_SYMBOL(dmu_snapshot_list_next); +EXPORT_SYMBOL(dmu_dir_list_next); +EXPORT_SYMBOL(dmu_objset_set_user); +EXPORT_SYMBOL(dmu_objset_get_user); + +/* Public routines to create, destroy, open, and close objsets. */ +EXPORT_SYMBOL(dmu_objset_open); +EXPORT_SYMBOL(dmu_objset_open_ds); +EXPORT_SYMBOL(dmu_objset_close); +EXPORT_SYMBOL(dmu_objset_evict_dbufs); +EXPORT_SYMBOL(dmu_objset_create); +EXPORT_SYMBOL(dmu_objset_create_impl); +EXPORT_SYMBOL(dmu_objset_destroy); +EXPORT_SYMBOL(dmu_snapshots_destroy); +EXPORT_SYMBOL(dmu_objset_rollback); +EXPORT_SYMBOL(dmu_objset_snapshot); +EXPORT_SYMBOL(dmu_objset_rename); +EXPORT_SYMBOL(dmu_objset_find); +EXPORT_SYMBOL(dmu_objset_byteswap); + +/* Get stats on a dataset. 
*/ +EXPORT_SYMBOL(dmu_objset_fast_stat); +EXPORT_SYMBOL(dmu_objset_stats); +EXPORT_SYMBOL(dmu_objset_space); +#endif diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index b977a2ff7..c9a1647fa 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -952,7 +952,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) ra.vp = vp; ra.voff = *voffp; ra.bufsize = 1<<20; - ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); + ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP); /* these were verified in dmu_recv_begin */ ASSERT(drc->drc_drrb->drr_version == DMU_BACKUP_STREAM_VERSION); @@ -1048,7 +1048,7 @@ out: } } - kmem_free(ra.buf, ra.bufsize); + vmem_free(ra.buf, ra.bufsize); *voffp = ra.voff; return (ra.err); } diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 569bf0a3c..d7977dfd2 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -421,3 +421,8 @@ traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg) err = 0; return (err); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(traverse_dataset); +EXPORT_SYMBOL(traverse_pool); +#endif diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index a3c9e5241..261d1cf5f 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -1201,3 +1201,17 @@ dmu_tx_callback(list_t *cb_list, int error) kmem_free(dcb, sizeof (dmu_tx_callback_t)); } } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dmu_tx_create); +EXPORT_SYMBOL(dmu_tx_hold_write); +EXPORT_SYMBOL(dmu_tx_hold_free); +EXPORT_SYMBOL(dmu_tx_hold_zap); +EXPORT_SYMBOL(dmu_tx_hold_bonus); +EXPORT_SYMBOL(dmu_tx_abort); +EXPORT_SYMBOL(dmu_tx_assign); +EXPORT_SYMBOL(dmu_tx_wait); +EXPORT_SYMBOL(dmu_tx_commit); +EXPORT_SYMBOL(dmu_tx_get_txg); +EXPORT_SYMBOL(dmu_tx_callback_register); +#endif diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index fc3d23b87..3e33527c4 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -656,3 +656,9 @@ dmu_zfetch(zfetch_t *zf, 
uint64_t offset, uint64_t size, int prefetched) } } } + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(zfs_prefetch_disable, int, 0644); +MODULE_PARM_DESC(zfs_prefetch_disable, "Disable all ZFS prefetching"); +#endif + diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 926034836..dd74ad510 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -2391,6 +2391,7 @@ dsl_snapshot_rename_one(char *name, void *arg) * For recursive snapshot renames the parent won't be changing * so we just pass name for both the to/from argument. */ +#ifdef HAVE_ZPL err = zfs_secpolicy_rename_perms(name, name, CRED()); if (err == ENOENT) { return (0); @@ -2398,8 +2399,10 @@ dsl_snapshot_rename_one(char *name, void *arg) (void) strcpy(ra->failed, name); return (err); } +#endif -#ifdef _KERNEL +/* XXX: Ignore for SPL version until mounting the FS is supported */ +#if defined(_KERNEL) && !defined(HAVE_SPL) /* * For all filesystems undergoing rename, we'll need to unmount it. 
*/ @@ -3823,3 +3826,49 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) dsl_dataset_rele(ds, FTAG); return (0); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dsl_dataset_hold); +EXPORT_SYMBOL(dsl_dataset_hold_obj); +EXPORT_SYMBOL(dsl_dataset_own); +EXPORT_SYMBOL(dsl_dataset_own_obj); +EXPORT_SYMBOL(dsl_dataset_name); +EXPORT_SYMBOL(dsl_dataset_rele); +EXPORT_SYMBOL(dsl_dataset_disown); +EXPORT_SYMBOL(dsl_dataset_drop_ref); +EXPORT_SYMBOL(dsl_dataset_tryown); +EXPORT_SYMBOL(dsl_dataset_make_exclusive); +EXPORT_SYMBOL(dsl_dataset_create_sync); +EXPORT_SYMBOL(dsl_dataset_create_sync_dd); +EXPORT_SYMBOL(dsl_dataset_destroy); +EXPORT_SYMBOL(dsl_snapshots_destroy); +EXPORT_SYMBOL(dsl_dataset_destroy_check); +EXPORT_SYMBOL(dsl_dataset_destroy_sync); +EXPORT_SYMBOL(dsl_dataset_snapshot_check); +EXPORT_SYMBOL(dsl_dataset_snapshot_sync); +EXPORT_SYMBOL(dsl_dataset_rollback); +EXPORT_SYMBOL(dsl_dataset_rename); +EXPORT_SYMBOL(dsl_dataset_promote); +EXPORT_SYMBOL(dsl_dataset_clone_swap); +EXPORT_SYMBOL(dsl_dataset_set_user_ptr); +EXPORT_SYMBOL(dsl_dataset_get_user_ptr); +EXPORT_SYMBOL(dsl_dataset_get_blkptr); +EXPORT_SYMBOL(dsl_dataset_set_blkptr); +EXPORT_SYMBOL(dsl_dataset_get_spa); +EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap); +EXPORT_SYMBOL(dsl_dataset_sync); +EXPORT_SYMBOL(dsl_dataset_block_born); +EXPORT_SYMBOL(dsl_dataset_block_kill); +EXPORT_SYMBOL(dsl_dataset_block_freeable); +EXPORT_SYMBOL(dsl_dataset_prev_snap_txg); +EXPORT_SYMBOL(dsl_dataset_dirty); +EXPORT_SYMBOL(dsl_dataset_stats); +EXPORT_SYMBOL(dsl_dataset_fast_stat); +EXPORT_SYMBOL(dsl_dataset_space); +EXPORT_SYMBOL(dsl_dataset_fsid_guid); +EXPORT_SYMBOL(dsl_dsobj_to_dsname); +EXPORT_SYMBOL(dsl_dataset_check_quota); +EXPORT_SYMBOL(dsl_dataset_set_quota); +EXPORT_SYMBOL(dsl_dataset_set_quota_sync); +EXPORT_SYMBOL(dsl_dataset_set_reservation); +#endif diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 22605053e..9201f9573 100644 --- a/module/zfs/dsl_deleg.c +++ 
b/module/zfs/dsl_deleg.c @@ -741,3 +741,8 @@ dsl_delegation_on(objset_t *os) { return (os->os->os_spa->spa_delegation); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dsl_deleg_get); +EXPORT_SYMBOL(dsl_deleg_set); +#endif diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index eb2767fe1..2f83709bc 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -1317,3 +1317,10 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) return (0); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dsl_dir_set_quota); +EXPORT_SYMBOL(dsl_dir_set_reservation); +EXPORT_SYMBOL(dsl_dir_open); +EXPORT_SYMBOL(dsl_dir_close); +#endif diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 4708565b1..45c81630e 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -675,3 +675,10 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value) VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(prop), propval) == 0); nvlist_free(propval); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dsl_prop_set); +EXPORT_SYMBOL(dsl_prop_get_all); +EXPORT_SYMBOL(dsl_prop_nvlist_add_uint64); +EXPORT_SYMBOL(dsl_prop_get_integer); +#endif diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index 828911170..4391aa21b 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -223,3 +223,8 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp, arg1, arg2, blocks_modified); dsl_sync_task_group_nowait(dstg, tx); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dsl_sync_task_do); +EXPORT_SYMBOL(dsl_sync_task_do_nowait); +#endif diff --git a/module/zfs/fletcher.c b/module/zfs/fletcher.c index 54247d724..e0d062236 100644 --- a/module/zfs/fletcher.c +++ b/module/zfs/fletcher.c @@ -243,3 +243,12 @@ fletcher_4_incremental_byteswap(const void *buf, uint64_t size, ZIO_SET_CHECKSUM(zcp, a, b, c, d); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(fletcher_2_native); 
+EXPORT_SYMBOL(fletcher_2_byteswap); +EXPORT_SYMBOL(fletcher_4_native); +EXPORT_SYMBOL(fletcher_4_byteswap); +EXPORT_SYMBOL(fletcher_4_incremental_native); +EXPORT_SYMBOL(fletcher_4_incremental_byteswap); +#endif diff --git a/module/zfs/include/sys/dmu.h b/module/zfs/include/sys/dmu.h index b15da8391..e3c69ee3e 100644 --- a/module/zfs/include/sys/dmu.h +++ b/module/zfs/include/sys/dmu.h @@ -157,8 +157,8 @@ void zfs_znode_byteswap(void *buf, size_t size); * The maximum number of bytes that can be accessed as part of one * operation, including metadata. */ -#define DMU_MAX_ACCESS (10<<20) /* 10MB */ -#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ +#define DMU_MAX_ACCESS (10<<20) /* 10MB */ +#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ #define DMU_USERUSED_OBJECT (-1ULL) #define DMU_GROUPUSED_OBJECT (-2ULL) @@ -480,17 +480,23 @@ int dmu_free_object(objset_t *os, uint64_t object); */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ +#define DMU_READ_ZEROCOPY 2 /* zerocopy on read (test) */ +#define DMU_WRITE_ZEROCOPY 4 /* zerocopy on write (test) */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); +void dmu_write_impl(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size, const void *buf, dmu_tx_t *tx, int flags); void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); +#if defined(_KERNEL) && defined(HAVE_UIO_RW) int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, dmu_tx_t *tx); int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct page *pp, dmu_tx_t *tx); +#endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); 
void dmu_return_arcbuf(struct arc_buf *buf); void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, diff --git a/module/zfs/include/sys/spa.h b/module/zfs/include/sys/spa.h index 30554ae0e..14c3db86f 100644 --- a/module/zfs/include/sys/spa.h +++ b/module/zfs/include/sys/spa.h @@ -535,7 +535,11 @@ extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx); extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t); /* asynchronous event notification */ +#ifdef HAVE_SYSEVENT extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name); +#else +#define spa_event_notify(s,v,n) ((void)0) +#endif #ifdef ZFS_DEBUG #define dprintf_bp(bp, fmt, ...) do { \ diff --git a/module/zfs/include/sys/spa_impl.h b/module/zfs/include/sys/spa_impl.h index 84da68488..8a931a053 100644 --- a/module/zfs/include/sys/spa_impl.h +++ b/module/zfs/include/sys/spa_impl.h @@ -181,7 +181,7 @@ struct spa { refcount_t spa_refcount; /* number of opens */ }; -extern const char *spa_config_path; +extern char *spa_config_path; #define BOOTFS_COMPRESS_VALID(compress) \ ((compress) == ZIO_COMPRESS_LZJB || \ diff --git a/module/zfs/include/sys/vdev_disk.h b/module/zfs/include/sys/vdev_disk.h new file mode 100644 index 000000000..544036bbc --- /dev/null +++ b/module/zfs/include/sys/vdev_disk.h @@ -0,0 +1,71 @@ +#ifndef _SYS_VDEV_DISK_H +#define _SYS_VDEV_DISK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL +#include <sys/vdev.h> +#include <sys/ddi.h> +#include <sys/sunldi.h> +#include <sys/sunddi.h> +#include <zfs_config.h> + +typedef struct vdev_disk { + ddi_devid_t vd_devid; + char *vd_minor; + struct block_device *vd_bdev; +} vdev_disk_t; + +extern int vdev_disk_physio(struct block_device *, caddr_t, + size_t, uint64_t, int); +extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); + +/* 2.6.24 API change */ +#ifdef HAVE_2ARGS_BIO_END_IO_T +# define BIO_END_IO_PROTO(fn, x, y, z) static void fn(struct bio *x, int z) +# define 
BIO_END_IO_RETURN(rc) return +#else +# define BIO_END_IO_PROTO(fn, x, y, z) static int fn(struct bio *x, \ + unsigned int y, int z) +# define BIO_END_IO_RETURN(rc) return rc +#endif /* HAVE_2ARGS_BIO_END_IO_T */ + +/* 2.6.29 API change */ +#ifdef HAVE_BIO_RW_SYNCIO +# define DIO_RW_SYNCIO BIO_RW_SYNCIO +#else +# define DIO_RW_SYNCIO BIO_RW_SYNC +#endif /* HAVE_BIO_RW_SYNCIO */ + +/* 2.6.28 API change */ +#ifdef HAVE_OPEN_BDEV_EXCLUSIVE +# define vdev_bdev_open(path, md, hld) open_bdev_exclusive(path, md, hld) +# define vdev_bdev_close(bdev, md) close_bdev_exclusive(bdev, md) +#else +# define vdev_bdev_open(path, md, hld) open_bdev_excl(path, md, hld) +# define vdev_bdev_close(bdev, md) close_bdev_excl(bdev) +#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ + +/* 2.6.22 API change */ +#ifdef HAVE_1ARG_INVALIDATE_BDEV +# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev) +#else +# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev, 1) +#endif /* HAVE_1ARG_INVALIDATE_BDEV */ + +/* 2.6.30 API change */ +#ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE +# define vdev_bdev_block_size(bdev) bdev_logical_block_size(bdev) +#else +# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev) +#endif + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_DISK_H */ diff --git a/module/zfs/include/sys/zfs_context.h b/module/zfs/include/sys/zfs_context.h index 40de32084..29b88a37f 100644 --- a/module/zfs/include/sys/zfs_context.h +++ b/module/zfs/include/sys/zfs_context.h @@ -58,13 +58,13 @@ extern "C" { #include <sys/zone.h> #include <sys/uio.h> #include <sys/zfs_debug.h> +#ifdef HAVE_SYSEVENT #include <sys/sysevent.h> #include <sys/sysevent/eventdefs.h> #include <sys/sysevent/dev.h> +#endif /* HAVE_SYSEVENT */ #include <sys/fm/util.h> -#define CPU_SEQID (CPU->cpu_seqid) - #ifdef __cplusplus } #endif diff --git a/module/zfs/include/sys/zfs_debug.h b/module/zfs/include/sys/zfs_debug.h index 02d9da131..a8f0f186f 100644 --- a/module/zfs/include/sys/zfs_debug.h +++ 
b/module/zfs/include/sys/zfs_debug.h @@ -57,11 +57,16 @@ extern int zfs_flags; #define ZFS_DEBUG_MODIFY 0x0010 #ifdef ZFS_DEBUG +#if defined(_KERNEL) && defined(HAVE_SPL) +#include <sys/debug.h> +#define dprintf(...) CDEBUG_LIMIT(D_DPRINTF, __VA_ARGS__) +#else extern void __dprintf(const char *file, const char *func, int line, const char *fmt, ...); #define dprintf(...) \ if (zfs_flags & ZFS_DEBUG_DPRINTF) \ __dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__) +#endif /* _KERNEL && HAVE_SPL */ #else #define dprintf(...) ((void)0) #endif /* ZFS_DEBUG */ diff --git a/module/zfs/include/sys/zfs_znode.h b/module/zfs/include/sys/zfs_znode.h index f5ee2fc7b..d141c0302 100644 --- a/module/zfs/include/sys/zfs_znode.h +++ b/module/zfs/include/sys/zfs_znode.h @@ -118,9 +118,12 @@ extern "C" { /* * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in - * the directory entries. + * the directory entries. On Linux systems this value is already + * defined correctly as part of the /usr/include/dirent.h header file. 
*/ +#ifndef IFTODT #define IFTODT(mode) (((mode) & S_IFMT) >> 12) +#endif /* * The directory entry has the type (currently unused on Solaris) in the @@ -342,8 +345,10 @@ extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap); extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx); extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx); +#if defined(HAVE_UIO_RW) extern caddr_t zfs_map_page(page_t *, enum seg_rw); extern void zfs_unmap_page(page_t *, caddr_t); +#endif /* HAVE_UIO_RW */ extern zil_get_data_t zfs_get_data; extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index d147b8e91..391290a76 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -41,6 +41,7 @@ #include <sys/zap.h> #include <sys/zil.h> #include <sys/vdev_impl.h> +#include <sys/vdev_disk.h> #include <sys/metaslab.h> #include <sys/uberblock_impl.h> #include <sys/txg.h> @@ -200,9 +201,11 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) zap_cursor_t zc; zap_attribute_t za; objset_t *mos = spa->spa_meta_objset; - int err; + int err = 0; - VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); + if (err) + return err; mutex_enter(&spa->spa_props_lock); @@ -214,7 +217,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) /* If no pool property object, no more prop to get. 
*/ if (spa->spa_pool_props_object == 0) { mutex_exit(&spa->spa_props_lock); - return (0); + goto out; } /* @@ -3718,9 +3721,11 @@ spa_async_probe(spa_t *spa, vdev_t *vd) static void spa_async_autoexpand(spa_t *spa, vdev_t *vd) { +#ifdef HAVE_SYSEVENT sysevent_id_t eid; nvlist_t *attr; char *physpath; +#endif int c; if (!spa->spa_autoexpand) @@ -3734,6 +3739,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd) if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) return; +#ifdef HAVE_SYSEVENT physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); @@ -3745,6 +3751,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd) nvlist_free(attr); kmem_free(physpath, MAXPATHLEN); +#endif } static void @@ -4514,10 +4521,10 @@ spa_has_active_shared_spare(spa_t *spa) * in the userland libzpool, as we don't want consumers to misinterpret ztest * or zdb as real changes. */ +#ifdef HAVE_SYSEVENT void spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) { -#ifdef _KERNEL sysevent_t *ev; sysevent_attr_list_t *attr = NULL; sysevent_value_t value; @@ -4562,5 +4569,60 @@ done: if (attr) sysevent_free_attr(attr); sysevent_free(ev); -#endif } +#endif /* HAVE_SYSEVENT */ + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* state manipulation functions */ +EXPORT_SYMBOL(spa_open); +EXPORT_SYMBOL(spa_get_stats); +EXPORT_SYMBOL(spa_create); +EXPORT_SYMBOL(spa_import); +EXPORT_SYMBOL(spa_tryimport); +EXPORT_SYMBOL(spa_destroy); +EXPORT_SYMBOL(spa_export); +EXPORT_SYMBOL(spa_reset); +EXPORT_SYMBOL(spa_async_request); +EXPORT_SYMBOL(spa_async_suspend); +EXPORT_SYMBOL(spa_async_resume); +EXPORT_SYMBOL(spa_inject_addref); +EXPORT_SYMBOL(spa_inject_delref); + +/* device manipulation */ +EXPORT_SYMBOL(spa_vdev_add); +EXPORT_SYMBOL(spa_vdev_attach); +EXPORT_SYMBOL(spa_vdev_detach); +EXPORT_SYMBOL(spa_vdev_remove); +EXPORT_SYMBOL(spa_vdev_setpath); + +/* spare state (which is global across all pools) */ +EXPORT_SYMBOL(spa_spare_add); 
+EXPORT_SYMBOL(spa_spare_remove); +EXPORT_SYMBOL(spa_spare_exists); +EXPORT_SYMBOL(spa_spare_activate); + +/* L2ARC state (which is global across all pools) */ +EXPORT_SYMBOL(spa_l2cache_add); +EXPORT_SYMBOL(spa_l2cache_remove); +EXPORT_SYMBOL(spa_l2cache_exists); +EXPORT_SYMBOL(spa_l2cache_activate); +EXPORT_SYMBOL(spa_l2cache_drop); +EXPORT_SYMBOL(spa_l2cache_space_update); + +/* scrubbing */ +EXPORT_SYMBOL(spa_scrub); + +/* spa syncing */ +EXPORT_SYMBOL(spa_sync); /* only for DMU use */ +EXPORT_SYMBOL(spa_sync_allpools); + +/* properties */ +EXPORT_SYMBOL(spa_prop_set); +EXPORT_SYMBOL(spa_prop_get); +EXPORT_SYMBOL(spa_prop_clear_bootfs); + +#if defined(HAVE_SYSEVENT) +/* asynchronous event notification */ +EXPORT_SYMBOL(spa_event_notify); +#endif +#endif diff --git a/module/zfs/spa_boot.c b/module/zfs/spa_boot.c index 053903cac..aa276835a 100644 --- a/module/zfs/spa_boot.c +++ b/module/zfs/spa_boot.c @@ -24,7 +24,7 @@ * Use is subject to license terms. */ - +#ifdef _KERNEL #include <sys/spa.h> #include <sys/sunddi.h> @@ -45,3 +45,5 @@ spa_free_bootprop(char *value) { ddi_prop_free(value); } + +#endif /* _KERNEL */ diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index b2063bba1..19dca52c1 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -62,7 +62,7 @@ static uint64_t spa_config_generation = 1; * This can be overridden in userland to preserve an alternate namespace for * userland pools when doing testing. 
*/ -const char *spa_config_path = ZPOOL_CACHE; +char *spa_config_path = ZPOOL_CACHE; /* * Called when the module is first loaded, this routine loads the configuration @@ -442,3 +442,15 @@ spa_config_update(spa_t *spa, int what) if (what == SPA_CONFIG_UPDATE_POOL) spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(spa_config_sync); +EXPORT_SYMBOL(spa_config_load); +EXPORT_SYMBOL(spa_all_configs); +EXPORT_SYMBOL(spa_config_set); +EXPORT_SYMBOL(spa_config_generate); +EXPORT_SYMBOL(spa_config_update); + +module_param(spa_config_path, charp, 0444); +MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)"); +#endif diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index ac0a20aaf..480ea9c86 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -434,3 +434,17 @@ spa_errlog_sync(spa_t *spa, uint64_t txg) mutex_exit(&spa->spa_errlog_lock); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* error handling */ +EXPORT_SYMBOL(spa_log_error); +EXPORT_SYMBOL(zfs_ereport_post); +EXPORT_SYMBOL(zfs_post_remove); +EXPORT_SYMBOL(zfs_post_autoreplace); +EXPORT_SYMBOL(spa_get_errlog_size); +EXPORT_SYMBOL(spa_get_errlog); +EXPORT_SYMBOL(spa_errlog_rotate); +EXPORT_SYMBOL(spa_errlog_drain); +EXPORT_SYMBOL(spa_errlog_sync); +EXPORT_SYMBOL(spa_get_errlists); +#endif diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index b719f7b80..7f1ac22ac 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -177,7 +177,11 @@ static char * spa_history_zone(void) { #ifdef _KERNEL +#ifdef HAVE_SPL + return ("linux"); +#else return (curproc->p_zone->zone_name); +#endif #else return ("global"); #endif @@ -279,7 +283,7 @@ spa_history_log_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) dmu_buf_rele(dbp, FTAG); if (hap->ha_log_type == LOG_INTERNAL) { - kmem_free((void*)hap->ha_history_str, HIS_MAX_RECORD_LEN); + vmem_free((void*)hap->ha_history_str, 
HIS_MAX_RECORD_LEN); kmem_free(hap, sizeof (history_arg_t)); } } @@ -409,7 +413,7 @@ log_internal(history_internal_events_t event, spa_t *spa, return; hap = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); - str = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); + str = vmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); (void) vsnprintf(str, HIS_MAX_RECORD_LEN, fmt, adx); @@ -471,3 +475,10 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event) (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(spa_history_create_obj); +EXPORT_SYMBOL(spa_history_get); +EXPORT_SYMBOL(spa_history_log); +EXPORT_SYMBOL(spa_history_internal_log); +#endif diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index ef74a443d..3b6d33834 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -233,8 +233,13 @@ kmem_cache_t *spa_buffer_pool; int spa_mode_global; #ifdef ZFS_DEBUG +#if defined(_KERNEL) && defined(HAVE_SPL) +/* All filtering done by the SPL */ +int zfs_flags = ~0; +#else /* Everything except dprintf is on by default in debug builds */ int zfs_flags = ~ZFS_DEBUG_DPRINTF; +#endif #else int zfs_flags = 0; #endif @@ -1441,3 +1446,71 @@ spa_mode(spa_t *spa) { return (spa->spa_mode); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* Namespace manipulation */ +EXPORT_SYMBOL(spa_lookup); +EXPORT_SYMBOL(spa_add); +EXPORT_SYMBOL(spa_remove); +EXPORT_SYMBOL(spa_next); + +/* Refcount functions */ +EXPORT_SYMBOL(spa_open_ref); +EXPORT_SYMBOL(spa_close); +EXPORT_SYMBOL(spa_refcount_zero); + +/* Pool configuration lock */ +EXPORT_SYMBOL(spa_config_tryenter); +EXPORT_SYMBOL(spa_config_enter); +EXPORT_SYMBOL(spa_config_exit); +EXPORT_SYMBOL(spa_config_held); + +/* Pool vdev add/remove lock */ +EXPORT_SYMBOL(spa_vdev_enter); +EXPORT_SYMBOL(spa_vdev_exit); + +/* Pool vdev state change lock */ +EXPORT_SYMBOL(spa_vdev_state_enter); +EXPORT_SYMBOL(spa_vdev_state_exit); + +/* Accessor functions */ 
+EXPORT_SYMBOL(spa_shutting_down); +EXPORT_SYMBOL(spa_get_dsl); +EXPORT_SYMBOL(spa_get_rootblkptr); +EXPORT_SYMBOL(spa_set_rootblkptr); +EXPORT_SYMBOL(spa_altroot); +EXPORT_SYMBOL(spa_sync_pass); +EXPORT_SYMBOL(spa_name); +EXPORT_SYMBOL(spa_guid); +EXPORT_SYMBOL(spa_last_synced_txg); +EXPORT_SYMBOL(spa_first_txg); +EXPORT_SYMBOL(spa_version); +EXPORT_SYMBOL(spa_state); +EXPORT_SYMBOL(spa_freeze_txg); +EXPORT_SYMBOL(spa_get_alloc); +EXPORT_SYMBOL(spa_get_space); +EXPORT_SYMBOL(spa_get_dspace); +EXPORT_SYMBOL(spa_get_asize); +EXPORT_SYMBOL(spa_max_replication); +EXPORT_SYMBOL(spa_busy); +EXPORT_SYMBOL(spa_get_failmode); +EXPORT_SYMBOL(spa_suspended); + +/* Miscellaneous support routines */ +EXPORT_SYMBOL(spa_rename); +EXPORT_SYMBOL(spa_guid_exists); +EXPORT_SYMBOL(spa_strdup); +EXPORT_SYMBOL(spa_strfree); +EXPORT_SYMBOL(spa_get_random); +EXPORT_SYMBOL(sprintf_blkptr); +EXPORT_SYMBOL(spa_freeze); +EXPORT_SYMBOL(spa_upgrade); +EXPORT_SYMBOL(spa_evict_all); +EXPORT_SYMBOL(spa_lookup_by_guid); +EXPORT_SYMBOL(spa_has_spare); +EXPORT_SYMBOL(bp_get_dasize); +EXPORT_SYMBOL(spa_has_slogs); +EXPORT_SYMBOL(spa_is_root); + +EXPORT_SYMBOL(spa_namespace_lock); +#endif diff --git a/module/zfs/txg.c b/module/zfs/txg.c index fb95361f8..3d82990f5 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -49,7 +49,7 @@ txg_init(dsl_pool_t *dp, uint64_t txg) int c; bzero(tx, sizeof (tx_state_t)); - tx->tx_cpu = kmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP); + tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP); for (c = 0; c < max_ncpus; c++) { int i; @@ -109,7 +109,7 @@ txg_fini(dsl_pool_t *dp) if (tx->tx_commit_cb_taskq != NULL) taskq_destroy(tx->tx_commit_cb_taskq); - kmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t)); + vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t)); bzero(tx, sizeof (tx_state_t)); } @@ -735,3 +735,21 @@ txg_list_next(txg_list_t *tl, void *p, uint64_t txg) return (tn == NULL ? 
NULL : (char *)tn - tl->tl_offset); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(txg_init); +EXPORT_SYMBOL(txg_fini); +EXPORT_SYMBOL(txg_sync_start); +EXPORT_SYMBOL(txg_sync_stop); +EXPORT_SYMBOL(txg_hold_open); +EXPORT_SYMBOL(txg_rele_to_quiesce); +EXPORT_SYMBOL(txg_rele_to_sync); +EXPORT_SYMBOL(txg_register_callbacks); +EXPORT_SYMBOL(txg_suspend); +EXPORT_SYMBOL(txg_resume); +EXPORT_SYMBOL(txg_delay); +EXPORT_SYMBOL(txg_wait_synced); +EXPORT_SYMBOL(txg_wait_open); +EXPORT_SYMBOL(txg_stalled); +EXPORT_SYMBOL(txg_sync_waiting); +#endif diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 57869b6e6..dc9416ee4 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2782,3 +2782,11 @@ vdev_expand(vdev_t *vd, uint64_t txg) vdev_config_dirty(vd); } } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(vdev_fault); +EXPORT_SYMBOL(vdev_degrade); +EXPORT_SYMBOL(vdev_online); +EXPORT_SYMBOL(vdev_offline); +EXPORT_SYMBOL(vdev_clear); +#endif diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c new file mode 100644 index 000000000..c1e0aa779 --- /dev/null +++ b/module/zfs/vdev_disk.c @@ -0,0 +1,623 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/vdev_disk.h> +#include <sys/vdev_impl.h> +#include <sys/fs/zfs.h> +#include <sys/zio.h> +#include <sys/sunldi.h> + +/* + * Virtual device vector for disks. + */ +typedef struct dio_request { + struct completion dr_comp; /* Completion for sync IO */ + atomic_t dr_ref; /* References */ + zio_t *dr_zio; /* Parent ZIO */ + int dr_rw; /* Read/Write */ + int dr_error; /* Bio error */ + int dr_bio_count; /* Count of bio's */ + struct bio *dr_bio[0]; /* Attached bio's */ +} dio_request_t; + + +#ifdef HAVE_OPEN_BDEV_EXCLUSIVE +static fmode_t +vdev_bdev_mode(int smode) +{ + fmode_t mode = 0; + + ASSERT3S(smode & (FREAD | FWRITE), !=, 0); + + if (smode & FREAD) + mode |= FMODE_READ; + + if (smode & FWRITE) + mode |= FMODE_WRITE; + + return mode; +} +#else +static int +vdev_bdev_mode(int smode) +{ + int mode = 0; + + ASSERT3S(smode & (FREAD | FWRITE), !=, 0); + + if ((smode & FREAD) && !(smode & FWRITE)) + mode = MS_RDONLY; + + return mode; +} +#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ + +static uint64_t +bdev_capacity(struct block_device *bdev) +{ + struct hd_struct *part = bdev->bd_part; + + /* The partition capacity referenced by the block device */ + if (part) + return part->nr_sects; + + /* Otherwise assume the full device capacity */ + return get_capacity(bdev->bd_disk); +} + +static int +vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) +{ + struct block_device *bdev; + vdev_disk_t *vd; + int mode, block_size; + + /* Must have a pathname and it must be absolute. 
*/ + if (v->vdev_path == NULL || v->vdev_path[0] != '/') { + v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return EINVAL; + } + + vd = kmem_zalloc(sizeof(vdev_disk_t), KM_SLEEP); + if (vd == NULL) + return ENOMEM; + + /* + * Devices are always opened by the path provided at configuration + * time. This means that if the provided path is a udev by-id path + * then drives may be recabled without an issue. If the provided + * path is a udev by-path path then the physical location information + * will be preserved. This can be critical for more complicated + * configurations where drives are located in specific physical + * locations to maximize the system's tolerance to component failure. + * Alternately you can provide your own udev rule to flexibly map + * the drives as you see fit. It is not advised that you use the + * /dev/[hd]d devices which may be reordered due to probing order. + * Devices in the wrong locations will be detected by the higher + * level vdev validation. + */ + mode = spa_mode(v->vdev_spa); + bdev = vdev_bdev_open(v->vdev_path, vdev_bdev_mode(mode), vd); + if (IS_ERR(bdev)) { + kmem_free(vd, sizeof(vdev_disk_t)); + return -PTR_ERR(bdev); + } + + v->vdev_tsd = vd; + vd->vd_bdev = bdev; + block_size = vdev_bdev_block_size(bdev); + + /* Check if this is a whole device. When bdev->bd_contains == + * bdev we have a whole device and not simply a partition. */ + v->vdev_wholedisk = !!(bdev->bd_contains == bdev); + + /* Clear the nowritecache bit, causes vdev_reopen() to try again. 
*/ + v->vdev_nowritecache = B_FALSE; + + /* Physical volume size in bytes */ + *psize = bdev_capacity(bdev) * block_size; + + /* Based on the minimum sector size set the block size */ + *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; + + return 0; +} + +static void +vdev_disk_close(vdev_t *v) +{ + vdev_disk_t *vd = v->vdev_tsd; + + if (vd == NULL) + return; + + if (vd->vd_bdev != NULL) + vdev_bdev_close(vd->vd_bdev, + vdev_bdev_mode(spa_mode(v->vdev_spa))); + + kmem_free(vd, sizeof(vdev_disk_t)); + v->vdev_tsd = NULL; +} + +static dio_request_t * +vdev_disk_dio_alloc(int bio_count) +{ + dio_request_t *dr; + int i; + + dr = kmem_zalloc(sizeof(dio_request_t) + + sizeof(struct bio *) * bio_count, KM_SLEEP); + if (dr) { + init_completion(&dr->dr_comp); + atomic_set(&dr->dr_ref, 0); + dr->dr_bio_count = bio_count; + dr->dr_error = 0; + + for (i = 0; i < dr->dr_bio_count; i++) + dr->dr_bio[i] = NULL; + } + + return dr; +} + +static void +vdev_disk_dio_free(dio_request_t *dr) +{ + int i; + + for (i = 0; i < dr->dr_bio_count; i++) + if (dr->dr_bio[i]) + bio_put(dr->dr_bio[i]); + + kmem_free(dr, sizeof(dio_request_t) + + sizeof(struct bio *) * dr->dr_bio_count); +} + +static void +vdev_disk_dio_get(dio_request_t *dr) +{ + atomic_inc(&dr->dr_ref); +} + +static int +vdev_disk_dio_put(dio_request_t *dr) +{ + int rc = atomic_dec_return(&dr->dr_ref); + + /* + * Free the dio_request when the last reference is dropped and + * ensure zio_interrupt is called only once with the correct zio + */ + if (rc == 0) { + zio_t *zio = dr->dr_zio; + int error = dr->dr_error; + + vdev_disk_dio_free(dr); + + if (zio) { + zio->io_error = error; + zio_interrupt(zio); + } + } + + return rc; +} + +BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, error) +{ + dio_request_t *dr = bio->bi_private; + int rc; + + /* Fatal error but print some useful debugging before asserting */ + if (dr == NULL) { + printk("FATAL: bio->bi_private == NULL\n" + "bi_next: %p, bi_flags: %lx, bi_rw: %lu, 
bi_vcnt: %d\n" + "bi_idx: %d, bi_size: %d, bi_end_io: %p, bi_cnt: %d\n", + bio->bi_next, bio->bi_flags, bio->bi_rw, bio->bi_vcnt, + bio->bi_idx, bio->bi_size, bio->bi_end_io, + atomic_read(&bio->bi_cnt)); + SBUG(); + } + +#ifndef HAVE_2ARGS_BIO_END_IO_T + if (bio->bi_size) + return 1; +#endif /* HAVE_2ARGS_BIO_END_IO_T */ + + if (error == 0 && !test_bit(BIO_UPTODATE, &bio->bi_flags)) + error = EIO; + + if (dr->dr_error == 0) + dr->dr_error = error; + + /* Drop reference acquired by __vdev_disk_physio */ + rc = vdev_disk_dio_put(dr); + + /* Wake up the synchronous waiter when this is the last outstanding bio */ + if ((rc == 1) && (dr->dr_rw & (1 << DIO_RW_SYNCIO))) + complete(&dr->dr_comp); + + BIO_END_IO_RETURN(0); +} + +static inline unsigned long +bio_nr_pages(void *bio_ptr, unsigned int bio_size) +{ + return ((((unsigned long)bio_ptr + bio_size + PAGE_SIZE - 1) >> + PAGE_SHIFT) - ((unsigned long)bio_ptr >> PAGE_SHIFT)); +} + +static unsigned int +bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) +{ + unsigned int offset, size, i; + struct page *page; + + offset = offset_in_page(bio_ptr); + for (i = 0; i < bio->bi_max_vecs; i++) { + size = PAGE_SIZE - offset; + + if (bio_size <= 0) + break; + + if (size > bio_size) + size = bio_size; + + if (kmem_virt(bio_ptr)) + page = vmalloc_to_page(bio_ptr); + else + page = virt_to_page(bio_ptr); + + if (bio_add_page(bio, page, size, offset) != size) + break; + + bio_ptr += size; + bio_size -= size; + offset = 0; + } + + return bio_size; +} + +static int +__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, + size_t kbuf_size, uint64_t kbuf_offset, int flags) +{ + dio_request_t *dr; + caddr_t bio_ptr; + uint64_t bio_offset; + int bio_size, bio_count = 16; + int i = 0, error = 0, block_size; + +retry: + dr = vdev_disk_dio_alloc(bio_count); + if (dr == NULL) + return ENOMEM; + + dr->dr_zio = zio; + dr->dr_rw = flags; + block_size = vdev_bdev_block_size(bdev); + +#ifdef BIO_RW_FAILFAST + if (flags & (1 
<< BIO_RW_FAILFAST)) + dr->dr_rw |= 1 << BIO_RW_FAILFAST; +#endif /* BIO_RW_FAILFAST */ + + /* + * When the IO size exceeds the maximum bio size for the request + * queue we are forced to break the IO in multiple bio's and wait + * for them all to complete. Ideally, all pool users will set + * their volume block size to match the maximum request size and + * the common case will be one bio per vdev IO request. + */ + bio_ptr = kbuf_ptr; + bio_offset = kbuf_offset; + bio_size = kbuf_size; + for (i = 0; i <= dr->dr_bio_count; i++) { + + /* Finished constructing bio's for given buffer */ + if (bio_size <= 0) + break; + + /* + * By default only 'bio_count' bio's per dio are allowed. + * However, if we find ourselves in a situation where more + * are needed we allocate a larger dio and warn the user. + */ + if (dr->dr_bio_count == i) { + vdev_disk_dio_free(dr); + bio_count *= 2; + printk("WARNING: Resized bio's/dio to %d\n",bio_count); + goto retry; + } + + dr->dr_bio[i] = bio_alloc(GFP_NOIO, + bio_nr_pages(bio_ptr, bio_size)); + if (dr->dr_bio[i] == NULL) { + vdev_disk_dio_free(dr); + return ENOMEM; + } + + /* Matching put called by vdev_disk_physio_completion */ + vdev_disk_dio_get(dr); + + dr->dr_bio[i]->bi_bdev = bdev; + dr->dr_bio[i]->bi_sector = bio_offset / block_size; + dr->dr_bio[i]->bi_rw = dr->dr_rw; + dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; + dr->dr_bio[i]->bi_private = dr; + + /* Remaining size is returned to become the new size */ + bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size); + + /* Advance in buffer and construct another bio if needed */ + bio_ptr += dr->dr_bio[i]->bi_size; + bio_offset += dr->dr_bio[i]->bi_size; + } + + /* Extra reference to protect dio_request during submit_bio */ + vdev_disk_dio_get(dr); + + /* Submit all bio's associated with this dio */ + for (i = 0; i < dr->dr_bio_count; i++) + if (dr->dr_bio[i]) + submit_bio(dr->dr_rw, dr->dr_bio[i]); + + /* + * On synchronous blocking requests we wait for all bio the 
completion + * callbacks to run. We will be woken when the last callback runs + * for this dio. We are responsible for putting the last dio_request + * reference, which will in turn put back the last bio references. The + * only synchronous consumer is vdev_disk_read_rootlabel(); all other + * IO originating from vdev_disk_io_start() is asynchronous. + */ + if (dr->dr_rw & (1 << DIO_RW_SYNCIO)) { + wait_for_completion(&dr->dr_comp); + error = dr->dr_error; + ASSERT3S(atomic_read(&dr->dr_ref), ==, 1); + } + + (void)vdev_disk_dio_put(dr); + + return error; +} + +int +vdev_disk_physio(struct block_device *bdev, caddr_t kbuf, + size_t size, uint64_t offset, int flags) +{ + return __vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags); +} + +/* 2.6.24 API change */ +#ifdef HAVE_BIO_EMPTY_BARRIER +BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc) +{ + zio_t *zio = bio->bi_private; + + zio->io_error = -rc; + if (rc && (rc == -EOPNOTSUPP)) + zio->io_vd->vdev_nowritecache = B_TRUE; + + bio_put(bio); + zio_interrupt(zio); + + BIO_END_IO_RETURN(0); +} + +static int +vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) +{ + struct request_queue *q; + struct bio *bio; + + q = bdev_get_queue(bdev); + if (!q) + return ENXIO; + + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return ENOMEM; + + bio->bi_end_io = vdev_disk_io_flush_completion; + bio->bi_private = zio; + bio->bi_bdev = bdev; + submit_bio(WRITE_BARRIER, bio); + + return 0; +} +#else +static int +vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) +{ + return ENOTSUP; +} +#endif /* HAVE_BIO_EMPTY_BARRIER */ + +static int +vdev_disk_io_start(zio_t *zio) +{ + vdev_t *v = zio->io_vd; + vdev_disk_t *vd = v->vdev_tsd; + int flags, error; + + switch (zio->io_type) { + case ZIO_TYPE_IOCTL: + + if (!vdev_readable(v)) { + zio->io_error = ENXIO; + return ZIO_PIPELINE_CONTINUE; + } + + switch (zio->io_cmd) { + case DKIOCFLUSHWRITECACHE: + + if (zfs_nocacheflush) + break; + + if (v->vdev_nowritecache) { + 
zio->io_error = ENOTSUP; + break; + } + + error = vdev_disk_io_flush(vd->vd_bdev, zio); + if (error == 0) + return ZIO_PIPELINE_STOP; + + zio->io_error = error; + if (error == ENOTSUP) + v->vdev_nowritecache = B_TRUE; + + break; + + default: + zio->io_error = ENOTSUP; + } + + return ZIO_PIPELINE_CONTINUE; + + case ZIO_TYPE_WRITE: + flags = WRITE; + break; + + case ZIO_TYPE_READ: + flags = READ; + break; + + default: + zio->io_error = ENOTSUP; + return ZIO_PIPELINE_CONTINUE; + } + +#ifdef BIO_RW_FAILFAST + if (zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) + flags |= (1 << BIO_RW_FAILFAST); +#endif /* BIO_RW_FAILFAST */ + + error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data, + zio->io_size, zio->io_offset, flags); + if (error) { + zio->io_error = error; + return ZIO_PIPELINE_CONTINUE; + } + + return ZIO_PIPELINE_STOP; +} + +static void +vdev_disk_io_done(zio_t *zio) +{ + /* + * If the device returned EIO, we revalidate the media. If it is + * determined the media has changed this triggers the asynchronous + * removal of the device from the configuration. + */ + if (zio->io_error == EIO) { + vdev_t *v = zio->io_vd; + vdev_disk_t *vd = v->vdev_tsd; + + if (check_disk_change(vd->vd_bdev)) { + vdev_bdev_invalidate(vd->vd_bdev); + v->vdev_remove_wanted = B_TRUE; + spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); + } + } +} + +vdev_ops_t vdev_disk_ops = { + vdev_disk_open, + vdev_disk_close, + vdev_default_asize, + vdev_disk_io_start, + vdev_disk_io_done, + NULL, + VDEV_TYPE_DISK, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +/* + * Given the root disk device devid or pathname, read the label from + * the device, and construct a configuration nvlist. 
+ */ +int +vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) +{ + struct block_device *bdev; + vdev_label_t *label; + uint64_t s, size; + int i; + + bdev = vdev_bdev_open(devpath, vdev_bdev_mode(FREAD), NULL); + if (IS_ERR(bdev)) + return -PTR_ERR(bdev); + + s = bdev_capacity(bdev) * vdev_bdev_block_size(bdev); + if (s == 0) { + vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); + return EIO; + } + + size = P2ALIGN_TYPED(s, sizeof(vdev_label_t), uint64_t); + label = vmem_alloc(sizeof(vdev_label_t), KM_SLEEP); + + for (i = 0; i < VDEV_LABELS; i++) { + uint64_t offset, state, txg = 0; + + /* read vdev label */ + offset = vdev_label_offset(size, i, 0); + if (vdev_disk_physio(bdev, (caddr_t)label, + VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, READ_SYNC) != 0) + continue; + + if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, + sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) { + *config = NULL; + continue; + } + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state >= POOL_STATE_DESTROYED) { + nvlist_free(*config); + *config = NULL; + continue; + } + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0) { + nvlist_free(*config); + *config = NULL; + continue; + } + + break; + } + + vmem_free(label, sizeof(vdev_label_t)); + vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); + + return 0; +} diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 9958a0e78..ab54d52b5 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -405,3 +405,14 @@ vdev_queue_io_done(zio_t *zio) mutex_exit(&vq->vq_lock); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(zfs_vdev_max_pending, int, 0644); +MODULE_PARM_DESC(zfs_vdev_max_pending, "Maximum pending VDEV IO"); + +module_param(zfs_vdev_min_pending, int, 0644); +MODULE_PARM_DESC(zfs_vdev_min_pending, "Minimum pending VDEV IO"); + +module_param(zfs_vdev_aggregation_limit, int, 0644); 
+MODULE_PARM_DESC(zfs_vdev_aggregation_limit, "Maximum VDEV IO aggregation"); +#endif diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 7e278dd2b..f5164ced6 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -1180,3 +1180,19 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, zap_unlockdir(zap); return (err); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zap_add); +EXPORT_SYMBOL(zap_create); +EXPORT_SYMBOL(zap_cursor_advance); +EXPORT_SYMBOL(zap_cursor_fini); +EXPORT_SYMBOL(zap_cursor_init); +EXPORT_SYMBOL(zap_cursor_init_serialized); +EXPORT_SYMBOL(zap_cursor_move_to_key); +EXPORT_SYMBOL(zap_cursor_retrieve); +EXPORT_SYMBOL(zap_cursor_serialize); +EXPORT_SYMBOL(zap_lookup); +EXPORT_SYMBOL(zap_lookup_norm); +EXPORT_SYMBOL(zap_remove); +EXPORT_SYMBOL(zap_update); +#endif diff --git a/module/zfs/zfs_byteswap.c b/module/zfs/zfs_byteswap.c index d5f3013df..f08135e88 100644 --- a/module/zfs/zfs_byteswap.c +++ b/module/zfs/zfs_byteswap.c @@ -196,3 +196,9 @@ zfs_znode_byteswap(void *buf, size_t size) ACE_SLOT_CNT); } } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_oldacl_byteswap); +EXPORT_SYMBOL(zfs_acl_byteswap); +EXPORT_SYMBOL(zfs_znode_byteswap); +#endif diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 8b7785fa8..9809aa5d0 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -92,7 +92,7 @@ void zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, uint64_t stateoroffset, uint64_t size) { -#ifdef _KERNEL +#if defined(_KERNEL) && defined(HAVE_FM) nvlist_t *ereport, *detector; uint64_t ena; char class[64]; @@ -334,7 +334,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, static void zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) { -#ifdef _KERNEL +#if defined(_KERNEL) && defined(HAVE_FM) nvlist_t *resource; char class[64]; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 
b039414db..ac85a8ec5 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -67,6 +67,7 @@ #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_deleg.h" +#include "zfs_config.h" extern struct modlfs zfs_modlfs; @@ -146,7 +147,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) static void history_str_free(char *buf) { - kmem_free(buf, HIS_MAX_RECORD_LEN); + vmem_free(buf, HIS_MAX_RECORD_LEN); } static char * @@ -157,7 +158,7 @@ history_str_get(zfs_cmd_t *zc) if (zc->zc_history == 0) return (NULL); - buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); + buf = vmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); if (copyinstr((void *)(uintptr_t)zc->zc_history, buf, HIS_MAX_RECORD_LEN, NULL) != 0) { history_str_free(buf); @@ -824,12 +825,12 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { - packed = kmem_alloc(size, KM_SLEEP); + packed = vmem_alloc(size, KM_SLEEP); VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, KM_SLEEP) == 0); error = ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags); - kmem_free(packed, size); + vmem_free(packed, size); } zc->zc_nvlist_dst_size = size; @@ -3648,7 +3649,7 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (EINVAL); - zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); + zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); @@ -3694,7 +3695,7 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) zfs_log_history(zc); } - kmem_free(zc, sizeof (zfs_cmd_t)); + vmem_free(zc, sizeof (zfs_cmd_t)); return (error); } @@ -3795,15 +3796,27 @@ static struct dev_ops zfs_dev_ops = { }; static struct modldrv zfs_modldrv = { +#ifdef HAVE_SPL + NULL, +#else &mod_driverops, +#endif /* HAVE_SPL */ "ZFS storage pool", &zfs_dev_ops }; static 
struct modlinkage modlinkage = { MODREV_1, +#ifdef HAVE_ZPL (void *)&zfs_modlfs, +#else + NULL, +#endif /* HAVE_ZPL */ (void *)&zfs_modldrv, +#ifdef HAVE_SPL + ZFS_MAJOR, + ZFS_MINORS, +#endif /* HAVE_SPL */ NULL }; @@ -3834,6 +3847,8 @@ _init(void) ASSERT(error == 0); mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL); + printk(KERN_INFO "ZFS: Loaded ZFS Filesystem v%s\n", ZFS_META_VERSION); + return (0); } @@ -3866,8 +3881,17 @@ _fini(void) return (error); } +#ifdef HAVE_SPL +spl_module_init(_init); +spl_module_exit(_fini); + +MODULE_AUTHOR("Sun Microsystems, Inc"); +MODULE_DESCRIPTION("ZFS"); +MODULE_LICENSE("CDDL"); +#else int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } +#endif /* HAVE_SPL */ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 8eb4665ae..afecb374b 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -318,6 +318,7 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, return (ENOTTY); } +#if defined(_KERNEL) && defined(HAVE_UIO_RW) /* * Utility functions to map and unmap a single physical page. 
These * are used to manage the mappable copies of ZFS file data, and therefore @@ -342,6 +343,7 @@ zfs_unmap_page(page_t *pp, caddr_t addr) ppmapout(addr); } } +#endif /* _KERNEL && HAVE_UIO_RW */ /* * When a file is memory mapped, we must keep the IO data synchronized diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index ee27195a4..e6ec7bcf6 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1683,3 +1683,8 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) (void) memmove(buf, path, buf + len - path); return (error); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_create_fs); +EXPORT_SYMBOL(zfs_obj_to_path); +#endif diff --git a/module/zfs/zil.c b/module/zfs/zil.c index db3822f5a..f5cbf15dd 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1588,7 +1588,10 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) zr.zr_replay = replay_func; zr.zr_arg = arg; zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); - zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); + /* XXX: Changed to use vmem_alloc instead of kmem_alloc for + * large allocation size (I think this is safe here). + */ + zr.zr_lrbuf = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); /* * Wait for in-progress removes to sync before starting replay. 
@@ -1600,7 +1603,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, zh->zh_claim_txg); - kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE); + vmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 703abb17f..3cf22dd85 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -72,6 +72,7 @@ kmem_cache_t *zio_cache; kmem_cache_t *zio_link_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; +int zio_bulk_flags = 0; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; @@ -124,12 +125,13 @@ zio_init(void) char name[36]; (void) sprintf(name, "zio_buf_%lu", (ulong_t)size); zio_buf_cache[c] = kmem_cache_create(name, size, - align, NULL, NULL, NULL, NULL, NULL, KMC_NODEBUG); + align, NULL, NULL, NULL, NULL, NULL, + KMC_NODEBUG | zio_bulk_flags); (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); zio_data_buf_cache[c] = kmem_cache_create(name, size, align, NULL, NULL, NULL, NULL, data_alloc_arena, - KMC_NODEBUG); + KMC_NODEBUG | zio_bulk_flags); } } @@ -2361,3 +2363,19 @@ static zio_pipe_stage_t *zio_pipeline[ZIO_STAGES] = { zio_checksum_verify, zio_done }; + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* Fault injection */ +EXPORT_SYMBOL(zio_injection_enabled); +EXPORT_SYMBOL(zio_inject_fault); +EXPORT_SYMBOL(zio_inject_list_next); +EXPORT_SYMBOL(zio_clear_fault); +EXPORT_SYMBOL(zio_handle_fault_injection); +EXPORT_SYMBOL(zio_handle_device_injection); +EXPORT_SYMBOL(zio_handle_label_injection); +EXPORT_SYMBOL(zio_priority_table); +EXPORT_SYMBOL(zio_type_name); + +module_param(zio_bulk_flags, int, 0644); +MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers"); +#endif diff --git 
a/module/zpios/Makefile.in b/module/zpios/Makefile.in new file mode 100644 index 000000000..4924082a1 --- /dev/null +++ b/module/zpios/Makefile.in @@ -0,0 +1,11 @@ +MODULE := zpios + +EXTRA_CFLAGS = -I@MODDIR@/zfs/include +EXTRA_CFLAGS += -I@MODDIR@/zcommon/include +EXTRA_CFLAGS += -I@MODDIR@/avl/include +EXTRA_CFLAGS += -I@MODDIR@/nvpair/include +EXTRA_CFLAGS += -I@MODDIR@/unicode/include +EXTRA_CFLAGS += -I@MODDIR@/zpios/include +EXTRA_CFLAGS += @KERNELCPPFLAGS@ + +obj-m := ${MODULE}.o diff --git a/module/zpios/include/zpios-ctl.h b/module/zpios/include/zpios-ctl.h new file mode 100644 index 000000000..3880f7794 --- /dev/null +++ b/module/zpios/include/zpios-ctl.h @@ -0,0 +1,197 @@ +/* + * This file is part of the ZFS Linux port. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * LLNL-CODE-403049 + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _ZPIOS_CTL_H +#define _ZPIOS_CTL_H + +/* Contains shared definitions which both the userspace + * and kernelspace portions of zpios must agree on. + */ +#ifndef _KERNEL +#include <stdint.h> +#endif + +#define ZPIOS_MAJOR 232 /* XXX - Arbitrary */ +#define ZPIOS_MINORS 1 +#define ZPIOS_NAME "zpios" +#define ZPIOS_DEV "/dev/zpios" + +#define DMU_IO 0x01 + +#define DMU_WRITE 0x0001 +#define DMU_READ 0x0002 +#define DMU_VERIFY 0x0004 +#define DMU_REMOVE 0x0008 +#define DMU_FPP 0x0010 +#define DMU_WRITE_ZC 0x0020 /* Incompatible w/DMU_VERIFY */ +#define DMU_READ_ZC 0x0040 /* Incompatible w/DMU_VERIFY */ +#define DMU_WRITE_NOWAIT 0x0080 +#define DMU_READ_NOPF 0x0100 + +#define ZPIOS_NAME_SIZE 16 +#define ZPIOS_PATH_SIZE 128 + +#define PHASE_PRE_RUN "pre-run" +#define PHASE_PRE_CREATE "pre-create" +#define PHASE_PRE_WRITE "pre-write" +#define PHASE_PRE_READ "pre-read" +#define PHASE_PRE_REMOVE "pre-remove" +#define PHASE_POST_RUN "post-run" +#define PHASE_POST_CREATE "post-create" +#define PHASE_POST_WRITE "post-write" +#define PHASE_POST_READ "post-read" +#define PHASE_POST_REMOVE "post-remove" + +#define ZPIOS_CFG_MAGIC 0x87237190U +typedef struct zpios_cfg { + uint32_t cfg_magic; /* Unique magic */ + int32_t cfg_cmd; /* Config command */ + int32_t cfg_arg1; /* Config command arg 1 */ + int32_t cfg_rc1; /* Config response 1 */ +} zpios_cfg_t; + +typedef struct zpios_timespec { + uint32_t ts_sec; + uint32_t ts_nsec; +} zpios_timespec_t; + +typedef struct zpios_time { + zpios_timespec_t start; + zpios_timespec_t stop; + zpios_timespec_t delta; +} zpios_time_t; + +typedef struct zpios_stats { + zpios_time_t total_time; + zpios_time_t cr_time; + zpios_time_t rm_time; + zpios_time_t wr_time; + zpios_time_t rd_time; + uint64_t 
wr_data; + uint64_t wr_chunks; + uint64_t rd_data; + uint64_t rd_chunks; +} zpios_stats_t; + +#define ZPIOS_CMD_MAGIC 0x49715385U +typedef struct zpios_cmd { + uint32_t cmd_magic; /* Unique magic */ + uint32_t cmd_id; /* Run ID */ + char cmd_pool[ZPIOS_NAME_SIZE]; /* Pool name */ + uint64_t cmd_chunk_size; /* Chunk size */ + uint32_t cmd_thread_count; /* Thread count */ + uint32_t cmd_region_count; /* Region count */ + uint64_t cmd_region_size; /* Region size */ + uint64_t cmd_offset; /* Region offset */ + uint32_t cmd_region_noise; /* Region noise */ + uint32_t cmd_chunk_noise; /* Chunk noise */ + uint32_t cmd_thread_delay; /* Thread delay */ + uint32_t cmd_flags; /* Test flags */ + char cmd_pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */ + char cmd_post[ZPIOS_PATH_SIZE]; /* Post-exec hook */ + char cmd_log[ZPIOS_PATH_SIZE]; /* Requested log dir */ + uint64_t cmd_data_size; /* Opaque data size */ + char cmd_data_str[0]; /* Opaque data region */ +} zpios_cmd_t; + +/* Valid ioctls */ +#define ZPIOS_CFG _IOWR('f', 101, zpios_cfg_t) +#define ZPIOS_CMD _IOWR('f', 102, zpios_cmd_t) + +/* Valid configuration commands */ +#define ZPIOS_CFG_BUFFER_CLEAR 0x001 /* Clear text buffer */ +#define ZPIOS_CFG_BUFFER_SIZE 0x002 /* Resize text buffer */ + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000L +#endif + +static inline +void zpios_timespec_normalize(zpios_timespec_t *ts, uint32_t sec, uint32_t nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + sec++; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + sec--; + } + ts->ts_sec = sec; + ts->ts_nsec = nsec; +} + +static inline +zpios_timespec_t zpios_timespec_add(zpios_timespec_t lhs, zpios_timespec_t rhs) +{ + zpios_timespec_t ts_delta; + zpios_timespec_normalize(&ts_delta, lhs.ts_sec + rhs.ts_sec, + lhs.ts_nsec + rhs.ts_nsec); + return ts_delta; +} + +static inline +zpios_timespec_t zpios_timespec_sub(zpios_timespec_t lhs, zpios_timespec_t rhs) +{ + zpios_timespec_t ts_delta; + 
zpios_timespec_normalize(&ts_delta, lhs.ts_sec - rhs.ts_sec, + lhs.ts_nsec - rhs.ts_nsec); + return ts_delta; +} + +#ifdef _KERNEL + +static inline +zpios_timespec_t zpios_timespec_now(void) +{ + zpios_timespec_t zts_now; + struct timespec ts_now; + + ts_now = current_kernel_time(); + zts_now.ts_sec = ts_now.tv_sec; + zts_now.ts_nsec = ts_now.tv_nsec; + + return zts_now; +} + +#else + +static inline +double zpios_timespec_to_double(zpios_timespec_t ts) +{ + return ((double)(ts.ts_sec) + + ((double)(ts.ts_nsec) / (double)(NSEC_PER_SEC))); +} + +#endif /* _KERNEL */ + +#endif /* _ZPIOS_CTL_H */ diff --git a/module/zpios/include/zpios-internal.h b/module/zpios/include/zpios-internal.h new file mode 100644 index 000000000..cafe8b7d0 --- /dev/null +++ b/module/zpios/include/zpios-internal.h @@ -0,0 +1,137 @@ +/* + * This file is part of the ZFS Linux port. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * LLNL-CODE-403049 + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _ZPIOS_INTERNAL_H +#define _ZPIOS_INTERNAL_H + +#include "zpios-ctl.h" + +#define OBJ_SIZE 64 + +struct run_args; + +typedef struct dmu_obj { + objset_t *os; + uint64_t obj; +} dmu_obj_t; + +/* thread doing the IO data */ +typedef struct thread_data { + struct run_args *run_args; + int thread_no; + int rc; + zpios_stats_t stats; + kmutex_t lock; +} thread_data_t; + +/* region for IO data */ +typedef struct zpios_region { + __u64 wr_offset; + __u64 rd_offset; + __u64 init_offset; + __u64 max_offset; + dmu_obj_t obj; + zpios_stats_t stats; + kmutex_t lock; +} zpios_region_t; + +/* arguments for one run */ +typedef struct run_args { + /* Config args */ + int id; + char pool[ZPIOS_NAME_SIZE]; + __u64 chunk_size; + __u32 thread_count; + __u32 region_count; + __u64 region_size; + __u64 offset; + __u32 region_noise; + __u32 chunk_noise; + __u32 thread_delay; + __u32 flags; + char pre[ZPIOS_PATH_SIZE]; + char post[ZPIOS_PATH_SIZE]; + char log[ZPIOS_PATH_SIZE]; + + /* Control data */ + objset_t *os; + wait_queue_head_t waitq; + volatile uint64_t threads_done; + kmutex_t lock_work; + kmutex_t lock_ctl; + __u32 region_next; + + /* Results data */ + struct file *file; + zpios_stats_t stats; + + thread_data_t **threads; + zpios_region_t regions[0]; /* Must be last element */ +} run_args_t; + +#define ZPIOS_INFO_BUFFER_SIZE 65536 +#define ZPIOS_INFO_BUFFER_REDZONE 1024 + +typedef struct zpios_info { + spinlock_t info_lock; + int info_size; + char *info_buffer; + char *info_head; /* Internal kernel use only */ +} zpios_info_t; + +#define zpios_print(file, format, args...) 
\ +({ zpios_info_t *_info_ = (zpios_info_t *)file->private_data; \ + int _rc_; \ + \ + ASSERT(_info_); \ + ASSERT(_info_->info_buffer); \ + \ + spin_lock(&_info_->info_lock); \ + \ + /* Don't allow the kernel to start a write in the red zone */ \ + if ((int)(_info_->info_head - _info_->info_buffer) > \ + (_info_->info_size - ZPIOS_INFO_BUFFER_REDZONE)) { \ + _rc_ = -EOVERFLOW; \ + } else { \ + _rc_ = sprintf(_info_->info_head, format, args); \ + if (_rc_ >= 0) \ + _info_->info_head += _rc_; \ + } \ + \ + spin_unlock(&_info_->info_lock); \ + _rc_; \ +}) + +#define zpios_vprint(file, test, format, args...) \ + zpios_print(file, "%*s: " format, ZPIOS_NAME_SIZE, test, args) + +#endif /* _ZPIOS_INTERNAL_H */ diff --git a/module/zpios/zpios.c b/module/zpios/zpios.c new file mode 100644 index 000000000..f198946d2 --- /dev/null +++ b/module/zpios/zpios.c @@ -0,0 +1,1297 @@ +/* + * This file is part of the ZFS Linux port. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * LLNL-CODE-403049 + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Kernel PIOS DMU implementation originally derived from PIOS test code. + * Character control interface derived from SPL code. + */ + +#include <sys/zfs_context.h> +#include <sys/dmu.h> +#include <sys/txg.h> +#include <linux/cdev.h> +#include "zpios-internal.h" + + +static spl_class *zpios_class; +static spl_device *zpios_device; + + +static +int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc) +{ + /* This is stack heavy but it should be OK since we are only + * making the upcall between tests when the stack is shallow. + */ + char id[16], chunk_size[16], region_size[16], thread_count[16]; + char region_count[16], offset[16], region_noise[16], chunk_noise[16]; + char thread_delay[16], flags[16], result[8]; + char *argv[16], *envp[4]; + + if ((path == NULL) || (strlen(path) == 0)) + return -ENOENT; + + snprintf(id, 15, "%d", run_args->id); + snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size); + snprintf(region_size, 15, "%lu",(long unsigned) run_args->region_size); + snprintf(thread_count, 15, "%u", run_args->thread_count); + snprintf(region_count, 15, "%u", run_args->region_count); + snprintf(offset, 15, "%lu", (long unsigned)run_args->offset); + snprintf(region_noise, 15, "%u", run_args->region_noise); + snprintf(chunk_noise, 15, "%u", run_args->chunk_noise); + snprintf(thread_delay, 15, "%u", run_args->thread_delay); + snprintf(flags, 15, "0x%x", run_args->flags); + snprintf(result, 7, "%d", rc); + + /* Passing 15 args to registered pre/post upcall */ + argv[0] = path; + argv[1] = phase; + argv[2] = strlen(run_args->log) ? 
run_args->log : "<none>"; + argv[3] = id; + argv[4] = run_args->pool; + argv[5] = chunk_size; + argv[6] = region_size; + argv[7] = thread_count; + argv[8] = region_count; + argv[9] = offset; + argv[10] = region_noise; + argv[11] = chunk_noise; + argv[12] = thread_delay; + argv[13] = flags; + argv[14] = result; + argv[15] = NULL; + + /* Passing environment for user space upcall */ + envp[0] = "HOME=/"; + envp[1] = "TERM=linux"; + envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin"; + envp[3] = NULL; + + return call_usermodehelper(path, argv, envp, 1); +} + +static uint64_t +zpios_dmu_object_create(run_args_t *run_args, objset_t *os) +{ + struct dmu_tx *tx; + uint64_t obj = 0ULL; + int rc; + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE); + rc = dmu_tx_assign(tx, TXG_WAIT); + if (rc) { + zpios_print(run_args->file, + "dmu_tx_assign() failed: %d\n", rc); + dmu_tx_abort(tx); + return obj; + } + + obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx); + if (rc) { + zpios_print(run_args->file, + "dmu_object_set_blocksize() failed: %d\n", rc); + dmu_tx_abort(tx); + return obj; + } + + dmu_tx_commit(tx); + + return obj; +} + +static int +zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj) +{ + struct dmu_tx *tx; + int rc; + + tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); + rc = dmu_tx_assign(tx, TXG_WAIT); + if (rc) { + zpios_print(run_args->file, + "dmu_tx_assign() failed: %d\n", rc); + dmu_tx_abort(tx); + return rc; + } + + rc = dmu_object_free(os, obj, tx); + if (rc) { + zpios_print(run_args->file, + "dmu_object_free() failed: %d\n", rc); + dmu_tx_abort(tx); + return rc; + } + + dmu_tx_commit(tx); + + return 0; +} + +static int +zpios_dmu_setup(run_args_t *run_args) +{ + zpios_time_t *t = &(run_args->stats.cr_time); + objset_t *os; + uint64_t obj = 0ULL; + int i, rc = 0; + + (void)zpios_upcall(run_args->pre, 
PHASE_PRE_CREATE, run_args, 0); + t->start = zpios_timespec_now(); + + rc = dmu_objset_open(run_args->pool, DMU_OST_ZFS, DS_MODE_USER, &os); + if (rc) { + zpios_print(run_args->file, "Error dmu_objset_open() " + "failed: %d\n", rc); + goto out; + } + + if (!(run_args->flags & DMU_FPP)) { + obj = zpios_dmu_object_create(run_args, os); + if (obj == 0) { + rc = -EBADF; + zpios_print(run_args->file, "Error zpios_dmu_" + "object_create() failed, %d\n", rc); + goto out; + } + } + + for (i = 0; i < run_args->region_count; i++) { + zpios_region_t *region; + + region = &run_args->regions[i]; + mutex_init(®ion->lock, NULL, MUTEX_DEFAULT, NULL); + + if (run_args->flags & DMU_FPP) { + /* File per process */ + region->obj.os = os; + region->obj.obj = zpios_dmu_object_create(run_args, os); + ASSERT(region->obj.obj > 0); /* XXX - Handle this */ + region->wr_offset = run_args->offset; + region->rd_offset = run_args->offset; + region->init_offset = run_args->offset; + region->max_offset = run_args->offset + + run_args->region_size; + } else { + /* Single shared file */ + region->obj.os = os; + region->obj.obj = obj; + region->wr_offset = run_args->offset * i; + region->rd_offset = run_args->offset * i; + region->init_offset = run_args->offset * i; + region->max_offset = run_args->offset * + i + run_args->region_size; + } + } + + run_args->os = os; +out: + t->stop = zpios_timespec_now(); + t->delta = zpios_timespec_sub(t->stop, t->start); + (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc); + + return rc; +} + +static int +zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file) +{ + run_args_t *ra; + int rc, size; + + size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t); + + ra = vmem_zalloc(size, KM_SLEEP); + if (ra == NULL) { + zpios_print(file, "Unable to vmem_zalloc() %d bytes " + "for regions\n", size); + return -ENOMEM; + } + + *run_args = ra; + strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1); + strncpy(ra->pre, 
kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1); + strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1); + strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1); + ra->id = kcmd->cmd_id; + ra->chunk_size = kcmd->cmd_chunk_size; + ra->thread_count = kcmd->cmd_thread_count; + ra->region_count = kcmd->cmd_region_count; + ra->region_size = kcmd->cmd_region_size; + ra->offset = kcmd->cmd_offset; + ra->region_noise = kcmd->cmd_region_noise; + ra->chunk_noise = kcmd->cmd_chunk_noise; + ra->thread_delay = kcmd->cmd_thread_delay; + ra->flags = kcmd->cmd_flags; + ra->stats.wr_data = 0; + ra->stats.wr_chunks = 0; + ra->stats.rd_data = 0; + ra->stats.rd_chunks = 0; + ra->region_next = 0; + ra->file = file; + mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL); + + (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0); + + rc = zpios_dmu_setup(ra); + if (rc) { + mutex_destroy(&ra->lock_ctl); + mutex_destroy(&ra->lock_work); + vmem_free(ra, size); + *run_args = NULL; + } + + return rc; +} + +static int +zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset, + __u32 *chunk_size, zpios_region_t **region, __u32 flags) +{ + int i, j, count = 0; + unsigned int random_int; + + get_random_bytes(&random_int, sizeof(unsigned int)); + + mutex_enter(&run_args->lock_work); + i = run_args->region_next; + + /* XXX: I don't much care for this chunk selection mechansim + * there's the potential to burn a lot of time here doing nothing + * useful while holding the global lock. This could give some + * misleading performance results. I'll fix it latter. 
+ */ + while (count < run_args->region_count) { + __u64 *rw_offset; + zpios_time_t *rw_time; + + j = i % run_args->region_count; + *region = &(run_args->regions[j]); + + if (flags & DMU_WRITE) { + rw_offset = &((*region)->wr_offset); + rw_time = &((*region)->stats.wr_time); + } else { + rw_offset = &((*region)->rd_offset); + rw_time = &((*region)->stats.rd_time); + } + + /* test if region is fully written */ + if (*rw_offset + *chunk_size > (*region)->max_offset) { + i++; + count++; + + if (unlikely(rw_time->stop.ts_sec == 0) && + unlikely(rw_time->stop.ts_nsec == 0)) + rw_time->stop = zpios_timespec_now(); + + continue; + } + + *offset = *rw_offset; + *obj = (*region)->obj; + *rw_offset += *chunk_size; + + /* update ctl structure */ + if (run_args->region_noise) { + get_random_bytes(&random_int, sizeof(unsigned int)); + run_args->region_next += random_int % run_args->region_noise; + } else { + run_args->region_next++; + } + + mutex_exit(&run_args->lock_work); + return 1; + } + + /* nothing left to do */ + mutex_exit(&run_args->lock_work); + + return 0; +} + +static void +zpios_remove_objects(run_args_t *run_args) +{ + zpios_time_t *t = &(run_args->stats.rm_time); + zpios_region_t *region; + int rc = 0, i; + + (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0); + t->start = zpios_timespec_now(); + + if (run_args->flags & DMU_REMOVE) { + if (run_args->flags & DMU_FPP) { + for (i = 0; i < run_args->region_count; i++) { + region = &run_args->regions[i]; + rc = zpios_dmu_object_free(run_args, + region->obj.os, + region->obj.obj); + if (rc) + zpios_print(run_args->file, "Error " + "removing object %d, %d\n", + (int)region->obj.obj, rc); + } + } else { + region = &run_args->regions[0]; + rc = zpios_dmu_object_free(run_args, + region->obj.os, + region->obj.obj); + if (rc) + zpios_print(run_args->file, "Error " + "removing object %d, %d\n", + (int)region->obj.obj, rc); + } + } + + dmu_objset_close(run_args->os); + + t->stop = zpios_timespec_now(); + t->delta 
= zpios_timespec_sub(t->stop, t->start); + (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc); +} + +static void +zpios_cleanup_run(run_args_t *run_args) +{ + int i, size = 0; + + if (run_args == NULL) + return; + + if (run_args->threads != NULL) { + for (i = 0; i < run_args->thread_count; i++) { + if (run_args->threads[i]) { + mutex_destroy(&run_args->threads[i]->lock); + kmem_free(run_args->threads[i], + sizeof(thread_data_t)); + } + } + + kmem_free(run_args->threads, + sizeof(thread_data_t *) * run_args->thread_count); + } + + for (i = 0; i < run_args->region_count; i++) + mutex_destroy(&run_args->regions[i].lock); + + mutex_destroy(&run_args->lock_work); + mutex_destroy(&run_args->lock_ctl); + size = run_args->region_count * sizeof(zpios_region_t); + + vmem_free(run_args, sizeof(*run_args) + size); +} + +static int +zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object, + uint64_t offset, uint64_t size, const void *buf) +{ + struct dmu_tx *tx; + int rc, how = TXG_WAIT; + int flags = 0; + + if (run_args->flags & DMU_WRITE_NOWAIT) + how = TXG_NOWAIT; + + while (1) { + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, object, offset, size); + rc = dmu_tx_assign(tx, how); + + if (rc) { + if (rc == ERESTART && how == TXG_NOWAIT) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + continue; + } + zpios_print(run_args->file, + "Error in dmu_tx_assign(), %d", rc); + dmu_tx_abort(tx); + return rc; + } + break; + } + + if (run_args->flags & DMU_WRITE_ZC) + flags |= DMU_WRITE_ZEROCOPY; + + dmu_write_impl(os, object, offset, size, buf, tx, flags); + dmu_tx_commit(tx); + + return 0; +} + +static int +zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object, + uint64_t offset, uint64_t size, void *buf) +{ + int flags = 0; + + if (run_args->flags & DMU_READ_ZC) + flags |= DMU_READ_ZEROCOPY; + + if (run_args->flags & DMU_READ_NOPF) + flags |= DMU_READ_NO_PREFETCH; + + return dmu_read(os, object, offset, size, buf, flags); +} + +static int 
+zpios_thread_main(void *data) +{ + thread_data_t *thr = (thread_data_t *)data; + run_args_t *run_args = thr->run_args; + zpios_time_t t; + dmu_obj_t obj; + __u64 offset; + __u32 chunk_size; + zpios_region_t *region; + char *buf; + unsigned int random_int; + int chunk_noise = run_args->chunk_noise; + int chunk_noise_tmp = 0; + int thread_delay = run_args->thread_delay; + int thread_delay_tmp = 0; + int i, rc = 0; + + if (chunk_noise) { + get_random_bytes(&random_int, sizeof(unsigned int)); + chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise; + } + + /* It's OK to vmem_alloc() this memory because it will be copied + * in to the slab and pointers to the slab copy will be setup in + * the bio when the IO is submitted. This of course is not ideal + * since we want a zero-copy IO path if possible. It would be nice + * to have direct access to those slab entries. + */ + chunk_size = run_args->chunk_size + chunk_noise_tmp; + buf = (char *)vmem_alloc(chunk_size, KM_SLEEP); + ASSERT(buf); + + /* Trivial data verification pattern for now. 
*/ + if (run_args->flags & DMU_VERIFY) + memset(buf, 'z', chunk_size); + + /* Write phase */ + mutex_enter(&thr->lock); + thr->stats.wr_time.start = zpios_timespec_now(); + mutex_exit(&thr->lock); + + while (zpios_get_work_item(run_args, &obj, &offset, + &chunk_size, ®ion, DMU_WRITE)) { + if (thread_delay) { + get_random_bytes(&random_int, sizeof(unsigned int)); + thread_delay_tmp = random_int % thread_delay; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(thread_delay_tmp); /* In jiffies */ + } + + t.start = zpios_timespec_now(); + rc = zpios_dmu_write(run_args, obj.os, obj.obj, + offset, chunk_size, buf); + t.stop = zpios_timespec_now(); + t.delta = zpios_timespec_sub(t.stop, t.start); + + if (rc) { + zpios_print(run_args->file, "IO error while doing " + "dmu_write(): %d\n", rc); + break; + } + + mutex_enter(&thr->lock); + thr->stats.wr_data += chunk_size; + thr->stats.wr_chunks++; + thr->stats.wr_time.delta = zpios_timespec_add( + thr->stats.wr_time.delta, t.delta); + mutex_exit(&thr->lock); + + mutex_enter(®ion->lock); + region->stats.wr_data += chunk_size; + region->stats.wr_chunks++; + region->stats.wr_time.delta = zpios_timespec_add( + region->stats.wr_time.delta, t.delta); + + /* First time region was accessed */ + if (region->init_offset == offset) + region->stats.wr_time.start = t.start; + + mutex_exit(®ion->lock); + } + + mutex_enter(&run_args->lock_ctl); + run_args->threads_done++; + mutex_exit(&run_args->lock_ctl); + + mutex_enter(&thr->lock); + thr->rc = rc; + thr->stats.wr_time.stop = zpios_timespec_now(); + mutex_exit(&thr->lock); + wake_up(&run_args->waitq); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + + /* Check if we should exit */ + mutex_enter(&thr->lock); + rc = thr->rc; + mutex_exit(&thr->lock); + if (rc) + goto out; + + /* Read phase */ + mutex_enter(&thr->lock); + thr->stats.rd_time.start = zpios_timespec_now(); + mutex_exit(&thr->lock); + + while (zpios_get_work_item(run_args, &obj, &offset, + &chunk_size, 
®ion, DMU_READ)) { + if (thread_delay) { + get_random_bytes(&random_int, sizeof(unsigned int)); + thread_delay_tmp = random_int % thread_delay; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(thread_delay_tmp); /* In jiffies */ + } + + if (run_args->flags & DMU_VERIFY) + memset(buf, 0, chunk_size); + + t.start = zpios_timespec_now(); + rc = zpios_dmu_read(run_args, obj.os, obj.obj, + offset, chunk_size, buf); + t.stop = zpios_timespec_now(); + t.delta = zpios_timespec_sub(t.stop, t.start); + + if (rc) { + zpios_print(run_args->file, "IO error while doing " + "dmu_read(): %d\n", rc); + break; + } + + /* Trivial data verification, expensive! */ + if (run_args->flags & DMU_VERIFY) { + for (i = 0; i < chunk_size; i++) { + if (buf[i] != 'z') { + zpios_print(run_args->file, + "IO verify error: %d/%d/%d\n", + (int)obj.obj, (int)offset, + (int)chunk_size); + break; + } + } + } + + mutex_enter(&thr->lock); + thr->stats.rd_data += chunk_size; + thr->stats.rd_chunks++; + thr->stats.rd_time.delta = zpios_timespec_add( + thr->stats.rd_time.delta, t.delta); + mutex_exit(&thr->lock); + + mutex_enter(®ion->lock); + region->stats.rd_data += chunk_size; + region->stats.rd_chunks++; + region->stats.rd_time.delta = zpios_timespec_add( + region->stats.rd_time.delta, t.delta); + + /* First time region was accessed */ + if (region->init_offset == offset) + region->stats.rd_time.start = t.start; + + mutex_exit(®ion->lock); + } + + mutex_enter(&run_args->lock_ctl); + run_args->threads_done++; + mutex_exit(&run_args->lock_ctl); + + mutex_enter(&thr->lock); + thr->rc = rc; + thr->stats.rd_time.stop = zpios_timespec_now(); + mutex_exit(&thr->lock); + wake_up(&run_args->waitq); + +out: + vmem_free(buf, chunk_size); + do_exit(0); + + return rc; /* Unreachable, due to do_exit() */ +} + +static int +zpios_thread_done(run_args_t *run_args) +{ + ASSERT(run_args->threads_done <= run_args->thread_count); + return (run_args->threads_done == run_args->thread_count); +} + +static int 
+zpios_threads_run(run_args_t *run_args) +{ + struct task_struct *tsk, **tsks; + thread_data_t *thr = NULL; + zpios_time_t *tt = &(run_args->stats.total_time); + zpios_time_t *tw = &(run_args->stats.wr_time); + zpios_time_t *tr = &(run_args->stats.rd_time); + int i, rc = 0, tc = run_args->thread_count; + + tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP); + if (tsks == NULL) { + rc = -ENOMEM; + goto cleanup2; + } + + run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP); + if (run_args->threads == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + init_waitqueue_head(&run_args->waitq); + run_args->threads_done = 0; + + /* Create all the needed threads which will sleep until awoken */ + for (i = 0; i < tc; i++) { + thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP); + if (thr == NULL) { + rc = -ENOMEM; + goto taskerr; + } + + thr->thread_no = i; + thr->run_args = run_args; + thr->rc = 0; + mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL); + run_args->threads[i] = thr; + + tsk = kthread_create(zpios_thread_main, (void *)thr, + "%s/%d", "zpios_io", i); + if (IS_ERR(tsk)) { + rc = -EINVAL; + goto taskerr; + } + + tsks[i] = tsk; + } + + tt->start = zpios_timespec_now(); + + /* Wake up all threads for write phase */ + (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0); + for (i = 0; i < tc; i++) + wake_up_process(tsks[i]); + + /* Wait for write phase to complete */ + tw->start = zpios_timespec_now(); + wait_event(run_args->waitq, zpios_thread_done(run_args)); + tw->stop = zpios_timespec_now(); + (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc); + + for (i = 0; i < tc; i++) { + thr = run_args->threads[i]; + + mutex_enter(&thr->lock); + + if (!rc && thr->rc) + rc = thr->rc; + + run_args->stats.wr_data += thr->stats.wr_data; + run_args->stats.wr_chunks += thr->stats.wr_chunks; + mutex_exit(&thr->lock); + } + + if (rc) { + /* Wake up all threads and tell them to exit */ + for (i = 0; i < tc; i++) { + 
mutex_enter(&thr->lock); + thr->rc = rc; + mutex_exit(&thr->lock); + + wake_up_process(tsks[i]); + } + goto out; + } + + mutex_enter(&run_args->lock_ctl); + ASSERT(run_args->threads_done == run_args->thread_count); + run_args->threads_done = 0; + mutex_exit(&run_args->lock_ctl); + + /* Wake up all threads for read phase */ + (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0); + for (i = 0; i < tc; i++) + wake_up_process(tsks[i]); + + /* Wait for read phase to complete */ + tr->start = zpios_timespec_now(); + wait_event(run_args->waitq, zpios_thread_done(run_args)); + tr->stop = zpios_timespec_now(); + (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc); + + for (i = 0; i < tc; i++) { + thr = run_args->threads[i]; + + mutex_enter(&thr->lock); + + if (!rc && thr->rc) + rc = thr->rc; + + run_args->stats.rd_data += thr->stats.rd_data; + run_args->stats.rd_chunks += thr->stats.rd_chunks; + mutex_exit(&thr->lock); + } +out: + tt->stop = zpios_timespec_now(); + tt->delta = zpios_timespec_sub(tt->stop, tt->start); + tw->delta = zpios_timespec_sub(tw->stop, tw->start); + tr->delta = zpios_timespec_sub(tr->stop, tr->start); + +cleanup: + kmem_free(tsks, sizeof(struct task_struct *) * tc); +cleanup2: + /* Returns first encountered thread error (if any) */ + return rc; + +taskerr: + /* Destroy all threads that were created successfully */ + for (i = 0; i < tc; i++) + if (tsks[i] != NULL) + (void) kthread_stop(tsks[i]); + + goto cleanup; +} + +static int +zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd, + int data_size, void *data) +{ + run_args_t *run_args = { 0 }; + zpios_stats_t *stats = (zpios_stats_t *)data; + int i, n, m, size, rc; + + if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) || + (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) { + zpios_print(file, "Invalid chunk_size, region_size, " + "thread_count, or region_count, %d\n", -EINVAL); + return -EINVAL; + } + + if (!(kcmd->cmd_flags & DMU_WRITE) || + !(kcmd->cmd_flags 
& DMU_READ)) { + zpios_print(file, "Invalid flags, minimally DMU_WRITE " + "and DMU_READ must be set, %d\n", -EINVAL); + return -EINVAL; + } + + if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && + (kcmd->cmd_flags & DMU_VERIFY)) { + zpios_print(file, "Invalid flags, DMU_*_ZC incompatible " + "with DMU_VERIFY, used for performance analysis " + "only, %d\n", -EINVAL); + return -EINVAL; + } + + /* Opaque data on return contains structs of the following form: + * + * zpios_stat_t stats[]; + * stats[0] = run_args->stats; + * stats[1-N] = threads[N]->stats; + * stats[N+1-M] = regions[M]->stats; + * + * Where N is the number of threads, and M is the number of regions. + */ + size = (sizeof(zpios_stats_t) + + (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) + + (kcmd->cmd_region_count * sizeof(zpios_stats_t))); + if (data_size < size) { + zpios_print(file, "Invalid size, command data buffer " + "size too small, (%d < %d)\n", data_size, size); + return -ENOSPC; + } + + rc = zpios_setup_run(&run_args, kcmd, file); + if (rc) + return rc; + + rc = zpios_threads_run(run_args); + zpios_remove_objects(run_args); + if (rc) + goto cleanup; + + if (stats) { + n = 1; + m = 1 + kcmd->cmd_thread_count; + stats[0] = run_args->stats; + + for (i = 0; i < kcmd->cmd_thread_count; i++) + stats[n+i] = run_args->threads[i]->stats; + + for (i = 0; i < kcmd->cmd_region_count; i++) + stats[m+i] = run_args->regions[i].stats; + } + +cleanup: + zpios_cleanup_run(run_args); + + (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0); + + return rc; +} + +static int +zpios_open(struct inode *inode, struct file *file) +{ + unsigned int minor = iminor(inode); + zpios_info_t *info; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP); + if (info == NULL) + return -ENOMEM; + + spin_lock_init(&info->info_lock); + info->info_size = ZPIOS_INFO_BUFFER_SIZE; + info->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE,KM_SLEEP); + if 
(info->info_buffer == NULL) { + kmem_free(info, sizeof(*info)); + return -ENOMEM; + } + + info->info_head = info->info_buffer; + file->private_data = (void *)info; + + return 0; +} + +static int +zpios_release(struct inode *inode, struct file *file) +{ + unsigned int minor = iminor(inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE); + kmem_free(info, sizeof(*info)); + + return 0; +} + +static int +zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) +{ + zpios_info_t *info = (zpios_info_t *)file->private_data; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + memset(info->info_buffer, 0, info->info_size); + info->info_head = info->info_buffer; + spin_unlock(&info->info_lock); + + return 0; +} + +static int +zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) +{ + zpios_info_t *info = (zpios_info_t *)file->private_data; + char *buf; + int min, size, rc = 0; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + if (kcfg->cfg_arg1 > 0) { + + size = kcfg->cfg_arg1; + buf = (char *)vmem_alloc(size, KM_SLEEP); + if (buf == NULL) { + rc = -ENOMEM; + goto out; + } + + /* Zero fill and truncate contents when coping buffer */ + min = ((size < info->info_size) ? 
size : info->info_size); + memset(buf, 0, size); + memcpy(buf, info->info_buffer, min); + vmem_free(info->info_buffer, info->info_size); + info->info_size = size; + info->info_buffer = buf; + info->info_head = info->info_buffer; + } + + kcfg->cfg_rc1 = info->info_size; + + if (copy_to_user((struct zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) + rc = -EFAULT; +out: + spin_unlock(&info->info_lock); + + return rc; +} + +static int +zpios_ioctl_cfg(struct file *file, unsigned long arg) +{ + zpios_cfg_t kcfg; + int rc = 0; + + if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg))) + return -EFAULT; + + if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) { + zpios_print(file, "Bad config magic 0x%x != 0x%x\n", + kcfg.cfg_magic, ZPIOS_CFG_MAGIC); + return -EINVAL; + } + + switch (kcfg.cfg_cmd) { + case ZPIOS_CFG_BUFFER_CLEAR: + /* cfg_arg1 - Unused + * cfg_rc1 - Unused + */ + rc = zpios_buffer_clear(file, &kcfg, arg); + break; + case ZPIOS_CFG_BUFFER_SIZE: + /* cfg_arg1 - 0 - query size; >0 resize + * cfg_rc1 - Set to current buffer size + */ + rc = zpios_buffer_size(file, &kcfg, arg); + break; + default: + zpios_print(file, "Bad config command %d\n", + kcfg.cfg_cmd); + rc = -EINVAL; + break; + } + + return rc; +} + +static int +zpios_ioctl_cmd(struct file *file, unsigned long arg) +{ + zpios_cmd_t kcmd; + int rc = -EINVAL; + void *data = NULL; + + rc = copy_from_user(&kcmd, (zpios_cfg_t *)arg, sizeof(kcmd)); + if (rc) { + zpios_print(file, "Unable to copy command structure " + "from user to kernel memory, %d\n", rc); + return -EFAULT; + } + + if (kcmd.cmd_magic != ZPIOS_CMD_MAGIC) { + zpios_print(file, "Bad command magic 0x%x != 0x%x\n", + kcmd.cmd_magic, ZPIOS_CFG_MAGIC); + return -EINVAL; + } + + /* Allocate memory for any opaque data the caller needed to pass on */ + if (kcmd.cmd_data_size > 0) { + data = (void *)vmem_alloc(kcmd.cmd_data_size, KM_SLEEP); + if (data == NULL) { + zpios_print(file, "Unable to vmem_alloc() %ld " + "bytes for data buffer\n", + 
(long)kcmd.cmd_data_size); + return -ENOMEM; + } + + rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t, + cmd_data_str)), kcmd.cmd_data_size); + if (rc) { + zpios_print(file, "Unable to copy data buffer " + "from user to kernel memory, %d\n", rc); + vmem_free(data, kcmd.cmd_data_size); + return -EFAULT; + } + } + + rc = zpios_do_one_run(file, &kcmd, kcmd.cmd_data_size, data); + + if (data != NULL) { + /* If the test failed do not print out the stats */ + if (rc) + goto cleanup; + + rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t, + cmd_data_str)), data, kcmd.cmd_data_size); + if (rc) { + zpios_print(file, "Unable to copy data buffer " + "from kernel to user memory, %d\n", rc); + rc = -EFAULT; + } + +cleanup: + vmem_free(data, kcmd.cmd_data_size); + } + + return rc; +} + +static int +zpios_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + unsigned int minor = iminor(inode); + int rc = 0; + + /* Ignore tty ioctls */ + if ((cmd & 0xffffff00) == ((int)'T') << 8) + return -ENOTTY; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + switch (cmd) { + case ZPIOS_CFG: + rc = zpios_ioctl_cfg(file, arg); + break; + case ZPIOS_CMD: + rc = zpios_ioctl_cmd(file, arg); + break; + default: + zpios_print(file, "Bad ioctl command %d\n", cmd); + rc = -EINVAL; + break; + } + + return rc; +} + +#ifdef CONFIG_COMPAT +/* Compatibility handler for ioctls from 32-bit ELF binaries */ +static long +zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return zpios_ioctl(file->f_dentry->d_inode, file, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + +/* I'm not sure why you would want to write in to this buffer from + * user space since its principal use is to pass test status info + * back to the user space, but I don't see any reason to prevent it.
+ */ +static ssize_t +zpios_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + int rc = 0; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + /* Write beyond EOF */ + if (*ppos >= info->info_size) { + rc = -EFBIG; + goto out; + } + + /* Resize count if beyond EOF */ + if (*ppos + count > info->info_size) + count = info->info_size - *ppos; + + if (copy_from_user(info->info_buffer, buf, count)) { + rc = -EFAULT; + goto out; + } + + *ppos += count; + rc = count; +out: + spin_unlock(&info->info_lock); + return rc; +} + +static ssize_t +zpios_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + int rc = 0; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + /* Read beyond EOF */ + if (*ppos >= info->info_size) + goto out; + + /* Resize count if beyond EOF */ + if (*ppos + count > info->info_size) + count = info->info_size - *ppos; + + if (copy_to_user(buf, info->info_buffer + *ppos, count)) { + rc = -EFAULT; + goto out; + } + + *ppos += count; + rc = count; +out: + spin_unlock(&info->info_lock); + return rc; +} + +static loff_t zpios_seek(struct file *file, loff_t offset, int origin) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + zpios_info_t *info = (zpios_info_t *)file->private_data; + int rc = -EINVAL; + + if (minor >= ZPIOS_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + switch (origin) { + case 0: /* SEEK_SET - No-op just do it */ + break; + case 1: /* SEEK_CUR - Seek from current */ + offset = file->f_pos + offset; + break; + case 2: /* SEEK_END - 
Seek from end */ + offset = info->info_size + offset; + break; + } + + if (offset >= 0) { + file->f_pos = offset; + file->f_version = 0; + rc = offset; + } + + spin_unlock(&info->info_lock); + + return rc; +} + +static struct cdev zpios_cdev; +static struct file_operations zpios_fops = { + .owner = THIS_MODULE, + .open = zpios_open, + .release = zpios_release, + .ioctl = zpios_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = zpios_compat_ioctl, +#endif + .read = zpios_read, + .write = zpios_write, + .llseek = zpios_seek, +}; + +static int +zpios_init(void) +{ + dev_t dev; + int rc; + + dev = MKDEV(ZPIOS_MAJOR, 0); + if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME))) + goto error; + + /* Support for registering a character driver */ + cdev_init(&zpios_cdev, &zpios_fops); + zpios_cdev.owner = THIS_MODULE; + kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME); + if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) { + printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc); + kobject_put(&zpios_cdev.kobj); + unregister_chrdev_region(dev, ZPIOS_MINORS); + goto error; + } + + /* Support for udev make driver info available in sysfs */ + zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME); + if (IS_ERR(zpios_class)) { + rc = PTR_ERR(zpios_class); + printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc); + cdev_del(&zpios_cdev); + unregister_chrdev_region(dev, ZPIOS_MINORS); + goto error; + } + + zpios_device = spl_device_create(zpios_class, NULL, + dev, NULL, ZPIOS_NAME); + return 0; +error: + printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc); + return rc; +} + +static int +zpios_fini(void) +{ + dev_t dev = MKDEV(ZPIOS_MAJOR, 0); + + spl_device_destroy(zpios_class, zpios_device, dev); + spl_class_destroy(zpios_class); + cdev_del(&zpios_cdev); + unregister_chrdev_region(dev, ZPIOS_MINORS); + + return 0; +} + +spl_module_init(zpios_init); +spl_module_exit(zpios_fini); + +MODULE_AUTHOR("LLNL / Sun"); +MODULE_DESCRIPTION("Kernel PIOS 
implementation"); +MODULE_LICENSE("GPL"); diff --git a/scripts/Makefile.am b/scripts/Makefile.am index a1dfc3871..ed6e5028f 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -4,11 +4,17 @@ nobase_pkglibexec_SCRIPTS += zconfig.sh nobase_pkglibexec_SCRIPTS += zfs.sh nobase_pkglibexec_SCRIPTS += zpool-create.sh nobase_pkglibexec_SCRIPTS += zpool-config/* +nobase_pkglibexec_SCRIPTS += zpios.sh +nobase_pkglibexec_SCRIPTS += zpios-sanity.sh +nobase_pkglibexec_SCRIPTS += zpios-survey.sh +nobase_pkglibexec_SCRIPTS += zpios-test/* +nobase_pkglibexec_SCRIPTS += zpios-profile/* EXTRA_DIST = zfs-update.sh $(nobase_pkglibexec_SCRIPTS) ZFS=${top_srcdir}/scripts/zfs.sh ZCONFIG=${top_srcdir}/scripts/zconfig.sh ZTEST=${top_builddir}/cmd/ztest/ztest +ZPIOS_SANITY=${top_srcdir}/scripts/zpios-sanity.sh check: @echo @@ -27,3 +33,11 @@ check: @echo @$(ZCONFIG) @echo + @echo -n "====================================" + @echo -n " ZPIOS " + @echo "====================================" + @echo + @$(ZFS) + @$(ZPIOS_SANITY) + @$(ZFS) -u + @echo diff --git a/scripts/common.sh b/scripts/common.sh index a5cfb5380..a840befc1 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -27,16 +27,21 @@ RAIDZ2S=() UDEVDIR=${UDEVDIR:-/usr/libexec/zfs/udev-rules} ZPOOLDIR=${ZPOOLDIR:-/usr/libexec/zfs/zpool-config} +ZPIOSDIR=${ZPIOSDIR:-/usr/libexec/zfs/zpios-test} +ZPIOSPROFILEDIR=${ZPIOSPROFILEDIR:-/usr/libexec/zfs/zpios-profile} ZDB=${ZDB:-/usr/sbin/zdb} ZFS=${ZFS:-/usr/sbin/zfs} ZINJECT=${ZINJECT:-/usr/sbin/zinject} ZPOOL=${ZPOOL:-/usr/sbin/zpool} ZTEST=${ZTEST:-/usr/sbin/ztest} +ZPIOS=${ZPIOS:-/usr/sbin/zpios} COMMON_SH=${COMMON_SH:-/usr/libexec/zfs/common.sh} ZFS_SH=${ZFS_SH:-/usr/libexec/zfs/zfs.sh} ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-/usr/libexec/zfs/zpool-create.sh} +ZPIOS_SH=${ZPIOS_SH:-/usr/libexec/zfs/zpios.sh} +ZPIOS_SURVEY_SH=${ZPIOS_SURVEY_SH:-/usr/libexec/zfs/zpios-survey.sh} LDMOD=${LDMOD:-/sbin/modprobe} LSMOD=${LSMOD:-/sbin/lsmod} diff --git 
a/scripts/zpios-profile/zpios-profile-disk.sh b/scripts/zpios-profile/zpios-profile-disk.sh new file mode 100755 index 000000000..b56ee1ee4 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-disk.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# +# /proc/diskstats <after skipping major/minor> +# Field 1 -- device name +# Field 2 -- # of reads issued +# Field 3 -- # of reads merged +# Field 4 -- # of sectors read +# Field 5 -- # of milliseconds spent reading +# Field 6 -- # of writes completed +# Field 7 -- # of writes merged +# Field 8 -- # of sectors written +# Field 9 -- # of milliseconds spent writing +# Field 10 -- # of I/Os currently in progress +# Field 11 -- # of milliseconds spent doing I/Os +# Field 12 -- weighted # of milliseconds spent doing I/Os + +PROG=zpios-profile-disk.sh + +RUN_PIDS=${0} +RUN_LOG_DIR=${1} +RUN_ID=${2} + +create_table() { + local FIELD=$1 + local ROW_M=() + local ROW_N=() + local HEADER=1 + local STEP=1 + + for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` ) + + if [ $HEADER -eq 1 ]; then + echo -n "step, " + cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", " + echo "total" + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${DISK_FILE}" + break + fi + + TOTAL=0 + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc` + let TOTAL=${TOTAL}+${DELTA} + echo -n "${DELTA}, " + done + echo "${TOTAL}, " + + let STEP=${STEP}+1 + done +} + +create_table_mbs() { + local FIELD=$1 + local TIME=$2 + local ROW_M=() + local ROW_N=() + local HEADER=1 + local STEP=1 + + for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N=( `cat ${DISK_FILE} | grep sd | cut
-c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` ) + + if [ $HEADER -eq 1 ]; then + echo -n "step, " + cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", " + echo "total" + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${DISK_FILE}" + break + fi + + TOTAL=0 + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc` + MBS=`echo "scale=2; ((${DELTA}*512)/${TIME})/(1024*1024)" | bc` + TOTAL=`echo "scale=2; ${TOTAL}+${MBS}" | bc` + echo -n "${MBS}, " + done + echo "${TOTAL}, " + + let STEP=${STEP}+1 + done +} + +echo +echo "Reads issued per device" +create_table 2 +echo +echo "Reads merged per device" +create_table 3 +echo +echo "Sectors read per device" +create_table 4 +echo "MB/s per device" +create_table_mbs 4 3 + +echo +echo "Writes issued per device" +create_table 6 +echo +echo "Writes merged per device" +create_table 7 +echo +echo "Sectors written per device" +create_table 8 +echo "MB/s per device" +create_table_mbs 8 3 + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile-pids.sh b/scripts/zpios-profile/zpios-profile-pids.sh new file mode 100755 index 000000000..3514b38e2 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-pids.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +PROG=zpios-profile-pids.sh + +RUN_PIDS=${0} +RUN_LOG_DIR=${1} +RUN_ID=${2} + +ROW_M=() +ROW_N=() +ROW_N_SCHED=() +ROW_N_WAIT=() + +HEADER=1 +STEP=1 + +for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N=( 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ) + ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` ) + ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` ) + ROW_N_NAMES=( `cat ${PID_FILE} | cut -f2 -d' ' | cut -f2 -d'(' | + cut -f1 -d')' | cut -f1 -d'/' | tr "\n" "\t"` ) + + for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do + SUM=`echo 
"${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc` + + case ${ROW_N_NAMES[${i}]} in + zio_taskq) IDX=0;; + zio_req_nul) IDX=1;; + zio_irq_nul) IDX=2;; + zio_req_rd) IDX=3;; + zio_irq_rd) IDX=4;; + zio_req_wr) IDX=5;; + zio_irq_wr) IDX=6;; + zio_req_fr) IDX=7;; + zio_irq_fr) IDX=8;; + zio_req_cm) IDX=9;; + zio_irq_cm) IDX=10;; + zio_req_ctl) IDX=11;; + zio_irq_ctl) IDX=12;; + txg_quiesce) IDX=13;; + txg_sync) IDX=14;; + txg_timelimit) IDX=15;; + arc_reclaim) IDX=16;; + l2arc_feed) IDX=17;; + zpios_io) IDX=18;; + *) continue;; + esac + + let ROW_N[${IDX}]=${ROW_N[${IDX}]}+${SUM} + done + + if [ $HEADER -eq 1 ]; then + echo "step, zio_taskq, zio_req_nul, zio_irq_nul, " \ + "zio_req_rd, zio_irq_rd, zio_req_wr, zio_irq_wr, " \ + "zio_req_fr, zio_irq_fr, zio_req_cm, zio_irq_cm, " \ + "zio_req_ctl, zio_irq_ctl, txg_quiesce, txg_sync, " \ + "txg_timelimit, arc_reclaim, l2arc_feed, zpios_io, " \ + "idle" + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${PID_FILE}" + break + fi + + # Original values are in jiffies and we expect HZ to be 1000 + # on most 2.6 systems thus we divide by 10 to get a percentage. 
+ IDLE=1000 + echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc` + DELTA_PERCENT=`echo "scale=1; ${DELTA}/10" | bc` + let IDLE=${IDLE}-${DELTA} + echo -n "${DELTA_PERCENT}, " + done + ILDE_PERCENT=`echo "scale=1; ${IDLE}/10" | bc` + echo "${ILDE_PERCENT}" + + let STEP=${STEP}+1 +done + +exit + +echo +echo "Percent of total system time per pid" +for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do + ROW_M=( ${ROW_N[@]} ) + ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` ) + ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` ) + + for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do + ROW_N[${i}]=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc` + done + + if [ $HEADER -eq 1 ]; then + echo -n "step, " + cat ${PID_FILE} | cut -f2 -d' ' | tr "\n" ", " + echo + HEADER=0 + fi + + if [ ${#ROW_M[@]} -eq 0 ]; then + continue + fi + + if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then + echo "Badly formatted profile data in ${PID_FILE}" + break + fi + + # Original values are in jiffies and we expect HZ to be 1000 + # on most 2.6 systems thus we divide by 10 to get a percentage. 
+ echo -n "${STEP}, " + for (( i=0; i<${#ROW_N[@]}; i++ )); do + DELTA=`echo "scale=1; (${ROW_N[${i}]}-${ROW_M[${i}]})/10" | bc` + echo -n "${DELTA}, " + done + + echo + let STEP=${STEP}+1 +done + + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile-post.sh b/scripts/zpios-profile/zpios-profile-post.sh new file mode 100755 index 000000000..3a454ba04 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-post.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +PROG=zpios-profile-post.sh + +RUN_POST=${0} +RUN_PHASE=${1} +RUN_DIR=${2} +RUN_ID=${3} +RUN_POOL=${4} +RUN_CHUNK_SIZE=${5} +RUN_REGION_SIZE=${6} +RUN_THRD_COUNT=${7} +RUN_REGION_COUNT=${8} +RUN_OFFSET=${9} +RUN_REGION_NOISE=${10} +RUN_CHUNK_NOISE=${11} +RUN_THRD_DELAY=${12} +RUN_FLAGS=${13} +RUN_RESULT=${14} + +# Summarize system time per process +zpios_profile_post_pids() { + ${PROFILE_PIDS} ${PROFILE_RUN_CR_PIDS_LOG} >${PROFILE_RUN_CR_PIDS_CSV} + ${PROFILE_PIDS} ${PROFILE_RUN_WR_PIDS_LOG} >${PROFILE_RUN_WR_PIDS_CSV} + ${PROFILE_PIDS} ${PROFILE_RUN_RD_PIDS_LOG} >${PROFILE_RUN_RD_PIDS_CSV} + ${PROFILE_PIDS} ${PROFILE_RUN_RM_PIDS_LOG} >${PROFILE_RUN_RM_PIDS_CSV} +} + +zpios_profile_post_disk() { + ${PROFILE_DISK} ${PROFILE_RUN_CR_DISK_LOG} >${PROFILE_RUN_CR_DISK_CSV} + ${PROFILE_DISK} ${PROFILE_RUN_WR_DISK_LOG} >${PROFILE_RUN_WR_DISK_CSV} + ${PROFILE_DISK} ${PROFILE_RUN_RD_DISK_LOG} >${PROFILE_RUN_RD_DISK_CSV} + ${PROFILE_DISK} ${PROFILE_RUN_RM_DISK_LOG} >${PROFILE_RUN_RM_DISK_CSV} +} + +# Summarize per device performance + +# Stop a user defined profiling script which is gathering additional data +zpios_profile_post_stop() { + local PROFILE_PID=$1 + + kill -s SIGHUP `cat ${PROFILE_PID}` + + + # Sleep waiting for profile script to exit + while [ -f ${PROFILE_PID} ]; do + sleep 0.01 + done +} + +zpios_profile_post_proc_stop() { + local PROC_DIR=$1 + + if [ -f ${PROFILE_ARC_PROC} ]; then + cat ${PROFILE_ARC_PROC} >${PROC_DIR}/arcstats.txt + fi + + if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then + cat 
${PROFILE_VDEV_CACHE_PROC} >${PROC_DIR}/vdev_cache_stats.txt + fi +} + +zpios_profile_post_oprofile_stop() { + local OPROFILE_LOG=$1 + local OPROFILE_ARGS="-a -g -l -p ${OPROFILE_KERNEL_DIR},${OPROFILE_SPL_DIR},${OPROFILE_ZFS_DIR}" + + /usr/bin/opcontrol --stop >>${OPROFILE_LOG} 2>&1 + /usr/bin/opcontrol --dump >>${OPROFILE_LOG} 2>&1 + /usr/bin/opreport ${OPROFILE_ARGS} >${OPROFILE_LOG} 2>&1 + /usr/bin/oparchive +} + +zpios_profile_post_create() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_OPROFILE_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_CR_DIR} + zpios_profile_post_stop ${PROFILE_RUN_CR_PID} +} + +zpios_profile_post_write() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_WR_OPROFILE_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_WR_DIR} + zpios_profile_post_stop ${PROFILE_RUN_WR_PID} +} + +zpios_profile_post_read() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_RD_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_RD_DIR} + zpios_profile_post_stop ${PROFILE_RUN_RD_PID} +} + +zpios_profile_post_remove() { + zpios_profile_post_oprofile_stop ${PROFILE_RUN_RM_OPROFILE_LOG} + zpios_profile_post_proc_stop ${PROFILE_RUN_RM_DIR} + zpios_profile_post_stop ${PROFILE_RUN_RM_PID} +} + +# Source global zpios test configuration +if [ -f ${RUN_DIR}/zpios-config.sh ]; then + . ${RUN_DIR}/zpios-config.sh +fi + +# Source global per-run test configuration +if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then + . 
${RUN_DIR}/${RUN_ID}/zpios-config-run.sh +fi + +case "${RUN_PHASE}" in + post-run) + zpios_profile_post_pids + zpios_profile_post_disk + ;; + post-create) + zpios_profile_post_create + ;; + post-write) + zpios_profile_post_write + ;; + post-read) + zpios_profile_post_read + ;; + post-remove) + zpios_profile_post_remove + ;; + *) + echo "Usage: ${PROG} {post-run|post-create|post-write|post-read|post-remove}" + exit 1 +esac + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile-pre.sh b/scripts/zpios-profile/zpios-profile-pre.sh new file mode 100755 index 000000000..a2a885798 --- /dev/null +++ b/scripts/zpios-profile/zpios-profile-pre.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +PROG=zpios-profile-pre.sh + +PROFILE_RDY=0 +trap "PROFILE_RDY=1" SIGHUP + +RUN_PRE=${0} +RUN_PHASE=${1} +RUN_DIR=${2} +RUN_ID=${3} +RUN_POOL=${4} +RUN_CHUNK_SIZE=${5} +RUN_REGION_SIZE=${6} +RUN_THRD_COUNT=${7} +RUN_REGION_COUNT=${8} +RUN_OFFSET=${9} +RUN_REGION_NOISE=${10} +RUN_CHUNK_NOISE=${11} +RUN_THRD_DELAY=${12} +RUN_FLAGS=${13} +RUN_RESULT=${14} + +zpios_profile_pre_run_cfg() { +cat > ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh << EOF +# +# Zpios Profiling Configuration for Run ${RUN_ID} +# + +PROFILE_RUN_DIR=${RUN_DIR}/${RUN_ID} + +PROFILE_RUN_CR_DIR=${RUN_DIR}/${RUN_ID}/create +PROFILE_RUN_CR_PID=${RUN_DIR}/${RUN_ID}/create/profile.pid +PROFILE_RUN_CR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/create/oprofile.txt +PROFILE_RUN_CR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/create/pids.txt +PROFILE_RUN_CR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/create/pids.csv +PROFILE_RUN_CR_DISK_LOG=${RUN_DIR}/${RUN_ID}/create/disk.txt +PROFILE_RUN_CR_DISK_CSV=${RUN_DIR}/${RUN_ID}/create/disk.csv + +PROFILE_RUN_WR_DIR=${RUN_DIR}/${RUN_ID}/write +PROFILE_RUN_WR_PID=${RUN_DIR}/${RUN_ID}/write/profile.pid +PROFILE_RUN_WR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/write/oprofile.txt +PROFILE_RUN_WR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/write/pids.txt +PROFILE_RUN_WR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/write/pids.csv 
+PROFILE_RUN_WR_DISK_LOG=${RUN_DIR}/${RUN_ID}/write/disk.txt +PROFILE_RUN_WR_DISK_CSV=${RUN_DIR}/${RUN_ID}/write/disk.csv + +PROFILE_RUN_RD_DIR=${RUN_DIR}/${RUN_ID}/read +PROFILE_RUN_RD_PID=${RUN_DIR}/${RUN_ID}/read/profile.pid +PROFILE_RUN_RD_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/read/oprofile.txt +PROFILE_RUN_RD_PIDS_LOG=${RUN_DIR}/${RUN_ID}/read/pids.txt +PROFILE_RUN_RD_PIDS_CSV=${RUN_DIR}/${RUN_ID}/read/pids.csv +PROFILE_RUN_RD_DISK_LOG=${RUN_DIR}/${RUN_ID}/read/disk.txt +PROFILE_RUN_RD_DISK_CSV=${RUN_DIR}/${RUN_ID}/read/disk.csv + +PROFILE_RUN_RM_DIR=${RUN_DIR}/${RUN_ID}/remove +PROFILE_RUN_RM_PID=${RUN_DIR}/${RUN_ID}/remove/profile.pid +PROFILE_RUN_RM_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/remove/oprofile.txt +PROFILE_RUN_RM_PIDS_LOG=${RUN_DIR}/${RUN_ID}/remove/pids.txt +PROFILE_RUN_RM_PIDS_CSV=${RUN_DIR}/${RUN_ID}/remove/pids.csv +PROFILE_RUN_RM_DISK_LOG=${RUN_DIR}/${RUN_ID}/remove/disk.txt +PROFILE_RUN_RM_DISK_CSV=${RUN_DIR}/${RUN_ID}/remove/disk.csv + +# PROFILE_PIDS_LOG=${RUN_DIR}/${RUN_ID}/pids-summary.csv +# PROFILE_DISK_LOG=${RUN_DIR}/${RUN_ID}/disk-summary.csv +EOF +} + +zpios_profile_pre_run_args() { +cat > ${RUN_DIR}/${RUN_ID}/zpios-args.txt << EOF +# +# Zpios Arguments for Run ${RUN_ID} +# + +DIR=${RUN_DIR} +ID=${RUN_ID} +POOL=${RUN_POOL} +CHUNK_SIZE=${RUN_CHUNK_SIZE} +REGION_SIZE=${RUN_REGION_SIZE} +THRD_COUNT=${RUN_THRD_COUNT} +REGION_COUNT=${RUN_REGION_COUNT} +OFFSET=${RUN_OFFSET} +REGION_NOISE=${RUN_REGION_NOISE} +CHUNK_NOISE=${RUN_CHUNK_NOISE} +THRD_DELAY=${RUN_THRD_DELAY} +FLAGS=${RUN_FLAGS} +RESULT=${RUN_RESULT} +EOF +} + +# Spawn a user defined profiling script to gather additional data +zpios_profile_pre_start() { + local PROFILE_PID=$1 + + ${PROFILE_USER} ${RUN_PHASE} ${RUN_DIR} ${RUN_ID} & + echo "$!" >${PROFILE_PID} + + # Sleep waiting for profile script to be ready, it will + # signal us via SIGHUP when it is ready to start profiling. 
+ while [ ${PROFILE_RDY} -eq 0 ]; do + sleep 0.01 + done +} + +zpios_profile_post_proc_start() { + + if [ -f ${PROFILE_ARC_PROC} ]; then + echo 0 >${PROFILE_ARC_PROC} + fi + + if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then + echo 0 >${PROFILE_VDEV_CACHE_PROC} + fi +} + +zpios_profile_pre_oprofile_start() { + local OPROFILE_LOG=$1 + + /usr/bin/opcontrol --reset >>${OPROFILE_LOG} 2>&1 + /usr/bin/opcontrol --start >>${OPROFILE_LOG} 2>&1 +} + +zpios_profile_pre_create() { + mkdir ${PROFILE_RUN_CR_DIR} + zpios_profile_pre_start ${PROFILE_RUN_CR_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_OPROFILE_LOG} +} + +zpios_profile_pre_write() { + mkdir ${PROFILE_RUN_WR_DIR} + zpios_profile_pre_start ${PROFILE_RUN_WR_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_WR_OPROFILE_LOG} +} + +zpios_profile_pre_read() { + mkdir ${PROFILE_RUN_RD_DIR} + zpios_profile_pre_start ${PROFILE_RUN_RD_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_RD_LOG} +} + +zpios_profile_pre_remove() { + mkdir ${PROFILE_RUN_RM_DIR} + zpios_profile_pre_start ${PROFILE_RUN_RM_PID} + zpios_profile_post_proc_start + zpios_profile_pre_oprofile_start ${PROFILE_RUN_RM_OPROFILE_LOG} +} + +# Source global zpios test configuration +if [ -f ${RUN_DIR}/zpios-config.sh ]; then + . ${RUN_DIR}/zpios-config.sh +fi + +# Source global per-run test configuration +if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then + . 
${RUN_DIR}/${RUN_ID}/zpios-config-run.sh +fi + +case "${RUN_PHASE}" in + pre-run) + mkdir -p ${RUN_DIR}/${RUN_ID}/ + zpios_profile_pre_run_cfg + zpios_profile_pre_run_args + ;; + pre-create) + zpios_profile_pre_create + ;; + pre-write) + zpios_profile_pre_write + ;; + pre-read) + zpios_profile_pre_read + ;; + pre-remove) + zpios_profile_pre_remove + ;; + *) + echo "Usage: ${PROG} {pre-run|pre-create|pre-write|pre-read|pre-remove}" + exit 1 +esac + +exit 0 diff --git a/scripts/zpios-profile/zpios-profile.sh b/scripts/zpios-profile/zpios-profile.sh new file mode 100755 index 000000000..f4f0ee97f --- /dev/null +++ b/scripts/zpios-profile/zpios-profile.sh @@ -0,0 +1,226 @@ +#!/bin/bash + + +PROG=zpios-profile.sh + +trap "RUN_DONE=1" SIGHUP + +RUN_PHASE=${1} +RUN_LOG_DIR=${2} +RUN_ID=${3} +RUN_DONE=0 + +POLL_INTERVAL=2.99 + +# Log these pids, the exact pid numbers will vary from system to system +# so I harvest pid for all the following type of processes from /proc/<pid>/ +# +# zio_taskq/# +# spa_zio_issue/# +# spa_zio_intr/# +# txg_quiesce_thr +# txg_sync_thread +# txg_timelimit_t +# arc_reclaim_thr +# l2arc_feed_thre +# zpios_io/# + +ZIO_TASKQ_PIDS=() +ZIO_REQ_NUL_PIDS=() +ZIO_IRQ_NUL_PIDS=() +ZIO_REQ_RD_PIDS=() +ZIO_IRQ_RD_PIDS=() +ZIO_REQ_WR_PIDS=() +ZIO_IRQ_WR_PIDS=() +ZIO_REQ_FR_PIDS=() +ZIO_IRQ_FR_PIDS=() +ZIO_REQ_CM_PIDS=() +ZIO_IRQ_CM_PIDS=() +ZIO_REQ_CTL_PIDS=() +ZIO_IRQ_CTL_PIDS=() + +TXG_QUIESCE_PIDS=() +TXG_SYNC_PIDS=() +TXG_TIMELIMIT_PIDS=() + +ARC_RECLAIM_PIDS=() +L2ARC_FEED_PIDS=() + +ZPIOS_IO_PIDS=() + +show_pids() { + echo "* zio_taskq: { ${ZIO_TASKQ_PIDS[@]} } = ${#ZIO_TASKQ_PIDS[@]}" + echo "* zio_req_nul: { ${ZIO_REQ_NUL_PIDS[@]} } = ${#ZIO_REQ_NUL_PIDS[@]}" + echo "* zio_irq_nul: { ${ZIO_IRQ_NUL_PIDS[@]} } = ${#ZIO_IRQ_NUL_PIDS[@]}" + echo "* zio_req_rd: { ${ZIO_REQ_RD_PIDS[@]} } = ${#ZIO_REQ_RD_PIDS[@]}" + echo "* zio_irq_rd: { ${ZIO_IRQ_RD_PIDS[@]} } = ${#ZIO_IRQ_RD_PIDS[@]}" + echo "* zio_req_wr: { ${ZIO_REQ_WR_PIDS[@]} } = 
${#ZIO_REQ_WR_PIDS[@]}" + echo "* zio_irq_wr: { ${ZIO_IRQ_WR_PIDS[@]} } = ${#ZIO_IRQ_WR_PIDS[@]}" + echo "* zio_req_fr: { ${ZIO_REQ_FR_PIDS[@]} } = ${#ZIO_REQ_FR_PIDS[@]}" + echo "* zio_irq_fr: { ${ZIO_IRQ_FR_PIDS[@]} } = ${#ZIO_IRQ_FR_PIDS[@]}" + echo "* zio_req_cm: { ${ZIO_REQ_CM_PIDS[@]} } = ${#ZIO_REQ_CM_PIDS[@]}" + echo "* zio_irq_cm: { ${ZIO_IRQ_CM_PIDS[@]} } = ${#ZIO_IRQ_CM_PIDS[@]}" + echo "* zio_req_ctl: { ${ZIO_REQ_CTL_PIDS[@]} } = ${#ZIO_REQ_CTL_PIDS[@]}" + echo "* zio_irq_ctl: { ${ZIO_IRQ_CTL_PIDS[@]} } = ${#ZIO_IRQ_CTL_PIDS[@]}" + echo "* txg_quiesce: { ${TXG_QUIESCE_PIDS[@]} } = ${#TXG_QUIESCE_PIDS[@]}" + echo "* txg_sync: { ${TXG_SYNC_PIDS[@]} } = ${#TXG_SYNC_PIDS[@]}" + echo "* txg_timelimit: { ${TXG_TIMELIMIT_PIDS[@]} } = ${#TXG_TIMELIMIT_PIDS[@]}" + echo "* arc_reclaim: { ${ARC_RECLAIM_PIDS[@]} } = ${#ARC_RECLAIM_PIDS[@]}" + echo "* l2arc_feed: { ${L2ARC_FEED_PIDS[@]} } = ${#L2ARC_FEED_PIDS[@]}" + echo "* zpios_io: { ${ZPIOS_IO_PIDS[@]} } = ${#ZPIOS_IO_PIDS[@]}" +} + +check_pid() { + local PID=$1 + local NAME=$2 + local TYPE=$3 + local PIDS=( "$4" ) + local NAME_STRING=`echo ${NAME} | cut -f1 -d'/'` + local NAME_NUMBER=`echo ${NAME} | cut -f2 -d'/'` + + if [ "${NAME_STRING}" == "${TYPE}" ]; then + if [ -n "${NAME_NUMBER}" ]; then + PIDS[${NAME_NUMBER}]=${PID} + else + PIDS[${#PIDS[@]}]=${PID} + + fi + fi + + echo "${PIDS[@]}" +} + +# NOTE: This whole process is crazy slow but it will do for now +aquire_pids() { + echo "--- Aquiring ZFS pids ---" + + for PID in `ls /proc/ | grep [0-9] | sort -n -u`; do + if [ ! 
-e /proc/${PID}/status ]; then + continue + fi + + NAME=`cat /proc/${PID}/status | head -n1 | cut -f2` + + ZIO_TASKQ_PIDS=( `check_pid ${PID} ${NAME} "zio_taskq" \ + "$(echo "${ZIO_TASKQ_PIDS[@]}")"` ) + + ZIO_REQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_nul" \ + "$(echo "${ZIO_REQ_NUL_PIDS[@]}")"` ) + + ZIO_IRQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_nul" \ + "$(echo "${ZIO_IRQ_NUL_PIDS[@]}")"` ) + + ZIO_REQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_req_rd" \ + "$(echo "${ZIO_REQ_RD_PIDS[@]}")"` ) + + ZIO_IRQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_rd" \ + "$(echo "${ZIO_IRQ_RD_PIDS[@]}")"` ) + + ZIO_REQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_wr" \ + "$(echo "${ZIO_REQ_WR_PIDS[@]}")"` ) + + ZIO_IRQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_wr" \ + "$(echo "${ZIO_IRQ_WR_PIDS[@]}")"` ) + + ZIO_REQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_fr" \ + "$(echo "${ZIO_REQ_FR_PIDS[@]}")"` ) + + ZIO_IRQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_fr" \ + "$(echo "${ZIO_IRQ_FR_PIDS[@]}")"` ) + + ZIO_REQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_req_cm" \ + "$(echo "${ZIO_REQ_CM_PIDS[@]}")"` ) + + ZIO_IRQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_cm" \ + "$(echo "${ZIO_IRQ_CM_PIDS[@]}")"` ) + + ZIO_REQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_ctl" \ + "$(echo "${ZIO_REQ_CTL_PIDS[@]}")"` ) + + ZIO_IRQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_ctl" \ + "$(echo "${ZIO_IRQ_CTL_PIDS[@]}")"` ) + + TXG_QUIESCE_PIDS=( `check_pid ${PID} ${NAME} "txg_quiesce" \ + "$(echo "${TXG_QUIESCE_PIDS[@]}")"` ) + + TXG_SYNC_PIDS=( `check_pid ${PID} ${NAME} "txg_sync" \ + "$(echo "${TXG_SYNC_PIDS[@]}")"` ) + + TXG_TIMELIMIT_PIDS=( `check_pid ${PID} ${NAME} "txg_timelimit" \ + "$(echo "${TXG_TIMELIMIT_PIDS[@]}")"` ) + + ARC_RECLAIM_PIDS=( `check_pid ${PID} ${NAME} "arc_reclaim" \ + "$(echo "${ARC_RECLAIM_PIDS[@]}")"` ) + + L2ARC_FEED_PIDS=( `check_pid ${PID} ${NAME} "l2arc_feed" \ + "$(echo "${L2ARC_FEED_PIDS[@]}")"` ) + done + + # Wait for zpios_io threads to 
start + kill -s SIGHUP ${PPID} + echo "* Waiting for zpios_io threads to start" + while [ ${RUN_DONE} -eq 0 ]; do + ZPIOS_IO_PIDS=( `ps ax | grep zpios_io | grep -v grep | \ + sed 's/^ *//g' | cut -f1 -d' '` ) + if [ ${#ZPIOS_IO_PIDS[@]} -gt 0 ]; then + break; + fi + sleep 0.1 + done + + echo "`show_pids`" >${RUN_LOG_DIR}/${RUN_ID}/pids.txt +} + +log_pids() { # Snapshot /proc/PID/stat for every collected PID, plus /proc/diskstats, once per POLL_INTERVAL until RUN_DONE is set + echo "--- Logging ZFS profile to ${RUN_LOG_DIR}/${RUN_ID}/ ---" + ALL_PIDS=( ${ZIO_TASKQ_PIDS[@]} \ + ${ZIO_REQ_NUL_PIDS[@]} \ + ${ZIO_IRQ_NUL_PIDS[@]} \ + ${ZIO_REQ_RD_PIDS[@]} \ + ${ZIO_IRQ_RD_PIDS[@]} \ + ${ZIO_REQ_WR_PIDS[@]} \ + ${ZIO_IRQ_WR_PIDS[@]} \ + ${ZIO_REQ_FR_PIDS[@]} \ + ${ZIO_IRQ_FR_PIDS[@]} \ + ${ZIO_REQ_CM_PIDS[@]} \ + ${ZIO_IRQ_CM_PIDS[@]} \ + ${ZIO_REQ_CTL_PIDS[@]} \ + ${ZIO_IRQ_CTL_PIDS[@]} \ + ${TXG_QUIESCE_PIDS[@]} \ + ${TXG_SYNC_PIDS[@]} \ + ${TXG_TIMELIMIT_PIDS[@]} \ + ${ARC_RECLAIM_PIDS[@]} \ + ${L2ARC_FEED_PIDS[@]} \ + ${ZPIOS_IO_PIDS[@]} ) + + while [ ${RUN_DONE} -eq 0 ]; do + NOW=`date +%s.%N` + LOG_PIDS="${RUN_LOG_DIR}/${RUN_ID}/pids-${NOW}" + LOG_DISK="${RUN_LOG_DIR}/${RUN_ID}/disk-${NOW}" + + for PID in "${ALL_PIDS[@]}"; do + if [ -z ${PID} ]; then + continue; + fi + + if [ -e /proc/${PID}/stat ]; then + cat /proc/${PID}/stat | head -n1 >>${LOG_PIDS} + else + echo "<${PID} exited>" >>${LOG_PIDS} + fi + done + + cat /proc/diskstats >${LOG_DISK} + + NOW2=`date +%s.%N` + DELTA=`echo "${POLL_INTERVAL}-(${NOW2}-${NOW})" | bc` + sleep ${DELTA} + done +} + +aquire_pids +log_pids + +# rm ${PROFILE_PID} + +exit 0 diff --git a/scripts/zpios-sanity.sh b/scripts/zpios-sanity.sh new file mode 100755 index 000000000..d27a5b8ca --- /dev/null +++ b/scripts/zpios-sanity.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# +# ZFS/ZPOOL configuration test script. + +SCRIPT_COMMON=common.sh +if [ -f ./${SCRIPT_COMMON} ]; then +. ./${SCRIPT_COMMON} +elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then +.
/usr/libexec/zfs/${SCRIPT_COMMON} +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios-sanity.sh +HEADER= + +usage() { +cat << EOF +USAGE: +$0 [hvxf] + +DESCRIPTION: + ZPIOS sanity tests + +OPTIONS: + -h Show this message + -v Verbose + -x Destructive hd/sd/md/dm/ram tests + -f Don't prompt due to -x + +EOF +} + +while getopts 'hvxf' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + ;; + x) + DANGEROUS=1 + ;; + f) + FORCE=1 + ;; + ?) + usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +zpios_test() { + CONFIG=$1 + TEST=$2 + LOG=`mktemp` + + ${ZPIOS_SH} -f -c ${CONFIG} -t ${TEST} &>${LOG} + if [ $? -ne 0 ]; then + if [ ${VERBOSE} ]; then + printf "FAIL: %-13s\n" ${CONFIG} + cat ${LOG} + else + if [ ! ${HEADER} ]; then + head -2 ${LOG} + HEADER=1 + fi + + printf "FAIL: %-13s" ${CONFIG} + tail -1 ${LOG} + fi + else + if [ ${VERBOSE} ]; then + cat ${LOG} + else + if [ ! ${HEADER} ]; then + head -2 ${LOG} + HEADER=1 + fi + + tail -1 ${LOG} + fi + fi + + rm -f ${LOG} +} + +if [ ${DANGEROUS} ] && [ ! ${FORCE} ]; then + cat << EOF +The -x option was passed which will result in UNRECOVERABLE DATA LOSS +on the following block devices: + + /dev/sd[abcd] + /dev/hda + /dev/ram0 + /dev/md0 + /dev/dm-0 + +To continue please confirm by entering YES: +EOF + read CONFIRM + if [ "${CONFIRM}" != "YES" ] && [ "${CONFIRM}" != "yes" ]; then # quoted so an empty or multi-word reply aborts instead of erroring past this check + exit 0; + fi +fi + +# +# These configurations are all safe and pose no risk to any data on +# the system which runs them. They will confine all their IO to a +# file in /tmp or a loopback device configured to use a file in /tmp. +# +SAFE_CONFIGS=( \ + file-raid0 file-raid10 file-raidz file-raidz2 \ + lo-raid0 lo-raid10 lo-raidz lo-raidz2 \ +) + +# +# These configurations are down right dangerous. They will attempt +# to use various real block devices on your system which may contain +# data you care about.
You are STRONGLY advised not to run this unless +# you are certain there is no data on the system you care about. +# +DANGEROUS_CONFIGS=( \ + hda-raid0 \ + sda-raid0 \ + ram0-raid0 \ + md0-raid10 md0-raid5 \ + dm0-raid0 \ +) + +for CONFIG in ${SAFE_CONFIGS[*]}; do + zpios_test $CONFIG tiny +done + +if [ ${DANGEROUS} ]; then + for CONFIG in ${DANGEROUS_CONFIGS[*]}; do + zpios_test $CONFIG tiny + done +fi + +exit 0 diff --git a/scripts/zpios-survey.sh b/scripts/zpios-survey.sh new file mode 100755 index 000000000..0b16d1bb8 --- /dev/null +++ b/scripts/zpios-survey.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# +# Wrapper script for easily running a survey of zpios based tests +# + +SCRIPT_COMMON=common.sh +if [ -f ./${SCRIPT_COMMON} ]; then +. ./${SCRIPT_COMMON} +elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then +. /usr/libexec/zfs/${SCRIPT_COMMON} +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios-survey.sh + +usage() { +cat << EOF +USAGE: +$0 [hvp] <-c config> <-t test> + +DESCRIPTION: + Helper script for easy zpios survey benchmarking. + +OPTIONS: + -h Show this message + -v Verbose + -p Enable profiling + -c Zpool configuration + -t Zpios test + -l Zpios survey log + +EOF +} + +print_header() { +tee -a ${ZPIOS_SURVEY_LOG} << EOF + +================================================================ +Test: $1 +EOF +} + +# Baseline performance for an out of the box config with no manual tuning. +# Ideally, we want everything to be automatically tuned for your system and +# for this to perform reasonably well. +zpios_survey_base() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+baseline" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Disable ZFS's prefetching. 
For some reason still not clear to me +# current prefetching policy is quite bad for a random workload. +# Allowing the algorithm to detect a random workload and not do +# anything may be the way to address this issue. +zpios_survey_prefetch() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+prefetch" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -o "--noprefetch" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Simulating a zerocopy IO path should improve performance by freeing up +# lots of CPU which is wasted move data between buffers. +zpios_survey_zerocopy() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+zerocopy" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -o "--zerocopy" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Disabling checksumming should show some (if small) improvement +# simply due to freeing up a modest amount of CPU. +zpios_survey_checksum() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+checksum" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -s "set checksum=off" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Increasing the pending IO depth also seems to improve things likely +# at the expense of latency. This should be explored more because I'm +# seeing a much bigger impact there that I would have expected. There +# may be some low hanging fruit to be found here. 
+zpios_survey_pending() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+pending" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + zfs="zfs_vdev_max_pending=1024" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# To avoid memory fragmentation issues our slab implementation can be +# based on a virtual address space. Interestingly, we take a pretty +# substantial performance penalty for this somewhere in the low level +# IO drivers. If we back the slab with kmem pages we see far better +# read performance numbers at the cost of memory fragmentation and general +# system instability due to large allocations. This may be because of +# an optimization in the low level drivers due to the contiguous kmem +# based memory. This needs to be explained. The good news here is that +# with zerocopy interfaces added at the DMU layer we could guarantee +# kmem based memory for a pool of pages.
+# +# 0x100 = KMC_KMEM - Force kmem_* based slab +# 0x200 = KMC_VMEM - Force vmem_* based slab +zpios_survey_kmem() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+kmem" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + zfs="zio_bulk_flags=0x100" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + +# Apply all possible tuning concurrently to get a best case number +zpios_survey_all() { + TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+all" + print_header ${TEST_NAME} + + ${ZFS_SH} ${VERBOSE_FLAG} \ + zfs="zfs_vdev_max_pending=1024" \ + zfs="zio_bulk_flags=0x100" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \ + -o "--noprefetch --zerocopy" \ + -s "set checksum=off" | \ + tee -a ${ZPIOS_SURVEY_LOG} + ${ZFS_SH} -u ${VERBOSE_FLAG} | \ + tee -a ${ZPIOS_SURVEY_LOG} +} + + +PROFILE= +ZPOOL_NAME=zpios-survey +ZPOOL_CONFIG=zpool-config.sh +ZPIOS_TEST=zpios-test.sh +ZPIOS_SURVEY_LOG=/dev/null + +while getopts 'hvpc:t:l:' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + VERBOSE_FLAG="-v" + ;; + p) + PROFILE=1 + PROFILE_FLAG="-p" + ;; + c) + ZPOOL_CONFIG=${OPTARG} + ;; + t) + ZPIOS_TEST=${OPTARG} + ;; + l) + ZPIOS_SURVEY_LOG=${OPTARG} + ;; + ?)
+ usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +zpios_survey_base +zpios_survey_prefetch +zpios_survey_zerocopy +zpios_survey_checksum +zpios_survey_pending +zpios_survey_kmem +zpios_survey_all + +exit 0 diff --git a/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..cbd9c697a --- /dev/null +++ b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? 
=this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=16 \ + --regioncount=8192 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..cd3c50b77 --- /dev/null +++ b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? 
=this help + + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=1 \ + --regioncount=16 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..743e97b64 --- /dev/null +++ b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? 
=this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=1,2,4,8,16,32,64,128,256 \ + --regioncount=65536 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..92a3b77b4 --- /dev/null +++ b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? 
=this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=256 \ + --regioncount=65536 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh new file mode 100755 index 000000000..0db952cd6 --- /dev/null +++ b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Usage: zpios +# --threadcount -t =values +# --threadcount_low -l =value +# --threadcount_high -h =value +# --threadcount_incr -e =value +# --regioncount -n =values +# --regioncount_low -i =value +# --regioncount_high -j =value +# --regioncount_incr -k =value +# --offset -o =values +# --offset_low -m =value +# --offset_high -q =value +# --offset_incr -r =value +# --chunksize -c =values +# --chunksize_low -a =value +# --chunksize_high -b =value +# --chunksize_incr -g =value +# --regionsize -s =values +# --regionsize_low -A =value +# --regionsize_high -B =value +# --regionsize_incr -C =value +# --load -L =dmuio|ssf|fpp +# --pool -p =pool name +# --name -M =test name +# --cleanup -x +# --prerun -P =pre-command +# --postrun -R =post-command +# --log -G =log directory +# --regionnoise -I =shift +# --chunknoise -N =bytes +# --threaddelay -T =jiffies +# --verify -V +# --zerocopy -z +# --nowait -O +# --human-readable -H +# --verbose -v =increase verbosity +# --help -? 
=this help + +ZPIOS_CMD="${ZPIOS} \ + --load=dmuio \ + --pool=${ZPOOL_NAME} \ + --name=${ZPOOL_CONFIG} \ + --threadcount=4 \ + --regioncount=1024 \ + --regionsize=4M \ + --chunksize=1M \ + --offset=4M \ + --cleanup \ + --human-readable \ + ${ZPIOS_OPTIONS}" + +zpios_start() { + if [ ${VERBOSE} ]; then + ZPIOS_CMD="${ZPIOS_CMD} --verbose" + echo ${ZPIOS_CMD} + fi + + ${ZPIOS_CMD} || exit 1 +} + +zpios_stop() { + [ ${VERBOSE} ] && echo +} diff --git a/scripts/zpios-test/large-thread-survey.sh b/scripts/zpios-test/large-thread-survey.sh new file mode 120000 index 000000000..90b6e3c47 --- /dev/null +++ b/scripts/zpios-test/large-thread-survey.sh @@ -0,0 +1 @@ +1x256th-65536rc-4rs-1cs-4off.sh
\ No newline at end of file diff --git a/scripts/zpios-test/large.sh b/scripts/zpios-test/large.sh new file mode 120000 index 000000000..b8e22bf54 --- /dev/null +++ b/scripts/zpios-test/large.sh @@ -0,0 +1 @@ +256th-65536rc-4rs-1cs-4off.sh
\ No newline at end of file diff --git a/scripts/zpios-test/medium.sh b/scripts/zpios-test/medium.sh new file mode 120000 index 000000000..d81027b73 --- /dev/null +++ b/scripts/zpios-test/medium.sh @@ -0,0 +1 @@ +16th-8192rc-4rs-1cs-4off.sh
\ No newline at end of file diff --git a/scripts/zpios-test/small.sh b/scripts/zpios-test/small.sh new file mode 120000 index 000000000..cbf03b5ce --- /dev/null +++ b/scripts/zpios-test/small.sh @@ -0,0 +1 @@ +4th-1024rc-4rs-1cs-4off.sh
\ No newline at end of file diff --git a/scripts/zpios-test/tiny.sh b/scripts/zpios-test/tiny.sh new file mode 120000 index 000000000..ba8b7cd0c --- /dev/null +++ b/scripts/zpios-test/tiny.sh @@ -0,0 +1 @@ +1th-16rc-4rs-1cs-4off.sh
\ No newline at end of file diff --git a/scripts/zpios.sh b/scripts/zpios.sh new file mode 100755 index 000000000..6e9b3f50d --- /dev/null +++ b/scripts/zpios.sh @@ -0,0 +1,266 @@ +#!/bin/bash +# +# Wrapper script for easily running zpios based tests +# + +SCRIPT_COMMON=common.sh +if [ -f ./${SCRIPT_COMMON} ]; then +. ./${SCRIPT_COMMON} +elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then +. /usr/libexec/zfs/${SCRIPT_COMMON} +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios.sh +DATE=`date +%Y%m%d-%H%M%S` +if [ "${ZPIOS_MODULES}" ]; then + MODULES=(${ZPIOS_MODULES[*]}) +else + MODULES=(zpios) +fi + +usage() { +cat << EOF +USAGE: +$0 [hvp] <-c config> <-t test> + +DESCRIPTION: + Helper script for easy zpios benchmarking. + +OPTIONS: + -h Show this message + -v Verbose + -p Enable profiling + -c Zpool configuration + -t Zpios test + -o Additional zpios options + -l Additional zpool options + -s Additional zfs options + +EOF +} + +print_header() { + echo --------------------- ZPIOS RESULTS ---------------------------- + echo -n "Date: "; date + echo -n "Kernel: "; uname -r + dmesg | grep "Loaded Solaris Porting Layer" | tail -n1 + dmesg | grep "Loaded ZFS Filesystem" | tail -n1 + echo +} + +print_spl_info() { + echo --------------------- SPL Tunings ------------------------------ + ${SYSCTL} -A | grep spl + + if [ -d /sys/module/spl/parameters ]; then + grep [0-9] /sys/module/spl/parameters/* + else + grep [0-9] /sys/module/spl/* + fi + + echo +} + +print_zfs_info() { + echo --------------------- ZFS Tunings ------------------------------ + ${SYSCTL} -A | grep zfs + + if [ -d /sys/module/zfs/parameters ]; then + grep [0-9] /sys/module/zfs/parameters/* + else + grep [0-9] /sys/module/zfs/* + fi + + echo +} + +print_stats() { + echo ---------------------- Statistics ------------------------------- + ${SYSCTL} -A | grep spl | grep stack_max + + if [ -d /proc/spl/kstat/ ]; then + if [ -f /proc/spl/kstat/zfs/arcstats ]; then + echo "* ARC" + 
cat /proc/spl/kstat/zfs/arcstats + echo + fi + + if [ -f /proc/spl/kstat/zfs/vdev_cache_stats ]; then + echo "* VDEV Cache" + cat /proc/spl/kstat/zfs/vdev_cache_stats + echo + fi + fi + + if [ -f /proc/spl/kmem/slab ]; then + echo "* SPL SLAB" + cat /proc/spl/kmem/slab + echo + fi + + echo +} + +check_test() { + + if [ ! -f ${ZPIOS_TEST} ]; then + local NAME=`basename ${ZPIOS_TEST} .sh` + ERROR="Unknown test '${NAME}', available tests are:\n" + + for TST in `ls ${ZPIOSDIR}/ | grep ".sh"`; do + local NAME=`basename ${TST} .sh` + ERROR="${ERROR}${NAME}\n" + done + + return 1 + fi + + return 0 +} + +zpios_profile_config() { +cat > ${PROFILE_DIR}/zpios-config.sh << EOF +# +# Zpios Profiling Configuration +# + +PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE} +PROFILE_PRE=${ZPIOSPROFILEDIR}/zpios-profile-pre.sh +PROFILE_POST=${ZPIOSPROFILEDIR}/zpios-profile-post.sh +PROFILE_USER=${ZPIOSPROFILEDIR}/zpios-profile.sh +PROFILE_PIDS=${ZPIOSPROFILEDIR}/zpios-profile-pids.sh +PROFILE_DISK=${ZPIOSPROFILEDIR}/zpios-profile-disk.sh +PROFILE_ARC_PROC=/proc/spl/kstat/zfs/arcstats +PROFILE_VDEV_CACHE_PROC=/proc/spl/kstat/zfs/vdev_cache_stats + +OPROFILE_KERNEL="/boot/vmlinux-`uname -r`" +OPROFILE_KERNEL_DIR="/lib/modules/`uname -r`/kernel/" +OPROFILE_SPL_DIR=${SPLBUILD}/module/ +OPROFILE_ZFS_DIR=${MODDIR} + +EOF +} + +zpios_profile_start() { + PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE} + + mkdir -p ${PROFILE_DIR} + zpios_profile_config + . 
${PROFILE_DIR}/zpios-config.sh + + ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --log=${PROFILE_DIR}" + ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --prerun=${PROFILE_PRE}" + ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --postrun=${PROFILE_POST}" + + /usr/bin/opcontrol --init + /usr/bin/opcontrol --setup --vmlinux=${OPROFILE_KERNEL} +} + +zpios_profile_stop() { + /usr/bin/opcontrol --shutdown + /usr/bin/opcontrol --deinit +} + +PROFILE= +ZPOOL_CONFIG=zpool-config.sh +ZPIOS_TEST=zpios-test.sh +ZPOOL_NAME=zpios +ZPIOS_OPTIONS= +ZPOOL_OPTIONS="" +ZFS_OPTIONS="" + +while getopts 'hvpc:t:o:l:s:' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + VERBOSE_FLAG="-v" + ;; + p) + PROFILE=1 + ;; + c) + ZPOOL_CONFIG=${OPTARG} + ;; + t) + ZPIOS_TEST_ARG=${OPTARG} + ZPIOS_TEST=${ZPIOSDIR}/${OPTARG}.sh + ;; + o) + ZPIOS_OPTIONS=${OPTARG} + ;; + l) # Passed through to zpool-create.sh + ZPOOL_OPTIONS=${OPTARG} + ;; + s) # Passed through to zpool-create.sh + ZFS_OPTIONS=${OPTARG} + ;; + ?) + usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +# Validate and source your test config +check_test || die "${ERROR}" +. ${ZPIOS_TEST} + +# Pull in the zpios test module if it is not loaded. If this fails it is +# likely because the full module stack was not yet loaded with zfs.sh +if check_modules; then + if ! load_modules; then + die "Run 'zfs.sh' to ensure the full module stack is loaded" + fi +fi + +# Wait for device creation +while [ !
-c /dev/zpios ]; do + sleep 1 +done + +if [ ${VERBOSE} ]; then + print_header + print_spl_info + print_zfs_info +fi + +# Create the zpool configuration +${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} -p ${ZPOOL_NAME} -c ${ZPOOL_CONFIG} \ + -l "${ZPOOL_OPTIONS}" -s "${ZFS_OPTIONS}" || exit 1 + +if [ ${PROFILE} ]; then + zpios_profile_start +fi + +zpios_start +zpios_stop + +if [ ${PROFILE} ]; then + zpios_profile_stop +fi + +if [ ${VERBOSE} ]; then + print_stats +fi + +# Destroy the zpool configuration +${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} -p ${ZPOOL_NAME} \ + -c ${ZPOOL_CONFIG} -d || exit 1 + +# Unload the test module stack and wait for device removal +unload_modules +while [ -c /dev/zpios ]; do + sleep 1 +done + +exit 0 |