summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2017-06-16 18:21:11 -0600
committerBrian Behlendorf <[email protected]>2017-06-16 17:21:11 -0700
commit0241e491a08ffa471a08ceaa0b0943999d775cbe (patch)
tree17f07ce2cf37c4f19155dba12851c8eab519c8a1
parent05a5357a6c63b8c83062c1b295ee98d14f8e85aa (diff)
Inject zinject(8) a percentage amount of dev errs
In the original form of device error injection, it was an all or nothing situation. To help simulate intermittent error conditions, you can now specify a real number percentage value. This is also very useful for our ZFS fault diagnosis testing and for injecting intermittent errors during load testing. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Don Brady <[email protected]> Closes #6227
-rw-r--r--cmd/zinject/zinject.c48
-rw-r--r--include/sys/zfs_ioctl.h5
-rw-r--r--man/man8/zinject.86
-rw-r--r--module/zfs/zio_inject.c37
4 files changed, 76 insertions, 20 deletions
diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c
index 604554e71..ccd3534d0 100644
--- a/cmd/zinject/zinject.c
+++ b/cmd/zinject/zinject.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Intel Corporation.
*/
/*
@@ -124,7 +125,7 @@
* cache.
*
* The '-f' flag controls the frequency of errors injected, expressed as a
- * integer percentage between 1 and 100. The default is 100.
+ * real number percentage between 0.0001 and 100. The default is 100.
*
* The this form is responsible for actually injecting the handler into the
* framework. It takes the arguments described above, translates them to the
@@ -230,11 +231,13 @@ usage(void)
"\t\tspa_vdev_exit() will trigger a panic.\n"
"\n"
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
- "\t [-T <read|write|free|claim|all> pool\n"
+ "\t [-T <read|write|free|claim|all>] [-f frequency] pool\n"
"\t\tInject a fault into a particular device or the device's\n"
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
"\t\t'pad1', or 'pad2'.\n"
"\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
+ "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
+ "\t\tdevice error injection to a percentage of the IOs.\n"
"\n"
"\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
"\t\tPerform a specific action on a particular device.\n"
@@ -305,7 +308,7 @@ usage(void)
"\t\t-u\tUnload the associated pool. Can be specified with only\n"
"\t\t\ta pool object.\n"
"\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
- "\t\t\ta percentage between 1 and 100.\n"
+ "\t\t\ta percentage between 0.0001 and 100.\n"
"\n"
"\t-t data\t\tInject an error into the plain file contents of a\n"
"\t\t\tfile. The object must be specified as a complete path\n"
@@ -645,6 +648,27 @@ parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
return (0);
}
+/*
+ * Parse the '-f' frequency argument.
+ *
+ * 'str' must be a real number percentage in the range [0.0001, 100.0]
+ * with no trailing characters.  On success the percentage is scaled so
+ * that 100% maps to ZI_PERCENTAGE_MAX and stored in '*percent'.
+ *
+ * Returns 0 on success, EINVAL if 'str' is not a number, or ERANGE if
+ * the number lies outside the valid range.  '*percent' is only written
+ * on success.
+ */
+static int
+parse_frequency(const char *str, uint32_t *percent)
+{
+	double val;
+	char *post;
+
+	val = strtod(str, &post);
+
+	/* nothing consumed (empty input) or trailing garbage */
+	if (post == str || *post != '\0')
+		return (EINVAL);
+
+	/*
+	 * valid range is [0.0001, 100.0]
+	 *
+	 * Written as a negated inclusion test so that NaN, which compares
+	 * false against everything, is rejected instead of reaching the
+	 * integer conversion below.
+	 */
+	if (!(val >= 0.0001 && val <= 100.0))
+		return (ERANGE);
+
+	/* convert to an integer for use by kernel */
+	*percent = (uint32_t)((val / 100.0) * ZI_PERCENTAGE_MAX);
+
+	return (0);
+}
+
int
main(int argc, char **argv)
{
@@ -760,10 +784,12 @@ main(int argc, char **argv)
}
break;
case 'f':
- record.zi_freq = atoi(optarg);
- if (record.zi_freq < 1 || record.zi_freq > 100) {
- (void) fprintf(stderr, "frequency range must "
- "be in the range (0, 100]\n");
+ ret = parse_frequency(optarg, &record.zi_freq);
+ if (ret != 0) {
+ (void) fprintf(stderr, "%sfrequency value must "
+ "be in the range [0.0001, 100.0]\n",
+ ret == EINVAL ? "invalid value: " :
+ ret == ERANGE ? "out of range: " : "");
libzfs_fini(g_zfs);
return (1);
}
@@ -898,7 +924,8 @@ main(int argc, char **argv)
* '-c' is invalid with any other options.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
- level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
+ level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
+ record.zi_freq > 0) {
(void) fprintf(stderr, "cancel (-c) incompatible with "
"any other options\n");
usage();
@@ -972,7 +999,8 @@ main(int argc, char **argv)
} else if (raw != NULL) {
if (range != NULL || type != TYPE_INVAL || level != 0 ||
- record.zi_cmd != ZINJECT_UNINITIALIZED) {
+ record.zi_cmd != ZINJECT_UNINITIALIZED ||
+ record.zi_freq > 0) {
(void) fprintf(stderr, "raw (-b) format with "
"any other options\n");
usage();
@@ -1007,7 +1035,7 @@ main(int argc, char **argv)
error = EIO;
} else if (record.zi_cmd == ZINJECT_PANIC) {
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
- level != 0 || device != NULL) {
+ level != 0 || device != NULL || record.zi_freq > 0) {
(void) fprintf(stderr, "panic (-p) incompatible with "
"other options\n");
usage();
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index 495cdea3a..c68b8770b 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems.
+ * Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -338,6 +339,10 @@ typedef struct zinject_record {
#define ZEVENT_SEEK_START 0
#define ZEVENT_SEEK_END UINT64_MAX
+/*
+ * Scaled frequency ranges.
+ *
+ * An injection frequency is carried as a 32-bit integer where
+ * ZI_PERCENTAGE_MAX represents 100%.  ZI_PERCENTAGE_MIN is the scaled
+ * form of the smallest accepted percentage, 0.0001%
+ * (UINT32_MAX * 0.000001, truncated).
+ */
+#define ZI_PERCENTAGE_MIN 4294UL
+#define ZI_PERCENTAGE_MAX UINT32_MAX
+
typedef enum zinject_type {
ZINJECT_UNINITIALIZED,
ZINJECT_DATA_FAULT,
diff --git a/man/man8/zinject.8 b/man/man8/zinject.8
index ab22d4a5b..50fecfb64 100644
--- a/man/man8/zinject.8
+++ b/man/man8/zinject.8
@@ -76,7 +76,7 @@ create 3 lanes on the device; one lane with a latency
of 10 ms and two lanes with a 25 ms latency.
.TP
-.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-F] \fIpool\fB"
+.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-f \fIfrequency\fB] [\-F] \fIpool\fB"
Force a vdev error.
.TP
.B "zinject \-I [\-s \fIseconds\fB | \-g \fItxgs\fB] \fIpool\fB"
@@ -113,8 +113,8 @@ Specify
.BR "nxio" " for an ENXIO error where reopening the device will fail."
.TP
.BI "\-f" " frequency"
-Only inject errors a fraction of the time. Expressed as an integer
-percentage between 1 and 100.
+Only inject errors a fraction of the time. Expressed as a real number
+percentage between 0.0001 and 100.
.TP
.BI "\-F"
Fail faster. Do fewer checks.
diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c
index 0e8e9d932..4a4d431e3 100644
--- a/module/zfs/zio_inject.c
+++ b/module/zfs/zio_inject.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Intel Corporation.
*/
/*
@@ -99,6 +100,26 @@ static kmutex_t inject_delay_mtx;
static int inject_next_id = 1;
/*
+ * Test if the requested frequency was triggered
+ */
+static boolean_t
+freq_triggered(uint32_t frequency)
+{
+	uint32_t range;
+
+	/* A frequency of zero means "always" (100%). */
+	if (frequency == 0)
+		return (B_TRUE);
+
+	/*
+	 * Values of 100 or less are treated as legacy (unscaled) integer
+	 * percentages; anything larger is on the ZI_PERCENTAGE_MAX scale.
+	 */
+	if (frequency <= 100)
+		range = 100;
+	else
+		range = ZI_PERCENTAGE_MAX;
+
+	return (spa_get_random(range) < frequency);
+}
+
+/*
* Returns true if the given record matches the I/O in progress.
*/
static boolean_t
@@ -113,8 +134,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
record->zi_object == DMU_META_DNODE_OBJECT) {
if (record->zi_type == DMU_OT_NONE ||
type == record->zi_type)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
else
return (B_FALSE);
}
@@ -128,8 +148,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
zb->zb_blkid >= record->zi_start &&
zb->zb_blkid <= record->zi_end &&
error == record->zi_error)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
return (B_FALSE);
}
@@ -294,6 +313,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
if (handler->zi_record.zi_error == error) {
/*
+ * limit error injection if requested
+ */
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
+ /*
* For a failed open, pretend like the device
* has gone away.
*/
@@ -466,10 +491,8 @@ zio_handle_io_delay(zio_t *zio)
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
continue;
- if (handler->zi_record.zi_freq != 0 &&
- spa_get_random(100) >= handler->zi_record.zi_freq) {
+ if (!freq_triggered(handler->zi_record.zi_freq))
continue;
- }
if (vd->vdev_guid != handler->zi_record.zi_guid)
continue;