summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- cmd/zdb/zdb.c 6
-rw-r--r-- include/sys/arc_impl.h 14
-rw-r--r-- include/sys/spa.h 8
-rw-r--r-- include/sys/vdev_impl.h 2
-rw-r--r-- include/sys/vdev_trim.h 2
-rw-r--r-- man/man5/zfs-module-parameters.5 22
-rw-r--r-- man/man8/zpoolprops.8 2
-rw-r--r-- module/os/linux/zfs/spa_stats.c 15
-rw-r--r-- module/zfs/arc.c 141
-rw-r--r-- module/zfs/spa.c 20
-rw-r--r-- module/zfs/vdev.c 13
-rw-r--r-- module/zfs/vdev_removal.c 14
-rw-r--r-- module/zfs/vdev_trim.c 245
-rw-r--r-- tests/runfiles/common.run 2
-rw-r--r-- tests/zfs-tests/include/tunables.cfg 1
-rw-r--r-- tests/zfs-tests/tests/functional/trim/Makefile.am 3
-rw-r--r-- tests/zfs-tests/tests/functional/trim/trim.kshlib 8
-rwxr-xr-x tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh 106
18 files changed, 573 insertions, 51 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 00258799b..763a086ac 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -3707,8 +3707,12 @@ dump_l2arc_header(int fd)
(u_longlong_t)l2dhdr.dh_evict);
(void) printf(" lb_asize_refcount: %llu\n",
(u_longlong_t)l2dhdr.dh_lb_asize);
- (void) printf(" lb_count_refcount: %llu\n\n",
+ (void) printf(" lb_count_refcount: %llu\n",
(u_longlong_t)l2dhdr.dh_lb_count);
+ (void) printf(" trim_action_time: %llu\n",
+ (u_longlong_t)l2dhdr.dh_trim_action_time);
+ (void) printf(" trim_state: %llu\n\n",
+ (u_longlong_t)l2dhdr.dh_trim_state);
}
dump_l2arc_log_blocks(fd, l2dhdr, &rebuild);
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index e8c944ce8..5724db0a5 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -240,7 +240,14 @@ typedef struct l2arc_dev_hdr_phys {
*/
uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */
uint64_t dh_lb_count; /* mirror of l2ad_lb_count */
- const uint64_t dh_pad[32]; /* pad to 512 bytes */
+ /*
+ * Mirrors of vdev_trim_action_time and vdev_trim_state, used to
+ * display when the cache device was fully trimmed for the last
+ * time.
+ */
+ uint64_t dh_trim_action_time;
+ uint64_t dh_trim_state;
+ const uint64_t dh_pad[30]; /* pad to 512 bytes */
zio_eck_t dh_tail;
} l2arc_dev_hdr_phys_t;
CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
@@ -399,6 +406,7 @@ typedef struct l2arc_dev {
* Number of log blocks present on the device.
*/
zfs_refcount_t l2ad_lb_count;
+ boolean_t l2ad_trim_all; /* TRIM whole device */
} l2arc_dev_t;
/*
@@ -902,6 +910,10 @@ extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
const l2arc_log_blkptr_t *lbp);
+/* used in vdev_trim.c */
+void l2arc_dev_hdr_update(l2arc_dev_t *dev);
+l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/spa.h b/include/sys/spa.h
index a03319e2b..5806dda41 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -745,6 +745,7 @@ typedef enum {
typedef enum trim_type {
TRIM_TYPE_MANUAL = 0,
TRIM_TYPE_AUTO = 1,
+ TRIM_TYPE_SIMPLE = 2
} trim_type_t;
/* state manipulation functions */
@@ -788,6 +789,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_TRIM_RESTART 0x200
#define SPA_ASYNC_AUTOTRIM_RESTART 0x400
#define SPA_ASYNC_L2CACHE_REBUILD 0x800
+#define SPA_ASYNC_L2CACHE_TRIM 0x1000
/*
* Controls the behavior of spa_vdev_remove().
@@ -940,6 +942,12 @@ typedef struct spa_iostats {
kstat_named_t autotrim_bytes_skipped;
kstat_named_t autotrim_extents_failed;
kstat_named_t autotrim_bytes_failed;
+ kstat_named_t simple_trim_extents_written;
+ kstat_named_t simple_trim_bytes_written;
+ kstat_named_t simple_trim_extents_skipped;
+ kstat_named_t simple_trim_bytes_skipped;
+ kstat_named_t simple_trim_extents_failed;
+ kstat_named_t simple_trim_bytes_failed;
} spa_iostats_t;
extern void spa_stats_init(spa_t *spa);
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 96546ac35..56407a191 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -301,7 +301,7 @@ struct vdev {
uint64_t vdev_initialize_inflight;
kmutex_t vdev_trim_io_lock;
kcondvar_t vdev_trim_io_cv;
- uint64_t vdev_trim_inflight[2];
+ uint64_t vdev_trim_inflight[3];
/*
* Values stored in the config for an indirect or removing vdev.
diff --git a/include/sys/vdev_trim.h b/include/sys/vdev_trim.h
index 1e5401766..16f4be2a4 100644
--- a/include/sys/vdev_trim.h
+++ b/include/sys/vdev_trim.h
@@ -44,6 +44,8 @@ extern void vdev_autotrim(spa_t *spa);
extern void vdev_autotrim_stop_all(spa_t *spa);
extern void vdev_autotrim_stop_wait(vdev_t *vd);
extern void vdev_autotrim_restart(spa_t *spa);
+extern int vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size);
+extern void vdev_trim_l2arc(spa_t *spa);
#ifdef __cplusplus
}
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 40666c8f3..7ef82d9a0 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -194,7 +194,8 @@ Default value: \fB2\fR.
.ad
.RS 12n
Scales \fBl2arc_headroom\fR by this percentage when L2ARC contents are being
-successfully compressed before writing. A value of 100 disables this feature.
+successfully compressed before writing. A value of \fB100\fR disables this
+feature.
.sp
Default value: \fB200\fR%.
.RE
@@ -202,6 +203,25 @@ Default value: \fB200\fR%.
.sp
.ne 2
.na
+\fBl2arc_trim_ahead\fR (ulong)
+.ad
+.RS 12n
+Trims ahead of the current write size (\fBl2arc_write_max\fR) on L2ARC devices
+by this percentage of write size if we have filled the device. If set to
+\fB100\fR we TRIM twice the space required to accommodate upcoming writes. A
+minimum of 64MB will be trimmed. It also enables TRIM of the whole L2ARC device
+upon creation or addition to an existing pool or if the header of the device is
+invalid upon importing a pool or onlining a cache device. A value of \fB0\fR
+disables TRIM on L2ARC altogether and is the default as it can put significant
+stress on the underlying storage devices. This will vary depending on how well
+the specific device handles these commands.
+.sp
+Default value: \fB0\fR%.
+.RE
+
+.sp
+.ne 2
+.na
\fBl2arc_noprefetch\fR (int)
.ad
.RS 12n
diff --git a/man/man8/zpoolprops.8 b/man/man8/zpoolprops.8
index f0522ef78..d85b6d436 100644
--- a/man/man8/zpoolprops.8
+++ b/man/man8/zpoolprops.8
@@ -238,6 +238,8 @@ this property is
Automatic TRIM does not immediately reclaim blocks after a free. Instead,
it will optimistically delay allowing smaller ranges to be aggregated in to
a few larger ones. These can then be issued more efficiently to the storage.
+TRIM on L2ARC devices is enabled by setting
+.Sy l2arc_trim_ahead > 0 .
.Pp
Be aware that automatic trimming of recently freed data blocks can put
significant stress on the underlying storage devices. This will vary
diff --git a/module/os/linux/zfs/spa_stats.c b/module/os/linux/zfs/spa_stats.c
index eae9c3f22..2ec32da46 100644
--- a/module/os/linux/zfs/spa_stats.c
+++ b/module/os/linux/zfs/spa_stats.c
@@ -903,6 +903,12 @@ static spa_iostats_t spa_iostats_template = {
{ "autotrim_bytes_skipped", KSTAT_DATA_UINT64 },
{ "autotrim_extents_failed", KSTAT_DATA_UINT64 },
{ "autotrim_bytes_failed", KSTAT_DATA_UINT64 },
+ { "simple_trim_extents_written", KSTAT_DATA_UINT64 },
+ { "simple_trim_bytes_written", KSTAT_DATA_UINT64 },
+ { "simple_trim_extents_skipped", KSTAT_DATA_UINT64 },
+ { "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 },
+ { "simple_trim_extents_failed", KSTAT_DATA_UINT64 },
+ { "simple_trim_bytes_failed", KSTAT_DATA_UINT64 },
};
#define SPA_IOSTATS_ADD(stat, val) \
@@ -929,13 +935,20 @@ spa_iostats_trim_add(spa_t *spa, trim_type_t type,
SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
- } else {
+ } else if (type == TRIM_TYPE_AUTO) {
SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
+ } else {
+ SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
+ SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
+ SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
+ SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
+ SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
+ SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
}
}
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 29da08a49..e7ad976af 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -301,6 +301,7 @@
#include <sys/trace_zfs.h>
#include <sys/aggsum.h>
#include <cityhash.h>
+#include <sys/vdev_trim.h>
#ifndef _KERNEL
/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
@@ -854,7 +855,6 @@ static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t);
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
static boolean_t arc_is_overflowing(void);
static void arc_buf_watch(arc_buf_t *);
-static l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
@@ -865,6 +865,23 @@ static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
static void l2arc_read_done(zio_t *);
/*
+ * L2ARC TRIM
+ * l2arc_trim_ahead : A ZFS module parameter that controls how much ahead of
+ * the current write size (l2arc_write_max) we should TRIM if we
+ * have filled the device. It is defined as a percentage of the
+ * write size. If set to 100 we trim twice the space required to
+ * accommodate upcoming writes. A minimum of 64MB will be trimmed.
+ * It also enables TRIM of the whole L2ARC device upon creation or
+ * addition to an existing pool or if the header of the device is
+ * invalid upon importing a pool or onlining a cache device. The
+ * default is 0, which disables TRIM on L2ARC altogether as it can
+ * put significant stress on the underlying storage devices. This
+ * will vary depending on how well the specific device handles
+ * these commands.
+ */
+unsigned long l2arc_trim_ahead = 0;
+
+/*
* Performance tuning of L2ARC persistence:
*
* l2arc_rebuild_enabled : A ZFS module parameter that controls whether adding
@@ -902,7 +919,6 @@ static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
l2arc_dev_t *dev);
/* L2ARC persistence write I/O routines. */
-static void l2arc_dev_hdr_update(l2arc_dev_t *dev);
static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
l2arc_write_callback_t *cb);
@@ -7709,7 +7725,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
static uint64_t
l2arc_write_size(l2arc_dev_t *dev)
{
- uint64_t size, dev_size;
+ uint64_t size, dev_size, tsize;
/*
* Make sure our globals have meaningful values in case the user
@@ -7732,7 +7748,12 @@ l2arc_write_size(l2arc_dev_t *dev)
* iteration can occur.
*/
dev_size = dev->l2ad_end - dev->l2ad_start;
- if ((size + l2arc_log_blk_overhead(size, dev)) >= dev_size) {
+ tsize = size + l2arc_log_blk_overhead(size, dev);
+ if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0)
+ tsize += MAX(64 * 1024 * 1024,
+ (tsize * l2arc_trim_ahead) / 100);
+
+ if (tsize >= dev_size) {
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
"plus the overhead of log blocks (persistent L2ARC, "
"%llu bytes) exceeds the size of the cache device "
@@ -7810,10 +7831,12 @@ l2arc_dev_get_next(void)
else if (next == first)
break;
- } while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild);
+ } while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
+ next->l2ad_trim_all);
/* if we were unable to find any usable vdevs, return NULL */
- if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild)
+ if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
+ next->l2ad_trim_all)
next = NULL;
l2arc_dev_last = next;
@@ -8336,8 +8359,9 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
arc_buf_hdr_t *hdr, *hdr_prev;
kmutex_t *hash_lock;
uint64_t taddr;
- boolean_t rerun;
l2arc_lb_ptr_buf_t *lb_ptr_buf, *lb_ptr_buf_prev;
+ vdev_t *vd = dev->l2ad_vdev;
+ boolean_t rerun;
buflist = &dev->l2ad_buflist;
@@ -8345,6 +8369,14 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
* We need to add in the worst case scenario of log block overhead.
*/
distance += l2arc_log_blk_overhead(distance, dev);
+ if (vd->vdev_has_trim && l2arc_trim_ahead > 0) {
+ /*
+ * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
+ * times the write size, whichever is greater.
+ */
+ distance += MAX(64 * 1024 * 1024,
+ (distance * l2arc_trim_ahead) / 100);
+ }
top:
rerun = B_FALSE;
@@ -8365,25 +8397,51 @@ top:
DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
uint64_t, taddr, boolean_t, all);
- /*
- * This check has to be placed after deciding whether to iterate
- * (rerun).
- */
- if (!all && dev->l2ad_first) {
+ if (!all) {
/*
- * This is the first sweep through the device. There is
- * nothing to evict.
+ * This check has to be placed after deciding whether to
+ * iterate (rerun).
*/
- goto out;
- }
+ if (dev->l2ad_first) {
+ /*
+ * This is the first sweep through the device. There is
+ * nothing to evict. We have already trimmed the
+ * whole device.
+ */
+ goto out;
+ } else {
+ /*
+ * Trim the space to be evicted.
+ */
+ if (vd->vdev_has_trim && dev->l2ad_evict < taddr &&
+ l2arc_trim_ahead > 0) {
+ /*
+ * We have to drop the spa_config lock because
+ * vdev_trim_range() will acquire it.
+ * l2ad_evict already accounts for the label
+ * size. To prevent vdev_trim_ranges() from
+ * adding it again, we subtract it from
+ * l2ad_evict.
+ */
+ spa_config_exit(dev->l2ad_spa, SCL_L2ARC, dev);
+ vdev_trim_simple(vd,
+ dev->l2ad_evict - VDEV_LABEL_START_SIZE,
+ taddr - dev->l2ad_evict);
+ spa_config_enter(dev->l2ad_spa, SCL_L2ARC, dev,
+ RW_READER);
+ }
- /*
- * When rebuilding L2ARC we retrieve the evict hand from the header of
- * the device. Of note, l2arc_evict() does not actually delete buffers
- * from the cache device, but keeping track of the evict hand will be
- * useful when TRIM is implemented.
- */
- dev->l2ad_evict = MAX(dev->l2ad_evict, taddr);
+ /*
+ * When rebuilding L2ARC we retrieve the evict hand
+ * from the header of the device. Of note, l2arc_evict()
+ * does not actually delete buffers from the cache
+ * device, but trimming may do so depending on the
+ * hardware implementation. Thus keeping track of the
+ * evict hand is useful.
+ */
+ dev->l2ad_evict = MAX(dev->l2ad_evict, taddr);
+ }
+ }
retry:
mutex_enter(&dev->l2ad_mtx);
@@ -8410,7 +8468,7 @@ retry:
if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) {
break;
} else {
- vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
+ vdev_space_update(vd, -asize, 0, 0);
ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
@@ -9015,7 +9073,7 @@ l2arc_vdev_present(vdev_t *vd)
* Returns the l2arc_dev_t associated with a particular vdev_t or NULL if
* the vdev_t isn't an L2ARC device.
*/
-static l2arc_dev_t *
+l2arc_dev_t *
l2arc_vdev_get(vdev_t *vd)
{
l2arc_dev_t *dev;
@@ -9059,6 +9117,7 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
adddev->l2ad_evict = adddev->l2ad_start;
adddev->l2ad_first = B_TRUE;
adddev->l2ad_writing = B_FALSE;
+ adddev->l2ad_trim_all = B_FALSE;
list_link_init(&adddev->l2ad_node);
adddev->l2ad_dev_hdr = kmem_zalloc(l2dhdr_asize, KM_SLEEP);
@@ -9164,11 +9223,21 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
dev->l2ad_rebuild = B_TRUE;
} else if (spa_writeable(spa)) {
/*
- * In this case create a new header. We zero out the memory
- * holding the header to reset dh_start_lbps.
+ * In this case TRIM the whole device if l2arc_trim_ahead > 0,
+ * otherwise create a new header. We zero out the memory holding
+ * the header to reset dh_start_lbps. If we TRIM the whole
+ * device the new header will be written by
+ * vdev_trim_l2arc_thread() at the end of the TRIM to update the
+ * trim_state in the header too. When reading the header, if
+ * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
+ * we opt to TRIM the whole device again.
*/
- bzero(l2dhdr, l2dhdr_asize);
- l2arc_dev_hdr_update(dev);
+ if (l2arc_trim_ahead > 0) {
+ dev->l2ad_trim_all = B_TRUE;
+ } else {
+ bzero(l2dhdr, l2dhdr_asize);
+ l2arc_dev_hdr_update(dev);
+ }
}
}
@@ -9385,6 +9454,9 @@ l2arc_rebuild(l2arc_dev_t *dev)
dev->l2ad_start);
dev->l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST);
+ vd->vdev_trim_action_time = l2dhdr->dh_trim_action_time;
+ vd->vdev_trim_state = l2dhdr->dh_trim_state;
+
/*
* In case the zfs module parameter l2arc_rebuild_enabled is false
* we do not start the rebuild process.
@@ -9594,7 +9666,9 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
l2dhdr->dh_log_entries != dev->l2ad_log_entries ||
l2dhdr->dh_end != dev->l2ad_end ||
!l2arc_range_check_overlap(dev->l2ad_start, dev->l2ad_end,
- l2dhdr->dh_evict)) {
+ l2dhdr->dh_evict) ||
+ (l2dhdr->dh_trim_state != VDEV_TRIM_COMPLETE &&
+ l2arc_trim_ahead > 0)) {
/*
* Attempt to rebuild a device containing no actual dev hdr
* or containing a header from some other pool or from another
@@ -9903,7 +9977,7 @@ l2arc_log_blk_fetch_abort(zio_t *zio)
* Creates a zio to update the device header on an l2arc device. The zio is
* initiated as a child of `pio'.
*/
-static void
+void
l2arc_dev_hdr_update(l2arc_dev_t *dev)
{
l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
@@ -9924,6 +9998,8 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
l2dhdr->dh_lb_asize = zfs_refcount_count(&dev->l2ad_lb_asize);
l2dhdr->dh_lb_count = zfs_refcount_count(&dev->l2ad_lb_count);
l2dhdr->dh_flags = 0;
+ l2dhdr->dh_trim_action_time = dev->l2ad_vdev->vdev_trim_action_time;
+ l2dhdr->dh_trim_state = dev->l2ad_vdev->vdev_trim_state;
if (dev->l2ad_first)
l2dhdr->dh_flags |= L2ARC_DEV_HDR_EVICT_FIRST;
@@ -10260,6 +10336,9 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, ULONG, ZMOD_RW,
"Compressed l2arc_headroom multiplier");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, ULONG, ZMOD_RW,
+ "TRIM ahead L2ARC write size multiplier");
+
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, ULONG, ZMOD_RW,
"Seconds between L2ARC writing");
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 87af3073a..ba9a0dce3 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1896,6 +1896,15 @@ spa_load_l2cache(spa_t *spa)
if (!vdev_is_dead(vd))
l2arc_add_vdev(spa, vd);
+
+ /*
+ * Upon cache device addition to a pool or pool
+ * creation with a cache device or if the header
+ * of the device is invalid we issue an async
+ * TRIM command for the whole device which will
+ * execute if l2arc_trim_ahead > 0.
+ */
+ spa_async_request(spa, SPA_ASYNC_L2CACHE_TRIM);
}
}
@@ -7994,6 +8003,17 @@ spa_async_thread(void *arg)
}
/*
+ * Kick off L2 cache whole device TRIM.
+ */
+ if (tasks & SPA_ASYNC_L2CACHE_TRIM) {
+ mutex_enter(&spa_namespace_lock);
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+ vdev_trim_l2arc(spa);
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+ mutex_exit(&spa_namespace_lock);
+ }
+
+ /*
* Kick off L2 cache rebuilding.
*/
if (tasks & SPA_ASYNC_L2CACHE_REBUILD) {
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 923bf2e33..83c39d119 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2281,9 +2281,6 @@ vdev_reopen(vdev_t *vd)
if (vdev_readable(vd) && vdev_writeable(vd) &&
vd->vdev_aux == &spa->spa_l2cache) {
/*
- * When reopening we can assume the device label has
- * already the attribute l2cache_persistent, since we've
- * opened the device in the past and updated the label.
* In case the vdev is present we should evict all ARC
* buffers and pointers to log blocks and reclaim their
* space before restoring its contents to L2ARC.
@@ -2294,6 +2291,7 @@ vdev_reopen(vdev_t *vd)
l2arc_add_vdev(spa, vd);
}
spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
+ spa_async_request(spa, SPA_ASYNC_L2CACHE_TRIM);
}
} else {
(void) vdev_validate(vd);
@@ -3542,9 +3540,14 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
}
mutex_exit(&vd->vdev_initialize_lock);
- /* Restart trimming if necessary */
+ /*
+ * Restart trimming if necessary. We do not restart trimming for cache
+ * devices here. This is triggered by l2arc_rebuild_vdev()
+ * asynchronously for the whole device or in l2arc_evict() as it evicts
+ * space for upcoming writes.
+ */
mutex_enter(&vd->vdev_trim_lock);
- if (vdev_writeable(vd) &&
+ if (vdev_writeable(vd) && !vd->vdev_isl2cache &&
vd->vdev_trim_thread == NULL &&
vd->vdev_trim_state == VDEV_TRIM_ACTIVE) {
(void) vdev_trim(vd, vd->vdev_trim_rate, vd->vdev_trim_partial,
diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c
index 3f4f9091f..56e420871 100644
--- a/module/zfs/vdev_removal.c
+++ b/module/zfs/vdev_removal.c
@@ -2224,6 +2224,20 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
* Cache devices can always be removed.
*/
vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+
+ /*
+ * Stop trimming the cache device. We need to release the
+ * config lock to allow the syncing of TRIM transactions
+ * without releasing the spa_namespace_lock. The same
+ * strategy is employed in spa_vdev_remove_top().
+ */
+ spa_vdev_config_exit(spa, NULL,
+ txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
+ mutex_enter(&vd->vdev_trim_lock);
+ vdev_trim_stop(vd, VDEV_TRIM_CANCELED, NULL);
+ mutex_exit(&vd->vdev_trim_lock);
+ txg = spa_vdev_config_enter(spa);
+
ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_AUX);
spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c
index b0cd40f68..0254c2904 100644
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@@ -34,6 +34,7 @@
#include <sys/dsl_synctask.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
+#include <sys/arc_impl.h>
/*
* TRIM is a feature which is used to notify a SSD that some previously
@@ -423,6 +424,35 @@ vdev_autotrim_cb(zio_t *zio)
}
/*
+ * The zio_done_func_t done callback for each TRIM issued via
+ * vdev_trim_simple(). It is responsible for updating the TRIM stats and
+ * limiting the number of in flight TRIM I/Os. Simple TRIM I/Os are best
+ * effort and are never reissued on failure.
+ */
+static void
+vdev_trim_simple_cb(zio_t *zio)
+{
+ vdev_t *vd = zio->io_vd;
+
+ mutex_enter(&vd->vdev_trim_io_lock);
+
+ if (zio->io_error != 0) {
+ vd->vdev_stat.vs_trim_errors++;
+ spa_iostats_trim_add(vd->vdev_spa, TRIM_TYPE_SIMPLE,
+ 0, 0, 0, 0, 1, zio->io_orig_size);
+ } else {
+ spa_iostats_trim_add(vd->vdev_spa, TRIM_TYPE_SIMPLE,
+ 1, zio->io_orig_size, 0, 0, 0, 0);
+ }
+
+ ASSERT3U(vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE], >, 0);
+ vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE]--;
+ cv_broadcast(&vd->vdev_trim_io_cv);
+ mutex_exit(&vd->vdev_trim_io_lock);
+
+ spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+}
+/*
* Returns the average trim rate in bytes/sec for the ta->trim_vdev.
*/
static uint64_t
@@ -441,6 +471,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
{
vdev_t *vd = ta->trim_vdev;
spa_t *spa = vd->vdev_spa;
+ void *cb;
mutex_enter(&vd->vdev_trim_io_lock);
@@ -459,8 +490,8 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
ta->trim_bytes_done += size;
/* Limit in flight trimming I/Os */
- while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] >=
- zfs_trim_queue_limit) {
+ while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] +
+ vd->vdev_trim_inflight[2] >= zfs_trim_queue_limit) {
cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
}
vd->vdev_trim_inflight[ta->trim_type]++;
@@ -505,10 +536,17 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
if (ta->trim_type == TRIM_TYPE_MANUAL)
vd->vdev_trim_offset[txg & TXG_MASK] = start + size;
+ if (ta->trim_type == TRIM_TYPE_MANUAL) {
+ cb = vdev_trim_cb;
+ } else if (ta->trim_type == TRIM_TYPE_AUTO) {
+ cb = vdev_autotrim_cb;
+ } else {
+ cb = vdev_trim_simple_cb;
+ }
+
zio_nowait(zio_trim(spa->spa_txg_zio[txg & TXG_MASK], vd,
- start, size, ta->trim_type == TRIM_TYPE_MANUAL ?
- vdev_trim_cb : vdev_autotrim_cb, NULL,
- ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL, ta->trim_flags));
+ start, size, cb, NULL, ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL,
+ ta->trim_flags));
/* vdev_trim_cb and vdev_autotrim_cb release SCL_STATE_ALL */
dmu_tx_commit(tx);
@@ -1016,6 +1054,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
{
spa_t *spa = vd->vdev_spa;
list_t vd_list;
+ vdev_t *vd_l2cache;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -1023,6 +1062,17 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
offsetof(vdev_t, vdev_trim_node));
vdev_trim_stop_all_impl(vd, tgt_state, &vd_list);
+
+ /*
+ * Iterate over cache devices and request that trimming of the
+ * whole device be stopped in case we export the pool or remove
+ * the cache device prematurely.
+ */
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+ vd_l2cache = spa->spa_l2cache.sav_vdevs[i];
+ vdev_trim_stop_all_impl(vd_l2cache, tgt_state, &vd_list);
+ }
+
vdev_trim_stop_wait(spa, &vd_list);
if (vd->vdev_spa->spa_sync_on) {
@@ -1437,6 +1487,189 @@ vdev_autotrim_restart(spa_t *spa)
vdev_autotrim(spa);
}
+static void
+vdev_trim_l2arc_thread(void *arg)
+{
+ vdev_t *vd = arg;
+ spa_t *spa = vd->vdev_spa;
+ l2arc_dev_t *dev = l2arc_vdev_get(vd);
+ trim_args_t ta;
+ range_seg64_t physical_rs;
+
+ ASSERT(vdev_is_concrete(vd));
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ vd->vdev_trim_last_offset = 0;
+ vd->vdev_trim_rate = 0;
+ vd->vdev_trim_partial = 0;
+ vd->vdev_trim_secure = 0;
+
+ bzero(&ta, sizeof (ta));
+ ta.trim_vdev = vd;
+ ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+ ta.trim_type = TRIM_TYPE_MANUAL;
+ ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+ ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+ ta.trim_flags = 0;
+
+ physical_rs.rs_start = vd->vdev_trim_bytes_done = 0;
+ physical_rs.rs_end = vd->vdev_trim_bytes_est =
+ vdev_get_min_asize(vd);
+
+ range_tree_add(ta.trim_tree, physical_rs.rs_start,
+ physical_rs.rs_end - physical_rs.rs_start);
+
+ mutex_enter(&vd->vdev_trim_lock);
+ vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+ mutex_exit(&vd->vdev_trim_lock);
+
+ (void) vdev_trim_ranges(&ta);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+ mutex_enter(&vd->vdev_trim_io_lock);
+ while (vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] > 0) {
+ cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+ }
+ mutex_exit(&vd->vdev_trim_io_lock);
+
+ range_tree_vacate(ta.trim_tree, NULL, NULL);
+ range_tree_destroy(ta.trim_tree);
+
+ mutex_enter(&vd->vdev_trim_lock);
+ if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
+ vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+ vd->vdev_trim_rate, vd->vdev_trim_partial,
+ vd->vdev_trim_secure);
+ }
+ ASSERT(vd->vdev_trim_thread != NULL ||
+ vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] == 0);
+
+ /*
+ * Drop the vdev_trim_lock while we sync out the txg since it's
+ * possible that a device might be trying to come online and
+ * must check to see if it needs to restart a trim. That thread
+ * will be holding the spa_config_lock which would prevent the
+ * txg_wait_synced from completing. Same strategy as in
+ * vdev_trim_thread().
+ */
+ mutex_exit(&vd->vdev_trim_lock);
+ txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
+ mutex_enter(&vd->vdev_trim_lock);
+
+ /*
+ * Update the header of the cache device here, before
+ * broadcasting vdev_trim_cv which may lead to the removal
+ * of the device. The same applies for setting l2ad_trim_all to
+ * false.
+ */
+ spa_config_enter(vd->vdev_spa, SCL_L2ARC, vd,
+ RW_READER);
+ bzero(dev->l2ad_dev_hdr, dev->l2ad_dev_hdr_asize);
+ l2arc_dev_hdr_update(dev);
+ spa_config_exit(vd->vdev_spa, SCL_L2ARC, vd);
+
+ vd->vdev_trim_thread = NULL;
+ if (vd->vdev_trim_state == VDEV_TRIM_COMPLETE)
+ dev->l2ad_trim_all = B_FALSE;
+
+ cv_broadcast(&vd->vdev_trim_cv);
+ mutex_exit(&vd->vdev_trim_lock);
+
+ thread_exit();
+}
+
+/*
+ * Punches out TRIM threads for the L2ARC devices in a spa and assigns them
+ * to vd->vdev_trim_thread variable. This facilitates the management of
+ * trimming the whole cache device using TRIM_TYPE_MANUAL upon addition
+ * to a pool or pool creation or when the header of the device is invalid.
+ */
+void
+vdev_trim_l2arc(spa_t *spa)
+{
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+ /*
+ * Locate the spa's l2arc devices and kick off TRIM threads.
+ */
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+ vdev_t *vd = spa->spa_l2cache.sav_vdevs[i];
+ l2arc_dev_t *dev = l2arc_vdev_get(vd);
+
+ if (dev == NULL || !dev->l2ad_trim_all) {
+ /*
+ * Don't attempt TRIM if the vdev is UNAVAIL or if the
+ * cache device was not marked for whole device TRIM
+ * (ie l2arc_trim_ahead = 0, or the L2ARC device header
+ * is valid with trim_state = VDEV_TRIM_COMPLETE and
+ * l2ad_log_entries > 0).
+ */
+ continue;
+ }
+
+ mutex_enter(&vd->vdev_trim_lock);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ ASSERT(vdev_is_concrete(vd));
+ ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT(!vd->vdev_detached);
+ ASSERT(!vd->vdev_trim_exit_wanted);
+ ASSERT(!vd->vdev_top->vdev_removing);
+ vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+ vd->vdev_trim_thread = thread_create(NULL, 0,
+ vdev_trim_l2arc_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+ mutex_exit(&vd->vdev_trim_lock);
+ }
+}
+
+/*
+ * A wrapper which calls vdev_trim_ranges(). It is intended to be called
+ * on leaf vdevs.
+ */
+int
+vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
+{
+ trim_args_t ta;
+ range_seg64_t physical_rs;
+ int error;
+ physical_rs.rs_start = start;
+ physical_rs.rs_end = start + size;
+
+ ASSERT(vdev_is_concrete(vd));
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ ASSERT(!vd->vdev_detached);
+ ASSERT(!vd->vdev_top->vdev_removing);
+
+ bzero(&ta, sizeof (ta));
+ ta.trim_vdev = vd;
+ ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+ ta.trim_type = TRIM_TYPE_SIMPLE;
+ ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+ ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+ ta.trim_flags = 0;
+
+ ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
+
+ if (physical_rs.rs_end > physical_rs.rs_start) {
+ range_tree_add(ta.trim_tree, physical_rs.rs_start,
+ physical_rs.rs_end - physical_rs.rs_start);
+ } else {
+ ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
+ }
+
+ error = vdev_trim_ranges(&ta);
+
+ mutex_enter(&vd->vdev_trim_io_lock);
+ while (vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE] > 0) {
+ cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+ }
+ mutex_exit(&vd->vdev_trim_io_lock);
+
+ range_tree_vacate(ta.trim_tree, NULL, NULL);
+ range_tree_destroy(ta.trim_tree);
+
+ return (error);
+}
+
EXPORT_SYMBOL(vdev_trim);
EXPORT_SYMBOL(vdev_trim_stop);
EXPORT_SYMBOL(vdev_trim_stop_all);
@@ -1446,6 +1679,8 @@ EXPORT_SYMBOL(vdev_autotrim);
EXPORT_SYMBOL(vdev_autotrim_stop_all);
EXPORT_SYMBOL(vdev_autotrim_stop_wait);
EXPORT_SYMBOL(vdev_autotrim_restart);
+EXPORT_SYMBOL(vdev_trim_l2arc);
+EXPORT_SYMBOL(vdev_trim_simple);
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_max, UINT, ZMOD_RW,
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 01bab0870..cbad90ad1 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -832,7 +832,7 @@ tags = ['functional', 'threadsappend']
[tests/functional/trim]
tests = ['autotrim_integrity', 'autotrim_config', 'autotrim_trim_integrity',
- 'trim_integrity', 'trim_config']
+ 'trim_integrity', 'trim_config', 'trim_l2arc']
tags = ['functional', 'trim']
[tests/functional/truncate]
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index efbcc09e7..c450764db 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -38,6 +38,7 @@ KEEP_LOG_SPACEMAPS_AT_EXPORT keep_log_spacemaps_at_export zfs_keep_log_spacemaps
L2ARC_NOPREFETCH l2arc.noprefetch l2arc_noprefetch
L2ARC_REBUILD_BLOCKS_MIN_L2SIZE l2arc.rebuild_blocks_min_l2size l2arc_rebuild_blocks_min_l2size
L2ARC_REBUILD_ENABLED l2arc.rebuild_enabled l2arc_rebuild_enabled
+L2ARC_TRIM_AHEAD UNSUPPORTED l2arc_trim_ahead
L2ARC_WRITE_BOOST l2arc.write_boost l2arc_write_boost
L2ARC_WRITE_MAX l2arc.write_max l2arc_write_max
LIVELIST_CONDENSE_NEW_ALLOC livelist.condense.new_alloc zfs_livelist_condense_new_alloc
diff --git a/tests/zfs-tests/tests/functional/trim/Makefile.am b/tests/zfs-tests/tests/functional/trim/Makefile.am
index 4f260a8e4..8917ed726 100644
--- a/tests/zfs-tests/tests/functional/trim/Makefile.am
+++ b/tests/zfs-tests/tests/functional/trim/Makefile.am
@@ -8,4 +8,5 @@ dist_pkgdata_SCRIPTS = \
autotrim_config.ksh \
autotrim_trim_integrity.ksh \
trim_integrity.ksh \
- trim_config.ksh
+ trim_config.ksh \
+ trim_l2arc.ksh
diff --git a/tests/zfs-tests/tests/functional/trim/trim.kshlib b/tests/zfs-tests/tests/functional/trim/trim.kshlib
index 7f1bcdacf..bede946a0 100644
--- a/tests/zfs-tests/tests/functional/trim/trim.kshlib
+++ b/tests/zfs-tests/tests/functional/trim/trim.kshlib
@@ -33,17 +33,18 @@ function get_trim_io
{
typeset pool="${1-:$TESTPOOL}"
typeset type="${2-:ind}"
+ typeset vdev="${3}"
typeset rval
# Sum the ind or agg columns of the trim request size histogram.
case "$type" in
"ind")
- rval=$(zpool iostat -pr $pool | awk \
+ rval=$(zpool iostat -pr $pool $vdev | awk \
'$1 ~ /[0-9].*/ { sum += $12 } END { print sum }')
echo -n "$rval"
;;
"agg")
- rval=$(zpool iostat -pr $pool | awk \
+ rval=$(zpool iostat -pr $pool $vdev | awk \
'$1 ~ /[0-9].*/ { sum += $13 } END { print sum }')
echo -n "$rval"
;;
@@ -61,9 +62,10 @@ function verify_trim_io
typeset pool="${1:-$TESTPOOL}"
typeset type="${2:-ind}"
typeset min_trim_ios=${3:-100}
+ typeset vdev="${4}"
typeset ios
- ios=$(get_trim_io $pool $type)
+ ios=$(get_trim_io $pool $type $vdev)
if [[ $ios -ge $min_trim_ios ]]; then
log_note "Issued $ios $type trim IOs for pool $pool"
else
diff --git a/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh b/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh
new file mode 100755
index 000000000..ecf9f3424
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh
@@ -0,0 +1,106 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/trim/trim.kshlib
+. $STF_SUITE/tests/functional/trim/trim.cfg
+
+#
+# DESCRIPTION:
+# Verify trimming of L2ARC
+#
+# STRATEGY:
+# 1. Set 'l2arc_trim_ahead = 1' and 'l2arc_write_max = 64MB'.
+# 2. Create a pool on file vdevs to trim.
+# 3. Verify the cache device was trimmed.
+# 4. Fill the pool with a file larger than the L2ARC vdev.
+# 5. Randomly read the previously written file long enough for the
+# L2ARC vdev to be filled and overwritten 5 times.
+# 6. Verify trim IOs of the expected type were issued for the pool.
+# 7. Verify the allocated space on the cache device is less than
+# its size.
+#
+
+verify_runnable "global"
+
+log_assert "Trim of L2ARC succeeds."
+
+function cleanup
+{
+	# Tear the test pool down only when it still exists.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	# Drop the backing files, then put both tunables back to the
+	# values captured before the test changed them.
+	log_must rm -f $VDEVS
+	log_must set_tunable32 L2ARC_TRIM_AHEAD $l2arc_trimahead
+	log_must set_tunable32 L2ARC_WRITE_MAX $l2arc_writemax
+}
+log_onexit cleanup
+
+# The cache device $TRIM_VDEV2 has to be small enough, so that
+# dev->l2ad_hand loops around and dev->l2ad_first=0. Otherwise
+# l2arc_evict() exits before evicting/trimming.
+#
+# Remember the current tunable values first; cleanup() restores them.
+typeset l2arc_trimahead=$(get_tunable L2ARC_TRIM_AHEAD)
+typeset l2arc_writemax=$(get_tunable L2ARC_WRITE_MAX)
+log_must set_tunable32 L2ARC_TRIM_AHEAD 1
+log_must set_tunable32 L2ARC_WRITE_MAX $((64 * 1024 * 1024))
+VDEVS="$TRIM_VDEV1 $TRIM_VDEV2"
+# Cache file is MINVDEVSIZE; the data vdev file is four times that.
+log_must truncate -s $((MINVDEVSIZE)) $TRIM_VDEV2
+log_must truncate -s $((4 * MINVDEVSIZE)) $TRIM_VDEV1
+# 30% of MINVDEVSIZE, scaled from bytes down by 1024 * 1024.
+typeset VDEV_MIN_MB=$((MINVDEVSIZE * 0.30 / 1024 / 1024))
+
+log_must zpool create -f $TESTPOOL $TRIM_VDEV1 cache $TRIM_VDEV2
+# NOTE(review): verify_vdevs is defined outside this file; presumably it
+# compares the cache file's on-disk usage against VDEV_MIN_MB -- confirm.
+verify_vdevs "-le" "$VDEV_MIN_MB" $TRIM_VDEV2
+
+# NOTE(review): despite the "_mb" suffix this is 2 * MINVDEVSIZE with no
+# unit conversion, i.e. the same unit that truncate -s was given above.
+typeset fill_mb=$(( floor(2 * MINVDEVSIZE) ))
+# Settings exported for the fio job files under $FIO_SCRIPTS; how each
+# one is interpreted is defined by those job files, not here.
+export DIRECTORY=/$TESTPOOL
+export NUMJOBS=1
+export FILE_SIZE=${fill_mb}
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export RUNTIME=30
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+
+# Write to the pool.
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+
+# Read randomly from the pool to fill L2ARC.
+# (RUNTIME is re-exported with the same value it already has.)
+export RUNTIME=30
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+# Keep running short (RUNTIME=1) read passes until the l2_size arcstat is
+# smaller after a pass than before it. do_once forces the first iteration
+# because l2_size1/l2_size2 are not set until the loop body has run.
+export RUNTIME=1
+typeset do_once=true
+while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
+ typeset l2_size1=$(get_arcstat l2_size)
+ log_must fio $FIO_SCRIPTS/random_reads.fio
+ typeset l2_size2=$(get_arcstat l2_size)
+ do_once=false
+done
+
+# Require at least 5 individual ("ind") trim IOs on the cache vdev.
+verify_trim_io $TESTPOOL "ind" 5 $TRIM_VDEV2
+
+# Fields 2 and 3 of the cache device's row in `zpool list -vp`;
+# presumably SIZE and ALLOC -- confirm against zpool-list(8).
+typeset cache_size=$(zpool list -vp | grep $TRIM_VDEV2 | awk '{print $2}')
+typeset cache_alloc=$(zpool list -vp | grep $TRIM_VDEV2 | awk '{print $3}')
+
+log_must test $cache_alloc -lt $cache_size
+
+log_must zpool destroy $TESTPOOL
+log_must rm -f $VDEVS
+
+log_pass "Trim of L2ARC succeeds."