5 files changed, 392 insertions, 41 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 29da08a49..e7ad976af 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -301,6 +301,7 @@
 #include <sys/trace_zfs.h>
 #include <sys/aggsum.h>
 #include <cityhash.h>
+#include <sys/vdev_trim.h>
 
 #ifndef _KERNEL
 /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
@@ -854,7 +855,6 @@ static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t);
 static void arc_access(arc_buf_hdr_t *, kmutex_t *);
 static boolean_t arc_is_overflowing(void);
 static void arc_buf_watch(arc_buf_t *);
-static l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
 
 static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
 static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
@@ -865,6 +865,23 @@ static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
 
 /*
+ * L2ARC TRIM
+ * l2arc_trim_ahead : A ZFS module parameter that controls how much ahead of
+ * 		the current write size (l2arc_write_max) we should TRIM if we
+ * 		have filled the device. It is defined as a percentage of the
+ * 		write size. If set to 100 we trim twice the space required to
+ * 		accommodate upcoming writes. A minimum of 64MB will be trimmed.
+ * 		It also enables TRIM of the whole L2ARC device upon creation or
+ * 		addition to an existing pool or if the header of the device is
+ * 		invalid upon importing a pool or onlining a cache device. The
+ * 		default is 0, which disables TRIM on L2ARC altogether as it can
+ * 		put significant stress on the underlying storage devices. This
+ * 		will vary depending on how well the specific device handles
+ * 		these commands.
+ */
+unsigned long l2arc_trim_ahead = 0;
+
+/*
  * Performance tuning of L2ARC persistence:
  *
  * l2arc_rebuild_enabled : A ZFS module parameter that controls whether adding
@@ -902,7 +919,6 @@ static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
     l2arc_dev_t *dev);
 
 /* L2ARC persistence write I/O routines. */
-static void l2arc_dev_hdr_update(l2arc_dev_t *dev);
 static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
     l2arc_write_callback_t *cb);
 
@@ -7709,7 +7725,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
 static uint64_t
 l2arc_write_size(l2arc_dev_t *dev)
 {
-	uint64_t size, dev_size;
+	uint64_t size, dev_size, tsize;
 
 	/*
 	 * Make sure our globals have meaningful values in case the user
@@ -7732,7 +7748,12 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 * iteration can occur.
 	 */
 	dev_size = dev->l2ad_end - dev->l2ad_start;
-	if ((size + l2arc_log_blk_overhead(size, dev)) >= dev_size) {
+	tsize = size + l2arc_log_blk_overhead(size, dev);
+	if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0)
+		tsize += MAX(64 * 1024 * 1024,
+		    (tsize * l2arc_trim_ahead) / 100);
+
+	if (tsize >= dev_size) {
 		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
 		    "plus the overhead of log blocks (persistent L2ARC, "
 		    "%llu bytes) exceeds the size of the cache device "
@@ -7810,10 +7831,12 @@ l2arc_dev_get_next(void)
 		else if (next == first)
 			break;
 
-	} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild);
+	} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
+	    next->l2ad_trim_all);
 
 	/* if we were unable to find any usable vdevs, return NULL */
-	if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild)
+	if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
+	    next->l2ad_trim_all)
 		next = NULL;
 
 	l2arc_dev_last = next;
@@ -8336,8 +8359,9 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
 	arc_buf_hdr_t *hdr, *hdr_prev;
 	kmutex_t *hash_lock;
 	uint64_t taddr;
-	boolean_t rerun;
 	l2arc_lb_ptr_buf_t *lb_ptr_buf, *lb_ptr_buf_prev;
+	vdev_t *vd = dev->l2ad_vdev;
+	boolean_t rerun;
 
 	buflist = &dev->l2ad_buflist;
 
@@ -8345,6 +8369,14 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
 	 * We need to add in the worst case scenario of log block overhead.
 	 */
 	distance += l2arc_log_blk_overhead(distance, dev);
+	if (vd->vdev_has_trim && l2arc_trim_ahead > 0) {
+		/*
+		 * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
+		 * times the write size, whichever is greater.
+		 */
+		distance += MAX(64 * 1024 * 1024,
+		    (distance * l2arc_trim_ahead) / 100);
+	}
 
 top:
 	rerun = B_FALSE;
@@ -8365,25 +8397,51 @@ top:
 	DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
 	    uint64_t, taddr, boolean_t, all);
 
-	/*
-	 * This check has to be placed after deciding whether to iterate
-	 * (rerun).
-	 */
-	if (!all && dev->l2ad_first) {
+	if (!all) {
 		/*
-		 * This is the first sweep through the device. There is
-		 * nothing to evict.
+		 * This check has to be placed after deciding whether to
+		 * iterate (rerun).
 		 */
-		goto out;
-	}
+		if (dev->l2ad_first) {
+			/*
+			 * This is the first sweep through the device. There is
+			 * nothing to evict. We have already trimmed the
+			 * whole device.
+			 */
+			goto out;
+		} else {
+			/*
+			 * Trim the space to be evicted.
+			 */
+			if (vd->vdev_has_trim && dev->l2ad_evict < taddr &&
+			    l2arc_trim_ahead > 0) {
+				/*
+				 * We have to drop the spa_config lock because
+				 * vdev_trim_range() will acquire it.
+				 * l2ad_evict already accounts for the label
+				 * size. To prevent vdev_trim_ranges() from
+				 * adding it again, we subtract it from
+				 * l2ad_evict.
+				 */
+				spa_config_exit(dev->l2ad_spa, SCL_L2ARC, dev);
+				vdev_trim_simple(vd,
+				    dev->l2ad_evict - VDEV_LABEL_START_SIZE,
+				    taddr - dev->l2ad_evict);
+				spa_config_enter(dev->l2ad_spa, SCL_L2ARC, dev,
+				    RW_READER);
+			}
 
-	/*
-	 * When rebuilding L2ARC we retrieve the evict hand from the header of
-	 * the device. Of note, l2arc_evict() does not actually delete buffers
-	 * from the cache device, but keeping track of the evict hand will be
-	 * useful when TRIM is implemented.
-	 */
-	dev->l2ad_evict = MAX(dev->l2ad_evict, taddr);
+			/*
+			 * When rebuilding L2ARC we retrieve the evict hand
+			 * from the header of the device. Of note, l2arc_evict()
+			 * does not actually delete buffers from the cache
+			 * device, but trimming may do so depending on the
+			 * hardware implementation. Thus keeping track of the
+			 * evict hand is useful.
+			 */
+			dev->l2ad_evict = MAX(dev->l2ad_evict, taddr);
+		}
+	}
 
 retry:
 	mutex_enter(&dev->l2ad_mtx);
@@ -8410,7 +8468,7 @@ retry:
 		if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) {
 			break;
 		} else {
-			vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
+			vdev_space_update(vd, -asize, 0, 0);
 			ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
 			ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
 			zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
@@ -9015,7 +9073,7 @@ l2arc_vdev_present(vdev_t *vd)
  * Returns the l2arc_dev_t associated with a particular vdev_t or NULL if
  * the vdev_t isn't an L2ARC device.
  */
-static l2arc_dev_t *
+l2arc_dev_t *
 l2arc_vdev_get(vdev_t *vd)
 {
 	l2arc_dev_t	*dev;
@@ -9059,6 +9117,7 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
 	adddev->l2ad_evict = adddev->l2ad_start;
 	adddev->l2ad_first = B_TRUE;
 	adddev->l2ad_writing = B_FALSE;
+	adddev->l2ad_trim_all = B_FALSE;
 	list_link_init(&adddev->l2ad_node);
 	adddev->l2ad_dev_hdr = kmem_zalloc(l2dhdr_asize, KM_SLEEP);
 
@@ -9164,11 +9223,21 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
 		dev->l2ad_rebuild = B_TRUE;
 	} else if (spa_writeable(spa)) {
 		/*
-		 * In this case create a new header. We zero out the memory
-		 * holding the header to reset dh_start_lbps.
+		 * In this case TRIM the whole device if l2arc_trim_ahead > 0,
+		 * otherwise create a new header. We zero out the memory holding
+		 * the header to reset dh_start_lbps. If we TRIM the whole
+		 * device the new header will be written by
+		 * vdev_trim_l2arc_thread() at the end of the TRIM to update the
+		 * trim_state in the header too. When reading the header, if
+		 * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
+		 * we opt to TRIM the whole device again.
 		 */
-		bzero(l2dhdr, l2dhdr_asize);
-		l2arc_dev_hdr_update(dev);
+		if (l2arc_trim_ahead > 0) {
+			dev->l2ad_trim_all = B_TRUE;
+		} else {
+			bzero(l2dhdr, l2dhdr_asize);
+			l2arc_dev_hdr_update(dev);
+		}
 	}
 }
 
@@ -9385,6 +9454,9 @@ l2arc_rebuild(l2arc_dev_t *dev)
 	    dev->l2ad_start);
 	dev->l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST);
 
+	vd->vdev_trim_action_time = l2dhdr->dh_trim_action_time;
+	vd->vdev_trim_state = l2dhdr->dh_trim_state;
+
 	/*
 	 * In case the zfs module parameter l2arc_rebuild_enabled is false
 	 * we do not start the rebuild process.
@@ -9594,7 +9666,9 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
 	    l2dhdr->dh_log_entries != dev->l2ad_log_entries ||
 	    l2dhdr->dh_end != dev->l2ad_end ||
 	    !l2arc_range_check_overlap(dev->l2ad_start, dev->l2ad_end,
-	    l2dhdr->dh_evict)) {
+	    l2dhdr->dh_evict) ||
+	    (l2dhdr->dh_trim_state != VDEV_TRIM_COMPLETE &&
+	    l2arc_trim_ahead > 0)) {
 		/*
 		 * Attempt to rebuild a device containing no actual dev hdr
 		 * or containing a header from some other pool or from another
@@ -9903,7 +9977,7 @@ l2arc_log_blk_fetch_abort(zio_t *zio)
  * Creates a zio to update the device header on an l2arc device. The zio is
  * initiated as a child of `pio'.
  */
-static void
+void
 l2arc_dev_hdr_update(l2arc_dev_t *dev)
 {
 	l2arc_dev_hdr_phys_t	*l2dhdr = dev->l2ad_dev_hdr;
@@ -9924,6 +9998,8 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
 	l2dhdr->dh_lb_asize = zfs_refcount_count(&dev->l2ad_lb_asize);
 	l2dhdr->dh_lb_count = zfs_refcount_count(&dev->l2ad_lb_count);
 	l2dhdr->dh_flags = 0;
+	l2dhdr->dh_trim_action_time = dev->l2ad_vdev->vdev_trim_action_time;
+	l2dhdr->dh_trim_state = dev->l2ad_vdev->vdev_trim_state;
 	if (dev->l2ad_first)
 		l2dhdr->dh_flags |= L2ARC_DEV_HDR_EVICT_FIRST;
 
@@ -10260,6 +10336,9 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, ULONG, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, ULONG, ZMOD_RW,
 	"Compressed l2arc_headroom multiplier");
 
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, ULONG, ZMOD_RW,
+	"TRIM ahead L2ARC write size multiplier");
+
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, ULONG, ZMOD_RW,
 	"Seconds between L2ARC writing");
 
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 87af3073a..ba9a0dce3 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1896,6 +1896,15 @@ spa_load_l2cache(spa_t *spa)
 
 			if (!vdev_is_dead(vd))
 				l2arc_add_vdev(spa, vd);
+
+			/*
+			 * Upon cache device addition to a pool or pool
+			 * creation with a cache device or if the header
+			 * of the device is invalid we issue an async
+			 * TRIM command for the whole device which will
+			 * execute if l2arc_trim_ahead > 0.
+			 */
+			spa_async_request(spa, SPA_ASYNC_L2CACHE_TRIM);
 		}
 	}
 
@@ -7994,6 +8003,17 @@ spa_async_thread(void *arg)
 	}
 
 	/*
+	 * Kick off L2 cache whole device TRIM.
+	 */
+	if (tasks & SPA_ASYNC_L2CACHE_TRIM) {
+		mutex_enter(&spa_namespace_lock);
+		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+		vdev_trim_l2arc(spa);
+		spa_config_exit(spa, SCL_CONFIG, FTAG);
+		mutex_exit(&spa_namespace_lock);
+	}
+
+	/*
 	 * Kick off L2 cache rebuilding.
 	 */
 	if (tasks & SPA_ASYNC_L2CACHE_REBUILD) {
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 923bf2e33..83c39d119 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2281,9 +2281,6 @@ vdev_reopen(vdev_t *vd)
 		if (vdev_readable(vd) && vdev_writeable(vd) &&
 		    vd->vdev_aux == &spa->spa_l2cache) {
 			/*
-			 * When reopening we can assume the device label has
-			 * already the attribute l2cache_persistent, since we've
-			 * opened the device in the past and updated the label.
 			 * In case the vdev is present we should evict all ARC
 			 * buffers and pointers to log blocks and reclaim their
 			 * space before restoring its contents to L2ARC.
@@ -2294,6 +2291,7 @@ vdev_reopen(vdev_t *vd)
 				l2arc_add_vdev(spa, vd);
 			}
 			spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
+			spa_async_request(spa, SPA_ASYNC_L2CACHE_TRIM);
 		}
 	} else {
 		(void) vdev_validate(vd);
@@ -3542,9 +3540,14 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
 	}
 	mutex_exit(&vd->vdev_initialize_lock);
 
-	/* Restart trimming if necessary */
+	/*
+	 * Restart trimming if necessary. We do not restart trimming for cache
+	 * devices here. This is triggered by l2arc_rebuild_vdev()
+	 * asynchronously for the whole device or in l2arc_evict() as it evicts
+	 * space for upcoming writes.
+	 */
 	mutex_enter(&vd->vdev_trim_lock);
-	if (vdev_writeable(vd) &&
+	if (vdev_writeable(vd) && !vd->vdev_isl2cache &&
 	    vd->vdev_trim_thread == NULL &&
 	    vd->vdev_trim_state == VDEV_TRIM_ACTIVE) {
 		(void) vdev_trim(vd, vd->vdev_trim_rate, vd->vdev_trim_partial,
diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c
index 3f4f9091f..56e420871 100644
--- a/module/zfs/vdev_removal.c
+++ b/module/zfs/vdev_removal.c
@@ -2224,6 +2224,20 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
 		 * Cache devices can always be removed.
 		 */
 		vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+
+		/*
+		 * Stop trimming the cache device. We need to release the
+		 * config lock to allow the syncing of TRIM transactions
+		 * without releasing the spa_namespace_lock. The same
+		 * strategy is employed in spa_vdev_remove_top().
+		 */
+		spa_vdev_config_exit(spa, NULL,
+		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
+		mutex_enter(&vd->vdev_trim_lock);
+		vdev_trim_stop(vd, VDEV_TRIM_CANCELED, NULL);
+		mutex_exit(&vd->vdev_trim_lock);
+		txg = spa_vdev_config_enter(spa);
+
 		ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_AUX);
 		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c
index b0cd40f68..0254c2904 100644
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@@ -34,6 +34,7 @@
 #include <sys/dsl_synctask.h>
 #include <sys/zap.h>
 #include <sys/dmu_tx.h>
+#include <sys/arc_impl.h>
 
 /*
  * TRIM is a feature which is used to notify a SSD that some previously
@@ -423,6 +424,35 @@ vdev_autotrim_cb(zio_t *zio)
 }
 
 /*
+ * The zio_done_func_t done callback for each TRIM issued via
+ * vdev_trim_simple(). It is responsible for updating the TRIM stats and
+ * limiting the number of in flight TRIM I/Os.  Simple TRIM I/Os are best
+ * effort and are never reissued on failure.
+ */
+static void
+vdev_trim_simple_cb(zio_t *zio)
+{
+	vdev_t *vd = zio->io_vd;
+	spa_t *spa = vd->vdev_spa;
+
+	mutex_enter(&vd->vdev_trim_io_lock);
+	if (zio->io_error == 0) {
+		spa_iostats_trim_add(spa, TRIM_TYPE_SIMPLE,
+		    1, zio->io_orig_size, 0, 0, 0, 0);
+	} else {
+		vd->vdev_stat.vs_trim_errors++;
+		spa_iostats_trim_add(spa, TRIM_TYPE_SIMPLE,
+		    0, 0, 0, 0, 1, zio->io_orig_size);
+	}
+
+	/* This I/O is no longer in flight; wake any waiters on the count. */
+	ASSERT3U(vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE], >, 0);
+	vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE]--;
+	cv_broadcast(&vd->vdev_trim_io_cv);
+	mutex_exit(&vd->vdev_trim_io_lock);
+	spa_config_exit(spa, SCL_STATE_ALL, vd);
+}
+/*
  * Returns the average trim rate in bytes/sec for the ta->trim_vdev.
  */
 static uint64_t
@@ -441,6 +471,7 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
 {
 	vdev_t *vd = ta->trim_vdev;
 	spa_t *spa = vd->vdev_spa;
+	void *cb;
 
 	mutex_enter(&vd->vdev_trim_io_lock);
 
@@ -459,8 +490,8 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
 	ta->trim_bytes_done += size;
 
 	/* Limit in flight trimming I/Os */
-	while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] >=
-	    zfs_trim_queue_limit) {
+	while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] +
+	    vd->vdev_trim_inflight[2] >= zfs_trim_queue_limit) {
 		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
 	}
 	vd->vdev_trim_inflight[ta->trim_type]++;
@@ -505,10 +536,17 @@ vdev_trim_range(trim_args_t *ta, uint64_t start, uint64_t size)
 	if (ta->trim_type == TRIM_TYPE_MANUAL)
 		vd->vdev_trim_offset[txg & TXG_MASK] = start + size;
 
+	if (ta->trim_type == TRIM_TYPE_MANUAL) {
+		cb = vdev_trim_cb;
+	} else if (ta->trim_type == TRIM_TYPE_AUTO) {
+		cb = vdev_autotrim_cb;
+	} else {
+		cb = vdev_trim_simple_cb;
+	}
+
 	zio_nowait(zio_trim(spa->spa_txg_zio[txg & TXG_MASK], vd,
-	    start, size, ta->trim_type == TRIM_TYPE_MANUAL ?
-	    vdev_trim_cb : vdev_autotrim_cb, NULL,
-	    ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL, ta->trim_flags));
+	    start, size, cb, NULL, ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL,
+	    ta->trim_flags));
 	/* vdev_trim_cb and vdev_autotrim_cb release SCL_STATE_ALL */
 
 	dmu_tx_commit(tx);
@@ -1016,6 +1054,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
 {
 	spa_t *spa = vd->vdev_spa;
 	list_t vd_list;
+	vdev_t *vd_l2cache;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
@@ -1023,6 +1062,17 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
 	    offsetof(vdev_t, vdev_trim_node));
 
 	vdev_trim_stop_all_impl(vd, tgt_state, &vd_list);
+
+	/*
+	 * Iterate over cache devices and request stop trimming the
+	 * whole device in case we export the pool or remove the cache
+	 * device prematurely.
+	 */
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+		vd_l2cache = spa->spa_l2cache.sav_vdevs[i];
+		vdev_trim_stop_all_impl(vd_l2cache, tgt_state, &vd_list);
+	}
+
 	vdev_trim_stop_wait(spa, &vd_list);
 
 	if (vd->vdev_spa->spa_sync_on) {
@@ -1437,6 +1487,189 @@ vdev_autotrim_restart(spa_t *spa)
 		vdev_autotrim(spa);
 }
 
+static void
+vdev_trim_l2arc_thread(void *arg)
+{
+	vdev_t		*vd = arg;
+	spa_t		*spa = vd->vdev_spa;
+	l2arc_dev_t	*dev = l2arc_vdev_get(vd);
+	trim_args_t	ta;
+	range_seg64_t 	physical_rs;
+	/* TRIM the whole cache vdev, then rewrite its L2ARC device header. */
+	ASSERT(vdev_is_concrete(vd));
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+	vd->vdev_trim_last_offset = 0;
+	vd->vdev_trim_rate = 0;
+	vd->vdev_trim_partial = 0;
+	vd->vdev_trim_secure = 0;
+	/* Set up TRIM_TYPE_MANUAL arguments backed by a new range tree. */
+	bzero(&ta, sizeof (ta));
+	ta.trim_vdev = vd;
+	ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+	ta.trim_type = TRIM_TYPE_MANUAL;
+	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+	ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+	ta.trim_flags = 0;
+	/* Queue [0, vdev_get_min_asize(vd)) and record it as the estimate. */
+	physical_rs.rs_start = vd->vdev_trim_bytes_done = 0;
+	physical_rs.rs_end = vd->vdev_trim_bytes_est =
+	    vdev_get_min_asize(vd);
+
+	range_tree_add(ta.trim_tree, physical_rs.rs_start,
+	    physical_rs.rs_end - physical_rs.rs_start);
+
+	mutex_enter(&vd->vdev_trim_lock);
+	vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+	mutex_exit(&vd->vdev_trim_lock);
+
+	(void) vdev_trim_ranges(&ta);
+	/* Drop SCL_CONFIG, then wait for in-flight manual TRIMs to finish. */
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
+	mutex_enter(&vd->vdev_trim_io_lock);
+	while (vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] > 0) {
+		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+	}
+	mutex_exit(&vd->vdev_trim_io_lock);
+
+	range_tree_vacate(ta.trim_tree, NULL, NULL);
+	range_tree_destroy(ta.trim_tree);
+	/* Complete only if no exit was requested and vdev is writeable. */
+	mutex_enter(&vd->vdev_trim_lock);
+	if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
+		vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+		    vd->vdev_trim_rate, vd->vdev_trim_partial,
+		    vd->vdev_trim_secure);
+	}
+	ASSERT(vd->vdev_trim_thread != NULL ||
+	    vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] == 0);
+
+	/*
+	 * Drop the vdev_trim_lock while we sync out the txg since it's
+	 * possible that a device might be trying to come online and
+	 * must check to see if it needs to restart a trim. That thread
+	 * will be holding the spa_config_lock which would prevent the
+	 * txg_wait_synced from completing. Same strategy as in
+	 * vdev_trim_thread().
+	 */
+	mutex_exit(&vd->vdev_trim_lock);
+	txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
+	mutex_enter(&vd->vdev_trim_lock);
+
+	/*
+	 * Update the header of the cache device here, before
+	 * broadcasting vdev_trim_cv which may lead to the removal
+	 * of the device. The same applies for setting l2ad_trim_all to
+	 * false.
+	 */
+	spa_config_enter(vd->vdev_spa, SCL_L2ARC, vd,
+	    RW_READER);
+	bzero(dev->l2ad_dev_hdr, dev->l2ad_dev_hdr_asize);
+	l2arc_dev_hdr_update(dev);
+	spa_config_exit(vd->vdev_spa, SCL_L2ARC, vd);
+
+	vd->vdev_trim_thread = NULL;
+	if (vd->vdev_trim_state == VDEV_TRIM_COMPLETE)
+		dev->l2ad_trim_all = B_FALSE;
+
+	cv_broadcast(&vd->vdev_trim_cv);
+	mutex_exit(&vd->vdev_trim_lock);
+
+	thread_exit();
+}
+
+/*
+ * Creates a TRIM thread for each L2ARC device in a spa that is marked for
+ * whole device TRIM (l2ad_trim_all) and assigns it to vd->vdev_trim_thread.
+ * This lets the whole cache device be trimmed using TRIM_TYPE_MANUAL upon
+ * addition to a pool, pool creation or when its header is invalid.
+ */
+void
+vdev_trim_l2arc(spa_t *spa)
+{
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+	/*
+	 * Locate the spa's l2arc devices and kick off TRIM threads.
+	 */
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+		vdev_t *vd = spa->spa_l2cache.sav_vdevs[i];
+		l2arc_dev_t *dev = l2arc_vdev_get(vd);
+
+		if (dev == NULL || !dev->l2ad_trim_all) {
+			/*
+			 * Don't attempt TRIM if the vdev is UNAVAIL or if the
+			 * cache device was not marked for whole device TRIM
+			 * (ie l2arc_trim_ahead = 0, or the L2ARC device header
+			 * is valid with trim_state = VDEV_TRIM_COMPLETE and
+			 * l2ad_log_entries > 0).
+			 */
+			continue;
+		}
+		/* Start the TRIM thread under vdev_trim_lock. */
+		mutex_enter(&vd->vdev_trim_lock);
+		ASSERT(vd->vdev_ops->vdev_op_leaf);
+		ASSERT(vdev_is_concrete(vd));
+		ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+		ASSERT(!vd->vdev_detached);
+		ASSERT(!vd->vdev_trim_exit_wanted);
+		ASSERT(!vd->vdev_top->vdev_removing);
+		vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+		vd->vdev_trim_thread = thread_create(NULL, 0,
+		    vdev_trim_l2arc_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+		mutex_exit(&vd->vdev_trim_lock);
+	}
+}
+
+/*
+ * Synchronously TRIM the range [start, start + size) of leaf vdev 'vd' via
+ * vdev_trim_ranges(), returning its error once in-flight simple TRIMs drain.
+ */
+int
+vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
+{
+	const uint64_t	end = start + size;
+	trim_args_t	ta;
+	int		error;
+
+	ASSERT(vdev_is_concrete(vd));
+	ASSERT(vd->vdev_ops->vdev_op_leaf);
+	ASSERT(!vd->vdev_detached);
+	ASSERT(!vd->vdev_top->vdev_removing);
+	ASSERT3U(end, >=, start);
+
+	/* Simple-type TRIM arguments backed by a range tree of our own. */
+	bzero(&ta, sizeof (ta));
+	ta.trim_vdev = vd;
+	ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+	ta.trim_type = TRIM_TYPE_SIMPLE;
+	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+	ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+	ta.trim_flags = 0;
+
+	/* Only a non-empty range is added to the tree. */
+	if (end > start) {
+		range_tree_add(ta.trim_tree, start, end - start);
+	} else {
+		ASSERT3U(end, ==, start);
+	}
+
+	error = vdev_trim_ranges(&ta);
+
+	/* Block until no simple TRIM I/O remains in flight on this vdev. */
+	mutex_enter(&vd->vdev_trim_io_lock);
+	while (vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE] > 0) {
+		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+	}
+	mutex_exit(&vd->vdev_trim_io_lock);
+
+	/* Empty the tree before destroying it. */
+	range_tree_vacate(ta.trim_tree, NULL, NULL);
+	range_tree_destroy(ta.trim_tree);
+
+	return (error);
+}
+
 EXPORT_SYMBOL(vdev_trim);
 EXPORT_SYMBOL(vdev_trim_stop);
 EXPORT_SYMBOL(vdev_trim_stop_all);
@@ -1446,6 +1679,8 @@ EXPORT_SYMBOL(vdev_autotrim);
 EXPORT_SYMBOL(vdev_autotrim_stop_all);
 EXPORT_SYMBOL(vdev_autotrim_stop_wait);
 EXPORT_SYMBOL(vdev_autotrim_restart);
+EXPORT_SYMBOL(vdev_trim_l2arc);
+EXPORT_SYMBOL(vdev_trim_simple);
 
 /* BEGIN CSTYLED */
 ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_max, UINT, ZMOD_RW,