diff options
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r-- | module/zfs/vdev.c | 106 |
1 files changed, 98 insertions, 8 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index ae1c2bcec..085ae6873 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -51,6 +51,7 @@ #include <sys/dsl_scan.h> #include <sys/abd.h> #include <sys/vdev_initialize.h> +#include <sys/vdev_trim.h> #include <sys/zvol.h> #include <sys/zfs_ratelimit.h> @@ -543,6 +544,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) list_link_init(&vd->vdev_state_dirty_node); list_link_init(&vd->vdev_initialize_node); list_link_init(&vd->vdev_leaf_node); + list_link_init(&vd->vdev_trim_node); mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL); mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL); @@ -551,6 +553,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) mutex_init(&vd->vdev_initialize_io_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&vd->vdev_initialize_cv, NULL, CV_DEFAULT, NULL); cv_init(&vd->vdev_initialize_io_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&vd->vdev_trim_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&vd->vdev_autotrim_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&vd->vdev_trim_cv, NULL, CV_DEFAULT, NULL); + cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL); + cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL); for (int t = 0; t < DTL_TYPES; t++) { vd->vdev_dtl[t] = range_tree_create(NULL, NULL); @@ -875,7 +883,10 @@ void vdev_free(vdev_t *vd) { spa_t *spa = vd->vdev_spa; + ASSERT3P(vd->vdev_initialize_thread, ==, NULL); + ASSERT3P(vd->vdev_trim_thread, ==, NULL); + ASSERT3P(vd->vdev_autotrim_thread, ==, NULL); /* * Scan queues are normally destroyed at the end of a scan. If the @@ -906,7 +917,6 @@ vdev_free(vdev_t *vd) ASSERT(vd->vdev_child == NULL); ASSERT(vd->vdev_guid_sum == vd->vdev_guid); - ASSERT(vd->vdev_initialize_thread == NULL); /* * Discard allocation state. @@ -988,6 +998,12 @@ vdev_free(vdev_t *vd) mutex_destroy(&vd->vdev_initialize_io_lock); cv_destroy(&vd->vdev_initialize_io_cv); cv_destroy(&vd->vdev_initialize_cv); + mutex_destroy(&vd->vdev_trim_lock); + mutex_destroy(&vd->vdev_autotrim_lock); + mutex_destroy(&vd->vdev_trim_io_lock); + cv_destroy(&vd->vdev_trim_cv); + cv_destroy(&vd->vdev_autotrim_cv); + cv_destroy(&vd->vdev_trim_io_cv); zfs_ratelimit_fini(&vd->vdev_delay_rl); zfs_ratelimit_fini(&vd->vdev_checksum_rl); @@ -3475,6 +3491,16 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) } mutex_exit(&vd->vdev_initialize_lock); + /* Restart trimming if necessary */ + mutex_enter(&vd->vdev_trim_lock); + if (vdev_writeable(vd) && + vd->vdev_trim_thread == NULL && + vd->vdev_trim_state == VDEV_TRIM_ACTIVE) { + (void) vdev_trim(vd, vd->vdev_trim_rate, vd->vdev_trim_partial, + vd->vdev_trim_secure); + } + mutex_exit(&vd->vdev_trim_lock); + if (wasoffline || (oldstate < VDEV_STATE_DEGRADED && vd->vdev_state >= VDEV_STATE_DEGRADED)) @@ -3745,8 +3771,7 @@ vdev_accessible(vdev_t *vd, zio_t *zio) static void vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs) { - int t; - for (t = 0; t < ZIO_TYPES; t++) { + for (int t = 0; t < VS_ZIO_TYPES; t++) { vs->vs_ops[t] += cvs->vs_ops[t]; vs->vs_bytes[t] += cvs->vs_bytes[t]; } @@ -3873,7 +3898,7 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; /* - * Report intializing progress. Since we don't + * Report initializing progress. Since we don't * have the initializing locks held, this is only * an estimate (although a fairly accurate one). */ @@ -3884,9 +3909,20 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) vs->vs_initialize_state = vd->vdev_initialize_state; vs->vs_initialize_action_time = vd->vdev_initialize_action_time; + + /* + * Report manual TRIM progress. Since we don't have + * the manual TRIM locks held, this is only an + * estimate (although fairly accurate one). + */ + vs->vs_trim_notsup = !vd->vdev_has_trim; + vs->vs_trim_bytes_done = vd->vdev_trim_bytes_done; + vs->vs_trim_bytes_est = vd->vdev_trim_bytes_est; + vs->vs_trim_state = vd->vdev_trim_state; + vs->vs_trim_action_time = vd->vdev_trim_action_time; } /* - * Report expandable space on top-level, non-auxillary devices + * Report expandable space on top-level, non-auxiliary devices * only. The expandable space is reported in terms of metaslab * sized units since that determines how much space the pool * can expand. @@ -4004,9 +4040,18 @@ vdev_stat_update(zio_t *zio, uint64_t psize) */ if (vd->vdev_ops->vdev_op_leaf && (zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE)) { + zio_type_t vs_type = type; + + /* + * TRIM ops and bytes are reported to user space as + * ZIO_TYPE_IOCTL. This is done to preserve the + * vdev_stat_t structure layout for user space. + */ + if (type == ZIO_TYPE_TRIM) + vs_type = ZIO_TYPE_IOCTL; - vs->vs_ops[type]++; - vs->vs_bytes[type] += psize; + vs->vs_ops[vs_type]++; + vs->vs_bytes[vs_type] += psize; if (flags & ZIO_FLAG_DELEGATED) { vsx->vsx_agg_histo[zio->io_priority] @@ -4104,7 +4149,8 @@ vdev_deflated_space(vdev_t *vd, int64_t space) } /* - * Update the in-core space usage stats for this vdev and the root vdev. + * Update the in-core space usage stats for this vdev, its metaslab class, + * and the root vdev. */ void vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, @@ -4650,12 +4696,56 @@ vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd) spa->spa_resilver_deferred = B_TRUE; } +/* + * Translate a logical range to the physical range for the specified vdev_t. + * This function is initially called with a leaf vdev and will walk each + * parent vdev until it reaches a top-level vdev. Once the top-level is + * reached the physical range is initialized and the recursive function + * begins to unwind. As it unwinds it calls the parent's vdev specific + * translation function to do the real conversion. + */ +void +vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs, range_seg_t *physical_rs) +{ + /* + * Walk up the vdev tree + */ + if (vd != vd->vdev_top) { + vdev_xlate(vd->vdev_parent, logical_rs, physical_rs); + } else { + /* + * We've reached the top-level vdev, initialize the + * physical range to the logical range and start to + * unwind. + */ + physical_rs->rs_start = logical_rs->rs_start; + physical_rs->rs_end = logical_rs->rs_end; + return; + } + + vdev_t *pvd = vd->vdev_parent; + ASSERT3P(pvd, !=, NULL); + ASSERT3P(pvd->vdev_ops->vdev_op_xlate, !=, NULL); + + /* + * As this recursive function unwinds, translate the logical + * range into its physical components by calling the + * vdev specific translate function. + */ + range_seg_t intermediate = { { { 0, 0 } } }; + pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate); + + physical_rs->rs_start = intermediate.rs_start; + physical_rs->rs_end = intermediate.rs_end; +} + #if defined(_KERNEL) EXPORT_SYMBOL(vdev_fault); EXPORT_SYMBOL(vdev_degrade); EXPORT_SYMBOL(vdev_online); EXPORT_SYMBOL(vdev_offline); EXPORT_SYMBOL(vdev_clear); + /* BEGIN CSTYLED */ module_param(zfs_vdev_default_ms_count, int, 0644); MODULE_PARM_DESC(zfs_vdev_default_ms_count, |