diff options
author | George Wilson <[email protected]> | 2014-04-18 08:35:03 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-07-30 10:30:05 -0700 |
commit | 672692c7b77cfcf1d7abbdec2388a08741a32c57 (patch) | |
tree | cd6f5dc103bf0e2805df58e22a6a8e34edd5ff18 /module | |
parent | 9bd274ddd846cd4024ebe3253c7b2d4f3b6f9dc0 (diff) |
Illumos 4754, 4755
4754 io issued to near-full luns even after setting noalloc threshold
4755 mg_alloc_failures is no longer needed
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/4754
https://www.illumos.org/issues/4755
https://github.com/illumos/illumos-gate/commit/b6240e8
Ported by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2533
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/metaslab.c | 47 | ||||
-rw-r--r-- | module/zfs/zio.c | 9 |
2 files changed, 6 insertions, 50 deletions
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 06d7f7cd2..2dfdafb3a 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -40,7 +40,7 @@ * avoid having to load lots of space_maps in a given txg. There are, * however, some cases where we want to avoid "fast" ganging and instead * we want to do an exhaustive search of all metaslabs on this device. - * Currently we don't allow any gang, zil, or dump device related allocations + * Currently we don't allow any gang, slog, or dump device related allocations * to "fast" gang. */ #define CAN_FASTGANG(flags) \ @@ -64,14 +64,6 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ int zfs_condense_pct = 200; /* - * This value defines the number of allowed allocation failures per vdev. - * If a device reaches this threshold in a given txg then we consider skipping - * allocations on that device. The value of zfs_mg_alloc_failures is computed - * in zio_init() unless it has been overridden in /etc/system. - */ -int zfs_mg_alloc_failures = 0; - -/* * The zfs_mg_noalloc_threshold defines which metaslab groups should * be eligible for allocation. The value is defined as a percentage of * a free space. Metaslab groups that have more free space than @@ -1660,10 +1652,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) void metaslab_sync_reassess(metaslab_group_t *mg) { - int64_t failures = mg->mg_alloc_failures; - metaslab_group_alloc_update(mg); - atomic_add_64(&mg->mg_alloc_failures, -failures); /* * Preload the next potential metaslabs @@ -1690,7 +1679,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva) static uint64_t metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, - uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) + uint64_t txg, uint64_t min_distance, dva_t *dva, int d) { spa_t *spa = mg->mg_vd->vdev_spa; metaslab_t *msp = NULL; @@ -1717,10 +1706,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, spa_dbgmsg(spa, "%s: failed to meet weight " "requirement: vdev %llu, txg %llu, mg %p, " "msp %p, psize %llu, asize %llu, " - "failures %llu, weight %llu", - spa_name(spa), mg->mg_vd->vdev_id, txg, - mg, msp, psize, asize, - mg->mg_alloc_failures, msp->ms_weight); + "weight %llu", spa_name(spa), + mg->mg_vd->vdev_id, txg, + mg, msp, psize, asize, msp->ms_weight); mutex_exit(&mg->mg_lock); return (-1ULL); } @@ -1753,27 +1741,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, mutex_enter(&msp->ms_lock); /* - * If we've already reached the allowable number of failed - * allocation attempts on this metaslab group then we - * consider skipping it. We skip it only if we're allowed - * to "fast" gang, the physical size is larger than - * a gang block, and we're attempting to allocate from - * the primary metaslab. - */ - if (mg->mg_alloc_failures > zfs_mg_alloc_failures && - CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && - activation_weight == METASLAB_WEIGHT_PRIMARY) { - spa_dbgmsg(spa, "%s: skipping metaslab group: " - "vdev %llu, txg %llu, mg %p, msp[%llu] %p, " - "psize %llu, asize %llu, failures %llu", - spa_name(spa), mg->mg_vd->vdev_id, txg, mg, - msp->ms_id, msp, psize, asize, - mg->mg_alloc_failures); - mutex_exit(&msp->ms_lock); - return (-1ULL); - } - - /* * Ensure that the metaslab we have selected is still * capable of handling our request. It's possible that * another thread may have changed the weight while we @@ -1812,8 +1779,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, if ((offset = metaslab_block_alloc(msp, asize)) != -1ULL) break; - atomic_inc_64(&mg->mg_alloc_failures); - metaslab_passivate(msp, metaslab_block_maxsize(msp)); mutex_exit(&msp->ms_lock); } @@ -1980,7 +1945,7 @@ top: ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); offset = metaslab_group_alloc(mg, psize, asize, txg, distance, - dva, d, flags); + dva, d); if (offset != -1ULL) { /* * If we've just selected this metaslab group, diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 7e440a381..6352ab3a3 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -60,8 +60,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; int zio_bulk_flags = 0; int zio_delay_max = ZIO_DELAY_MAX; -extern int zfs_mg_alloc_failures; - /* * The following actions directly effect the spa's sync-to-convergence logic. * The values below define the sync pass when we start performing the action. @@ -193,13 +191,6 @@ zio_init(void) zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; } - /* - * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs - * to fail 3 times per txg or 8 failures, whichever is greater. - */ - if (zfs_mg_alloc_failures == 0) - zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); - zio_inject_init(); lz4_init(); |