aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Motin <[email protected]>2023-06-30 11:47:13 -0400
committerGitHub <[email protected]>2023-06-30 08:47:13 -0700
commitfa7b2390d4982412f9dd27c151bb5ec2da89dcca (patch)
tree6b87a6f3968470b1bbd4e33fffe6836a389a3beb
parent6052060c133d0caed0e1bc3ec2c057f8c33e5f7a (diff)
Do not report bytes skipped by scan as issued.
Scan process may skip blocks based on their birth time, DVA, etc. Traditionally those blocks were accounted as issued, that caused reporting of hugely over-inflated numbers, having nothing to do with actual disk I/O. This change utilizes never used field in struct dsl_scan_phys to account such skipped bytes, allowing to report how much data were actually scrubbed/resilvered and what is the actual I/O speed. While formally it is an on-disk format change, it should be compatible both ways, so should not need a feature flag. This should partially address the same issue as c85ac731a0e, but from a different perspective, complementing it. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Akash B <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Closes #15007
-rw-r--r--cmd/zpool/zpool_main.c94
-rw-r--r--cmd/zpool_influxdb/zpool_influxdb.c2
-rw-r--r--include/sys/dsl_scan.h2
-rw-r--r--include/sys/fs/zfs.h3
-rw-r--r--include/sys/vdev_rebuild.h1
-rw-r--r--man/man8/zpool-scrub.84
-rw-r--r--module/zfs/dsl_scan.c22
-rw-r--r--module/zfs/spa_misc.c2
-rw-r--r--module/zfs/vdev_rebuild.c6
9 files changed, 84 insertions, 52 deletions
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 013dd4a23..10a3b5b14 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -7662,11 +7662,11 @@ static void
print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
{
time_t start, end, pause;
- uint64_t pass_scanned, scanned, pass_issued, issued, total;
+ uint64_t pass_scanned, scanned, pass_issued, issued, total_s, total_i;
uint64_t elapsed, scan_rate, issue_rate;
double fraction_done;
- char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
- char srate_buf[7], irate_buf[7], time_buf[32];
+ char processed_buf[7], scanned_buf[7], issued_buf[7], total_s_buf[7];
+ char total_i_buf[7], srate_buf[7], irate_buf[7], time_buf[32];
printf(" ");
printf_color(ANSI_BOLD, gettext("scan:"));
@@ -7738,10 +7738,11 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
pass_scanned = ps->pss_pass_exam;
issued = ps->pss_issued;
pass_issued = ps->pss_pass_issued;
- total = ps->pss_to_examine;
+ total_s = ps->pss_to_examine;
+ total_i = ps->pss_to_examine - ps->pss_skipped;
/* we are only done with a block once we have issued the IO for it */
- fraction_done = (double)issued / total;
+ fraction_done = (double)issued / total_i;
/* elapsed time for this pass, rounding up to 1 if it's 0 */
elapsed = time(NULL) - ps->pss_pass_start;
@@ -7750,26 +7751,25 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
scan_rate = pass_scanned / elapsed;
issue_rate = pass_issued / elapsed;
- uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
- ((total - issued) / issue_rate) : UINT64_MAX;
- secs_to_dhms(total_secs_left, time_buf);
/* format all of the numbers we will be reporting */
zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
zfs_nicebytes(issued, issued_buf, sizeof (issued_buf));
- zfs_nicebytes(total, total_buf, sizeof (total_buf));
- zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
- zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
+ zfs_nicebytes(total_s, total_s_buf, sizeof (total_s_buf));
+ zfs_nicebytes(total_i, total_i_buf, sizeof (total_i_buf));
/* do not print estimated time if we have a paused scrub */
- if (pause == 0) {
- (void) printf(gettext("\t%s scanned at %s/s, "
- "%s issued at %s/s, %s total\n"),
- scanned_buf, srate_buf, issued_buf, irate_buf, total_buf);
- } else {
- (void) printf(gettext("\t%s scanned, %s issued, %s total\n"),
- scanned_buf, issued_buf, total_buf);
+ (void) printf(gettext("\t%s / %s scanned"), scanned_buf, total_s_buf);
+ if (pause == 0 && scan_rate > 0) {
+ zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
+ (void) printf(gettext(" at %s/s"), srate_buf);
}
+ (void) printf(gettext(", %s / %s issued"), issued_buf, total_i_buf);
+ if (pause == 0 && issue_rate > 0) {
+ zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
+ (void) printf(gettext(" at %s/s"), irate_buf);
+ }
+ (void) printf(gettext("\n"));
if (is_resilver) {
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
@@ -7782,16 +7782,16 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
if (pause == 0) {
/*
* Only provide an estimate iff:
- * 1) the time remaining is valid, and
+ * 1) we haven't yet issued all we expected, and
* 2) the issue rate exceeds 10 MB/s, and
* 3) it's either:
* a) a resilver which has started repairs, or
* b) a scrub which has entered the issue phase.
*/
- if (total_secs_left != UINT64_MAX &&
- issue_rate >= 10 * 1024 * 1024 &&
+ if (total_i >= issued && issue_rate >= 10 * 1024 * 1024 &&
((is_resilver && ps->pss_processed > 0) ||
(is_scrub && issued > 0))) {
+ secs_to_dhms((total_i - issued) / issue_rate, time_buf);
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
@@ -7803,7 +7803,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
}
static void
-print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
+print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, uint_t c, char *vdev_name)
{
if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
return;
@@ -7815,17 +7815,20 @@ print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
uint64_t bytes_issued = vrs->vrs_bytes_issued;
uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
- uint64_t bytes_est = vrs->vrs_bytes_est;
+ uint64_t bytes_est_s = vrs->vrs_bytes_est;
+ uint64_t bytes_est_i = vrs->vrs_bytes_est;
+ if (c > offsetof(vdev_rebuild_stat_t, vrs_pass_bytes_skipped) / 8)
+ bytes_est_i -= vrs->vrs_pass_bytes_skipped;
uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
(vrs->vrs_pass_time_ms + 1)) * 1000;
uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
(vrs->vrs_pass_time_ms + 1)) * 1000;
double scan_pct = MIN((double)bytes_scanned * 100 /
- (bytes_est + 1), 100);
+ (bytes_est_s + 1), 100);
/* Format all of the numbers we will be reporting */
char bytes_scanned_buf[7], bytes_issued_buf[7];
- char bytes_rebuilt_buf[7], bytes_est_buf[7];
+ char bytes_rebuilt_buf[7], bytes_est_s_buf[7], bytes_est_i_buf[7];
char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
sizeof (bytes_scanned_buf));
@@ -7833,9 +7836,8 @@ print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
sizeof (bytes_issued_buf));
zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
sizeof (bytes_rebuilt_buf));
- zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
- zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
- zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));
+ zfs_nicebytes(bytes_est_s, bytes_est_s_buf, sizeof (bytes_est_s_buf));
+ zfs_nicebytes(bytes_est_i, bytes_est_i_buf, sizeof (bytes_est_i_buf));
time_t start = vrs->vrs_start_time;
time_t end = vrs->vrs_end_time;
@@ -7858,17 +7860,29 @@ print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);
- secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
- MAX(scan_rate, 1), time_buf);
+ (void) printf(gettext("\t%s / %s scanned"), bytes_scanned_buf,
+ bytes_est_s_buf);
+ if (scan_rate > 0) {
+ zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
+ (void) printf(gettext(" at %s/s"), scan_rate_buf);
+ }
+ (void) printf(gettext(", %s / %s issued"), bytes_issued_buf,
+ bytes_est_i_buf);
+ if (issue_rate > 0) {
+ zfs_nicebytes(issue_rate, issue_rate_buf,
+ sizeof (issue_rate_buf));
+ (void) printf(gettext(" at %s/s"), issue_rate_buf);
+ }
+ (void) printf(gettext("\n"));
- (void) printf(gettext("\t%s scanned at %s/s, %s issued %s/s, "
- "%s total\n"), bytes_scanned_buf, scan_rate_buf,
- bytes_issued_buf, issue_rate_buf, bytes_est_buf);
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
bytes_rebuilt_buf, scan_pct);
if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
- if (scan_rate >= 10 * 1024 * 1024) {
+ if (bytes_est_s >= bytes_scanned &&
+ scan_rate >= 10 * 1024 * 1024) {
+ secs_to_dhms((bytes_est_s - bytes_scanned) / scan_rate,
+ time_buf);
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
@@ -7900,7 +7914,7 @@ print_rebuild_status(zpool_handle_t *zhp, nvlist_t *nvroot)
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
char *name = zpool_vdev_name(g_zfs, zhp,
child[c], VDEV_NAME_TYPE_ID);
- print_rebuild_status_impl(vrs, name);
+ print_rebuild_status_impl(vrs, i, name);
free(name);
}
}
@@ -8005,13 +8019,15 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
active_resilver = (ps->pss_state == DSS_SCANNING);
}
-
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
scrub_start = ps->pss_start_time;
- have_errorscrub = (ps->pss_error_scrub_func ==
- POOL_SCAN_ERRORSCRUB);
- errorscrub_start = ps->pss_error_scrub_start;
+ if (c > offsetof(pool_scan_stat_t,
+ pss_pass_error_scrub_pause) / 8) {
+ have_errorscrub = (ps->pss_error_scrub_func ==
+ POOL_SCAN_ERRORSCRUB);
+ errorscrub_start = ps->pss_error_scrub_start;
+ }
}
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
diff --git a/cmd/zpool_influxdb/zpool_influxdb.c b/cmd/zpool_influxdb/zpool_influxdb.c
index 80d084858..520e56926 100644
--- a/cmd/zpool_influxdb/zpool_influxdb.c
+++ b/cmd/zpool_influxdb/zpool_influxdb.c
@@ -238,6 +238,7 @@ print_scan_status(nvlist_t *nvroot, const char *pool_name)
print_kv("end_ts", ps->pss_end_time);
print_kv(",errors", ps->pss_errors);
print_kv(",examined", examined);
+ print_kv(",skipped", ps->pss_skipped);
print_kv(",issued", ps->pss_issued);
print_kv(",pass_examined", pass_exam);
print_kv(",pass_issued", ps->pss_pass_issued);
@@ -249,7 +250,6 @@ print_scan_status(nvlist_t *nvroot, const char *pool_name)
print_kv(",remaining_t", remaining_time);
print_kv(",start_ts", ps->pss_start_time);
print_kv(",to_examine", ps->pss_to_examine);
- print_kv(",to_process", ps->pss_to_process);
printf(" %llu\n", (u_longlong_t)timestamp);
return (0);
}
diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h
index 6753b4a8f..2e3452e5e 100644
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@@ -61,7 +61,7 @@ typedef struct dsl_scan_phys {
uint64_t scn_end_time;
uint64_t scn_to_examine; /* total bytes to be scanned */
uint64_t scn_examined; /* bytes scanned so far */
- uint64_t scn_to_process;
+ uint64_t scn_skipped; /* bytes skipped by scanner */
uint64_t scn_processed;
uint64_t scn_errors; /* scan I/O error count */
uint64_t scn_ddt_class_max;
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 93193fa14..bc940e8a7 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1088,7 +1088,7 @@ typedef struct pool_scan_stat {
uint64_t pss_end_time; /* scan end time */
uint64_t pss_to_examine; /* total bytes to scan */
uint64_t pss_examined; /* total bytes located by scanner */
- uint64_t pss_to_process; /* total bytes to process */
+ uint64_t pss_skipped; /* total bytes skipped by scanner */
uint64_t pss_processed; /* total processed bytes */
uint64_t pss_errors; /* scan errors */
@@ -1152,6 +1152,7 @@ typedef struct vdev_rebuild_stat {
uint64_t vrs_pass_time_ms; /* pass run time (millisecs) */
uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
uint64_t vrs_pass_bytes_issued; /* bytes rebuilt since start/resume */
+ uint64_t vrs_pass_bytes_skipped; /* bytes skipped since start/resume */
} vdev_rebuild_stat_t;
/*
diff --git a/include/sys/vdev_rebuild.h b/include/sys/vdev_rebuild.h
index c4cfe0c56..55ec6c570 100644
--- a/include/sys/vdev_rebuild.h
+++ b/include/sys/vdev_rebuild.h
@@ -79,6 +79,7 @@ typedef struct vdev_rebuild {
uint64_t vr_pass_start_time;
uint64_t vr_pass_bytes_scanned;
uint64_t vr_pass_bytes_issued;
+ uint64_t vr_pass_bytes_skipped;
/* On-disk state updated by vdev_rebuild_zap_update_sync() */
vdev_rebuild_phys_t vr_rebuild_phys;
diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8
index 138226e45..03f3ad499 100644
--- a/man/man8/zpool-scrub.8
+++ b/man/man8/zpool-scrub.8
@@ -26,7 +26,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd July 25, 2021
+.Dd June 22, 2023
.Dt ZPOOL-SCRUB 8
.Os
.
@@ -123,7 +123,7 @@ Status of pool with ongoing scrub:
.No # Nm zpool Cm status
...
scan: scrub in progress since Sun Jul 25 16:07:49 2021
- 403M scanned at 100M/s, 68.4M issued at 10.0M/s, 405M total
+ 403M / 405M scanned at 100M/s, 68.4M / 405M issued at 10.0M/s
0B repaired, 16.91% done, 00:00:04 to go
...
.Ed
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 1dd44171c..50428bff3 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -573,7 +573,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
* counter to how far we've scanned. We know we're consistent
* up to here.
*/
- scn->scn_issued_before_pass = scn->scn_phys.scn_examined;
+ scn->scn_issued_before_pass = scn->scn_phys.scn_examined -
+ scn->scn_phys.scn_skipped;
if (dsl_scan_is_running(scn) &&
spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) {
@@ -4362,7 +4363,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* Disabled by default, set zfs_scan_report_txgs to report
* average performance over the last zfs_scan_report_txgs TXGs.
*/
- if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
+ if (zfs_scan_report_txgs != 0 &&
tx->tx_txg % zfs_scan_report_txgs == 0) {
scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
spa_scan_stat_init(spa);
@@ -4565,6 +4566,15 @@ count_block_issued(spa_t *spa, const blkptr_t *bp, boolean_t all)
}
static void
+count_block_skipped(dsl_scan_t *scn, const blkptr_t *bp, boolean_t all)
+{
+ if (BP_IS_EMBEDDED(bp))
+ return;
+ atomic_add_64(&scn->scn_phys.scn_skipped,
+ all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
+}
+
+static void
count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
{
/*
@@ -4709,7 +4719,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
count_block(dp->dp_blkstats, bp);
if (phys_birth <= scn->scn_phys.scn_min_txg ||
phys_birth >= scn->scn_phys.scn_max_txg) {
- count_block_issued(spa, bp, B_TRUE);
+ count_block_skipped(scn, bp, B_TRUE);
return (0);
}
@@ -4750,7 +4760,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
if (needs_io && !zfs_no_scrub_io) {
dsl_scan_enqueue(dp, bp, zio_flags, zb);
} else {
- count_block_issued(spa, bp, B_TRUE);
+ count_block_skipped(scn, bp, B_TRUE);
}
/* do not relocate this block */
@@ -5119,9 +5129,9 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i)
ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
range_tree_remove_fill(queue->q_exts_by_addr, start, size);
- /* count the block as though we issued it */
+ /* count the block as though we skipped it */
sio2bp(sio, &tmpbp);
- count_block_issued(spa, &tmpbp, B_FALSE);
+ count_block_skipped(scn, &tmpbp, B_FALSE);
sio_free(sio);
}
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 8dc83445e..06f640769 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -2611,7 +2611,7 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
ps->pss_end_time = scn->scn_phys.scn_end_time;
ps->pss_to_examine = scn->scn_phys.scn_to_examine;
ps->pss_examined = scn->scn_phys.scn_examined;
- ps->pss_to_process = scn->scn_phys.scn_to_process;
+ ps->pss_skipped = scn->scn_phys.scn_skipped;
ps->pss_processed = scn->scn_phys.scn_processed;
ps->pss_errors = scn->scn_phys.scn_errors;
diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
index 62aa61b3b..75c3900cb 100644
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@@ -571,8 +571,10 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
vdev_rebuild_blkptr_init(&blk, vd, start, size);
uint64_t psize = BP_GET_PSIZE(&blk);
- if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
+ if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN)) {
+ vr->vr_pass_bytes_skipped += size;
return (0);
+ }
mutex_enter(&vr->vr_io_lock);
@@ -786,6 +788,7 @@ vdev_rebuild_thread(void *arg)
vr->vr_pass_start_time = gethrtime();
vr->vr_pass_bytes_scanned = 0;
vr->vr_pass_bytes_issued = 0;
+ vr->vr_pass_bytes_skipped = 0;
uint64_t update_est_time = gethrtime();
vdev_rebuild_update_bytes_est(vd, 0);
@@ -1153,6 +1156,7 @@ vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
vr->vr_pass_start_time);
vrs->vrs_pass_bytes_scanned = vr->vr_pass_bytes_scanned;
vrs->vrs_pass_bytes_issued = vr->vr_pass_bytes_issued;
+ vrs->vrs_pass_bytes_skipped = vr->vr_pass_bytes_skipped;
mutex_exit(&tvd->vdev_rebuild_lock);
}