diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dsl_scan.c | 21 | ||||
-rw-r--r-- | module/zfs/spa.c | 11 | ||||
-rw-r--r-- | module/zfs/spa_config.c | 9 | ||||
-rw-r--r-- | module/zfs/vdev.c | 100 | ||||
-rw-r--r-- | module/zfs/vdev_label.c | 105 | ||||
-rw-r--r-- | module/zfs/zfs_debug.c | 15 |
6 files changed, 176 insertions, 85 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 6cc08ab47..eef509bda 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -190,6 +190,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) scn->scn_phys.scn_errors = 0; scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; scn->scn_restart_txg = 0; + scn->scn_done_txg = 0; spa_scan_stat_init(spa); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { @@ -776,7 +777,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, * Don't scan it now unless we need to because something * under it was modified. */ - if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) { + if (BP_PHYSICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_max_txg) { scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); } if (buf) @@ -1227,7 +1228,7 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0 || - ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg) + ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) continue; ddt_bp_create(checksum, ddk, ddp, &bp); @@ -1475,6 +1476,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (scn->scn_phys.scn_state != DSS_SCANNING) return; + if (scn->scn_done_txg == tx->tx_txg) { + ASSERT(!scn->scn_pausing); + /* finished with scan. */ + zfs_dbgmsg("txg %llu scan complete", tx->tx_txg); + dsl_scan_done(scn, B_TRUE, tx); + ASSERT3U(spa->spa_scrub_inflight, ==, 0); + dsl_scan_sync_state(scn, tx); + return; + } + if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { zfs_dbgmsg("doing scan sync txg %llu; " @@ -1510,9 +1521,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); if (!scn->scn_pausing) { - /* finished with scan. */ - zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg); - dsl_scan_done(scn, B_TRUE, tx); + scn->scn_done_txg = tx->tx_txg + 1; + zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu", + tx->tx_txg, scn->scn_done_txg); } if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 6b9b079df..c7ea59536 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -4509,7 +4509,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) } /* mark the device being resilvered */ - newvd->vdev_resilvering = B_TRUE; + newvd->vdev_resilver_txg = txg; /* * If the parent is not a mirror, or if we're replacing, insert the new @@ -5370,13 +5370,6 @@ spa_vdev_resilver_done_hunt(vdev_t *vd) return (oldvd); } - if (vd->vdev_resilvering && vdev_dtl_empty(vd, DTL_MISSING) && - vdev_dtl_empty(vd, DTL_OUTAGE)) { - ASSERT(vd->vdev_ops->vdev_op_leaf); - vd->vdev_resilvering = B_FALSE; - vdev_config_dirty(vd->vdev_top); - } - /* * Check for a completed replacement. We always consider the first * vdev in the list to be the oldest vdev, and the last one to be @@ -5466,6 +5459,8 @@ spa_vdev_resilver_done(spa_t *spa) ASSERT(pvd->vdev_ops == &vdev_replacing_ops); sguid = ppvd->vdev_child[1]->vdev_guid; } + ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd)); + spa_config_exit(spa, SCL_ALL, FTAG); if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) return; diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 858d9d16e..9efa05361 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include <sys/spa.h> @@ -196,7 +196,12 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) /* * Synchronize pool configuration to disk. This must be called with the - * namespace lock held. + * namespace lock held. Synchronizing the pool cache is typically done after + * the configuration has been synced to the MOS. This exposes a window where + * the MOS config will have been updated but the cache file has not. If + * the system were to crash at that instant then the cached config may not + * contain the correct information to open the pool and an explicity import + * would be required. */ void spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index ddfca71df..ff239d72c 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -526,8 +526,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &vd->vdev_offline); - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING, - &vd->vdev_resilvering); + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, + &vd->vdev_resilver_txg); /* * When importing a pool, we want to ignore the persistent fault @@ -1683,6 +1683,75 @@ vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t) } /* + * Returns the lowest txg in the DTL range. + */ +static uint64_t +vdev_dtl_min(vdev_t *vd) +{ + space_seg_t *ss; + + ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); + ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0); + ASSERT0(vd->vdev_children); + + ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); + return (ss->ss_start - 1); +} + +/* + * Returns the highest txg in the DTL. + */ +static uint64_t +vdev_dtl_max(vdev_t *vd) +{ + space_seg_t *ss; + + ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); + ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0); + ASSERT0(vd->vdev_children); + + ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); + return (ss->ss_end); +} + +/* + * Determine if a resilvering vdev should remove any DTL entries from + * its range. If the vdev was resilvering for the entire duration of the + * scan then it should excise that range from its DTLs. Otherwise, this + * vdev is considered partially resilvered and should leave its DTL + * entries intact. The comment in vdev_dtl_reassess() describes how we + * excise the DTLs. + */ +static boolean_t +vdev_dtl_should_excise(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; + + ASSERT0(scn->scn_phys.scn_errors); + ASSERT0(vd->vdev_children); + + if (vd->vdev_resilver_txg == 0 || + vd->vdev_dtl[DTL_MISSING].sm_space == 0) + return (B_TRUE); + + /* + * When a resilver is initiated the scan will assign the scn_max_txg + * value to the highest txg value that exists in all DTLs. If this + * device's max DTL is not part of this scan (i.e. it is not in + * the range (scn_min_txg, scn_max_txg] then it is not eligible + * for excision. + */ + if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) { + ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd)); + ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg); + ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg); + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Reassess DTLs after a config change or scrub completion. */ void @@ -1705,9 +1774,17 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; mutex_enter(&vd->vdev_dtl_lock); + + /* + * If we've completed a scan cleanly then determine + * if this vdev should remove any DTLs. We only want to + * excise regions on vdevs that were available during + * the entire duration of this scan. + */ if (scrub_txg != 0 && (spa->spa_scrub_started || - (scn && scn->scn_phys.scn_errors == 0))) { + (scn != NULL && scn->scn_phys.scn_errors == 0)) && + vdev_dtl_should_excise(vd)) { /* * We completed a scrub up to scrub_txg. If we * did it without rebooting, then the scrub dtl @@ -1746,6 +1823,16 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) else space_map_walk(&vd->vdev_dtl[DTL_MISSING], space_map_add, &vd->vdev_dtl[DTL_OUTAGE]); + + /* + * If the vdev was resilvering and no longer has any + * DTLs then reset its resilvering flag. + */ + if (vd->vdev_resilver_txg != 0 && + vd->vdev_dtl[DTL_MISSING].sm_space == 0 && + vd->vdev_dtl[DTL_OUTAGE].sm_space == 0) + vd->vdev_resilver_txg = 0; + mutex_exit(&vd->vdev_dtl_lock); if (txg != 0) @@ -1922,12 +2009,9 @@ vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp) mutex_enter(&vd->vdev_dtl_lock); if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 && vdev_writeable(vd)) { - space_seg_t *ss; - ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); - thismin = ss->ss_start - 1; - ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); - thismax = ss->ss_end; + thismin = vdev_dtl_min(vd); + thismax = vdev_dtl_max(vd); needed = B_TRUE; } mutex_exit(&vd->vdev_dtl_lock); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 0405608c2..146dc28e8 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -218,28 +218,23 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, - vd->vdev_ops->vdev_op_type) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type); if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE))) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) - == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid); if (vd->vdev_path != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, - vd->vdev_path) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path); if (vd->vdev_devid != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, - vd->vdev_devid) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid); if (vd->vdev_physpath != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, - vd->vdev_physpath) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, + vd->vdev_physpath); if (vd->vdev_fru != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_FRU, - vd->vdev_fru) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru); if (vd->vdev_nparity != 0) { ASSERT(strcmp(vd->vdev_ops->vdev_op_type, @@ -260,59 +255,54 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * that only support a single parity device -- older software * will just ignore it. */ - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, - vd->vdev_nparity) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity); } if (vd->vdev_wholedisk != -1ULL) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - vd->vdev_wholedisk) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + vd->vdev_wholedisk); if (vd->vdev_not_present) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1); if (vd->vdev_isspare) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1); if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && vd == vd->vdev_top) { - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, - vd->vdev_ms_array) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, - vd->vdev_ms_shift) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, - vd->vdev_ashift) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, - vd->vdev_asize) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, - vd->vdev_islog) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, + vd->vdev_ms_array); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, + vd->vdev_ms_shift); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, + vd->vdev_asize); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog); if (vd->vdev_removing) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, - vd->vdev_removing) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, + vd->vdev_removing); } if (vd->vdev_dtl_smo.smo_object != 0) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, - vd->vdev_dtl_smo.smo_object) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, + vd->vdev_dtl_smo.smo_object); if (vd->vdev_crtxg) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, - vd->vdev_crtxg) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg); if (getstats) { vdev_stat_t vs; pool_scan_stat_t ps; vdev_get_stats(vd, &vs); - VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0); + fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)); /* provide either current or previous scan information */ if (spa_scan_get_stats(spa, &ps) == 0) { - VERIFY(nvlist_add_uint64_array(nv, + fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps, - sizeof (pool_scan_stat_t) / sizeof (uint64_t)) - == 0); + sizeof (pool_scan_stat_t) / sizeof (uint64_t)); } } @@ -342,8 +332,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, } if (idx) { - VERIFY(nvlist_add_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, child, idx) == 0); + fnvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + child, idx); } for (c = 0; c < idx; c++) @@ -355,26 +345,20 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, const char *aux = NULL; if (vd->vdev_offline && !vd->vdev_tmpoffline) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, - B_TRUE) == 0); - if (vd->vdev_resilvering) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVERING, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, B_TRUE); + if (vd->vdev_resilver_txg != 0) + fnvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, + vd->vdev_resilver_txg); if (vd->vdev_faulted) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, B_TRUE); if (vd->vdev_degraded) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, B_TRUE); if (vd->vdev_removed) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, B_TRUE); if (vd->vdev_unspare) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, B_TRUE); if (vd->vdev_ishole) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE); switch (vd->vdev_stat.vs_aux) { case VDEV_AUX_ERR_EXCEEDED: @@ -387,12 +371,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, } if (aux != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, - aux) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux); if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, - vd->vdev_orig_guid) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, + vd->vdev_orig_guid); } } diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c index 29423303f..cd83e2392 100644 --- a/module/zfs/zfs_debug.c +++ b/module/zfs/zfs_debug.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -141,6 +141,19 @@ zfs_dbgmsg(const char *fmt, ...) } mutex_exit(&zfs_dbgmsgs_lock); } + +void +zfs_dbgmsg_print(const char *tag) +{ + zfs_dbgmsg_t *zdm; + + (void) printf("ZFS_DBGMSG(%s):\n", tag); + mutex_enter(&zfs_dbgmsgs_lock); + for (zdm = list_head(&zfs_dbgmsgs); zdm; + zdm = list_next(&zfs_dbgmsgs, zdm)) + (void) printf("%s\n", zdm->zdm_msg); + mutex_exit(&zfs_dbgmsgs_lock); +} #endif #if defined(_KERNEL) |