diff options
author | George Melikov <mail@gmelikov.ru> | 2017-01-23 20:36:24 +0300 |
---|---|---|
committer | Brian Behlendorf <behlendorf1@llnl.gov> | 2017-01-23 09:36:24 -0800 |
commit | f85c06bedfd2a60f5b5d6a7492ed847c2bffd9fe (patch) | |
tree | f9088bd3cfb4c9cacdbeae63012ad7d346f4bce8 /module/zfs | |
parent | 4ea3f86426f76e59244ec6f66504da688d90193c (diff) |
OpenZFS 7054 - dmu_tx_hold_t should use refcount_t to track space
Authored by: Igor Kozhukhov ikozhukhov@gmail.com
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Approved by: Dan McDonald <danmcd@omniti.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov mail@gmelikov.ru
OpenZFS-issue: https://www.illumos.org/issues/7054
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/0c779ad
Closes #5600
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dmu_tx.c | 238 | ||||
-rw-r--r-- | module/zfs/zap.c | 26 | ||||
-rw-r--r-- | module/zfs/zap_micro.c | 26 |
3 files changed, 184 insertions, 106 deletions
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 8462432d1..7c1801be4 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -137,6 +137,12 @@ dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type, txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP); txh->txh_tx = tx; txh->txh_dnode = dn; + refcount_create(&txh->txh_space_towrite); + refcount_create(&txh->txh_space_tofree); + refcount_create(&txh->txh_space_tooverwrite); + refcount_create(&txh->txh_space_tounref); + refcount_create(&txh->txh_memory_tohold); + refcount_create(&txh->txh_fudge); #ifdef DEBUG_DMU_TX txh->txh_type = type; txh->txh_arg1 = arg1; @@ -228,12 +234,18 @@ dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db, freeable = (bp && (freeable || dsl_dataset_block_freeable(ds, bp, bp->blk_birth))); - if (freeable) - txh->txh_space_tooverwrite += space; - else - txh->txh_space_towrite += space; - if (bp) - txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp); + if (freeable) { + (void) refcount_add_many(&txh->txh_space_tooverwrite, + space, FTAG); + } else { + (void) refcount_add_many(&txh->txh_space_towrite, + space, FTAG); + } + + if (bp) { + (void) refcount_add_many(&txh->txh_space_tounref, + bp_get_dsize(os->os_spa, bp), FTAG); + } dmu_tx_count_twig(txh, dn, parent, level + 1, blkid >> epbs, freeable, history); @@ -364,8 +376,11 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) bits = 64 - min_bs; epbs = min_ibs - SPA_BLKPTRSHIFT; for (bits -= epbs * (nlvls - 1); - bits >= 0; bits -= epbs) - txh->txh_fudge += 1ULL << max_ibs; + bits >= 0; bits -= epbs) { + (void) refcount_add_many( + &txh->txh_fudge, + 1ULL << max_ibs, FTAG); + } goto out; } off += delta; @@ -381,7 +396,8 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) */ start = P2ALIGN(off, 1ULL << max_bs); end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1; - txh->txh_space_towrite += end - start + 1; + (void) refcount_add_many(&txh->txh_space_towrite, + end - start + 1, FTAG); start >>= min_bs; end >>= min_bs; @@ -396,18 +412,21 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) start >>= epbs; end >>= epbs; ASSERT3U(end, >=, start); - txh->txh_space_towrite += (end - start + 1) << max_ibs; + (void) refcount_add_many(&txh->txh_space_towrite, + (end - start + 1) << max_ibs, FTAG); if (start != 0) { /* * We also need a new blkid=0 indirect block * to reference any existing file data. */ - txh->txh_space_towrite += 1ULL << max_ibs; + (void) refcount_add_many(&txh->txh_space_towrite, + 1ULL << max_ibs, FTAG); } } out: - if (txh->txh_space_towrite + txh->txh_space_tooverwrite > + if (refcount_count(&txh->txh_space_towrite) + + refcount_count(&txh->txh_space_tooverwrite) > 2 * DMU_MAX_ACCESS) err = SET_ERROR(EFBIG); @@ -426,12 +445,15 @@ dmu_tx_count_dnode(dmu_tx_hold_t *txh) if (dn && dn->dn_dbuf->db_blkptr && dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) { - txh->txh_space_tooverwrite += space; - txh->txh_space_tounref += space; + (void) refcount_add_many(&txh->txh_space_tooverwrite, + space, FTAG); + (void) refcount_add_many(&txh->txh_space_tounref, space, FTAG); } else { - txh->txh_space_towrite += space; - if (dn && dn->dn_dbuf->db_blkptr) - txh->txh_space_tounref += space; + (void) refcount_add_many(&txh->txh_space_towrite, space, FTAG); + if (dn && dn->dn_dbuf->db_blkptr) { + (void) refcount_add_many(&txh->txh_space_tounref, + space, FTAG); + } } } @@ -570,7 +592,8 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) break; } - txh->txh_memory_tohold += dbuf->db.db_size; + (void) refcount_add_many(&txh->txh_memory_tohold, + dbuf->db.db_size, FTAG); /* * We don't check memory_tohold against DMU_MAX_ACCESS because @@ -623,20 +646,23 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) (dn->dn_indblkshift - SPA_BLKPTRSHIFT); while (level++ < maxlevel) { - txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1) - << dn->dn_indblkshift; + (void) refcount_add_many(&txh->txh_memory_tohold, + MAX(MIN(blkcnt, nl1blks), 1) << dn->dn_indblkshift, + FTAG); blkcnt = 1 + (blkcnt >> epbs); } } /* account for new level 1 indirect blocks that might show up */ if (skipped > 0) { - txh->txh_fudge += skipped << dn->dn_indblkshift; + (void) refcount_add_many(&txh->txh_fudge, + skipped << dn->dn_indblkshift, FTAG); skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs); - txh->txh_memory_tohold += skipped << dn->dn_indblkshift; + (void) refcount_add_many(&txh->txh_memory_tohold, + skipped << dn->dn_indblkshift, FTAG); } - txh->txh_space_tofree += space; - txh->txh_space_tounref += unref; + (void) refcount_add_many(&txh->txh_space_tofree, space, FTAG); + (void) refcount_add_many(&txh->txh_space_tounref, unref, FTAG); } /* @@ -662,7 +688,10 @@ dmu_tx_mark_netfree(dmu_tx_t *tx) * cause overflows when doing math with these values (e.g. in * dmu_tx_try_assign()). */ - txh->txh_space_tofree = txh->txh_space_tounref = 1024 * 1024 * 1024; + (void) refcount_add_many(&txh->txh_space_tofree, + 1024 * 1024 * 1024, FTAG); + (void) refcount_add_many(&txh->txh_space_tounref, + 1024 * 1024 * 1024, FTAG); } static void @@ -784,9 +813,10 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, int add, const char *name) { dmu_tx_t *tx = txh->txh_tx; dnode_t *dn; + int err; + int epbs; dsl_dataset_phys_t *ds_phys; - uint64_t nblocks; - int epbs, err; + int lvl; ASSERT(tx->tx_txg == 0); @@ -825,12 +855,17 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, int add, const char *name) */ bp = &dn->dn_phys->dn_blkptr[0]; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, - bp, bp->blk_birth)) - txh->txh_space_tooverwrite += MZAP_MAX_BLKSZ; - else - txh->txh_space_towrite += MZAP_MAX_BLKSZ; - if (!BP_IS_HOLE(bp)) - txh->txh_space_tounref += MZAP_MAX_BLKSZ; + bp, bp->blk_birth)) { + (void) refcount_add_many(&txh->txh_space_tooverwrite, + MZAP_MAX_BLKSZ, FTAG); + } else { + (void) refcount_add_many(&txh->txh_space_towrite, + MZAP_MAX_BLKSZ, FTAG); + } + if (!BP_IS_HOLE(bp)) { + (void) refcount_add_many(&txh->txh_space_tounref, + MZAP_MAX_BLKSZ, FTAG); + } return; } @@ -851,15 +886,29 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, int add, const char *name) /* * If the modified blocks are scattered to the four winds, - * we'll have to modify an indirect twig for each. + * we'll have to modify an indirect twig for each. We can make + * modifications at up to 3 locations: + * - header block at the beginning of the object + * - target leaf block + * - end of the object, where we might need to write: + * - a new leaf block if the target block needs to be split + * - the new pointer table, if it is growing + * - the new cookie table, if it is growing */ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; - ds_phys = dsl_dataset_phys(dn->dn_objset->os_dsl_dataset); - for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs) - if (ds_phys->ds_prev_snap_obj) - txh->txh_space_towrite += 3 << dn->dn_indblkshift; - else - txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift; + ds_phys = + dsl_dataset_phys(dn->dn_objset->os_dsl_dataset); + for (lvl = 1; lvl < dn->dn_nlevels; lvl++) { + uint64_t num_indirects = 1 + (dn->dn_maxblkid >> (epbs * lvl)); + uint64_t spc = MIN(3, num_indirects) << dn->dn_indblkshift; + if (ds_phys->ds_prev_snap_obj != 0) { + (void) refcount_add_many(&txh->txh_space_towrite, + spc, FTAG); + } else { + (void) refcount_add_many(&txh->txh_space_tooverwrite, + spc, FTAG); + } + } } void @@ -925,7 +974,7 @@ dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space) txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, THT_SPACE, space, 0); if (txh) - txh->txh_space_towrite += space; + (void) refcount_add_many(&txh->txh_space_towrite, space, FTAG); } int @@ -1267,12 +1316,12 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how) (void) refcount_add(&dn->dn_tx_holds, tx); mutex_exit(&dn->dn_mtx); } - towrite += txh->txh_space_towrite; - tofree += txh->txh_space_tofree; - tooverwrite += txh->txh_space_tooverwrite; - tounref += txh->txh_space_tounref; - tohold += txh->txh_memory_tohold; - fudge += txh->txh_fudge; + towrite += refcount_count(&txh->txh_space_towrite); + tofree += refcount_count(&txh->txh_space_tofree); + tooverwrite += refcount_count(&txh->txh_space_tooverwrite); + tounref += refcount_count(&txh->txh_space_tounref); + tohold += refcount_count(&txh->txh_memory_tohold); + fudge += refcount_count(&txh->txh_fudge); } /* @@ -1487,6 +1536,43 @@ dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta) #endif } +static void +dmu_tx_destroy(dmu_tx_t *tx) +{ + dmu_tx_hold_t *txh; + + while ((txh = list_head(&tx->tx_holds)) != NULL) { + dnode_t *dn = txh->txh_dnode; + + list_remove(&tx->tx_holds, txh); + refcount_destroy_many(&txh->txh_space_towrite, + refcount_count(&txh->txh_space_towrite)); + refcount_destroy_many(&txh->txh_space_tofree, + refcount_count(&txh->txh_space_tofree)); + refcount_destroy_many(&txh->txh_space_tooverwrite, + refcount_count(&txh->txh_space_tooverwrite)); + refcount_destroy_many(&txh->txh_space_tounref, + refcount_count(&txh->txh_space_tounref)); + refcount_destroy_many(&txh->txh_memory_tohold, + refcount_count(&txh->txh_memory_tohold)); + refcount_destroy_many(&txh->txh_fudge, + refcount_count(&txh->txh_fudge)); + kmem_free(txh, sizeof (dmu_tx_hold_t)); + if (dn != NULL) + dnode_rele(dn, tx); + } + + list_destroy(&tx->tx_callbacks); + list_destroy(&tx->tx_holds); +#ifdef DEBUG_DMU_TX + refcount_destroy_many(&tx->tx_space_written, + refcount_count(&tx->tx_space_written)); + refcount_destroy_many(&tx->tx_space_freed, + refcount_count(&tx->tx_space_freed)); +#endif + kmem_free(tx, sizeof (dmu_tx_t)); +} + void dmu_tx_commit(dmu_tx_t *tx) { @@ -1498,13 +1584,13 @@ dmu_tx_commit(dmu_tx_t *tx) * Go through the transaction's hold list and remove holds on * associated dnodes, notifying waiters if no holds remain. */ - while ((txh = list_head(&tx->tx_holds))) { + for (txh = list_head(&tx->tx_holds); txh != NULL; + txh = list_next(&tx->tx_holds, txh)) { dnode_t *dn = txh->txh_dnode; - list_remove(&tx->tx_holds, txh); - kmem_free(txh, sizeof (dmu_tx_hold_t)); if (dn == NULL) continue; + mutex_enter(&dn->dn_mtx); ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); @@ -1513,7 +1599,6 @@ dmu_tx_commit(dmu_tx_t *tx) cv_broadcast(&dn->dn_notxholds); } mutex_exit(&dn->dn_mtx); - dnode_rele(dn, tx); } if (tx->tx_tempreserve_cookie) @@ -1525,51 +1610,26 @@ dmu_tx_commit(dmu_tx_t *tx) if (tx->tx_anyobj == FALSE) txg_rele_to_sync(&tx->tx_txgh); - list_destroy(&tx->tx_callbacks); - list_destroy(&tx->tx_holds); #ifdef DEBUG_DMU_TX dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n", tx->tx_space_towrite, refcount_count(&tx->tx_space_written), tx->tx_space_tofree, refcount_count(&tx->tx_space_freed)); - refcount_destroy_many(&tx->tx_space_written, - refcount_count(&tx->tx_space_written)); - refcount_destroy_many(&tx->tx_space_freed, - refcount_count(&tx->tx_space_freed)); #endif - kmem_free(tx, sizeof (dmu_tx_t)); + dmu_tx_destroy(tx); } void dmu_tx_abort(dmu_tx_t *tx) { - dmu_tx_hold_t *txh; - ASSERT(tx->tx_txg == 0); - while ((txh = list_head(&tx->tx_holds))) { - dnode_t *dn = txh->txh_dnode; - - list_remove(&tx->tx_holds, txh); - kmem_free(txh, sizeof (dmu_tx_hold_t)); - if (dn != NULL) - dnode_rele(dn, tx); - } - /* * Call any registered callbacks with an error code. */ if (!list_is_empty(&tx->tx_callbacks)) dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED); - list_destroy(&tx->tx_callbacks); - list_destroy(&tx->tx_holds); -#ifdef DEBUG_DMU_TX - refcount_destroy_many(&tx->tx_space_written, - refcount_count(&tx->tx_space_written)); - refcount_destroy_many(&tx->tx_space_freed, - refcount_count(&tx->tx_space_freed)); -#endif - kmem_free(tx, sizeof (dmu_tx_t)); + dmu_tx_destroy(tx); } uint64_t @@ -1607,7 +1667,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error) { dmu_tx_callback_t *dcb; - while ((dcb = list_head(cb_list))) { + while ((dcb = list_head(cb_list)) != NULL) { list_remove(cb_list, dcb); dcb->dcb_func(dcb->dcb_data, error); kmem_free(dcb, sizeof (dmu_tx_callback_t)); @@ -1667,18 +1727,24 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) /* If blkptr doesn't exist then add space to towrite */ if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) { - txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE; + (void) refcount_add_many(&txh->txh_space_towrite, + SPA_OLD_MAXBLOCKSIZE, FTAG); } else { blkptr_t *bp; bp = DN_SPILL_BLKPTR(dn->dn_phys); if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, - bp, bp->blk_birth)) - txh->txh_space_tooverwrite += SPA_OLD_MAXBLOCKSIZE; - else - txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE; - if (!BP_IS_HOLE(bp)) - txh->txh_space_tounref += SPA_OLD_MAXBLOCKSIZE; + bp, bp->blk_birth)) { + (void) refcount_add_many(&txh->txh_space_tooverwrite, + SPA_OLD_MAXBLOCKSIZE, FTAG); + } else { + (void) refcount_add_many(&txh->txh_space_towrite, + SPA_OLD_MAXBLOCKSIZE, FTAG); + } + if (!BP_IS_HOLE(bp)) { + (void) refcount_add_many(&txh->txh_space_tounref, + SPA_OLD_MAXBLOCKSIZE, FTAG); + } } } diff --git a/module/zfs/zap.c b/module/zfs/zap.c index a8ccd2895..17107eb28 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -1358,8 +1358,8 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs) } int -fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, - uint64_t *tooverwrite) +fzap_count_write(zap_name_t *zn, int add, refcount_t *towrite, + refcount_t *tooverwrite) { zap_t *zap = zn->zn_zap; zap_leaf_t *l; @@ -1369,9 +1369,11 @@ fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, * Account for the header block of the fatzap. */ if (!add && dmu_buf_freeable(zap->zap_dbuf)) { - *tooverwrite += zap->zap_dbuf->db_size; + (void) refcount_add_many(tooverwrite, + zap->zap_dbuf->db_size, FTAG); } else { - *towrite += zap->zap_dbuf->db_size; + (void) refcount_add_many(towrite, + zap->zap_dbuf->db_size, FTAG); } /* @@ -1383,10 +1385,13 @@ fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, * could extend the table. */ if (add) { - if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0) - *towrite += zap->zap_dbuf->db_size; - else - *towrite += (zap->zap_dbuf->db_size * 3); + if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0) { + (void) refcount_add_many(towrite, + zap->zap_dbuf->db_size, FTAG); + } else { + (void) refcount_add_many(towrite, + zap->zap_dbuf->db_size * 3, FTAG); + } } /* @@ -1399,13 +1404,14 @@ fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, } if (!add && dmu_buf_freeable(l->l_dbuf)) { - *tooverwrite += l->l_dbuf->db_size; + (void) refcount_add_many(tooverwrite, l->l_dbuf->db_size, FTAG); } else { /* * If this an add operation, the leaf block could split. * Hence, we need to account for an additional leaf block. */ - *towrite += (add ? 2 : 1) * l->l_dbuf->db_size; + (void) refcount_add_many(towrite, + (add ? 2 : 1) * l->l_dbuf->db_size, FTAG); } zap_put_leaf(l); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 7cf7e927c..97c0fff65 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -1552,7 +1552,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) int zap_count_write_by_dnode(dnode_t *dn, const char *name, int add, - uint64_t *towrite, uint64_t *tooverwrite) + refcount_t *towrite, refcount_t *tooverwrite) { zap_t *zap; int err = 0; @@ -1562,14 +1562,15 @@ zap_count_write_by_dnode(dnode_t *dn, const char *name, int add, * be affected in this operation. So, account for the worst case : * - 3 blocks overwritten: target leaf, ptrtbl block, header block * - 4 new blocks written if adding: - * - 2 blocks for possibly split leaves, - * - 2 grown ptrtbl blocks + * - 2 blocks for possibly split leaves, + * - 2 grown ptrtbl blocks * * This also accommodates the case where an add operation to a fairly * large microzap results in a promotion to fatzap. */ if (name == NULL) { - *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE; + (void) refcount_add_many(towrite, + (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG); return (err); } @@ -1594,7 +1595,8 @@ zap_count_write_by_dnode(dnode_t *dn, const char *name, int add, /* * We treat this case as similar to (name == NULL) */ - *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE; + (void) refcount_add_many(towrite, + (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG); } } else { /* @@ -1612,13 +1614,17 @@ zap_count_write_by_dnode(dnode_t *dn, const char *name, int add, * 4 new blocks written : 2 new split leaf, 2 grown * ptrtbl blocks */ - if (dmu_buf_freeable(zap->zap_dbuf)) - *tooverwrite += MZAP_MAX_BLKSZ; - else - *towrite += MZAP_MAX_BLKSZ; + if (dmu_buf_freeable(zap->zap_dbuf)) { + (void) refcount_add_many(tooverwrite, + MZAP_MAX_BLKSZ, FTAG); + } else { + (void) refcount_add_many(towrite, + MZAP_MAX_BLKSZ, FTAG); + } if (add) { - *towrite += 4 * MZAP_MAX_BLKSZ; + (void) refcount_add_many(towrite, + 4 * MZAP_MAX_BLKSZ, FTAG); } } |