diff options
author | Matthew Ahrens <[email protected]> | 2016-07-20 15:42:13 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-08-19 12:48:03 -0700 |
commit | 2bce8049c3d782f4feb72493564754c0595606bf (patch) | |
tree | cbeb72318d3d772c4c689ad9e9cccc52a9229676 /module/zfs/dmu.c | |
parent | 8bea9815048e4b1a85905e0b381865dedd266f2d (diff) |
OpenZFS 7004 - dmu_tx_hold_zap() does dnode_hold() 7x on same object
Using a benchmark which has 32 threads creating 2 million files in the
same directory, on a machine with 16 CPU cores, I observed poor
performance. I noticed that dmu_tx_hold_zap() was using about 30% of
all CPU, and doing dnode_hold() 7 times on the same object (the ZAP
object that is being held).
dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is
running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the
dnode_t that we already have in hand, rather than repeatedly calling
dnode_hold(). To do this, we need to pass the dnode_t down through
all the intermediate calls that dmu_tx_hold_zap() makes, making these
routines take the dnode_t* rather than an objset_t* and a uint64_t
object number. In particular, the following routines will need to have
analogous *_by_dnode() variants created:
dmu_buf_hold_noread()
dmu_buf_hold()
zap_lookup()
zap_lookup_norm()
zap_count_write()
zap_lockdir()
zap_count_write()
This can improve performance on the benchmark described above by 100%,
from 30,000 file creations per second to 60,000. (This improvement is on
top of that provided by working around the object allocation issue. Peak
performance of ~90,000 creations per second was observed with 8 CPUs;
adding CPUs past that decreased performance due to lock contention.) The
CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds
to 40 CPU-seconds.
Sponsored by: Intel Corp.
Signed-off-by: Matthew Ahrens <[email protected]>
Signed-off-by: Ned Bass <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7004
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/109
Closes #4641
Closes #4972
Diffstat (limited to 'module/zfs/dmu.c')
-rw-r--r-- | module/zfs/dmu.c | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index e1dfb41ff..6ed4743ee 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -128,6 +128,26 @@ const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { }; int +dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, + void *tag, dmu_buf_t **dbp) +{ + uint64_t blkid; + dmu_buf_impl_t *db; + + blkid = dbuf_whichblock(dn, 0, offset); + rw_enter(&dn->dn_struct_rwlock, RW_READER); + db = dbuf_hold(dn, blkid, tag); + rw_exit(&dn->dn_struct_rwlock); + + if (db == NULL) { + *dbp = NULL; + return (SET_ERROR(EIO)); + } + + *dbp = &db->db; + return (0); +} +int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset, void *tag, dmu_buf_t **dbp) { @@ -155,6 +175,29 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset, } int +dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset, + void *tag, dmu_buf_t **dbp, int flags) +{ + int err; + int db_flags = DB_RF_CANFAIL; + + if (flags & DMU_READ_NO_PREFETCH) + db_flags |= DB_RF_NOPREFETCH; + + err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp); + if (err == 0) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp); + err = dbuf_read(db, NULL, db_flags); + if (err != 0) { + dbuf_rele(db, tag); + *dbp = NULL; + } + } + + return (err); +} + +int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, void *tag, dmu_buf_t **dbp, int flags) { |