diff options
author | Matthew Ahrens <[email protected]> | 2016-07-20 15:42:13 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-08-19 12:48:03 -0700 |
commit | 2bce8049c3d782f4feb72493564754c0595606bf (patch) | |
tree | cbeb72318d3d772c4c689ad9e9cccc52a9229676 /module/zfs/zap_micro.c | |
parent | 8bea9815048e4b1a85905e0b381865dedd266f2d (diff) |
OpenZFS 7004 - dmu_tx_hold_zap() does dnode_hold() 7x on same object
Using a benchmark which has 32 threads creating 2 million files in the
same directory, on a machine with 16 CPU cores, I observed poor
performance. I noticed that dmu_tx_hold_zap() was using about 30% of
all CPU, and doing dnode_hold() 7 times on the same object (the ZAP
object that is being held).
dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is
running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the
dnode_t that we already have in hand, rather than repeatedly calling
dnode_hold(). To do this, we need to pass the dnode_t down through
all the intermediate calls that dmu_tx_hold_zap() makes, making these
routines take the dnode_t* rather than an objset_t* and a uint64_t
object number. In particular, the following routines will need to have
analogous *_by_dnode() variants created:
dmu_buf_hold_noread()
dmu_buf_hold()
zap_lookup()
zap_lookup_norm()
zap_count_write()
zap_lockdir()
zap_count_write()
This can improve performance on the benchmark described above by 100%,
from 30,000 file creations per second to 60,000. (This improvement is on
top of that provided by working around the object allocation issue. Peak
performance of ~90,000 creations per second was observed with 8 CPUs;
adding CPUs past that decreased performance due to lock contention.) The
CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds
to 40 CPU-seconds.
Sponsored by: Intel Corp.
Signed-off-by: Matthew Ahrens <[email protected]>
Signed-off-by: Ned Bass <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7004
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/109
Closes #4641
Closes #4972
Diffstat (limited to 'module/zfs/zap_micro.c')
-rw-r--r-- | module/zfs/zap_micro.c | 51 |
1 files changed, 48 insertions, 3 deletions
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index de646287e..bc0a5e71d 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -536,6 +536,24 @@ zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx, return (0); } +static int +zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx, + krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp) +{ + dmu_buf_t *db; + int err; + + err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH); + if (err != 0) { + return (err); + } + err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); + if (err != 0) { + dmu_buf_rele(db, tag); + } + return (err); +} + int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp) @@ -928,6 +946,33 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name) } int +zap_lookup_by_dnode(dnode_t *dn, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf) +{ + return (zap_lookup_norm_by_dnode(dn, name, integer_size, + num_integers, buf, MT_EXACT, NULL, 0, NULL)); +} + +int +zap_lookup_norm_by_dnode(dnode_t *dn, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf, + matchtype_t mt, char *realname, int rn_len, + boolean_t *ncp) +{ + zap_t *zap; + int err; + + err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, + FTAG, &zap); + if (err != 0) + return (err); + err = zap_lookup_impl(zap, name, integer_size, + num_integers, buf, mt, realname, rn_len, ncp); + zap_unlockdir(zap, FTAG); + return (err); +} + +int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints) { @@ -1460,7 +1505,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) } int -zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, +zap_count_write_by_dnode(dnode_t *dn, const char *name, int add, uint64_t *towrite, uint64_t *tooverwrite) { zap_t *zap; @@ -1488,7 +1533,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, * At present we are just evaluating the possibility of this operation * and hence we do not want to trigger an upgrade. */ - err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, + err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); if (err != 0) return (err); @@ -1552,7 +1597,7 @@ EXPORT_SYMBOL(zap_lookup_uint64); EXPORT_SYMBOL(zap_contains); EXPORT_SYMBOL(zap_prefetch); EXPORT_SYMBOL(zap_prefetch_uint64); -EXPORT_SYMBOL(zap_count_write); +EXPORT_SYMBOL(zap_count_write_by_dnode); EXPORT_SYMBOL(zap_add); EXPORT_SYMBOL(zap_add_uint64); EXPORT_SYMBOL(zap_update); |