diff options
author | Matthew Ahrens <[email protected]> | 2016-07-20 15:42:13 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-08-19 12:48:03 -0700 |
commit | 2bce8049c3d782f4feb72493564754c0595606bf (patch) | |
tree | cbeb72318d3d772c4c689ad9e9cccc52a9229676 /include/sys/dmu.h | |
parent | 8bea9815048e4b1a85905e0b381865dedd266f2d (diff) |
OpenZFS 7004 - dmu_tx_hold_zap() does dnode_hold() 7x on same object
Using a benchmark which has 32 threads creating 2 million files in the
same directory, on a machine with 16 CPU cores, I observed poor
performance. I noticed that dmu_tx_hold_zap() was using about 30% of
all CPU, and doing dnode_hold() 7 times on the same object (the ZAP
object that is being held).
dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is
running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the
dnode_t that we already have in hand, rather than repeatedly calling
dnode_hold(). To do this, we need to pass the dnode_t down through
all the intermediate calls that dmu_tx_hold_zap() makes, making these
routines take the dnode_t* rather than an objset_t* and a uint64_t
object number. In particular, the following routines will need to have
analogous *_by_dnode() variants created:
dmu_buf_hold_noread()
dmu_buf_hold()
zap_lookup()
zap_lookup_norm()
zap_count_write()
zap_lockdir()
This can improve performance on the benchmark described above by 100%,
from 30,000 file creations per second to 60,000. (This improvement is on
top of that provided by working around the object allocation issue. Peak
performance of ~90,000 creations per second was observed with 8 CPUs;
adding CPUs past that decreased performance due to lock contention.) The
CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds
to 40 CPU-seconds.
Sponsored by: Intel Corp.
Signed-off-by: Matthew Ahrens <[email protected]>
Signed-off-by: Ned Bass <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7004
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/109
Closes #4641
Closes #4972
Diffstat (limited to 'include/sys/dmu.h')
-rw-r--r-- | include/sys/dmu.h | 13 |
1 file changed, 9 insertions, 4 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 98da92890..a8ed2868f 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
@@ -73,6 +73,7 @@ struct sa_handle;
 typedef struct objset objset_t;
 typedef struct dmu_tx dmu_tx_t;
 typedef struct dsl_dir dsl_dir_t;
+typedef struct dnode dnode_t;
 
 typedef enum dmu_object_byteswap {
 	DMU_BSWAP_UINT8,
@@ -420,7 +421,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
 #define WP_DMU_SYNC 0x2
 #define WP_SPILL 0x4
 
-void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
+void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
     struct zio_prop *zp);
 /*
  * The bonus data is accessed more or less like a regular buffer.
@@ -446,7 +447,7 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
  */
 
 int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
-int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
+int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
     void *tag, dmu_buf_t **dbp);
 int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
 
@@ -466,6 +467,8 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
  */
 int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
     void *tag, dmu_buf_t **, int flags);
+int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
+    void *tag, dmu_buf_t **dbp, int flags);
 
 /*
  * Add a reference to a dmu buffer that has already been held via
@@ -620,6 +623,8 @@ void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
 void *dmu_buf_get_user(dmu_buf_t *db);
 
 objset_t *dmu_buf_get_objset(dmu_buf_t *db);
+dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
+void dmu_buf_dnode_exit(dmu_buf_t *db);
 
 /* Block until any in-progress dmu buf user evictions complete. */
 void dmu_buf_user_evict_wait(void);
@@ -799,7 +804,7 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
 int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
 void __dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
 /* Like dmu_object_info, but faster if you have a held dnode in hand. */
-void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
+void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi);
 /* Like dmu_object_info, but faster if you have a held dbuf in hand. */
 void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
 /*