summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2012-03-02 10:35:50 -0800
committerBrian Behlendorf <[email protected]>2012-03-02 13:20:48 -0800
commitec2626ad3f695a2ced3946c4197ef64cbcac4959 (patch)
tree9efe62507581fdef859b06390413ebda57632d43
parent4b787d75c869a7f633607ecb17f3a7c482a553d2 (diff)
Use SA_HDL_PRIVATE for SA xattrs
A private SA handle must be used to ensure we can drop the dbuf hold on the spill block prior to calling dmu_tx_commit(). If we call dmu_tx_commit() before sa_handle_destroy(), then our hold will trigger a copy of the dbuf to be made. This is done to prevent data from leaking into the syncing txg. As a result the original dirty spill block will remain cached. Additionally, relying on the shared zp->z_sa_hdl is unsafe in the xattr context because the znode may be asynchronously dropped from the cache. It's far safer and simpler just to use a private handle for xattrs. Plus, any additional overhead is offset by the avoidance of the previously mentioned memory copy. These forever-dirty buffers can be noticed in the arcstats under anon_size. On a quiescent system the value should be zero. Without this fix and with an SA xattr write workload you will see anon_size increase. Eventually, if enough dirty data builds up, your system will appear to hang. This occurs because the dmu won't allow new txs to be assigned until that dirty data is flushed, and it never will be flushed because it's not part of an assigned tx. As an aside, I typically see anon_size lurk around 16k, so I think there is another place in the code which needs a similar fix. However, this value doesn't grow over time so it isn't critical. Signed-off-by: Brian Behlendorf <[email protected]> Issue #503 Issue #513
-rw-r--r--module/zfs/zfs_sa.c33
1 files changed, 28 insertions, 5 deletions
diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c
index 621c5f904..f35f6f64c 100644
--- a/module/zfs/zfs_sa.c
+++ b/module/zfs/zfs_sa.c
@@ -188,6 +188,7 @@ int
zfs_sa_get_xattr(znode_t *zp)
{
zfs_sb_t *zsb = ZTOZSB(zp);
+ sa_handle_t *sa;
char *obj;
int size;
int error;
@@ -196,8 +197,14 @@ zfs_sa_get_xattr(znode_t *zp)
ASSERT(!zp->z_xattr_cached);
ASSERT(zp->z_is_sa);
- error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &size);
+ error = sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa);
+ if (error)
+ return (error);
+
+ error = sa_size(sa, SA_ZPL_DXATTR(zsb), &size);
if (error) {
+ sa_handle_destroy(sa);
+
if (error == ENOENT)
return nvlist_alloc(&zp->z_xattr_cached,
NV_UNIQUE_NAME, KM_SLEEP);
@@ -207,11 +214,12 @@ zfs_sa_get_xattr(znode_t *zp)
obj = sa_spill_alloc(KM_SLEEP);
- error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size);
+ error = sa_lookup(sa, SA_ZPL_DXATTR(zsb), obj, size);
if (error == 0)
error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP);
sa_spill_free(obj);
+ sa_handle_destroy(sa);
return (error);
}
@@ -220,6 +228,7 @@ int
zfs_sa_set_xattr(znode_t *zp)
{
zfs_sb_t *zsb = ZTOZSB(zp);
+ sa_handle_t *sa;
dmu_tx_t *tx;
char *obj;
size_t size;
@@ -240,16 +249,30 @@ zfs_sa_set_xattr(znode_t *zp)
if (error)
goto out_free;
+ /*
+ * A private SA handle must be used to ensure we can drop the hold
+ * on the spill block prior to calling dmu_tx_commit(). If we call
+ * dmu_tx_commit() before sa_handle_destroy(), then our hold will
+ * trigger a copy of the buffer at txg sync time. This is done to
+ * prevent data from leaking into the syncing txg. As a result
+ * the original dirty spill block will remain dirty in the arc
+ * while the copy is written and laundered.
+ */
+ error = sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa);
+ if (error)
+ goto out_free;
+
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa_create(tx, size);
- dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+ dmu_tx_hold_sa(tx, sa, B_TRUE);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
+ sa_handle_destroy(sa);
} else {
- error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
- obj, size, tx);
+ error = sa_update(sa, SA_ZPL_DXATTR(zsb), obj, size, tx);
+ sa_handle_destroy(sa);
if (error)
dmu_tx_abort(tx);
else