diff options
author | Brian Behlendorf <[email protected]> | 2012-03-02 10:35:50 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2012-03-02 13:20:48 -0800 |
commit | ec2626ad3f695a2ced3946c4197ef64cbcac4959 (patch) | |
tree | 9efe62507581fdef859b06390413ebda57632d43 | |
parent | 4b787d75c869a7f633607ecb17f3a7c482a553d2 (diff) |
Use SA_HDL_PRIVATE for SA xattrs
A private SA handle must be used to ensure we can drop the dbuf
hold on the spill block prior to calling dmu_tx_commit(). If we
call dmu_tx_commit() before sa_handle_destroy(), then our hold
will trigger a copy of the dbuf to be made. This is done to
prevent data from leaking in to the syncing txg. As a result
the original dirty spill block will remain cached.
Additionally, relying on the shared zp->z_sa_hdl is unsafe in
the xattr context because the znode may be asynchronously dropped
from the cache. It's far safer and simpler just to use a private
handle for xattrs. Plus any additional overhead is offset by
the avoidance of the previously mentioned memory copy.
These forever dirty buffers can be noticed in the arcstats under
the anon_size. On a quiescent system the value should be zero.
Without this fix and a SA xattr write workload you will see
anon_size increase. Eventually, if enough dirty data builds up
your system it will appear to hang. This occurs because the dmu
won't allow new txs to be assigned until that dirty data is
flushed, and it won't be because it's not part of an assigned tx.
As an aside, I typically see anon_size lurk around 16k so I think
there is another place in the code which needs a similar fix.
However, this value doesn't grow over time so it isn't critical.
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #503
Issue #513
-rw-r--r-- | module/zfs/zfs_sa.c | 33 |
1 files changed, 28 insertions, 5 deletions
diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index 621c5f904..f35f6f64c 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -188,6 +188,7 @@ int zfs_sa_get_xattr(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); + sa_handle_t *sa; char *obj; int size; int error; @@ -196,8 +197,14 @@ zfs_sa_get_xattr(znode_t *zp) ASSERT(!zp->z_xattr_cached); ASSERT(zp->z_is_sa); - error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &size); + error = sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa); + if (error) + return (error); + + error = sa_size(sa, SA_ZPL_DXATTR(zsb), &size); if (error) { + sa_handle_destroy(sa); + if (error == ENOENT) return nvlist_alloc(&zp->z_xattr_cached, NV_UNIQUE_NAME, KM_SLEEP); @@ -207,11 +214,12 @@ zfs_sa_get_xattr(znode_t *zp) obj = sa_spill_alloc(KM_SLEEP); - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size); + error = sa_lookup(sa, SA_ZPL_DXATTR(zsb), obj, size); if (error == 0) error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP); sa_spill_free(obj); + sa_handle_destroy(sa); return (error); } @@ -220,6 +228,7 @@ int zfs_sa_set_xattr(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); + sa_handle_t *sa; dmu_tx_t *tx; char *obj; size_t size; @@ -240,16 +249,30 @@ zfs_sa_set_xattr(znode_t *zp) if (error) goto out_free; + /* + * A private SA handle must be used to ensure we can drop the hold + * on the spill block prior to calling dmu_tx_commit(). If we call + * dmu_tx_commit() before sa_handle_destroy(), then our hold will + * trigger a copy of the buffer at txg sync time. This is done to + * prevent data from leaking in to the syncing txg. As a result + * the original dirty spill block will be remain dirty in the arc + * while the copy is written and laundered. + */ + error = sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa); + if (error) + goto out_free; + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa_create(tx, size); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + dmu_tx_hold_sa(tx, sa, B_TRUE); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); + sa_handle_destroy(sa); } else { - error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), - obj, size, tx); + error = sa_update(sa, SA_ZPL_DXATTR(zsb), obj, size, tx); + sa_handle_destroy(sa); if (error) dmu_tx_abort(tx); else |