about | summary | refs | log | tree | commit | diff | stats
path: root/module/zfs/zfs_replay.c
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs/zfs_replay.c')
-rw-r--r-- module/zfs/zfs_replay.c 120
1 files changed, 73 insertions, 47 deletions
diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c
index 85b79703a..f26009b02 100644
--- a/module/zfs/zfs_replay.c
+++ b/module/zfs/zfs_replay.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -129,6 +127,8 @@ zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime);
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ);
+ if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
+ xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0);
}
static int
@@ -275,9 +275,9 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs,
uint64_t txtype;
int error;
+ txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
byteswap_uint64_array(lracl, sizeof (*lracl));
- txtype = (int)lr->lr_common.lrc_txtype;
if (txtype == TX_CREATE_ACL_ATTR ||
txtype == TX_MKDIR_ACL_ATTR) {
lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
@@ -318,7 +318,7 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs,
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
- switch ((int)lr->lr_common.lrc_txtype) {
+ switch (txtype) {
case TX_CREATE_ACL:
aclstart = (caddr_t)(lracl + 1);
fuidstart = (caddr_t)aclstart +
@@ -391,7 +391,8 @@ bail:
VN_RELE(ZTOV(dzp));
- zfs_fuid_info_free(zfsvfs->z_fuid_replay);
+ if (zfsvfs->z_fuid_replay)
+ zfs_fuid_info_free(zfsvfs->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
return (error);
@@ -413,9 +414,9 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
uint64_t txtype;
int error;
+ txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
- txtype = (int)lr->lr_common.lrc_txtype;
if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
}
@@ -460,7 +461,7 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
lr->lr_uid, lr->lr_gid);
}
- switch ((int)lr->lr_common.lrc_txtype) {
+ switch (txtype) {
case TX_CREATE_ATTR:
lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
@@ -498,7 +499,6 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
&vp, kcred, NULL, vflg, NULL);
break;
case TX_MKXATTR:
- name = (char *)(lr + 1);
error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred);
break;
case TX_SYMLINK:
@@ -625,6 +625,7 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
znode_t *zp;
int error;
ssize_t resid;
+ uint64_t orig_eof, eod, offset, length;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@@ -640,8 +641,64 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
return (error);
}
- error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, lr->lr_length,
- lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
+ offset = lr->lr_offset;
+ length = lr->lr_length;
+ eod = offset + length; /* end of data for this write */
+
+ orig_eof = zp->z_size;
+
+ /* If it's a dmu_sync() block, write the whole block */
+ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+ uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
+ if (length < blocksize) {
+ offset -= offset % blocksize;
+ length = blocksize;
+ }
+ }
+
+ error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset,
+ UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
+
+ /*
+ * This may be a write from a dmu_sync() for a whole block,
+ * and may extend beyond the current end of the file.
+ * We can't just replay what was written for this TX_WRITE, as
+ * a future TX_WRITE2 may extend the eof and the data for that
+ * write needs to be there. So we write the whole block and then
+ * reduce the eof back to the end of this write's data.
+ */
+ if (orig_eof < zp->z_size) /* file length grew ? */
+ zp->z_size = eod;
+
+ VN_RELE(ZTOV(zp));
+
+ return (error);
+}
+
+/*
+ * TX_WRITE2 records are only generated when dmu_sync() returns EALREADY,
+ * meaning the pool block is already being synced. Since we now always
+ * write out full blocks, all we have to do is extend the eof if the
+ * file has grown.
+ */
+static int
+zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
+{
+ znode_t *zp;
+ int error;
+ uint64_t end;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
+ return (error);
+
+ end = lr->lr_offset + lr->lr_length;
+ if (end > zp->z_size) {
+ ASSERT3U(end - zp->z_size, <, zp->z_blksz);
+ zp->z_size = end;
+ }
VN_RELE(ZTOV(zp));
@@ -658,16 +715,8 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log truncates out of order, it's possible the
- * file has been removed. In this case just drop the truncate
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
- }
bzero(&fl, sizeof (fl));
fl.l_type = F_WRLCK;
@@ -701,16 +750,8 @@ zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
}
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log setattrs out of order, it's possible the
- * file has been removed. In this case just drop the setattr
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
- }
zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);
@@ -756,16 +797,8 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
zfs_oldace_byteswap(ace, lr->lr_aclcnt);
}
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log acls out of order, it's possible the
- * file has been removed. In this case just drop the acl
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
- }
bzero(&vsa, sizeof (vsa));
vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
@@ -813,16 +846,8 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
}
}
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log acls out of order, it's possible the
- * file has been removed. In this case just drop the acl
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
- }
bzero(&vsa, sizeof (vsa));
vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
@@ -875,4 +900,5 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create_acl, /* TX_MKDIR_ACL */
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
+ zfs_replay_write2, /* TX_WRITE2 */
};