summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--include/sys/zfs_context.h7
-rw-r--r--lib/libzpool/kernel.c17
-rw-r--r--module/zfs/txg.c10
-rw-r--r--module/zfs/vdev_file.c24
-rw-r--r--module/zfs/zio.c4
-rw-r--r--module/zfs/zpl_file.c11
-rw-r--r--module/zfs/zvol.c41
7 files changed, 69 insertions(+), 45 deletions(-)
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index d4c6fb810..1487a99f4 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -733,6 +733,11 @@ void ksiddomain_rele(ksiddomain_t *);
(void) nanosleep(&ts, NULL); \
} while (0)
-#endif /* _KERNEL */
+typedef int fstrans_cookie_t;
+
+extern fstrans_cookie_t spl_fstrans_mark(void);
+extern void spl_fstrans_unmark(fstrans_cookie_t);
+extern int spl_fstrans_check(void);
+#endif /* _KERNEL */
#endif /* _SYS_ZFS_CONTEXT_H */
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index 995f61d05..db50352c5 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -1275,3 +1275,20 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
return (0);
}
+
+fstrans_cookie_t
+spl_fstrans_mark(void)
+{
+ return ((fstrans_cookie_t) 0);
+}
+
+void
+spl_fstrans_unmark(fstrans_cookie_t cookie)
+{
+}
+
+int
+spl_fstrans_check(void)
+{
+ return (0);
+}
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index 4693762b8..81afeb373 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -483,15 +483,7 @@ txg_sync_thread(dsl_pool_t *dp)
vdev_stat_t *vs1, *vs2;
clock_t start, delta;
-#ifdef _KERNEL
- /*
- * Annotate this process with a flag that indicates that it is
- * unsafe to use KM_SLEEP during memory allocations due to the
- * potential for a deadlock. KM_PUSHPAGE should be used instead.
- */
- current->flags |= PF_NOFS;
-#endif /* _KERNEL */
-
+ (void) spl_fstrans_mark();
txg_thread_enter(tx, &cpr);
vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
index 8059cdea4..8573a3a66 100644
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -161,6 +161,17 @@ vdev_file_io_strategy(void *arg)
zio_interrupt(zio);
}
+static void
+vdev_file_io_fsync(void *arg)
+{
+ zio_t *zio = (zio_t *)arg;
+ vdev_file_t *vf = zio->io_vd->vdev_tsd;
+
+ zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);
+
+ zio_interrupt(zio);
+}
+
static int
vdev_file_io_start(zio_t *zio)
{
@@ -180,6 +191,19 @@ vdev_file_io_start(zio_t *zio)
if (zfs_nocacheflush)
break;
+ /*
+ * We cannot safely call vfs_fsync() when PF_FSTRANS
+ * is set in the current context. Filesystems like
+ * XFS include sanity checks to verify it is not
+ * already set, see xfs_vm_writepage(). Therefore
+ * the sync must be dispatched to a different context.
+ */
+ if (spl_fstrans_check()) {
+ VERIFY3U(taskq_dispatch(vdev_file_taskq,
+ vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
+ return (ZIO_PIPELINE_STOP);
+ }
+
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
kcred, NULL);
break;
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 9d70b3e59..7c0e6bf7e 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1361,7 +1361,11 @@ static zio_pipe_stage_t *zio_pipeline[];
void
zio_execute(zio_t *zio)
{
+ fstrans_cookie_t cookie;
+
+ cookie = spl_fstrans_mark();
__zio_execute(zio);
+ spl_fstrans_unmark(cookie);
}
__attribute__((always_inline))
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index cabe9bf15..61005dcd4 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -481,19 +481,14 @@ int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
struct address_space *mapping = data;
+ fstrans_cookie_t cookie;
ASSERT(PageLocked(pp));
ASSERT(!PageWriteback(pp));
- ASSERT(!(current->flags & PF_NOFS));
- /*
- * Annotate this call path with a flag that indicates that it is
- * unsafe to use KM_SLEEP during memory allocations due to the
- * potential for a deadlock. KM_PUSHPAGE should be used instead.
- */
- current->flags |= PF_NOFS;
+ cookie = spl_fstrans_mark();
(void) zfs_putpage(mapping->host, pp, wbc);
- current->flags &= ~PF_NOFS;
+ spl_fstrans_unmark(cookie);
return (0);
}
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index fa5c7eb4e..ddaf520a2 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -577,20 +577,13 @@ zvol_write(void *arg)
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error = 0;
dmu_tx_t *tx;
rl_t *rl;
- /*
- * Annotate this call path with a flag that indicates that it is
- * unsafe to use KM_SLEEP during memory allocations due to the
- * potential for a deadlock. KM_PUSHPAGE should be used instead.
- */
- ASSERT(!(current->flags & PF_NOFS));
- current->flags |= PF_NOFS;
-
if (req->cmd_flags & VDEV_REQ_FLUSH)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
@@ -598,7 +591,7 @@ zvol_write(void *arg)
* Some requests are just for flush and nothing else.
*/
if (size == 0) {
- blk_end_request(req, 0, size);
+ error = 0;
goto out;
}
@@ -612,7 +605,6 @@ zvol_write(void *arg)
if (error) {
dmu_tx_abort(tx);
zfs_range_unlock(rl);
- blk_end_request(req, -error, size);
goto out;
}
@@ -628,9 +620,9 @@ zvol_write(void *arg)
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
- blk_end_request(req, -error, size);
out:
- current->flags &= ~PF_NOFS;
+ blk_end_request(req, -error, size);
+ spl_fstrans_unmark(cookie);
}
#ifdef HAVE_BLK_QUEUE_DISCARD
@@ -640,21 +632,14 @@ zvol_discard(void *arg)
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t start = blk_rq_pos(req) << 9;
uint64_t end = start + blk_rq_bytes(req);
int error;
rl_t *rl;
- /*
- * Annotate this call path with a flag that indicates that it is
- * unsafe to use KM_SLEEP during memory allocations due to the
- * potential for a deadlock. KM_PUSHPAGE should be used instead.
- */
- ASSERT(!(current->flags & PF_NOFS));
- current->flags |= PF_NOFS;
-
if (end > zv->zv_volsize) {
- blk_end_request(req, -EIO, blk_rq_bytes(req));
+ error = EIO;
goto out;
}
@@ -668,7 +653,7 @@ zvol_discard(void *arg)
end = P2ALIGN(end, zv->zv_volblocksize);
if (start >= end) {
- blk_end_request(req, 0, blk_rq_bytes(req));
+ error = 0;
goto out;
}
@@ -681,10 +666,9 @@ zvol_discard(void *arg)
*/
zfs_range_unlock(rl);
-
- blk_end_request(req, -error, blk_rq_bytes(req));
out:
- current->flags &= ~PF_NOFS;
+ blk_end_request(req, -error, blk_rq_bytes(req));
+ spl_fstrans_unmark(cookie);
}
#endif /* HAVE_BLK_QUEUE_DISCARD */
@@ -700,14 +684,15 @@ zvol_read(void *arg)
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error;
rl_t *rl;
if (size == 0) {
- blk_end_request(req, 0, size);
- return;
+ error = 0;
+ goto out;
}
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
@@ -720,7 +705,9 @@ zvol_read(void *arg)
if (error == ECKSUM)
error = SET_ERROR(EIO);
+out:
blk_end_request(req, -error, size);
+ spl_fstrans_unmark(cookie);
}
/*