diff options
author | Brian Behlendorf <[email protected]> | 2011-08-01 18:24:40 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2011-08-04 13:37:38 -0700 |
commit | 76659dc110ef2ada13bcb8e4e2ec60d8216c6836 (patch) | |
tree | 19de8ecbb20ae764ae16b0f05399fff16424c440 /include | |
parent | 3c0e5c0f455576d045fa443cbab74834d70ded55 (diff) |
Add backing_device_info per-filesystem
For a long time now the kernel has been moving away from using the
pdflush daemon to write 'old' dirty pages to disk. The primary reason
for this is because the pdflush daemon is single threaded and can be
a limiting factor for performance. Since pdflush sequentially walks
the dirty inode list for each super block any delay in processing can
slow down dirty page writeback for all filesystems.
The replacement for pdflush is called bdi (backing device info). The
bdi system involves creating a per-filesystem control structure each
with its own private sets of queues to manage writeback. The advantage
is greater parallelism which improves performance and prevents a single
filesystem from slowing writeback to the others.
For a long time both systems co-existed in the kernel so it wasn't
strictly required to implement the bdi scheme. However, as of
Linux 2.6.36 kernels the pdflush functionality has been retired.
Since ZFS already bypasses the page cache for most I/O this is only
an issue for mmap(2) writes which must go through the page cache.
Even then adding this missing support for newer kernels was overlooked
because there are other mechanisms which can trigger writeback.
However, there is one critical case where not implementing the bdi
functionality can cause problems. If an application handles a page
fault it can enter the balance_dirty_pages() callpath. This will
result in the application hanging until the number of dirty pages in
the system drops below the dirty ratio.
Without a registered backing_device_info for the filesystem the
dirty pages will not get written out. Thus the application will hang.
As mentioned above this was less of an issue with older kernels because
pdflush would eventually write out the dirty pages.
This change adds a backing_device_info structure to the zfs_sb_t
which is already allocated per-super block. It is then registered
when the filesystem mounted and unregistered on unmount. It will
not be registered for mounted snapshots which are read-only. This
change will result in flush-<pool> thread being dynamically created
and destroyed per-mounted filesystem for writeback.
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #174
Diffstat (limited to 'include')
-rw-r--r-- | include/Makefile.in | 1 | ||||
-rw-r--r-- | include/linux/Makefile.in | 1 | ||||
-rw-r--r-- | include/linux/vfs_compat.h | 19 | ||||
-rw-r--r-- | include/sys/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/fm/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/fm/fs/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/fs/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/zfs_vfsops.h | 1 |
8 files changed, 26 insertions, 0 deletions
diff --git a/include/Makefile.in b/include/Makefile.in index 379657fbc..8764d0958 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -43,6 +43,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ + $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ diff --git a/include/linux/Makefile.in b/include/linux/Makefile.in index e0678c1ef..3a901c33e 100644 --- a/include/linux/Makefile.in +++ b/include/linux/Makefile.in @@ -43,6 +43,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ + $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ diff --git a/include/linux/vfs_compat.h b/include/linux/vfs_compat.h index 1d8a523cd..cbbf21e61 100644 --- a/include/linux/vfs_compat.h +++ b/include/linux/vfs_compat.h @@ -62,4 +62,23 @@ truncate_setsize(struct inode *ip, loff_t new) } #endif /* HAVE_TRUNCATE_SETSIZE */ +/* + * 2.6.32 API change, + * Added backing_device_info (bdi) per super block interfaces. When + * available a bdi must be configured when using a non-device backed + * filesystem for proper writeback. It's safe to leave this code + * dormant for kernels which only support pdflush and not bdi. + */ +#ifdef HAVE_BDI +#define bdi_get_sb(sb) (sb->s_bdi) +#define bdi_put_sb(sb, bdi) (sb->s_bdi = bdi) +#else +#define bdi_init(bdi) (0) +#define bdi_destroy(bdi) (0) +#define bdi_register(bdi, parent, fmt, args) (0) +#define bdi_unregister(bdi) (0) +#define bdi_get_sb(sb) (0) +#define bdi_put_sb(sb, bdi) (0) +#endif /* HAVE_BDI */ + #endif /* _ZFS_VFS_H */ diff --git a/include/sys/Makefile.in b/include/sys/Makefile.in index 53c721f67..4cf397cba 100644 --- a/include/sys/Makefile.in +++ b/include/sys/Makefile.in @@ -43,6 +43,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ + $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ diff --git a/include/sys/fm/Makefile.in b/include/sys/fm/Makefile.in index fce681a1e..30d2c4aff 100644 --- a/include/sys/fm/Makefile.in +++ b/include/sys/fm/Makefile.in @@ -43,6 +43,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ + $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ diff --git a/include/sys/fm/fs/Makefile.in b/include/sys/fm/fs/Makefile.in index 859972f52..76a215704 100644 --- a/include/sys/fm/fs/Makefile.in +++ b/include/sys/fm/fs/Makefile.in @@ -43,6 +43,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ + $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ diff --git a/include/sys/fs/Makefile.in b/include/sys/fs/Makefile.in index 61c97ab10..8567e0186 100644 --- a/include/sys/fs/Makefile.in +++ b/include/sys/fs/Makefile.in @@ -43,6 +43,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ + $(top_srcdir)/config/kernel-bdi.m4 \ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ $(top_srcdir)/config/kernel-bio-failfast.m4 \ diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index ae5f811c0..c8861f6e2 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -43,6 +43,7 @@ struct znode; typedef struct zfs_sb { struct super_block *z_sb; /* generic super_block */ + struct backing_dev_info z_bdi; /* generic backing dev info */ struct zfs_sb *z_parent; /* parent fs */ objset_t *z_os; /* objset reference */ uint64_t z_flags; /* super_block flags */ |