diff options
author | Matthew Ahrens <[email protected]> | 2015-12-26 22:10:31 +0100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-01-12 09:02:33 -0800 |
commit | 7f60329a261bd48558a498fb10e9b29638eab33b (patch) | |
tree | 5e6616fbac91442ce424ca44a8288bf9f46db3cc /module/zfs/dmu.c | |
parent | ab5cbbd1078bf007b50b084bb31fd58c7c5652f4 (diff) |
Illumos 5987 - zfs prefetch code needs work
5987 zfs prefetch code needs work
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Paul Dagnelie <[email protected]>
Approved by: Gordon Ross <[email protected]>
References:
https://www.illumos.org/issues/5987 zfs prefetch code needs work
illumos/illumos-gate@cf6106c 5987 zfs prefetch code needs work
Porting notes:
- [module/zfs/dbuf.c]
  - 5f6d0b6 Handle block pointers with a corrupt logical size
- [module/zfs/dmu_zfetch.c]
  - c65aa5b Fix gcc missing parenthesis warnings
  - 428870f Update core ZFS code from build 121 to build 141.
  - 79c76d5 Change KM_PUSHPAGE -> KM_SLEEP
  - b8d06fc Switch KM_SLEEP to KM_PUSHPAGE
  - Account for ISO C90 - mixed declarations and code - warnings
  - Module parameters (new/changed):
    - Replaced zfetch_block_cap with zfetch_max_distance
      (Max bytes to prefetch per stream (default 8MB; 8 * 1024 * 1024))
    - Preserved zfs_prefetch_disable as 'int' for consistency with
      existing Linux module options.
- [include/sys/trace_arc.h]
  - Added new tracepoints
    - DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__sync__wait__for__async);
    - DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
- [man/man5/zfs-module-parameters.5]
  - Updated man page
Ported-by: kernelOfTruth <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dmu.c')
-rw-r--r-- | module/zfs/dmu.c | 35 |
1 file changed, 21 insertions, 14 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b4133f0e4..1bf108bfe 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. @@ -386,7 +386,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) */ static int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, - int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) + boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) { dmu_buf_t **dbp; uint64_t blkid, nblks, i; @@ -396,15 +396,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, ASSERT(length <= DMU_MAX_ACCESS); - dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; - if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) - dbuf_flags |= DB_RF_NOPREFETCH; + /* + * Note: We directly notify the prefetch code of this read, so that + * we can tell it about the multi-block read. dbuf_read() only knows + * about the one block it is accessing. 
+ */ + dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT | + DB_RF_NOPREFETCH; rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { int blkshift = dn->dn_datablkshift; - nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - - P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; + nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) - + P2ALIGN(offset, 1ULL << blkshift)) >> blkshift; } else { if (offset + length > dn->dn_datablksz) { zfs_panic_recover("zfs: accessing past end of object " @@ -423,19 +427,24 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); blkid = dbuf_whichblock(dn, 0, offset); for (i = 0; i < nblks; i++) { - dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); + dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag); if (db == NULL) { rw_exit(&dn->dn_struct_rwlock); dmu_buf_rele_array(dbp, nblks, tag); zio_nowait(zio); return (SET_ERROR(EIO)); } + /* initiate async i/o */ - if (read) { + if (read) (void) dbuf_read(db, zio, dbuf_flags); - } dbp[i] = &db->db; } + + if ((flags & DMU_READ_NO_PREFETCH) == 0 && read && + length < zfetch_array_rd_sz) { + dmu_zfetch(&dn->dn_zfetch, blkid, nblks); + } rw_exit(&dn->dn_struct_rwlock); /* wait for async i/o */ @@ -489,7 +498,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, int dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset, - uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) + uint64_t length, boolean_t read, void *tag, int *numbufsp, + dmu_buf_t ***dbpp) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; dnode_t *dn; @@ -537,9 +547,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, uint64_t blkid; int nblks, err; - if (zfs_prefetch_disable) - return; - if (len == 0) { /* they're interested in the bonus buffer */ dn = DMU_META_DNODE(os); |