summaryrefslogtreecommitdiffstats
path: root/module/zfs/arc.c
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2009-07-02 16:49:11 -0700
committerBrian Behlendorf <[email protected]>2009-07-02 16:49:11 -0700
commite2b2597751329d4b0b96b7660fc4115182701303 (patch)
treeec67675b4b1b774534bdc4086b4b5462ad5a103b /module/zfs/arc.c
parente56aa929e8f5edc3690fbad967dc646801e177d0 (diff)
parente9f14862a51da916ebad8f8aeeb92d170e4475e4 (diff)
Merge branch 'gcc-c90' into refs/top-bases/gcc-branch
Conflicts: cmd/zdb/zdb_il.c cmd/zfs/zfs_main.c cmd/zinject/zinject.c cmd/ztest/ztest.c lib/libzfs/libzfs_dataset.c lib/libzfs/libzfs_graph.c module/zcommon/include/sys/fm/fs/zfs.h module/zcommon/include/zfs_deleg.h module/zcommon/include/zfs_namecheck.h module/zcommon/zfs_deleg.c module/zcommon/zfs_namecheck.c module/zcommon/zprop_common.c module/zfs/arc.c module/zfs/dmu.c module/zfs/dmu_object.c module/zfs/dmu_objset.c module/zfs/dsl_pool.c module/zfs/dsl_prop.c module/zfs/fletcher.c module/zfs/include/sys/dsl_deleg.h module/zfs/include/sys/dsl_prop.h module/zfs/include/sys/metaslab_impl.h module/zfs/include/sys/spa_boot.h module/zfs/include/sys/zap.h module/zfs/include/sys/zap_impl.h module/zfs/include/sys/zfs_context.h module/zfs/include/sys/zfs_ctldir.h module/zfs/include/sys/zfs_dir.h module/zfs/include/sys/zfs_fuid.h module/zfs/include/sys/zfs_ioctl.h module/zfs/spa.c module/zfs/spa_errlog.c module/zfs/spa_history.c module/zfs/zap.c module/zfs/zap_leaf.c module/zfs/zap_micro.c module/zfs/zfs_ioctl.c module/zfs/zio.c
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r--module/zfs/arc.c82
1 files changed, 66 insertions, 16 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index bdb5e8b32..421b3ba8d 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -124,6 +124,7 @@
#include <sys/arc.h>
#include <sys/refcount.h>
#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <vm/anon.h>
@@ -397,6 +398,7 @@ static arc_state_t *arc_l2c_only;
static int arc_no_grow; /* Don't try to grow cache size */
static uint64_t arc_tempreserve;
+static uint64_t arc_loaned_bytes;
static uint64_t arc_meta_used;
static uint64_t arc_meta_limit;
static uint64_t arc_meta_max = 0;
@@ -610,7 +612,7 @@ typedef struct l2arc_write_callback {
struct l2arc_buf_hdr {
/* protected by arc_buf_hdr mutex */
l2arc_dev_t *b_dev; /* L2ARC device */
- daddr_t b_daddr; /* disk address, offset byte */
+ uint64_t b_daddr; /* disk address, offset byte */
};
typedef struct l2arc_data_free {
@@ -1207,6 +1209,41 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
return (buf);
}
+static char *arc_onloan_tag = "onloan";
+
+/*
+ * Loan out an anonymous arc buffer. Loaned buffers are not counted as in
+ * flight data by arc_tempreserve_space() until they are "returned". Loaned
+ * buffers must be returned to the arc before they can be used by the DMU or
+ * freed.
+ */
+arc_buf_t *
+arc_loan_buf(spa_t *spa, int size)
+{
+ arc_buf_t *buf;
+
+ /* Allocate a data buffer, passing arc_onloan_tag as the holder tag. */
+ buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA);
+
+ /* Account for the requested size in the global loaned-bytes counter. */
+ atomic_add_64(&arc_loaned_bytes, size);
+ return (buf);
+}
+
+/*
+ * Return a loaned arc buffer to the arc.
+ */
+void
+arc_return_buf(arc_buf_t *buf, void *tag)
+{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ /* The buffer is expected to still be in the anon state and hold data. */
+ ASSERT(hdr->b_state == arc_anon);
+ ASSERT(buf->b_data != NULL);
+ /*
+  * Swap the hold from arc_onloan_tag to the caller's tag.  The checks
+  * require refcount_remove() to yield 0 and refcount_add() to yield 1
+  * (presumably the resulting hold counts -- confirm against refcount.h).
+  */
+ VERIFY(refcount_remove(&hdr->b_refcnt, arc_onloan_tag) == 0);
+ VERIFY(refcount_add(&hdr->b_refcnt, tag) == 1);
+
+ /* Subtract this buffer's header size from the loaned-bytes counter. */
+ atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
+}
+
static arc_buf_t *
arc_buf_clone(arc_buf_t *from)
{
@@ -2507,7 +2544,6 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
uint32_t *arc_flags, const zbookmark_t *zb)
{
int err;
- arc_buf_hdr_t *hdr = pbuf->b_hdr;
ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
@@ -2515,9 +2551,8 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
err = arc_read_nolock(pio, spa, bp, done, private, priority,
zio_flags, arc_flags, zb);
-
- VERIFY3P(hdr, ==, pbuf->b_hdr);
rw_exit(&pbuf->b_lock);
+
return (err);
}
@@ -2926,6 +2961,7 @@ arc_release(arc_buf_t *buf, void *tag)
kmutex_t *hash_lock;
l2arc_buf_hdr_t *l2hdr;
uint64_t buf_size = 0;
+ boolean_t released = B_FALSE;
rw_enter(&buf->b_lock, RW_WRITER);
hdr = buf->b_hdr;
@@ -2941,12 +2977,12 @@ arc_release(arc_buf_t *buf, void *tag)
ASSERT(buf->b_efunc == NULL);
arc_buf_thaw(buf);
rw_exit(&buf->b_lock);
- return;
+ released = B_TRUE;
+ } else {
+ hash_lock = HDR_LOCK(hdr);
+ mutex_enter(hash_lock);
}
- hash_lock = HDR_LOCK(hdr);
- mutex_enter(hash_lock);
-
l2hdr = hdr->b_l2hdr;
if (l2hdr) {
mutex_enter(&l2arc_buflist_mtx);
@@ -2954,6 +2990,9 @@ arc_release(arc_buf_t *buf, void *tag)
buf_size = hdr->b_size;
}
+ if (released)
+ goto out;
+
/*
* Do we have more than one buf?
*/
@@ -3021,6 +3060,7 @@ arc_release(arc_buf_t *buf, void *tag)
buf->b_efunc = NULL;
buf->b_private = NULL;
+out:
if (l2hdr) {
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
@@ -3314,10 +3354,9 @@ arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
}
static int
-arc_memory_throttle(uint64_t reserve, uint64_t txg)
+arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg)
{
#ifdef _KERNEL
- uint64_t inflight_data = arc_anon->arcs_size;
uint64_t available_memory = ptob(freemem);
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
@@ -3379,6 +3418,7 @@ int
arc_tempreserve_space(uint64_t reserve, uint64_t txg)
{
int error;
+ uint64_t anon_size;
#ifdef ZFS_DEBUG
/*
@@ -3395,11 +3435,18 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
return (ENOMEM);
/*
+ * Don't count loaned bufs as in flight dirty data to prevent long
+ * network delays from blocking transactions that are ready to be
+ * assigned to a txg.
+ */
+ anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0);
+
+ /*
* Writes will, almost always, require additional memory allocations
* in order to compress/encrypt/etc the data. We therefore need to
* make sure that there is sufficient available memory for this.
*/
- if ((error = arc_memory_throttle(reserve, txg)))
+ if ((error = arc_memory_throttle(reserve, anon_size, txg)))
return (error);
/*
@@ -3409,8 +3456,9 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* Note: if two requests come in concurrently, we might let them
* both succeed, when one of them should fail. Not a huge deal.
*/
- if (reserve + arc_tempreserve + arc_anon->arcs_size > arc_c / 2 &&
- arc_anon->arcs_size > arc_c / 4) {
+
+ if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
+ anon_size > arc_c / 4) {
dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
"anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n",
arc_tempreserve>>10,
@@ -3595,6 +3643,8 @@ arc_fini(void)
mutex_destroy(&zfs_write_limit_lock);
buf_fini();
+
+ ASSERT(arc_loaned_bytes == 0);
}
/*
@@ -4490,7 +4540,7 @@ l2arc_vdev_present(vdev_t *vd)
* validated the vdev and opened it.
*/
void
-l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end)
+l2arc_add_vdev(spa_t *spa, vdev_t *vd)
{
l2arc_dev_t *adddev;
@@ -4504,8 +4554,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end)
adddev->l2ad_vdev = vd;
adddev->l2ad_write = l2arc_write_max;
adddev->l2ad_boost = l2arc_write_boost;
- adddev->l2ad_start = start;
- adddev->l2ad_end = end;
+ adddev->l2ad_start = VDEV_LABEL_START_SIZE;
+ adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd);
adddev->l2ad_hand = adddev->l2ad_start;
adddev->l2ad_evict = adddev->l2ad_start;
adddev->l2ad_first = B_TRUE;