diff options
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r-- | module/zfs/arc.c | 179 |
1 files changed, 93 insertions, 86 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 4db6c0614..195364013 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -3872,7 +3872,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, ASSERT3P(marker, !=, NULL); - mls = multilist_sublist_lock(ml, idx); + mls = multilist_sublist_lock_idx(ml, idx); for (hdr = multilist_sublist_prev(mls, marker); likely(hdr != NULL); hdr = multilist_sublist_prev(mls, marker)) { @@ -3984,6 +3984,26 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, return (bytes_evicted); } +static arc_buf_hdr_t * +arc_state_alloc_marker(void) +{ + arc_buf_hdr_t *marker = kmem_cache_alloc(hdr_full_cache, KM_SLEEP); + + /* + * A b_spa of 0 is used to indicate that this header is + * a marker. This fact is used in arc_evict_state_impl(). + */ + marker->b_spa = 0; + + return (marker); +} + +static void +arc_state_free_marker(arc_buf_hdr_t *marker) +{ + kmem_cache_free(hdr_full_cache, marker); +} + /* * Allocate an array of buffer headers used as placeholders during arc state * eviction. @@ -3994,16 +4014,8 @@ arc_state_alloc_markers(int count) arc_buf_hdr_t **markers; markers = kmem_zalloc(sizeof (*markers) * count, KM_SLEEP); - for (int i = 0; i < count; i++) { - markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP); - - /* - * A b_spa of 0 is used to indicate that this header is - * a marker. This fact is used in arc_evict_state_impl(). - */ - markers[i]->b_spa = 0; - - } + for (int i = 0; i < count; i++) + markers[i] = arc_state_alloc_marker(); return (markers); } @@ -4011,7 +4023,7 @@ static void arc_state_free_markers(arc_buf_hdr_t **markers, int count) { for (int i = 0; i < count; i++) - kmem_cache_free(hdr_full_cache, markers[i]); + arc_state_free_marker(markers[i]); kmem_free(markers, sizeof (*markers) * count); } @@ -4055,7 +4067,7 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa, for (int i = 0; i < num_sublists; i++) { multilist_sublist_t *mls; - mls = multilist_sublist_lock(ml, i); + mls = multilist_sublist_lock_idx(ml, i); multilist_sublist_insert_tail(mls, markers[i]); multilist_sublist_unlock(mls); } @@ -4120,7 +4132,7 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa, } for (int i = 0; i < num_sublists; i++) { - multilist_sublist_t *mls = multilist_sublist_lock(ml, i); + multilist_sublist_t *mls = multilist_sublist_lock_idx(ml, i); multilist_sublist_remove(mls, markers[i]); multilist_sublist_unlock(mls); } @@ -8628,7 +8640,7 @@ l2arc_sublist_lock(int list_num) * sublists being selected. */ idx = multilist_get_random_index(ml); - return (multilist_sublist_lock(ml, idx)); + return (multilist_sublist_lock_idx(ml, idx)); } /* @@ -9040,9 +9052,9 @@ l2arc_blk_fetch_done(zio_t *zio) static uint64_t l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) { - arc_buf_hdr_t *hdr, *hdr_prev, *head; - uint64_t write_asize, write_psize, write_lsize, headroom; - boolean_t full; + arc_buf_hdr_t *hdr, *head, *marker; + uint64_t write_asize, write_psize, headroom; + boolean_t full, from_head = !arc_warm; l2arc_write_callback_t *cb = NULL; zio_t *pio, *wzio; uint64_t guid = spa_load_guid(spa); @@ -9051,10 +9063,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ASSERT3P(dev->l2ad_vdev, !=, NULL); pio = NULL; - write_lsize = write_asize = write_psize = 0; + write_asize = write_psize = 0; full = B_FALSE; head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE); arc_hdr_set_flags(head, ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_HAS_L2HDR); + marker = arc_state_alloc_marker(); /* * Copy buffers for L2ARC writing. @@ -9069,40 +9082,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) continue; } - multilist_sublist_t *mls = l2arc_sublist_lock(pass); uint64_t passed_sz = 0; - - VERIFY3P(mls, !=, NULL); + headroom = target_sz * l2arc_headroom; + if (zfs_compressed_arc_enabled) + headroom = (headroom * l2arc_headroom_boost) / 100; /* - * L2ARC fast warmup. - * * Until the ARC is warm and starts to evict, read from the * head of the ARC lists rather than the tail. */ - if (arc_warm == B_FALSE) + multilist_sublist_t *mls = l2arc_sublist_lock(pass); + ASSERT3P(mls, !=, NULL); + if (from_head) hdr = multilist_sublist_head(mls); else hdr = multilist_sublist_tail(mls); - headroom = target_sz * l2arc_headroom; - if (zfs_compressed_arc_enabled) - headroom = (headroom * l2arc_headroom_boost) / 100; - - for (; hdr; hdr = hdr_prev) { + while (hdr != NULL) { kmutex_t *hash_lock; abd_t *to_write = NULL; - if (arc_warm == B_FALSE) - hdr_prev = multilist_sublist_next(mls, hdr); - else - hdr_prev = multilist_sublist_prev(mls, hdr); - hash_lock = HDR_LOCK(hdr); if (!mutex_tryenter(hash_lock)) { - /* - * Skip this buffer rather than waiting. - */ +skip: + /* Skip this buffer rather than waiting. */ + if (from_head) + hdr = multilist_sublist_next(mls, hdr); + else + hdr = multilist_sublist_prev(mls, hdr); continue; } @@ -9117,11 +9124,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) if (!l2arc_write_eligible(guid, hdr)) { mutex_exit(hash_lock); - continue; + goto skip; } ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); ASSERT3U(arc_hdr_size(hdr), >, 0); ASSERT(hdr->b_l1hdr.b_pabd != NULL || @@ -9143,12 +9149,18 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) } /* - * We rely on the L1 portion of the header below, so - * it's invalid for this header to have been evicted out - * of the ghost cache, prior to being written out. The - * ARC_FLAG_L2_WRITING bit ensures this won't happen. + * We should not sleep with sublist lock held or it + * may block ARC eviction. Insert a marker to save + * the position and drop the lock. */ - arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING); + if (from_head) { + multilist_sublist_insert_after(mls, hdr, + marker); + } else { + multilist_sublist_insert_before(mls, hdr, + marker); + } + multilist_sublist_unlock(mls); /* * If this header has b_rabd, we can use this since it @@ -9179,32 +9191,45 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) &to_write); if (ret != 0) { arc_hdr_clear_flags(hdr, - ARC_FLAG_L2_WRITING); + ARC_FLAG_L2CACHE); mutex_exit(hash_lock); - continue; + goto next; } l2arc_free_abd_on_write(to_write, asize, type); } + hdr->b_l2hdr.b_dev = dev; + hdr->b_l2hdr.b_daddr = dev->l2ad_hand; + hdr->b_l2hdr.b_hits = 0; + hdr->b_l2hdr.b_arcs_state = + hdr->b_l1hdr.b_state->arcs_state; + mutex_enter(&dev->l2ad_mtx); if (pio == NULL) { /* * Insert a dummy header on the buflist so * l2arc_write_done() can find where the * write buffers begin without searching. */ - mutex_enter(&dev->l2ad_mtx); list_insert_head(&dev->l2ad_buflist, head); - mutex_exit(&dev->l2ad_mtx); + } + list_insert_head(&dev->l2ad_buflist, hdr); + mutex_exit(&dev->l2ad_mtx); + arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR | + ARC_FLAG_L2_WRITING); + + (void) zfs_refcount_add_many(&dev->l2ad_alloc, + arc_hdr_size(hdr), hdr); + l2arc_hdr_arcstats_increment(hdr); + boolean_t commit = l2arc_log_blk_insert(dev, hdr); + mutex_exit(hash_lock); + + if (pio == NULL) { cb = kmem_alloc( sizeof (l2arc_write_callback_t), KM_SLEEP); cb->l2wcb_dev = dev; cb->l2wcb_head = head; - /* - * Create a list to save allocated abd buffers - * for l2arc_log_blk_commit(). - */ list_create(&cb->l2wcb_abd_list, sizeof (l2arc_lb_abd_buf_t), offsetof(l2arc_lb_abd_buf_t, node)); @@ -9212,54 +9237,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ZIO_FLAG_CANFAIL); } - hdr->b_l2hdr.b_dev = dev; - hdr->b_l2hdr.b_hits = 0; - - hdr->b_l2hdr.b_daddr = dev->l2ad_hand; - hdr->b_l2hdr.b_arcs_state = - hdr->b_l1hdr.b_state->arcs_state; - arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR); - - mutex_enter(&dev->l2ad_mtx); - list_insert_head(&dev->l2ad_buflist, hdr); - mutex_exit(&dev->l2ad_mtx); - - (void) zfs_refcount_add_many(&dev->l2ad_alloc, - arc_hdr_size(hdr), hdr); - wzio = zio_write_phys(pio, dev->l2ad_vdev, - hdr->b_l2hdr.b_daddr, asize, to_write, + dev->l2ad_hand, asize, to_write, ZIO_CHECKSUM_OFF, NULL, hdr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE); - write_lsize += HDR_GET_LSIZE(hdr); DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, zio_t *, wzio); + zio_nowait(wzio); write_psize += psize; write_asize += asize; dev->l2ad_hand += asize; - l2arc_hdr_arcstats_increment(hdr); vdev_space_update(dev->l2ad_vdev, asize, 0, 0); - mutex_exit(hash_lock); - - /* - * Append buf info to current log and commit if full. - * arcstat_l2_{size,asize} kstats are updated - * internally. - */ - if (l2arc_log_blk_insert(dev, hdr)) { - /* - * l2ad_hand will be adjusted in - * l2arc_log_blk_commit(). - */ + if (commit) { + /* l2ad_hand will be adjusted inside. */ write_asize += l2arc_log_blk_commit(dev, pio, cb); } - zio_nowait(wzio); +next: + multilist_sublist_lock(mls); + if (from_head) + hdr = multilist_sublist_next(mls, marker); + else + hdr = multilist_sublist_prev(mls, marker); + multilist_sublist_remove(mls, marker); } multilist_sublist_unlock(mls); @@ -9268,9 +9273,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) break; } + arc_state_free_marker(marker); + /* No buffers selected for writing? */ if (pio == NULL) { - ASSERT0(write_lsize); + ASSERT0(write_psize); ASSERT(!HDR_HAS_L1HDR(head)); kmem_cache_free(hdr_l2only_cache, head); @@ -10598,7 +10605,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr) L2BLK_SET_TYPE((le)->le_prop, hdr->b_type); L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr))); L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr))); - L2BLK_SET_STATE((le)->le_prop, hdr->b_l1hdr.b_state->arcs_state); + L2BLK_SET_STATE((le)->le_prop, hdr->b_l2hdr.b_arcs_state); dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev, HDR_GET_PSIZE(hdr)); |