1 files changed, 53 insertions, 61 deletions
diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c
index 16bf2c441..2faf1af52 100644
--- a/module/zfs/dsl_bookmark.c
+++ b/module/zfs/dsl_bookmark.c
@@ -1561,33 +1561,6 @@ dsl_bookmark_latest_txg(dsl_dataset_t *ds)
 	return (dbn->dbn_phys.zbm_creation_txg);
 }
 
-static inline unsigned int
-redact_block_buf_num_entries(unsigned int size)
-{
-	return (size / sizeof (redact_block_phys_t));
-}
-
-/*
- * This function calculates the offset of the last entry in the array of
- * redact_block_phys_t.  If we're reading the redaction list into buffers of
- * size bufsize, then for all but the last buffer, the last valid entry in the
- * array will be the last entry in the array.  However, for the last buffer, any
- * amount of it may be filled.  Thus, we check to see if we're looking at the
- * last buffer in the redaction list, and if so, we return the total number of
- * entries modulo the number of entries per buffer.  Otherwise, we return the
- * number of entries per buffer minus one.
- */
-static inline unsigned int
-last_entry(redaction_list_t *rl, unsigned int bufsize, uint64_t bufid)
-{
-	if (bufid == (rl->rl_phys->rlp_num_entries - 1) /
-	    redact_block_buf_num_entries(bufsize)) {
-		return ((rl->rl_phys->rlp_num_entries - 1) %
-		    redact_block_buf_num_entries(bufsize));
-	}
-	return (redact_block_buf_num_entries(bufsize) - 1);
-}
-
 /*
  * Compare the redact_block_phys_t to the bookmark. If the last block in the
  * redact_block_phys_t is before the bookmark, return -1.  If the first block in
@@ -1633,8 +1606,6 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
     rl_traverse_callback_t cb, void *arg)
 {
 	objset_t *mos = rl->rl_mos;
-	redact_block_phys_t *buf;
-	unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
 	int err = 0;
 
 	if (rl->rl_phys->rlp_last_object != UINT64_MAX ||
@@ -1651,42 +1622,48 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
 	}
 
 	/*
-	 * Binary search for the point to resume from.  The goal is to minimize
-	 * the number of disk reads we have to perform.
+	 * This allows us to skip the binary search and resume checking logic
+	 * below, if we're not resuming a redacted send.
+	 */
+	if (ZB_IS_ZERO(resume))
+		resume = NULL;
+
+	/*
+	 * Binary search for the point to resume from.
 	 */
-	buf = zio_data_buf_alloc(bufsize);
-	uint64_t maxbufid = (rl->rl_phys->rlp_num_entries - 1) /
-	    redact_block_buf_num_entries(bufsize);
-	uint64_t minbufid = 0;
-	while (resume != NULL && maxbufid - minbufid >= 1) {
-		ASSERT3U(maxbufid, >, minbufid);
-		uint64_t midbufid = minbufid + ((maxbufid - minbufid) / 2);
-		err = dmu_read(mos, rl->rl_object, midbufid * bufsize, bufsize,
-		    buf, DMU_READ_NO_PREFETCH);
+	uint64_t maxidx = rl->rl_phys->rlp_num_entries - 1;
+	uint64_t minidx = 0;
+	while (resume != NULL && maxidx > minidx) {
+		redact_block_phys_t rbp = { 0 };
+		ASSERT3U(maxidx, >, minidx);
+		uint64_t mididx = minidx + ((maxidx - minidx) / 2);
+		err = dmu_read(mos, rl->rl_object, mididx * sizeof (rbp),
+		    sizeof (rbp), &rbp, DMU_READ_NO_PREFETCH);
 		if (err != 0)
 			break;
 
-		int cmp0 = redact_block_zb_compare(&buf[0], resume);
-		int cmpn = redact_block_zb_compare(
-		    &buf[last_entry(rl, bufsize, maxbufid)], resume);
+		int cmp = redact_block_zb_compare(&rbp, resume);
 
-		/*
-		 * If the first block is before or equal to the resume point,
-		 * and the last one is equal or after, then the resume point is
-		 * in this buf, and we should start here.
-		 */
-		if (cmp0 <= 0 && cmpn >= 0)
+		if (cmp == 0) {
+			minidx = mididx;
 			break;
-
-		if (cmp0 > 0)
-			maxbufid = midbufid - 1;
-		else if (cmpn < 0)
-			minbufid = midbufid + 1;
-		else
-			panic("No progress in binary search for resume point");
+		} else if (cmp > 0) {
+			maxidx =
+			    (mididx == minidx ? minidx : mididx - 1);
+		} else {
+			minidx = mididx + 1;
+		}
 	}
 
-	for (uint64_t curidx = minbufid * redact_block_buf_num_entries(bufsize);
+	unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
+	redact_block_phys_t *buf = zio_data_buf_alloc(bufsize);
+
+	unsigned int entries_per_buf = bufsize / sizeof (redact_block_phys_t);
+	uint64_t start_block = minidx / entries_per_buf;
+	err = dmu_read(mos, rl->rl_object, start_block * bufsize, bufsize, buf,
+	    DMU_READ_PREFETCH);
+
+	for (uint64_t curidx = minidx;
 	    err == 0 && curidx < rl->rl_phys->rlp_num_entries;
 	    curidx++) {
 		/*
@@ -1696,22 +1673,35 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
 		 * prefetching, and this code shouldn't be the bottleneck, so we
 		 * don't need to do manual prefetching.
 		 */
-		if (curidx % redact_block_buf_num_entries(bufsize) == 0) {
+		if (curidx % entries_per_buf == 0) {
 			err = dmu_read(mos, rl->rl_object, curidx *
 			    sizeof (*buf), bufsize, buf,
 			    DMU_READ_PREFETCH);
 			if (err != 0)
 				break;
 		}
-		redact_block_phys_t *rb = &buf[curidx %
-		    redact_block_buf_num_entries(bufsize)];
+		redact_block_phys_t *rb = &buf[curidx % entries_per_buf];
 		/*
 		 * If resume is non-null, we should either not send the data, or
 		 * null out resume so we don't have to keep doing these
 		 * comparisons.
 		 */
 		if (resume != NULL) {
+			/*
+			 * It is possible that after the binary search we got
+			 * a record before the resume point. There's two cases
+			 * where this can occur. If the record is the last
+			 * redaction record, and the resume point is after the
+			 * end of the redacted data, curidx will be the last
+			 * redaction record. In that case, the loop will end
+			 * after this iteration. The second case is if the
+			 * resume point is between two redaction records, the
+			 * binary search can return either the record before
+			 * or after the resume point. In that case, the next
+			 * iteration will be greater than the resume point.
+			 */
 			if (redact_block_zb_compare(rb, resume) < 0) {
+				ASSERT3U(curidx, ==, minidx);
 				continue;
 			} else {
 				/*
@@ -1733,8 +1723,10 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
 			}
 		}
 
-		if (cb(rb, arg) != 0)
+		if (cb(rb, arg) != 0) {
+			err = EINTR;
 			break;
+		}
 	}
 
 	zio_data_buf_free(buf, bufsize);