summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2016-10-24 11:39:11 -0700
committer: Jason Ekstrand <[email protected]> 2016-10-27 14:44:54 -0700
commit: 80d3af812935978f01c9bb6d02102140383c4034 (patch)
tree: 8b64fc7a4599494fee8824adb53055f5f746c2e7 /src/mesa
parent: b7979a849bc185fbcab93a841eed692a10d61e25 (diff)
i965/blit: Break blits into chunks in intel_miptree_blit
This allows us to blit much larger images than if we use the blitter directly. In particular, it gives us an almost infinite image height compared to the fairly limiting 32k. We do, however, still have a restriction on stride of the image because handling larger strides, while possible, is fairly difficult.

v2: Properly handle linear blit alignment restrictions

Signed-off-by: Jason Ekstrand <[email protected]>
Reviewed-by: Topi Pohjolainen <[email protected]>
Reviewed-by: Anuj Phogat <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/intel_blit.c | 64
1 files changed, 41 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 05a78d7a8f1..7e97fbc1292 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -313,31 +313,49 @@ intel_miptree_blit(struct brw_context *brw,
dst_x += dst_image_x;
dst_y += dst_image_y;
- /* The blitter interprets the 16-bit destination x/y as a signed 16-bit
- * value. The values we're working with are unsigned, so make sure we don't
- * overflow.
+ /* We need to split the blit into chunks that each fit within the blitter's
+ * restrictions. We can't use a chunk size of 32768 because we need to
+ * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
+ * a nice round power of two, big enough that performance won't suffer, and
+ * small enough to guarantee everything fits.
*/
- if (src_x >= 32768 || src_y >= 32768 || dst_x >= 32768 || dst_y >= 32768) {
- perf_debug("Falling back due to >=32k offset [src(%d, %d) dst(%d, %d)]\n",
- src_x, src_y, dst_x, dst_y);
- return false;
- }
+ const uint32_t max_chunk_size = 16384;
- if (!intelEmitCopyBlit(brw,
- src_mt->cpp,
- src_flip == dst_flip ? src_mt->pitch : -src_mt->pitch,
- src_mt->bo, src_mt->offset,
- src_mt->tiling,
- src_mt->tr_mode,
- dst_mt->pitch,
- dst_mt->bo, dst_mt->offset,
- dst_mt->tiling,
- dst_mt->tr_mode,
- src_x, src_y,
- dst_x, dst_y,
- width, height,
- logicop)) {
- return false;
+ for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
+ for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
+ const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
+ const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
+
+ uint32_t src_offset, src_tile_x, src_tile_y;
+ get_blit_intratile_offset_el(brw, src_mt,
+ src_x + chunk_x, src_y + chunk_y,
+ &src_offset, &src_tile_x, &src_tile_y);
+
+ uint32_t dst_offset, dst_tile_x, dst_tile_y;
+ get_blit_intratile_offset_el(brw, dst_mt,
+ dst_x + chunk_x, dst_y + chunk_y,
+ &dst_offset, &dst_tile_x, &dst_tile_y);
+
+ if (!intelEmitCopyBlit(brw,
+ src_mt->cpp,
+ src_flip == dst_flip ? src_mt->pitch :
+ -src_mt->pitch,
+ src_mt->bo, src_mt->offset + src_offset,
+ src_mt->tiling,
+ src_mt->tr_mode,
+ dst_mt->pitch,
+ dst_mt->bo, dst_mt->offset + dst_offset,
+ dst_mt->tiling,
+ dst_mt->tr_mode,
+ src_tile_x, src_tile_y,
+ dst_tile_x, dst_tile_y,
+ chunk_w, chunk_h,
+ logicop)) {
+ /* If this is ever going to fail, it will fail on the first chunk */
+ assert(chunk_x == 0 && chunk_y == 0);
+ return false;
+ }
+ }
}
/* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */