diff options
author | Kenneth Graunke <[email protected]> | 2019-06-26 00:05:06 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2019-07-01 13:59:49 -0700 |
commit | 9b1b9714915c3e3d08582fd1d77f182cdf3e5090 (patch) | |
tree | e833eeb344e932488af1bda9be1943b832f2737e | |
parent | d7e6541cc720a7d11d678adbeec7143b099127b0 (diff) |
iris: Use MI_COPY_MEM_MEM for tiny resource_copy_region calls.
If our resource_copy_region size is a small number of DWords, then
instead of firing up BLORP, we can simply use MI_COPY_MEM_MEM (after
a CS stall). We also try to select the optimal batch.
Improves performance in Shadow of Mordor on Low settings at 1920x1080
on Skylake GT4e by 0.689096% +/- 0.473968% (n=4). It tries to copy
4 bytes of data to a buffer which was most recently used as a writable
compute shader SSBO. Previously we were switching from compute to the
render pipeline, then firing up all of blorp_buffer_copy...for 4 bytes.
I arbitrarily decided to support 4/8/12/16 bytes. Jason thinks this
is about the right threshold where it's cheaper to use MI_COPY_MEM_MEM.
-rw-r--r-- | src/gallium/drivers/iris/iris_blit.c | 27 |
1 file changed, 27 insertions, 0 deletions
```diff
diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c
index 6f90910b516..6fa452c68b1 100644
--- a/src/gallium/drivers/iris/iris_blit.c
+++ b/src/gallium/drivers/iris/iris_blit.c
@@ -608,6 +608,19 @@ iris_copy_region(struct blorp_context *blorp,
    tex_cache_flush_hack(batch);
 }
 
+static struct iris_batch *
+get_preferred_batch(struct iris_context *ice, struct iris_bo *bo)
+{
+   /* If the compute batch is already using this buffer, we'd prefer to
+    * continue queueing in the compute batch.
+    */
+   if (iris_batch_references(&ice->batches[IRIS_BATCH_COMPUTE], bo))
+      return &ice->batches[IRIS_BATCH_COMPUTE];
+
+   /* Otherwise default to the render batch. */
+   return &ice->batches[IRIS_BATCH_RENDER];
+}
+
 /**
  * The pipe->resource_copy_region() driver hook.
@@ -627,6 +640,20 @@ iris_resource_copy_region(struct pipe_context *ctx,
    struct iris_context *ice = (void *) ctx;
    struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
 
+   /* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */
+   if (src->target == PIPE_BUFFER && dst->target == PIPE_BUFFER &&
+       (src_box->width % 4 == 0) && src_box->width <= 16) {
+      struct iris_bo *dst_bo = iris_resource_bo(dst);
+      batch = get_preferred_batch(ice, dst_bo);
+      iris_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4));
+      iris_emit_pipe_control_flush(batch,
+                                   "stall for MI_COPY_MEM_MEM copy_region",
+                                   PIPE_CONTROL_CS_STALL);
+      ice->vtbl.copy_mem_mem(batch, dst_bo, dstx, iris_resource_bo(src),
+                             src_box->x, src_box->width);
+      return;
+   }
+
    iris_copy_region(&ice->blorp, batch, dst, dst_level, dstx, dsty, dstz,
                     src, src_level, src_box);
```