summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2016-12-27 01:33:42 +0100
committer Marek Olšák <[email protected]> 2017-01-05 18:43:23 +0100
commit a31a92e7efa33850ebc152ec1d46cf57b9338bee (patch)
tree 1493beea0a86f5120562b07ff6e53de60fe618ef
parent 69f489dfa11a6182c413c252addb0b0010550861 (diff)
radeonsi: always use SDMA for big buffer clears and first buffer uses
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_cp_dma.c 20
1 files changed, 20 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 58093177b93..3e908f6e4c3 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -168,6 +168,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
enum r600_coherency coher)
{
struct si_context *sctx = (struct si_context*)ctx;
+ struct radeon_winsys *ws = sctx->b.ws;
unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher);
unsigned flush_flags = get_flush_flags(sctx, coher);
@@ -193,6 +194,25 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
return;
}
+ /* dma_clear_buffer can use clear_buffer on failure. Make sure that
+ * doesn't happen. We don't want an infinite recursion: */
+ if (sctx->b.chip_class >= CIK && sctx->b.dma.cs &&
+ /* CP DMA is very slow. Always use SDMA for big clears. This
+ * alone improves DeusEx:MD performance by 70%. */
+ (size > 128 * 1024 ||
+ /* Buffers not used by the GFX IB yet will be cleared by SDMA.
+ * This happens to move most buffer clears to SDMA, including
+ * DCC and CMASK clears, because pipe->clear clears them before
+ * si_emit_framebuffer_state (in a draw call) adds them.
+ * For example, DeusEx:MD has 21 buffer clears per frame and all
+ * of them are moved to SDMA thanks to this. */
+ !ws->cs_is_buffer_referenced(sctx->b.gfx.cs,
+ r600_resource(dst)->buf,
+ RADEON_USAGE_READWRITE))) {
+ sctx->b.dma_clear_buffer(ctx, dst, offset, size, value);
+ return;
+ }
+
uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches. */