summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-12-27 16:12:05 +0100
committerMarek Olšák <[email protected]>2017-01-05 18:43:24 +0100
commit9e1aa81dfeced2381aa0df73758dd76f2722d857 (patch)
treeb77dedf4281b6f83a1d0ecc8ca16285569bd1b40
parent3be83364405da8d5b7085512fcd80c0d910dffd9 (diff)
gallium/radeon: prevent SDMA stalls by detecting RAW hazards in need_dma_space
Call r600_dma_emit_wait_idle only when there is a possibility of a read-after-write hazard. Buffers not yet used by the SDMA IB don't have to wait. Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/gallium/drivers/r600/evergreen_hw_context.c1
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c1
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c1
-rw-r--r--src/gallium/drivers/r600/r600_state.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c48
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h1
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_dma.c2
8 files changed, 27 insertions, 36 deletions
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 06f03482341..5352dc05779 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -77,7 +77,6 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
src_offset += csize << shift;
size -= csize;
}
- r600_dma_emit_wait_idle(&rctx->b);
}
/* The max number of bytes to copy per packet. */
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 015ff026562..c5dd9f71dd9 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3453,7 +3453,6 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
addr += cheight * pitch;
y += cheight;
}
- r600_dma_emit_wait_idle(&rctx->b);
}
static void evergreen_dma_copy(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index bc6217ad223..4663d99c0b6 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -555,5 +555,4 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
src_offset += csize << 2;
size -= csize;
}
- r600_dma_emit_wait_idle(&rctx->b);
}
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index ba97490ac91..006bb629d60 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2904,7 +2904,6 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
addr += cheight * pitch;
y += cheight;
}
- r600_dma_emit_wait_idle(&rctx->b);
return TRUE;
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 73fc40db2ea..35d3e7b16bd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -220,6 +220,21 @@ void r600_draw_rectangle(struct blitter_context *blitter,
pipe_resource_reference(&buf, NULL);
}
+static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
+{
+ struct radeon_winsys_cs *cs = rctx->dma.cs;
+
+ /* NOP waits for idle on Evergreen and later. */
+ if (rctx->chip_class >= CIK)
+ radeon_emit(cs, 0x00000000); /* NOP */
+ else if (rctx->chip_class >= EVERGREEN)
+ radeon_emit(cs, 0xf0000000); /* NOP */
+ else {
+ /* TODO: R600-R700 should use the FENCE packet.
+ * CS checker support is required. */
+ }
+}
+
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src)
{
@@ -257,6 +272,7 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
* It improves texture upload performance by keeping the DMA
* engine busy while uploads are being submitted.
*/
+ num_dw++; /* for emit_wait_idle below */
if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
!radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
@@ -264,6 +280,17 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
}
+ /* Wait for idle if either buffer has been used in the IB before to
+ * prevent read-after-write hazards.
+ */
+ if ((dst &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
+ (src &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
+ RADEON_USAGE_WRITE)))
+ r600_dma_emit_wait_idle(ctx);
+
/* If GPUVM is not supported, the CS checker needs 2 entries
* in the buffer list per packet, which has to be done manually.
*/
@@ -282,27 +309,6 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
ctx->num_dma_calls++;
}
-/* This is required to prevent read-after-write hazards. */
-void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->dma.cs;
-
- r600_need_dma_space(rctx, 1, NULL, NULL);
-
- if (!radeon_emitted(cs, 0)) /* empty queue */
- return;
-
- /* NOP waits for idle on Evergreen and later. */
- if (rctx->chip_class >= CIK)
- radeon_emit(cs, 0x00000000); /* NOP */
- else if (rctx->chip_class >= EVERGREEN)
- radeon_emit(cs, 0xf0000000); /* NOP */
- else {
- /* TODO: R600-R700 should use the FENCE packet.
- * CS checker support is required. */
- }
-}
-
static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 2bb622ab365..2e0655602c2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -729,7 +729,6 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const char *r600_get_llvm_processor_name(enum radeon_family family);
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src);
-void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct radeon_saved_cs *saved);
void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 648b1ca7687..bee35cd8212 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -67,7 +67,6 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
src_offset += csize;
size -= csize;
}
- r600_dma_emit_wait_idle(&ctx->b);
}
static void cik_sdma_clear_buffer(struct pipe_context *ctx,
@@ -108,7 +107,6 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
offset += csize;
size -= csize;
}
- r600_dma_emit_wait_idle(&sctx->b);
}
static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
@@ -251,8 +249,6 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
radeon_emit(cs, (copy_width - 1) | ((copy_height - 1) << 16));
radeon_emit(cs, (copy_depth - 1));
}
-
- r600_dma_emit_wait_idle(&sctx->b);
return true;
}
@@ -417,8 +413,6 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
radeon_emit(cs, (copy_width_aligned - 1) | ((copy_height - 1) << 16));
radeon_emit(cs, (copy_depth - 1));
}
-
- r600_dma_emit_wait_idle(&sctx->b);
return true;
}
}
@@ -515,8 +509,6 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
((copy_height_aligned - 8) << 16));
radeon_emit(cs, (copy_depth - 1));
}
-
- r600_dma_emit_wait_idle(&sctx->b);
return true;
}
}
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 8d186c3fb43..1009bb23993 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -76,7 +76,6 @@ static void si_dma_copy_buffer(struct si_context *ctx,
src_offset += csize << shift;
size -= csize;
}
- r600_dma_emit_wait_idle(&ctx->b);
}
static void si_dma_copy_tile(struct si_context *ctx,
@@ -177,7 +176,6 @@ static void si_dma_copy_tile(struct si_context *ctx,
addr += cheight * pitch;
tiled_y += cheight;
}
- r600_dma_emit_wait_idle(&ctx->b);
}
static void si_dma_copy(struct pipe_context *ctx,