diff options
author | Rob Clark <[email protected]> | 2013-12-23 09:59:42 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2013-12-26 12:13:42 -0500 |
commit | 8ab47b435319c05521bdfced6da6ce72850a2dd7 (patch) | |
tree | 41ad842a87397da59e7e7dbb756a2e75d57e8ee9 /src/gallium/drivers/freedreno | |
parent | be01d7a905d827c3c0c222cab2430c6d4575429a (diff) |
freedreno/a3xx: fix blend state corruption issue
Using RMW on banked context registers is not safe. The value read
could be the wrong one. So if there has been a DRAW_IDX launched,
the RMW must be preceded by a WAIT_FOR_IDLE to ensure the read part
of RMW sees the correct value.
To avoid unnecessary WFI's, keep track if there is a need for WFI,
and only emit one if needed. Furthermore, keep track if we even
need to update the register in the first place.
And to cut down on the amount of RMW to avoid excessive WFI's, at the
tiling/GMEM level we can always overwrite RB_RENDER_CONTROL, as the
state at beginning of draw/clear cmds (which we IB to) is always
undefined. In the draw/clear commands, we always still use RMW (with
WFI if needed), but only if the register value actually changes. (At
points where the current value cannot be known, the saved value is
reset to ~0, which includes bits outside of RBRC_DRAW_STATE, so there
never is chance for confusion.)
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 29 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_context.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_context.h | 32 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_draw.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_gmem.c | 2 |
8 files changed, 66 insertions, 33 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index d808777a1fb..c5d8b774552 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -80,8 +80,6 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ - OUT_WFI (ring); - OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN */ OUT_RING(ring, info->max_index); /* VFD_INDEX_MAX */ @@ -111,7 +109,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | A3XX_RB_BLEND_ALPHA_FLOAT(1.0)); - fd3_emit_rbrc_draw_state(ring, + fd3_emit_rbrc_draw_state(ctx, ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); if (buffers & PIPE_CLEAR_DEPTH) { @@ -220,8 +218,6 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); - - OUT_WFI (ring); } void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 825656ae62b..91993725ea6 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -243,8 +243,6 @@ emit_cache_flush(struct fd_ringbuffer *ring) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - - OUT_WFI (ring); } /* emit texture state for mem->gmem restore operation.. eventually it would @@ -356,7 +354,7 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty) struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; - fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control); + fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control); OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1); OUT_RING(ring, zsa->rb_alpha_ref); @@ -607,4 +605,5 @@ fd3_emit_restore(struct fd_context *ctx) } emit_cache_flush(ring); + fd_rmw_wfi(ctx, ring); } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index 668e5ddd095..bf7787ab6f7 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -66,24 +66,23 @@ void fd3_emit_restore(struct fd_context *ctx); * GMEM/binning code is deciding on the bin-width (and whether to * use binning) after the draw/clear state is emitted. */ -static inline void -fd3_emit_rbrc_draw_state(struct fd_ringbuffer *ring, uint32_t val) -{ - OUT_PKT3(ring, CP_REG_RMW, 3); - OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL); - OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK | - A3XX_RB_RENDER_CONTROL_ENABLE_GMEM); - OUT_RING(ring, val); -} + +#define RBRC_DRAW_STATE (A3XX_RB_RENDER_CONTROL_ALPHA_TEST | \ + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK) static inline void -fd3_emit_rbrc_tile_state(struct fd_ringbuffer *ring, uint32_t val) +fd3_emit_rbrc_draw_state(struct fd_context *ctx, + struct fd_ringbuffer *ring, uint32_t val) { - OUT_PKT3(ring, CP_REG_RMW, 3); - OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL); - OUT_RING(ring, ~(A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK | - A3XX_RB_RENDER_CONTROL_ENABLE_GMEM)); - OUT_RING(ring, val); + assert(!(val & ~RBRC_DRAW_STATE)); + if (val != ctx->rmw.rbrc_draw) { + fd_rmw_wfi(ctx, ring); + OUT_PKT3(ring, CP_REG_RMW, 3); + OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL); + OUT_RING(ring, ~RBRC_DRAW_STATE); + OUT_RING(ring, val); + ctx->rmw.rbrc_draw = val; + } } #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 4a5a241bac9..3d0a607ed28 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -181,9 +181,11 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); - fd3_emit_rbrc_draw_state(ring, - A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | - A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | @@ -295,7 +297,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE); } - fd3_emit_rbrc_tile_state(ring, + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); @@ -335,9 +338,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); - fd3_emit_rbrc_draw_state(ring, - A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS)); - OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | @@ -423,7 +423,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); - fd3_emit_rbrc_tile_state(ring, + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); /* setup scissor/offset for current tile: */ @@ -514,8 +515,9 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); - fd3_emit_rbrc_tile_state(ring, - A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); /* setup scissor/offset for current tile: */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index c959ccfcc89..28be508e329 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -174,6 +174,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, } fd_context_next_rb(pctx); + fd_reset_rmw_state(ctx); util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer), 16, UTIL_SLAB_SINGLETHREADED); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index db37f9cb8a3..a8abbca7a62 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -38,6 +38,7 @@ #include "freedreno_screen.h" #include "freedreno_gmem.h" +#include "freedreno_util.h" struct fd_vertex_stateobj; @@ -150,6 +151,20 @@ struct fd_context { struct fd_ringbuffer *ring; struct fd_ringmarker *draw_start, *draw_end; + /* Keep track if WAIT_FOR_IDLE is needed for registers we need + * to update via RMW: + */ + struct { + bool need_wfi; + /* note: would be nicer to have in fd3_context, fd2_context, + * etc, because the registered modified via RMR differ across + * generation. But as long as it is a small set of registers + * that might be more hassle than it's worth. + */ + /* state for RB_RENDER_CONTROL: */ + uint32_t rbrc_draw; + } rmw; + struct pipe_scissor_state scissor; /* we don't have a disable/enable bit for scissor, so instead we keep @@ -249,6 +264,23 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim) return (1 << prim) & ctx->primtype_mask; } +static INLINE void +fd_reset_rmw_state(struct fd_context *ctx) +{ + ctx->rmw.need_wfi = true; + ctx->rmw.rbrc_draw = ~0; +} + +/* emit before a RMW a WAIT_FOR_IDLE only if needed: */ +static inline void +fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + if (ctx->rmw.need_wfi) { + OUT_WFI(ring); + ctx->rmw.need_wfi = false; + } +} + struct pipe_context * fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, const uint8_t *primtypes, void *priv); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h b/src/gallium/drivers/freedreno/freedreno_draw.h index 9ccc246395f..190c0e52d24 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.h +++ b/src/gallium/drivers/freedreno/freedreno_draw.h @@ -85,6 +85,8 @@ fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype, } emit_marker(ring, 7); + + ctx->rmw.need_wfi = true; } #endif /* FREEDRENO_DRAW_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 68b4eb607a6..47d99d3429c 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -272,6 +272,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx) /* mark start for next draw cmds: */ fd_ringmarker_mark(ctx->draw_start); + fd_reset_rmw_state(ctx); + /* update timestamps on render targets: */ timestamp = fd_ringbuffer_timestamp(ctx->ring); if (pfb->cbufs[0]) |