diff options
author | Rob Clark <[email protected]> | 2019-06-06 10:22:04 -0700 |
---|---|---|
committer | Rob Clark <[email protected]> | 2019-06-07 12:07:29 -0700 |
commit | 958f6ffb60640c333a8b568c5f41467a1fecd1c0 (patch) | |
tree | 11d5fb71e61c4183d74f8f2e4bf19e12f74c4637 /src/gallium/drivers/freedreno/a6xx | |
parent | 1d002cfade6d5fa912bb7bfd98291bf39b3346a9 (diff) |
freedreno/a6xx: fix hangs with newer sqe fw
With the newer (v1.76) fw, we were getting hangs (compared to older
v1.66 fw). Re-work the GMEM code to structure things a bit closer to
the blob. This moves some PKT7 packets from IB2 to IB1, which I think
is what was confusing SQE and causing it to get stuck in an infinite
loop. But in general structuring things at least closer to the same way
blob does makes it easier to compare cmdstream.
Note: this is a bit on the large side for what I'd normally consider for
stable.. but right now it is looking like it is the newer fw that is
headed for linux-firmware. This should defn have some soak time on
master, but probably a good idea for this patch to end up in distro mesa
builds by the time a630_sqe.fw hits linux-firmware.
Cc: [email protected]
Signed-off-by: Rob Clark <[email protected]>
Reviewed-by: Kristian H. Kristensen <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a6xx')
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 113 |
1 files changed, 81 insertions, 32 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 6ad0bc68ef4..ebdfd5b8923 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -214,6 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); } + /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE + * plus this CP_EVENT_WRITE at the end in it's own IB.. + */ + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25)); + if (rsc->stencil) { struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); stride = slice->pitch * rsc->stencil->cpp; @@ -402,7 +408,6 @@ set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) static void emit_binning_pass(struct fd_batch *batch) { - struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; @@ -463,12 +468,22 @@ emit_binning_pass(struct fd_batch *batch) OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, UNK_2D); - OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ - OUT_RING(ring, 0x00000000); - + fd6_cache_inv(batch, ring); + fd6_cache_flush(batch, ring); fd_wfi(batch, ring); + + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */ } static void @@ -544,6 +559,15 @@ fd6_emit_tile_init(struct fd_batch *batch) OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1); + OUT_RING(ring, 0x1); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x1); } else { set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); patch_draws(batch, IGNORE_VISIBILITY); @@ -580,9 +604,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_ringbuffer *ring = batch->gmem; - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); - emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); @@ -595,8 +616,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) set_scissor(ring, x1, y1, x2, y2); - set_window_offset(ring, x1, y1); - OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); @@ -620,7 +639,32 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data2, (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); + + set_window_offset(ring, x1, y1); + + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1); + OUT_RING(ring, 0x0); } else { + set_window_offset(ring, x1, y1); + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); @@ -1028,26 +1072,6 @@ prepare_tile_fini_ib(struct fd_batch *batch) FD_RINGBUFFER_STREAMING); ring = batch->tile_fini; - if (use_hw_binning(batch)) { - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); - } - - OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); - OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE__0_GROUP_ID(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - - OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - OUT_RING(ring, 0x0); - - emit_marker6(ring, 7); - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); - emit_marker6(ring, 7); - set_blit_scissor(batch, ring); if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { @@ -1081,7 +1105,32 @@ prepare_tile_fini_ib(struct fd_batch *batch) static void fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { - fd6_emit_ib(batch->gmem, batch->tile_fini); + struct fd_ringbuffer *ring = batch->gmem; + + if (use_hw_binning(batch)) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); + } + + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x0); + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); + emit_marker6(ring, 7); + + fd6_emit_ib(ring, batch->tile_fini); + + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); } static void |