summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/a6xx
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2019-06-06 10:22:04 -0700
committerRob Clark <[email protected]>2019-06-07 12:07:29 -0700
commit958f6ffb60640c333a8b568c5f41467a1fecd1c0 (patch)
tree11d5fb71e61c4183d74f8f2e4bf19e12f74c4637 /src/gallium/drivers/freedreno/a6xx
parent1d002cfade6d5fa912bb7bfd98291bf39b3346a9 (diff)
freedreno/a6xx: fix hangs with newer sqe fw
With the newer (v1.76) fw, we were getting hangs (compared to older v1.66 fw). Re-work the GMEM code to structure things a bit closer to the blob. This moves some PKT7 packets from IB2 to IB1, which I think is what was confusing SQE and causing it to get stuck in an infinite loop. But in general structuring things at least closer to the same way blob does makes it easier to compare cmdstream. Note: this is a bit on the large side for what I'd normally consider for stable.. but right now it is looking like it is the newer fw that is headed for linux-firmware. This should defn have some soak time on master, but probably a good idea for this patch to end up in distro mesa builds by the time a630_sqe.fw hits linux-firmware. Cc: [email protected] Signed-off-by: Rob Clark <[email protected]> Reviewed-by: Kristian H. Kristensen <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a6xx')
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_gmem.c113
1 files changed, 81 insertions, 32 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index 6ad0bc68ef4..ebdfd5b8923 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -214,6 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
OUT_RING(ring, 0x00000000);
}
+ /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE
+ * plus this CP_EVENT_WRITE at the end in it's own IB..
+ */
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));
+
if (rsc->stencil) {
struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0);
stride = slice->pitch * rsc->stencil->cpp;
@@ -402,7 +408,6 @@ set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
static void
emit_binning_pass(struct fd_batch *batch)
{
- struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->gmem;
struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
@@ -463,12 +468,22 @@ emit_binning_pass(struct fd_batch *batch)
OUT_PKT7(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, UNK_2D);
- OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CACHE_FLUSH_TS);
- OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
- OUT_RING(ring, 0x00000000);
-
+ fd6_cache_inv(batch, ring);
+ fd6_cache_flush(batch, ring);
fd_wfi(batch, ring);
+
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */
}
static void
@@ -544,6 +559,15 @@ fd6_emit_tile_init(struct fd_batch *batch)
OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
OUT_RING(ring, 0x0);
+
+ OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
+ OUT_RING(ring, 0x1);
+
+ OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
+ OUT_RING(ring, 0x1);
+
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x1);
} else {
set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
patch_draws(batch, IGNORE_VISIBILITY);
@@ -580,9 +604,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
struct fd6_context *fd6_ctx = fd6_context(ctx);
struct fd_ringbuffer *ring = batch->gmem;
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7));
-
emit_marker6(ring, 7);
OUT_PKT7(ring, CP_SET_MARKER, 1);
OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
@@ -595,8 +616,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
set_scissor(ring, x1, y1, x2, y2);
- set_window_offset(ring, x1, y1);
-
OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
@@ -620,7 +639,32 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
(tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0);
OUT_RELOC(ring, fd6_ctx->vsc_data2,
(tile->p * A6XX_VSC_DATA2_PITCH), 0, 0);
+
+ set_window_offset(ring, x1, y1);
+
+ struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+ set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
+
+ OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
+ OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
+
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT7(ring, CP_SET_MODE, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
+ OUT_RING(ring, 0x0);
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
+ OUT_RING(ring, 0x0);
} else {
+ set_window_offset(ring, x1, y1);
+
OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
OUT_RING(ring, 0x1);
@@ -1028,26 +1072,6 @@ prepare_tile_fini_ib(struct fd_batch *batch)
FD_RINGBUFFER_STREAMING);
ring = batch->tile_fini;
- if (use_hw_binning(batch)) {
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
- }
-
- OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
- OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
- CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
- CP_SET_DRAW_STATE__0_GROUP_ID(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
- OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
-
- OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x0);
-
- emit_marker6(ring, 7);
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
- emit_marker6(ring, 7);
-
set_blit_scissor(batch, ring);
if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
@@ -1081,7 +1105,32 @@ prepare_tile_fini_ib(struct fd_batch *batch)
static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
{
- fd6_emit_ib(batch->gmem, batch->tile_fini);
+ struct fd_ringbuffer *ring = batch->gmem;
+
+ if (use_hw_binning(batch)) {
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
+ }
+
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+ CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE__0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+ OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x0);
+
+ emit_marker6(ring, 7);
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
+ emit_marker6(ring, 7);
+
+ fd6_emit_ib(ring, batch->tile_fini);
+
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7));
}
static void