summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_draw.c6
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_gmem.c8
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c81
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c26
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.h3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_gmem.c360
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c136
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.h2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.c45
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h12
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c9
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.h23
-rw-r--r--src/gallium/drivers/freedreno/freedreno_gmem.c100
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c9
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h44
15 files changed, 706 insertions, 158 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index 300ce2e51c1..d6e42b668a8 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -108,7 +108,7 @@ fd2_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
- fd_draw_emit(ctx, info);
+ fd_draw_emit(ctx, ring, IGNORE_VISIBILITY, info);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
@@ -269,8 +269,8 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 3,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, INDEX_SIZE_IGN, 0, 0, NULL);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index c494bf153e0..274b6145fde 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -90,8 +90,8 @@ emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 3,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -212,8 +212,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 3,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index c5d8b774552..4c90d984955 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -43,7 +43,7 @@
static void
-emit_vertexbufs(struct fd_context *ctx)
+emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
struct fd_vertex_stateobj *vtx = ctx->vtx;
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
@@ -63,19 +63,17 @@ emit_vertexbufs(struct fd_context *ctx)
bufs[i].format = elem->src_format;
}
- fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements);
+ fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements);
}
static void
-fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
+draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
+ struct fd_ringbuffer *ring, unsigned dirty, bool binning)
{
- struct fd_ringbuffer *ring = ctx->ring;
- unsigned dirty = ctx->dirty;
-
- fd3_emit_state(ctx, dirty);
+ fd3_emit_state(ctx, ring, dirty, binning);
if (dirty & FD_DIRTY_VTXBUF)
- emit_vertexbufs(ctx);
+ emit_vertexbufs(ctx, ring);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
@@ -90,7 +88,59 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- fd_draw_emit(ctx, info);
+ fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info);
+}
+
+static void
+fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+ unsigned dirty = ctx->dirty;
+ draw_impl(ctx, info, ctx->binning_ring,
+ dirty & ~(FD_DIRTY_BLEND), true);
+ draw_impl(ctx, info, ctx->ring, dirty, false);
+}
+
+/* binning pass cmds for a clear:
+ * NOTE: newer blob drivers don't use binning for clear, which is probably
+ * preferable since it is low vtx count. However that doesn't seem to
+ * actually work for me. Not sure if it is depending on support for
+ * clear pass (rather than using solid-fill shader), or something else
+ * that newer blob is doing differently. Once that is figured out, we
+ * can remove fd3_clear_binning().
+ */
+static void
+fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = ctx->binning_ring;
+
+ fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT |
+ FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), true);
+
+ fd3_program_emit(ring, &ctx->solid_prog, true);
+
+ fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 1);
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, PERFCOUNTER_STOP);
+
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -99,11 +149,14 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
+ unsigned dirty = ctx->dirty;
unsigned ce, i;
+ fd3_clear_binning(ctx, dirty);
+
/* emit generic state now: */
- fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT |
- FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
+ fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT |
+ FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), false);
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
@@ -192,7 +245,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
- fd3_program_emit(ring, &ctx->solid_prog);
+ fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
@@ -216,8 +269,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, PERFCOUNTER_STOP);
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
void
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 91993725ea6..9cfe4ddb662 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -337,10 +337,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
}
void
-fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
+fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ uint32_t dirty, bool binning)
{
- struct fd_ringbuffer *ring = ctx->ring;
-
emit_marker(ring, 5);
if (dirty & FD_DIRTY_SAMPLE_MASK) {
@@ -354,7 +353,8 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
- fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
+ if (!binning)
+ fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
@@ -432,7 +432,10 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
}
if (dirty & FD_DIRTY_PROG)
- fd3_program_emit(ring, &ctx->prog);
+ fd3_program_emit(ring, &ctx->prog, binning);
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, HLSQ_FLUSH);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
struct fd_program_stateobj *prog = &ctx->prog;
@@ -566,11 +569,11 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
- OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
- OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
-
- OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
@@ -604,6 +607,9 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
}
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
emit_cache_flush(ring);
fd_rmw_wfi(ctx, ring);
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index bf7787ab6f7..50559d10d22 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -58,7 +58,8 @@ struct fd3_vertex_buf {
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog,
struct fd3_vertex_buf *vbufs, uint32_t n);
-void fd3_emit_state(struct fd_context *ctx, uint32_t dirty);
+void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ uint32_t dirty, bool binning);
void fd3_emit_restore(struct fd_context *ctx);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 3d0a607ed28..8720e087b7b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -106,6 +106,159 @@ depth_base(struct fd_gmem_stateobj *gmem)
return align(gmem->bin_w * gmem->bin_h, 0x4000);
}
+static bool
+use_hw_binning(struct fd_context *ctx)
+{
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
+}
+
+/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
+static void update_vsc_pipe(struct fd_context *ctx);
+static void
+emit_binning_workaround(struct fd_context *ctx)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ struct fd_ringbuffer *ring = ctx->ring;
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A3XX_RB_COPY_CONTROL_MODE(0) |
+ A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
+ OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
+ OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
+ A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
+ A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
+ A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ fd3_program_emit(ring, &ctx->solid_prog, false);
+
+ fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 1);
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+ A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
+ A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+ OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
+ OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
+ A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
+ A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+ OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
+ OUT_RING(ring, 0x00000000); /* viz query info. */
+ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
+ INDEX_SIZE_32_BIT, IGNORE_VISIBILITY));
+ OUT_RING(ring, 2); /* NumIndices */
+ OUT_RING(ring, 2);
+ OUT_RING(ring, 1);
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_WFI(ring);
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+}
+
/* transfer from gmem to system memory (ie. normal RAM) */
static void
@@ -129,8 +282,8 @@ emit_gmem2mem_surf(struct fd_context *ctx,
A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format)));
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -210,7 +363,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- fd3_program_emit(ring, &ctx->solid_prog);
+ fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
@@ -252,8 +405,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
fd3_emit_gmem_restore_tex(ring, psurf);
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -355,7 +508,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- fd3_program_emit(ring, &ctx->blit_prog);
+ fd3_program_emit(ring, &ctx->blit_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
@@ -381,11 +534,68 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
}
static void
+patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
+{
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
+ *patch->cs = patch->val | DRAW(0, 0, 0, vismode);
+ }
+ util_dynarray_resize(&ctx->draw_patches, 0);
+}
+
+/* for rendering directly to system memory: */
+static void
+fd3_emit_sysmem_prep(struct fd_context *ctx)
+{
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ struct fd_ringbuffer *ring = ctx->ring;
+ uint32_t pitch = 0;
+
+ if (pfb->cbufs[0])
+ pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch;
+
+ fd3_emit_restore(ctx);
+
+ OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
+
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
+
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
+ A3XX_RB_WINDOW_OFFSET_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+ patch_draws(ctx, IGNORE_VISIBILITY);
+}
+
+static void
update_vsc_pipe(struct fd_context *ctx)
{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;
+ OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
@@ -394,7 +604,7 @@ update_vsc_pipe(struct fd_context *ctx)
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
- OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3);
+ OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
@@ -404,34 +614,45 @@ update_vsc_pipe(struct fd_context *ctx)
}
}
-/* for rendering directly to system memory: */
static void
-fd3_emit_sysmem_prep(struct fd_context *ctx)
+emit_binning_pass(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
- uint32_t pitch = 0;
+ int i;
- if (pfb->cbufs[0])
- pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch;
+ if (ctx->screen->gpu_id == 320) {
+ emit_binning_workaround(ctx);
- fd3_emit_restore(ctx);
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00007fff);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
-
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
- /* setup scissor/offset for current tile: */
+ /* setup scissor/offset for whole screen: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
A3XX_RB_WINDOW_OFFSET_Y(0));
+ OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
+
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
@@ -439,9 +660,72 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+ for (i = 0; i < 4; i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(0) |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
+ }
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
+ A3XX_PC_VSTREAM_CONTROL_N(0));
+
+ /* emit IB to binning drawcmds: */
+ OUT_IB(ring, ctx->binning_start, ctx->binning_end);
+
+ /* and then put stuff back the way it was: */
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
+ A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
+ A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
+ A3XX_SP_SP_CTRL_REG_L0MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CACHE_FLUSH);
+
+ if (ctx->screen->gpu_id == 320) {
+ /* dummy-draw workaround: */
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 0); /* NumIndices */
+ }
+
+ OUT_PKT3(ring, CP_NOP, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_WFI(ring);
+
+ if (ctx->screen->gpu_id == 320) {
+ emit_binning_workaround(ctx);
+ }
}
/* before first tile */
@@ -461,6 +745,18 @@ fd3_emit_tile_init(struct fd_context *ctx)
A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
update_vsc_pipe(ctx);
+
+ if (use_hw_binning(ctx)) {
+ /* mark the end of the binning cmds: */
+ fd_ringmarker_mark(ctx->binning_end);
+
+ /* emit hw binning pass: */
+ emit_binning_pass(ctx);
+
+ patch_draws(ctx, USE_VISIBILITY);
+ } else {
+ patch_draws(ctx, IGNORE_VISIBILITY);
+ }
}
/* before mem2gmem */
@@ -472,7 +768,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t reg;
-
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem));
if (pfb->zsbuf) {
@@ -499,6 +794,7 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
static void
fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
@@ -508,6 +804,32 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
+ if (use_hw_binning(ctx)) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
+
+ assert(pipe->w * pipe->h);
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, HLSQ_FLUSH);
+
+ OUT_WFI(ring);
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
+ A3XX_PC_VSTREAM_CONTROL_N(tile->n));
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CACHE_FLUSH);
+
+ OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
+ OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
+ (tile->p * 4), 0, 0);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+ }
+
OUT_PKT3(ring, CP_SET_BIN, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index c02b14cba39..2622006ff09 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -36,6 +36,7 @@
#include "fd3_program.h"
#include "fd3_compiler.h"
+#include "fd3_emit.h"
#include "fd3_texture.h"
#include "fd3_util.h"
@@ -175,9 +176,9 @@ fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso)
}
static void
-emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
+emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so)
{
- struct ir3_shader_info *si = &so->info;
+ const struct ir3_shader_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
@@ -216,7 +217,7 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
}
static int
-find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
@@ -227,14 +228,21 @@ find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic)
void
fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog)
+ struct fd_program_stateobj *prog, bool binning)
{
- struct fd3_shader_stateobj *vp = prog->vp;
- struct fd3_shader_stateobj *fp = prog->fp;
- struct ir3_shader_info *vsi = &vp->info;
- struct ir3_shader_info *fsi = &fp->info;
+ const struct fd3_shader_stateobj *vp = prog->vp;
+ const struct fd3_shader_stateobj *fp = prog->fp;
+ const struct ir3_shader_info *vsi = &vp->info;
+ const struct ir3_shader_info *fsi = &fp->info;
int i;
+ if (binning) {
+ /* use dummy stateobj to simplify binning vs non-binning: */
+ static const struct fd3_shader_stateobj binning_fp = {};
+ fp = &binning_fp;
+ fsi = &fp->info;
+ }
+
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
@@ -260,11 +268,9 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
+ COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
- // XXX "resolve" (?) bit set on gmem->mem pass..
-// COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
- // XXX sometimes 0, sometimes 1:
- A3XX_SP_SP_CTRL_REG_LOMODE(1));
+ A3XX_SP_SP_CTRL_REG_L0MODE(0));
OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
@@ -272,6 +278,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+ A3XX_SP_VS_CTRL_REG0_CACHEINVALID |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
@@ -323,28 +330,38 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
- OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
- OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
- A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
- A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
- A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
- A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
- COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
- A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
- A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
- A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
- A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
- A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
- OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+ if (binning) {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+ A3XX_SP_FS_CTRL_REG0_CACHEINVALID |
+ A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+ A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+ COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+ A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
+ A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
+ A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
+ A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+ A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+ }
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
@@ -360,24 +377,31 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
- OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
- OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
- A3XX_VPC_ATTR_THRDASSIGN(1) |
- A3XX_VPC_ATTR_LMSIZE(1));
- OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
- A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
- OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
- OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
- OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
- OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
- OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
- OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
- OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
- OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+ if (binning) {
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
+ A3XX_VPC_ATTR_LMSIZE(1));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
+ A3XX_VPC_ATTR_THRDASSIGN(1) |
+ A3XX_VPC_ATTR_LMSIZE(1));
+ OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
+ A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
+ OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
+ OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
+ OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
+ OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
+ OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
+ OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
+ OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
+ OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+ }
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
@@ -388,10 +412,12 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
- emit_shader(ring, fp);
+ if (!binning) {
+ emit_shader(ring, fp);
- OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+ }
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 85c22a54cf7..bd6483ff42c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -117,7 +117,7 @@ struct fd3_shader_stateobj {
};
void fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog);
+ struct fd_program_stateobj *prog, bool binning);
void fd3_prog_init(struct pipe_context *pctx);
void fd3_prog_fini(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 28be508e329..23f6a67734d 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -34,16 +34,11 @@
#include "freedreno_gmem.h"
#include "freedreno_util.h"
-static void
-fd_context_next_rb(struct pipe_context *pctx)
+static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
{
- struct fd_context *ctx = fd_context(pctx);
struct fd_ringbuffer *ring;
uint32_t ts;
- fd_ringmarker_del(ctx->draw_start);
- fd_ringmarker_del(ctx->draw_end);
-
/* grab next ringbuffer: */
ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
@@ -56,10 +51,36 @@ fd_context_next_rb(struct pipe_context *pctx)
fd_ringbuffer_reset(ring);
+ return ring;
+}
+
+static void
+fd_context_next_rb(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_ringbuffer *ring;
+
+ fd_ringmarker_del(ctx->draw_start);
+ fd_ringmarker_del(ctx->draw_end);
+
+ ring = next_rb(ctx);
+
ctx->draw_start = fd_ringmarker_new(ring);
ctx->draw_end = fd_ringmarker_new(ring);
+ fd_ringbuffer_set_parent(ring, NULL);
ctx->ring = ring;
+
+ fd_ringmarker_del(ctx->binning_start);
+ fd_ringmarker_del(ctx->binning_end);
+
+ ring = next_rb(ctx);
+
+ ctx->binning_start = fd_ringmarker_new(ring);
+ ctx->binning_end = fd_ringmarker_new(ring);
+
+ fd_ringbuffer_set_parent(ring, ctx->ring);
+ ctx->binning_ring = ring;
}
/* emit accumulated render cmds, needed for example if render target has
@@ -121,6 +142,10 @@ fd_context_destroy(struct pipe_context *pctx)
DBG("");
+ util_slab_destroy(&ctx->transfer_pool);
+
+ util_dynarray_fini(&ctx->draw_patches);
+
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
@@ -129,7 +154,11 @@ fd_context_destroy(struct pipe_context *pctx)
fd_ringmarker_del(ctx->draw_start);
fd_ringmarker_del(ctx->draw_end);
- fd_ringbuffer_del(ctx->ring);
+ fd_ringmarker_del(ctx->binning_start);
+ fd_ringmarker_del(ctx->binning_end);
+
+ for (i = 0; i < ARRAY_SIZE(ctx->rings); i++)
+ fd_ringbuffer_del(ctx->rings[i]);
for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
@@ -176,6 +205,8 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
fd_context_next_rb(pctx);
fd_reset_rmw_state(ctx);
+ util_dynarray_init(&ctx->draw_patches);
+
util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
16, UTIL_SLAB_SINGLETHREADED);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index a8abbca7a62..a0227e49c03 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -111,7 +111,7 @@ struct fd_context {
*/
enum {
/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
- FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
+ FD_BUFFER_COLOR = PIPE_CLEAR_COLOR0,
FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
@@ -148,9 +148,14 @@ struct fd_context {
struct fd_ringbuffer *rings[4];
unsigned rings_idx;
+ /* normal draw/clear cmds: */
struct fd_ringbuffer *ring;
struct fd_ringmarker *draw_start, *draw_end;
+ /* binning pass draw/clear cmds: */
+ struct fd_ringbuffer *binning_ring;
+ struct fd_ringmarker *binning_start, *binning_end;
+
/* Keep track if WAIT_FOR_IDLE is needed for registers we need
* to update via RMW:
*/
@@ -165,6 +170,11 @@ struct fd_context {
uint32_t rbrc_draw;
} rmw;
+ /* Keep track of DRAW initiators that need to be patched up depending
+ * on whether we using binning or not:
+ */
+ struct util_dynarray draw_patches;
+
struct pipe_scissor_state scissor;
/* we don't have a disable/enable bit for scissor, so instead we keep
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 0069438c87d..d80f3565614 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -54,7 +54,9 @@ size2indextype(unsigned index_size)
/* this is same for a2xx/a3xx, so split into helper: */
void
-fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
+fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_vis_cull_mode vismode,
+ const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
@@ -78,8 +80,8 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
- fd_draw(ctx, ctx->primtypes[info->mode], src_sel, info->count,
- idx_type, idx_size, idx_offset, idx_bo);
+ fd_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+ info->count, idx_type, idx_size, idx_offset, idx_bo);
}
static void
@@ -180,6 +182,7 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
ctx->clear(ctx, buffers, color, depth, stencil);
ctx->dirty |= FD_DIRTY_ZSA |
+ FD_DIRTY_VIEWPORT |
FD_DIRTY_RASTERIZER |
FD_DIRTY_SAMPLE_MASK |
FD_DIRTY_PROG |
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h b/src/gallium/drivers/freedreno/freedreno_draw.h
index 190c0e52d24..e8bb420889e 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.h
+++ b/src/gallium/drivers/freedreno/freedreno_draw.h
@@ -38,19 +38,21 @@
struct fd_ringbuffer;
-void fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info);
+void fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_vis_cull_mode vismode,
+ const struct pipe_draw_info *info);
void fd_draw_init(struct pipe_context *pctx);
static inline void
-fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
+fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_primtype primtype,
+ enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count,
enum pc_di_index_size idx_type,
uint32_t idx_size, uint32_t idx_offset,
struct fd_bo *idx_bo)
{
- struct fd_ringbuffer *ring = ctx->ring;
-
/* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
@@ -64,7 +66,7 @@ fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ INDEX_SIZE_IGN, USE_VISIBILITY));
OUT_RING(ring, 0); /* NumIndices */
/* ugg, hard-code register offset to avoid pulling in the
@@ -76,8 +78,15 @@ fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
OUT_PKT3(ring, CP_DRAW_INDX, idx_bo ? 5 : 3);
OUT_RING(ring, 0x00000000); /* viz query info. */
- OUT_RING(ring, DRAW(primtype, src_sel,
- idx_type, IGNORE_VISIBILITY));
+ if (vismode == USE_VISIBILITY) {
+ /* leave vis mode blank for now, it will be patched up when
+ * we know if we are binning or not
+ */
+ OUT_RINGP(ring, DRAW(primtype, src_sel, idx_type, 0),
+ &ctx->draw_patches);
+ } else {
+ OUT_RING(ring, DRAW(primtype, src_sel, idx_type, vismode));
+ }
OUT_RING(ring, count); /* NumIndices */
if (idx_bo) {
OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 47f7a310e8c..0270538a3d0 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -85,7 +85,8 @@ calculate_tiles(struct fd_context *ctx)
uint32_t bin_w, bin_h;
uint32_t max_width = bin_width(ctx);
uint32_t cpp = 4;
- uint32_t i, j, t, p, n, xoff, yoff;
+ uint32_t i, j, t, xoff, yoff;
+ uint32_t tpp_x, tpp_y;
bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
if (pfb->cbufs[0])
@@ -145,20 +146,65 @@ calculate_tiles(struct fd_context *ctx)
gmem->width = width;
gmem->height = height;
- /* Assign tiles and pipes:
- * NOTE we currently take a rather simplistic approach of
- * mapping rows of tiles to a pipe. At some point it might
- * be worth playing with different strategies and seeing if
- * that makes much impact on performance.
+ /*
+ * Assign tiles and pipes:
+ *
+ * At some point it might be worth playing with different
+ * strategies and seeing if that makes much impact on
+ * performance.
*/
- t = p = n = 0;
+
+#define div_round_up(v, a) (((v) + (a) - 1) / (a))
+ /* figure out number of tiles per pipe: */
+ tpp_x = tpp_y = 1;
+ while (div_round_up(nbins_y, tpp_y) > 8)
+ tpp_y += 2;
+ while ((div_round_up(nbins_y, tpp_y) *
+ div_round_up(nbins_x, tpp_x)) > 8)
+ tpp_x += 1;
+
+ /* configure pipes: */
+ xoff = yoff = 0;
+ for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+
+ if (xoff >= nbins_x) {
+ xoff = 0;
+ yoff += tpp_y;
+ }
+
+ if (yoff >= nbins_y) {
+ break;
+ }
+
+ pipe->x = xoff;
+ pipe->y = yoff;
+ pipe->w = MIN2(tpp_x, nbins_x - xoff);
+ pipe->h = MIN2(tpp_y, nbins_y - yoff);
+
+ xoff += tpp_x;
+ }
+
+ for (; i < ARRAY_SIZE(ctx->pipe); i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ pipe->x = pipe->y = pipe->w = pipe->h = 0;
+ }
+
+#if 0 /* debug */
+ printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
+ for (i = 0; i < 8; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ printf("pipe[%d]: %ux%u @ %u,%u\n", i,
+ pipe->w, pipe->h, pipe->x, pipe->y);
+ }
+#endif
+
+ /* configure tiles: */
+ t = 0;
yoff = miny;
for (i = 0; i < nbins_y; i++) {
- struct fd_vsc_pipe *pipe = &ctx->pipe[p];
uint32_t bw, bh;
- assert(p < ARRAY_SIZE(ctx->pipe));
-
xoff = minx;
/* clip bin height: */
@@ -166,13 +212,20 @@ calculate_tiles(struct fd_context *ctx)
for (j = 0; j < nbins_x; j++) {
struct fd_tile *tile = &ctx->tile[t];
+ uint32_t n, p;
assert(t < ARRAY_SIZE(ctx->tile));
+ /* pipe number: */
+ p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
+
+ /* slot number: */
+ n = ((i % tpp_y) * tpp_x) + (j % tpp_x);
+
/* clip bin width: */
bw = MIN2(bin_w, minx + width - xoff);
- tile->n = n++;
+ tile->n = n;
tile->p = p;
tile->bin_w = bw;
tile->bin_h = bh;
@@ -184,22 +237,19 @@ calculate_tiles(struct fd_context *ctx)
xoff += bw;
}
- /* one pipe per row: */
- pipe->x = 0;
- pipe->y = i;
- pipe->w = nbins_x;
- pipe->h = 1;
-
- p++;
- n = 0;
-
yoff += bh;
}
- for (; p < ARRAY_SIZE(ctx->pipe); p++) {
- struct fd_vsc_pipe *pipe = &ctx->pipe[p];
- pipe->x = pipe->y = pipe->w = pipe->h = 0;
+#if 0 /* debug */
+ t = 0;
+ for (i = 0; i < nbins_y; i++) {
+ for (j = 0; j < nbins_x; j++) {
+ struct fd_tile *tile = &ctx->tile[t++];
+ printf("|p:%u n:%u|", tile->p, tile->n);
+ }
+ printf("\n");
}
+#endif
}
static void
@@ -259,6 +309,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
fd_ringmarker_mark(ctx->draw_end);
+ fd_ringmarker_mark(ctx->binning_end);
if (sysmem) {
DBG("rendering sysmem (%s/%s)",
@@ -277,8 +328,9 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
/* GPU executes starting from tile cmds, which IB back to draw cmds: */
fd_ringmarker_flush(ctx->draw_end);
- /* mark start for next draw cmds: */
+ /* mark start for next draw/binning cmds: */
fd_ringmarker_mark(ctx->draw_start);
+ fd_ringmarker_mark(ctx->binning_start);
fd_reset_rmw_state(ctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 319e29f3ada..28a09166acd 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -64,12 +64,15 @@ static const struct debug_named_value debug_options[] = {
{"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
{"dbypass", FD_DBG_DBYPASS,"Disable GMEM bypass"},
{"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
+ {"binning", FD_DBG_BINNING, "Enable hw binning"},
+ {"dbinning", FD_DBG_DBINNING, "Disable hw binning"},
DEBUG_NAMED_VALUE_END
};
DEBUG_GET_ONCE_FLAGS_OPTION(fd_mesa_debug, "FD_MESA_DEBUG", debug_options, 0)
int fd_mesa_debug = 0;
+bool fd_binning_enabled = false; /* default to off for now */
static const char *
fd_screen_get_name(struct pipe_screen *pscreen)
@@ -386,6 +389,12 @@ fd_screen_create(struct fd_device *dev)
fd_mesa_debug = debug_get_option_fd_mesa_debug();
+ if (fd_mesa_debug & FD_DBG_BINNING)
+ fd_binning_enabled = true;
+
+ if (fd_mesa_debug & FD_DBG_DBINNING)
+ fd_binning_enabled = false;
+
if (!screen)
return NULL;
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 48d346eb35b..fae5ba06b1d 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -37,6 +37,7 @@
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_half.h"
+#include "util/u_dynarray.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
@@ -52,16 +53,19 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
/* TBD if it is same on a2xx, but for now: */
#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS
-#define FD_DBG_MSGS 0x01
-#define FD_DBG_DISASM 0x02
-#define FD_DBG_DCLEAR 0x04
-#define FD_DBG_DGMEM 0x08
-#define FD_DBG_DSCIS 0x10
-#define FD_DBG_DIRECT 0x20
-#define FD_DBG_DBYPASS 0x40
-#define FD_DBG_FRAGHALF 0x80
+#define FD_DBG_MSGS 0x0001
+#define FD_DBG_DISASM 0x0002
+#define FD_DBG_DCLEAR 0x0004
+#define FD_DBG_DGMEM 0x0008
+#define FD_DBG_DSCIS 0x0010
+#define FD_DBG_DIRECT 0x0020
+#define FD_DBG_DBYPASS 0x0040
+#define FD_DBG_FRAGHALF 0x0080
+#define FD_DBG_BINNING 0x0100
+#define FD_DBG_DBINNING 0x0200
extern int fd_mesa_debug;
+extern bool fd_binning_enabled;
#define DBG(fmt, ...) \
do { if (fd_mesa_debug & FD_DBG_MSGS) \
@@ -87,6 +91,13 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
(1 << 14);
}
+/* for tracking cmdstream positions that need to be patched: */
+struct fd_cs_patch {
+ uint32_t *cs;
+ uint32_t val;
+};
+#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))
+#define fd_patch_element(buf, i) util_dynarray_element(buf, struct fd_cs_patch, i)
static inline enum pipe_format
pipe_surface_format(struct pipe_surface *psurf)
@@ -110,6 +121,21 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
*(ring->cur++) = data;
}
+/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
+static inline void
+OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
+ struct util_dynarray *buf)
+{
+ if (LOG_DWORDS) {
+ DBG("ring[%p]: OUT_RINGP %04x: %08x", ring,
+ (uint32_t)(ring->cur - ring->last_start), data);
+ }
+ util_dynarray_append(buf, struct fd_cs_patch, ((struct fd_cs_patch){
+ .cs = ring->cur++,
+ .val = data,
+ }));
+}
+
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
@@ -132,7 +158,7 @@ OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
- DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
+ DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
fd_ringbuffer_reloc(ring, &(struct fd_reloc){