summary refs log tree commit diff stats
path: root/src/gallium/drivers/freedreno/a3xx
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_draw.c 81
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c 26
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.h 3
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 360
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 136
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.h 2
6 files changed, 508 insertions, 100 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index c5d8b774552..4c90d984955 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -43,7 +43,7 @@
static void
-emit_vertexbufs(struct fd_context *ctx)
+emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
struct fd_vertex_stateobj *vtx = ctx->vtx;
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
@@ -63,19 +63,17 @@ emit_vertexbufs(struct fd_context *ctx)
bufs[i].format = elem->src_format;
}
- fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements);
+ fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements);
}
static void
-fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
+draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
+ struct fd_ringbuffer *ring, unsigned dirty, bool binning)
{
- struct fd_ringbuffer *ring = ctx->ring;
- unsigned dirty = ctx->dirty;
-
- fd3_emit_state(ctx, dirty);
+ fd3_emit_state(ctx, ring, dirty, binning);
if (dirty & FD_DIRTY_VTXBUF)
- emit_vertexbufs(ctx);
+ emit_vertexbufs(ctx, ring);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
@@ -90,7 +88,59 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- fd_draw_emit(ctx, info);
+ fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info);
+}
+
+static void
+fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+ unsigned dirty = ctx->dirty;
+ draw_impl(ctx, info, ctx->binning_ring,
+ dirty & ~(FD_DIRTY_BLEND), true);
+ draw_impl(ctx, info, ctx->ring, dirty, false);
+}
+
+/* binning pass cmds for a clear:
+ * NOTE: newer blob drivers don't use binning for clear, which is probably
+ * preferable since it is low vtx count. However that doesn't seem to
+ * actually work for me. Not sure if it is depending on support for
+ * clear pass (rather than using solid-fill shader), or something else
+ * that newer blob is doing differently. Once that is figured out, we
+ * can remove fd3_clear_binning().
+ */
+static void
+fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = ctx->binning_ring;
+
+ fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT |
+ FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), true);
+
+ fd3_program_emit(ring, &ctx->solid_prog, true);
+
+ fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 1);
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, PERFCOUNTER_STOP);
+
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -99,11 +149,14 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
+ unsigned dirty = ctx->dirty;
unsigned ce, i;
+ fd3_clear_binning(ctx, dirty);
+
/* emit generic state now: */
- fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT |
- FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
+ fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT |
+ FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), false);
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
@@ -192,7 +245,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
- fd3_program_emit(ring, &ctx->solid_prog);
+ fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
@@ -216,8 +269,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, PERFCOUNTER_STOP);
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
void
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 91993725ea6..9cfe4ddb662 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -337,10 +337,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
}
void
-fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
+fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ uint32_t dirty, bool binning)
{
- struct fd_ringbuffer *ring = ctx->ring;
-
emit_marker(ring, 5);
if (dirty & FD_DIRTY_SAMPLE_MASK) {
@@ -354,7 +353,8 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
- fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
+ if (!binning)
+ fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
@@ -432,7 +432,10 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
}
if (dirty & FD_DIRTY_PROG)
- fd3_program_emit(ring, &ctx->prog);
+ fd3_program_emit(ring, &ctx->prog, binning);
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, HLSQ_FLUSH);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
struct fd_program_stateobj *prog = &ctx->prog;
@@ -566,11 +569,11 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
- OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
- OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
-
- OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
@@ -604,6 +607,9 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
}
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
emit_cache_flush(ring);
fd_rmw_wfi(ctx, ring);
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index bf7787ab6f7..50559d10d22 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -58,7 +58,8 @@ struct fd3_vertex_buf {
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog,
struct fd3_vertex_buf *vbufs, uint32_t n);
-void fd3_emit_state(struct fd_context *ctx, uint32_t dirty);
+void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ uint32_t dirty, bool binning);
void fd3_emit_restore(struct fd_context *ctx);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 3d0a607ed28..8720e087b7b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -106,6 +106,159 @@ depth_base(struct fd_gmem_stateobj *gmem)
return align(gmem->bin_w * gmem->bin_h, 0x4000);
}
+static bool
+use_hw_binning(struct fd_context *ctx)
+{
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
+}
+
+/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
+static void update_vsc_pipe(struct fd_context *ctx);
+static void
+emit_binning_workaround(struct fd_context *ctx)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ struct fd_ringbuffer *ring = ctx->ring;
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A3XX_RB_COPY_CONTROL_MODE(0) |
+ A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
+ OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
+ OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
+ A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
+ A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
+ A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ fd3_program_emit(ring, &ctx->solid_prog, false);
+
+ fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 1);
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+ A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
+ A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+ OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
+ OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
+ A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
+ A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
+ A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+ OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
+ OUT_RING(ring, 0x00000000); /* viz query info. */
+ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
+ INDEX_SIZE_32_BIT, IGNORE_VISIBILITY));
+ OUT_RING(ring, 2); /* NumIndices */
+ OUT_RING(ring, 2);
+ OUT_RING(ring, 1);
+
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_WFI(ring);
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+}
+
/* transfer from gmem to system memory (ie. normal RAM) */
static void
@@ -129,8 +282,8 @@ emit_gmem2mem_surf(struct fd_context *ctx,
A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format)));
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -210,7 +363,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- fd3_program_emit(ring, &ctx->solid_prog);
+ fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
@@ -252,8 +405,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
fd3_emit_gmem_restore_tex(ring, psurf);
- fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
- INDEX_SIZE_IGN, 0, 0, NULL);
+ fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@@ -355,7 +508,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- fd3_program_emit(ring, &ctx->blit_prog);
+ fd3_program_emit(ring, &ctx->blit_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
@@ -381,11 +534,68 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
}
static void
+patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
+{
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
+ *patch->cs = patch->val | DRAW(0, 0, 0, vismode);
+ }
+ util_dynarray_resize(&ctx->draw_patches, 0);
+}
+
+/* for rendering directly to system memory: */
+static void
+fd3_emit_sysmem_prep(struct fd_context *ctx)
+{
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ struct fd_ringbuffer *ring = ctx->ring;
+ uint32_t pitch = 0;
+
+ if (pfb->cbufs[0])
+ pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch;
+
+ fd3_emit_restore(ctx);
+
+ OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
+
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
+
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
+ A3XX_RB_WINDOW_OFFSET_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+ patch_draws(ctx, IGNORE_VISIBILITY);
+}
+
+static void
update_vsc_pipe(struct fd_context *ctx)
{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;
+ OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
@@ -394,7 +604,7 @@ update_vsc_pipe(struct fd_context *ctx)
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
- OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3);
+ OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
@@ -404,34 +614,45 @@ update_vsc_pipe(struct fd_context *ctx)
}
}
-/* for rendering directly to system memory: */
static void
-fd3_emit_sysmem_prep(struct fd_context *ctx)
+emit_binning_pass(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
- uint32_t pitch = 0;
+ int i;
- if (pfb->cbufs[0])
- pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch;
+ if (ctx->screen->gpu_id == 320) {
+ emit_binning_workaround(ctx);
- fd3_emit_restore(ctx);
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00007fff);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
+ OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
-
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
- /* setup scissor/offset for current tile: */
+ /* setup scissor/offset for whole screen: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
A3XX_RB_WINDOW_OFFSET_Y(0));
+ OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
+
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
@@ -439,9 +660,72 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+ for (i = 0; i < 4; i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(0) |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
+ }
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
+ A3XX_PC_VSTREAM_CONTROL_N(0));
+
+ /* emit IB to binning drawcmds: */
+ OUT_IB(ring, ctx->binning_start, ctx->binning_end);
+
+ /* and then put stuff back the way it was: */
+
+ OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
+ A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
+ A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
+ A3XX_SP_SP_CTRL_REG_L0MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CACHE_FLUSH);
+
+ if (ctx->screen->gpu_id == 320) {
+ /* dummy-draw workaround: */
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 0); /* NumIndices */
+ }
+
+ OUT_PKT3(ring, CP_NOP, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_WFI(ring);
+
+ if (ctx->screen->gpu_id == 320) {
+ emit_binning_workaround(ctx);
+ }
}
/* before first tile */
@@ -461,6 +745,18 @@ fd3_emit_tile_init(struct fd_context *ctx)
A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
update_vsc_pipe(ctx);
+
+ if (use_hw_binning(ctx)) {
+ /* mark the end of the binning cmds: */
+ fd_ringmarker_mark(ctx->binning_end);
+
+ /* emit hw binning pass: */
+ emit_binning_pass(ctx);
+
+ patch_draws(ctx, USE_VISIBILITY);
+ } else {
+ patch_draws(ctx, IGNORE_VISIBILITY);
+ }
}
/* before mem2gmem */
@@ -472,7 +768,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t reg;
-
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem));
if (pfb->zsbuf) {
@@ -499,6 +794,7 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
static void
fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
@@ -508,6 +804,32 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
+ if (use_hw_binning(ctx)) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
+
+ assert(pipe->w * pipe->h);
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, HLSQ_FLUSH);
+
+ OUT_WFI(ring);
+
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
+ A3XX_PC_VSTREAM_CONTROL_N(tile->n));
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, CACHE_FLUSH);
+
+ OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
+ OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
+ (tile->p * 4), 0, 0);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+ }
+
OUT_PKT3(ring, CP_SET_BIN, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index c02b14cba39..2622006ff09 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -36,6 +36,7 @@
#include "fd3_program.h"
#include "fd3_compiler.h"
+#include "fd3_emit.h"
#include "fd3_texture.h"
#include "fd3_util.h"
@@ -175,9 +176,9 @@ fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso)
}
static void
-emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
+emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so)
{
- struct ir3_shader_info *si = &so->info;
+ const struct ir3_shader_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
@@ -216,7 +217,7 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
}
static int
-find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
@@ -227,14 +228,21 @@ find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic)
void
fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog)
+ struct fd_program_stateobj *prog, bool binning)
{
- struct fd3_shader_stateobj *vp = prog->vp;
- struct fd3_shader_stateobj *fp = prog->fp;
- struct ir3_shader_info *vsi = &vp->info;
- struct ir3_shader_info *fsi = &fp->info;
+ const struct fd3_shader_stateobj *vp = prog->vp;
+ const struct fd3_shader_stateobj *fp = prog->fp;
+ const struct ir3_shader_info *vsi = &vp->info;
+ const struct ir3_shader_info *fsi = &fp->info;
int i;
+ if (binning) {
+ /* use dummy stateobj to simplify binning vs non-binning: */
+ static const struct fd3_shader_stateobj binning_fp = {};
+ fp = &binning_fp;
+ fsi = &fp->info;
+ }
+
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
@@ -260,11 +268,9 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
+ COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
- // XXX "resolve" (?) bit set on gmem->mem pass..
-// COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
- // XXX sometimes 0, sometimes 1:
- A3XX_SP_SP_CTRL_REG_LOMODE(1));
+ A3XX_SP_SP_CTRL_REG_L0MODE(0));
OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
@@ -272,6 +278,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+ A3XX_SP_VS_CTRL_REG0_CACHEINVALID |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
@@ -323,28 +330,38 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
- OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
- OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
- A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
- A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
- A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
- A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
- COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
- A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
- OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
- A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
- A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
- A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
-
- OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
- OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
- A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
- OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+ if (binning) {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+ A3XX_SP_FS_CTRL_REG0_CACHEINVALID |
+ A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+ A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+ COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+ A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
+ A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
+ A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
+ A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+ A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+ }
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
@@ -360,24 +377,31 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
- OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
- OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
- A3XX_VPC_ATTR_THRDASSIGN(1) |
- A3XX_VPC_ATTR_LMSIZE(1));
- OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
- A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
- OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
- OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
- OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
- OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
-
- OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
- OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
- OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
- OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
- OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+ if (binning) {
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
+ A3XX_VPC_ATTR_LMSIZE(1));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
+ A3XX_VPC_ATTR_THRDASSIGN(1) |
+ A3XX_VPC_ATTR_LMSIZE(1));
+ OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
+ A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
+ OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
+ OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
+ OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
+ OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
+ OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
+ OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
+ OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
+ OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+ }
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
@@ -388,10 +412,12 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
- emit_shader(ring, fp);
+ if (!binning) {
+ emit_shader(ring, fp);
- OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+ }
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 85c22a54cf7..bd6483ff42c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -117,7 +117,7 @@ struct fd3_shader_stateobj {
};
void fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog);
+ struct fd_program_stateobj *prog, bool binning);
void fd3_prog_init(struct pipe_context *pctx);
void fd3_prog_fini(struct pipe_context *pctx);