diff options
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 81 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 360 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 136 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.h | 2 |
6 files changed, 508 insertions, 100 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index c5d8b774552..4c90d984955 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -43,7 +43,7 @@ static void -emit_vertexbufs(struct fd_context *ctx) +emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring) { struct fd_vertex_stateobj *vtx = ctx->vtx; struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf; @@ -63,19 +63,17 @@ emit_vertexbufs(struct fd_context *ctx) bufs[i].format = elem->src_format; } - fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements); + fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements); } static void -fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) +draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, + struct fd_ringbuffer *ring, unsigned dirty, bool binning) { - struct fd_ringbuffer *ring = ctx->ring; - unsigned dirty = ctx->dirty; - - fd3_emit_state(ctx, dirty); + fd3_emit_state(ctx, ring, dirty, binning); if (dirty & FD_DIRTY_VTXBUF) - emit_vertexbufs(ctx); + emit_vertexbufs(ctx, ring); OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ @@ -90,7 +88,59 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); - fd_draw_emit(ctx, info); + fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info); +} + +static void +fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) +{ + unsigned dirty = ctx->dirty; + draw_impl(ctx, info, ctx->binning_ring, + dirty & ~(FD_DIRTY_BLEND), true); + draw_impl(ctx, info, ctx->ring, dirty, false); +} + +/* binning pass cmds for a clear: + * NOTE: newer blob drivers don't use binning for clear, which is probably + * preferable since it is low vtx count. However that doesn't seem to + * actually work for me. Not sure if it is depending on support for + * clear pass (rather than using solid-fill shader), or something else + * that newer blob is doing differently. Once that is figured out, we + * can remove fd3_clear_binning(). + */ +static void +fd3_clear_binning(struct fd_context *ctx, unsigned dirty) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_ringbuffer *ring = ctx->binning_ring; + + fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT | + FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), true); + + fd3_program_emit(ring, &ctx->solid_prog, true); + + fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { + { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, + }, 1); + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, 0); /* VFD_INDEX_MIN */ + OUT_RING(ring, 2); /* VFD_INDEX_MAX */ + OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PERFCOUNTER_STOP); + + fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } static void @@ -99,11 +149,14 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, { struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; + unsigned dirty = ctx->dirty; unsigned ce, i; + fd3_clear_binning(ctx, dirty); + /* emit generic state now: */ - fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT | - FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR)); + fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT | + FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), false); OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1); OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | @@ -192,7 +245,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); - fd3_program_emit(ring, &ctx->solid_prog); + fd3_program_emit(ring, &ctx->solid_prog, false); fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, @@ -216,8 +269,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT3(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, PERFCOUNTER_STOP); - fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2, - INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 91993725ea6..9cfe4ddb662 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -337,10 +337,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, } void -fd3_emit_state(struct fd_context *ctx, uint32_t dirty) +fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + uint32_t dirty, bool binning) { - struct fd_ringbuffer *ring = ctx->ring; - emit_marker(ring, 5); if (dirty & FD_DIRTY_SAMPLE_MASK) { @@ -354,7 +353,8 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty) struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; - fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control); + if (!binning) + fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control); OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1); OUT_RING(ring, zsa->rb_alpha_ref); @@ -432,7 +432,10 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty) } if (dirty & FD_DIRTY_PROG) - fd3_program_emit(ring, &ctx->prog); + fd3_program_emit(ring, &ctx->prog, binning); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) { struct fd_program_stateobj *prog = &ctx->prog; @@ -566,11 +569,11 @@ fd3_emit_restore(struct fd_context *ctx) OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) | A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0)); - OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1); - OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */ - - OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); - OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ + OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0)); + OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) | + A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) | + A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE); OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ @@ -604,6 +607,9 @@ fd3_emit_restore(struct fd_context *ctx) OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */ } + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, 0x00000000); + emit_cache_flush(ring); fd_rmw_wfi(ctx, ring); } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index bf7787ab6f7..50559d10d22 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -58,7 +58,8 @@ struct fd3_vertex_buf { void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd_program_stateobj *prog, struct fd3_vertex_buf *vbufs, uint32_t n); -void fd3_emit_state(struct fd_context *ctx, uint32_t dirty); +void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + uint32_t dirty, bool binning); void fd3_emit_restore(struct fd_context *ctx); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 3d0a607ed28..8720e087b7b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -106,6 +106,159 @@ depth_base(struct fd_gmem_stateobj *gmem) return align(gmem->bin_w * gmem->bin_h, 0x4000); } +static bool +use_hw_binning(struct fd_context *ctx) +{ + struct fd_gmem_stateobj *gmem = &ctx->gmem; + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); +} + +/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */ +static void update_vsc_pipe(struct fd_context *ctx); +static void +emit_binning_workaround(struct fd_context *ctx) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_ringbuffer *ring = ctx->ring; + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | + A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); + OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | + A3XX_RB_COPY_CONTROL_MODE(0) | + A3XX_RB_COPY_CONTROL_GMEM_BASE(0)); + OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128)); + OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | + A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) | + A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) | + A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | + A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); + + fd3_program_emit(ring, &ctx->solid_prog, false); + + fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { + { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, + }, 1); + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); + OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | + A3XX_HLSQ_CONTROL_0_REG_RESERVED2 | + A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); + OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | + A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE); + OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); + OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */ + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); + OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) | + A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20)); + + OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | + A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | + A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff)); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0)); + + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, 0); /* VFD_INDEX_MIN */ + OUT_RING(ring, 2); /* VFD_INDEX_MAX */ + OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) | + A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE | + A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE | + A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE | + A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE | + A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) | + A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0)); + + OUT_PKT3(ring, CP_DRAW_INDX_2, 5); + OUT_RING(ring, 0x00000000); /* viz query info. */ + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, + INDEX_SIZE_32_BIT, IGNORE_VISIBILITY)); + OUT_RING(ring, 2); /* NumIndices */ + OUT_RING(ring, 2); + OUT_RING(ring, 1); + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1); + OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS)); + + OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); + + OUT_WFI(ring); + + OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); + OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | + A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x00000000); +} + /* transfer from gmem to system memory (ie. normal RAM) */ static void @@ -129,8 +282,8 @@ emit_gmem2mem_surf(struct fd_context *ctx, A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format))); - fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2, - INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } static void @@ -210,7 +363,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &ctx->solid_prog); + fd3_program_emit(ring, &ctx->solid_prog, false); fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, @@ -252,8 +405,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, fd3_emit_gmem_restore_tex(ring, psurf); - fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2, - INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } static void @@ -355,7 +508,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &ctx->blit_prog); + fd3_program_emit(ring, &ctx->blit_prog, false); fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) { { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT }, @@ -381,11 +534,68 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } static void +patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i); + *patch->cs = patch->val | DRAW(0, 0, 0, vismode); + } + util_dynarray_resize(&ctx->draw_patches, 0); +} + +/* for rendering directly to system memory: */ +static void +fd3_emit_sysmem_prep(struct fd_context *ctx) +{ + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = ctx->ring; + uint32_t pitch = 0; + + if (pfb->cbufs[0]) + pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch; + + fd3_emit_restore(ctx); + + OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | + A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); + + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); + + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + + /* setup scissor/offset for current tile: */ + OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | + A3XX_RB_WINDOW_OFFSET_Y(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) | + A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_GMEM_BYPASS | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + + patch_draws(ctx, IGNORE_VISIBILITY); +} + +static void update_vsc_pipe(struct fd_context *ctx) { + struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; int i; + OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); + OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ + for (i = 0; i < 8; i++) { struct fd_vsc_pipe *pipe = &ctx->pipe[i]; @@ -394,7 +604,7 @@ update_vsc_pipe(struct fd_context *ctx) DRM_FREEDRENO_GEM_TYPE_KMEM); } - OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3); + OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) | A3XX_VSC_PIPE_CONFIG_Y(pipe->y) | A3XX_VSC_PIPE_CONFIG_W(pipe->w) | @@ -404,34 +614,45 @@ update_vsc_pipe(struct fd_context *ctx) } } -/* for rendering directly to system memory: */ static void -fd3_emit_sysmem_prep(struct fd_context *ctx) +emit_binning_pass(struct fd_context *ctx) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd_ringbuffer *ring = ctx->ring; - uint32_t pitch = 0; + int i; - if (pfb->cbufs[0]) - pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch; + if (ctx->screen->gpu_id == 320) { + emit_binning_workaround(ctx); - fd3_emit_restore(ctx); + OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); + OUT_RING(ring, 0x00007fff); + } + + OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); + OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); - OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | - A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); - /* setup scissor/offset for current tile: */ + /* setup scissor/offset for whole screen: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | A3XX_RB_WINDOW_OFFSET_Y(0)); + OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); + OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE); + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); @@ -439,9 +660,72 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1)); OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + + for (i = 0; i < 4; i++) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(0) | + A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0)); + } + + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) | + A3XX_PC_VSTREAM_CONTROL_N(0)); + + /* emit IB to binning drawcmds: */ + OUT_IB(ring, ctx->binning_start, ctx->binning_end); + + /* and then put stuff back the way it was: */ + + OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); + OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE | + A3XX_SP_SP_CTRL_REG_CONSTMODE(1) | + A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | + A3XX_SP_SP_CTRL_REG_L0MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_GMEM_BYPASS | A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CACHE_FLUSH); + + if (ctx->screen->gpu_id == 320) { + /* dummy-draw workaround: */ + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 0); /* NumIndices */ + } + + OUT_PKT3(ring, CP_NOP, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_WFI(ring); + + if (ctx->screen->gpu_id == 320) { + emit_binning_workaround(ctx); + } } /* before first tile */ @@ -461,6 +745,18 @@ fd3_emit_tile_init(struct fd_context *ctx) A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); update_vsc_pipe(ctx); + + if (use_hw_binning(ctx)) { + /* mark the end of the binning cmds: */ + fd_ringmarker_mark(ctx->binning_end); + + /* emit hw binning pass: */ + emit_binning_pass(ctx); + + patch_draws(ctx, USE_VISIBILITY); + } else { + patch_draws(ctx, IGNORE_VISIBILITY); + } } /* before mem2gmem */ @@ -472,7 +768,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) struct fd_gmem_stateobj *gmem = &ctx->gmem; uint32_t reg; - OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem)); if (pfb->zsbuf) { @@ -499,6 +794,7 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) static void fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) { + struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; @@ -508,6 +804,32 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) uint32_t x2 = tile->xoff + tile->bin_w - 1; uint32_t y2 = tile->yoff + tile->bin_h - 1; + if (use_hw_binning(ctx)) { + struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; + + assert(pipe->w * pipe->h); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); + + OUT_WFI(ring); + + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) | + A3XX_PC_VSTREAM_CONTROL_N(tile->n)); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CACHE_FLUSH); + + OUT_PKT3(ring, CP_SET_BIN_DATA, 2); + OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ + OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */ + (tile->p * 4), 0, 0); + } else { + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, 0x00000000); + } + OUT_PKT3(ring, CP_SET_BIN, 3); OUT_RING(ring, 0x00000000); OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index c02b14cba39..2622006ff09 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -36,6 +36,7 @@ #include "fd3_program.h" #include "fd3_compiler.h" +#include "fd3_emit.h" #include "fd3_texture.h" #include "fd3_util.h" @@ -175,9 +176,9 @@ fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso) } static void -emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so) +emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so) { - struct ir3_shader_info *si = &so->info; + const struct ir3_shader_info *si = &so->info; enum adreno_state_block sb; enum adreno_state_src src; uint32_t i, sz, *bin; @@ -216,7 +217,7 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so) } static int -find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic) +find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -227,14 +228,21 @@ find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic) void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog) + struct fd_program_stateobj *prog, bool binning) { - struct fd3_shader_stateobj *vp = prog->vp; - struct fd3_shader_stateobj *fp = prog->fp; - struct ir3_shader_info *vsi = &vp->info; - struct ir3_shader_info *fsi = &fp->info; + const struct fd3_shader_stateobj *vp = prog->vp; + const struct fd3_shader_stateobj *fp = prog->fp; + const struct ir3_shader_info *vsi = &vp->info; + const struct ir3_shader_info *fsi = &fp->info; int i; + if (binning) { + /* use dummy stateobj to simplify binning vs non-binning: */ + static const struct fd3_shader_stateobj binning_fp = {}; + fp = &binning_fp; + fsi = &fp->info; + } + /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ @@ -260,11 +268,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) | + COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | - // XXX "resolve" (?) bit set on gmem->mem pass.. -// COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) | - // XXX sometimes 0, sometimes 1: - A3XX_SP_SP_CTRL_REG_LOMODE(1)); + A3XX_SP_SP_CTRL_REG_L0MODE(0)); OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1); OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen)); @@ -272,6 +278,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3); OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) | + A3XX_SP_VS_CTRL_REG0_CACHEINVALID | A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) | A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) | A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | @@ -323,28 +330,38 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ - OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); - OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); - - OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); - OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | - A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) | - A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | - A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | - A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | - A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | - A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | - COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | - A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen)); - OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | - A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | - A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) | - A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); - - OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); - OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | - A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); - OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ + if (binning) { + OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); + OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | + A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER)); + OUT_RING(ring, 0x00000000); + } else { + OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); + OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); + + OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); + OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | + A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) | + A3XX_SP_FS_CTRL_REG0_CACHEINVALID | + A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | + A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | + A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | + A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | + A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | + COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | + A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen)); + OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | + A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | + A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) | + A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); + OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | + A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); + OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ + } OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ @@ -360,24 +377,31 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); - OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); - OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | - A3XX_VPC_ATTR_THRDASSIGN(1) | - A3XX_VPC_ATTR_LMSIZE(1)); - OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) | - A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); - - OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); - OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ - OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ - OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ - OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ - - OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); - OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ - OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ - OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ - OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + if (binning) { + OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); + OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | + A3XX_VPC_ATTR_LMSIZE(1)); + OUT_RING(ring, 0x00000000); + } else { + OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); + OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | + A3XX_VPC_ATTR_THRDASSIGN(1) | + A3XX_VPC_ATTR_LMSIZE(1)); + OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) | + A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); + + OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); + OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ + OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ + OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ + OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ + + OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); + OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ + OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ + OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ + OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + } OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) | @@ -388,10 +412,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ - emit_shader(ring, fp); + if (!binning) { + emit_shader(ring, fp); - OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); - OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ + OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ + } OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2); OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 85c22a54cf7..bd6483ff42c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -117,7 +117,7 @@ struct fd3_shader_stateobj { }; void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog); + struct fd_program_stateobj *prog, bool binning); void fd3_prog_init(struct pipe_context *pctx); void fd3_prog_fini(struct pipe_context *pctx); |