diff options
author | Rob Clark <[email protected]> | 2014-01-07 10:55:07 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-01-08 16:30:18 -0500 |
commit | c0766528baaef48902c87bbdaa4f5926c472269b (patch) | |
tree | f825706059f50c37a9fda7961ec596b0fb6a65da /src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | |
parent | bfb44c24bc1eff850d47984b2cb60c957ffc143d (diff) |
freedreno/a3xx: support for hw binning pass
The binning pass sorts vertices into which bins/tiles they apply to.
The visibility information generated during the binning pass can be
used to speed up the rendering pass by filtering out vertices which
do not apply to the current tile. See:
https://github.com/freedreno/freedreno/wiki/Adreno-tiling#optimized-approach
This brings a significant fps boost. A rough assortment of tests
(supertuxkart, etracer, tremulous, glmark2 'build' test, etc) seems
to yield a ~35-45% fps improvement.
For now, to be conservative, the binning pass is not enabled yet by
default. To enable it use:
FD_MESA_DEBUG=binning
So far I haven't found anything that breaks with binning enabled,
but I'd like a bit more testing before I enable it as default.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx/fd3_gmem.c')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 360 |
1 files changed, 341 insertions, 19 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 3d0a607ed28..8720e087b7b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -106,6 +106,159 @@ depth_base(struct fd_gmem_stateobj *gmem) return align(gmem->bin_w * gmem->bin_h, 0x4000); } +static bool +use_hw_binning(struct fd_context *ctx) +{ + struct fd_gmem_stateobj *gmem = &ctx->gmem; + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); +} + +/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */ +static void update_vsc_pipe(struct fd_context *ctx); +static void +emit_binning_workaround(struct fd_context *ctx) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_ringbuffer *ring = ctx->ring; + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | + A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); + OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | + A3XX_RB_COPY_CONTROL_MODE(0) | + A3XX_RB_COPY_CONTROL_GMEM_BASE(0)); + OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128)); + OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | + A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) | + A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) | + A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | + A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); + + fd3_program_emit(ring, &ctx->solid_prog, false); + + fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { + { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, + }, 1); + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); + OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | + A3XX_HLSQ_CONTROL_0_REG_RESERVED2 | + A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); + OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | + A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE); + OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); + OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */ + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); + OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) | + A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20)); + + OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | + A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | + A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff)); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0)); + + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, 0); /* VFD_INDEX_MIN */ + OUT_RING(ring, 2); /* VFD_INDEX_MAX */ + OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) | + A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE | + A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE | + A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE | + A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE | + A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) | + A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0)); + + OUT_PKT3(ring, CP_DRAW_INDX_2, 5); + OUT_RING(ring, 0x00000000); /* viz query info. */ + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, + INDEX_SIZE_32_BIT, IGNORE_VISIBILITY)); + OUT_RING(ring, 2); /* NumIndices */ + OUT_RING(ring, 2); + OUT_RING(ring, 1); + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1); + OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS)); + + OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); + + OUT_WFI(ring); + + OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); + OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | + A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x00000000); +} + /* transfer from gmem to system memory (ie. normal RAM) */ static void @@ -129,8 +282,8 @@ emit_gmem2mem_surf(struct fd_context *ctx, A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format))); - fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2, - INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } static void @@ -210,7 +363,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &ctx->solid_prog); + fd3_program_emit(ring, &ctx->solid_prog, false); fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, @@ -252,8 +405,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, fd3_emit_gmem_restore_tex(ring, psurf); - fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2, - INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } static void @@ -355,7 +508,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &ctx->blit_prog); + fd3_program_emit(ring, &ctx->blit_prog, false); fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) { { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT }, @@ -381,11 +534,68 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } static void +patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i); + *patch->cs = patch->val | DRAW(0, 0, 0, vismode); + } + util_dynarray_resize(&ctx->draw_patches, 0); +} + +/* for rendering directly to system memory: */ +static void +fd3_emit_sysmem_prep(struct fd_context *ctx) +{ + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = ctx->ring; + uint32_t pitch = 0; + + if (pfb->cbufs[0]) + pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch; + + fd3_emit_restore(ctx); + + OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | + A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); + + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); + + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + + /* setup scissor/offset for current tile: */ + OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | + A3XX_RB_WINDOW_OFFSET_Y(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) | + A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_GMEM_BYPASS | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + + patch_draws(ctx, IGNORE_VISIBILITY); +} + +static void update_vsc_pipe(struct fd_context *ctx) { + struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; int i; + OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); + OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ + for (i = 0; i < 8; i++) { struct fd_vsc_pipe *pipe = &ctx->pipe[i]; @@ -394,7 +604,7 @@ update_vsc_pipe(struct fd_context *ctx) DRM_FREEDRENO_GEM_TYPE_KMEM); } - OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3); + OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) | A3XX_VSC_PIPE_CONFIG_Y(pipe->y) | A3XX_VSC_PIPE_CONFIG_W(pipe->w) | @@ -404,34 +614,45 @@ update_vsc_pipe(struct fd_context *ctx) } } -/* for rendering directly to system memory: */ static void -fd3_emit_sysmem_prep(struct fd_context *ctx) +emit_binning_pass(struct fd_context *ctx) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd_ringbuffer *ring = ctx->ring; - uint32_t pitch = 0; + int i; - if (pfb->cbufs[0]) - pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch; + if (ctx->screen->gpu_id == 320) { + emit_binning_workaround(ctx); - fd3_emit_restore(ctx); + OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); + OUT_RING(ring, 0x00007fff); + } + + OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); + OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); - OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | - A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); - /* setup scissor/offset for current tile: */ + /* setup scissor/offset for whole screen: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | A3XX_RB_WINDOW_OFFSET_Y(0)); + OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); + OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE); + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); @@ -439,9 +660,72 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1)); OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + + for (i = 0; i < 4; i++) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(0) | + A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0)); + } + + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) | + A3XX_PC_VSTREAM_CONTROL_N(0)); + + /* emit IB to binning drawcmds: */ + OUT_IB(ring, ctx->binning_start, ctx->binning_end); + + /* and then put stuff back the way it was: */ + + OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); + OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE | + A3XX_SP_SP_CTRL_REG_CONSTMODE(1) | + A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | + A3XX_SP_SP_CTRL_REG_L0MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_GMEM_BYPASS | A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CACHE_FLUSH); + + if (ctx->screen->gpu_id == 320) { + /* dummy-draw workaround: */ + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 0); /* NumIndices */ + } + + OUT_PKT3(ring, CP_NOP, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_WFI(ring); + + if (ctx->screen->gpu_id == 320) { + emit_binning_workaround(ctx); + } } /* before first tile */ @@ -461,6 +745,18 @@ fd3_emit_tile_init(struct fd_context *ctx) A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); update_vsc_pipe(ctx); + + if (use_hw_binning(ctx)) { + /* mark the end of the binning cmds: */ + fd_ringmarker_mark(ctx->binning_end); + + /* emit hw binning pass: */ + emit_binning_pass(ctx); + + patch_draws(ctx, USE_VISIBILITY); + } else { + patch_draws(ctx, IGNORE_VISIBILITY); + } } /* before mem2gmem */ @@ -472,7 +768,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) struct fd_gmem_stateobj *gmem = &ctx->gmem; uint32_t reg; - OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem)); if (pfb->zsbuf) { @@ -499,6 +794,7 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) static void fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) { + struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; @@ -508,6 +804,32 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) uint32_t x2 = tile->xoff + tile->bin_w - 1; uint32_t y2 = tile->yoff + tile->bin_h - 1; + if (use_hw_binning(ctx)) { + struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; + + assert(pipe->w * pipe->h); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); + + OUT_WFI(ring); + + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) | + A3XX_PC_VSTREAM_CONTROL_N(tile->n)); + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CACHE_FLUSH); + + OUT_PKT3(ring, CP_SET_BIN_DATA, 2); + OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ + OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */ + (tile->p * 4), 0, 0); + } else { + OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); + OUT_RING(ring, 0x00000000); + } + OUT_PKT3(ring, CP_SET_BIN, 3); OUT_RING(ring, 0x00000000); OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); |