diff options
| field | value | date |
|---|---|---|
| author | Rob Clark <[email protected]> | 2017-06-03 13:36:25 -0400 |
| committer | Rob Clark <[email protected]> | 2017-06-07 12:32:00 -0400 |
| commit | 5b60004525876616c4719bb790108db4650b1f49 (patch) | |
| tree | cc11b8987407f837a7967a8b2a4087d3a3deaed2 /src/gallium/drivers/freedreno/a5xx | |
| parent | 313f6360aa1204eea8639112d5ddce697a7aabdf (diff) | |
freedreno/a5xx: LRZ support
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a5xx')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_blend.c | 3 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_blend.h | 1 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_draw.c | 99 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 29 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_emit.h | 6 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_gmem.c | 30 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_zsa.c | 20 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_zsa.h | 2 |
8 files changed, 178 insertions, 12 deletions
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.c b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c index e5107a718b1..42918f753a8 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blend.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c @@ -90,6 +90,8 @@ fd5_blend_state_create(struct pipe_context *pctx, so->base = *cso; + so->lrz_write = true; /* unless blend enabled for any MRT */ + for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { const struct pipe_rt_blend_state *rt; @@ -126,6 +128,7 @@ fd5_blend_state_create(struct pipe_context *pctx, A5XX_RB_MRT_CONTROL_BLEND | A5XX_RB_MRT_CONTROL_BLEND2; mrt_blend |= (1 << i); + so->lrz_write = false; } if (reads_dest) { diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.h b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h index 85c615824db..f758738f198 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blend.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h @@ -46,6 +46,7 @@ struct fd5_blend_stateobj { uint32_t blend_control_alpha; } rb_mrt[A5XX_MAX_RENDER_TARGETS]; uint32_t rb_blend_cntl; + bool lrz_write; }; static inline struct fd5_blend_stateobj * diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c index bc5232a4c17..d1f1d039b69 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_draw.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c @@ -128,12 +128,19 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, fixup_shader_state(ctx, &emit.key); unsigned dirty = ctx->dirty; + const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit); + const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit); /* do regular pass first, since that is more likely to fail compiling: */ - if (!(fd5_emit_get_vp(&emit) && fd5_emit_get_fp(&emit))) + if (!vp || !fp) return false; + /* figure out whether we need to disable LRZ write for binning + * pass using draw pass's fp: + */ + emit.no_lrz_write = fp->writes_pos || fp->has_kill; + 
emit.key.binning_pass = false; emit.dirty = dirty; @@ -174,6 +181,86 @@ static bool is_z32(enum pipe_format format) } } +static void +fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) +{ + struct fd_ringbuffer *ring; + uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth); + + // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth + // splitting both clear and lrz clear out into their own rb's. And + // just throw away any draws prior to clear. (Anything not fullscreen + // clear, just fallback to generic path that treats it as a normal + // draw + + if (!batch->lrz_clear) { + batch->lrz_clear = fd_ringbuffer_new(batch->ctx->screen->pipe, 0x1000); + fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); + } + + ring = batch->lrz_clear; + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x10000000); + + OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0x20fffff); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1); + OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0)); + + OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000181); + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); + OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) | + A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); + OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2)); + OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz))); + OUT_RELOCW(ring, zsbuf->lrz, 0x1000, 0, 0); + + OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1); + OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE)); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0)); + + OUT_PKT4(ring, 
REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | + A5XX_RB_CLEAR_CNTL_MASK(0xf)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ + + OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2); + OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) | + A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height)); + OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_BYPASS); + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | + A5XX_RB_RESOLVE_CNTL_1_Y(0)); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) | + A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1)); + + fd5_emit_blit(batch->ctx, ring); +} + static bool fd5_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) @@ -186,8 +273,6 @@ fd5_clear(struct fd_context *ctx, unsigned buffers, is_z32(pfb->zsbuf->format)) return false; - /* TODO handle scissor.. or fallback to slow-clear? */ - ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); @@ -283,6 +368,14 @@ fd5_clear(struct fd_context *ctx, unsigned buffers, OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ fd5_emit_blit(ctx, ring); + + if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) { + struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); + if (zsbuf->lrz) { + zsbuf->lrz_valid = true; + fd5_clear_lrz(ctx->batch, zsbuf, depth); + } + } } /* disable fast clear to not interfere w/ gmem->mem, etc.. 
*/ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 2c31831c976..0f65802d6ac 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -459,6 +459,7 @@ void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd5_emit *emit) { + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit); const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit); const enum fd_dirty_3d_state dirty = emit->dirty; @@ -467,7 +468,6 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker5(ring, 5); if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { - struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0}; for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { @@ -487,7 +487,6 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); - struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; uint32_t rb_alpha_control = zsa->rb_alpha_control; if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) @@ -500,6 +499,24 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, zsa->rb_stencil_control); } + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) { + struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend); + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + + if (pfb->zsbuf) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl; + + if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) + gras_lrz_cntl = 0; + else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write) + gras_lrz_cntl |= 
A5XX_GRAS_LRZ_CNTL_LRZ_WRITE; + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, gras_lrz_cntl); + } + } + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; @@ -588,7 +605,6 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER)) { - struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); unsigned nr = pfb->nr_cbufs; @@ -648,8 +664,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t i; for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { - enum pipe_format format = pipe_surface_format( - ctx->batch->framebuffer.cbufs[i]); + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); bool is_int = util_format_is_pure_integer(format); bool has_alpha = util_format_has_alpha(format); uint32_t control = blend->rb_mrt[i].control; @@ -858,10 +873,6 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - /* other regs not used (yet?) and always seem to have same value: */ - OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); - OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */ - OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1); OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h index 7df7eb71232..2d8a0fd09c4 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h @@ -51,6 +51,12 @@ struct fd5_emit { bool rasterflat; bool no_decode_srgb; + /* in binning pass, we don't have real frag shader, so we + * don't know if real draw disqualifies lrz write. So just + * figure that out up-front and stash it in the emit. 
+ */ + bool no_lrz_write; + /* cached to avoid repeated lookups of same variants: */ const struct ir3_shader_variant *vp, *fp; /* TODO: other shader stages.. */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c index 6669885959e..d82315a7082 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -162,6 +162,24 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */ + if (rsc->lrz) { + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3); + OUT_RELOCW(ring, rsc->lrz, 0x1000, 0, 0); + OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch)); + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2); + OUT_RELOCW(ring, rsc->lrz, 0, 0, 0); + } else { + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */ + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + if (rsc->stencil) { if (gmem) { stride = 1 * gmem->bin_w; @@ -344,12 +362,20 @@ emit_binning_pass(struct fd_batch *batch) static void fd5_emit_tile_init(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; fd5_emit_restore(batch, ring); + if (batch->lrz_clear) + ctx->emit_ib(ring, batch->lrz_clear); + fd5_emit_lrz_flush(ring); + OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */ + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); @@ -364,8 +390,12 @@ fd5_emit_tile_init(struct fd_batch *batch) OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */ + emit_zs(ring, 
pfb->zsbuf, &ctx->gmem); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem); + if (use_hw_binning(batch)) { emit_binning_pass(batch); + fd5_emit_lrz_flush(ring); patch_draws(batch, USE_VISIBILITY); } else { patch_draws(batch, IGNORE_VISIBILITY); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c index 7b2be934146..ee8e0fce375 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c @@ -45,6 +45,26 @@ fd5_zsa_state_create(struct pipe_context *pctx, so->base = *cso; + switch (cso->depth.func) { + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE; + break; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER; + break; + + default: + /* LRZ not enabled */ + so->gras_lrz_cntl = 0; + break; + } + + if (!(cso->stencil->enabled || cso->alpha.enabled || !cso->depth.writemask)) + so->lrz_write = true; + so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h index 86bdd5feeb1..cacc6323807 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h @@ -40,6 +40,8 @@ struct fd5_zsa_stateobj { uint32_t rb_depth_cntl; uint32_t rb_stencil_control; uint32_t rb_stencilrefmask; + uint32_t gras_lrz_cntl; + bool lrz_write; }; static inline struct fd5_zsa_stateobj * |