summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/freedreno/a5xx
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno/a5xx')
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_blend.c | 3
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_blend.h | 1
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_draw.c | 99
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 29
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_emit.h | 6
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_gmem.c | 30
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_zsa.c | 20
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_zsa.h | 2
8 files changed, 178 insertions, 12 deletions
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.c b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c
index e5107a718b1..42918f753a8 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_blend.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c
@@ -90,6 +90,8 @@ fd5_blend_state_create(struct pipe_context *pctx,
so->base = *cso;
+ so->lrz_write = true; /* unless blend enabled for any MRT */
+
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
@@ -126,6 +128,7 @@ fd5_blend_state_create(struct pipe_context *pctx,
A5XX_RB_MRT_CONTROL_BLEND |
A5XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i);
+ so->lrz_write = false;
}
if (reads_dest) {
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.h b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h
index 85c615824db..f758738f198 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_blend.h
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h
@@ -46,6 +46,7 @@ struct fd5_blend_stateobj {
uint32_t blend_control_alpha;
} rb_mrt[A5XX_MAX_RENDER_TARGETS];
uint32_t rb_blend_cntl;
+ bool lrz_write;
};
static inline struct fd5_blend_stateobj *
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
index bc5232a4c17..d1f1d039b69 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c
@@ -128,12 +128,19 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
fixup_shader_state(ctx, &emit.key);
unsigned dirty = ctx->dirty;
+ const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
+ const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
/* do regular pass first, since that is more likely to fail compiling: */
- if (!(fd5_emit_get_vp(&emit) && fd5_emit_get_fp(&emit)))
+ if (!vp || !fp)
return false;
+ /* figure out whether we need to disable LRZ write for binning
+ * pass using draw pass's fp:
+ */
+ emit.no_lrz_write = fp->writes_pos || fp->has_kill;
+
emit.key.binning_pass = false;
emit.dirty = dirty;
@@ -174,6 +181,86 @@ static bool is_z32(enum pipe_format format)
}
}
+static void
+fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
+{ /* Records, into batch->lrz_clear, a blit that fills zsbuf->lrz with the packed 'depth' value. */
+ struct fd_ringbuffer *ring;
+ uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth); /* clear value packed for PIPE_FORMAT_Z16_UNORM */
+
+ // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth
+ // splitting both clear and lrz clear out into their own rb's. And
+ // just throw away any draws prior to clear. (Anything not fullscreen
+ // clear, just fallback to generic path that treats it as a normal
+ // draw)
+
+ if (!batch->lrz_clear) { /* first LRZ clear for this batch: create its ringbuffer */
+ batch->lrz_clear = fd_ringbuffer_new(batch->ctx->screen->pipe, 0x1000);
+ fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); /* parent is the batch's gmem ring */
+ }
+
+ ring = batch->lrz_clear; /* everything below is written to the lrz_clear ring, not the draw ring */
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, 0x10000000); /* NOTE(review): raw register value; meaning not visible here */
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0x20fffff);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
+ OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000181);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_CNTL written as zero for the clear */
+
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); /* describe the LRZ buffer as MRT0: linear R16_UNORM */
+ OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
+ OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2)); /* NOTE(review): "* 2" presumably converts 16-bit texels to bytes -- confirm */
+ OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
+ OUT_RELOCW(ring, zsbuf->lrz, 0x1000, 0, 0); /* same 0x1000 offset the emit_zs hunk uses for GRAS_LRZ_BUFFER_BASE */
+
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0)); /* blit targets the MRT0 slot programmed above */
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
+ A5XX_RB_CLEAR_CNTL_MASK(0xf));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
+ OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
+
+ OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
+ OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
+ A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
+ OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
+
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
+
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); /* rectangle from (0,0) to (lrz_width-1, lrz_height-1) */
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
+ A5XX_RB_RESOLVE_CNTL_1_Y(0));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
+ A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
+
+ fd5_emit_blit(batch->ctx, ring); /* emit the blit using the state set up above (helper defined outside this view) */
+}
+
static bool
fd5_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
@@ -186,8 +273,6 @@ fd5_clear(struct fd_context *ctx, unsigned buffers,
is_z32(pfb->zsbuf->format))
return false;
- /* TODO handle scissor.. or fallback to slow-clear? */
-
ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
@@ -283,6 +368,14 @@ fd5_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
fd5_emit_blit(ctx, ring);
+
+ if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+ struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
+ if (zsbuf->lrz) {
+ zsbuf->lrz_valid = true;
+ fd5_clear_lrz(ctx->batch, zsbuf, depth);
+ }
+ }
}
/* disable fast clear to not interfere w/ gmem->mem, etc.. */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
index 2c31831c976..0f65802d6ac 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -459,6 +459,7 @@ void
fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit)
{
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
const enum fd_dirty_3d_state dirty = emit->dirty;
@@ -467,7 +468,6 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
emit_marker5(ring, 5);
if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
@@ -487,7 +487,6 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
uint32_t rb_alpha_control = zsa->rb_alpha_control;
if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
@@ -500,6 +499,24 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, zsa->rb_stencil_control);
}
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
+ struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
+ struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+
+ if (pfb->zsbuf) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
+
+ if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
+ gras_lrz_cntl = 0;
+ else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write)
+ gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, gras_lrz_cntl);
+ }
+ }
+
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
@@ -588,7 +605,6 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
}
if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER)) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
unsigned nr = pfb->nr_cbufs;
@@ -648,8 +664,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t i;
for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
- enum pipe_format format = pipe_surface_format(
- ctx->batch->framebuffer.cbufs[i]);
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
bool is_int = util_format_is_pure_integer(format);
bool has_alpha = util_format_has_alpha(format);
uint32_t control = blend->rb_mrt[i].control;
@@ -858,10 +873,6 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
- /* other regs not used (yet?) and always seem to have same value: */
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
- OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
-
OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h
index 7df7eb71232..2d8a0fd09c4 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h
@@ -51,6 +51,12 @@ struct fd5_emit {
bool rasterflat;
bool no_decode_srgb;
+ /* in binning pass, we don't have real frag shader, so we
+ * don't know if real draw disqualifies lrz write. So just
+ * figure that out up-front and stash it in the emit.
+ */
+ bool no_lrz_write;
+
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vp, *fp;
/* TODO: other shader stages.. */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
index 6669885959e..d82315a7082 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
@@ -162,6 +162,24 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
+ if (rsc->lrz) {
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
+ OUT_RELOCW(ring, rsc->lrz, 0x1000, 0, 0);
+ OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
+ OUT_RELOCW(ring, rsc->lrz, 0, 0, 0);
+ } else {
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
if (rsc->stencil) {
if (gmem) {
stride = 1 * gmem->bin_w;
@@ -344,12 +362,20 @@ emit_binning_pass(struct fd_batch *batch)
static void
fd5_emit_tile_init(struct fd_batch *batch)
{
+ struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
fd5_emit_restore(batch, ring);
+ if (batch->lrz_clear)
+ ctx->emit_ib(ring, batch->lrz_clear);
+
fd5_emit_lrz_flush(ring);
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
+
OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x0);
@@ -364,8 +390,12 @@ fd5_emit_tile_init(struct fd_batch *batch)
OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */
+ emit_zs(ring, pfb->zsbuf, &ctx->gmem);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem);
+
if (use_hw_binning(batch)) {
emit_binning_pass(batch);
+ fd5_emit_lrz_flush(ring);
patch_draws(batch, USE_VISIBILITY);
} else {
patch_draws(batch, IGNORE_VISIBILITY);
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c
index 7b2be934146..ee8e0fce375 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c
@@ -45,6 +45,26 @@ fd5_zsa_state_create(struct pipe_context *pctx,
so->base = *cso;
+ switch (cso->depth.func) {
+ case PIPE_FUNC_LESS:
+ case PIPE_FUNC_LEQUAL:
+ so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE;
+ break;
+
+ case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_GEQUAL:
+ so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER;
+ break;
+
+ default:
+ /* LRZ not enabled */
+ so->gras_lrz_cntl = 0;
+ break;
+ }
+
+ if (!(cso->stencil->enabled || cso->alpha.enabled || !cso->depth.writemask))
+ so->lrz_write = true;
+
so->rb_depth_cntl |=
A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h
index 86bdd5feeb1..cacc6323807 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h
@@ -40,6 +40,8 @@ struct fd5_zsa_stateobj {
uint32_t rb_depth_cntl;
uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask;
+ uint32_t gras_lrz_cntl;
+ bool lrz_write;
};
static inline struct fd5_zsa_stateobj *