summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alyssa Rosenzweig <[email protected]> 2019-03-12 23:16:37 +0000
committer Alyssa Rosenzweig <[email protected]> 2019-03-14 22:47:11 +0000
commit 8c26890ac251526bf74777faf62b0c8b84b0c19f (patch)
tree cac92ba21af4c7fc70c6e106de643ec065901549
parent 9bf6024c6bccae34fb992db6ede6485c045cafde (diff)
panfrost/mfbd: Respect per-job depth write flag
While a depth buffer may be supplied, it only needs to be written out if the depth writemask is set for at least one draw AND the depth buffer is not immediately invalidated afterwards (as it is when rendering to scanout). This refactors panfrost_job to provide a depth write requirement, which is now implemented for MFBD depth buffers.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
-rw-r--r-- src/gallium/drivers/panfrost/pan_context.c | 30
-rw-r--r-- src/gallium/drivers/panfrost/pan_job.h | 9
-rw-r--r-- src/gallium/drivers/panfrost/pan_mfbd.c | 21
-rw-r--r-- src/gallium/drivers/panfrost/pan_sfbd.c | 2
4 files changed, 42 insertions, 20 deletions
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index cb226cc2220..a038ea122f7 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -50,19 +50,6 @@ extern const char *pan_counters_base;
/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN
-/* TODO: Sample size, etc */
-
-static void
-panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
-{
- struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
-
- job->msaa |= enabled;
-
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);
-}
-
/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
* independent between color buffers and depth/stencil). To enable, we allocate
* the AFBC metadata buffer and mark that it is enabled. We do -not- actually
@@ -789,15 +776,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
+ struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+
if (with_vertex_data) {
panfrost_emit_vertex_data(ctx);
}
+ bool msaa = ctx->rasterizer->base.multisample;
+
if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
- panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample);
+
+ /* TODO: Sample size */
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
+ SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
}
+ /* Enable job requirements at draw-time */
+
+ if (msaa)
+ job->requirements |= PAN_REQ_MSAA;
+
+ if (ctx->depth_stencil->depth.writemask)
+ job->requirements |= PAN_REQ_DEPTH_WRITE;
+
if (ctx->occlusion_query) {
ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 10503d944ac..30f1cf4bd5c 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -33,6 +33,9 @@ struct panfrost_job_key {
struct pipe_surface *zsbuf;
};
+#define PAN_REQ_MSAA (1 << 0)
+#define PAN_REQ_DEPTH_WRITE (1 << 1)
+
/* A panfrost_job corresponds to a bound FBO we're rendering to,
* collecting over multiple draws. */
@@ -48,8 +51,10 @@ struct panfrost_job {
float clear_depth;
unsigned clear_stencil;
- /* Whether this job uses MSAA */
- bool msaa;
+ /* Whether this job uses the corresponding requirement (PAN_REQ_*
+ * bitmask) */
+ unsigned requirements;
+
};
/* Functions for managing the above */
diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c
index b9c7cb221e7..68c842981f3 100644
--- a/src/gallium/drivers/panfrost/pan_mfbd.c
+++ b/src/gallium/drivers/panfrost/pan_mfbd.c
@@ -143,8 +143,6 @@ panfrost_mfbd_set_zsbuf(
fbx->ds_afbc.zero1 = 0x10009;
fbx->ds_afbc.padding = 0x1000;
-
- fb->unk3 |= MALI_MFBD_DEPTH_WRITE;
} else if (rsrc->bo->layout == PAN_LINEAR) {
fb->unk3 |= MALI_MFBD_EXTRA;
fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
@@ -246,7 +244,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
rts[0].framebuffer_stride = 0;
}
- if (job->msaa) {
+ /* When scanning out, the depth buffer is immediately invalidated, so
+ * we don't need to waste bandwidth writing it out. This can improve
+ * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
+ * memory bandwidth!).
+ *
+ * The exception is ReadPixels, but this is not supported on GLES so we
+ * can safely ignore it. */
+
+ if (panfrost_is_scanout(ctx)) {
+ job->requirements &= ~PAN_REQ_DEPTH_WRITE;
+ }
+
+ /* Actualize the requirements */
+
+ if (job->requirements & PAN_REQ_MSAA) {
rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
/* XXX */
@@ -254,6 +266,9 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
fb.rt_count_2 = 4;
}
+ if (job->requirements & PAN_REQ_DEPTH_WRITE)
+ fb.unk3 |= MALI_MFBD_DEPTH_WRITE;
+
if (ctx->pipe_framebuffer.nr_cbufs == 1) {
struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;
diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c
index 0e283bbb082..1c08f97fd1d 100644
--- a/src/gallium/drivers/panfrost/pan_sfbd.c
+++ b/src/gallium/drivers/panfrost/pan_sfbd.c
@@ -132,7 +132,7 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y)
/* TODO */
}
- if (job->msaa)
+ if (job->requirements & PAN_REQ_MSAA)
fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B;
return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD;