summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/panfrost/pan_context.c 30
-rw-r--r-- src/gallium/drivers/panfrost/pan_job.h 9
-rw-r--r-- src/gallium/drivers/panfrost/pan_mfbd.c 21
-rw-r--r-- src/gallium/drivers/panfrost/pan_sfbd.c 2
4 files changed, 42 insertions, 20 deletions
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index cb226cc2220..a038ea122f7 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -50,19 +50,6 @@ extern const char *pan_counters_base;
/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN
-/* TODO: Sample size, etc */
-
-static void
-panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
-{
- struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
-
- job->msaa |= enabled;
-
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);
-}
-
/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
* independent between color buffers and depth/stencil). To enable, we allocate
* the AFBC metadata buffer and mark that it is enabled. We do -not- actually
@@ -789,15 +776,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
+ struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+
if (with_vertex_data) {
panfrost_emit_vertex_data(ctx);
}
+ bool msaa = ctx->rasterizer->base.multisample;
+
if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
- panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample);
+
+ /* TODO: Sample size */
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
+ SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
}
+ /* Enable job requirements at draw-time */
+
+ if (msaa)
+ job->requirements |= PAN_REQ_MSAA;
+
+ if (ctx->depth_stencil->depth.writemask)
+ job->requirements |= PAN_REQ_DEPTH_WRITE;
+
if (ctx->occlusion_query) {
ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 10503d944ac..30f1cf4bd5c 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -33,6 +33,9 @@ struct panfrost_job_key {
struct pipe_surface *zsbuf;
};
+#define PAN_REQ_MSAA (1 << 0)
+#define PAN_REQ_DEPTH_WRITE (1 << 1)
+
/* A panfrost_job corresponds to a bound FBO we're rendering to,
* collecting over multiple draws. */
@@ -48,8 +51,10 @@ struct panfrost_job {
float clear_depth;
unsigned clear_stencil;
- /* Whether this job uses MSAA */
- bool msaa;
+ /* Whether this job uses the corresponding requirement (PAN_REQ_*
+ * bitmask) */
+ unsigned requirements;
+
};
/* Functions for managing the above */
diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c
index b9c7cb221e7..68c842981f3 100644
--- a/src/gallium/drivers/panfrost/pan_mfbd.c
+++ b/src/gallium/drivers/panfrost/pan_mfbd.c
@@ -143,8 +143,6 @@ panfrost_mfbd_set_zsbuf(
fbx->ds_afbc.zero1 = 0x10009;
fbx->ds_afbc.padding = 0x1000;
-
- fb->unk3 |= MALI_MFBD_DEPTH_WRITE;
} else if (rsrc->bo->layout == PAN_LINEAR) {
fb->unk3 |= MALI_MFBD_EXTRA;
fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
@@ -246,7 +244,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
rts[0].framebuffer_stride = 0;
}
- if (job->msaa) {
+ /* When scanning out, the depth buffer is immediately invalidated, so
+ * we don't need to waste bandwidth writing it out. This can improve
+ * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
+ * memory bandwidth!).
+ *
+ * The exception is ReadPixels on the depth buffer, but depth readback is
+ * not supported on GLES so we can safely ignore it. */
+
+ if (panfrost_is_scanout(ctx)) {
+ job->requirements &= ~PAN_REQ_DEPTH_WRITE;
+ }
+
+ /* Actualize the requirements */
+
+ if (job->requirements & PAN_REQ_MSAA) {
rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
/* XXX */
@@ -254,6 +266,9 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
fb.rt_count_2 = 4;
}
+ if (job->requirements & PAN_REQ_DEPTH_WRITE)
+ fb.unk3 |= MALI_MFBD_DEPTH_WRITE;
+
if (ctx->pipe_framebuffer.nr_cbufs == 1) {
struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;
diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c
index 0e283bbb082..1c08f97fd1d 100644
--- a/src/gallium/drivers/panfrost/pan_sfbd.c
+++ b/src/gallium/drivers/panfrost/pan_sfbd.c
@@ -132,7 +132,7 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y)
/* TODO */
}
- if (job->msaa)
+ if (job->requirements & PAN_REQ_MSAA)
fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B;
return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD;