diff options
author | Alyssa Rosenzweig <[email protected]> | 2019-03-12 23:16:37 +0000 |
---|---|---|
committer | Alyssa Rosenzweig <[email protected]> | 2019-03-14 22:47:11 +0000 |
commit | 8c26890ac251526bf74777faf62b0c8b84b0c19f (patch) | |
tree | cac92ba21af4c7fc70c6e106de643ec065901549 /src/gallium | |
parent | 9bf6024c6bccae34fb992db6ede6485c045cafde (diff) |
panfrost/mfbd: Respect per-job depth write flag
While a depth buffer may be supplied, it only needs to be written to if
the depth writemask is set for any draw AND if the depth buffer is not
immediately invalidated (it is invalidated immediately when scanning out). This refactors
panfrost_job to provide a depth write requirement, which is now
implemented for MFBD depth buffers.
Signed-off-by: Alyssa Rosenzweig <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/panfrost/pan_context.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_job.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_mfbd.c | 21 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_sfbd.c | 2 |
4 files changed, 42 insertions, 20 deletions
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index cb226cc2220..a038ea122f7 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -50,19 +50,6 @@ extern const char *pan_counters_base; /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ //#define DRY_RUN -/* TODO: Sample size, etc */ - -static void -panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) -{ - struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); - - job->msaa |= enabled; - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); -} - /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically * indepdent between color buffers and depth/stencil). To enable, we allocate * the AFBC metadata buffer and mark that it is enabled. 
We do -not- actually @@ -789,15 +776,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) void panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) { + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); + if (with_vertex_data) { panfrost_emit_vertex_data(ctx); } + bool msaa = ctx->rasterizer->base.multisample; + if (ctx->dirty & PAN_DIRTY_RASTERIZER) { ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; - panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); + + /* TODO: Sample size */ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); } + /* Enable job requirements at draw-time */ + + if (msaa) + job->requirements |= PAN_REQ_MSAA; + + if (ctx->depth_stencil->depth.writemask) + job->requirements |= PAN_REQ_DEPTH_WRITE; + if (ctx->occlusion_query) { ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index 10503d944ac..30f1cf4bd5c 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -33,6 +33,9 @@ struct panfrost_job_key { struct pipe_surface *zsbuf; }; +#define PAN_REQ_MSAA (1 << 0) +#define PAN_REQ_DEPTH_WRITE (1 << 1) + /* A panfrost_job corresponds to a bound FBO we're rendering to, * collecting over multiple draws. 
*/ @@ -48,8 +51,10 @@ struct panfrost_job { float clear_depth; unsigned clear_stencil; - /* Whether this job uses MSAA */ - bool msaa; + /* Whether this job uses the corresponding requirement (PAN_REQ_* + * bitmask) */ + unsigned requirements; + }; /* Functions for managing the above */ diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index b9c7cb221e7..68c842981f3 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -143,8 +143,6 @@ panfrost_mfbd_set_zsbuf( fbx->ds_afbc.zero1 = 0x10009; fbx->ds_afbc.padding = 0x1000; - - fb->unk3 |= MALI_MFBD_DEPTH_WRITE; } else if (rsrc->bo->layout == PAN_LINEAR) { fb->unk3 |= MALI_MFBD_EXTRA; fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1; @@ -246,7 +244,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) rts[0].framebuffer_stride = 0; } - if (job->msaa) { + /* When scanning out, the depth buffer is immediately invalidated, so + * we don't need to waste bandwidth writing it out. This can improve + * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of + * memory bandwidth!). + * + * The exception is ReadPixels, but this is not supported on GLES so we + * can safely ignore it. 
*/ + + if (panfrost_is_scanout(ctx)) { + job->requirements &= ~PAN_REQ_DEPTH_WRITE; + } + + /* Actualize the requirements */ + + if (job->requirements & PAN_REQ_MSAA) { rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA; /* XXX */ @@ -254,6 +266,9 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) fb.rt_count_2 = 4; } + if (job->requirements & PAN_REQ_DEPTH_WRITE) + fb.unk3 |= MALI_MFBD_DEPTH_WRITE; + if (ctx->pipe_framebuffer.nr_cbufs == 1) { struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c index 0e283bbb082..1c08f97fd1d 100644 --- a/src/gallium/drivers/panfrost/pan_sfbd.c +++ b/src/gallium/drivers/panfrost/pan_sfbd.c @@ -132,7 +132,7 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y) /* TODO */ } - if (job->msaa) + if (job->requirements & PAN_REQ_MSAA) fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B; return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD; |