diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/panfrost/pan_context.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_job.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_mfbd.c | 21 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_sfbd.c | 2 |
4 files changed, 42 insertions, 20 deletions
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index cb226cc2220..a038ea122f7 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -50,19 +50,6 @@ extern const char *pan_counters_base; /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ //#define DRY_RUN -/* TODO: Sample size, etc */ - -static void -panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) -{ - struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); - - job->msaa |= enabled; - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); -} - /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically * independent between color buffers and depth/stencil). To enable, we allocate * the AFBC metadata buffer and mark that it is enabled. 
We do -not- actually @@ -789,15 +776,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) void panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) { + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); + if (with_vertex_data) { panfrost_emit_vertex_data(ctx); } + bool msaa = ctx->rasterizer->base.multisample; + if (ctx->dirty & PAN_DIRTY_RASTERIZER) { ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; - panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); + + /* TODO: Sample size */ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); } + /* Enable job requirements at draw-time */ + + if (msaa) + job->requirements |= PAN_REQ_MSAA; + + if (ctx->depth_stencil->depth.writemask) + job->requirements |= PAN_REQ_DEPTH_WRITE; + if (ctx->occlusion_query) { ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index 10503d944ac..30f1cf4bd5c 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -33,6 +33,9 @@ struct panfrost_job_key { struct pipe_surface *zsbuf; }; +#define PAN_REQ_MSAA (1 << 0) +#define PAN_REQ_DEPTH_WRITE (1 << 1) + /* A panfrost_job corresponds to a bound FBO we're rendering to, * collecting over multiple draws. 
*/ @@ -48,8 +51,10 @@ struct panfrost_job { float clear_depth; unsigned clear_stencil; - /* Whether this job uses MSAA */ - bool msaa; + /* Whether this job uses the corresponding requirement (PAN_REQ_* + * bitmask) */ + unsigned requirements; + }; /* Functions for managing the above */ diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index b9c7cb221e7..68c842981f3 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -143,8 +143,6 @@ panfrost_mfbd_set_zsbuf( fbx->ds_afbc.zero1 = 0x10009; fbx->ds_afbc.padding = 0x1000; - - fb->unk3 |= MALI_MFBD_DEPTH_WRITE; } else if (rsrc->bo->layout == PAN_LINEAR) { fb->unk3 |= MALI_MFBD_EXTRA; fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1; @@ -246,7 +244,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) rts[0].framebuffer_stride = 0; } - if (job->msaa) { + /* When scanning out, the depth buffer is immediately invalidated, so + * we don't need to waste bandwidth writing it out. This can improve + * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of + * memory bandwidth!). + * + * The exception is ReadPixels, but this is not supported on GLES so we + * can safely ignore it. 
*/ + + if (panfrost_is_scanout(ctx)) { + job->requirements &= ~PAN_REQ_DEPTH_WRITE; + } + + /* Actualize the requirements */ + + if (job->requirements & PAN_REQ_MSAA) { rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA; /* XXX */ @@ -254,6 +266,9 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) fb.rt_count_2 = 4; } + if (job->requirements & PAN_REQ_DEPTH_WRITE) + fb.unk3 |= MALI_MFBD_DEPTH_WRITE; + if (ctx->pipe_framebuffer.nr_cbufs == 1) { struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c index 0e283bbb082..1c08f97fd1d 100644 --- a/src/gallium/drivers/panfrost/pan_sfbd.c +++ b/src/gallium/drivers/panfrost/pan_sfbd.c @@ -132,7 +132,7 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y) /* TODO */ } - if (job->msaa) + if (job->requirements & PAN_REQ_MSAA) fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B; return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD; |