diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/iris/iris_blorp.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_genx_protos.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_program.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 198 |
5 files changed, 209 insertions, 2 deletions
diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index fa46dafcda2..d6b99c59ca7 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -307,6 +307,10 @@ iris_blorp_exec(struct blorp_batch *blorp_batch, iris_require_command_space(batch, 1400); +#if GEN_GEN == 8 + genX(update_pma_fix)(ice, batch, false); +#endif + const unsigned scale = params->fast_clear_op ? UINT_MAX : 1; if (ice->state.current_hash_scale != scale) { genX(emit_hashing_mode)(ice, batch, params->x1 - params->x0, diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 85ffd1fece4..74a66f4a5cf 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -134,6 +134,7 @@ enum { #define IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES (1ull << 55) #define IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 56) #define IRIS_DIRTY_VF_STATISTICS (1ull << 57) +#define IRIS_DIRTY_PMA_FIX (1ull << 58) #define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_CS | \ IRIS_DIRTY_SAMPLER_STATES_CS | \ diff --git a/src/gallium/drivers/iris/iris_genx_protos.h b/src/gallium/drivers/iris/iris_genx_protos.h index 16da78d7e9f..84d4b4b324c 100644 --- a/src/gallium/drivers/iris/iris_genx_protos.h +++ b/src/gallium/drivers/iris/iris_genx_protos.h @@ -37,6 +37,9 @@ void genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch, unsigned width, unsigned height, unsigned scale); +void genX(update_pma_fix)(struct iris_context *ice, + struct iris_batch *batch, + bool enable); /* iris_blorp.c */ void genX(init_blorp)(struct iris_context *ice); diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index ae701ec984d..886cdff56b6 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -2387,6 +2387,8 @@ static void iris_bind_fs_state(struct pipe_context *ctx, void *state) { struct iris_context *ice = (struct iris_context *) ctx; + struct iris_screen *screen = (struct iris_screen *) ctx->screen; + const struct gen_device_info *devinfo = &screen->devinfo; struct iris_uncompiled_shader *old_ish = ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; struct iris_uncompiled_shader *new_ish = state; @@ -2401,6 +2403,9 @@ iris_bind_fs_state(struct pipe_context *ctx, void *state) (new_ish->nir->info.outputs_written & color_bits)) ice->state.dirty |= IRIS_DIRTY_PS_BLEND; + if (devinfo->gen == 8) + ice->state.dirty |= IRIS_DIRTY_PMA_FIX; + bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT); } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 6f0aa182339..ac6a5dd5fd1 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1055,6 +1055,10 @@ struct iris_genx_state { uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)]; +#if GEN_GEN == 8 + bool pma_fix_enabled; +#endif + #if GEN_GEN == 9 /* Is object level preemption enabled? */ bool object_preemption; @@ -1242,6 +1246,9 @@ iris_bind_blend_state(struct pipe_context *ctx, void *state) ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_BLEND]; + + if (GEN_GEN == 8) + ice->state.dirty |= IRIS_DIRTY_PMA_FIX; } /** @@ -1276,6 +1283,9 @@ struct iris_depth_stencil_alpha_state { /** Outbound to resolve and cache set tracking. */ bool depth_writes_enabled; bool stencil_writes_enabled; + + /** Outbound to Gen8-9 PMA stall equations */ + bool depth_test_enabled; }; /** @@ -1295,6 +1305,7 @@ iris_create_zsa_state(struct pipe_context *ctx, cso->alpha = state->alpha; cso->depth_writes_enabled = state->depth.writemask; + cso->depth_test_enabled = state->depth.enabled; cso->stencil_writes_enabled = state->stencil[0].writemask != 0 || (two_sided_stencil && state->stencil[1].writemask != 0); @@ -1364,6 +1375,181 @@ iris_bind_zsa_state(struct pipe_context *ctx, void *state) ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_DEPTH_STENCIL_ALPHA]; + + if (GEN_GEN == 8) + ice->state.dirty |= IRIS_DIRTY_PMA_FIX; +} + +#if GEN_GEN == 8 +static bool +want_pma_fix(struct iris_context *ice) +{ + UNUSED struct iris_screen *screen = (void *) ice->ctx.screen; + UNUSED const struct gen_device_info *devinfo = &screen->devinfo; + const struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + const struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; + const struct iris_blend_state *cso_blend = ice->state.cso_blend; + + /* In very specific combinations of state, we can instruct Gen8-9 hardware + * to avoid stalling at the pixel mask array. The state equations are + * documented in these places: + * + * - Gen8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE + * - Gen9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable + * + * Both equations share some common elements: + * + * no_hiz_op = + * !(3DSTATE_WM_HZ_OP::DepthBufferClear || + * 3DSTATE_WM_HZ_OP::DepthBufferResolve || + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || + * 3DSTATE_WM_HZ_OP::StencilBufferClear) && + * + * killpixels = + * 3DSTATE_WM::ForceKillPix != ForceOff && + * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || + * 3DSTATE_PS_BLEND::AlphaTestEnable || + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) + * + * (Technically the stencil PMA treats ForceKillPix differently, + * but I think this is a documentation oversight, and we don't + * ever use it in this way, so it doesn't matter). + * + * common_pma_fix = + * 3DSTATE_WM::ForceThreadDispatch != 1 && + * 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 && + * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && + * 3DSTATE_DEPTH_BUFFER::HIZ Enable && + * 3DSTATE_WM::EDSC_Mode != EDSC_PREPS && + * 3DSTATE_PS_EXTRA::PixelShaderValid && + * no_hiz_op + * + * These are always true: + * + * 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 + * 3DSTATE_PS_EXTRA::PixelShaderValid + * + * Also, we never use the normal drawing path for HiZ ops; these are true: + * + * !(3DSTATE_WM_HZ_OP::DepthBufferClear || + * 3DSTATE_WM_HZ_OP::DepthBufferResolve || + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || + * 3DSTATE_WM_HZ_OP::StencilBufferClear) + * + * This happens sometimes: + * + * 3DSTATE_WM::ForceThreadDispatch != 1 + * + * However, we choose to ignore it as it either agrees with the signal + * (dispatch was already enabled, so nothing out of the ordinary), or + * there are no framebuffer attachments (so no depth or HiZ anyway, + * meaning the PMA signal will already be disabled). + */ + + if (!cso_fb->zsbuf) + return false; + + struct iris_resource *zres, *sres; + iris_get_depth_stencil_resources(cso_fb->zsbuf->texture, &zres, &sres); + + /* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && + * 3DSTATE_DEPTH_BUFFER::HIZ Enable && + */ + if (!zres || !iris_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level)) + return false; + + /* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */ + if (wm_prog_data->early_fragment_tests) + return false; + + /* 3DSTATE_WM::ForceKillPix != ForceOff && + * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || + * 3DSTATE_PS_BLEND::AlphaTestEnable || + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) + */ + bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask || + cso_blend->alpha_to_coverage || cso_zsa->alpha.enabled; + + /* The Gen8 depth PMA equation becomes: + * + * depth_writes = + * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE + * + * stencil_writes = + * 3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE + * + * Z_PMA_OPT = + * common_pma_fix && + * 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable && + * ((killpixels && (depth_writes || stencil_writes)) || + * 3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF) + * + */ + if (!cso_zsa->depth_test_enabled) + return false; + + return wm_prog_data->computed_depth_mode != PSCDEPTH_OFF || + (killpixels && (cso_zsa->depth_writes_enabled || + (sres && cso_zsa->stencil_writes_enabled))); +} +#endif + +void +genX(update_pma_fix)(struct iris_context *ice, + struct iris_batch *batch, + bool enable) +{ +#if GEN_GEN == 8 + struct iris_genx_state *genx = ice->state.genx; + + if (genx->pma_fix_enabled == enable) + return; + + genx->pma_fix_enabled = enable; + + /* According to the Broadwell PIPE_CONTROL documentation, software should + * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set + * prior to the LRI. If stencil buffer writes are enabled, then a Render * Cache Flush is also necessary. + * + * The Gen9 docs say to use a depth stall rather than a command streamer + * stall. However, the hardware seems to violently disagree. A full + * command streamer stall seems to be needed in both cases. + */ + iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)", + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_FLUSH); + + uint32_t reg_val; + iris_pack_state(GENX(CACHE_MODE_1), ®_val, reg) { + reg.NPPMAFixEnable = enable; + reg.NPEarlyZFailsDisable = enable; + reg.NPPMAFixEnableMask = true; + reg.NPEarlyZFailsDisableMask = true; + } + iris_emit_lri(batch, CACHE_MODE_1, reg_val); + + /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache + * Flush bits is often necessary. We do it regardless because it's easier. + * The render cache flush is also necessary if stencil writes are enabled. + * + * Again, the Gen9 docs give a different set of flushes but the Broadwell + * flushes seem to work just as well. + */ + iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)", + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_FLUSH); +#endif } /** @@ -2816,6 +3002,9 @@ iris_set_framebuffer_state(struct pipe_context *ctx, ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_FRAMEBUFFER]; + if (GEN_GEN == 8) + ice->state.dirty |= IRIS_DIRTY_PMA_FIX; + #if GEN_GEN == 11 // XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?) // XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6 @@ -5642,10 +5831,15 @@ iris_upload_dirty_render_state(struct iris_context *ice, } } +#if GEN_GEN == 8 + if (dirty & IRIS_DIRTY_PMA_FIX) { + bool enable = want_pma_fix(ice); + genX(update_pma_fix)(ice, batch, enable); + } +#endif + if (ice->state.current_hash_scale != 1) genX(emit_hashing_mode)(ice, batch, UINT_MAX, UINT_MAX, 1); - - /* TODO: Gen8 PMA fix */ } static void |