summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2016-12-06 17:52:14 -0800
committerJason Ekstrand <[email protected]>2017-02-14 14:18:55 -0800
commite8d52dab482045ced3f5b9154aedbb8641fa961a (patch)
tree5451303c69dde2b1e89edb16c8b466bba87fe0ce
parent62bba4ba2dd491502eb3910a7a62e851bcc61884 (diff)
anv: Add support for the PMA fix on Broadwell
This helps Dota 2 on Broadwell by 8-9%. I also hacked up the driver and used the Sascha "shadowmapping" demo to get some results. Setting uses_kill to true dropped the framerate on the demo by 25-30%. Enabling the PMA fix brought it back up to around 90% of the original framerate. Reviewed-by: Lionel Landwerlin <[email protected]> Reviewed-by: Nanley Chery <[email protected]>
-rw-r--r--src/intel/vulkan/TODO1
-rw-r--r--src/intel/vulkan/anv_cmd_buffer.c2
-rw-r--r--src/intel/vulkan/anv_genX.h3
-rw-r--r--src/intel/vulkan/anv_private.h17
-rw-r--r--src/intel/vulkan/gen7_cmd_buffer.c7
-rw-r--r--src/intel/vulkan/gen8_cmd_buffer.c131
-rw-r--r--src/intel/vulkan/genX_blorp_exec.c5
-rw-r--r--src/intel/vulkan/genX_cmd_buffer.c15
-rw-r--r--src/intel/vulkan/genX_pipeline.c42
9 files changed, 221 insertions, 2 deletions
diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
index 38acc0dd5b6..f8b73a11315 100644
--- a/src/intel/vulkan/TODO
+++ b/src/intel/vulkan/TODO
@@ -12,5 +12,4 @@ Performance:
- Compressed multisample support
- Pushing pieces of UBOs?
- Enable guardband clipping
- - pma stall workaround
- Use soft-pin to avoid relocations
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 5886fa63e69..8c08f8d48e6 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
state->restart_index = UINT32_MAX;
state->dynamic = default_dynamic_state;
state->need_query_wa = true;
+ state->pma_fix_enabled = false;
+ state->hiz_enabled = false;
if (state->attachments != NULL) {
vk_free(&cmd_buffer->pool->alloc, state->attachments);
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index d04fe38a517..67147b0e92b 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
+void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
+ bool enable);
+
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct gen_l3_config *l3_config,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 89ae8183799..449aa0f4ab6 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1169,6 +1169,20 @@ struct anv_cmd_state {
bool need_query_wa;
/**
+ * Whether or not the gen8 PMA fix is enabled. We ensure that, at the top
+ * of any command buffer it is disabled by disabling it in EndCommandBuffer
+ * and before invoking the secondary in ExecuteCommands.
+ */
+ bool pma_fix_enabled;
+
+ /**
+ * Whether or not we know for certain that HiZ is enabled for the current
+ * subpass. If, for whatever reason, we are unsure as to whether HiZ is
+ * enabled or not, this will be false.
+ */
+ bool hiz_enabled;
+
+ /**
* Array length is anv_cmd_state::pass::attachment_count. Array content is
* valid only when recording a render pass instance.
*/
@@ -1471,8 +1485,11 @@ struct anv_pipeline {
uint32_t cs_right_mask;
+ bool writes_depth;
+ bool depth_test_enable;
bool writes_stencil;
bool depth_clamp_enable;
+ bool kill_pixel;
struct {
uint32_t sf[7];
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 013ed8718a4..c1a25e8ce26 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -260,6 +260,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.dirty = 0;
}
+void
+genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
+ bool enable)
+{
+ /* The NP PMA fix doesn't exist on gen7 */
+}
+
void genX(CmdSetEvent)(
VkCommandBuffer commandBuffer,
VkEvent event,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 8c8de622eb8..0628f3a0dd1 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -155,6 +155,133 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
#endif
void
+genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
+{
+#if GEN_GEN == 8
+ if (cmd_buffer->state.pma_fix_enabled == enable)
+ return;
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.DepthCacheFlushEnable = true;
+ pc.CommandStreamerStallEnable = true;
+ pc.RenderTargetCacheFlushEnable = true;
+ }
+
+ uint32_t cache_mode;
+ anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
+ .NPPMAFixEnable = enable,
+ .NPEarlyZFailsDisable = enable,
+ .NPPMAFixEnableMask = true,
+ .NPEarlyZFailsDisableMask = true);
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+ lri.RegisterOffset = GENX(CACHE_MODE_1_num);
+ lri.DataDWord = cache_mode;
+ }
+
+ /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
+ * Flush bits is often necessary. We do it regardless because it's easier.
+ * The render cache flush is also necessary if stencil writes are enabled.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.DepthStallEnable = true;
+ pc.DepthCacheFlushEnable = true;
+ pc.RenderTargetCacheFlushEnable = true;
+ }
+
+ cmd_buffer->state.pma_fix_enabled = enable;
+#endif /* GEN_GEN == 8 */
+}
+
+static inline bool
+want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
+{
+ assert(GEN_GEN == 8);
+
+ /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
+ *
+ * SW must set this bit in order to enable this fix when following
+ * expression is TRUE.
+ *
+ * 3DSTATE_WM::ForceThreadDispatch != 1 &&
+ * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
+ * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
+ * (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
+ * !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
+ * (3DSTATE_PS_EXTRA::PixelShaderValid) &&
+ * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
+ * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
+ * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
+ * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
+ * (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
+ * (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
+ * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
+ * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
+ * 3DSTATE_PS_BLEND::AlphaTestEnable ||
+ * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
+ * 3DSTATE_WM::ForceKillPix != ForceOff &&
+ * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
+ * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
+ * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
+ * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
+ * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
+ * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
+ */
+
+ /* These are always true:
+ * 3DSTATE_WM::ForceThreadDispatch != 1 &&
+ * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
+ */
+
+ /* We only enable the PMA fix if we know for certain that HiZ is enabled.
+ * If we don't know whether HiZ is enabled or not, we disable the PMA fix
+ * and there is no harm.
+ *
+ * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
+ * 3DSTATE_DEPTH_BUFFER::HIZ Enable
+ */
+ if (!cmd_buffer->state.hiz_enabled)
+ return false;
+
+ /* 3DSTATE_PS_EXTRA::PixelShaderValid */
+ struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
+ return false;
+
+ /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
+ const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+ if (wm_prog_data->early_fragment_tests)
+ return false;
+
+ /* We never use anv_pipeline for HiZ ops so this is trivially true:
+ * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
+ * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
+ * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
+ * 3DSTATE_WM_HZ_OP::StencilBufferClear)
+ */
+
+ /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
+ if (!pipeline->depth_test_enable)
+ return false;
+
+ /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
+ * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
+ * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
+ * 3DSTATE_PS_BLEND::AlphaTestEnable ||
+ * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
+ * 3DSTATE_WM::ForceKillPix != ForceOff &&
+ * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
+ * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
+ * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
+ * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
+ * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
+ * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
+ */
+ return (pipeline->kill_pixel && (pipeline->writes_depth ||
+ pipeline->writes_stencil)) ||
+ wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
+}
+
+void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
@@ -211,6 +338,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
}
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
+ ANV_CMD_DIRTY_RENDER_TARGETS |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
@@ -234,6 +362,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
pipeline->gen8.wm_depth_stencil);
+
+ genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
+ want_depth_pma_fix(cmd_buffer));
}
#else
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 663e6c92819..6f0b0638973 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -154,6 +154,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+ /* BLORP doesn't do anything fancy with depth such as discards, so we want
+ * the PMA fix off. Also, off is always the safe option.
+ */
+ genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
+
blorp_exec(batch, params);
cmd_buffer->state.vb_dirty = ~0;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 14338b22ece..40a72f4d141 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -637,6 +637,11 @@ genX(EndCommandBuffer)(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ /* We want every command buffer to start with the PMA fix in a known state,
+ * so we disable it at the end of the command buffer.
+ */
+ genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
+
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_cmd_buffer_end_batch_buffer(cmd_buffer);
@@ -654,6 +659,11 @@ genX(CmdExecuteCommands)(
assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+ /* The secondary command buffers will assume that the PMA fix is disabled
+ * when they begin executing. Make sure this is true.
+ */
+ genX(cmd_buffer_enable_pma_fix)(primary, false);
+
for (uint32_t i = 0; i < commandBufferCount; i++) {
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
@@ -2227,7 +2237,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
const bool has_stencil =
image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
- /* FIXME: Implement the PMA stall W/A */
+ cmd_buffer->state.hiz_enabled = has_hiz;
+
/* FIXME: Width and Height are wrong */
genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
@@ -2465,6 +2476,8 @@ void genX(CmdEndRenderPass)(
anv_cmd_buffer_resolve_subpass(cmd_buffer);
+ cmd_buffer->state.hiz_enabled = false;
+
#ifndef NDEBUG
anv_dump_add_framebuffer(cmd_buffer, cmd_buffer->state.framebuffer);
#endif
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index f641661db1b..d0dbe13f393 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -455,6 +455,10 @@ emit_rs_state(struct anv_pipeline *pipeline,
*/
#if GEN_GEN >= 8
raster.DXMultisampleRasterizationEnable = true;
+ /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
+ * computations. If we ever set this bit to a different value, they will
+ * need to be updated accordingly.
+ */
raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
raster.ForceMultisampling = false;
#else
@@ -664,6 +668,8 @@ emit_ds_state(struct anv_pipeline *pipeline,
* to make sure it's initialized to something useful.
*/
pipeline->writes_stencil = false;
+ pipeline->writes_depth = false;
+ pipeline->depth_test_enable = false;
memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw));
return;
}
@@ -722,6 +728,9 @@ emit_ds_state(struct anv_pipeline *pipeline,
if (info->depthTestEnable && info->depthCompareOp == VK_COMPARE_OP_EQUAL)
depth_stencil.DepthBufferWriteEnable = false;
+ pipeline->writes_depth = depth_stencil.DepthBufferWriteEnable;
+ pipeline->depth_test_enable = depth_stencil.DepthTestEnable;
+
#if GEN_GEN <= 7
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
#else
@@ -1443,6 +1452,38 @@ emit_3dstate_vf_topology(struct anv_pipeline *pipeline)
}
#endif
+static void
+compute_kill_pixel(struct anv_pipeline *pipeline,
+ const VkPipelineMultisampleStateCreateInfo *ms_info,
+ const struct anv_subpass *subpass)
+{
+ if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+ pipeline->kill_pixel = false;
+ return;
+ }
+
+ const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+
+ /* This computes the KillPixel portion of the computation for whether or
+ * not we want to enable the PMA fix on gen8. It's given by this chunk of
+ * the giant formula:
+ *
+ * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
+ * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
+ * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
+ * 3DSTATE_PS_BLEND::AlphaTestEnable ||
+ * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
+ *
+ * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
+ * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
+ * of an alpha test.
+ */
+ pipeline->kill_pixel =
+ subpass->has_ds_self_dep || wm_prog_data->uses_kill ||
+ wm_prog_data->uses_omask ||
+ (ms_info && ms_info->alphaToCoverageEnable);
+}
+
static VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
@@ -1480,6 +1521,7 @@ genX(graphics_pipeline_create)(
emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
+ compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState, subpass);
emit_urb_setup(pipeline);