summary | refs | log | tree | commit | diff | stats
path: root/src/intel
diff options
context:
space:
mode:
author Jordan Justen <[email protected]> 2016-03-28 12:08:49 -0700
committer Jordan Justen <[email protected]> 2016-03-28 17:01:35 -0700
commit 5879cb0251e7b4593eb4fd01684bd68f0945e3d1 (patch)
tree 32e93c1721f919b189d14d98b971d57a6e51e0bf /src/intel
parent 433cf90650f12039e16c2c245fd2e1cbf1ed3ac2 (diff)
anv: Fix cache pollution race during L3 partitioning set-up.
Port 0aa4f99f562a05880a779707cbcd46be459863bf to anv.

Signed-off-by: Jordan Justen <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- src/intel/vulkan/gen7_cmd_buffer.c 36
-rw-r--r-- src/intel/vulkan/gen8_cmd_buffer.c 36
2 files changed, 52 insertions, 20 deletions
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 04c1d3b3477..06b3a75cbef 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -323,22 +323,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
bool changed = cmd_buffer->state.current_l3_config != l3cr2_val;
if (changed) {
- /* According to the hardware docs, the L3 partitioning can only be changed
- * while the pipeline is completely drained and the caches are flushed,
- * which involves a first PIPE_CONTROL flush which stalls the pipeline and
- * initiates invalidation of the relevant caches...
+ /* According to the hardware docs, the L3 partitioning can only be
+ * changed while the pipeline is completely drained and the caches are
+ * flushed, which involves a first PIPE_CONTROL flush which stalls the
+ * pipeline...
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
- .TextureCacheInvalidationEnable = true,
- .ConstantCacheInvalidationEnable = true,
- .InstructionCacheInvalidateEnable = true,
.DCFlushEnable = true,
.PostSyncOperation = NoWrite,
.CommandStreamerStallEnable = true);
- /* ...followed by a second stalling flush which guarantees that
- * invalidation is complete when the L3 configuration registers are
- * modified.
+ /* ...followed by a second pipelined PIPE_CONTROL that initiates
+ * invalidation of the relevant caches. Note that because RO
+ * invalidation happens at the top of the pipeline (i.e. right away as
+ * the PIPE_CONTROL command is processed by the CS) we cannot combine it
+ * with the previous stalling flush as the hardware documentation
+ * suggests, because that would cause the CS to stall on previous
+ * rendering *after* RO invalidation and wouldn't prevent the RO caches
+ * from being polluted by concurrent rendering before the stall
+ * completes. This intentionally doesn't implement the SKL+ hardware
+ * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
+ * texture cache invalidation bit set for GPGPU workloads because the
+ * previous and subsequent PIPE_CONTROLs already guarantee that there is
+ * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+ .TextureCacheInvalidationEnable = true,
+ .ConstantCacheInvalidationEnable = true,
+ .InstructionCacheInvalidateEnable = true,
+ .PostSyncOperation = NoWrite);
+
+ /* Now send a third stalling flush to make sure that invalidation is
+ * complete when the L3 configuration registers are modified.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DCFlushEnable = true,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 3fb5c276107..dab1d7411e7 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -134,22 +134,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
bool changed = cmd_buffer->state.current_l3_config != l3cr_val;
if (changed) {
- /* According to the hardware docs, the L3 partitioning can only be changed
- * while the pipeline is completely drained and the caches are flushed,
- * which involves a first PIPE_CONTROL flush which stalls the pipeline and
- * initiates invalidation of the relevant caches...
+ /* According to the hardware docs, the L3 partitioning can only be
+ * changed while the pipeline is completely drained and the caches are
+ * flushed, which involves a first PIPE_CONTROL flush which stalls the
+ * pipeline...
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
- .TextureCacheInvalidationEnable = true,
- .ConstantCacheInvalidationEnable = true,
- .InstructionCacheInvalidateEnable = true,
.DCFlushEnable = true,
.PostSyncOperation = NoWrite,
.CommandStreamerStallEnable = true);
- /* ...followed by a second stalling flush which guarantees that
- * invalidation is complete when the L3 configuration registers are
- * modified.
+ /* ...followed by a second pipelined PIPE_CONTROL that initiates
+ * invalidation of the relevant caches. Note that because RO
+ * invalidation happens at the top of the pipeline (i.e. right away as
+ * the PIPE_CONTROL command is processed by the CS) we cannot combine it
+ * with the previous stalling flush as the hardware documentation
+ * suggests, because that would cause the CS to stall on previous
+ * rendering *after* RO invalidation and wouldn't prevent the RO caches
+ * from being polluted by concurrent rendering before the stall
+ * completes. This intentionally doesn't implement the SKL+ hardware
+ * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
+ * texture cache invalidation bit set for GPGPU workloads because the
+ * previous and subsequent PIPE_CONTROLs already guarantee that there is
+ * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+ .TextureCacheInvalidationEnable = true,
+ .ConstantCacheInvalidationEnable = true,
+ .InstructionCacheInvalidateEnable = true,
+ .PostSyncOperation = NoWrite);
+
+ /* Now send a third stalling flush to make sure that invalidation is
+ * complete when the L3 configuration registers are modified.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DCFlushEnable = true,