summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPaul Berry <[email protected]>2013-08-29 10:17:31 -0700
committerPaul Berry <[email protected]>2013-08-31 17:11:46 -0700
commitdf62421382c39678ac014e88cee7c73c64d42107 (patch)
tree7fc6105bc05c2800ae2e21ad236e7f37f641eebe /src
parentfffba41c6828b8f46a162185147d3e9b9cc479e4 (diff)
i965/gen7: Emit CS stall after 3DSTATE_PUSH_CONSTANT_ALLOC_PS.
This is required by the internal hardware docs and the PRM. Probably the reason we were getting away with not doing it was because we only emitted 3DSTATE_PUSH_CONSTANT_ALLOC_PS during startup. However that's going to change with the introduction of geometry shaders. Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/gen7_urb.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index ed5cda8001b..63467945e3a 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -67,6 +67,36 @@ gen7_allocate_push_constants(struct brw_context *brw)
OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
ADVANCE_BATCH();
+
+ /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
+ *
+ * A PIPE_CONTOL command with the CS Stall bit set must be programmed
+ * in the ring after this instruction.
+ *
+ * No such restriction exists for Haswell.
+ */
+ if (!brw->is_haswell) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
+ /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
+ * CS Stall):
+ *
+ * One of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ *
+ * We choose to do a Post-Sync Operation (Write Immediate Data), since
+ * it seems like it will incur the least additional performance penalty.
+ */
+ OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_RELOC(brw->batch.workaround_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
}
static void