summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/genX_pipe_control.c539
1 files changed, 403 insertions, 136 deletions
diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c
index 8eb37444253..35a99fa4c28 100644
--- a/src/mesa/drivers/dri/i965/genX_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c
@@ -25,172 +25,439 @@
#include "brw_defines.h"
#include "brw_state.h"
+static unsigned
+flags_to_post_sync_op(uint32_t flags)
+{
+ if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
+ return WriteImmediateData;
+
+ if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
+ return WritePSDepthCount;
+
+ if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
+ return WriteTimestamp;
+
+ return 0;
+}
+
/**
- * According to the latest documentation, any PIPE_CONTROL with the
- * "Command Streamer Stall" bit set must also have another bit set,
- * with five different options:
- *
- * - Render Target Cache Flush
- * - Depth Cache Flush
- * - Stall at Pixel Scoreboard
- * - Post-Sync Operation
- * - Depth Stall
- * - DC Flush Enable
- *
- * I chose "Stall at Pixel Scoreboard" since we've used it effectively
- * in the past, but the choice is fairly arbitrary.
+ * Do the given flags have a Post Sync or LRI Post Sync operation?
*/
-static void
-gen8_add_cs_stall_workaround_bits(uint32_t *flags)
+static enum pipe_control_flags
+get_post_sync_flags(enum pipe_control_flags flags)
{
- uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP |
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DATA_CACHE_FLUSH;
-
- /* If we're doing a CS stall, and don't already have one of the
- * workaround bits set, add "Stall at Pixel Scoreboard."
+ flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP |
+ PIPE_CONTROL_LRI_POST_SYNC_OP;
+
+ /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
+ * "LRI Post Sync Operation". So more than one bit set would be illegal.
*/
- if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
- *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ assert(util_bitcount(flags) <= 1);
+
+ return flags;
}
-/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
+#define IS_COMPUTE_PIPELINE(brw) \
+ (GEN_GEN >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE)
+
+/* Closed interval - GEN_GEN \in [x, y] */
+#define IS_GEN_BETWEEN(x, y) (GEN_GEN >= x && GEN_GEN <= y)
+#define IS_GENx10_BETWEEN(x, y) \
+ (GEN_VERSIONx10 >= x && GEN_VERSIONx10 <= y)
+
+/**
+ * Emit a series of PIPE_CONTROL commands, taking into account any
+ * workarounds necessary to actually accomplish the caller's request.
*
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ * Unless otherwise noted, spec quotations in this function come from:
*
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch.
+ * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
+ * Restrictions for PIPE_CONTROL.
+ *
+ * You should not use this function directly. Use the helpers in
+ * brw_pipe_control.c instead, which may split the pipe control further.
*/
-static uint32_t
-gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
+void
+genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset, uint64_t imm)
{
- if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
- if (flags & PIPE_CONTROL_CS_STALL) {
- /* If we're doing a CS stall, reset the counter and carry on. */
- brw->pipe_controls_since_last_cs_stall = 0;
- return 0;
- }
+ UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
+ enum pipe_control_flags non_lri_post_sync_flags =
+ post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
- /* If this is the fourth pipe control without a CS stall, do one now. */
- if (++brw->pipe_controls_since_last_cs_stall == 4) {
- brw->pipe_controls_since_last_cs_stall = 0;
- return PIPE_CONTROL_CS_STALL;
+ /* Recursive PIPE_CONTROL workarounds --------------------------------
+ * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
+ *
+ * We do these first because we want to look at the original operation,
+ * rather than any workarounds we set.
+ */
+ if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
+ /* Hardware workaround: SNB B-Spec says:
+ *
+ * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
+ * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
+ * required."
+ */
+ brw_emit_post_sync_nonzero_flush(brw);
+ }
+
+ if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
+ /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
+ * lists several workarounds:
+ *
+ * "Project: SKL, KBL, BXT
+ *
+ * If the VF Cache Invalidation Enable is set to a 1 in a
+ * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
+ * sets to 0, with the VF Cache Invalidation Enable set to 0
+ * needs to be sent prior to the PIPE_CONTROL with VF Cache
+ * Invalidation Enable set to a 1."
+ */
+ genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
+ }
+
+ if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
+ /* Project: SKL / Argument: LRI Post Sync Operation [23]
+ *
+ * "PIPECONTROL command with “Command Streamer Stall Enable” must be
+ * programmed prior to programming a PIPECONTROL command with "LRI
+ * Post Sync Operation" in GPGPU mode of operation (i.e when
+ * PIPELINE_SELECT command is set to GPGPU mode of operation)."
+ *
+ * The same text exists a few rows below for Post Sync Op.
+ */
+ genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
+ }
+
+ /* "Flush Types" workarounds ---------------------------------------------
+ * We do these now because they may add post-sync operations or CS stalls.
+ */
+
+ if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
+ /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
+ *
+ * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
+ * 'Write PS Depth Count' or 'Write Timestamp'."
+ */
+ if (!bo) {
+ flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
+ post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
+ non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
+ bo = brw->workaround_bo;
}
}
- return 0;
-}
-/* #1130 from gen10 workarounds page in h/w specs:
- * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
- * not enabled in same PIPE CONTROL and Enable Pixel score board stall if
- * Render target cache flush is enabled."
- *
- * Applicable to CNL B0 and C0 steppings only.
- */
-static void
-gen10_add_rcpfe_workaround_bits(uint32_t *flags)
-{
- if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
- *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- } else if (*flags &
- (PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP)) {
- *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
+ if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
+ /* Project: PRE-HSW / Argument: Depth Stall
+ *
+ * "The following bits must be clear:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)"
+ */
+ assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
}
-}
-static unsigned
-flags_to_post_sync_op(uint32_t flags)
-{
- flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP;
+ if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
+ /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
+ *
+ * "This bit must be DISABLED for operations other than writing
+ * PS_DEPTH_COUNT."
+ *
+ * This seems like nonsense. An Ivybridge workaround requires us to
+ * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
+ * operation. Gen8+ requires us to emit depth stalls and depth cache
+ * flushes together. So, it's hard to imagine this means anything other
+ * than "we originally intended this to be used for PS_DEPTH_COUNT".
+ *
+ * We ignore the supposed restriction and do nothing.
+ */
+ }
- assert(util_bitcount(flags) <= 1);
+ if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
+ /* Project: PRE-HSW / Argument: Depth Cache Flush
+ *
+ * "Depth Stall must be clear ([13] of DW1)."
+ */
+ assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
+ }
- if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
- return WriteImmediateData;
+ if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
+ /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
+ *
+ * "This bit must be DISABLED for End-of-pipe (Read) fences,
+ * PS_DEPTH_COUNT or TIMESTAMP queries."
+ *
+ * TODO: Implement end-of-pipe checking.
+ */
+ assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP)));
+ }
- if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
- return WritePSDepthCount;
+ if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
+ /* From the PIPE_CONTROL instruction table, bit 1:
+ *
+ * "This bit is ignored if Depth Stall Enable is set.
+ * Further, the render cache is not flushed even if Write Cache
+ * Flush Enable bit is set."
+ *
+ * We assert that the caller doesn't do this combination, to try and
+ * prevent mistakes. It shouldn't hurt the GPU, though.
+ *
+ * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
+ * and "Render Target Flush" combo is explicitly required for BTI
+ * update workarounds.
+ */
+ assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_RENDER_TARGET_FLUSH)));
+ }
- if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
- return WriteTimestamp;
+ /* PIPE_CONTROL page workarounds ------------------------------------- */
- return 0;
-}
+ if (IS_GEN_BETWEEN(7, 8) && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
+ /* From the PIPE_CONTROL page itself:
+ *
+ * "IVB, HSW, BDW
+ * Restriction: Pipe_control with CS-stall bit set must be issued
+ * before a pipe-control command that has the State Cache
+ * Invalidate bit set."
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+ }
-void
-genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset, uint64_t imm)
-{
- if (GEN_GEN >= 8) {
- if (GEN_GEN == 8)
- gen8_add_cs_stall_workaround_bits(&flags);
-
- if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
- if (GEN_GEN == 9) {
- /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
- * lists several workarounds:
- *
- * "Project: SKL, KBL, BXT
- *
- * If the VF Cache Invalidation Enable is set to a 1 in a
- * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
- * sets to 0, with the VF Cache Invalidation Enable set to 0
- * needs to be sent prior to the PIPE_CONTROL with VF Cache
- * Invalidation Enable set to a 1."
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
- if (GEN_GEN >= 9) {
- /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
- *
- * "Project: BDW+
- *
- * When VF Cache Invalidate is set “Post Sync Operation” must
- * be enabled to “Write Immediate Data” or “Write PS Depth
- * Count” or “Write Timestamp”."
- *
- * If there's a BO, we're already doing some kind of write.
- * If not, add a write to the workaround BO.
- *
- * XXX: This causes GPU hangs on Broadwell, so restrict it to
- * Gen9+ for now...see this bug for more information:
- * https://bugs.freedesktop.org/show_bug.cgi?id=103787
- */
- if (!bo) {
- flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
- bo = brw->workaround_bo;
- }
- }
+ if (GEN_IS_HASWELL) {
+ /* From the PIPE_CONTROL page itself:
+ *
+ * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
+ * Prior to programming a PIPECONTROL command with any of the RO
+ * cache invalidation bit set, program a PIPECONTROL flush command
+ * with “CS stall” bit and “HDC Flush” bit set."
+ *
+ * TODO: Actually implement this. What's an HDC Flush?
+ */
+ }
+
+ if (flags & PIPE_CONTROL_FLUSH_LLC) {
+ /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
+ *
+ * "Project: ALL
+ * SW must always program Post-Sync Operation to "Write Immediate
+ * Data" when Flush LLC is set."
+ *
+ * For now, we just require the caller to do it.
+ */
+ assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
+ }
+
+ /* "Post-Sync Operation" workarounds -------------------------------- */
+
+ /* Project: All / Argument: Global Snapshot Count Reset [19]
+ *
+ * "This bit must not be exercised on any product.
+ * Requires stall bit ([20] of DW1) set."
+ *
+ * We don't use this, so we just assert that it isn't used. The
+ * PIPE_CONTROL instruction page indicates that they intended this
+ * as a debug feature and don't think it is useful in production,
+ * but it may actually be usable, should we ever want to.
+ */
+ assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
+
+ if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
+ PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
+ /* Project: All / Arguments:
+ *
+ * - Generic Media State Clear [16]
+ * - Indirect State Pointers Disable [16]
+ *
+ * "Requires stall bit ([20] of DW1) set."
+ *
+ * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
+ * State Clear) says:
+ *
+ * "PIPECONTROL command with “Command Streamer Stall Enable” must be
+ * programmed prior to programming a PIPECONTROL command with "Media
+ * State Clear" set in GPGPU mode of operation"
+ *
+ * This is a subset of the earlier rule, so there's nothing to do.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+ }
+
+ if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
+ /* Project: All / Argument: Store Data Index
+ *
+ * "Post-Sync Operation ([15:14] of DW1) must be set to something other
+ * than '0'."
+ *
+ * For now, we just assert that the caller does this. We might want to
+ * automatically add a write to the workaround BO...
+ */
+ assert(non_lri_post_sync_flags != 0);
+ }
+
+ if (flags & PIPE_CONTROL_SYNC_GFDT) {
+ /* Project: All / Argument: Sync GFDT
+ *
+ * "Post-Sync Operation ([15:14] of DW1) must be set to something other
+ * than '0' or 0x2520[13] must be set."
+ *
+ * For now, we just assert that the caller does this.
+ */
+ assert(non_lri_post_sync_flags != 0);
+ }
+
+ if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
+ /* Project: SNB, IVB, HSW / Argument: TLB inv
+ *
+ * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
+ * must be set to something other than '0'."
+ *
+ * For now, we just assert that the caller does this.
+ */
+ assert(non_lri_post_sync_flags != 0);
+ }
+
+ if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
+ /* Project: IVB+ / Argument: TLB inv
+ *
+ * "Requires stall bit ([20] of DW1) set."
+ *
+ * Also, from the PIPE_CONTROL instruction table:
+ *
+ * "Project: SKL+
+ * Post Sync Operation or CS stall must be set to ensure a TLB
+ * invalidation occurs. Otherwise no cycle will occur to the TLB
+ * cache to invalidate."
+ *
+ * This is not a subset of the earlier rule, so there's nothing to do.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+ }
+
+ if (GEN_GEN == 9 && devinfo->gt == 4) {
+ /* TODO: The big Skylake GT4 post sync op workaround */
+ }
+
+ /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
+
+ if (IS_COMPUTE_PIPELINE(brw)) {
+ if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
+ /* Project: SKL+ / Argument: Tex Invalidate
+ * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
}
- if (GEN_GEN == 10)
- gen10_add_rcpfe_workaround_bits(&flags);
- } else if (GEN_GEN >= 6) {
- if (GEN_GEN == 6 &&
- (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
- /* Hardware workaround: SNB B-Spec says:
+ if (GEN_GEN == 8 && (post_sync_flags ||
+ (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
+ /* Project: BDW / Arguments:
+ *
+ * - LRI Post Sync Operation [23]
+ * - Post Sync Op [15:14]
+ * - Notify En [8]
+ * - Depth Stall [13]
+ * - Render Target Cache Flush [12]
+ * - Depth Cache Flush [0]
+ * - DC Flush Enable [5]
+ *
+ * "Requires stall bit ([20] of DW) set for all GPGPU and Media
+ * Workloads."
*
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
- * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
- * required.
+ * (The docs have separate table rows for each bit, with essentially
+ * the same workaround text. We've combined them here.)
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ /* Also, from the PIPE_CONTROL instruction table, bit 20:
+ *
+ * "Project: BDW
+ * This bit must be always set when PIPE_CONTROL command is
+ * programmed by GPGPU and MEDIA workloads, except for the cases
+ * when only Read Only Cache Invalidation bits are set (State
+ * Cache Invalidation Enable, Instruction cache Invalidation
+ * Enable, Texture Cache Invalidation Enable, Constant Cache
+ * Invalidation Enable). This is to WA FFDOP CG issue, this WA
+ * need not implemented when FF_DOP_CG is disable via "Fixed
+ * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
+ *
+ * It sounds like we could avoid CS stalls in some cases, but we
+ * don't currently bother. This list isn't exactly the list above,
+ * either...
*/
- brw_emit_post_sync_nonzero_flush(brw);
}
+ }
- flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+ /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
+ *
+ * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
+ * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ *
+ * Note that the kernel does CS stalls between batches, so we only need
+ * to count them within a batch. We currently naively count every 4, and
+ * don't skip the ones with only read-cache-invalidate bits set. This
+ * may or may not be a problem...
+ */
+ if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
+ if (flags & PIPE_CONTROL_CS_STALL) {
+ /* If we're doing a CS stall, reset the counter and carry on. */
+ brw->pipe_controls_since_last_cs_stall = 0;
+ }
+
+ /* If this is the fourth pipe control without a CS stall, do one now. */
+ if (++brw->pipe_controls_since_last_cs_stall == 4) {
+ brw->pipe_controls_since_last_cs_stall = 0;
+ flags |= PIPE_CONTROL_CS_STALL;
+ }
}
+ /* "Stall" workarounds ----------------------------------------------
+ * These have to come after the earlier ones because we may have added
+ * some additional CS stalls above.
+ */
+
+ if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
+ /* Project: PRE-SKL, VLV, CHV
+ *
+ * "[All Stepping][All SKUs]:
+ *
+ * One of the following must also be set:
+ *
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - DC Flush Enable ([5] of DW1)"
+ *
+ * If we don't already have one of those bits set, we choose to add
+ * "Stall at Pixel Scoreboard". Some of the other bits require a
+ * CS stall as a workaround (see above), which would send us into
+ * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard"
+ * appears to be safe, so we choose that.
+ */
+ const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DATA_CACHE_FLUSH;
+ if (!(flags & wa_bits))
+ flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ }
+
+ /* Emit --------------------------------------------------------------- */
+
brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
#if GEN_GEN >= 9
pc.FlushLLC = 0;