diff options
author | Kenneth Graunke <[email protected]> | 2018-11-01 15:55:51 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2019-03-11 19:32:40 -0700 |
commit | c6af96d1bc18cc5af733ca80b11f4a23c414abe1 (patch) | |
tree | 7fdd4c7d48fa9b46cb791e485656ea6ac4f2be29 /src | |
parent | 2c6f7124086ba6926e0b622c9d1a2c61fddcfb4f (diff) |
i965: Use genxml for emitting PIPE_CONTROL.
While this does add a bunch of boilerplate, it also protects us against
the hardware moving bits, or changing their meaning. For something as
finicky as PIPE_CONTROL, the extra safety seems worth it.
We turn PIPE_CONTROL_* into a bitfield of arbitrary flags, and then
pack them appropriately.
Reviewed-by: Topi Pohjolainen <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_pipe_control.c | 243 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_pipe_control.h | 58 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 31 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_pipe_control.c | 243 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/meson.build | 4 |
7 files changed, 362 insertions, 230 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index e01c1e5e0de..d1c23cb2fc0 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -116,51 +116,61 @@ i965_FILES = \ i965_gen4_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen45_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen5_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen6_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen7_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen75_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen8_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen9_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen10_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_gen11_FILES = \ genX_blorp_exec.c \ genX_boilerplate.h \ + genX_pipe_control.c \ genX_state_upload.c i965_oa_GENERATED_FILES = \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 301d3305133..8b46b678064 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -754,6 +754,9 @@ struct brw_context uint32_t report_id); void (*emit_compute_walker)(struct brw_context *brw); + void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); } vtbl; struct brw_bufmgr *bufmgr; diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index 704f65100d3..0e96f76a22e 
100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -23,201 +23,11 @@ #include "brw_context.h" #include "brw_defines.h" +#include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_fbo.h" /** - * According to the latest documentation, any PIPE_CONTROL with the - * "Command Streamer Stall" bit set must also have another bit set, - * with five different options: - * - * - Render Target Cache Flush - * - Depth Cache Flush - * - Stall at Pixel Scoreboard - * - Post-Sync Operation - * - Depth Stall - * - DC Flush Enable - * - * I chose "Stall at Pixel Scoreboard" since we've used it effectively - * in the past, but the choice is fairly arbitrary. - */ -static void -gen8_add_cs_stall_workaround_bits(uint32_t *flags) -{ - uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP | - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DATA_CACHE_FLUSH; - - /* If we're doing a CS stall, and don't already have one of the - * workaround bits set, add "Stall at Pixel Scoreboard." - */ - if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) - *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; -} - -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: - * - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." - * - * Note that the kernel does CS stalls between batches, so we only need - * to count them within a batch. 
- */ -static uint32_t -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->gen == 7 && !devinfo->is_haswell) { - if (flags & PIPE_CONTROL_CS_STALL) { - /* If we're doing a CS stall, reset the counter and carry on. */ - brw->pipe_controls_since_last_cs_stall = 0; - return 0; - } - - /* If this is the fourth pipe control without a CS stall, do one now. */ - if (++brw->pipe_controls_since_last_cs_stall == 4) { - brw->pipe_controls_since_last_cs_stall = 0; - return PIPE_CONTROL_CS_STALL; - } - } - return 0; -} - -/* #1130 from gen10 workarounds page in h/w specs: - * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is - * not enabled in same PIPE CONTROL and Enable Pixel score board stall if - * Render target cache flush is enabled." - * - * Applicable to CNL B0 and C0 steppings only. - */ -static void -gen10_add_rcpfe_workaround_bits(uint32_t *flags) -{ - if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { - *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD; - } else if (*flags & - (PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP)) { - *flags = *flags | PIPE_CONTROL_DEPTH_STALL; - } -} - -static void -brw_emit_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, uint64_t imm) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->gen >= 8) { - if (devinfo->gen == 8) - gen8_add_cs_stall_workaround_bits(&flags); - - if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { - if (devinfo->gen == 9) { - /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description - * lists several workarounds: - * - * "Project: SKL, KBL, BXT - * - * If the VF Cache Invalidation Enable is set to a 1 in a - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields - * sets to 0, with the VF Cache Invalidation Enable set to 0 - * needs to be sent 
prior to the PIPE_CONTROL with VF Cache - * Invalidation Enable set to a 1." - */ - brw_emit_pipe_control_flush(brw, 0); - } - - if (devinfo->gen >= 9) { - /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue: - * - * "Project: BDW+ - * - * When VF Cache Invalidate is set “Post Sync Operation” must - * be enabled to “Write Immediate Data” or “Write PS Depth - * Count” or “Write Timestamp”." - * - * If there's a BO, we're already doing some kind of write. - * If not, add a write to the workaround BO. - * - * XXX: This causes GPU hangs on Broadwell, so restrict it to - * Gen9+ for now...see this bug for more information: - * https://bugs.freedesktop.org/show_bug.cgi?id=103787 - */ - if (!bo) { - flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - bo = brw->workaround_bo; - } - } - } - - if (devinfo->gen == 10) - gen10_add_rcpfe_workaround_bits(&flags); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); - OUT_BATCH(flags); - if (bo) { - OUT_RELOC64(bo, RELOC_WRITE, offset); - } else { - OUT_BATCH(0); - OUT_BATCH(0); - } - OUT_BATCH(imm); - OUT_BATCH(imm >> 32); - ADVANCE_BATCH(); - } else if (devinfo->gen >= 6) { - if (devinfo->gen == 6 && - (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { - /* Hardware workaround: SNB B-Spec says: - * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush - * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is - * required. - */ - brw_emit_post_sync_nonzero_flush(brw); - } - - flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); - - /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24 - * on later platforms. We always use PPGTT on Gen7+. - */ - unsigned gen6_gtt = devinfo->gen == 6 ? 
PIPE_CONTROL_GLOBAL_GTT_WRITE : 0; - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(flags); - if (bo) { - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset); - } else { - OUT_BATCH(0); - } - OUT_BATCH(imm); - OUT_BATCH(imm >> 32); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); - if (bo) { - OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); - } else { - OUT_BATCH(0); - } - OUT_BATCH(imm); - OUT_BATCH(imm >> 32); - ADVANCE_BATCH(); - } -} - -/** * Emit a PIPE_CONTROL with various flushing flags. * * The caller is responsible for deciding what flags are appropriate for the @@ -246,7 +56,7 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); } - brw_emit_pipe_control(brw, flags, NULL, 0, 0); + brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0); } /** @@ -262,7 +72,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, struct brw_bo *bo, uint32_t offset, uint64_t imm) { - brw_emit_pipe_control(brw, flags, bo, offset, imm); + brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm); } /** @@ -357,14 +167,14 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) void gen10_emit_isp_disable(struct brw_context *brw) { - brw_emit_pipe_control(brw, - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_CS_STALL, - NULL, 0, 0); - brw_emit_pipe_control(brw, - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | - PIPE_CONTROL_CS_STALL, - NULL, 0, 0); + brw->vtbl.emit_raw_pipe_control(brw, + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_CS_STALL, + NULL, 0, 0); + brw->vtbl.emit_raw_pipe_control(brw, + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | + PIPE_CONTROL_CS_STALL, + NULL, 0, 0); brw->vs.base.push_constants_dirty = true; brw->tcs.base.push_constants_dirty = true; @@ -561,6 +371,37 @@ int brw_init_pipe_control(struct brw_context *brw, const struct 
gen_device_info *devinfo) { + switch (devinfo->gen) { + case 11: + brw->vtbl.emit_raw_pipe_control = gen11_emit_raw_pipe_control; + break; + case 10: + brw->vtbl.emit_raw_pipe_control = gen10_emit_raw_pipe_control; + break; + case 9: + brw->vtbl.emit_raw_pipe_control = gen9_emit_raw_pipe_control; + break; + case 8: + brw->vtbl.emit_raw_pipe_control = gen8_emit_raw_pipe_control; + break; + case 7: + brw->vtbl.emit_raw_pipe_control = + devinfo->is_haswell ? gen75_emit_raw_pipe_control + : gen7_emit_raw_pipe_control; + break; + case 6: + brw->vtbl.emit_raw_pipe_control = gen6_emit_raw_pipe_control; + break; + case 5: + brw->vtbl.emit_raw_pipe_control = gen5_emit_raw_pipe_control; + break; + case 4: + brw->vtbl.emit_raw_pipe_control = + devinfo->is_g4x ? gen45_emit_raw_pipe_control + : gen4_emit_raw_pipe_control; + break; + } + if (devinfo->gen < 6) return 0; diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h b/src/mesa/drivers/dri/i965/brw_pipe_control.h index 69b1c7c31e6..e213f43a4f7 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.h +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.h @@ -32,34 +32,38 @@ struct brw_bo; * * PIPE_CONTROL operation, a combination MI_FLUSH and register write with * additional flushing control. + * + * The bits here are not the actual hardware values. The actual values + * shift around a bit per-generation, so we just have flags for each + * potential operation, and use genxml to encode the actual packet. 
*/ -#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24)) -#define PIPE_CONTROL_LRI_WRITE_IMMEDIATE (1 << 23) /* Gen7+ */ -#define PIPE_CONTROL_CS_STALL (1 << 20) -#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19) -#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18) -#define PIPE_CONTROL_SYNC_GFDT (1 << 17) -#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16) -#define PIPE_CONTROL_NO_WRITE (0 << 14) -#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) -#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) -#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) -#define PIPE_CONTROL_DEPTH_STALL (1 << 13) -#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12) -#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11) -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */ -#define PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE (1 << 9) -#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) -#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */ -/* GT */ -#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5) -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) -#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) -#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) +enum pipe_control_flags +{ + PIPE_CONTROL_FLUSH_LLC = (1 << 1), + PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2), + PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3), + PIPE_CONTROL_CS_STALL = (1 << 4), + PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5), + PIPE_CONTROL_SYNC_GFDT = (1 << 6), + PIPE_CONTROL_TLB_INVALIDATE = (1 << 7), + PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8), + PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9), + PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10), + PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11), + PIPE_CONTROL_DEPTH_STALL = (1 << 12), + PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13), + PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 
14), + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15), + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16), + PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17), + PIPE_CONTROL_FLUSH_ENABLE = (1 << 18), + PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19), + PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20), + PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21), + PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22), + PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23), + PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24), +}; #define PIPE_CONTROL_CACHE_FLUSH_BITS \ (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 546d103d1a4..402ae692f9f 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -95,6 +95,37 @@ extern const struct brw_tracked_state gen7_urb; extern const struct brw_tracked_state gen8_pma_fix; extern const struct brw_tracked_state brw_cs_work_groups_surface; +void gen4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo 
*bo, uint32_t offset, + uint64_t imm); +void gen10_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); +void gen11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, + uint64_t imm); + static inline bool brw_state_dirty(const struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags) diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c new file mode 100644 index 00000000000..8eb37444253 --- /dev/null +++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c @@ -0,0 +1,243 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "genX_boilerplate.h" +#include "brw_defines.h" +#include "brw_state.h" + +/** + * According to the latest documentation, any PIPE_CONTROL with the + * "Command Streamer Stall" bit set must also have another bit set, + * with five different options: + * + * - Render Target Cache Flush + * - Depth Cache Flush + * - Stall at Pixel Scoreboard + * - Post-Sync Operation + * - Depth Stall + * - DC Flush Enable + * + * I chose "Stall at Pixel Scoreboard" since we've used it effectively + * in the past, but the choice is fairly arbitrary. + */ +static void +gen8_add_cs_stall_workaround_bits(uint32_t *flags) +{ + uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP | + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DATA_CACHE_FLUSH; + + /* If we're doing a CS stall, and don't already have one of the + * workaround bits set, add "Stall at Pixel Scoreboard." + */ + if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) + *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; +} + +/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: + * + * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with + * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." + * + * Note that the kernel does CS stalls between batches, so we only need + * to count them within a batch. + */ +static uint32_t +gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags) +{ + if (GEN_GEN == 7 && !GEN_IS_HASWELL) { + if (flags & PIPE_CONTROL_CS_STALL) { + /* If we're doing a CS stall, reset the counter and carry on. */ + brw->pipe_controls_since_last_cs_stall = 0; + return 0; + } + + /* If this is the fourth pipe control without a CS stall, do one now. 
*/ + if (++brw->pipe_controls_since_last_cs_stall == 4) { + brw->pipe_controls_since_last_cs_stall = 0; + return PIPE_CONTROL_CS_STALL; + } + } + return 0; +} + +/* #1130 from gen10 workarounds page in h/w specs: + * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is + * not enabled in same PIPE CONTROL and Enable Pixel score board stall if + * Render target cache flush is enabled." + * + * Applicable to CNL B0 and C0 steppings only. + */ +static void +gen10_add_rcpfe_workaround_bits(uint32_t *flags) +{ + if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { + *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD; + } else if (*flags & + (PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP)) { + *flags = *flags | PIPE_CONTROL_DEPTH_STALL; + } +} + +static unsigned +flags_to_post_sync_op(uint32_t flags) +{ + flags &= PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP; + + assert(util_bitcount(flags) <= 1); + + if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) + return WriteImmediateData; + + if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) + return WritePSDepthCount; + + if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) + return WriteTimestamp; + + return 0; +} + +void +genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, + struct brw_bo *bo, uint32_t offset, uint64_t imm) +{ + if (GEN_GEN >= 8) { + if (GEN_GEN == 8) + gen8_add_cs_stall_workaround_bits(&flags); + + if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { + if (GEN_GEN == 9) { + /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description + * lists several workarounds: + * + * "Project: SKL, KBL, BXT + * + * If the VF Cache Invalidation Enable is set to a 1 in a + * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields + * sets to 0, with the VF Cache Invalidation Enable set to 0 + * needs to be sent prior to the PIPE_CONTROL with VF Cache + * Invalidation Enable set to a 1." 
+ */ + brw_emit_pipe_control_flush(brw, 0); + } + + if (GEN_GEN >= 9) { + /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue: + * + * "Project: BDW+ + * + * When VF Cache Invalidate is set “Post Sync Operation” must + * be enabled to “Write Immediate Data” or “Write PS Depth + * Count” or “Write Timestamp”." + * + * If there's a BO, we're already doing some kind of write. + * If not, add a write to the workaround BO. + * + * XXX: This causes GPU hangs on Broadwell, so restrict it to + * Gen9+ for now...see this bug for more information: + * https://bugs.freedesktop.org/show_bug.cgi?id=103787 + */ + if (!bo) { + flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + bo = brw->workaround_bo; + } + } + } + + if (GEN_GEN == 10) + gen10_add_rcpfe_workaround_bits(&flags); + } else if (GEN_GEN >= 6) { + if (GEN_GEN == 6 && + (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { + /* Hardware workaround: SNB B-Spec says: + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush + * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is + * required. 
+ */ + brw_emit_post_sync_nonzero_flush(brw); + } + + flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); + } + + brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) { + #if GEN_GEN >= 9 + pc.FlushLLC = 0; + #endif + #if GEN_GEN >= 7 + pc.LRIPostSyncOperation = NoLRIOperation; + pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; + pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; + #endif + #if GEN_GEN >= 6 + pc.StoreDataIndex = 0; + pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; + pc.GlobalSnapshotCountReset = + flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; + pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; + pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; + pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; + pc.RenderTargetCacheFlushEnable = + flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; + pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; + pc.StateCacheInvalidationEnable = + flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; + pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; + pc.ConstantCacheInvalidationEnable = + flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; + #else + pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; + #endif + pc.PostSyncOperation = flags_to_post_sync_op(flags); + pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; + pc.InstructionCacheInvalidateEnable = + flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; + pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; + #if GEN_GEN >= 5 || GEN_IS_G4X + pc.IndirectStatePointersDisable = + flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; + #endif + #if GEN_GEN >= 6 + pc.TextureCacheInvalidationEnable = + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + #elif GEN_GEN == 5 || GEN_IS_G4X + pc.TextureCacheFlushEnable = + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + #endif + pc.Address = ggtt_bo(bo, offset); + if (GEN_GEN < 7 && bo) + pc.DestinationAddressType = 
DAT_GGTT; + pc.ImmediateData = imm; + } +} diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index abb7b241f78..95f8cd21cba 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -137,8 +137,8 @@ i965_gen_libs = [] foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] i965_gen_libs += static_library( 'i965_gen@0@'.format(v), - ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_state_upload.c', - gen_xml_pack], + ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c', + 'genX_state_upload.c', gen_xml_pack], include_directories : [inc_common, inc_intel, inc_dri_common], c_args : [ c_vis_args, no_override_init_args, c_sse2_args, |