summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Jason Ekstrand <[email protected]> 2019-05-22 12:36:17 -0500
committer Jason Ekstrand <[email protected]> 2019-05-30 14:00:26 +0000
commit 9e403dc56e3ab702abc68fd65ed4ab324ba69e69 (patch)
tree 27009ed2414fb42f36143fdfa5efff3c924d0fdc /src
parent 859de4a74857d2736f6e2dd9d2fd98b92bbc69d9 (diff)
intel/fs: Do a stalling MFENCE in endInvocationInterlock()
Fixes: 939312702e "i965: Add ARB_fragment_shader_interlock support"
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/intel/compiler/brw_eu.h 3
-rw-r--r-- src/intel/compiler/brw_eu_emit.c 8
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 5
-rw-r--r-- src/intel/compiler/brw_fs_nir.cpp 18
-rw-r--r-- src/intel/compiler/brw_vec4_generator.cpp 2
5 files changed, 28 insertions, 8 deletions
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 8ef953d5aa4..29965e60a7f 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -1114,7 +1114,8 @@ void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg src,
- enum opcode send_op);
+ enum opcode send_op,
+ bool stall);
void
brw_pixel_interpolator_query(struct brw_codegen *p,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 181932705f3..7b8783ee3d1 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3038,10 +3038,11 @@ void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg src,
- enum opcode send_op)
+ enum opcode send_op,
+ bool stall)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool commit_enable =
+ const bool commit_enable = stall ||
devinfo->gen >= 10 || /* HSD ES # 1404612949 */
(devinfo->gen == 7 && !devinfo->is_haswell);
struct brw_inst *insn;
@@ -3080,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p,
brw_MOV(p, dst, offset(dst, 1));
}
+ if (stall)
+ brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
+
brw_pop_insn_state(p);
}
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 1149e98ecd6..f91c857678a 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2071,13 +2071,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_MEMORY_FENCE:
- brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
+ assert(src[1].file == BRW_IMMEDIATE_VALUE);
+ brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud);
break;
case SHADER_OPCODE_INTERLOCK:
assert(devinfo->gen >= 9);
/* The interlock is basically a memory fence issued via sendc */
- brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC);
+ brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 6856eca687a..77b131272ca 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4273,7 +4273,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_memory_barrier: {
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
- ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0))
+ ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
+ brw_vec8_grf(0, 0), brw_imm_ud(0))
->size_written = 2 * REG_SIZE;
break;
}
@@ -5080,7 +5081,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
case nir_intrinsic_end_invocation_interlock: {
- /* We don't need to do anything here */
+ /* For endInvocationInterlock(), we need to insert a memory fence which
+ * stalls in the shader until the memory transactions prior to that
+ * fence are complete. This ensures that the shader does not end before
+ * any writes from its critical section have landed. Otherwise, you can
+ * end up with a case where the next invocation on that pixel properly
+ * stalls for previous FS invocation on its pixel to complete but
+ * doesn't actually wait for the dataport memory transactions from that
+ * thread to land before submitting its own.
+ */
+ const fs_builder ubld = bld.group(8, 0);
+ const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
+ brw_vec8_grf(0, 0), brw_imm_ud(1))
+ ->size_written = 2 * REG_SIZE;
break;
}
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 38181bf1469..8f9e4f16677 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1886,7 +1886,7 @@ generate_code(struct brw_codegen *p,
break;
case SHADER_OPCODE_MEMORY_FENCE:
- brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
+ brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {