summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2015-12-24 13:09:26 -0800
committer: Kenneth Graunke <[email protected]> 2015-12-28 13:17:03 -0800
commit: bd8ab8dedb2cc557ea3cb58d507f237743b3f7f9 (patch)
tree: 4c5c4b0a081890f90920be3f0f89fa6151f0a141
parent: b7793783b3df94880655234bc2a9054eddf01913 (diff)
i965: Don't set interleave or complete on TCS EOT message.
Setting interleave on the TCS EOT message causes Ivybridge hardware to GPU hang like crazy. Individual tests would pass, but running even a simple test like nop.shader_test in a loop would hang within 1-3 runs. Adding sleep delays worked around the problem, somehow.

Interleave doesn't make much sense given that we only have one patch URB handle, not two. Complete doesn't seem useful either. There's no reason to actually set those bits. We were just being lazy.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Edward O'Callaghan <[email protected]>
Reviewed-by: Jordan Justen <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 36
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 6
5 files changed, 41 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d0137481c6c..10a6d39db85 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1315,6 +1315,7 @@ enum opcode {
TCS_OPCODE_CREATE_BARRIER_HEADER,
TCS_OPCODE_SRC0_010_IS_ZERO,
TCS_OPCODE_RELEASE_INPUT,
+ TCS_OPCODE_THREAD_END,
TES_OPCODE_GET_PRIMITIVE_ID,
TES_OPCODE_CREATE_INPUT_READ_HEADER,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f692bc2de35..d4b6410815e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -572,6 +572,8 @@ brw_instruction_name(enum opcode op)
return "tcs_src0<0,1,0>_is_zero";
case TCS_OPCODE_RELEASE_INPUT:
return "tcs_release_input";
+ case TCS_OPCODE_THREAD_END:
+ return "tcs_thread_end";
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
return "tes_create_input_read_header";
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f1c3d37ce1c..f0f18ca7768 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -276,6 +276,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_POW:
return 2;
case VS_OPCODE_URB_WRITE:
+ case TCS_OPCODE_THREAD_END:
return 1;
case VS_OPCODE_PULL_CONSTANT_LOAD:
return 2;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index cce2b4d1f4c..6b03a1c3db5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -758,8 +758,12 @@ generate_tcs_urb_write(struct brw_codegen *p,
true /* header */, false /* eot */);
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
- brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
- brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+ if (inst->urb_write_flags & BRW_URB_WRITE_EOT) {
+ brw_inst_set_eot(devinfo, send, 1);
+ } else {
+ brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
+ brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+ }
/* what happens to swizzles? */
}
@@ -969,6 +973,30 @@ generate_tcs_release_input(struct brw_codegen *p,
}
static void
+generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
+{
+ struct brw_reg header = brw_message_reg(inst->base_mrf);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, header, brw_imm_ud(0));
+ brw_MOV(p, get_element_ud(header, 0),
+ retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ inst->base_mrf, /* starting mrf reg nr */
+ header,
+ BRW_URB_WRITE_EOT | inst->urb_write_flags,
+ inst->mlen,
+ 0, /* response len */
+ 0, /* urb destination offset */
+ 0);
+}
+
+static void
generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
brw_push_insn_state(p);
@@ -1892,6 +1920,10 @@ generate_code(struct brw_codegen *p,
generate_tcs_release_input(p, dst, src[0], src[1]);
break;
+ case TCS_OPCODE_THREAD_END:
+ generate_tcs_thread_end(p, inst);
+ break;
+
case SHADER_OPCODE_BARRIER:
brw_barrier(p, src[0]);
brw_WAIT(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 7693f095a52..fb6ca8ee5f9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -203,9 +203,9 @@ vec4_tcs_visitor::emit_thread_end()
if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
emit_shader_time_end();
- inst = emit(VS_OPCODE_URB_WRITE);
- inst->mlen = 1; /* just the header, no data. */
- inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
+ inst = emit(TCS_OPCODE_THREAD_END);
+ inst->base_mrf = 14;
+ inst->mlen = 1;
}