aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp 5
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 46
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp 40
5 files changed, 93 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 61bcebdbc4b..d0137481c6c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1313,6 +1313,8 @@ enum opcode {
TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
TCS_OPCODE_GET_PRIMITIVE_ID,
TCS_OPCODE_CREATE_BARRIER_HEADER,
+ TCS_OPCODE_SRC0_010_IS_ZERO,
+ TCS_OPCODE_RELEASE_INPUT,
TES_OPCODE_GET_PRIMITIVE_ID,
TES_OPCODE_CREATE_INPUT_READ_HEADER,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 3a36678e8d5..f692bc2de35 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -568,6 +568,10 @@ brw_instruction_name(enum opcode op)
return "tcs_get_primitive_id";
case TCS_OPCODE_CREATE_BARRIER_HEADER:
return "tcs_create_barrier_header";
+ case TCS_OPCODE_SRC0_010_IS_ZERO:
+ return "tcs_src0<0,1,0>_is_zero";
+ case TCS_OPCODE_RELEASE_INPUT:
+ return "tcs_release_input";
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
return "tes_create_input_read_header";
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
@@ -1009,6 +1013,7 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_BARRIER:
+ case TCS_OPCODE_RELEASE_INPUT:
return true;
default:
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 116dd353016..f1c3d37ce1c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -157,6 +157,7 @@ vec4_instruction::is_send_from_grf()
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case VEC4_OPCODE_URB_READ:
case TCS_OPCODE_URB_WRITE:
+ case TCS_OPCODE_RELEASE_INPUT:
case SHADER_OPCODE_BARRIER:
return true;
default:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index cbf8b1d0bd0..cce2b4d1f4c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -933,6 +933,42 @@ generate_vec4_urb_read(struct brw_codegen *p,
}
static void
+generate_tcs_release_input(struct brw_codegen *p,
+ struct brw_reg header,
+ struct brw_reg vertex,
+ struct brw_reg is_unpaired)
+{ /* Builds a one-register header in `header` and sends it to the URB as a READ_OWORD message with the complete bit set and no response. */
+ const struct brw_device_info *devinfo = p->devinfo;
+
+ assert(vertex.file == BRW_IMMEDIATE_VALUE); /* vertex.ud is read below, so it must be an immediate */
+ assert(vertex.type == BRW_REGISTER_TYPE_UD);
+
+ /* m0.0-0.1: URB handles, read as two dwords from GRF (1 + vertex / 8), subregister (vertex % 8) */
+ struct brw_reg urb_handles =
+ retype(brw_vec2_grf(1 + (vertex.ud >> 3), vertex.ud & 7),
+ BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p); /* the align1 / mask-disable overrides below are undone by the matching pop */
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, header, brw_imm_ud(0)); /* zero the header before filling in the handles */
+ brw_MOV(p, vec2(get_element_ud(header, 0)), urb_handles); /* header elements 0-1 <- the two URB handles */
+ brw_pop_insn_state(p);
+
+ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, brw_null_reg()); /* null destination: rlen is 0 below */
+ brw_set_src0(p, send, header);
+ brw_set_message_descriptor(p, send, BRW_SFID_URB,
+ 1 /* mlen */, 0 /* rlen */,
+ true /* header */, false /* eot */);
+ brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
+ brw_inst_set_urb_complete(devinfo, send, 1);
+ brw_inst_set_urb_swizzle_control(devinfo, send, is_unpaired.ud ? /* NOTE(review): is_unpaired.ud is read as an immediate but, unlike vertex, its file is never asserted */
+ BRW_URB_SWIZZLE_NONE :
+ BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+static void
generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
brw_push_insn_state(p);
@@ -1846,6 +1882,16 @@ generate_code(struct brw_codegen *p,
generate_tes_get_primitive_id(p, dst);
break;
+ case TCS_OPCODE_SRC0_010_IS_ZERO:
+ /* If src_reg had stride like fs_reg, we wouldn't need this. */
+ brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0));
+ brw_inst_set_cond_modifier(devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
+ break;
+
+ case TCS_OPCODE_RELEASE_INPUT:
+ generate_tcs_release_input(p, dst, src[0], src[1]);
+ break;
+
case SHADER_OPCODE_BARRIER:
brw_barrier(p, src[0]);
brw_WAIT(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 507db749e63..7693f095a52 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -156,16 +156,54 @@ vec4_tcs_visitor::emit_prolog()
void
vec4_tcs_visitor::emit_thread_end()
{ /* Emits the end-of-thread sequence: close any pending IF, release input URB handles on gen 7, then send the EOT URB write. */
+ vec4_instruction *inst;
current_annotation = "thread end";
if (nir->info.tcs.vertices_out % 2) { /* presumably matches an IF opened earlier for odd output vertex counts - confirm against emit_prolog() */
emit(BRW_OPCODE_ENDIF);
}
+ if (devinfo->gen == 7) {
+ struct brw_tcs_prog_data *tcs_prog_data =
+ (struct brw_tcs_prog_data *) prog_data;
+
+ current_annotation = "release input vertices";
+
+ /* Synchronize all threads, so we know that no one is still
+ * using the input URB handles.
+ */
+ if (tcs_prog_data->instances > 1) {
+ dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+ emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
+ emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
+ }
+
+ /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
+ * We want to compare the bottom half of invocation_id with 0, but
+ * use that truth value for the top half as well. Unfortunately,
+ * we don't have stride in the vec4 world, nor UV immediates in
+ * align16, so we need an opcode to get invocation_id<0,4,0>.
+ * NOTE(review): the generator spells this region stride(src[0], 0, 1, 0); confirm the <0,4,0> notation. */
+ emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id);
+ emit(IF(BRW_PREDICATE_NORMAL)); /* predicated on the flag written by the zero test above */
+ for (unsigned i = 0; i < key->input_vertices; i += 2) { /* one release message per pair of input vertices */
+ /* If we have an odd number of input vertices, the last will be
+ * unpaired. We don't want to use an interleaved URB message in
+ * that case (the release is generated as a URB read, not a write).
+ */
+ const bool is_unpaired = i == key->input_vertices - 1;
+
+ dst_reg header(this, glsl_type::uvec4_type);
+ emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
+ brw_imm_ud(is_unpaired));
+ }
+ emit(BRW_OPCODE_ENDIF);
+ }
+
if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
emit_shader_time_end();
- vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+ inst = emit(VS_OPCODE_URB_WRITE);
inst->mlen = 1; /* just the header, no data. */
inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
}