diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 46 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 40 |
5 files changed, 93 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 61bcebdbc4b..d0137481c6c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1313,6 +1313,8 @@ enum opcode { TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, TCS_OPCODE_GET_PRIMITIVE_ID, TCS_OPCODE_CREATE_BARRIER_HEADER, + TCS_OPCODE_SRC0_010_IS_ZERO, + TCS_OPCODE_RELEASE_INPUT, TES_OPCODE_GET_PRIMITIVE_ID, TES_OPCODE_CREATE_INPUT_READ_HEADER, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 3a36678e8d5..f692bc2de35 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -568,6 +568,10 @@ brw_instruction_name(enum opcode op) return "tcs_get_primitive_id"; case TCS_OPCODE_CREATE_BARRIER_HEADER: return "tcs_create_barrier_header"; + case TCS_OPCODE_SRC0_010_IS_ZERO: + return "tcs_src0<0,1,0>_is_zero"; + case TCS_OPCODE_RELEASE_INPUT: + return "tcs_release_input"; case TES_OPCODE_CREATE_INPUT_READ_HEADER: return "tes_create_input_read_header"; case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: @@ -1009,6 +1013,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case FS_OPCODE_FB_WRITE: case SHADER_OPCODE_BARRIER: + case TCS_OPCODE_RELEASE_INPUT: return true; default: return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 116dd353016..f1c3d37ce1c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -157,6 +157,7 @@ vec4_instruction::is_send_from_grf() case SHADER_OPCODE_TYPED_SURFACE_WRITE: case VEC4_OPCODE_URB_READ: case TCS_OPCODE_URB_WRITE: + case TCS_OPCODE_RELEASE_INPUT: case SHADER_OPCODE_BARRIER: return true; default: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index cbf8b1d0bd0..cce2b4d1f4c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -933,6 +933,42 @@ generate_vec4_urb_read(struct brw_codegen *p, } static void +generate_tcs_release_input(struct brw_codegen *p, + struct brw_reg header, + struct brw_reg vertex, + struct brw_reg is_unpaired) +{ + const struct brw_device_info *devinfo = p->devinfo; + + assert(vertex.file == BRW_IMMEDIATE_VALUE); + assert(vertex.type == BRW_REGISTER_TYPE_UD); + + /* m0.0-0.1: URB handles */ + struct brw_reg urb_handles = + retype(brw_vec2_grf(1 + (vertex.ud >> 3), vertex.ud & 7), + BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, header, brw_imm_ud(0)); + brw_MOV(p, vec2(get_element_ud(header, 0)), urb_handles); + brw_pop_insn_state(p); + + brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, brw_null_reg()); + brw_set_src0(p, send, header); + brw_set_message_descriptor(p, send, BRW_SFID_URB, + 1 /* mlen */, 0 /* rlen */, + true /* header */, false /* eot */); + brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD); + brw_inst_set_urb_complete(devinfo, send, 1); + brw_inst_set_urb_swizzle_control(devinfo, send, is_unpaired.ud ? + BRW_URB_SWIZZLE_NONE : + BRW_URB_SWIZZLE_INTERLEAVE); +} + +static void generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst) { brw_push_insn_state(p); @@ -1846,6 +1882,16 @@ generate_code(struct brw_codegen *p, generate_tes_get_primitive_id(p, dst); break; + case TCS_OPCODE_SRC0_010_IS_ZERO: + /* If src_reg had stride like fs_reg, we wouldn't need this. */ + brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0)); + brw_inst_set_cond_modifier(devinfo, brw_last_inst, BRW_CONDITIONAL_Z); + break; + + case TCS_OPCODE_RELEASE_INPUT: + generate_tcs_release_input(p, dst, src[0], src[1]); + break; + case SHADER_OPCODE_BARRIER: brw_barrier(p, src[0]); brw_WAIT(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 507db749e63..7693f095a52 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -156,16 +156,54 @@ vec4_tcs_visitor::emit_prolog() void vec4_tcs_visitor::emit_thread_end() { + vec4_instruction *inst; current_annotation = "thread end"; if (nir->info.tcs.vertices_out % 2) { emit(BRW_OPCODE_ENDIF); } + if (devinfo->gen == 7) { + struct brw_tcs_prog_data *tcs_prog_data = + (struct brw_tcs_prog_data *) prog_data; + + current_annotation = "release input vertices"; + + /* Synchronize all threads, so we know that no one is still + * using the input URB handles. + */ + if (tcs_prog_data->instances > 1) { + dst_reg header = dst_reg(this, glsl_type::uvec4_type); + emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header); + emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header)); + } + + /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles. + * We want to compare the bottom half of invocation_id with 0, but + * use that truth value for the top half as well. Unfortunately, + * we don't have stride in the vec4 world, nor UV immediates in + * align16, so we need an opcode to get invocation_id<0,4,0>. + */ + emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id); + emit(IF(BRW_PREDICATE_NORMAL)); + for (unsigned i = 0; i < key->input_vertices; i += 2) { + /* If we have an odd number of input vertices, the last will be + * unpaired. We don't want to use an interleaved URB write in + * that case. + */ + const bool is_unpaired = i == key->input_vertices - 1; + + dst_reg header(this, glsl_type::uvec4_type); + emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i), + brw_imm_ud(is_unpaired)); + } + emit(BRW_OPCODE_ENDIF); + } + if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME)) emit_shader_time_end(); - vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst = emit(VS_OPCODE_URB_WRITE); inst->mlen = 1; /* just the header, no data. */ inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE; } |