diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 52 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 38 |
6 files changed, 63 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index da6ed5b973e..a97a9443f3d 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -948,6 +948,7 @@ enum opcode { VS_OPCODE_URB_WRITE, VS_OPCODE_PULL_CONSTANT_LOAD, VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, VS_OPCODE_UNPACK_FLAGS_SIMD4X2, /** diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 335a8007e12..0d6ac0c9311 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -568,6 +568,10 @@ brw_instruction_name(enum opcode op) return "pull_constant_load"; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: return "pull_constant_load_gen7"; + + case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: + return "set_simd4x2_header_gen9"; + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: return "unpack_flags_simd4x2"; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 03639248864..a0ee2cc603b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -500,6 +500,8 @@ private: struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset); + void generate_set_simd4x2_header_gen9(vec4_instruction *inst, + struct brw_reg dst); void generate_unpack_flags(struct brw_reg dst); void generate_untyped_atomic(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index 980e266750a..70d2af5e911 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -44,6 +44,7 @@ can_do_writemask(const struct brw_context *brw, case SHADER_OPCODE_GEN4_SCRATCH_READ: case VS_OPCODE_PULL_CONSTANT_LOAD: case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: return false; default: /* The MATH instruction on Gen6 only executes in align1 mode, which does diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index e4addf7d2fd..b22a5556bfc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1039,38 +1039,18 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, { assert(surf_index.type == BRW_REGISTER_TYPE_UD); - struct brw_reg src = offset; - bool header_present = false; - int mlen = 1; - - if (brw->gen >= 9) { - /* Skylake requires a message header in order to use SIMD4x2 mode. */ - src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); - mlen = 2; - header_present = true; - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_set_default_access_mode(p, BRW_ALIGN_1); - - brw_MOV(p, get_element_ud(src, 2), - brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); - brw_pop_insn_state(p); - } - if (surf_index.file == BRW_IMMEDIATE_VALUE) { brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, src); + brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - mlen, - header_present, + inst->mlen, + inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1095,14 +1075,14 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, /* dst = send(offset, a0.0 | <descriptor>) */ brw_inst *insn = brw_send_indirect_message( - p, BRW_SFID_SAMPLER, dst, src, addr); + p, BRW_SFID_SAMPLER, dst, offset, addr); brw_set_sampler_message(p, insn, 0 /* surface */, 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, - mlen /* mlen */, - header_present /* header */, + inst->mlen, + inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1113,6 +1093,22 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, } void +vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, + struct brw_reg dst) +{ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, vec8(dst), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_element_ud(dst, 2), + brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); + + brw_pop_insn_state(p); +} + +void vec4_generator::generate_untyped_atomic(vec4_instruction *inst, struct brw_reg dst, struct brw_reg atomic_op, @@ -1435,6 +1431,10 @@ vec4_generator::generate_code(const cfg_t *cfg) generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; + case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: + generate_set_simd4x2_header_gen9(inst, dst); + break; + case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f7d542b3266..3d16caa74f0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1313,16 +1313,36 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, vec4_instruction *pull; - if (brw->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + if (brw->gen >= 9) { + /* Gen9+ needs a message header in order to use SIMD4x2 mode */ + src_reg header(this, glsl_type::uvec4_type, 2); - /* We have to use a message header on Skylake to get SIMD4x2 mode. - * Reserve space for the register. - */ - if (brw->gen >= 9) { - grf_offset.reg_offset++; - alloc.sizes[grf_offset.reg] = 2; - } + pull = new(mem_ctx) + vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, + dst_reg(header)); + + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); + + dst_reg index_reg = retype(offset(dst_reg(header), 1), + offset_reg.type); + pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg); + + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); + + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + dst, + surf_index, + header); + pull->mlen = 2; + pull->header_present = true; + } else if (brw->gen >= 7) { + dst_reg grf_offset = dst_reg(this, glsl_type::int_type); grf_offset.type = offset_reg.type; |