diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 75 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 |
6 files changed, 98 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 634885b0d47..5072e680db1 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -918,6 +918,12 @@ enum opcode { SHADER_OPCODE_URB_WRITE_SIMD8, + /** + * Pick the channel from its first source register given by the index + * specified as second source. Useful for variable indexing of surfaces. + */ + SHADER_OPCODE_BROADCAST, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 84c1e57b536..a0c938a142b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -461,6 +461,12 @@ brw_pixel_interpolator_query(struct brw_codegen *p, unsigned msg_length, unsigned response_length); +void +brw_broadcast(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg idx); + /*********************************************************************** * brw_eu_util.c: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 30396623612..73bed49ecee 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3212,6 +3212,81 @@ brw_pixel_interpolator_query(struct brw_codegen *p, brw_inst_set_pi_message_data(devinfo, insn, data); } +void +brw_broadcast(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg idx) +{ + const struct brw_device_info *devinfo = p->devinfo; + const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; + brw_inst *inst; + + assert(src.file == BRW_GENERAL_REGISTER_FILE && + src.address_mode == BRW_ADDRESS_DIRECT); + + if ((src.vstride == 0 && (src.hstride == 0 || !align1)) || + idx.file == BRW_IMMEDIATE_VALUE) { + /* Trivial, the source is already uniform or the index is a constant. + * We will typically not get here if the optimizer is doing its job, but + * asserting would be mean. + */ + const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0; + brw_MOV(p, dst, + (align1 ? stride(suboffset(src, i), 0, 1, 0) : + stride(suboffset(src, 4 * i), 0, 4, 1))); + } else { + if (align1) { + const struct brw_reg addr = + retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + const unsigned offset = src.nr * REG_SIZE + src.subnr; + /* Limit in bytes of the signed indirect addressing immediate. */ + const unsigned limit = 512; + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Take into account the component size and horizontal stride. */ + assert(src.vstride == src.hstride + src.width); + brw_SHL(p, addr, vec1(idx), + brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + + src.hstride - 1)); + + /* We can only address up to limit bytes using the indirect + * addressing immediate, account for the difference if the source + * register is above this limit. + */ + if (offset >= limit) + brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); + + brw_pop_insn_state(p); + + /* Use indirect addressing to fetch the specified component. */ + brw_MOV(p, dst, + retype(brw_vec1_indirect(addr.subnr, offset % limit), + src.type)); + } else { + /* In SIMD4x2 mode the index can be either zero or one, replicate it + * to all bits of a flag register, + */ + inst = brw_MOV(p, + brw_null_reg(), + stride(brw_swizzle1(idx, 0), 0, 4, 1)); + brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE); + brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ); + brw_inst_set_flag_reg_nr(devinfo, inst, 1); + + /* and use predicated SEL to pick the right channel. */ + inst = brw_SEL(p, dst, + stride(suboffset(src, 4), 0, 4, 1), + stride(src, 0, 4, 1)); + brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL); + brw_inst_set_flag_reg_nr(devinfo, inst, 1); + } + } +} + /** * This instruction is generated as a single-channel align1 instruction by * both the VS and FS stages when using INTEL_DEBUG=shader_time. diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index b81978c8775..2c6a12e4bec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2061,6 +2061,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_set_simd4x2_offset(inst, dst, src[0]); break; + case SHADER_OPCODE_BROADCAST: + brw_broadcast(p, dst, src[0], src[1]); + break; + case FS_OPCODE_SET_OMASK: generate_set_omask(inst, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 20588e9a0fe..1944c2648c8 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -517,6 +517,9 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_URB_WRITE_SIMD8: return "gen8_urb_write_simd8"; + case SHADER_OPCODE_BROADCAST: + return "broadcast"; + case VEC4_OPCODE_MOV_BYTES: return "mov_bytes"; case VEC4_OPCODE_PACK_BYTES: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index c15fa165ec8..3dc808c85ee 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1512,6 +1512,10 @@ vec4_generator::generate_code(const cfg_t *cfg) brw_memory_fence(p, dst); break; + case SHADER_OPCODE_BROADCAST: + brw_broadcast(p, dst, src[0], src[1]); + break; + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(dst); break; |