diff options
author | Ian Romanick <[email protected]> | 2018-09-11 16:49:51 -0700 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-01-23 00:18:57 +0000 |
commit | de6c0f848797d26d28223bcad25da03654461874 (patch) | |
tree | 57802cd7c0e36551c4477bb84e3d11105a28b3e9 /src/intel/compiler | |
parent | 58907568ec526df87fa87177441743fa0d1d0a66 (diff) |
intel/fs: Implement support for NIR opcodes for INTEL_shader_integer_functions2
v2: Remove smashing type to D for nir_op_irhadd. Caio noticed it was
odd, and removing it fixes an assertion failure in the crucible
func.shader.averageRounded.int64_t test (because the source should be
W).
v3: Emit BRW_OPCODE_MUL directly for nir_op_umul_32x16 and
nir_op_imul_32x16. Suggested by Curro.
v4: Smash types of MUL instruction generated for nir_op_umul_32x16 and
nir_op_imul_32x16. With this change, I get the same assembly now as I
did with v2.
v5: Remove support for pre-Gen7. The integer multiply path was
incorrect, and, since the extension isn't enabled pre-Gen7, there's no
way to test it.
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/767>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 67 |
1 file changed, 67 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 4cfac360874..fab27c10d8c 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1263,11 +1263,45 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, inst->saturate = instr->dest.saturate; break; + case nir_op_iadd_sat: case nir_op_uadd_sat: inst = bld.ADD(result, op[0], op[1]); inst->saturate = true; break; + case nir_op_isub_sat: + bld.emit(SHADER_OPCODE_ISUB_SAT, result, op[0], op[1]); + break; + + case nir_op_usub_sat: + bld.emit(SHADER_OPCODE_USUB_SAT, result, op[0], op[1]); + break; + + case nir_op_irhadd: + case nir_op_urhadd: + assert(nir_dest_bit_size(instr->dest.dest) < 64); + inst = bld.AVG(result, op[0], op[1]); + break; + + case nir_op_ihadd: + case nir_op_uhadd: { + assert(nir_dest_bit_size(instr->dest.dest) < 64); + fs_reg tmp = bld.vgrf(result.type); + + if (devinfo->gen >= 8) { + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); + } + + /* AVG(x, y) - ((x ^ y) & 1) */ + bld.XOR(tmp, op[0], op[1]); + bld.AND(tmp, tmp, retype(brw_imm_ud(1), result.type)); + bld.AVG(result, op[0], op[1]); + inst = bld.ADD(result, result, tmp); + inst->src[1].negate = true; + break; + } + case nir_op_fmul: for (unsigned i = 0; i < 2; i++) { if (can_fuse_fmul_fsign(instr, i)) { @@ -1296,6 +1330,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, bld.MUL(result, op[0], op[1]); break; + case nir_op_imul_32x16: + case nir_op_umul_32x16: { + const bool ud = instr->op == nir_op_umul_32x16; + + assert(nir_dest_bit_size(instr->dest.dest) == 32); + + /* Before Gen7, the order of the 32-bit source and the 16-bit source was + * swapped. The extension isn't enabled on those platforms, so don't + * pretend to support the differences. + */ + assert(devinfo->gen >= 7); + + if (op[1].file == IMM) + op[1] = ud ? 
brw_imm_uw(op[1].ud) : brw_imm_w(op[1].d); + else { + const enum brw_reg_type word_type = + ud ? BRW_REGISTER_TYPE_UW : BRW_REGISTER_TYPE_W; + + op[1] = subscript(op[1], word_type, 0); + } + + const enum brw_reg_type dword_type = + ud ? BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D; + + bld.MUL(result, retype(op[0], dword_type), op[1]); + break; + } + case nir_op_imul: assert(nir_dest_bit_size(instr->dest.dest) < 64); bld.MUL(result, op[0], op[1]); @@ -1746,6 +1808,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, break; } + case nir_op_uclz: + assert(nir_dest_bit_size(instr->dest.dest) == 32); + bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), op[0]); + break; + case nir_op_ifind_msb: { assert(nir_dest_bit_size(instr->dest.dest) < 64); |