From de6c0f848797d26d28223bcad25da03654461874 Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Tue, 11 Sep 2018 16:49:51 -0700
Subject: intel/fs: Implement support for NIR opcodes for
 INTEL_shader_integer_functions2

v2: Remove smashing type to D for nir_op_irhadd.  Caio noticed it was
odd, and removing it fixes an assertion failure in the crucible
func.shader.averageRounded.int64_t test (because the source should be W).

v3: Emit BRW_OPCODE_MUL directly for nir_op_umul_32x16 and
nir_op_imul_32x16.  Suggested by Curro.

v4: Smash types of MUL instruction generated for nir_op_umul_32x16 and
nir_op_imul_32x16.  With this change, I get the same assembly now as I
did with v2.

v5: Remove support for pre-Gen7.  The integer multiply path was
incorrect, and, since the extension isn't enabled pre-Gen7, there's no
way to test it.

Reviewed-by: Caio Marcelo de Oliveira Filho
Part-of:
---
 src/intel/compiler/brw_fs_nir.cpp | 67 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 4cfac360874..fab27c10d8c 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1263,11 +1263,45 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       inst->saturate = instr->dest.saturate;
       break;
 
+   case nir_op_iadd_sat:
    case nir_op_uadd_sat:
       inst = bld.ADD(result, op[0], op[1]);
       inst->saturate = true;
       break;
 
+   case nir_op_isub_sat:
+      bld.emit(SHADER_OPCODE_ISUB_SAT, result, op[0], op[1]);
+      break;
+
+   case nir_op_usub_sat:
+      bld.emit(SHADER_OPCODE_USUB_SAT, result, op[0], op[1]);
+      break;
+
+   case nir_op_irhadd:
+   case nir_op_urhadd:
+      assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      inst = bld.AVG(result, op[0], op[1]);
+      break;
+
+   case nir_op_ihadd:
+   case nir_op_uhadd: {
+      assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      fs_reg tmp = bld.vgrf(result.type);
+
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
+
+      /* AVG(x, y) - ((x ^ y) & 1) */
+      bld.XOR(tmp, op[0], op[1]);
+      bld.AND(tmp, tmp, retype(brw_imm_ud(1), result.type));
+      bld.AVG(result, op[0], op[1]);
+      inst = bld.ADD(result, result, tmp);
+      inst->src[1].negate = true;
+      break;
+   }
+
    case nir_op_fmul:
       for (unsigned i = 0; i < 2; i++) {
          if (can_fuse_fmul_fsign(instr, i)) {
@@ -1296,6 +1330,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       bld.MUL(result, op[0], op[1]);
       break;
 
+   case nir_op_imul_32x16:
+   case nir_op_umul_32x16: {
+      const bool ud = instr->op == nir_op_umul_32x16;
+
+      assert(nir_dest_bit_size(instr->dest.dest) == 32);
+
+      /* Before Gen7, the order of the 32-bit source and the 16-bit source was
+       * swapped.  The extension isn't enabled on those platforms, so don't
+       * pretend to support the differences.
+       */
+      assert(devinfo->gen >= 7);
+
+      if (op[1].file == IMM)
+         op[1] = ud ? brw_imm_uw(op[1].ud) : brw_imm_w(op[1].d);
+      else {
+         const enum brw_reg_type word_type =
+            ud ? BRW_REGISTER_TYPE_UW : BRW_REGISTER_TYPE_W;
+
+         op[1] = subscript(op[1], word_type, 0);
+      }
+
+      const enum brw_reg_type dword_type =
+         ud ? BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D;
+
+      bld.MUL(result, retype(op[0], dword_type), op[1]);
+      break;
+   }
+
    case nir_op_imul:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       bld.MUL(result, op[0], op[1]);
@@ -1746,6 +1808,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       break;
    }
 
+   case nir_op_uclz:
+      assert(nir_dest_bit_size(instr->dest.dest) == 32);
+      bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
+      break;
+
    case nir_op_ifind_msb: {
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
 
--
cgit v1.2.3
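
For reference, and not part of the patch above: a minimal standalone C++ sketch of the identity the nir_op_ihadd/nir_op_uhadd hunk relies on, hadd(x, y) = AVG(x, y) - ((x ^ y) & 1), assuming AVG is the rounding average (x + y + 1) >> 1 (the same operation the irhadd/urhadd case maps to bld.AVG). The helper names avg_u32 and hadd_u32 are illustrative, not Mesa functions.

#include <cassert>
#include <cstdint>

/* Rounding average (x + y + 1) >> 1, computed without overflowing 32 bits. */
static uint32_t avg_u32(uint32_t x, uint32_t y)
{
   return (x >> 1) + (y >> 1) + ((x | y) & 1);
}

/* Halving add as emitted for ihadd/uhadd: AVG(x, y) minus the low bit of x ^ y. */
static uint32_t hadd_u32(uint32_t x, uint32_t y)
{
   return avg_u32(x, y) - ((x ^ y) & 1);
}

int main()
{
   /* Exhaustive check over a small range against 64-bit reference results. */
   for (uint64_t x = 0; x < 512; x++) {
      for (uint64_t y = 0; y < 512; y++) {
         assert(avg_u32(x, y) == uint32_t((x + y + 1) >> 1));
         assert(hadd_u32(x, y) == uint32_t((x + y) >> 1));
      }
   }
   return 0;
}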