diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 66 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 36 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 31 |
4 files changed, 70 insertions, 64 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b63ca23e3d8..cb13fcb1cc8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3523,6 +3523,71 @@ fs_visitor::lower_load_payload() return progress; } +bool +fs_visitor::lower_integer_multiplication() +{ + bool progress = false; + + /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation + * directly, but Cherryview cannot. + */ + if (devinfo->gen >= 8 && !devinfo->is_cherryview) + return false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->opcode != BRW_OPCODE_MUL || + inst->dst.is_accumulator() || + (inst->dst.type != BRW_REGISTER_TYPE_D && + inst->dst.type != BRW_REGISTER_TYPE_UD)) + continue; + +#define insert(instr) inst->insert_before(block, instr) + + /* The MUL instruction isn't commutative. On Gen <= 6, only the low + * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of + * src1 are used. + * + * If multiplying by an immediate value that fits in 16-bits, do a + * single MUL instruction with that value in the proper location. + */ + if (inst->src[1].file == IMM && + inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { + if (devinfo->gen < 7) { + fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); + insert(MOV(imm, inst->src[1])); + insert(MUL(inst->dst, imm, inst->src[0])); + } else { + insert(MUL(inst->dst, inst->src[0], inst->src[1])); + } + } else { + if (devinfo->gen >= 7) + no16("SIMD16 integer multiply unsupported\n"); + + const unsigned channels = dispatch_width; + const enum brw_reg_type type = inst->dst.type; + const fs_reg acc(retype(brw_acc_reg(channels), type)); + const fs_reg null(retype(brw_null_vec(channels), type)); + + const fs_reg &src0 = inst->src[0]; + const fs_reg &src1 = inst->src[1]; + + insert(MUL(acc, src0, src1)); + insert(MACH(null, src0, src1)); + insert(MOV(inst->dst, acc)); + } +#undef insert + + inst->remove(block); + progress = true; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + void fs_visitor::dump_instructions() { @@ -4001,6 +4066,7 @@ fs_visitor::optimize() } OPT(opt_combine_constants); + OPT(lower_integer_multiplication); lower_uniform_pull_constant_loads(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 991cff96325..f2aa0ae9576 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -241,6 +241,7 @@ public: void no16(const char *msg, ...); void lower_uniform_pull_constant_loads(); bool lower_load_payload(); + bool lower_integer_multiplication(); bool opt_combine_constants(); void emit_dummy_fs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9cfd0e792a2..5dd8363b91e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -780,41 +780,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_imul: { - if (devinfo->gen >= 8) { - emit(MUL(result, op[0], op[1])); - break; - } else { - nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src); - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - - if (value0 && value0->u[0] < (1 << 16)) { - if (devinfo->gen < 7) { - emit(MUL(result, op[0], op[1])); - } else { - emit(MUL(result, op[1], op[0])); - } - break; - } else if (value1 && value1->u[0] < (1 << 16)) { - if (devinfo->gen < 7) { - emit(MUL(result, op[1], op[0])); - } else { - emit(MUL(result, op[0], op[1])); - } - break; - } - } - - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + case nir_op_imul: + emit(MUL(result, op[0], op[1])); break; - } case nir_op_imul_high: case nir_op_umul_high: { diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index abaea5f4e13..ead77686640 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -873,36 +873,7 @@ fs_visitor::visit(ir_expression *ir) unreachable("not reached: should be handled by ir_sub_to_add_neg"); case ir_binop_mul: - if (devinfo->gen < 8 && ir->type->is_integer()) { - /* For integer multiplication, the MUL uses the low 16 bits - * of one of the operands (src0 on gen6, src1 on gen7). The - * MACH accumulates in the contribution of the upper 16 bits - * of that operand. - */ - if (ir->operands[0]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(this->result, op[0], op[1])); - else - emit(MUL(this->result, op[1], op[0])); - } else if (ir->operands[1]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(this->result, op[1], op[0])); - else - emit(MUL(this->result, op[0], op[1])); - } else { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - } - } else { - emit(MUL(this->result, op[0], op[1])); - } + emit(MUL(this->result, op[0], op[1])); break; case ir_binop_imul_high: { if (devinfo->gen >= 7) |