diff options
author | Francisco Jerez <[email protected]> | 2018-12-29 01:44:00 -0800 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2019-01-09 12:03:08 -0800 |
commit | c5f9c0009d5161e059e54a76fbdb910a6c151f9f (patch) | |
tree | 5e2e76ecf69efa9d610341692500efa855bb0a5d | |
parent | 0206ffc28db1c12a62c96384760c38f21ec1b420 (diff) |
intel/fs: Handle source modifiers in lower_integer_multiplication().
lower_integer_multiplication() implements 32x32-bit multiplication on
some platforms by bit-casting one of the 32-bit sources into two
16-bit unsigned integer portions. This can give incorrect results if
the original instruction specified a source modifier. Fix it by
emitting an additional MOV instruction implementing the source
modifiers where necessary.
Cc: [email protected]
Reviewed-by: Iago Toral Quiroga <[email protected]>
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 20 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 19 |
2 files changed, 37 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 590a9b32a8e..2f0f0151219 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3862,6 +3862,9 @@ fs_visitor::lower_integer_multiplication() high.offset = inst->dst.offset % REG_SIZE; if (devinfo->gen >= 7) { + if (inst->src[1].abs) + lower_src_modifiers(this, block, inst, 1); + if (inst->src[1].file == IMM) { ibld.MUL(low, inst->src[0], brw_imm_uw(inst->src[1].ud & 0xffff)); @@ -3874,6 +3877,9 @@ fs_visitor::lower_integer_multiplication() subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1)); } } else { + if (inst->src[0].abs) + lower_src_modifiers(this, block, inst, 0); + ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0), inst->src[1]); ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1), @@ -3891,6 +3897,18 @@ fs_visitor::lower_integer_multiplication() } } else if (inst->opcode == SHADER_OPCODE_MULH) { + /* According to the BDW+ BSpec page for the "Multiply Accumulate + * High" instruction: + * + * "An added preliminary mov is required for source modification on + * src1: + * mov (8) r3.0<1>:d -r3<8;8,1>:d + * mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw + * mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d" + */ + if (devinfo->gen >= 8 && (inst->src[1].negate || inst->src[1].abs)) + lower_src_modifiers(this, block, inst, 1); + /* Should have been lowered to 8-wide. */ assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst)); const fs_reg acc = retype(brw_acc_reg(inst->exec_size), @@ -3906,8 +3924,6 @@ fs_visitor::lower_integer_multiplication() * On Gen8, the multiply instruction does a full 32x32-bit * multiply, but in order to do a 64-bit multiply we can simulate * the previous behavior and then use a MACH instruction. - * - * FINISHME: Don't use source modifiers on src1. */ assert(mul->src[1].type == BRW_REGISTER_TYPE_D || mul->src[1].type == BRW_REGISTER_TYPE_UD); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 163c0008820..53d9b6ce7bf 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -531,6 +531,25 @@ namespace brw { return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); } } + + /** + * Remove any modifiers from the \p i-th source region of the instruction, + * including negate, abs and any implicit type conversion to the execution + * type. Instead any source modifiers will be implemented as a separate + * MOV instruction prior to the original instruction. + */ + inline bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); + + ibld.MOV(tmp, inst->src[i]); + inst->src[i] = tmp; + + return true; + } } void shuffle_from_32bit_read(const brw::fs_builder &bld, |