diff options
author | Iago Toral Quiroga <[email protected]> | 2016-06-08 11:04:34 +0200 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2017-01-03 11:26:51 +0100 |
commit | b3a7d0ee9d5f792ab68fbe77da5e3ea85d4bc4c0 (patch) | |
tree | 32b5d859dec74f02153b1a72573d869bacc90398 | |
parent | 82e9dda8bf8875d232840585f48763c7a7092918 (diff) |
i965/vec4: Lower 64-bit MAD
The previous patch made sure that we do not generate MAD instructions
for any NIR's 64-bit ffma, but there is nothing preventing i965 from
producing MAD instructions as a result of lowerings or optimization
passes. This patch makes sure that any 64-bit MAD produced inside the
driver after translating from NIR is also converted to MUL+ADD before
we generate code.
v2:
- Use a copy constructor to copy all relevant instruction fields from
the original mad into the add and mul instructions
v3:
- Rename the lowering and fix commit log (Matt)
Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 44 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 1 |
2 files changed, 45 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 040ef0107e7..5ff0d53171a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2259,6 +2259,49 @@ vec4_visitor::scalarize_df() return progress; } +bool +vec4_visitor::lower_64bit_mad_to_mul_add() +{ + bool progress = false; + + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + if (inst->opcode != BRW_OPCODE_MAD) + continue; + + if (type_sz(inst->dst.type) != 8) + continue; + + dst_reg mul_dst = dst_reg(this, glsl_type::dvec4_type); + + /* Use the copy constructor so we copy all relevant instruction fields + * from the original mad into the add and mul instructions + */ + vec4_instruction *mul = new(mem_ctx) vec4_instruction(*inst); + mul->opcode = BRW_OPCODE_MUL; + mul->dst = mul_dst; + mul->src[0] = inst->src[1]; + mul->src[1] = inst->src[2]; + mul->src[2].file = BAD_FILE; + + vec4_instruction *add = new(mem_ctx) vec4_instruction(*inst); + add->opcode = BRW_OPCODE_ADD; + add->src[0] = src_reg(mul_dst); + add->src[1] = inst->src[0]; + add->src[2].file = BAD_FILE; + + inst->insert_before(block, mul); + inst->insert_before(block, add); + inst->remove(block); + + progress = true; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + /* The align16 hardware can only do 32-bit swizzle channels, so we need to * translate the logical 64-bit swizzle channels that we use in the Vec4 IR * to 32-bit swizzle channels in hardware registers. @@ -2418,6 +2461,7 @@ vec4_visitor::run() if (failed) return false; + OPT(lower_64bit_mad_to_mul_add); OPT(scalarize_df); setup_payload(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 7b7d10c5c9c..c4a0004dbed 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -163,6 +163,7 @@ public: bool lower_simd_width(); bool scalarize_df(); + bool lower_64bit_mad_to_mul_add(); void apply_logical_swizzle(struct brw_reg *hw_reg, vec4_instruction *inst, int arg); |