i965/vec4: Lower 64-bit MAD

The previous patch made sure that we do not generate MAD instructions for any of NIR's 64-bit ffma operations, but there is nothing preventing i965 from producing MAD instructions as a result of lowerings or optimization passes. This patch makes sure that any 64-bit MAD produced inside the driver after translating from NIR is also converted to MUL+ADD before we generate code.

v2:
- Use a copy constructor to copy all relevant instruction fields from the original mad into the add and mul instructions

v3:
- Rename the lowering and fix commit log (Matt)

Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2016-06-08 11:04:34 +0200
committer: Samuel Iglesias Gonsálvez <[email protected]> 2017-01-03 11:26:51 +0100
commit: b3a7d0ee9d5f792ab68fbe77da5e3ea85d4bc4c0 (patch)
tree: 32b5d859dec74f02153b1a72573d869bacc90398 /src
parent: 82e9dda8bf8875d232840585f48763c7a7092918 (diff)
2 files changed, 45 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 040ef0107e7..5ff0d53171a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2259,6 +2259,70 @@ vec4_visitor::scalarize_df()
    return progress;
 }
 
+/**
+ * Lower 64-bit MAD instructions to a MUL followed by an ADD.
+ *
+ * Each BRW_OPCODE_MAD whose destination type is 8 bytes wide is replaced by
+ *
+ *    mul tmp, src1, src2
+ *    add dst, tmp, src0
+ *
+ * where tmp is a newly allocated dvec4 temporary.
+ *
+ * Returns true if any instruction was lowered, in which case the live
+ * intervals have been invalidated.
+ */
+bool
+vec4_visitor::lower_64bit_mad_to_mul_add()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      if (inst->opcode != BRW_OPCODE_MAD)
+         continue;
+
+      if (type_sz(inst->dst.type) != 8)
+         continue;
+
+      dst_reg mul_dst = dst_reg(this, glsl_type::dvec4_type);
+
+      /* Use the copy constructor so we copy all relevant instruction fields
+       * from the original mad into the add and mul instructions
+       */
+      vec4_instruction *mul = new(mem_ctx) vec4_instruction(*inst);
+      mul->opcode = BRW_OPCODE_MUL;
+      mul->dst = mul_dst;
+      mul->src[0] = inst->src[1];
+      mul->src[1] = inst->src[2];
+      mul->src[2].file = BAD_FILE;
+
+      /* Saturate and the conditional modifier apply to the final result of
+       * the mad, so only the add may carry them. Leaving them on the mul
+       * would clamp the intermediate product and make it write the flag
+       * register before the add gets to.
+       */
+      mul->saturate = false;
+      mul->conditional_mod = BRW_CONDITIONAL_NONE;
+
+      vec4_instruction *add = new(mem_ctx) vec4_instruction(*inst);
+      add->opcode = BRW_OPCODE_ADD;
+      add->src[0] = src_reg(mul_dst);
+      add->src[1] = inst->src[0];
+      add->src[2].file = BAD_FILE;
+
+      inst->insert_before(block, mul);
+      inst->insert_before(block, add);
+      inst->remove(block);
+
+      progress = true;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
 /* The align16 hardware can only do 32-bit swizzle channels, so we need to
  * translate the logical 64-bit swizzle channels that we use in the Vec4 IR
  * to 32-bit swizzle channels in hardware registers.
@@ -2418,6 +2461,7 @@ vec4_visitor::run()
    if (failed)
       return false;
 
+   OPT(lower_64bit_mad_to_mul_add);
    OPT(scalarize_df);
 
    setup_payload();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7b7d10c5c9c..c4a0004dbed 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,7 @@ public:
 
    bool lower_simd_width();
    bool scalarize_df();
+   bool lower_64bit_mad_to_mul_add();
    void apply_logical_swizzle(struct brw_reg *hw_reg,
                               vec4_instruction *inst, int arg);
author	Iago Toral Quiroga <[email protected]>	2016-06-08 11:04:34 +0200
committer	Samuel Iglesias Gonsálvez <[email protected]>	2017-01-03 11:26:51 +0100
commit	b3a7d0ee9d5f792ab68fbe77da5e3ea85d4bc4c0 (patch)
tree	32b5d859dec74f02153b1a72573d869bacc90398 /src
parent	82e9dda8bf8875d232840585f48763c7a7092918 (diff)