summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIago Toral Quiroga <[email protected]>2016-04-19 12:59:47 +0200
committerSamuel Iglesias Gonsálvez <[email protected]>2016-05-10 11:25:06 +0200
commitba1907f040e9d61be932a8e098061d94d4ba30cb (patch)
tree2758bf763119623de3ba192baaf15b9cdaaa93cd
parent7782f39e759798975ace6f3272dd3f263ddc8702 (diff)
i965/fs: optimize pack double
When we are actually creating a double using values obtained from a previous unpack operation we can bypass the unpack and source from the original double value directly. v2: - Style changes (Topi) - Bail if parent instruction's src is not SSA (Connor) v3: Use subscript() instead of stride() (Curro) Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp29
1 files changed, 29 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5b5a7667bbb..afa3308135e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1099,6 +1099,35 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_pack_double_2x32_split:
+ /* Optimize the common case where we are re-packing a double with the
+  * result of a previous double unpack: take the 32-bit value for the
+  * re-pack straight from the original double and bypass the unpack.
+  * A source that does not match this pattern keeps its op[i] untouched.
+  */
+ for (int i = 0; i < 2; i++) {
+    if (!instr->src[i].src.is_ssa)
+       continue;
+
+    const nir_instr *parent_instr = instr->src[i].src.ssa->parent_instr;
+    if (parent_instr->type != nir_instr_type_alu)
+       continue;
+
+    const nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr);
+    if (alu_parent->op != nir_op_unpack_double_2x32_split_x &&
+        alu_parent->op != nir_op_unpack_double_2x32_split_y)
+       continue;
+
+    if (!alu_parent->src[0].src.is_ssa)
+       continue;
+
+    op[i] = get_nir_src(alu_parent->src[0].src);
+    op[i] = offset(retype(op[i], BRW_REGISTER_TYPE_DF), bld,
+                   alu_parent->src[0].swizzle[channel]);
+    if (alu_parent->op == nir_op_unpack_double_2x32_split_y)
+       op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 1);
+    else
+       op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 0);
+ }
bld.emit(FS_OPCODE_PACK, result, op[0], op[1]);
break;