summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2011-09-01 16:21:14 -0700
committer Eric Anholt <[email protected]> 2011-09-08 21:40:39 -0700
commit cc9eb936c220267b6130b705fc696d05906a31df (patch)
tree 967b353f13242648f5a7c1d96d8073d7ac7d25cf
parent 42ce13195b94d0d51ca8e7fa5eed07fde8f37988 (diff)
i965/vs: Add support for copy propagation of the UNIFORM and ATTR files.
Removes 2.0% of the instructions from 35.7% of vertex shaders in shader-db.
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 13
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.h | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 58
3 files changed, 72 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 656589dee3a..436de2fea8e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -32,6 +32,19 @@ extern "C" {
namespace brw {
bool
+vec4_instruction::is_math()
+{
+   /* Report whether this instruction's opcode is one of the eight
+    * SHADER_OPCODE_* values this function classifies as math; every
+    * other opcode yields false.
+    */
+   switch (opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+   case SHADER_OPCODE_POW:
+      return true;
+   default:
+      return false;
+   }
+}
+
+bool
src_reg::equals(src_reg *r)
{
return (file == r->file &&
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 545e8f15a8d..7739a151e49 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -275,6 +275,8 @@ public:
*/
ir_instruction *ir;
const char *annotation;
+
+ bool is_math();
};
class vec4_visitor : public ir_visitor
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 1e24e2e1ccc..4b33df1f105 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -153,6 +153,61 @@ try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
return false;
}
+/**
+ * Tries to rewrite source operand \p arg of \p inst so that it reads
+ * directly from the register its channels were copied from.
+ *
+ * \param intel   context; only its gen field is consulted here.
+ * \param inst    instruction whose operand may be rewritten in place.
+ * \param arg     index into inst->src[] of the operand to rewrite.
+ * \param values  one tracked source per channel.  All four entries are
+ *                dereferenced, so none may be NULL.  (Presumably each is
+ *                the source that last wrote that channel -- confirm
+ *                against the caller.)
+ *
+ * \return true if inst->src[arg] was replaced, false if it was left alone.
+ */
+static bool
+try_copy_propagation(struct intel_context *intel,
+                     vec4_instruction *inst, int arg, src_reg *values[4])
+{
+   /* Copy propagation needs every channel to have come from the same
+    * register with the same type and source modifiers.  The per-channel
+    * swizzles are allowed to differ; they are merged into a single
+    * swizzle below.
+    */
+   src_reg value = *values[0];
+   for (int i = 1; i < 4; i++) {
+      /* This is equals() except we don't care about the swizzle. */
+      if (value.file != values[i]->file ||
+          value.reg != values[i]->reg ||
+          value.reg_offset != values[i]->reg_offset ||
+          value.type != values[i]->type ||
+          value.negate != values[i]->negate ||
+          value.abs != values[i]->abs) {
+         return false;
+      }
+   }
+
+   /* Compute the swizzle of the original register by swizzling the
+    * component loaded from each value according to the swizzle of
+    * operand we're going to change.
+    */
+   int s[4];
+   for (int i = 0; i < 4; i++) {
+      s[i] = BRW_GET_SWZ(values[i]->swizzle,
+                         BRW_GET_SWZ(inst->src[arg].swizzle, i));
+   }
+   value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
+
+   /* Only sources in the UNIFORM and ATTR files are propagated. */
+   if (value.file != UNIFORM &&
+       value.file != ATTR)
+      return false;
+
+   /* Fold the operand's own source modifiers into the propagated value.
+    * abs discards whatever sign the copy carried.  negate must flip the
+    * sign rather than force it: if the copy was already negated, the
+    * two negations cancel and the result has to read the plain value.
+    */
+   if (inst->src[arg].abs) {
+      value.negate = false;
+      value.abs = true;
+   }
+   if (inst->src[arg].negate)
+      value.negate = !value.negate;
+
+   /* FINISHME: We can't copy-propagate things that aren't normal
+    * vec8s into gen6 math instructions, because of the weird src
+    * handling for those instructions.  Just ignore them for now.
+    */
+   if (intel->gen >= 6 && inst->is_math())
+      return false;
+
+   inst->src[arg] = value;
+   return true;
+}
+
bool
vec4_visitor::opt_copy_propagation()
{
@@ -216,7 +271,8 @@ vec4_visitor::opt_copy_propagation()
if (c != 4)
continue;
- if (try_constant_propagation(inst, i, values))
+ if (try_constant_propagation(inst, i, values) ||
+ try_copy_propagation(intel, inst, i, values))
progress = true;
}