aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2012-09-21 13:11:54 +0200
committerEric Anholt <[email protected]>2012-10-08 08:50:38 -0700
commitfb5bf03a2092159166229eacf57c71587f762c57 (patch)
tree4d20e67362642dc845518ee7a1c304a56271a235 /src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
parent098acf6c84333edbb7b1228545e4bdb2572ee0cd (diff)
i965/fs: Move constant propagation to the same codebase as copy prop.
This means that we don't get constant prop across into the first block after a BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it across control flow at some point. More importantly, with the next commit it will help avoid O(n^2) with instruction count runtime for shaders that have many constant moves. Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp124
1 files changed, 123 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 1870f43feba..6eff80285d7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
+ if (entry->src.file == IMM)
+ return false;
+
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
@@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
return true;
}
+
+bool
+fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
+{
+ bool progress = false;
+
+ if (entry->src.file != IMM)
+ return false;
+
+ for (int i = 2; i >= 0; i--) {
+ if (inst->src[i].file != entry->dst.file ||
+ inst->src[i].reg != entry->dst.reg ||
+ inst->src[i].reg_offset != entry->dst.reg_offset)
+ continue;
+
+ /* Don't bother with cases that should have been taken care of by the
+ * GLSL compiler's constant folding pass.
+ */
+ if (inst->src[i].negate || inst->src[i].abs)
+ continue;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ inst->src[i] = entry->src;
+ progress = true;
+ break;
+
+ case BRW_OPCODE_MUL:
+ case BRW_OPCODE_ADD:
+ if (i == 1) {
+ inst->src[i] = entry->src;
+ progress = true;
+ } else if (i == 0 && inst->src[1].file != IMM) {
+ /* Fit this constant in by commuting the operands.
+ * Exception: we can't do this for 32-bit integer MUL
+ * because it's asymmetric.
+ */
+ if (inst->opcode == BRW_OPCODE_MUL &&
+ (inst->src[1].type == BRW_REGISTER_TYPE_D ||
+ inst->src[1].type == BRW_REGISTER_TYPE_UD))
+ break;
+ inst->src[0] = inst->src[1];
+ inst->src[1] = entry->src;
+ progress = true;
+ }
+ break;
+
+ case BRW_OPCODE_CMP:
+ case BRW_OPCODE_IF:
+ if (i == 1) {
+ inst->src[i] = entry->src;
+ progress = true;
+ } else if (i == 0 && inst->src[1].file != IMM) {
+ uint32_t new_cmod;
+
+ new_cmod = brw_swap_cmod(inst->conditional_mod);
+ if (new_cmod != ~0u) {
+ /* Fit this constant in by swapping the operands and
+ * flipping the test
+ */
+ inst->src[0] = inst->src[1];
+ inst->src[1] = entry->src;
+ inst->conditional_mod = new_cmod;
+ progress = true;
+ }
+ }
+ break;
+
+ case BRW_OPCODE_SEL:
+ if (i == 1) {
+ inst->src[i] = entry->src;
+ progress = true;
+ } else if (i == 0 && inst->src[1].file != IMM) {
+ inst->src[0] = inst->src[1];
+ inst->src[1] = entry->src;
+
+ /* If this was predicated, flipping operands means
+ * we also need to flip the predicate.
+ */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+ inst->predicate_inverse =
+ !inst->predicate_inverse;
+ }
+ progress = true;
+ }
+ break;
+
+ case SHADER_OPCODE_RCP:
+ /* The hardware doesn't do math on immediate values
+ * (because why are you doing that, seriously?), but
+ * the correct answer is to just constant fold it
+ * anyway.
+ */
+ assert(i == 0);
+ if (inst->src[0].imm.f != 0.0f) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[0] = entry->src;
+ inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
+ progress = true;
+ }
+ break;
+
+ case FS_OPCODE_PULL_CONSTANT_LOAD:
+ inst->src[i] = entry->src;
+ progress = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
/** @file brw_fs_copy_propagation.cpp
*
* Support for local copy propagation by walking the list of instructions
@@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
foreach_list(entry_node, acp) {
acp_entry *entry = (acp_entry *)entry_node;
+ if (try_constant_propagate(inst, entry))
+ progress = true;
+
for (int i = 0; i < 3; i++) {
if (try_copy_propagate(inst, i, entry))
progress = true;
@@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
((inst->src[0].file == GRF &&
(inst->src[0].reg != inst->dst.reg ||
inst->src[0].reg_offset != inst->dst.reg_offset)) ||
- inst->src[0].file == UNIFORM) &&
+ inst->src[0].file == UNIFORM ||
+ inst->src[0].file == IMM) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
!inst->predicated &&