diff options
author | Eric Anholt <[email protected]> | 2012-09-21 13:11:54 +0200 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-10-08 08:50:38 -0700 |
commit | fb5bf03a2092159166229eacf57c71587f762c57 (patch) | |
tree | 4d20e67362642dc845518ee7a1c304a56271a235 /src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | |
parent | 098acf6c84333edbb7b1228545e4bdb2572ee0cd (diff) |
i965/fs: Move constant propagation to the same codebase as copy prop.
This means that we don't get constant prop across into the first block after a
BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it
across control flow at some point. More importantly, with the next commit it
will help avoid O(n^2) with instruction count runtime for shaders that have
many constant moves.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 124 |
1 files changed, 123 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 1870f43feba..6eff80285d7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -34,6 +34,9 @@ struct acp_entry : public exec_node { bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { + if (entry->src.file == IMM) + return false; + if (inst->src[arg].file != entry->dst.file || inst->src[arg].reg != entry->dst.reg || inst->src[arg].reg_offset != entry->dst.reg_offset) { @@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) return true; } + +bool +fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) +{ + bool progress = false; + + if (entry->src.file != IMM) + return false; + + for (int i = 2; i >= 0; i--) { + if (inst->src[i].file != entry->dst.file || + inst->src[i].reg != entry->dst.reg || + inst->src[i].reg_offset != entry->dst.reg_offset) + continue; + + /* Don't bother with cases that should have been taken care of by the + * GLSL compiler's constant folding pass. + */ + if (inst->src[i].negate || inst->src[i].abs) + continue; + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + inst->src[i] = entry->src; + progress = true; + break; + + case BRW_OPCODE_MUL: + case BRW_OPCODE_ADD: + if (i == 1) { + inst->src[i] = entry->src; + progress = true; + } else if (i == 0 && inst->src[1].file != IMM) { + /* Fit this constant in by commuting the operands. + * Exception: we can't do this for 32-bit integer MUL + * because it's asymmetric. + */ + if (inst->opcode == BRW_OPCODE_MUL && + (inst->src[1].type == BRW_REGISTER_TYPE_D || + inst->src[1].type == BRW_REGISTER_TYPE_UD)) + break; + inst->src[0] = inst->src[1]; + inst->src[1] = entry->src; + progress = true; + } + break; + + case BRW_OPCODE_CMP: + case BRW_OPCODE_IF: + if (i == 1) { + inst->src[i] = entry->src; + progress = true; + } else if (i == 0 && inst->src[1].file != IMM) { + uint32_t new_cmod; + + new_cmod = brw_swap_cmod(inst->conditional_mod); + if (new_cmod != ~0u) { + /* Fit this constant in by swapping the operands and + * flipping the test + */ + inst->src[0] = inst->src[1]; + inst->src[1] = entry->src; + inst->conditional_mod = new_cmod; + progress = true; + } + } + break; + + case BRW_OPCODE_SEL: + if (i == 1) { + inst->src[i] = entry->src; + progress = true; + } else if (i == 0 && inst->src[1].file != IMM) { + inst->src[0] = inst->src[1]; + inst->src[1] = entry->src; + + /* If this was predicated, flipping operands means + * we also need to flip the predicate. + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NONE) { + inst->predicate_inverse = + !inst->predicate_inverse; + } + progress = true; + } + break; + + case SHADER_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = entry->src; + inst->src[0].imm.f = 1.0f / inst->src[0].imm.f; + progress = true; + } + break; + + case FS_OPCODE_PULL_CONSTANT_LOAD: + inst->src[i] = entry->src; + progress = true; + break; + + default: + break; + } + } + + return progress; +} + /** @file brw_fs_copy_propagation.cpp * * Support for local copy propagation by walking the list of instructions @@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, foreach_list(entry_node, acp) { acp_entry *entry = (acp_entry *)entry_node; + if (try_constant_propagate(inst, entry)) + progress = true; + for (int i = 0; i < 3; i++) { if (try_copy_propagate(inst, i, entry)) progress = true; @@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, ((inst->src[0].file == GRF && (inst->src[0].reg != inst->dst.reg || inst->src[0].reg_offset != inst->dst.reg_offset)) || - inst->src[0].file == UNIFORM) && + inst->src[0].file == UNIFORM || + inst->src[0].file == IMM) && inst->src[0].type == inst->dst.type && !inst->saturate && !inst->predicated && |