summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2012-09-21 16:06:17 +0200
committerEric Anholt <[email protected]>2012-10-08 08:50:38 -0700
commit6a514494fa4c45e921bd6af7f3187a67c1e8d9d2 (patch)
treec3d344de6a37dab2d8e74847d8d4629687c4b087 /src/mesa
parentfb5bf03a2092159166229eacf57c71587f762c57 (diff)
i965/fs: Improve performance of copy/constant propagation.
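Use a simple chaining hash table for the ACP, keyed on the destination register of each entry. This is not really very good, because we still do a full walk of the table per destination write (entries are hashed by destination only, so removing the entries whose source is overwritten has to visit every bucket), but it still reduces fp-long-alu runtime from 5.3 s to 3.9 s. Reviewed-by: Kenneth Graunke <[email protected]>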
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp45
2 files changed, 30 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 269dd0aceb9..2d923576543 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -246,8 +246,7 @@ public:
bool opt_copy_propagate();
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
- bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
- exec_list *acp);
+ bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block);
bool register_coalesce();
bool register_coalesce_2();
bool compute_to_mrf();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 6eff80285d7..9319529f2d1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -195,38 +195,52 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
* list.
*/
bool
-fs_visitor::opt_copy_propagate_local(void *mem_ctx,
- fs_bblock *block, exec_list *acp)
+fs_visitor::opt_copy_propagate_local(void *mem_ctx, fs_bblock *block)
{
bool progress = false;
+ int acp_count = 16;
+ exec_list acp[acp_count];
for (fs_inst *inst = block->start;
inst != block->end->next;
inst = (fs_inst *)inst->next) {
/* Try propagating into this instruction. */
- foreach_list(entry_node, acp) {
- acp_entry *entry = (acp_entry *)entry_node;
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file != GRF)
+ continue;
- if (try_constant_propagate(inst, entry))
- progress = true;
+ foreach_list(entry_node, &acp[inst->src[i].reg % acp_count]) {
+ acp_entry *entry = (acp_entry *)entry_node;
- for (int i = 0; i < 3; i++) {
- if (try_copy_propagate(inst, i, entry))
- progress = true;
- }
+ if (try_constant_propagate(inst, entry))
+ progress = true;
+
+ if (try_copy_propagate(inst, i, entry))
+ progress = true;
+ }
}
/* kill the destination from the ACP */
if (inst->dst.file == GRF) {
- foreach_list_safe(entry_node, acp) {
+ foreach_list_safe(entry_node, &acp[inst->dst.reg % acp_count]) {
acp_entry *entry = (acp_entry *)entry_node;
- if (inst->overwrites_reg(entry->dst) ||
- inst->overwrites_reg(entry->src)) {
+ if (inst->overwrites_reg(entry->dst)) {
entry->remove();
}
}
+
+ /* Oops, we only have the chaining hash based on the destination, not
+ * the source, so walk across the entire table.
+ */
+ for (int i = 0; i < acp_count; i++) {
+ foreach_list_safe(entry_node, &acp[i]) {
+ acp_entry *entry = (acp_entry *)entry_node;
+ if (inst->overwrites_reg(entry->src))
+ entry->remove();
+ }
+ }
}
/* If this instruction is a raw copy, add it to the ACP. */
@@ -246,7 +260,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
acp_entry *entry = ralloc(mem_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
- acp->push_tail(entry);
+ acp[entry->dst.reg % acp_count].push_tail(entry);
}
}
@@ -263,9 +277,8 @@ fs_visitor::opt_copy_propagate()
for (int b = 0; b < cfg.num_blocks; b++) {
fs_bblock *block = cfg.blocks[b];
- exec_list acp;
- progress = opt_copy_propagate_local(mem_ctx, block, &acp) || progress;
+ progress = opt_copy_propagate_local(mem_ctx, block) || progress;
}
ralloc_free(mem_ctx);