diff options
author | Eric Anholt <[email protected]> | 2012-09-21 16:06:17 +0200 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-10-08 08:50:38 -0700 |
commit | 6a514494fa4c45e921bd6af7f3187a67c1e8d9d2 (patch) | |
tree | c3d344de6a37dab2d8e74847d8d4629687c4b087 /src/mesa | |
parent | fb5bf03a2092159166229eacf57c71587f762c57 (diff) |
i965/fs: Improve performance of copy/constant propagation.
Use a simple chaining hash table for the ACP. This is not really very good,
because we still do a full walk of the table per destination write, but it
still reduces fp-long-alu runtime from 5.3s to 3.9s.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 3 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 45 |
2 files changed, 30 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 269dd0aceb9..2d923576543 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -246,8 +246,7 @@ public:
    bool opt_copy_propagate();
    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
-   bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
-                                 exec_list *acp);
+   bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block);
    bool register_coalesce();
    bool register_coalesce_2();
    bool compute_to_mrf();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 6eff80285d7..9319529f2d1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -195,38 +195,52 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
  * list.
  */
 bool
-fs_visitor::opt_copy_propagate_local(void *mem_ctx,
-                                     fs_bblock *block, exec_list *acp)
+fs_visitor::opt_copy_propagate_local(void *mem_ctx, fs_bblock *block)
 {
    bool progress = false;
+   int acp_count = 16;
+   exec_list acp[acp_count];

    for (fs_inst *inst = block->start;
         inst != block->end->next;
         inst = (fs_inst *)inst->next) {

       /* Try propagating into this instruction. */
-      foreach_list(entry_node, acp) {
-         acp_entry *entry = (acp_entry *)entry_node;
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != GRF)
+            continue;

-         if (try_constant_propagate(inst, entry))
-            progress = true;
+         foreach_list(entry_node, &acp[inst->src[i].reg % acp_count]) {
+            acp_entry *entry = (acp_entry *)entry_node;

-         for (int i = 0; i < 3; i++) {
-            if (try_copy_propagate(inst, i, entry))
-               progress = true;
-         }
+            if (try_constant_propagate(inst, entry))
+               progress = true;
+
+            if (try_copy_propagate(inst, i, entry))
+               progress = true;
+         }
       }

       /* kill the destination from the ACP */
       if (inst->dst.file == GRF) {
-         foreach_list_safe(entry_node, acp) {
+         foreach_list_safe(entry_node, &acp[inst->dst.reg % acp_count]) {
             acp_entry *entry = (acp_entry *)entry_node;

-            if (inst->overwrites_reg(entry->dst) ||
-                inst->overwrites_reg(entry->src)) {
+            if (inst->overwrites_reg(entry->dst)) {
                entry->remove();
             }
          }
+
+         /* Oops, we only have the chaining hash based on the destination, not
+          * the source, so walk across the entire table.
+          */
+         for (int i = 0; i < acp_count; i++) {
+            foreach_list_safe(entry_node, &acp[i]) {
+               acp_entry *entry = (acp_entry *)entry_node;
+               if (inst->overwrites_reg(entry->src))
+                  entry->remove();
+            }
+         }
       }

       /* If this instruction is a raw copy, add it to the ACP. */
@@ -246,7 +260,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
          acp_entry *entry = ralloc(mem_ctx, acp_entry);
          entry->dst = inst->dst;
          entry->src = inst->src[0];
-         acp->push_tail(entry);
+         acp[entry->dst.reg % acp_count].push_tail(entry);
       }
    }

@@ -263,9 +277,8 @@ fs_visitor::opt_copy_propagate()

    for (int b = 0; b < cfg.num_blocks; b++) {
       fs_bblock *block = cfg.blocks[b];
-      exec_list acp;

-      progress = opt_copy_propagate_local(mem_ctx, block, &acp) || progress;
+      progress = opt_copy_propagate_local(mem_ctx, block) || progress;
    }

    ralloc_free(mem_ctx);