diff options
| | | |
|---|---|---|
| author | Matt Turner <[email protected]> | 2014-06-11 13:43:15 -0700 |
| committer | Matt Turner <[email protected]> | 2014-07-06 18:18:52 -0700 |
| commit | 949991cc996368841531b36053bef101abea7937 (patch) | |
| tree | 84b88c21bc46a0c66d96068bd2320bc142c495b1 /src | |
| parent | 3c8dc48ad1d4061a2a1d0b9ea3126350b98274f0 (diff) | |
i965/vec4: Improve CSE performance by expiring some available expressions.
Port of commit 5daf867f to the vec4 code.
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 20 |
1 files changed, 20 insertions, 0 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 11e911f2b64..296142b740f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -128,6 +128,7 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
    void *cse_ctx = ralloc_context(NULL);
 
+   int ip = block->start_ip;
    for (vec4_instruction *inst = (vec4_instruction *)block->start;
         inst != block->end->next;
         inst = (vec4_instruction *) inst->next) {
@@ -193,6 +194,8 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
       foreach_in_list_safe(aeb_entry, entry, aeb) {
          for (int i = 0; i < 3; i++) {
+            src_reg *src = &entry->generator->src[i];
+
             /* Kill all AEB entries that use the destination we just
              * overwrote.
              */
@@ -202,8 +205,23 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
                ralloc_free(entry);
                break;
             }
+
+            /* Kill any AEB entries using registers that don't get reused any
+             * more -- a sure sign they'll fail operands_match().
+             */
+            int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
+                                         virtual_grf_end[src->reg * 4 + 1]),
+                                    MAX2(virtual_grf_end[src->reg * 4 + 2],
+                                         virtual_grf_end[src->reg * 4 + 3]));
+            if (src->file == GRF && last_reg_use < ip) {
+               entry->remove();
+               ralloc_free(entry);
+               break;
+            }
          }
       }
+
+      ip++;
    }
 
    ralloc_free(cse_ctx);
@@ -219,6 +237,8 @@ vec4_visitor::opt_cse()
 {
    bool progress = false;
 
+   calculate_live_intervals();
+
    cfg_t cfg(&instructions);
 
    for (int b = 0; b < cfg.num_blocks; b++) {
```