summaryrefslogtreecommitdiffstats
path: root/src/intel
diff options
context:
space:
mode:
authorFrancisco Jerez <[email protected]>2017-10-23 13:47:10 -0700
committerFrancisco Jerez <[email protected]>2017-12-07 18:27:04 -0800
commit9355116bdad6ee9914554de8e48ba271bd36a8eb (patch)
treecee4ff9bf7eb7df5fb7ea5a430280871cb4e686b /src/intel
parentc3c1aa5aeb921caa2ec18c2320ceb94854e0f47c (diff)
intel/fs: Don't let undefined values prevent copy propagation.
This makes the dataflow propagation logic of the copy propagation pass more intelligent in cases where the destination of a copy is known to be undefined for some incoming CFG edges, building upon the definedness information provided by the last patch. Helps a few programs, and avoids a handful shader-db regressions from the next patch. shader-db results on ILK: total instructions in shared programs: 6541547 -> 6541523 (-0.00%) instructions in affected programs: 360 -> 336 (-6.67%) helped: 8 HURT: 0 LOST: 0 GAINED: 10 shader-db results on BDW: total instructions in shared programs: 8174323 -> 8173882 (-0.01%) instructions in affected programs: 7730 -> 7289 (-5.71%) helped: 5 HURT: 2 LOST: 0 GAINED: 4 shader-db results on SKL: total instructions in shared programs: 8185669 -> 8184598 (-0.01%) instructions in affected programs: 10364 -> 9293 (-10.33%) helped: 5 HURT: 2 LOST: 0 GAINED: 2 Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r--src/intel/compiler/brw_fs_copy_propagation.cpp50
1 files changed, 47 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index d4d01d783ca..af5635eacef 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -36,9 +36,12 @@
#include "util/bitset.h"
#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
#include "brw_cfg.h"
#include "brw_eu.h"
+using namespace brw;
+
namespace { /* avoid conflict with opt_copy_propagation_elements */
struct acp_entry : public exec_node {
fs_reg dst;
@@ -77,12 +80,19 @@ struct block_data {
* course of this block.
*/
BITSET_WORD *kill;
+
+ /**
+ * Which entries in the fs_copy_prop_dataflow acp table are guaranteed to
+ * have a fully uninitialized destination at the end of this block.
+ */
+ BITSET_WORD *undef;
};
class fs_copy_prop_dataflow
{
public:
fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
+ const fs_live_variables *live,
exec_list *out_acp[ACP_HASH_SIZE]);
void setup_initial_values();
@@ -92,6 +102,7 @@ public:
void *mem_ctx;
cfg_t *cfg;
+ const fs_live_variables *live;
acp_entry **acp;
int num_acp;
@@ -102,8 +113,9 @@ public:
} /* anonymous namespace */
fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
+ const fs_live_variables *live,
exec_list *out_acp[ACP_HASH_SIZE])
- : mem_ctx(mem_ctx), cfg(cfg)
+ : mem_ctx(mem_ctx), cfg(cfg), live(live)
{
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
@@ -124,6 +136,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
bd[block->num].liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words);
bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words);
for (int i = 0; i < ACP_HASH_SIZE; i++) {
foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
@@ -189,6 +202,18 @@ fs_copy_prop_dataflow::setup_initial_values()
}
}
}
+
+ /* Initialize the undef set. */
+ foreach_block (block, cfg) {
+ for (int i = 0; i < num_acp; i++) {
+ BITSET_SET(bd[block->num].undef, i);
+ for (unsigned off = 0; off < acp[i]->size_written; off += REG_SIZE) {
+ if (BITSET_TEST(live->block_data[block->num].defout,
+ live->var_from_reg(byte_offset(acp[i]->dst, off))))
+ BITSET_CLEAR(bd[block->num].undef, i);
+ }
+ }
+ }
}
/**
@@ -229,13 +254,30 @@ fs_copy_prop_dataflow::run()
for (int i = 0; i < bitset_words; i++) {
const BITSET_WORD old_livein = bd[block->num].livein[i];
+ BITSET_WORD livein_from_any_block = 0;
bd[block->num].livein[i] = ~0u;
foreach_list_typed(bblock_link, parent_link, link, &block->parents) {
bblock_t *parent = parent_link->block;
- bd[block->num].livein[i] &= bd[parent->num].liveout[i];
+ /* Consider ACP entries with a known-undefined destination to
+ * be available from the parent. This is valid because we're
+ * free to set the undefined variable equal to the source of
+ * the ACP entry without breaking the application's
+ * expectations, since the variable is undefined.
+ */
+ bd[block->num].livein[i] &= (bd[parent->num].liveout[i] |
+ bd[parent->num].undef[i]);
+ livein_from_any_block |= bd[parent->num].liveout[i];
}
+ /* Limit to the set of ACP entries that can possibly be available
+ * at the start of the block, since propagating from a variable
+ * which is guaranteed to be undefined (rather than potentially
+ * undefined for some dynamic control-flow paths) doesn't seem
+ * particularly useful.
+ */
+ bd[block->num].livein[i] &= livein_from_any_block;
+
if (old_livein != bd[block->num].livein[i])
progress = true;
}
@@ -834,6 +876,8 @@ fs_visitor::opt_copy_propagation()
for (int i = 0; i < cfg->num_blocks; i++)
out_acp[i] = new exec_list [ACP_HASH_SIZE];
+ calculate_live_intervals();
+
/* First, walk through each block doing local copy propagation and getting
* the set of copies available at the end of the block.
*/
@@ -843,7 +887,7 @@ fs_visitor::opt_copy_propagation()
}
/* Do dataflow analysis for those available copies. */
- fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp);
+ fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, live_intervals, out_acp);
/* Next, re-run local copy propagation, this time with the set of copies
* provided by the dataflow analysis available at the start of a block.