summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2013-10-21 10:57:29 -0700
committer: Eric Anholt <[email protected]> 2013-10-29 00:27:35 -0700
commit: 415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81 (patch)
tree: 7bbe735da9c6ae336d97f7f49864737a618a44fe
parent: 8bd4476010444642cff71e64146c91a484238dc2 (diff)
i965/vec4: Reduce working set size of live variables computation.
Orbital Explorer was generating a 4000 instruction geometry shader, which was taking 275 trips through dead code elimination and register coalescing, each of which updated live variables to get its work done, and invalidated those live variables afterwards. By using bitfields instead of bools (reducing the working set size by a factor of 8) in live variables analysis, it drops from 88% of the profile to 57%, and reduces overall runtime from I-got-bored-and-killed-it (Paul says 3+ minutes) to 10.5 seconds.

Compare to f179f419d1d0a03fad36c2b0a58e8b853bae6118 on the FS side.

Reviewed-by: Paul Berry <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp 41
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_live_variables.h 10
2 files changed, 28 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index fdf18410477..c568f7546c3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -83,8 +83,8 @@ vec4_live_variables::setup_def_use()
for (int j = 0; j < 4; j++) {
int c = BRW_GET_SWZ(inst->src[i].swizzle, j);
- if (!bd[b].def[reg * 4 + c])
- bd[b].use[reg * 4 + c] = true;
+ if (!BITSET_TEST(bd[b].def, reg * 4 + c))
+ BITSET_SET(bd[b].use, reg * 4 + c);
}
}
}
@@ -99,8 +99,8 @@ vec4_live_variables::setup_def_use()
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
int reg = inst->dst.reg;
- if (!bd[b].use[reg * 4 + c])
- bd[b].def[reg * 4 + c] = true;
+ if (!BITSET_TEST(bd[b].use, reg * 4 + c))
+ BITSET_SET(bd[b].def, reg * 4 + c);
}
}
}
@@ -126,12 +126,12 @@ vec4_live_variables::compute_live_variables()
for (int b = 0; b < cfg->num_blocks; b++) {
/* Update livein */
- for (int i = 0; i < num_vars; i++) {
- if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
- if (!bd[b].livein[i]) {
- bd[b].livein[i] = true;
- cont = true;
- }
+ for (int i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_livein = (bd[b].use[i] |
+ (bd[b].liveout[i] & ~bd[b].def[i]));
+ if (new_livein & ~bd[b].livein[i]) {
+ bd[b].livein[i] |= new_livein;
+ cont = true;
}
}
@@ -140,9 +140,11 @@ vec4_live_variables::compute_live_variables()
bblock_link *link = (bblock_link *)block_node;
bblock_t *block = link->block;
- for (int i = 0; i < num_vars; i++) {
- if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
- bd[b].liveout[i] = true;
+ for (int i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_liveout = (bd[block->block_num].livein[i] &
+ ~bd[b].liveout[i]);
+ if (new_liveout) {
+ bd[b].liveout[i] |= new_liveout;
cont = true;
}
}
@@ -159,11 +161,12 @@ vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
num_vars = v->virtual_grf_count * 4;
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
+ bitset_words = BITSET_WORDS(num_vars);
for (int i = 0; i < cfg->num_blocks; i++) {
- bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
- bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
- bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
- bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
+ bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
+ bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
+ bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
+ bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
}
setup_def_use();
@@ -248,12 +251,12 @@ vec4_visitor::calculate_live_intervals()
for (int b = 0; b < cfg.num_blocks; b++) {
for (int i = 0; i < livevars.num_vars; i++) {
- if (livevars.bd[b].livein[i]) {
+ if (BITSET_TEST(livevars.bd[b].livein, i)) {
start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);
end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);
}
- if (livevars.bd[b].liveout[i]) {
+ if (BITSET_TEST(livevars.bd[b].liveout, i)) {
start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->end_ip);
end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->end_ip);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
index 296468a2d34..b2d8b339822 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
@@ -25,6 +25,7 @@
*
*/
+#include "main/bitset.h"
#include "brw_vec4.h"
namespace brw {
@@ -36,18 +37,18 @@ struct block_data {
* Note that for our purposes, "defined" means unconditionally, completely
* defined.
*/
- bool *def;
+ BITSET_WORD *def;
/**
* Which variables are used before being defined in the block.
*/
- bool *use;
+ BITSET_WORD *use;
/** Which defs reach the entry point of the block. */
- bool *livein;
+ BITSET_WORD *livein;
/** Which defs reach the exit point of the block. */
- bool *liveout;
+ BITSET_WORD *liveout;
};
class vec4_live_variables {
@@ -65,6 +66,7 @@ public:
void *mem_ctx;
int num_vars;
+ int bitset_words;
/** Per-basic-block information on live variables */
struct block_data *bd;