summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2015-06-29 14:49:08 -0400
committerRob Clark <[email protected]>2015-06-30 12:13:44 -0400
commit00b6b41482985ba4a81fbb479a47c06ec83f3797 (patch)
tree550957bcdf4dea96ac052b92971a1320e8608b26
parent906da495272b1be4c278f5f7402594e3c52521c1 (diff)
freedreno/ir3: cache defining instruction
It is silly to traverse back to find first instruction that writes part of a larger "virtual" register many times per instruction (plus per use as a src to later instructions). Cache this information so we only figure it out once. Signed-off-by: Rob Clark <[email protected]>
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.c7
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h2
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_ra.c151
3 files changed, 91 insertions, 69 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 6f6dad59793..1da6cf0477e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -722,15 +722,16 @@ ir3_clear_mark(struct ir3 *ir)
}
/* note: this will destroy instr->depth, don't do it until after sched! */
-void
+unsigned
ir3_count_instructions(struct ir3 *ir)
{
- unsigned ip = 0;
+ unsigned cnt = 0;
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- instr->ip = ip++;
+ instr->ip = cnt++;
}
block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
}
+ return cnt;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 9c35a763d58..bc0144568a5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -431,7 +431,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);
-void ir3_count_instructions(struct ir3 *ir);
+unsigned ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 9f6ff12a119..de48ecfe280 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
return set;
}
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+ BITSET_WORD *def; /* variables defined before used in block */
+ BITSET_WORD *use; /* variables used before defined in block */
+ BITSET_WORD *livein; /* which defs reach entry point of block */
+ BITSET_WORD *liveout; /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+ /* cached instruction 'definer' info: */
+ struct ir3_instruction *defn;
+ int off, sz, cls;
+};
+
/* register-assign context, per-shader */
struct ir3_ra_ctx {
struct ir3 *ir;
@@ -254,14 +269,7 @@ struct ir3_ra_ctx {
unsigned class_base[total_class_count];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
-};
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
- BITSET_WORD *def; /* variables defined before used in block */
- BITSET_WORD *use; /* variables used before defined in block */
- BITSET_WORD *livein; /* which defs reach entry point of block */
- BITSET_WORD *liveout; /* which defs reach exit point of block */
+ struct ir3_ra_instr_data *instrd;
};
static bool
@@ -307,12 +315,20 @@ writes_gpr(struct ir3_instruction *instr)
}
static struct ir3_instruction *
-get_definer(struct ir3_instruction *instr, int *sz, int *off)
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+ int *sz, int *off)
{
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
if (instr->fanin)
- return get_definer(instr->fanin, sz, off);
+ return get_definer(ctx, instr->fanin, sz, off);
+
+ if (id->defn) {
+ *sz = id->sz;
+ *off = id->off;
+ return id->defn;
+ }
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
/* What about the case where collect is subset of array, we
@@ -330,7 +346,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
if (!src->instr)
continue;
- dd = get_definer(src->instr, &dsz, &doff);
+ dd = get_definer(ctx, src->instr, &dsz, &doff);
if ((!d) || (dd->ip < d->ip)) {
d = dd;
@@ -339,7 +355,6 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
}
}
-
} else if (instr->cp.right || instr->cp.left) {
/* covers also the meta:fo case, which ends up w/ single
* scalar instructions for each component:
@@ -394,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(phi, &dsz, &doff);
+ dd = get_definer(ctx, phi, &dsz, &doff);
*sz = MAX2(*sz, dsz);
*off = doff;
@@ -428,7 +443,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+ dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
/* by definition, should come before: */
debug_assert(dd->ip < d->ip);
@@ -440,9 +455,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
d = dd;
}
+ id->defn = d;
+ id->sz = *sz;
+ id->off = *off;
+
return d;
}
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ if (instr->regs_count == 0)
+ continue;
+ id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+ id->cls = size_to_class(id->sz, is_half(id->defn));
+ }
+}
+
/* give each instruction a name (and ip), and count up the # of names
* of each class
*/
@@ -450,8 +481,7 @@ static void
ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
#ifdef DEBUG
instr->name = ~0;
@@ -465,9 +495,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (!writes_gpr(instr))
continue;
- defn = get_definer(instr, &sz, &off);
-
- if (defn != instr)
+ if (id->defn != instr)
continue;
/* arrays which don't fit in one of the pre-defined class
@@ -475,9 +503,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*
* TODO but we still need to allocate names for them, don't we??
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- instr->name = ctx->class_alloc_count[cls]++;
+ if (id->cls >= 0) {
+ instr->name = ctx->class_alloc_count[id->cls]++;
ctx->alloc_count++;
}
}
@@ -486,8 +513,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
+ unsigned n;
+
ir3_clear_mark(ctx->ir);
- ir3_count_instructions(ctx->ir);
+ n = ir3_count_instructions(ctx->ir);
+
+ ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ ra_block_find_definers(ctx, block);
+ }
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
ra_block_name_instructions(ctx, block);
@@ -503,6 +538,7 @@ ra_init(struct ir3_ra_ctx *ctx)
}
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+ ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
}
@@ -570,39 +606,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*/
if (writes_gpr(instr)) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
- ctx->def[name] = defn->ip;
- ctx->use[name] = defn->ip;
+ ctx->def[name] = id->defn->ip;
+ ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
BITSET_SET(bd->def, name);
- if (is_half(defn)) {
+ if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
- ctx->set->half_classes[cls - class_count]);
+ ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
- ctx->set->classes[cls]);
+ ctx->set->classes[id->cls]);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
- if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
- struct ir3_instruction *phi = defn->regs[0]->instr;
+ if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+ struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
@@ -621,13 +654,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_ssa_src(src, instr) {
if (writes_gpr(src)) {
- struct ir3_instruction *srcdefn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
- srcdefn = get_definer(src, &sz, &off);
- cls = size_to_class(sz, is_half(srcdefn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, srcdefn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
@@ -719,13 +749,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
/* need to fix things up to keep outputs live: */
for (unsigned i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *instr = ir->outputs[i];
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = ctx->instr_cnt;
}
}
@@ -795,15 +822,12 @@ static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
unsigned r = ra_get_node_reg(ctx->g, name);
- unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
+ unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
if (reg->flags & IR3_REG_RELATIV)
num += reg->offset;
@@ -811,7 +835,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
- if (is_half(defn))
+ if (is_half(id->defn))
reg->flags |= IR3_REG_HALF;
}
}
@@ -866,19 +890,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
for (j = 0; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
unsigned name, reg;
- cls = size_to_class(sz, is_half(defn));
- name = ra_name(ctx, cls, defn);
- reg = ctx->set->gpr_to_ra_reg[cls][j];
+ name = ra_name(ctx, id->cls, id->defn);
+ reg = ctx->set->gpr_to_ra_reg[id->cls][j];
ra_set_node_reg(ctx->g, name, reg);
- j += sz;
+ j += id->sz;
}
}
}