aboutsummaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2020-03-21 14:44:44 -0700
committer: Marge Bot <[email protected]> 2020-03-27 22:41:36 +0000
commit: 6347c2ea89bde624dd16cff6741db57e89d88ad5 (patch)
tree: 5c887b14ec3a11036245f2d5a07f6d129601f7d1 /src/freedreno
parent: bf0aa7ed90231540c66328a515928dd8e3324343 (diff)
freedreno/ir3/ra: add def/use iterators
Decouple the messy logic of figuring out vreg names defined/used by an instruction from the logic of what to do about it by introducing iterators. There is still *some* array vs SSA special casing in ra_block_compute_live_ranges(), but less than before.

And this will avoid introducing a second copy of the def/use logic in a following patch which uses the live ranges to calculate the maximum # of live values (which is the optimal target for max physical register window to round-robin within).

Signed-off-by: Rob Clark <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4272>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- src/freedreno/ir3/ir3_ra.c 176
-rw-r--r-- src/freedreno/ir3/ir3_ra.h 159
2 files changed, 202 insertions, 133 deletions
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index d4663f6167d..fa379c3495b 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -585,159 +585,69 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
foreach_instr (instr, &block->instr_list) {
- struct ir3_instruction *src;
- struct ir3_register *reg;
-
- if (writes_gpr(instr)) {
- struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- struct ir3_register *dst = instr->regs[0];
-
- if (dst->flags & IR3_REG_ARRAY) {
- struct ir3_array *arr =
- ir3_lookup_array(ctx->ir, dst->array.id);
- unsigned i;
+ foreach_def (name, ctx, instr) {
+ if (name_is_array(ctx, name)) {
+ struct ir3_array *arr = name_to_array(ctx, name);
arr->start_ip = MIN2(arr->start_ip, instr->ip);
arr->end_ip = MAX2(arr->end_ip, instr->ip);
- /* set the node class now.. in case we don't encounter
- * this array dst again. From register_alloc algo's
- * perspective, these are all single/scalar regs:
- */
- for (i = 0; i < arr->length; i++) {
+ for (unsigned i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
if(arr->half)
ra_set_node_class(ctx->g, name, ctx->set->half_classes[0]);
else
ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
}
-
- /* indirect write is treated like a write to all array
- * elements, since we don't know which one is actually
- * written:
- */
- if (dst->flags & IR3_REG_RELATIV) {
- for (i = 0; i < arr->length; i++) {
- unsigned name = arr->base + i;
- def(name, instr);
- }
+ } else {
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ if (is_high(instr)) {
+ ra_set_node_class(ctx->g, name,
+ ctx->set->high_classes[id->cls - HIGH_OFFSET]);
+ } else if (is_half(instr)) {
+ ra_set_node_class(ctx->g, name,
+ ctx->set->half_classes[id->cls - HALF_OFFSET]);
} else {
- unsigned name = arr->base + dst->array.offset;
- def(name, instr);
- }
- } else if (id->defn == instr) {
- /* in scalar pass, we aren't considering virtual register
- * classes, ie. if an instruction writes a vec2, then it
- * defines two different scalar register names.
- */
- unsigned n = ctx->scalar_pass ? dest_regs(instr) : 1;
- for (unsigned i = 0; i < n; i++) {
- unsigned name = scalar_name(ctx, instr, i);
-
- /* split/collect instructions have duplicate names
- * as real instructions, so they skip the hashtable:
- */
- if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
- (instr->opc == OPC_META_COLLECT))) {
- /* this is slightly annoying, we can't just use an
- * integer on the stack
- */
- unsigned *key = ralloc(ctx->name_to_instr, unsigned);
- *key = name;
- debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
- _mesa_hash_table_insert(ctx->name_to_instr, key, instr);
- }
-
- /* tex instructions actually have a wrmask, and
- * don't touch masked out components. We can't do
- * anything useful about that in the first pass,
- * but in the scalar pass we can realize these
- * registers are available:
- */
- if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
- !(instr->regs[0]->wrmask & (1 << i)))
- continue;
-
- def(name, instr);
-
- if ((instr->opc == OPC_META_INPUT) && first_non_input)
- use(name, first_non_input);
-
- if (is_high(instr)) {
- ra_set_node_class(ctx->g, name,
- ctx->set->high_classes[id->cls - HIGH_OFFSET]);
- } else if (is_half(instr)) {
- ra_set_node_class(ctx->g, name,
- ctx->set->half_classes[id->cls - HALF_OFFSET]);
- } else {
- ra_set_node_class(ctx->g, name,
- ctx->set->classes[id->cls]);
- }
+ ra_set_node_class(ctx->g, name,
+ ctx->set->classes[id->cls]);
}
}
+
+ def(name, instr);
+
+ if ((instr->opc == OPC_META_INPUT) && first_non_input)
+ use(name, first_non_input);
}
- foreach_src (reg, instr) {
- if (reg->flags & IR3_REG_ARRAY) {
- struct ir3_array *arr =
- ir3_lookup_array(ctx->ir, reg->array.id);
+ foreach_use (name, ctx, instr) {
+ if (name_is_array(ctx, name)) {
+ struct ir3_array *arr = name_to_array(ctx, name);
+
arr->start_ip = MIN2(arr->start_ip, instr->ip);
arr->end_ip = MAX2(arr->end_ip, instr->ip);
- /* indirect read is treated like a read from all array
- * elements, since we don't know which one is actually
- * read:
+ /* NOTE: arrays are not SSA so unconditionally
+ * set use bit:
*/
- if (reg->flags & IR3_REG_RELATIV) {
- unsigned i;
- for (i = 0; i < arr->length; i++) {
- unsigned name = arr->base + i;
- use(name, instr);
- BITSET_SET(bd->use, name);
- }
- } else {
- unsigned name = arr->base + reg->array.offset;
- use(name, instr);
- /* NOTE: arrays are not SSA so unconditionally
- * set use bit:
- */
- BITSET_SET(bd->use, name);
- debug_assert(reg->array.offset < arr->length);
- }
- } else if (ctx->scalar_pass) {
- struct ir3_instruction *src = reg->instr;
- /* skip things that aren't SSA: */
- unsigned n = src ? dest_regs(src) : 0;
-
- /* in scalar pass, we aren't considering virtual register
- * classes, ie. if an instruction writes a vec2, then it
- * defines two different scalar register names.
- *
- * We need to traverse up thru collect/split to find the
- * actual non-meta instruction names for each of the
- * components:
+ BITSET_SET(bd->use, name);
+ }
+
+ use(name, instr);
+ }
+
+ foreach_name (name, ctx, instr) {
+ /* split/collect instructions have duplicate names
+ * as real instructions, so they skip the hashtable:
+ */
+ if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
+ (instr->opc == OPC_META_COLLECT))) {
+ /* this is slightly annoying, we can't just use an
+ * integer on the stack
*/
- for (unsigned i = 0; i < n; i++) {
- /* Need to filter out a couple special cases, ie.
- * writes to a0.x or p0.x:
- */
- if (!writes_gpr(src))
- continue;
-
- /* split takes a src w/ wrmask potentially greater
- * than 0x1, but it really only cares about a single
- * component. This shows up in splits coming out of
- * a tex instruction w/ wrmask=.z, for example.
- */
- if ((instr->opc == OPC_META_SPLIT) &&
- !(i == instr->split.off))
- continue;
-
- use(scalar_name(ctx, src, i), instr);
- }
- } else if ((src = ssa(reg)) && writes_gpr(src)) {
- unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
- use(name, instr);
+ unsigned *key = ralloc(ctx->name_to_instr, unsigned);
+ *key = name;
+ debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
+ _mesa_hash_table_insert(ctx->name_to_instr, key, instr);
}
}
}
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h
index f9c2155b7df..db21eb9f220 100644
--- a/src/freedreno/ir3/ir3_ra.h
+++ b/src/freedreno/ir3/ir3_ra.h
@@ -134,6 +134,18 @@ struct ir3_ra_ctx {
/* Tracking for select_reg callback */
unsigned start_search_reg;
unsigned max_target;
+
+ /* Temporary buffer for def/use iterators
+ *
+ * The worst case should probably be an array w/ relative access (ie.
+ * all elements are def'd or use'd), and that can't be larger than
+ * the number of registers.
+ *
+ * NOTE we could declare this on the stack if needed, but I don't
+ * think there is a need for nested iterators.
+ */
+ unsigned namebuf[NUM_REGS];
+ unsigned namecnt, nameidx;
};
static inline int
@@ -182,6 +194,153 @@ writes_gpr(struct ir3_instruction *instr)
return true;
}
+#define NO_NAME ~0
+
+/*
+ * Iterators over the vreg names that an instruction defines (defs) and uses
+ */
+
+static inline unsigned
+__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
+{
+ if (!instr)
+ return 0;
+
+ /* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
+ if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
+ return 0;
+
+ /* in scalar pass, we aren't considering virtual register classes, ie.
+ * if an instruction writes a vec2, then it defines two different scalar
+ * register names.
+ */
+ if (ctx->scalar_pass)
+ return dest_regs(instr);
+
+ return 1;
+}
+
+#define foreach_name_n(__name, __n, __ctx, __instr) \
+ for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
+ (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)
+
+#define foreach_name(__name, __ctx, __instr) \
+ foreach_name_n(__name, __n, __ctx, __instr)
+
+static inline unsigned
+__ra_itr_pop(struct ir3_ra_ctx *ctx)
+{
+ if (ctx->nameidx < ctx->namecnt)
+ return ctx->namebuf[ctx->nameidx++];
+ return NO_NAME;
+}
+
+static inline void
+__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
+{
+ assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
+ ctx->namebuf[ctx->namecnt++] = name;
+}
+
+static inline unsigned
+__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
+{
+ /* nested use is not supported: */
+ assert(ctx->namecnt == ctx->nameidx);
+
+ ctx->namecnt = ctx->nameidx = 0;
+
+ if (!writes_gpr(instr))
+ return NO_NAME;
+
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ struct ir3_register *dst = instr->regs[0];
+
+ if (dst->flags & IR3_REG_ARRAY) {
+ struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);
+
+ /* indirect write is treated like a write to all array
+ * elements, since we don't know which one is actually
+ * written:
+ */
+ if (dst->flags & IR3_REG_RELATIV) {
+ for (unsigned i = 0; i < arr->length; i++) {
+ __ra_itr_push(ctx, arr->base + i);
+ }
+ } else {
+ __ra_itr_push(ctx, arr->base + dst->array.offset);
+ debug_assert(dst->array.offset < arr->length);
+ }
+ } else if (id->defn == instr) {
+ foreach_name_n (name, i, ctx, instr) {
+ /* tex instructions actually have a wrmask, and
+ * don't touch masked out components. We can't do
+ * anything useful about that in the first pass,
+ * but in the scalar pass we can realize these
+ * registers are available:
+ */
+ if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
+ !(instr->regs[0]->wrmask & (1 << i)))
+ continue;
+ __ra_itr_push(ctx, name);
+ }
+ }
+
+ return __ra_itr_pop(ctx);
+}
+
+static inline unsigned
+__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
+{
+ /* nested use is not supported: */
+ assert(ctx->namecnt == ctx->nameidx);
+
+ ctx->namecnt = ctx->nameidx = 0;
+
+ struct ir3_register *reg;
+ foreach_src (reg, instr) {
+ if (reg->flags & IR3_REG_ARRAY) {
+ struct ir3_array *arr =
+ ir3_lookup_array(ctx->ir, reg->array.id);
+
+ /* indirect read is treated like a read from all array
+ * elements, since we don't know which one is actually
+ * read:
+ */
+ if (reg->flags & IR3_REG_RELATIV) {
+ for (unsigned i = 0; i < arr->length; i++) {
+ __ra_itr_push(ctx, arr->base + i);
+ }
+ } else {
+ __ra_itr_push(ctx, arr->base + reg->array.offset);
+ debug_assert(reg->array.offset < arr->length);
+ }
+ } else {
+ foreach_name_n (name, i, ctx, reg->instr) {
+ /* split takes a src w/ wrmask potentially greater
+ * than 0x1, but it really only cares about a single
+ * component. This shows up in splits coming out of
+ * a tex instruction w/ wrmask=.z, for example.
+ */
+ if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
+ !(i == instr->split.off))
+ continue;
+ __ra_itr_push(ctx, name);
+ }
+ }
+ }
+
+ return __ra_itr_pop(ctx);
+}
+
+#define foreach_def(__name, __ctx, __instr) \
+ for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
+ __name != NO_NAME; __name = __ra_itr_pop(__ctx))
+
+#define foreach_use(__name, __ctx, __instr) \
+ for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
+ __name != NO_NAME; __name = __ra_itr_pop(__ctx))
+
int ra_size_to_class(unsigned sz, bool half, bool high);
#endif /* IR3_RA_H_ */