/*
 * Copyright (C) 2014 Rob Clark
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark
 */

#ifndef IR3_RA_H_
#define IR3_RA_H_

#include "util/bitset.h"

static const unsigned class_sizes[] = {
	1, 2, 3, 4,
	4 + 4, /* txd + 1d/2d */
	4 + 6, /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)

static const unsigned half_class_sizes[] = {
	1, 2, 3, 4,
};
#define half_class_count ARRAY_SIZE(half_class_sizes)

/* seems to just be used for compute shaders?  Seems like vec1 and vec3
 * are sufficient (for now?)
 */
static const unsigned high_class_sizes[] = {
	1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)

#define total_class_count (class_count + half_class_count + high_class_count)

/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS       (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS  (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG (4 * 48)

/* Number of virtual regs in a given class: */

static inline unsigned CLASS_REGS(unsigned i)
{
	assert(i < class_count);

	return (NUM_REGS - (class_sizes[i] - 1));
}

static inline unsigned HALF_CLASS_REGS(unsigned i)
{
	assert(i < half_class_count);

	return (NUM_REGS - (half_class_sizes[i] - 1));
}

static inline unsigned HIGH_CLASS_REGS(unsigned i)
{
	assert(i < high_class_count);

	return (NUM_HIGH_REGS - (high_class_sizes[i] - 1));
}

#define HALF_OFFSET (class_count)
#define HIGH_OFFSET (class_count + half_class_count)
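/* A minimal illustrative sketch (not part of the original header): how the
 * per-class counts above combine into the size of one flat RA register space.
 * The helper name count_flat_ra_regs() is hypothetical; it only demonstrates
 * the CLASS_REGS()/HALF_CLASS_REGS()/HIGH_CLASS_REGS() arithmetic.  For
 * example, with NUM_REGS = 192 the vec4 class contributes 192 - 3 = 189 base
 * registers, since a vec4 starting higher than r47.x would run past the end
 * of the register file.
 */
static inline unsigned count_flat_ra_regs(void)
{
	unsigned total = 0;
	for (unsigned i = 0; i < class_count; i++)
		total += CLASS_REGS(i);        /* full-precision classes first */
	for (unsigned i = 0; i < half_class_count; i++)
		total += HALF_CLASS_REGS(i);   /* then half-precision classes */
	for (unsigned i = 0; i < high_class_count; i++)
		total += HIGH_CLASS_REGS(i);   /* then the "high" (r48-r55) classes */
	return total;
}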
/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
	struct ra_regs *regs;
	unsigned int classes[class_count];
	unsigned int half_classes[half_class_count];
	unsigned int high_classes[high_class_count];

	/* The virtual register space flattens out all the classes,
	 * starting with full, followed by half and then high, ie:
	 *
	 *   scalar full  (starting at zero)
	 *   vec2 full
	 *   vec3 full
	 *   ...
	 *   vecN full
	 *   scalar half  (starting at first_half_reg)
	 *   vec2 half
	 *   ...
	 *   vecN half
	 *   scalar high  (starting at first_high_reg)
	 *   ...
	 *   vecN high
	 */
	unsigned first_half_reg, first_high_reg;

	/* maps flat virtual register space to base gpr: */
	uint16_t *ra_reg_to_gpr;
	/* maps cls,gpr to flat virtual register space: */
	uint16_t **gpr_to_ra_reg;
};

/* additional block-data (per-block) */
struct ir3_ra_block_data {
	BITSET_WORD *def;        /* variables defined before used in block */
	BITSET_WORD *use;        /* variables used before defined in block */
	BITSET_WORD *livein;     /* which defs reach entry point of block */
	BITSET_WORD *liveout;    /* which defs reach exit point of block */
};

/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
	/* cached instruction 'definer' info: */
	struct ir3_instruction *defn;
	int off, sz, cls;
};

/* register-assign context, per-shader */
struct ir3_ra_ctx {
	struct ir3_shader_variant *v;
	struct ir3 *ir;

	struct ir3_ra_reg_set *set;
	struct ra_graph *g;

	/* Are we in the scalar assignment pass?  In this pass, all
	 * larger-than-vec1 values have already been assigned and
	 * pre-colored, so we only consider scalar values.
	 */
	bool scalar_pass;

	unsigned alloc_count;
	unsigned r0_xyz_nodes;   /* ra node numbers for r0.[xyz] precolors */
	unsigned hr0_xyz_nodes;  /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
	/* one per class, plus one slot for arrays: */
	unsigned class_alloc_count[total_class_count + 1];
	unsigned class_base[total_class_count + 1];
	unsigned instr_cnt;
	unsigned *def, *use;     /* def/use table */
	struct ir3_ra_instr_data *instrd;

	/* Mapping vreg name back to instruction, used by the select_reg callback: */
	struct hash_table *name_to_instr;

	/* Tracking for select_reg callback */
	unsigned start_search_reg;
	unsigned max_target;

	/* Temporary buffer for def/use iterators
	 *
	 * The worst case should probably be an array w/ relative access (ie.
	 * all elements are def'd or use'd), and that can't be larger than
	 * the number of registers.
	 *
	 * NOTE we could declare this on the stack if needed, but I don't
	 * think there is a need for nested iterators.
	 */
	unsigned namebuf[NUM_REGS];
	unsigned namecnt, nameidx;
};

static inline int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
	unsigned name;
	debug_assert(id->cls >= 0);
	debug_assert(id->cls < total_class_count);
	/* we shouldn't get arrays here.. */
	name = ctx->class_base[id->cls] + id->defn->name;
	debug_assert(name < ctx->alloc_count);
	return name;
}

/* Get the scalar name of the n'th component of an instruction dst: */
static inline int
scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
{
	if (ctx->scalar_pass) {
		if (instr->opc == OPC_META_SPLIT) {
			debug_assert(n == 0);     /* split results in a scalar */
			struct ir3_instruction *src = instr->regs[1]->instr;
			return scalar_name(ctx, src, instr->split.off);
		} else if (instr->opc == OPC_META_COLLECT) {
			debug_assert(n < (instr->regs_count + 1));
			struct ir3_instruction *src = instr->regs[n + 1]->instr;
			return scalar_name(ctx, src, 0);
		}
	} else {
		debug_assert(n == 0);
	}

	return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
}
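/* A minimal sketch (an assumption, not code from the original source) of how
 * class_base[] relates to class_alloc_count[] in the ra_name() mapping above:
 * each class gets a contiguous range of vreg names, so class_base[] is the
 * running prefix sum of class_alloc_count[] (here the final "+1" slot is
 * assumed to hold the array allocations).  The helper name
 * compute_class_bases() is hypothetical and only illustrates the layout.
 */
static inline void
compute_class_bases(struct ir3_ra_ctx *ctx)
{
	unsigned base = 0;
	for (unsigned i = 0; i <= total_class_count; i++) {
		ctx->class_base[i] = base;         /* first vreg name for this class */
		base += ctx->class_alloc_count[i];
	}
	ctx->alloc_count = base;                   /* total number of vreg names */
}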
#define NO_NAME ~0

/*
 * Iterators to iterate the vreg names of an instruction's defs and uses:
 */

static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	if (!instr)
		return 0;

	/* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
	if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
		return 0;

	/* in scalar pass, we aren't considering virtual register classes, ie.
	 * if an instruction writes a vec2, then it defines two different
	 * scalar register names.
	 */
	if (ctx->scalar_pass)
		return dest_regs(instr);

	return 1;
}

#define foreach_name_n(__name, __n, __ctx, __instr) \
	for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
	     (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
	foreach_name_n(__name, __n, __ctx, __instr)

static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
	if (ctx->nameidx < ctx->namecnt)
		return ctx->namebuf[ctx->nameidx++];
	return NO_NAME;
}

static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
	assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
	ctx->namebuf[ctx->namecnt++] = name;
}

static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	if (!writes_gpr(instr))
		return NO_NAME;

	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
	struct ir3_register *dst = instr->regs[0];

	if (dst->flags & IR3_REG_ARRAY) {
		struct ir3_array *arr =
			ir3_lookup_array(ctx->ir, dst->array.id);

		/* indirect write is treated like a write to all array
		 * elements, since we don't know which one is actually
		 * written:
		 */
		if (dst->flags & IR3_REG_RELATIV) {
			for (unsigned i = 0; i < arr->length; i++) {
				__ra_itr_push(ctx, arr->base + i);
			}
		} else {
			__ra_itr_push(ctx, arr->base + dst->array.offset);
			debug_assert(dst->array.offset < arr->length);
		}
	} else if (id->defn == instr) {
		foreach_name_n (name, i, ctx, instr) {
			/* tex instructions actually have a wrmask, and
			 * don't touch masked out components.  We can't do
			 * anything useful about that in the first pass,
			 * but in the scalar pass we can realize these
			 * registers are available:
			 */
			if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
					!(instr->regs[0]->wrmask & (1 << i)))
				continue;
			__ra_itr_push(ctx, name);
		}
	}

	return __ra_itr_pop(ctx);
}

static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	struct ir3_register *reg;
	foreach_src (reg, instr) {
		if (reg->flags & IR3_REG_ARRAY) {
			struct ir3_array *arr =
				ir3_lookup_array(ctx->ir, reg->array.id);

			/* indirect read is treated like a read from all array
			 * elements, since we don't know which one is actually
			 * read:
			 */
			if (reg->flags & IR3_REG_RELATIV) {
				for (unsigned i = 0; i < arr->length; i++) {
					__ra_itr_push(ctx, arr->base + i);
				}
			} else {
				__ra_itr_push(ctx, arr->base + reg->array.offset);
				debug_assert(reg->array.offset < arr->length);
			}
		} else {
			foreach_name_n (name, i, ctx, reg->instr) {
				/* split takes a src w/ wrmask potentially greater
				 * than 0x1, but it really only cares about a single
				 * component.  This shows up in splits coming out
				 * of a tex instruction w/ wrmask=.z, for example.
				 */
				if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
						!(i == instr->split.off))
					continue;
				__ra_itr_push(ctx, name);
			}
		}
	}

	return __ra_itr_pop(ctx);
}

#define foreach_def(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))

int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);

#endif /* IR3_RA_H_ */