diff options
-rw-r--r-- | src/freedreno/ir3/ir3.c | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3.h | 17 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_legalize.c | 31 |
3 files changed, 30 insertions, 20 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 463c7664332..209dfb36b25 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -996,8 +996,6 @@ static struct ir3_register * reg_create(struct ir3 *shader, reg->wrmask = 1; reg->flags = flags; reg->num = num; - if (shader->compiler->gpu_id >= 600) - reg->merged = true; return reg; } diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index b191c0af44c..d3e4a988dd9 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -121,9 +121,7 @@ struct ir3_register { * Note the size field isn't important for relative const (since * we don't have to do register allocation for constants). */ - unsigned size : 15; - - bool merged : 1; /* half-regs conflict with full regs (ie >= a6xx) */ + unsigned size : 16; /* normal registers: * the component is in the low two bits of the reg #, so @@ -1763,13 +1761,14 @@ INSTR0(META_TEX_PREFETCH); typedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG); typedef struct { + bool mergedregs; regmaskstate_t mask; } regmask_t; static inline bool __regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n) { - if (reg->merged) { + if (regmask->mergedregs) { /* a6xx+ case, with merged register file, we track things in terms * of half-precision registers, with a full precisions register * using two half-precision slots: @@ -1794,7 +1793,7 @@ __regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n) static inline void __regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n) { - if (reg->merged) { + if (regmask->mergedregs) { /* a6xx+ case, with merged register file, we track things in terms * of half-precision registers, with a full precisions register * using two half-precision slots: @@ -1816,9 +1815,10 @@ __regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n) } } -static inline void regmask_init(regmask_t *regmask) +static inline void regmask_init(regmask_t *regmask, bool mergedregs) { - 
memset(regmask, 0, sizeof(*regmask)); + memset(&regmask->mask, 0, sizeof(regmask->mask)); + regmask->mergedregs = mergedregs; } static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) @@ -1835,6 +1835,9 @@ static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) { + assert(dst->mergedregs == a->mergedregs); + assert(dst->mergedregs == b->mergedregs); + for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++) dst->mask[i] = a->mask[i] | b->mask[i]; } diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 3dc6a6c7371..9b5dae7b39e 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -88,6 +88,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct ir3_legalize_state *state = &bd->state; bool last_input_needs_ss = false; bool has_tex_prefetch = false; + bool mergedregs = ctx->compiler->gpu_id >= 600; /* our input state is the OR of all predecessor blocks' state: */ set_foreach(block->predecessors, entry) { @@ -132,15 +133,15 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (last_n && is_barrier(last_n)) { n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; last_input_needs_ss = false; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); - regmask_init(&state->needs_sy); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); + regmask_init(&state->needs_sy, mergedregs); } if (last_n && (last_n->opc == OPC_PREDT)) { n->flags |= IR3_INSTR_SS; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); } /* NOTE: consider dst register too.. 
it could happen that @@ -161,13 +162,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (regmask_get(&state->needs_ss, reg)) { n->flags |= IR3_INSTR_SS; last_input_needs_ss = false; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); } if (regmask_get(&state->needs_sy, reg)) { n->flags |= IR3_INSTR_SY; - regmask_init(&state->needs_sy); + regmask_init(&state->needs_sy, mergedregs); } } @@ -184,8 +185,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (regmask_get(&state->needs_ss_war, reg)) { n->flags |= IR3_INSTR_SS; last_input_needs_ss = false; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); } if (last_rel && (reg->num == regid(REG_A0, 0))) { @@ -710,6 +711,7 @@ bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) { struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); + bool mergedregs = ctx->compiler->gpu_id >= 600; bool progress; ctx->so = so; @@ -719,7 +721,14 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) /* allocate per-block data: */ foreach_block (block, &ir->block_list) { - block->data = rzalloc(ctx, struct ir3_legalize_block_data); + struct ir3_legalize_block_data *bd = + rzalloc(ctx, struct ir3_legalize_block_data); + + regmask_init(&bd->state.needs_ss_war, mergedregs); + regmask_init(&bd->state.needs_ss, mergedregs); + regmask_init(&bd->state.needs_sy, mergedregs); + + block->data = bd; } ir3_remove_nops(ir); |