aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/freedreno/ir3/ir3.c | 2
-rw-r--r-- src/freedreno/ir3/ir3.h | 17
-rw-r--r-- src/freedreno/ir3/ir3_legalize.c | 31
3 files changed, 30 insertions, 20 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 463c7664332..209dfb36b25 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -996,8 +996,6 @@ static struct ir3_register * reg_create(struct ir3 *shader,
reg->wrmask = 1;
reg->flags = flags;
reg->num = num;
- if (shader->compiler->gpu_id >= 600)
- reg->merged = true;
return reg;
}
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index b191c0af44c..d3e4a988dd9 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -121,9 +121,7 @@ struct ir3_register {
* Note the size field isn't important for relative const (since
* we don't have to do register allocation for constants).
*/
- unsigned size : 15;
-
- bool merged : 1; /* half-regs conflict with full regs (ie >= a6xx) */
+ unsigned size : 16;
/* normal registers:
* the component is in the low two bits of the reg #, so
@@ -1763,13 +1761,14 @@ INSTR0(META_TEX_PREFETCH);
typedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG);
typedef struct {
+ bool mergedregs;
regmaskstate_t mask;
} regmask_t;
static inline bool
__regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n)
{
- if (reg->merged) {
+ if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full precisions register
* using two half-precision slots:
@@ -1794,7 +1793,7 @@ __regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n)
static inline void
__regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n)
{
- if (reg->merged) {
+ if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full precisions register
* using two half-precision slots:
@@ -1816,9 +1815,10 @@ __regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n)
}
}
-static inline void regmask_init(regmask_t *regmask)
+static inline void regmask_init(regmask_t *regmask, bool mergedregs)
{
- memset(regmask, 0, sizeof(*regmask));
+ memset(&regmask->mask, 0, sizeof(regmask->mask)); /* clear only the bitset, not the whole struct */
+ regmask->mergedregs = mergedregs; /* consulted by __regmask_get()/__regmask_set() to pick the tracking scheme */
}
static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
@@ -1835,6 +1835,9 @@ static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
+ assert(dst->mergedregs == a->mergedregs); /* bit layout depends on mergedregs, so dst and both sources must agree */
+ assert(dst->mergedregs == b->mergedregs);
+
for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
dst->mask[i] = a->mask[i] | b->mask[i];
}
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 3dc6a6c7371..9b5dae7b39e 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -88,6 +88,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
struct ir3_legalize_state *state = &bd->state;
bool last_input_needs_ss = false;
bool has_tex_prefetch = false;
+ bool mergedregs = ctx->compiler->gpu_id >= 600;
/* our input state is the OR of all predecessor blocks' state: */
set_foreach(block->predecessors, entry) {
@@ -132,15 +133,15 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
if (last_n && is_barrier(last_n)) {
n->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
last_input_needs_ss = false;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
- regmask_init(&state->needs_sy);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
+ regmask_init(&state->needs_sy, mergedregs);
}
if (last_n && (last_n->opc == OPC_PREDT)) {
n->flags |= IR3_INSTR_SS;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
}
/* NOTE: consider dst register too.. it could happen that
@@ -161,13 +162,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
if (regmask_get(&state->needs_ss, reg)) {
n->flags |= IR3_INSTR_SS;
last_input_needs_ss = false;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
}
if (regmask_get(&state->needs_sy, reg)) {
n->flags |= IR3_INSTR_SY;
- regmask_init(&state->needs_sy);
+ regmask_init(&state->needs_sy, mergedregs);
}
}
@@ -184,8 +185,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
if (regmask_get(&state->needs_ss_war, reg)) {
n->flags |= IR3_INSTR_SS;
last_input_needs_ss = false;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
}
if (last_rel && (reg->num == regid(REG_A0, 0))) {
@@ -710,6 +711,7 @@ bool
ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
{
struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
+ bool mergedregs = ir->compiler->gpu_id >= 600; /* ctx was just zero-allocated, so ctx->compiler is still NULL here */
bool progress;
ctx->so = so;
@@ -719,7 +721,14 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
/* allocate per-block data: */
foreach_block (block, &ir->block_list) {
- block->data = rzalloc(ctx, struct ir3_legalize_block_data);
+ struct ir3_legalize_block_data *bd =
+ rzalloc(ctx, struct ir3_legalize_block_data);
+
+ regmask_init(&bd->state.needs_ss_war, mergedregs);
+ regmask_init(&bd->state.needs_ss, mergedregs);
+ regmask_init(&bd->state.needs_sy, mergedregs);
+
+ block->data = bd;
}
ir3_remove_nops(ir);