diff options
-rw-r--r-- | src/freedreno/ir3/ir3_legalize.c | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index a5e5fd1fa26..c39b3a11011 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -87,6 +87,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct list_head instr_list; struct ir3_legalize_state prev_state = bd->state; struct ir3_legalize_state *state = &bd->state; + bool last_input_needs_ss = false; /* our input state is the OR of all predecessor blocks' state: */ for (unsigned i = 0; i < block->predecessors_count; i++) { @@ -125,8 +126,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); } - if (last_n && is_barrier(last_n)) + if (last_n && is_barrier(last_n)) { n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + last_input_needs_ss = false; + } /* NOTE: consider dst register too.. it could happen that * texture sample instruction (for example) writes some @@ -145,6 +148,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) */ if (regmask_get(&state->needs_ss, reg)) { n->flags |= IR3_INSTR_SS; + last_input_needs_ss = false; regmask_init(&state->needs_ss_war); regmask_init(&state->needs_ss); } @@ -167,6 +171,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) reg = n->regs[0]; if (regmask_get(&state->needs_ss_war, reg)) { n->flags |= IR3_INSTR_SS; + last_input_needs_ss = false; regmask_init(&state->needs_ss_war); regmask_init(&state->needs_ss); } @@ -237,6 +242,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } else if (n->opc == OPC_RESINFO) { regmask_set(&state->needs_ss, n->regs[0]); ir3_NOP(block)->flags |= IR3_INSTR_SS; + last_input_needs_ss = false; } else if (is_load(n)) { /* seems like ldlv needs (ss) bit instead?? which is odd but * makes a bunch of flat-varying tests start working on a4xx. @@ -271,8 +277,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } } - if (is_input(n)) + if (is_input(n)) { last_input = n; + last_input_needs_ss |= (n->opc == OPC_LDLV); + } last_n = n; } @@ -289,7 +297,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) /* (ss)bary.f (ei)r63.x, 0, r0.x */ baryf = ir3_instr_create(block, OPC_BARY_F); - baryf->flags |= IR3_INSTR_SS; ir3_reg_create(baryf, regid(63, 0), 0); ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; ir3_reg_create(baryf, regid(0, 0), 0); @@ -301,6 +308,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) last_input = baryf; } last_input->regs[0]->flags |= IR3_REG_EI; + if (last_input_needs_ss) + last_input->flags |= IR3_INSTR_SS; } if (last_rel) |