From d4ae5ca823227214dd1f536e5f4058bede20b2dd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 5 Oct 2016 09:07:46 -0700 Subject: vc4: Fix live intervals analysis for screening defs in if statements. If a conditional assignment is only conditioned on the exec mask, that's still screening off the value in the executed channels (and, since we're not storing to the unexecuted channels, we don't care what's in there). Fixes a bunch of extra register pressure on Processing's Ribbons demo, which is failing to allocate. --- src/gallium/drivers/vc4/vc4_program.c | 5 ++++- src/gallium/drivers/vc4/vc4_qir.h | 7 +++++-- src/gallium/drivers/vc4/vc4_qir_live_variables.c | 13 +++++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/vc4') diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index a91e6200e8a..81ac070d463 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -169,8 +169,11 @@ ntq_store_dest(struct vc4_compile *c, nir_dest *dest, int chan, * channel is active. 
*/ if (c->execute.file != QFILE_NULL) { + struct qinst *mov; + qir_SF(c, c->execute); - qir_MOV_cond(c, QPU_COND_ZS, qregs[chan], result); + mov = qir_MOV_cond(c, QPU_COND_ZS, qregs[chan], result); + mov->cond_is_exec_mask = true; } else { qir_MOV_dest(c, qregs[chan], result); } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 36652167595..4d41c427c10 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -195,6 +195,7 @@ struct qinst { struct qreg dst; struct qreg *src; bool sf; + bool cond_is_exec_mask; uint8_t cond; }; @@ -835,11 +836,13 @@ qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot) QPU_SMALL_IMM_MUL_ROT + rot))); } -static inline void +static inline struct qinst * qir_MOV_cond(struct vc4_compile *c, uint8_t cond, struct qreg dest, struct qreg src) { - qir_MOV_dest(c, dest, src)->cond = cond; + struct qinst *mov = qir_MOV_dest(c, dest, src); + mov->cond = cond; + return mov; } static inline struct qinst * diff --git a/src/gallium/drivers/vc4/vc4_qir_live_variables.c b/src/gallium/drivers/vc4/vc4_qir_live_variables.c index eac350ab852..beefb0d7f8a 100644 --- a/src/gallium/drivers/vc4/vc4_qir_live_variables.c +++ b/src/gallium/drivers/vc4/vc4_qir_live_variables.c @@ -113,8 +113,17 @@ qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip, if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) return; - /* Easy, common case: unconditional full register update. */ - if (inst->cond == QPU_COND_ALWAYS && !inst->dst.pack) { + /* Easy, common case: unconditional full register update. + * + * We treat conditioning on the exec mask as the same as not being + * conditional. This makes sure that if the register gets set on + * either side of an if, it is treated as being screened off before + * the if. 
Otherwise, if there was no intervening def, its live + * interval doesn't extend back to the start of the program, and if too + * many registers did that we'd fail to register allocate. + */ + if ((inst->cond == QPU_COND_ALWAYS || + inst->cond_is_exec_mask) && !inst->dst.pack) { BITSET_SET(block->def, var); return; } -- cgit v1.2.3