diff options
author | Rob Clark <[email protected]> | 2014-02-15 19:01:38 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-02-16 08:17:23 -0500 |
commit | d73b2c0517feb37a77d1b28b6cc063d699374867 (patch) | |
tree | a5d00295cc0d314df881ba9b3ab6ee355ef5baba /src/gallium/drivers | |
parent | e8cca57a3f709b9b8bce0b25290d6d8091bbdda7 (diff) |
freedreno/a3xx/compiler: use (ss) for WAR hazards
Seems texture sample instructions don't immediately consume their
src(s). In fact, some shaders from the blob compiler seem to indicate that
it does not even count the texture sample instructions when calculating
number of delay slots to fill for non-sample instructions. (Although so
far it seems inconclusive as to whether this is required.)
In particular, when a src register of a previous texture sample
instruction is clobbered, the (ss) bit is needed to synchronize with the
tex pipeline to ensure it has picked up the previous values before they
are overwritten.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index 5bc1e2f1619..f4834a3778c 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -480,22 +480,25 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) struct ir3_instruction *end = ir3_instr_create(block, 0, OPC_END); struct ir3_instruction *last_input = NULL; + regmask_t needs_ss_war; regmask_t needs_ss; regmask_t needs_sy; + regmask_init(&needs_ss_war); regmask_init(&needs_ss); regmask_init(&needs_sy); shader->instrs_count = 0; for (n = block->head; n; n = n->next) { + struct ir3_register *reg; unsigned i; if (is_meta(n)) continue; for (i = 1; i < n->regs_count; i++) { - struct ir3_register *reg = n->regs[i]; + reg = n->regs[i]; if (reg_gpr(reg)) { @@ -515,12 +518,26 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) } } + if (n->regs_count > 0) { + reg = n->regs[0]; + if (regmask_get(&needs_ss_war, reg)) { + n->flags |= IR3_INSTR_SS; + regmask_init(&needs_ss_war); // ??? I assume? + } + } + shader->instrs[shader->instrs_count++] = n; if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) + if (is_tex(n)) { regmask_set(&needs_sy, n->regs[0]); + for (i = 1; i < n->regs_count; i++) { + reg = n->regs[i]; + if (reg_gpr(reg)) + regmask_set(&needs_ss_war, reg); + } + } if (is_input(n)) last_input = n; } |