summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2014-02-15 19:01:38 -0500
committerRob Clark <[email protected]>2014-02-16 08:17:23 -0500
commitd73b2c0517feb37a77d1b28b6cc063d699374867 (patch)
treea5d00295cc0d314df881ba9b3ab6ee355ef5baba
parente8cca57a3f709b9b8bce0b25290d6d8091bbdda7 (diff)
freedreno/a3xx/compiler: use (ss) for WAR hazards
Seems texture sample instructions don't immediately consume there src(s). In fact, some shaders from blob compiler seem to indiciate that it does not even count the texture sample instructions when calculating number of delay slots to fill for non-sample instructions. (Although so far it seems inconclusive as to whether this is required.) In particular, when a src register of a previous texture sample instruction is clobbered, the (ss) bit is needed to synchronize with the tex pipeline to ensure it has picked up the previous values before they are overwritten. Signed-off-by: Rob Clark <[email protected]>
-rw-r--r--src/gallium/drivers/freedreno/a3xx/ir3_ra.c21
1 files changed, 19 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index 5bc1e2f1619..f4834a3778c 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -480,22 +480,25 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
struct ir3_instruction *end =
ir3_instr_create(block, 0, OPC_END);
struct ir3_instruction *last_input = NULL;
+ regmask_t needs_ss_war;
regmask_t needs_ss;
regmask_t needs_sy;
+ regmask_init(&needs_ss_war);
regmask_init(&needs_ss);
regmask_init(&needs_sy);
shader->instrs_count = 0;
for (n = block->head; n; n = n->next) {
+ struct ir3_register *reg;
unsigned i;
if (is_meta(n))
continue;
for (i = 1; i < n->regs_count; i++) {
- struct ir3_register *reg = n->regs[i];
+ reg = n->regs[i];
if (reg_gpr(reg)) {
@@ -515,12 +518,26 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
}
+ if (n->regs_count > 0) {
+ reg = n->regs[0];
+ if (regmask_get(&needs_ss_war, reg)) {
+ n->flags |= IR3_INSTR_SS;
+ regmask_init(&needs_ss_war); // ??? I assume?
+ }
+ }
+
shader->instrs[shader->instrs_count++] = n;
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
- if (is_tex(n))
+ if (is_tex(n)) {
regmask_set(&needs_sy, n->regs[0]);
+ for (i = 1; i < n->regs_count; i++) {
+ reg = n->regs[i];
+ if (reg_gpr(reg))
+ regmask_set(&needs_ss_war, reg);
+ }
+ }
if (is_input(n))
last_input = n;
}