diff options
author | Rob Clark <[email protected]> | 2018-02-04 12:42:19 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-02-10 14:54:58 -0500 |
commit | ffb00f6841768e842a010f895b52314e1eeb5828 (patch) | |
tree | cce8be9ee5009a8c897cedd7a14ea27ebfcb8d0e /src/gallium/drivers/freedreno | |
parent | f54d2b4f10481913528b4ef3d68a99b59104f053 (diff) |
freedreno/ir3: account for arrays in delayslot calc
Normally false-deps are not something to consider, since they mostly
exist for reasons unrelated to delay slots:
* barriers
* ordering writes after read
* SSBO/image access ordering
The exception is a false-dependency on an array store.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_depth.c | 32 |
1 files changed, 30 insertions, 2 deletions
```diff
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 55ca5333b47..b58bf8ff3ae 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -49,14 +49,42 @@
  * blocks depth sorted list, which is used by the scheduling pass.
  */
 
+/* generally don't count false dependencies, since this can just be
+ * something like a barrier, or SSBO store. The exception is array
+ * dependencies if the assigner is an array write and the consumer
+ * reads the same array.
+ */
+static bool
+ignore_dep(struct ir3_instruction *assigner,
+		struct ir3_instruction *consumer, unsigned n)
+{
+	if (!__is_false_dep(consumer, n))
+		return false;
+
+	if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) {
+		struct ir3_register *dst = assigner->regs[0];
+		struct ir3_register *src;
+
+		debug_assert(dst->flags & IR3_REG_ARRAY);
+
+		foreach_src(src, consumer) {
+			if ((src->flags & IR3_REG_ARRAY) &&
+					(dst->array.id == src->array.id)) {
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
 /* calculate required # of delay slots between the instruction that
  * assigns a value and the one that consumes
  */
 int ir3_delayslots(struct ir3_instruction *assigner,
 		struct ir3_instruction *consumer, unsigned n)
 {
-	/* don't count false-dependencies: */
-	if (__is_false_dep(consumer, n))
+	if (ignore_dep(assigner, consumer, n))
 		return 0;
 
 	/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
```