summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2018-02-04 12:42:19 -0500
committer: Rob Clark <[email protected]> 2018-02-10 14:54:58 -0500
commit: ffb00f6841768e842a010f895b52314e1eeb5828 (patch)
tree: cce8be9ee5009a8c897cedd7a14ea27ebfcb8d0e /src/gallium/drivers
parent: f54d2b4f10481913528b4ef3d68a99b59104f053 (diff)
freedreno/ir3: account for arrays in delayslot calc
Normally false-deps are not something to consider, since they mostly exist for delay-slot related reasons:

 * barriers
 * ordering writes after read
 * SSBO/image access ordering

The exception is a false-dependency on an array store.

Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_depth.c 32
1 files changed, 30 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 55ca5333b47..b58bf8ff3ae 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -49,14 +49,42 @@
* blocks depth sorted list, which is used by the scheduling pass.
*/
+/* generally don't count false dependencies, since this can just be
+ * something like a barrier, or SSBO store. The exception is array
+ * dependencies if the assigner is an array write and the consumer
+ * reads the same array.
+ */
+static bool
+ignore_dep(struct ir3_instruction *assigner,
+ struct ir3_instruction *consumer, unsigned n)
+{
+ if (!__is_false_dep(consumer, n))
+ return false;
+
+ if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) {
+ struct ir3_register *dst = assigner->regs[0];
+ struct ir3_register *src;
+
+ debug_assert(dst->flags & IR3_REG_ARRAY);
+
+ foreach_src(src, consumer) {
+ if ((src->flags & IR3_REG_ARRAY) &&
+ (dst->array.id == src->array.id)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
/* calculate required # of delay slots between the instruction that
* assigns a value and the one that consumes
*/
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n)
{
- /* don't count false-dependencies: */
- if (__is_false_dep(consumer, n))
+ if (ignore_dep(assigner, consumer, n))
return 0;
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal