From 56565b7bba54b8298d2c14c66bb87c59930b09ee Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 25 Feb 2020 10:44:26 -0800 Subject: freedreno/ir3: update SFU delay 1) empirically, 10 seems like a more accurate # than 4 2) push "soft" delay handling into ir3_delayslots(), as we should also be using it to calculate the costs that the schedulers use Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3_delay.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'src/freedreno/ir3/ir3_delay.c') diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 0b796a4183a..5839128a4c6 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner, */ int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) + struct ir3_instruction *consumer, unsigned n, bool soft) { if (ignore_dep(assigner, consumer, n)) return 0; @@ -85,6 +85,20 @@ ir3_delayslots(struct ir3_instruction *assigner, if (writes_addr(assigner)) return 6; + /* On a6xx, it takes the number of delay slots to get a SFU result + * back (ie. using nop's instead of (ss) is: + * + * 8 - single warp + * 9 - two warps + * 10 - four warps + * + * and so on. Not quite sure where it tapers out (ie. how many + * warps share an SFU unit). But 10 seems like a reasonable # + * to choose: + */ + if (soft && is_sfu(assigner)) + return 10; + /* handled via sync flags: */ if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) return 0; @@ -195,15 +209,7 @@ delay_calc_srcn(struct ir3_block *block, delay = MAX2(delay, d); } } else { - if (soft) { - if (is_sfu(assigner)) { - delay = 4; - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } + delay = ir3_delayslots(assigner, consumer, srcn, soft); delay -= distance(block, assigner, delay, pred); } -- cgit v1.2.3