diff options
author | Rob Clark <[email protected]> | 2020-02-25 10:44:26 -0800 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-02-28 16:53:41 +0000 |
commit | 56565b7bba54b8298d2c14c66bb87c59930b09ee (patch) | |
tree | a90cd5469a0cd18242a4289bbf0019c97d3c1c05 | |
parent | 2cf4b5f29edbd7a01590fdf244fead5551db8d3f (diff) |
freedreno/ir3: update SFU delay
1) empirically, 10 seems like a more accurate # than 4
2) push "soft" delay handling into ir3_delayslots(), as
we should also be using it to calculate the costs
that the schedulers use
Signed-off-by: Rob Clark <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3989>
-rw-r--r-- | src/freedreno/ir3/ir3.h | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_delay.c | 26 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_depth.c | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_postsched.c | 2 |
4 files changed, 19 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 21fd8c602b9..b66d8e2d6fd 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1157,7 +1157,7 @@ void ir3_print_instr(struct ir3_instruction *instr); /* delay calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n); + struct ir3_instruction *consumer, unsigned n, bool soft); unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr, bool soft, bool pred); void ir3_remove_nops(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 0b796a4183a..5839128a4c6 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner, */ int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) + struct ir3_instruction *consumer, unsigned n, bool soft) { if (ignore_dep(assigner, consumer, n)) return 0; @@ -85,6 +85,20 @@ ir3_delayslots(struct ir3_instruction *assigner, if (writes_addr(assigner)) return 6; + /* On a6xx, it takes the number of delay slots to get a SFU result + * back (ie. using nop's instead of (ss) is: + * + * 8 - single warp + * 9 - two warps + * 10 - four warps + * + * and so on. Not quite sure where it tapers out (ie. how many + * warps share an SFU unit). 
But 10 seems like a reasonable # + * to choose: + */ + if (soft && is_sfu(assigner)) + return 10; + /* handled via sync flags: */ if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) return 0; @@ -195,15 +209,7 @@ delay_calc_srcn(struct ir3_block *block, delay = MAX2(delay, d); } } else { - if (soft) { - if (is_sfu(assigner)) { - delay = 4; - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } + delay = ir3_delayslots(assigner, consumer, srcn, soft); delay -= distance(block, assigner, delay, pred); } diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c index 135d4365d2e..6bb946871e5 100644 --- a/src/freedreno/ir3/ir3_depth.c +++ b/src/freedreno/ir3/ir3_depth.c @@ -89,7 +89,7 @@ ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep) if (i == 0) continue; - sd = ir3_delayslots(src, instr, i) + src->depth; + sd = ir3_delayslots(src, instr, i, true) + src->depth; sd += boost; instr->depth = MAX2(instr->depth, sd); diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c index 4290e882249..47a8e52fdeb 100644 --- a/src/freedreno/ir3/ir3_postsched.c +++ b/src/freedreno/ir3/ir3_postsched.c @@ -380,7 +380,7 @@ calculate_deps(struct ir3_postsched_deps_state *state, struct ir3_postsched_node *dep = dep_reg(state, reg->num + b); if (dep && (state->direction == F)) { - unsigned d = ir3_delayslots(dep->instr, node->instr, i); + unsigned d = ir3_delayslots(dep->instr, node->instr, i, true); node->delay = MAX2(node->delay, d); } } |