diff options
author | Rob Clark <[email protected]> | 2019-11-08 12:55:27 -0800 |
---|---|---|
committer | Rob Clark <[email protected]> | 2019-11-12 13:57:52 -0800 |
commit | b22617fb57be54a859a8d62a5e545afcb38266e9 (patch) | |
tree | 65f1307aa332c1730c7fc05a264280ee23408e46 /src/freedreno/ir3 | |
parent | 4bb697d938d17bbdd5124db0102d97fb9ead2229 (diff) |
freedreno/ir3: fix gpu hang with pre-fs-tex-fetch
For pre-fs-dispatch texture fetch, we need to assign bary_ij to r0.x,
even if it is not used in the shader (i.e. the only varying use is for tex
coords). But if, for example, gl_FragCoord is used, it could get
assigned on top of bary_ij, resulting in a GPU hang.
The solution to this is two-fold: (1) the inputs/outputs rework has the
benefit of making RA realize bary_ij is a vec2, even if there are no
split/collect instructions (due to no varying fetches in the shader
itself). And (2) extend the live ranges of meta:input instructions to
the first non-input instruction, to prevent RA from assigning the same
register to multiple inputs.
Backport note: because of (1) above, a better solution for 19.3 would be
to revert f30c256ec05.
Fixes: f30c256ec05 ("freedreno/ir3: enable pre-fs texture fetch for a6xx")
Signed-off-by: Rob Clark <[email protected]>
Reviewed-by: Kristian H. Kristensen <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r-- | src/freedreno/ir3/ir3_ra.c | 12 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_sched.c | 30 |
2 files changed, 32 insertions, 10 deletions
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 1f24b5eadac..67d8a93884e 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -701,6 +701,15 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) block->data = bd; + struct ir3_instruction *first_non_input = NULL; + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (instr->opc != OPC_META_INPUT) { + first_non_input = instr; + break; + } + } + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { struct ir3_instruction *src; struct ir3_register *reg; @@ -767,6 +776,9 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) def(name, id->defn); + if (instr->opc == OPC_META_INPUT) + use(name, first_non_input); + if (is_high(id->defn)) { ra_set_node_class(ctx->g, name, ctx->set->high_classes[id->cls - HIGH_OFFSET]); diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index a70251374ee..247221d3a03 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -778,18 +778,28 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) list_inithead(&block->instr_list); list_inithead(&ctx->depth_list); - /* first a pre-pass to schedule all meta:input instructions - * (which need to appear first so that RA knows the register is - * occupied), and move remaining to depth sorted list: + /* First schedule all meta:input instructions, followed by + * tex-prefetch. We want all of the instructions that load + * values into registers before the shader starts to go + * before any other instructions. But in particular we + * want inputs to come before prefetches. 
This is because + * a FS's bary_ij input may not actually be live in the + * shader, but it should not be scheduled on top of any + * other input (but can be overwritten by a tex prefetch) + * + * Finally, move all the remaining instructions to the depth- + * list */ - list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if ((instr->opc == OPC_META_INPUT) || - (instr->opc == OPC_META_TEX_PREFETCH)) { + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) + if (instr->opc == OPC_META_INPUT) schedule(ctx, instr); - } else { - ir3_insert_by_depth(instr, &ctx->depth_list); - } - } + + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) + if (instr->opc == OPC_META_TEX_PREFETCH) + schedule(ctx, instr); + + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) + ir3_insert_by_depth(instr, &ctx->depth_list); while (!list_is_empty(&ctx->depth_list)) { struct ir3_sched_notes notes = {0}; |