From 482e1b99558fdfa9fe23999400a2973acb3728a1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 11 Oct 2019 15:57:22 -0700 Subject: freedreno/ir3: add meta instruction for pre-fs texture fetch Add a placeholder instruction to track texture fetches made prior to FS shader dispatch. These, like meta:input instructions, are scheduled before any real instructions, so that RA realizes their result values are live before the first real instruction. The placeholder also gives legalize a way to track usage of fetched samples requiring (sy) sync flags. There is some related special handling for varying texcoord inputs used for pre-fs-fetch, so that they are not DCE'd and remain in linkage between FS and previous stage. Note that we could almost avoid this special handling by giving meta:tex_prefetch real src arguments, except that in the FS stage, inputs are actual bary.f/ldlv instructions. Signed-off-by: Rob Clark Reviewed-by: Kristian H. Kristensen --- src/freedreno/ir3/instr-a3xx.h | 5 +++++ src/freedreno/ir3/ir3.h | 7 +++++++ src/freedreno/ir3/ir3_compiler_nir.c | 10 ++++++++++ src/freedreno/ir3/ir3_legalize.c | 7 +++++-- src/freedreno/ir3/ir3_print.c | 4 ++++ src/freedreno/ir3/ir3_sched.c | 3 ++- 6 files changed, 33 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index b0db28eb635..3887736444d 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -212,6 +212,11 @@ typedef enum { OPC_META_FO = _OPC(-1, 2), OPC_META_FI = _OPC(-1, 3), + /* placeholder for texture fetches that run before FS invocation + * starts: + */ + OPC_META_TEX_PREFETCH = _OPC(-1, 4), + } opc_t; #define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 8b8788a8a97..8b7bdc618b2 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -267,6 +267,10 @@ struct ir3_instruction { struct { int off; /* component/offset */ } fo; + struct { + 
unsigned samp, tex; + unsigned input_offset; + } prefetch; struct { /* for sysvals, identifies the sysval type. Mostly so we can * identify the special cases where a sysval should not be DCE'd @@ -1465,6 +1469,9 @@ INSTR4F(G, ATOMIC_XOR) INSTR0(BAR) INSTR0(FENCE) +/* meta instructions: */ +INSTR0(META_TEX_PREFETCH); + /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. */ diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 39bef63a780..38c60f8cfe6 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2635,6 +2635,16 @@ pack_inlocs(struct ir3_context *ctx) compile_assert(ctx, i < so->inputs_count); used_components[i] |= 1 << j; + } else if (instr->opc == OPC_META_TEX_PREFETCH) { + for (int n = 0; n < 2; n++) { + unsigned inloc = instr->prefetch.input_offset + n; + unsigned i = inloc / 4; + unsigned j = inloc % 4; + + compile_assert(ctx, i < so->inputs_count); + + used_components[i] |= 1 << j; + } } } } diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 1af80367e50..9a0c83042a4 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -118,7 +118,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY); - if (is_meta(n)) + /* _meta::tex_prefetch instructions removed later in + * collect_tex_prefetches() + */ + if (is_meta(n) && (n->opc != OPC_META_TEX_PREFETCH)) continue; if (is_input(n)) { @@ -237,7 +240,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); - if (is_tex(n)) { + if (is_tex(n) || (n->opc == OPC_META_TEX_PREFETCH)) { regmask_set(&state->needs_sy, n->regs[0]); ctx->need_pixlod = true; } else if (n->opc == OPC_RESINFO) { diff --git a/src/freedreno/ir3/ir3_print.c 
b/src/freedreno/ir3/ir3_print.c index cc6572d9056..63bb5b16f7a 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -53,6 +53,7 @@ static void print_instr_name(struct ir3_instruction *instr) case OPC_META_INPUT: printf("_meta:in"); break; case OPC_META_FO: printf("_meta:fo"); break; case OPC_META_FI: printf("_meta:fi"); break; + case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break; /* shouldn't hit here.. just for debugging: */ default: printf("_meta:%d", instr->opc); break; @@ -181,6 +182,9 @@ print_instr(struct ir3_instruction *instr, int lvl) if (instr->opc == OPC_META_FO) { printf(", off=%d", instr->fo.off); + } else if (instr->opc == OPC_META_TEX_PREFETCH) { + printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex, + instr->prefetch.samp, instr->prefetch.input_offset); } if (is_flow(instr) && instr->cat0.target) { diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 96897f60e92..635c693095a 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -788,7 +788,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) * occupied), and move remaining to depth sorted list: */ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if (instr->opc == OPC_META_INPUT) { + if ((instr->opc == OPC_META_INPUT) || + (instr->opc == OPC_META_TEX_PREFETCH)) { schedule(ctx, instr); } else { ir3_insert_by_depth(instr, &ctx->depth_list); -- cgit v1.2.3