diff options
-rw-r--r-- | src/freedreno/ir3/instr-a3xx.h | 5
-rw-r--r-- | src/freedreno/ir3/ir3.h | 7
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 10
-rw-r--r-- | src/freedreno/ir3/ir3_legalize.c | 7
-rw-r--r-- | src/freedreno/ir3/ir3_print.c | 4
-rw-r--r-- | src/freedreno/ir3/ir3_sched.c | 3
6 files changed, 33 insertions, 3 deletions
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index b0db28eb635..3887736444d 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -212,6 +212,11 @@ typedef enum { OPC_META_FO = _OPC(-1, 2), OPC_META_FI = _OPC(-1, 3), + /* placeholder for texture fetches that run before FS invocation + * starts: + */ + OPC_META_TEX_PREFETCH = _OPC(-1, 4), + } opc_t; #define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 8b8788a8a97..8b7bdc618b2 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -268,6 +268,10 @@ struct ir3_instruction { int off; /* component/offset */ } fo; struct { + unsigned samp, tex; + unsigned input_offset; + } prefetch; + struct { /* for sysvals, identifies the sysval type. Mostly so we can * identify the special cases where a sysval should not be DCE'd * (currently, just pre-fs texture fetch) @@ -1465,6 +1469,9 @@ INSTR4F(G, ATOMIC_XOR) INSTR0(BAR) INSTR0(FENCE) +/* meta instructions: */ +INSTR0(META_TEX_PREFETCH); + /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. 
*/ diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 39bef63a780..38c60f8cfe6 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2635,6 +2635,16 @@ pack_inlocs(struct ir3_context *ctx) compile_assert(ctx, i < so->inputs_count); used_components[i] |= 1 << j; + } else if (instr->opc == OPC_META_TEX_PREFETCH) { + for (int n = 0; n < 2; n++) { + unsigned inloc = instr->prefetch.input_offset + n; + unsigned i = inloc / 4; + unsigned j = inloc % 4; + + compile_assert(ctx, i < so->inputs_count); + + used_components[i] |= 1 << j; + } } } } diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 1af80367e50..9a0c83042a4 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -118,7 +118,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY); - if (is_meta(n)) + /* _meta::tex_prefetch instructions removed later in + * collect_tex_prefetches() + */ + if (is_meta(n) && (n->opc != OPC_META_TEX_PREFETCH)) continue; if (is_input(n)) { @@ -237,7 +240,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); - if (is_tex(n)) { + if (is_tex(n) || (n->opc == OPC_META_TEX_PREFETCH)) { regmask_set(&state->needs_sy, n->regs[0]); ctx->need_pixlod = true; } else if (n->opc == OPC_RESINFO) { diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index cc6572d9056..63bb5b16f7a 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -53,6 +53,7 @@ static void print_instr_name(struct ir3_instruction *instr) case OPC_META_INPUT: printf("_meta:in"); break; case OPC_META_FO: printf("_meta:fo"); break; case OPC_META_FI: printf("_meta:fi"); break; + case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break; /* shouldn't hit here.. 
just for debugging: */ default: printf("_meta:%d", instr->opc); break; @@ -181,6 +182,9 @@ print_instr(struct ir3_instruction *instr, int lvl) if (instr->opc == OPC_META_FO) { printf(", off=%d", instr->fo.off); + } else if (instr->opc == OPC_META_TEX_PREFETCH) { + printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex, + instr->prefetch.samp, instr->prefetch.input_offset); } if (is_flow(instr) && instr->cat0.target) { diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 96897f60e92..635c693095a 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -788,7 +788,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) * occupied), and move remaining to depth sorted list: */ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if (instr->opc == OPC_META_INPUT) { + if ((instr->opc == OPC_META_INPUT) || + (instr->opc == OPC_META_TEX_PREFETCH)) { schedule(ctx, instr); } else { ir3_insert_by_depth(instr, &ctx->depth_list); |