summary | refs | log | tree | commit | diff | stats
path: root/src/freedreno
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2019-03-24 11:58:21 -0400
committer: Rob Clark <[email protected]> 2019-03-30 12:56:01 -0400
commit: 831f1a05c0de3f9623115d17bf0904ebe1c4a74e (patch)
tree: b9048cb50dd446c8b7fd95c78fb7f9c84b1ee0b1 /src/freedreno
parent: 91a1354cd6e2cad77799b5ce68927d954bb0213c (diff)
freedreno/ir3: rework varying packing
Originally we kept track of a table of inputs. But with new-style frag inputs this becomes awkward. Re-work it so that initially we assign un-packed varying locations, and then, after the shader is compiled, scan to find the inputs that are actually used and re-pack them. Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c128
1 files changed, 98 insertions, 30 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index afab76ab8c8..ec741ae92d9 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -83,12 +83,12 @@ create_input(struct ir3_context *ctx, unsigned n)
}
static struct ir3_instruction *
-create_frag_input(struct ir3_context *ctx, bool use_ldlv)
+create_frag_input(struct ir3_context *ctx, bool use_ldlv, unsigned n)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *instr;
- /* actual inloc is assigned and fixed up later: */
- struct ir3_instruction *inloc = create_immed(block, 0);
+ /* packed inloc is fixed up later: */
+ struct ir3_instruction *inloc = create_immed(block, n);
if (use_ldlv) {
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
@@ -2275,7 +2275,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
*/
so->inputs[n].slot = VARYING_SLOT_VAR8;
so->inputs[n].bary = true;
- instr = create_frag_input(ctx, false);
+ instr = create_frag_input(ctx, false, idx);
} else {
bool use_ldlv = false;
@@ -2304,7 +2304,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
so->inputs[n].bary = true;
- instr = create_frag_input(ctx, use_ldlv);
+ instr = create_frag_input(ctx, use_ldlv, idx);
}
compile_assert(ctx, idx < ctx->ir->ninputs);
@@ -2326,6 +2326,92 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
}
}
+/* Initially we assign non-packed inlocs for varyings (4 * input index +
+ * component), as we don't really know up-front which components will be
+ * unused. After all the compilation stages we scan the shader to see which
+ * components are actually used, and re-pack the inlocs to drop unused ones.
+ */
+static void
+pack_inlocs(struct ir3_context *ctx)
+{
+ struct ir3_shader_variant *so = ctx->so;
+ uint8_t used_components[so->inputs_count]; /* bit j set => component j of input i is read */
+
+ memset(used_components, 0, sizeof(used_components));
+
+ /*
+ * First Step: scan shader to find which bary.f/ldlv remain:
+ */
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (is_input(instr)) {
+ unsigned inloc = instr->regs[1]->iim_val; /* NOTE(review): read before the IMMED assert below */
+ unsigned i = inloc / 4; /* input index */
+ unsigned j = inloc % 4; /* component within that input */
+
+ compile_assert(ctx, instr->regs[1]->flags & IR3_REG_IMMED);
+ compile_assert(ctx, i < so->inputs_count);
+
+ used_components[i] |= 1 << j;
+ }
+ }
+ }
+
+ /*
+ * Second Step: reassign varying inloc/slots:
+ */
+
+ unsigned actual_in = 0; /* NOTE(review): incremented below but never read in this function */
+ unsigned inloc = 0; /* next free packed location */
+
+ for (unsigned i = 0; i < so->inputs_count; i++) {
+ unsigned compmask = 0, maxcomp = 0; /* NOTE(review): local compmask is written but never read */
+
+ so->inputs[i].ncomp = 0;
+ so->inputs[i].inloc = inloc;
+ so->inputs[i].bary = false;
+
+ for (unsigned j = 0; j < 4; j++) {
+ if (!(used_components[i] & (1 << j)))
+ continue;
+
+ compmask |= (1 << j);
+ actual_in++;
+ so->inputs[i].ncomp++;
+ maxcomp = j + 1; /* one past the highest used component */
+
+ /* at this point, since used_components[i] mask is only
+ * considering varyings (ie. not sysvals) we know this
+ * is a varying:
+ */
+ so->inputs[i].bary = true;
+ }
+
+ if (so->inputs[i].bary) {
+ so->varying_in++;
+ so->inputs[i].compmask = (1 << maxcomp) - 1; /* contiguous mask up to the highest used component */
+ inloc += maxcomp; /* unused components below maxcomp still occupy a location */
+ }
+ }
+
+ /*
+ * Third Step: reassign packed inloc's:
+ */
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (is_input(instr)) {
+ unsigned inloc = instr->regs[1]->iim_val; /* still the unpacked value: 4 * i + j */
+ unsigned i = inloc / 4;
+ unsigned j = inloc % 4;
+
+ instr->regs[1]->iim_val = so->inputs[i].inloc + j; /* packed base of input i plus component */
+ }
+ }
+ }
+}
+
static void
setup_output(struct ir3_context *ctx, nir_variable *out)
{
@@ -2596,7 +2682,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_context *ctx;
struct ir3 *ir;
struct ir3_instruction **inputs;
- unsigned i, actual_in, inloc;
+ unsigned i;
int ret = 0, max_bary;
assert(!so->ir);
@@ -2741,6 +2827,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ir3_print(ir);
}
+ if (so->type == MESA_SHADER_FRAGMENT)
+ pack_inlocs(ctx);
+
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
/* sometimes we get outputs that don't write the .x coord, like:
@@ -2761,34 +2850,15 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
}
/* Note that some or all channels of an input may be unused: */
- actual_in = 0;
- inloc = 0;
for (i = 0; i < so->inputs_count; i++) {
- unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0;
- so->inputs[i].ncomp = 0;
- so->inputs[i].inloc = inloc;
+ unsigned j, reg = regid(63,0);
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
+
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
- compmask |= (1 << j);
reg = in->regs[0]->num - j;
- actual_in++;
- so->inputs[i].ncomp++;
- if ((so->type == MESA_SHADER_FRAGMENT) && so->inputs[i].bary) {
- /* assign inloc: */
- assert(in->regs[1]->flags & IR3_REG_IMMED);
- in->regs[1]->iim_val = inloc + j;
- maxcomp = j + 1;
- }
}
}
- if ((so->type == MESA_SHADER_FRAGMENT) && compmask && so->inputs[i].bary) {
- so->varying_in++;
- so->inputs[i].compmask = (1 << maxcomp) - 1;
- inloc += maxcomp;
- } else if (!so->inputs[i].sysval) {
- so->inputs[i].compmask = compmask;
- }
so->inputs[i].regid = reg;
}
@@ -2808,9 +2878,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
so->branchstack = ctx->max_stack;
/* Note that actual_in counts inputs that are not bary.f'd for FS: */
- if (so->type == MESA_SHADER_VERTEX)
- so->total_in = actual_in;
- else
+ if (so->type == MESA_SHADER_FRAGMENT)
so->total_in = max_bary + 1;
so->max_sun = ir->max_sun;