diff options
author | Rob Clark <[email protected]> | 2015-11-26 12:26:53 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2015-11-26 12:35:10 -0500 |
commit | 57fc0dd8d5610a0a25cece53b172b0c992421db0 (patch) | |
tree | 6497707616d1da01502439ac8cb0f81f531f4468 | |
parent | 2181f2cd58f2af1e216618fc6889e23697cec325 (diff) |
freedreno/ir3: assign varying locations later
Rather than assigning inloc up front, when we don't yet know whether it
will be unused, assign it as the last step before the legalize pass.
Also, realize when inputs are unused (since for frag shaders we can't
rely on them being removed from ir->inputs[]). This only makes sense
if we also dynamically assign the inlocs, since otherwise we could end up
telling the hw the wrong # of varyings (we currently assume that
the # of varyings and max-inloc are related).
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_program.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 52 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 10 |
4 files changed, 37 insertions, 29 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 01daa0f6f12..736151651b2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -264,7 +264,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0))); OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | - A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4)); + A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in)); for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) { uint32_t reg = 0; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index ffa53f518f3..0e861b90b12 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -326,7 +326,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | - A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4)); + A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in)); for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) { uint32_t reg = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 156bb0be247..8617704307c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -96,9 +96,6 @@ struct ir3_compile { */ struct hash_table *block_ht; - /* for calculating input/output positions/linkages: */ - unsigned next_inloc; - /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate * so we need to use ldlv.u32 to load the 
varying directly: */ @@ -235,7 +232,6 @@ compile_init(struct ir3_compiler *compiler, ctx->compiler = compiler; ctx->ir = so->ir; ctx->so = so; - ctx->next_inloc = 8; ctx->def_ht = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); ctx->var_ht = _mesa_hash_table_create(ctx, @@ -722,11 +718,12 @@ create_input(struct ir3_block *block, unsigned n) } static struct ir3_instruction * -create_frag_input(struct ir3_compile *ctx, unsigned n, bool use_ldlv) +create_frag_input(struct ir3_compile *ctx, bool use_ldlv) { struct ir3_block *block = ctx->block; struct ir3_instruction *instr; - struct ir3_instruction *inloc = create_immed(block, n); + /* actual inloc is assigned and fixed up later: */ + struct ir3_instruction *inloc = create_immed(block, 0); if (use_ldlv) { instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0); @@ -2185,8 +2182,6 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) so->inputs[n].slot = slot; so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs[n].inloc = ctx->next_inloc; - so->inputs[n].interpolate = INTERP_QUALIFIER_NONE; so->inputs_count = MAX2(so->inputs_count, n + 1); so->inputs[n].interpolate = in->data.interpolation; @@ -2231,8 +2226,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) so->inputs[n].bary = true; - instr = create_frag_input(ctx, - so->inputs[n].inloc + i - 8, use_ldlv); + instr = create_frag_input(ctx, use_ldlv); } ctx->ir->inputs[idx] = instr; @@ -2247,7 +2241,6 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) } if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) { - ctx->next_inloc += ncomp; so->total_in += ncomp; } } @@ -2471,7 +2464,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_compile *ctx; struct ir3 *ir; struct ir3_instruction **inputs; - unsigned i, j, actual_in; + unsigned i, j, actual_in, inloc; int ret = 0, max_bary; assert(!so->ir); @@ -2591,13 +2584,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_print(ir); } - 
ir3_legalize(ir, &so->has_samp, &max_bary); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER LEGALIZE:\n"); - ir3_print(ir); - } - /* fixup input/outputs: */ for (i = 0; i < so->outputs_count; i++) { so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num; @@ -2611,32 +2597,46 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, /* Note that some or all channels of an input may be unused: */ actual_in = 0; + inloc = 0; for (i = 0; i < so->inputs_count; i++) { unsigned j, regid = ~0, compmask = 0; so->inputs[i].ncomp = 0; + so->inputs[i].inloc = inloc + 8; for (j = 0; j < 4; j++) { struct ir3_instruction *in = inputs[(i*4) + j]; - if (in) { + if (in && !(in->flags & IR3_INSTR_UNUSED)) { compmask |= (1 << j); regid = in->regs[0]->num - j; actual_in++; so->inputs[i].ncomp++; + if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) { + /* assign inloc: */ + assert(in->regs[1]->flags & IR3_REG_IMMED); + in->regs[1]->iim_val = inloc++; + } } } + if ((so->type == SHADER_FRAGMENT) && compmask && so->inputs[i].bary) + so->varying_in++; so->inputs[i].regid = regid; so->inputs[i].compmask = compmask; } - /* fragment shader always gets full vec4's even if it doesn't - * fetch all components, but vertex shader we need to update - * with the actual number of components fetch, otherwise thing - * will hang due to mismaptch between VFD_DECODE's and - * TOTALATTRTOVS + /* We need to do legalize after (for frag shader's) the "bary.f" + * offsets (inloc) have been assigned. 
*/ + ir3_legalize(ir, &so->has_samp, &max_bary); + + if (fd_mesa_debug & FD_DBG_OPTMSGS) { + printf("AFTER LEGALIZE:\n"); + ir3_print(ir); + } + + /* Note that actual_in counts inputs that are not bary.f'd for FS: */ if (so->type == SHADER_VERTEX) so->total_in = actual_in; else - so->total_in = align(max_bary + 1, 4); + so->total_in = max_bary + 1; out: if (ret) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 5d1cccb0daa..cf99a4c05ed 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -195,7 +195,15 @@ struct ir3_shader_variant { enum glsl_interp_qualifier interpolate; } inputs[16 + 2]; /* +POSITION +FACE */ - unsigned total_in; /* sum of inputs (scalar) */ + /* sum of input components (scalar). For frag shaders, it only counts + * the varying inputs: + */ + unsigned total_in; + + /* For frag shaders, the total number of inputs (not scalar, + * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR) + */ + unsigned varying_in; /* do we have one or more texture sample instructions: */ bool has_samp; |