summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 2
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_program.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 52
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.h 10
4 files changed, 37 insertions, 29 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 01daa0f6f12..736151651b2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -264,7 +264,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
- A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));
+ A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
uint32_t reg = 0;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index ffa53f518f3..0e861b90b12 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -326,7 +326,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
- A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4));
+ A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in));
for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
uint32_t reg = 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 156bb0be247..8617704307c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -96,9 +96,6 @@ struct ir3_compile {
*/
struct hash_table *block_ht;
- /* for calculating input/output positions/linkages: */
- unsigned next_inloc;
-
/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
@@ -235,7 +232,6 @@ compile_init(struct ir3_compiler *compiler,
ctx->compiler = compiler;
ctx->ir = so->ir;
ctx->so = so;
- ctx->next_inloc = 8;
ctx->def_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->var_ht = _mesa_hash_table_create(ctx,
@@ -722,11 +718,12 @@ create_input(struct ir3_block *block, unsigned n)
}
static struct ir3_instruction *
-create_frag_input(struct ir3_compile *ctx, unsigned n, bool use_ldlv)
+create_frag_input(struct ir3_compile *ctx, bool use_ldlv)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *instr;
- struct ir3_instruction *inloc = create_immed(block, n);
+ /* actual inloc is assigned and fixed up later: */
+ struct ir3_instruction *inloc = create_immed(block, 0);
if (use_ldlv) {
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
@@ -2185,8 +2182,6 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
so->inputs[n].slot = slot;
so->inputs[n].compmask = (1 << ncomp) - 1;
- so->inputs[n].inloc = ctx->next_inloc;
- so->inputs[n].interpolate = INTERP_QUALIFIER_NONE;
so->inputs_count = MAX2(so->inputs_count, n + 1);
so->inputs[n].interpolate = in->data.interpolation;
@@ -2231,8 +2226,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
so->inputs[n].bary = true;
- instr = create_frag_input(ctx,
- so->inputs[n].inloc + i - 8, use_ldlv);
+ instr = create_frag_input(ctx, use_ldlv);
}
ctx->ir->inputs[idx] = instr;
@@ -2247,7 +2241,6 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
}
if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
- ctx->next_inloc += ncomp;
so->total_in += ncomp;
}
}
@@ -2471,7 +2464,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_compile *ctx;
struct ir3 *ir;
struct ir3_instruction **inputs;
- unsigned i, j, actual_in;
+ unsigned i, j, actual_in, inloc;
int ret = 0, max_bary;
assert(!so->ir);
@@ -2591,13 +2584,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ir3_print(ir);
}
- ir3_legalize(ir, &so->has_samp, &max_bary);
-
- if (fd_mesa_debug & FD_DBG_OPTMSGS) {
- printf("AFTER LEGALIZE:\n");
- ir3_print(ir);
- }
-
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num;
@@ -2611,32 +2597,46 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
/* Note that some or all channels of an input may be unused: */
actual_in = 0;
+ inloc = 0;
for (i = 0; i < so->inputs_count; i++) {
unsigned j, regid = ~0, compmask = 0;
so->inputs[i].ncomp = 0;
+ so->inputs[i].inloc = inloc + 8;
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
- if (in) {
+ if (in && !(in->flags & IR3_INSTR_UNUSED)) {
compmask |= (1 << j);
regid = in->regs[0]->num - j;
actual_in++;
so->inputs[i].ncomp++;
+ if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) {
+ /* assign inloc: */
+ assert(in->regs[1]->flags & IR3_REG_IMMED);
+ in->regs[1]->iim_val = inloc++;
+ }
}
}
+ if ((so->type == SHADER_FRAGMENT) && compmask && so->inputs[i].bary)
+ so->varying_in++;
so->inputs[i].regid = regid;
so->inputs[i].compmask = compmask;
}
- /* fragment shader always gets full vec4's even if it doesn't
- * fetch all components, but vertex shader we need to update
- * with the actual number of components fetch, otherwise thing
- * will hang due to mismaptch between VFD_DECODE's and
- * TOTALATTRTOVS
+ /* For frag shaders, legalize must run after the "bary.f" offsets
+ * (inloc) have been assigned.
*/
+ ir3_legalize(ir, &so->has_samp, &max_bary);
+
+ if (fd_mesa_debug & FD_DBG_OPTMSGS) {
+ printf("AFTER LEGALIZE:\n");
+ ir3_print(ir);
+ }
+
+ /* Note that actual_in counts inputs that are not bary.f'd for FS: */
if (so->type == SHADER_VERTEX)
so->total_in = actual_in;
else
- so->total_in = align(max_bary + 1, 4);
+ so->total_in = max_bary + 1;
out:
if (ret) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 5d1cccb0daa..cf99a4c05ed 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -195,7 +195,15 @@ struct ir3_shader_variant {
enum glsl_interp_qualifier interpolate;
} inputs[16 + 2]; /* +POSITION +FACE */
- unsigned total_in; /* sum of inputs (scalar) */
+ /* sum of input components (scalar). For frag shaders, it only counts
+ * the varying inputs:
+ */
+ unsigned total_in;
+
+ /* For frag shaders, the total number of inputs (not scalar,
+ * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR)
+ */
+ unsigned varying_in;
/* do we have one or more texture sample instructions: */
bool has_samp;