diff options
author | Rob Clark <[email protected]> | 2016-11-23 12:21:38 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2016-11-30 12:25:48 -0500 |
commit | 728e2c4d38b2c03ad1fdc997bef70e646ada9fe4 (patch) | |
tree | a70a1dcba9644c3c8582cd696d4d6a274dd220a6 /src/gallium | |
parent | 7a591572876afdd4d3e38416f3148cfe99ea5bbf (diff) |
freedreno/ir3: don't offset inloc by 8
On a3xx/a4xx, the SP_VS_VPC_DST_REG.OUTLOCn is offset by 8, so we used
to add this offset into fs->inputs[n].inloc. But a5xx drops this extra
offset-by-8. So instead make inloc zero-based and add the offset when
we emit OUTLOCn values (for the gens that need the offset).
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_program.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 14 |
4 files changed, 15 insertions, 27 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index e72d432c6ca..f43d5c47ce0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -299,10 +299,10 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc); - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc); - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc); - reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); OUT_RING(ring, reg); } @@ -391,10 +391,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, */ unsigned compmask = fp->inputs[j].compmask; - /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG - * instead.. rather than -8 everywhere else.. 
- */ - uint32_t inloc = fp->inputs[j].inloc - 8; + uint32_t inloc = fp->inputs[j].inloc; if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) || (fp->inputs[j].rasterflat && emit->rasterflat)) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 4db846a6690..3e751253379 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -366,10 +366,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1); - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc); - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc); - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc); - reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); OUT_RING(ring, reg); } @@ -504,10 +504,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, */ unsigned compmask = s[FS].v->inputs[j].compmask; - /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG - * instead.. rather than -8 everywhere else.. 
- */ - uint32_t inloc = s[FS].v->inputs[j].inloc - 8; + uint32_t inloc = s[FS].v->inputs[j].inloc; if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index f4d92650595..9cf6717c17d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -2460,7 +2460,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, for (i = 0; i < so->inputs_count; i++) { unsigned j, regid = ~0, compmask = 0; so->inputs[i].ncomp = 0; - so->inputs[i].inloc = inloc + 8; + so->inputs[i].inloc = inloc; for (j = 0; j < 4; j++) { struct ir3_instruction *in = inputs[(i*4) + j]; if (in && !(in->flags & IR3_INSTR_UNUSED)) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c46b4522e3c..c603168a04b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -181,16 +181,10 @@ struct ir3_shader_variant { uint8_t regid; uint8_t compmask; uint8_t ncomp; - /* In theory inloc of fs should match outloc of vs. Or - * rather the outloc of the vs is 8 plus the offset passed - * to bary.f. Presumably that +8 is to account for - * gl_Position/gl_PointSize? - * - * NOTE inloc is currently aligned to 4 (we don't try - * to pack varyings). Changing this would likely break - * assumptions in few places (like setting up of flat - * shading in fd3_program) so be sure to check all the - * spots where inloc is used. + /* location of input (ie. offset passed to bary.f, etc). This + * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx + * have the OUTLOCn value offset by 8, presumably to account + * for gl_Position/gl_PointSize) */ uint8_t inloc; /* vertex shader specific: */ |