diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 49 |
1 files changed, 37 insertions, 12 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 4ed04b38dea..01daa0f6f12 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -387,23 +387,27 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, /* figure out VARYING_INTERP / FLAT_SHAD register values: */ for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) { + /* NOTE: varyings are packed, so if compmask is 0xb + * then first, second, and fourth component occupy + * three consecutive varying slots: + */ + unsigned compmask = fp->inputs[j].compmask; /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG * instead.. rather than -8 everywhere else.. */ uint32_t inloc = fp->inputs[j].inloc - 8; - /* currently assuming varyings aligned to 4 (not - * packed): - */ - debug_assert((inloc % 4) == 0); - if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) || (fp->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc; - for (i = 0; i < 4; i++, loc++) { - vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); - flatshade[loc / 32] |= 1 << (loc % 32); + + for (i = 0; i < 4; i++) { + if (compmask & (1 << i)) { + vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); + flatshade[loc / 32] |= 1 << (loc % 32); + loc++; + } } } @@ -416,10 +420,31 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & texmask) { - vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) - << ((inloc % 16) * 2); - vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); - vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); + /* mask is two 2-bit fields, where: + * '01' -> S + * '10' -> T + * '11' -> 1 - T (flip mode) + */ + unsigned mask = emit->sprite_coord_mode ? 
0b1101 : 0b1001; + uint32_t loc = inloc; + if (compmask & 0x1) { + vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x2) { + vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x4) { + /* .z <- 0.0f */ + vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x8) { + /* .w <- 1.0f */ + vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); + loc++; + } } } } |