diff options
-rw-r--r-- | src/mesa/pipe/cell/spu/spu_vertex_fetch.c | 160 | ||||
-rw-r--r-- | src/mesa/pipe/cell/spu/spu_vertex_shader.h | 3 |
2 files changed, 98 insertions, 65 deletions
diff --git a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c index 3bbf9b7be4f..45e3c26c001 100644 --- a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c +++ b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c @@ -59,8 +59,59 @@ #define DRAW_DBG 0 +static const qword fetch_shuffle_data[] = { + /* Shuffle used by CVT_64_FLOAT + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED + */ + { + 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, + 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED + */ + { + 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, + 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, + }, + + /* High value shuffle used by trans4x4. + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 + }, + + /* Low value shuffle used by trans4x4. + */ + { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F + } +}; + + +static INLINE void +trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, + const qword *shuffle) +{ + qword t1 = si_shufb(row0, row2, shuffle[3]); + qword t2 = si_shufb(row0, row2, shuffle[4]); + qword t3 = si_shufb(row1, row3, shuffle[3]); + qword t4 = si_shufb(row1, row3, shuffle[4]); + + out[0] = si_shufb(t1, t3, shuffle[3]); + out[1] = si_shufb(t1, t3, shuffle[4]); + out[2] = si_shufb(t2, t4, shuffle[3]); + out[3] = si_shufb(t2, t4, shuffle[4]); +} -static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 }; /** * Fetch between 1 and 32 bytes from an unaligned address @@ -100,140 +151,117 @@ fetch_unaligned(qword *dst, unsigned ea, unsigned size) } -#define CVT_32_FLOAT(q) (*(q)) +#define CVT_32_FLOAT(q, s) (*(q)) static INLINE qword -CVT_64_FLOAT(const qword *qw) +CVT_64_FLOAT(const qword *qw, const qword *shuffle) { - qword shuf_first = (qword) { - 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - }; - qword a = si_frds(qw[0]); qword b = si_frds(si_rotqbyi(qw[0], 8)); qword c = si_frds(qw[1]); qword d = si_frds(si_rotqbyi(qw[1], 8)); - qword ab = si_shufb(a, b, shuf_first); - qword cd = si_shufb(c, d, si_rotqbyi(shuf_first, 8)); + qword ab = si_shufb(a, b, shuffle[0]); + qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); return si_or(ab, cd); } static INLINE qword -CVT_8_USCALED(const qword *qw) +CVT_8_USCALED(const qword *qw, const qword *shuffle) { - qword shuffle = (qword) { - 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, - 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, - }; - - return si_cuflt(si_shufb(*qw, *qw, shuffle), 0); + return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); } static INLINE qword -CVT_16_USCALED(const qword *qw) +CVT_16_USCALED(const qword *qw, const qword *shuffle) { - qword shuffle = (qword) { - 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, - 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, - }; - - return si_cuflt(si_shufb(*qw, *qw, shuffle), 0); + return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); } static INLINE qword -CVT_32_USCALED(const qword *qw) +CVT_32_USCALED(const qword *qw, const qword *shuffle) { + (void) shuffle; return si_cuflt(*qw, 0); } static INLINE qword -CVT_8_SSCALED(const qword *qw) +CVT_8_SSCALED(const qword *qw, const qword *shuffle) { - qword shuffle = (qword) { - 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, - 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, - }; - - return si_csflt(si_shufb(*qw, *qw, shuffle), 0); + return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); } static INLINE qword -CVT_16_SSCALED(const qword *qw) +CVT_16_SSCALED(const qword *qw, const qword *shuffle) { - qword shuffle = (qword) { - 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, - 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, - }; - - return si_csflt(si_shufb(*qw, *qw, shuffle), 0); + return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); } static INLINE qword -CVT_32_SSCALED(const qword *qw) +CVT_32_SSCALED(const qword *qw, const qword *shuffle) { + (void) shuffle; return si_csflt(*qw, 0); } static INLINE qword -CVT_8_UNORM(const qword *qw) +CVT_8_UNORM(const qword *qw, const qword *shuffle) { const qword scale = (qword) spu_splats(1.0f / 255.0f); - return si_fm(CVT_8_USCALED(qw), scale); + return si_fm(CVT_8_USCALED(qw, shuffle), scale); } static INLINE qword -CVT_16_UNORM(const qword *qw) +CVT_16_UNORM(const qword *qw, const qword *shuffle) { const qword scale = (qword) spu_splats(1.0f / 65535.0f); - return si_fm(CVT_16_USCALED(qw), scale); + return si_fm(CVT_16_USCALED(qw, shuffle), scale); } static INLINE qword -CVT_32_UNORM(const qword *qw) +CVT_32_UNORM(const qword *qw, const qword *shuffle) { const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); - return si_fm(CVT_32_USCALED(qw), scale); + return si_fm(CVT_32_USCALED(qw, shuffle), scale); } static INLINE qword -CVT_8_SNORM(const qword *qw) +CVT_8_SNORM(const qword *qw, const qword *shuffle) { const qword scale = (qword) spu_splats(1.0f / 127.0f); - return si_fm(CVT_8_SSCALED(qw), scale); + return si_fm(CVT_8_SSCALED(qw, shuffle), scale); } static INLINE qword -CVT_16_SNORM(const qword *qw) +CVT_16_SNORM(const qword *qw, const qword *shuffle) { const qword scale = (qword) spu_splats(1.0f / 32767.0f); - return si_fm(CVT_16_SSCALED(qw), scale); + return si_fm(CVT_16_SSCALED(qw, shuffle), scale); } static INLINE qword -CVT_32_SNORM(const qword *qw) +CVT_32_SNORM(const qword *qw, const qword *shuffle) { const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); - return si_fm(CVT_32_SSCALED(qw), scale); + return si_fm(CVT_32_SSCALED(qw, shuffle), scale); } #define SZ_4 si_il(0U) -#define SZ_3 si_rotqmbyi(si_il(~0), -12) -#define SZ_2 si_rotqmbyi(si_il(~0), -8) -#define SZ_1 si_rotqmbyi(si_il(~0), -4) +#define SZ_3 si_fsmbi(0x000f) +#define SZ_2 si_fsmbi(0x00ff) +#define SZ_1 si_fsmbi(0x0fff) /** * Fetch a float[4] vertex attribute from memory, doing format/type @@ -244,17 +272,19 @@ CVT_32_SNORM(const qword *qw) */ #define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ static void \ -fetch_##NAME(qword *out, const qword *in) \ +fetch_##NAME(qword *out, const qword *in, qword defaults, \ + const qword *shuffle) \ { \ qword tmp[4]; \ \ - tmp[0] = si_selb(CVT(in + (0 * N)), (qword) defaults, SZ); \ - tmp[1] = si_selb(CVT(in + (1 * N)), (qword) defaults, SZ); \ - tmp[2] = si_selb(CVT(in + (2 * N)), (qword) defaults, SZ); \ - tmp[3] = si_selb(CVT(in + (3 * N)), (qword) defaults, SZ); \ - _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) tmp); \ + tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ + tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ + tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ + tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ + trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ } + FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) @@ -582,6 +612,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* loop over vertex attributes (vertex shader inputs) */ for (attr = 0; attr < nr_attrs; attr++) { + const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; const unsigned pitch = draw->vertex_fetch.pitch[attr]; const uint64_t src = draw->vertex_fetch.src_ptr[attr]; const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; @@ -602,8 +633,8 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, printf("SPU: fetching = 0x%llx\n", addr); #endif - fetch_unaligned(& in[idx], addr, bytes_per_entry); - idx += quads_per_entry; + fetch_unaligned(& in[idx], addr, bytes_per_entry); + idx += quads_per_entry; } /* Be nice and zero out any missing vertices. @@ -613,7 +644,8 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* Convert all 4 vertices to vectors of float. */ - (*fetch)(&machine->Inputs[attr].xyzw[0].q, in); + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, + fetch_shuffle_data); } } diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.h b/src/mesa/pipe/cell/spu/spu_vertex_shader.h index 8b37a239a47..b5bf31e67db 100644 --- a/src/mesa/pipe/cell/spu/spu_vertex_shader.h +++ b/src/mesa/pipe/cell/spu/spu_vertex_shader.h @@ -6,7 +6,8 @@ struct spu_vs_context; -typedef void (*spu_fetch_func)(qword *out, const qword *in); +typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, + const qword *shuffle_data); typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, struct spu_exec_machine *machine, const unsigned *elts, |