summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2016-08-11 14:52:18 -0600
committerTim Rowley <[email protected]>2016-08-17 17:08:55 -0500
commit9a25987b4a5060c5cb535d57bd4a754fe8464999 (patch)
treec8cae678fbfb35f2a4d5f8bbc0e11afff6c58b72 /src/gallium/drivers/swr
parentc7c1a03f909c15338b878418ae76498685aeb59b (diff)
swr: [rasterizer core] use AVX2 permute to simplify PaTriList
Signed-off-by: Tim Rowley <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp36
1 files changed, 35 insertions, 1 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
index 9850b436e39..a95bbbfbd63 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
@@ -197,6 +197,8 @@ bool PaTriList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
+#if KNOB_ARCH == KNOB_ARCH_AVX
+
simdvector& a = PaGetSimdVector(pa, 0, slot);
simdvector& b = PaGetSimdVector(pa, 1, slot);
simdvector& c = PaGetSimdVector(pa, 2, slot);
@@ -207,7 +209,7 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
// v1 -> 1 4 7 10 13 16 19 22
// v2 -> 2 5 8 11 14 17 20 23
- for(int i = 0; i < 4; ++i)
+ for (int i = 0; i < 4; ++i)
{
simdvector& v0 = verts[0];
v0[i] = _simd_blend_ps(a[i], b[i], 0x92);
@@ -231,6 +233,38 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
v2[i] = _simd_blend_ps(v2[i], s, 0x22);
}
+#elif KNOB_ARCH >= KNOB_ARCH_AVX2
+
+ simdvector &a = PaGetSimdVector(pa, 0, slot);
+ simdvector &b = PaGetSimdVector(pa, 1, slot);
+ simdvector &c = PaGetSimdVector(pa, 2, slot);
+
+ // v0 -> a0 a3 a6 b1 b4 b7 c2 c5
+ // v1 -> a1 a4 a7 b2 b5 c0 c3 c6
+ // v2 -> a2 a5 b0 b3 b6 c1 c4 c7
+
+ const simdscalari perm0 = _simd_set_epi32(5, 2, 7, 4, 1, 6, 3, 0);
+ const simdscalari perm1 = _simd_set_epi32(6, 3, 0, 5, 2, 7, 4, 1);
+ const simdscalari perm2 = _simd_set_epi32(7, 4, 1, 6, 3, 0, 5, 2);
+
+ simdvector &v0 = verts[0];
+ simdvector &v1 = verts[1];
+ simdvector &v2 = verts[2];
+
+ for (int i = 0; i < 4; ++i)
+ {
+ v0[i] = _simd_blend_ps(_simd_blend_ps(a[i], b[i], 0x92), c[i], 0x24);
+ v0[i] = _mm256_permutevar8x32_ps(v0[i], perm0);
+
+ v1[i] = _simd_blend_ps(_simd_blend_ps(a[i], b[i], 0x24), c[i], 0x49);
+ v1[i] = _mm256_permutevar8x32_ps(v1[i], perm1);
+
+ v2[i] = _simd_blend_ps(_simd_blend_ps(a[i], b[i], 0x49), c[i], 0x92);
+ v2[i] = _mm256_permutevar8x32_ps(v2[i], perm2);
+ }
+
+#endif
+
SetNextPaState(pa, PaTriList0, PaTriListSingle0, 0, KNOB_SIMD_WIDTH, true);
return true;
}