diff options
author | Tim Rowley <[email protected]> | 2017-05-02 12:19:23 -0500 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-05-30 17:20:44 -0500 |
commit | 9fd68be13333140767822c5f5149956333b726e0 (patch) | |
tree | 77205ecf20b3510fc0c291980eca7ba27e90413f | |
parent | 4c235233652890c7b2f9c100fe7a7021c94ca173 (diff) |
swr/rast: SIMD16 FE - primitive assembly simplification
Reduce/simplify vertex storage usage in PA_STATE_OPT, fix PA
GetNextVsOutput wrap-around behaviour, and eliminate unnecessary
SIMDVERTEX copies/storage for tri fan in PA_STATE_OPT.
Fixes the OpenGL tri fan test failure under SIMD16
(triangle-rasterization-overdraw).
Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/pa.h | 29 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 53 |
2 files changed, 32 insertions, 50 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index 403efe057d0..7c390564a41 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -119,8 +119,6 @@ struct PA_STATE // cuts struct PA_STATE_OPT : public PA_STATE { - SIMDVERTEX leadingVertex; // For tri-fan - uint32_t numPrims{ 0 }; // Total number of primitives for draw. uint32_t numPrimsComplete{ 0 }; // Total number of complete primitives. @@ -128,7 +126,7 @@ struct PA_STATE_OPT : public PA_STATE uint32_t cur{ 0 }; // index to current VS output. uint32_t prev{ 0 }; // index to prev VS output. Not really needed in the state. - uint32_t first{ 0 }; // index to first VS output. Used for trifan. + const uint32_t first{ 0 }; // index to first VS output. Used for tri fan and line loop. uint32_t counter{ 0 }; // state counter bool reset{ false }; // reset state @@ -245,13 +243,27 @@ struct PA_STATE_OPT : public PA_STATE SIMDVERTEX& GetNextVsOutput() { + const uint32_t numSimdVerts = streamSizeInVerts / SIMD_WIDTH; + // increment cur and prev indices - const uint32_t numSimdVerts = this->streamSizeInVerts / SIMD_WIDTH; - this->prev = this->cur; // prev is undefined for first state. 
- this->cur = this->counter % numSimdVerts; + if (counter < numSimdVerts) + { + // prev undefined for first state + prev = cur; + cur = counter; + } + else + { + // swap/recycle last two simd verts for prev and cur, leave other simd verts intact in the buffer + uint32_t temp = prev; + + prev = cur; + cur = temp; + } + + SWR_ASSERT(cur < numSimdVerts); - SIMDVERTEX* pVertex = (SIMDVERTEX*)pStreamBase; - return pVertex[this->cur]; + return reinterpret_cast<SIMDVERTEX *>(pStreamBase)[cur]; } SIMDMASK& GetNextVsIndices() @@ -317,7 +329,6 @@ struct PA_STATE_OPT : public PA_STATE this->numSimdPrims = 0; this->cur = 0; this->prev = 0; - this->first = 0; this->counter = 0; this->reset = false; } diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp index d0ee18a1703..897079cb414 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp @@ -1213,10 +1213,6 @@ void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m1 bool PaTriFan0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) { - // store off leading vertex for attributes - PA_STATE_OPT::SIMDVERTEX* pVertex = (PA_STATE_OPT::SIMDVERTEX*)pa.pStreamBase; - pa.leadingVertex = pVertex[pa.cur]; - SetNextPaState(pa, PaTriFan1, PaTriFanSingle0); return false; // Not enough vertices to assemble 8 triangles. 
} @@ -1228,11 +1224,7 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) simdvector a; simdvector b; -#if 1 const simd16vector &leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot); -#else - const simd16vector &leadvert_16 = pa.leadingVertex.attrib[slot]; -#endif if (!pa.useAlternateOffset) { @@ -1260,10 +1252,9 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) } #else - simdvector &leadVert = pa.leadingVertex.attrib[slot]; - - simdvector &a = PaGetSimdVector(pa, pa.prev, slot); - simdvector &b = PaGetSimdVector(pa, pa.cur, slot); + const simdvector &leadVert = PaGetSimdVector(pa, pa.first, slot); + const simdvector &a = PaGetSimdVector(pa, pa.prev, slot); + const simdvector &b = PaGetSimdVector(pa, pa.cur, slot); #endif simdscalar s; @@ -1301,23 +1292,7 @@ bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) { -#if USE_SIMD16_FRONTEND -#if 1 const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot); -#else - const simd16vector &a = pa.leadingVertex.attrib[slot]; -#endif -#else - simd16vector a; - - { - for (uint32_t i = 0; i < 4; i += 1) - { - a[i] = _simd16_insert_ps(_simd16_setzero_ps(), pa.leadingVertex.attrib[slot][i], 0); - } - } - -#endif const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot); const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot); @@ -1353,11 +1328,7 @@ bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[]) { #if USE_SIMD16_FRONTEND -#if 1 const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot); -#else - const simd16vector &a = pa.leadingVertex.attrib[slot]; -#endif const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot); const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot); @@ -1393,7 +1364,7 @@ void 
PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[2] = swizzleLaneN(c, primIndex - 14); } #else - const simdvector &a = pa.leadingVertex.attrib[slot]; + const simdvector &a = PaGetSimdVector(pa, pa.first, slot); const simdvector &b = PaGetSimdVector(pa, pa.prev, slot); const simdvector &c = PaGetSimdVector(pa, pa.cur, slot); @@ -1887,8 +1858,8 @@ bool PaLineList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[]) { #if USE_SIMD16_FRONTEND - const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot); - const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot); + const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot); + const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot); if (pa.useAlternateOffset) { @@ -1963,8 +1934,8 @@ void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m1 break; } #else - const simdvector &a = PaGetSimdVector(pa, pa.prev, slot); - const simdvector &b = PaGetSimdVector(pa, pa.cur, slot); + const simdvector &a = PaGetSimdVector(pa, 0, slot); + const simdvector &b = PaGetSimdVector(pa, 1, slot); switch (primIndex) { @@ -2229,7 +2200,7 @@ bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) #if USE_SIMD16_FRONTEND simdvector a; - const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.cur, slot); + const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot); if (!pa.useAlternateOffset) { @@ -2247,7 +2218,7 @@ bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) } #else - simdvector &a = PaGetSimdVector(pa, pa.cur, slot); + simdvector &a = PaGetSimdVector(pa, 0, slot); #endif verts[0] = a; // points only have 1 vertex. 
@@ -2271,7 +2242,7 @@ bool PaPoints0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[]) { #if USE_SIMD16_FRONTEND - const simd16vector &a = PaGetSimdVector_simd16(pa, pa.cur, slot); + const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot); if (pa.useAlternateOffset) { @@ -2280,7 +2251,7 @@ void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[0] = swizzleLaneN(a, primIndex); #else - const simdvector &a = PaGetSimdVector(pa, pa.cur, slot); + const simdvector &a = PaGetSimdVector(pa, 0, slot); verts[0] = swizzleLaneN(a, primIndex); #endif |