summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp57
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/pa.h13
2 files changed, 54 insertions, 16 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index b09a7985ccc..3886c64ccf6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -551,6 +551,7 @@ static void StreamOut(
_mm_store_ps((float*)pPrimDataAttrib, attrib[v]);
}
+
soMask &= ~(1 << slot);
}
@@ -1345,8 +1346,6 @@ static void TessellationStages(
const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
const uint32_t numPrims_hi = std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
- const uint32_t primMask = GenMask(numPrims);
-
const simd16scalari primID = _simd16_set1_epi32(dsContext.PrimitiveID);
const simdscalari primID_lo = _simd16_extract_si(primID, 0);
const simdscalari primID_hi = _simd16_extract_si(primID, 1);
@@ -1390,9 +1389,9 @@ static void TessellationStages(
if (HasRastT::value)
{
#if USE_SIMD16_FRONTEND
- simd16vector prim_simd16[3];
+ simd16vector prim_simd16[3]; // Only deal with triangles, lines, or points
#else
- simdvector prim[3]; // Only deal with triangles, lines, or points
+ simdvector prim[3]; // Only deal with triangles, lines, or points
#endif
AR_BEGIN(FEPAAssemble, pDC->drawId);
bool assemble =
@@ -1407,7 +1406,7 @@ static void TessellationStages(
SWR_ASSERT(pfnClipFunc);
#if USE_SIMD16_FRONTEND
tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC, tessPa, workerId, prim_simd16, primMask, primID, _simd16_set1_epi32(0));
+ pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_set1_epi32(0));
#else
pfnClipFunc(pDC, tessPa, workerId, prim,
GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
@@ -1420,9 +1419,21 @@ static void TessellationStages(
} // while (tessPa.HasWork())
} // for (uint32_t p = 0; p < numPrims; ++p)
+#if USE_SIMD16_FRONTEND
+ if (gt_pTessellationThreadData->pDSOutput != nullptr)
+ {
+ AlignedFree(gt_pTessellationThreadData->pDSOutput);
+ gt_pTessellationThreadData->pDSOutput = nullptr;
+ }
+ gt_pTessellationThreadData->numDSOutputVectors = 0;
+
+#endif
TSDestroyCtx(tsCtx);
}
+THREAD PA_STATE::SIMDVERTEX *pVertexStore = nullptr;
+THREAD uint32_t gVertexStoreSize = 0;
+
//////////////////////////////////////////////////////////////////////////
/// @brief FE handler for SwrDraw.
/// @tparam IsIndexedT - Is indexed drawing enabled
@@ -1530,8 +1541,36 @@ void ProcessDraw(
pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16);
}
+ const uint32_t vertexCount = NumVertsPerPrim(state.topology, state.gsState.gsEnable);
+
+ SWR_ASSERT(vertexCount <= MAX_NUM_VERTS_PER_PRIM);
+
+ // grow the vertex store for the PA as necessary
+ if (gVertexStoreSize < vertexCount)
+ {
+ if (pVertexStore != nullptr)
+ {
+ AlignedFree(pVertexStore);
+ }
+
+ while (gVertexStoreSize < vertexCount)
+ {
+#if USE_SIMD16_FRONTEND
+ gVertexStoreSize += 4; // grow in chunks of 4 simd16vertex
+#else
+ gVertexStoreSize += 8; // grow in chunks of 8 simdvertex
+#endif
+ }
+
+ SWR_ASSERT(gVertexStoreSize <= MAX_NUM_VERTS_PER_PRIM);
+
+ pVertexStore = reinterpret_cast<PA_STATE::SIMDVERTEX *>(AlignedMalloc(gVertexStoreSize * sizeof(pVertexStore[0]), 64));
+
+ SWR_ASSERT(pVertexStore != nullptr);
+ }
+
// choose primitive assembler
- PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts);
+ PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts, pVertexStore, gVertexStoreSize);
PA_STATE& pa = paFactory.GetPA();
#if USE_SIMD16_FRONTEND
@@ -1689,8 +1728,6 @@ void ProcessDraw(
const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
const uint32_t numPrims_hi = std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
- const uint32_t primMask = GenMask(numPrims);
-
const simd16scalari primID = pa.GetPrimID(work.startPrimID);
const simdscalari primID_lo = _simd16_extract_si(primID, 0);
const simdscalari primID_hi = _simd16_extract_si(primID, 1);
@@ -1732,7 +1769,7 @@ void ProcessDraw(
StreamOut(pDC, pa, workerId, pSoPrimData, numPrims_hi, 0);
}
#else
- pa.useAlternateOffset = false; // StreamOut() is SIMD16-compatible..
+ pa.useAlternateOffset = false;
StreamOut(pDC, pa, workerId, pSoPrimData, 0);
#endif
}
@@ -1742,7 +1779,7 @@ void ProcessDraw(
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, primMask, primID, _simd16_setzero_si());
+ pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_setzero_si());
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index 10570f43f0f..403efe057d0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -136,9 +136,9 @@ struct PA_STATE_OPT : public PA_STATE
uint32_t primIDIncr{ 0 }; // how much to increment for each vector (typically vector / {1, 2})
SIMDSCALARI primID;
- typedef bool(*PFN_PA_FUNC)(PA_STATE_OPT& state, uint32_t slot, simdvector verts[]);
+ typedef bool(*PFN_PA_FUNC)(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
#if ENABLE_AVX512_SIMD16
- typedef bool(*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& state, uint32_t slot, simd16vector verts[]);
+ typedef bool(*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
#endif
typedef void(*PFN_PA_SINGLE_FUNC)(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[]);
@@ -691,6 +691,7 @@ PRAGMA_WARNING_PUSH_DISABLE(4789)
pBase += SIMD_WIDTH;
}
}
+
return true;
}
PRAGMA_WARNING_POP()
@@ -1392,7 +1393,7 @@ private:
template <typename IsIndexedT, typename IsCutIndexEnabledT>
struct PA_FACTORY
{
- PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts) : topo(in_topo)
+ PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts, PA_STATE::SIMDVERTEX *pVertexStore, uint32_t vertexStoreSize) : topo(in_topo)
{
#if KNOB_ENABLE_CUT_AWARE_PA == TRUE
const API_STATE& state = GetApiState(pDC);
@@ -1408,7 +1409,7 @@ struct PA_FACTORY
memset(&indexStore, 0, sizeof(indexStore));
uint32_t numAttribs = state.feNumAttributes;
- new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * PA_STATE::SIMD_WIDTH,
+ new (&this->paCut) PA_STATE_CUT(pDC, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH,
&this->indexStore[0], numVerts, numAttribs, state.topology, false);
cutPA = true;
}
@@ -1416,7 +1417,7 @@ struct PA_FACTORY
#endif
{
uint32_t numPrims = GetNumPrims(in_topo, numVerts);
- new (&this->paOpt) PA_STATE_OPT(pDC, numPrims, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * PA_STATE::SIMD_WIDTH, false);
+ new (&this->paOpt) PA_STATE_OPT(pDC, numPrims, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH, false);
cutPA = false;
}
@@ -1438,10 +1439,10 @@ struct PA_FACTORY
PA_STATE_OPT paOpt;
PA_STATE_CUT paCut;
+
bool cutPA{ false };
PRIMITIVE_TOPOLOGY topo{ TOP_UNKNOWN };
- PA_STATE::SIMDVERTEX vertexStore[MAX_NUM_VERTS_PER_PRIM];
PA_STATE::SIMDMASK indexStore[MAX_NUM_VERTS_PER_PRIM];
};