summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
author: Tim Rowley <[email protected]> 2016-03-08 11:56:06 -0600
committer: Tim Rowley <[email protected]> 2016-03-25 14:43:14 -0500
commit: bef222db22365c2518110d30cd1227625a86195b (patch)
tree: 20a3555595d6ea6487fa3fe02fbe9407b410e8e4 /src/gallium/drivers/swr
parent: 3132f731f8c4e6300ee31805be59920543b22557 (diff)
swr: [rasterizer core] Alleviate potential stack overflow for 32bit builds
Move large stack allocations in the GS and clipper into thread local storage.
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.cpp 3
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 6
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 25
3 files changed, 20 insertions, 14 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index ce27bf71d3c..3a2a8b35be8 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -31,6 +31,9 @@
#include "common/os.h"
#include "core/clip.h"
+// Temp storage used by the clipper
+THREAD simdvertex tlsTempVertices[7];
+
float ComputeInterpFactor(float boundaryCoord0, float boundaryCoord1)
{
return (boundaryCoord0 / (boundaryCoord0 - boundaryCoord1));
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index b0b95d64f39..4f51388d9cf 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -32,6 +32,9 @@
#include "core/pa.h"
#include "rdtsc_core.h"
+// Temp storage used by the clipper
+extern THREAD simdvertex tlsTempVertices[7];
+
enum SWR_CLIPCODES
{
// Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
@@ -818,8 +821,7 @@ private:
simdscalari ClipPrims(float* pVertices, const simdscalar& vPrimMask, const simdscalar& vClipMask, int numAttribs)
{
// temp storage
- simdvertex tempVertices[7];
- float* pTempVerts = (float*)&tempVertices[0];
+ float* pTempVerts = (float*)&tlsTempVertices[0];
// zero out num input verts for non-active lanes
simdscalari vNumInPts = _simd_set1_epi32(NumVertsPerPrim);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index f43a672bd82..d092a8644c6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -630,6 +630,8 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num
}
}
+THREAD SWR_GS_CONTEXT tlsGsContext;
+
//////////////////////////////////////////////////////////////////////////
/// @brief Implements GS stage.
/// @param pDC - pointer to draw context.
@@ -651,7 +653,6 @@ static void GeometryShaderStage(
{
RDTSC_START(FEGeometryShader);
- SWR_GS_CONTEXT gsContext;
SWR_CONTEXT* pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
@@ -660,9 +661,9 @@ static void GeometryShaderStage(
SWR_ASSERT(pGsOut != nullptr, "GS output buffer should be initialized");
SWR_ASSERT(pCutBuffer != nullptr, "GS output cut buffer should be initialized");
- gsContext.pStream = (uint8_t*)pGsOut;
- gsContext.pCutOrStreamIdBuffer = (uint8_t*)pCutBuffer;
- gsContext.PrimitiveID = primID;
+ tlsGsContext.pStream = (uint8_t*)pGsOut;
+ tlsGsContext.pCutOrStreamIdBuffer = (uint8_t*)pCutBuffer;
+ tlsGsContext.PrimitiveID = primID;
uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
simdvector attrib[MAX_ATTRIBUTES];
@@ -675,7 +676,7 @@ static void GeometryShaderStage(
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
- gsContext.vert[i].attrib[attribSlot] = attrib[i];
+ tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
}
}
@@ -683,7 +684,7 @@ static void GeometryShaderStage(
pa.Assemble(VERTEX_POSITION_SLOT, attrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
- gsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
+ tlsGsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
}
const uint32_t vertexStride = sizeof(simdvertex);
@@ -710,14 +711,14 @@ static void GeometryShaderStage(
for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
{
- gsContext.InstanceID = instance;
- gsContext.mask = GenerateMask(numInputPrims);
+ tlsGsContext.InstanceID = instance;
+ tlsGsContext.mask = GenerateMask(numInputPrims);
// execute the geometry shader
- state.pfnGsFunc(GetPrivateState(pDC), &gsContext);
+ state.pfnGsFunc(GetPrivateState(pDC), &tlsGsContext);
- gsContext.pStream += instanceStride;
- gsContext.pCutOrStreamIdBuffer += cutInstanceStride;
+ tlsGsContext.pStream += instanceStride;
+ tlsGsContext.pCutOrStreamIdBuffer += cutInstanceStride;
}
// set up new binner and state for the GS output topology
@@ -736,7 +737,7 @@ static void GeometryShaderStage(
// foreach input prim:
// - setup a new PA based on the emitted verts for that prim
// - loop over the new verts, calling PA to assemble each prim
- uint32_t* pVertexCount = (uint32_t*)&gsContext.vertexCount;
+ uint32_t* pVertexCount = (uint32_t*)&tlsGsContext.vertexCount;
uint32_t* pPrimitiveId = (uint32_t*)&primID;
uint32_t totalPrimsGenerated = 0;