summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorAlok Hota <[email protected]>2018-08-28 12:23:31 -0500
committerAlok Hota <[email protected]>2019-02-15 14:54:09 -0600
commit0b4db4370544459fcd47499d9f8663e421fcae34 (patch)
tree44310f8570a122e99f29ad945d4a30d034cb8a19 /src/gallium/drivers
parentdc7b3c95a45078fcbf32e68bf6b2c972c6df41c9 (diff)
swr/rast: FP consistency between POSH/RENDER pipes
- Ensure all threads have optimal floating-point control state
- Disable auto-generation of fused FP ops for VERTEX shader stage
- Disable "fast" FP ops for VERTEX shader stage

Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/os.h21
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp6
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp8
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp9
4 files changed, 33 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h
index 314d8184374..b00beeb36dd 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -294,4 +294,25 @@ int SWR_API
std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text
const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
+
+/// Helper for setting up FP state: selects round-to-nearest and turns on flush-to-zero and denormals-are-zero in the vector CSR, leaving every other bit as it was read
+/// @returns old csr state (the value read before any modification), which can later be handed to RestoreVectorCSR()
+static INLINE uint32_t SetOptimalVectorCSR()
+{
+ uint32_t oldCSR = _mm_getcsr(); // snapshot the current control/status value so the caller can restore it
+
+ uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK)); // clear the rounding, DAZ and FTZ fields first so stale settings cannot survive the OR below
+ newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); // then set the desired value for each cleared field
+ _mm_setcsr(newCSR); // write the new state back
+
+ return oldCSR; // the previous state, not the one just written
+}
+
+/// Set Vector CSR state by writing the given value back verbatim; no bits are masked or merged.
+/// @param csrState - should be value returned from SetOptimalVectorCSR()
+static INLINE void RestoreVectorCSR(uint32_t csrState)
+{
+ _mm_setcsr(csrState); // overwrite the whole register with the saved value
+}
+
#endif //__SWR_OS_H__
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index acbc7e077b1..203a74bdb24 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -250,9 +250,7 @@ void QueueWork(SWR_CONTEXT* pContext)
if (pContext->threadInfo.SINGLE_THREADED)
{
- // flush denormals to 0
- uint32_t mxcsr = _mm_getcsr();
- _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
+ uint32_t mxcsr = SetOptimalVectorCSR();
if (IsDraw)
{
@@ -274,7 +272,7 @@ void QueueWork(SWR_CONTEXT* pContext)
}
// restore csr
- _mm_setcsr(mxcsr);
+ RestoreVectorCSR(mxcsr);
}
else
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 8bc97c743eb..24db5275795 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1840,10 +1840,10 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
{
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
- fetchInfo_lo.xpIndices =
- pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
- fetchInfo_hi.xpIndices =
- pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t)); // 1/2 of KNOB_SIMD16_WIDTH
+ fetchInfo_lo.xpIndices = pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
+ fetchInfo_hi.xpIndices = pDC->pContext->pfnMakeGfxPtr(
+ GetPrivateState(pDC),
+ &vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t)); // 1/2 of KNOB_SIMD16_WIDTH
}
fetchInfo_lo.CurInstance = instanceNum;
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 4523616cba0..e30c1170568 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -421,9 +421,9 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CON
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
-
stats.PsInvocations += dynState.pStats[i].PsInvocations;
stats.CsInvocations += dynState.pStats[i].CsInvocations;
+
}
@@ -439,6 +439,10 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONT
pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
pDC->retireCallback.userData2,
pDC->retireCallback.userData3);
+
+ // Callbacks to external code *could* change floating point control state
+ // Reset our optimal flags
+ SetOptimalVectorCSR();
}
}
@@ -870,8 +874,7 @@ DWORD workerThreadMain(LPVOID pData)
uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE;
uint32_t numaMask = pContext->threadPool.numaMask;
- // flush denormals to 0
- _mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
+ SetOptimalVectorCSR();
// Track tiles locked by other threads. If we try to lock a macrotile and find its already
// locked then we'll add it to this list so that we don't try and lock it again.