summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2016-03-22 17:28:06 -0600
committerTim Rowley <[email protected]>2016-03-25 14:45:40 -0500
commit0767e820fd96e8bac2943fa8942bea3ff81b8bd9 (patch)
tree007fd8c5c1324fefc41da60d591e95d9c725a825 /src
parent813e89c0cc0ea6a6ed4b69303073995b4c4c7666 (diff)
swr: [rasterizer core] Fix Compute workitem retirement
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp37
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/arena.h7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.h2
4 files changed, 22 insertions, 31 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 591342239d3..7ca182242e5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -160,20 +160,12 @@ void WakeAllThreads(SWR_CONTEXT *pContext)
template<bool IsDraw>
void QueueWork(SWR_CONTEXT *pContext)
{
- if (IsDraw)
- {
- // Each worker thread looks at a DC for both FE and BE work at different times and so we
- // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
- // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
- // then moved on if all work is done.)
- pContext->pCurDrawContext->threadsDone =
- pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
- }
- else
- {
- pContext->pCurDrawContext->threadsDone =
- pContext->NumWorkerThreads ? pContext->NumWorkerThreads : 1;
- }
+ // Each worker thread looks at a DC for both FE and BE work at different times and so we
+ // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
+ // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
+ // then moved on if all work is done.)
+ pContext->pCurDrawContext->threadsDone =
+ pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
_ReadWriteBarrier();
{
@@ -201,10 +193,7 @@ void QueueWork(SWR_CONTEXT *pContext)
}
// Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
- if (!pContext->dcRing.IsEmpty())
- {
- pContext->dcRing.Dequeue();
- }
+ while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {}
// restore csr
_mm_setcsr(mxcsr);
@@ -252,8 +241,6 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
- auto& stateArena = *(pCurDrawContext->pState->pArena);
-
// Copy previous state to current state.
if (pContext->pPrevDrawContext)
{
@@ -266,7 +253,9 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
{
CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState);
- stateArena.Reset(true); // Reset memory.
+ // Should have been cleaned up previously
+ SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
+
pCurDrawContext->pState->pPrivateState = nullptr;
pContext->curStateId++; // Progress state ring index forward.
@@ -276,16 +265,18 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
// If its a split draw then just copy the state pointer over
// since its the same draw.
pCurDrawContext->pState = pPrevDrawContext->pState;
+ SWR_ASSERT(pPrevDrawContext->cleanupState == false);
}
}
else
{
- stateArena.Reset(); // Reset memory.
+ SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
pContext->curStateId++; // Progress state ring index forward.
}
+ SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
+
pCurDrawContext->dependency = 0;
- pCurDrawContext->pArena->Reset();
pCurDrawContext->pContext = pContext;
pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
index d777c20a4ee..67d81a44347 100644
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -51,10 +51,7 @@ public:
}
};
-static const size_t ARENA_BLOCK_SHIFT = 5;
-static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
-static_assert((1U << ARENA_BLOCK_SHIFT) == ARENA_BLOCK_ALIGN,
- "Invalid value for ARENA_BLOCK_ALIGN/SHIFT");
+static const size_t ARENA_BLOCK_ALIGN = 64;
struct ArenaBlock
{
@@ -65,7 +62,7 @@ static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
"Increase BLOCK_ALIGN size");
// Caching Allocator for Arena
-template<uint32_t NumBucketsT = 1, uint32_t StartBucketBitT = 20>
+template<uint32_t NumBucketsT = 4, uint32_t StartBucketBitT = 16>
struct CachingAllocatorT : DefaultAllocator
{
static uint32_t GetBucketId(size_t blockSize)
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 521a306b96e..845e28ea497 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -279,11 +279,10 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint64_t lastReti
return (pDC->dependency > lastRetiredDraw);
}
-
-
-INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
int64_t result = InterlockedDecrement64(&pDC->threadsDone);
+ SWR_ASSERT(result >= 0);
if (result == 0)
{
@@ -299,6 +298,8 @@ INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
pContext->dcRing.Dequeue(); // Remove from tail
}
+
+ return result;
}
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
index 6b37e3ac179..6cc8c96a00f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -34,6 +34,7 @@
typedef std::thread* THREAD_PTR;
struct SWR_CONTEXT;
+struct DRAW_CONTEXT;
struct THREAD_DATA
{
@@ -63,3 +64,4 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, int numaNode);
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles);
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
+int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC); \ No newline at end of file