summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/swr/rasterizer
diff options
context:
space:
mode:
author: Tim Rowley <[email protected]> 2017-04-19 17:03:32 -0500
committer: Tim Rowley <[email protected]> 2017-04-28 19:56:28 -0500
commit: dabd0499a6ba4032f2cf24103a7ca044061a3b98 (patch)
tree: 8aff57b2d1c5e9a78b1876c7e35311b3a2dfa393 /src/gallium/drivers/swr/rasterizer
parent: 0424e6249a4ea7d95eeaf4ae081a8b2758f1cc4f (diff)
swr/rast: enable per-warp scratch space for CS
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.cpp 6
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.h 7
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/backend.cpp 10
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/backend.h 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/state.h 5
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.cpp 3
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/tilemgr.h 6
8 files changed, 33 insertions, 8 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 5c3225d58b2..dc8f517386c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -592,12 +592,16 @@ void SwrSetCsFunc(
HANDLE hContext,
PFN_CS_FUNC pfnCsFunc,
uint32_t totalThreadsInGroup,
- uint32_t totalSpillFillSize)
+ uint32_t totalSpillFillSize,
+ uint32_t scratchSpaceSizePerInstance,
+ uint32_t numInstances)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->pfnCsFunc = pfnCsFunc;
pState->totalThreadsInGroup = totalThreadsInGroup;
pState->totalSpillFillSize = totalSpillFillSize;
+ pState->scratchSpaceSize = scratchSpaceSizePerInstance;
+ pState->scratchSpaceNumInstances = numInstances;
}
void SwrSetTsState(
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index b9b994ad379..166598a48d0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -366,11 +366,16 @@ void SWR_API SwrSetGsFunc(
/// @param pfnCsFunc - Pointer to compute shader function
/// @param totalThreadsInGroup - product of thread group dimensions.
/// @param totalSpillFillSize - size in bytes needed for spill/fill.
+/// @param scratchSpaceSizePerInstance - size of the scratch space needed per simd instance
+/// @param numInstances - number of simd instances that are run per execution of the shader
void SWR_API SwrSetCsFunc(
HANDLE hContext,
PFN_CS_FUNC pfnCsFunc,
uint32_t totalThreadsInGroup,
- uint32_t totalSpillFillSize);
+ uint32_t totalSpillFillSize,
+ uint32_t scratchSpaceSizePerInstance,
+ uint32_t numInstances
+ );
//////////////////////////////////////////////////////////////////////////
/// @brief Set tessellation state.
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 39f4802db4c..16698ef08f9 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -45,7 +45,7 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
{
SWR_CONTEXT *pContext = pDC->pContext;
@@ -60,6 +60,12 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
{
pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
}
+
+ size_t scratchSpaceSize = pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
+ if (scratchSpaceSize && pScratchSpace == nullptr)
+ {
+ pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD_BYTES);
+ }
const API_STATE& state = GetApiState(pDC);
@@ -70,6 +76,8 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
csContext.pTGSM = pContext->ppScratch[workerId];
csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
+ csContext.pScratchSpace = (uint8_t*)pScratchSpace;
+ csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index ade9afccd95..822daa32133 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -34,7 +34,7 @@
#include "depthstencil.h"
#include "rdtsc_core.h"
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 94085e59998..7781feaf101 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -245,6 +245,8 @@ OSALIGNLINE(struct) API_STATE
PFN_CS_FUNC pfnCsFunc;
uint32_t totalThreadsInGroup;
uint32_t totalSpillFillSize;
+ uint32_t scratchSpaceSize;
+ uint32_t scratchSpaceNumInstances;
// FE - Frontend State
SWR_FRONTEND_STATE frontendState;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 3d0b4ff951f..bf735e03f24 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -378,6 +378,11 @@ struct SWR_CS_CONTEXT
uint8_t* pTGSM; // Thread Group Shared Memory pointer.
uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
+
+ uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is responsible
+ // for subdividing scratch space per instance/simd
+
+ uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
};
// enums
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 295014d1a7e..e03632b443b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -726,10 +726,11 @@ void WorkOnCompute(
if (queue.getNumQueued() > 0)
{
void* pSpillFillBuffer = nullptr;
+ void* pScratchSpace = nullptr;
uint32_t threadGroupId = 0;
while (queue.getWork(threadGroupId))
{
- queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
+ queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
queue.finishedWork();
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index bfff339a55f..8f1cd21543d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -151,7 +151,7 @@ private:
OSALIGNLINE(volatile LONG) mWorkItemsConsumed { 0 };
};
-typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
+typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
//////////////////////////////////////////////////////////////////////////
/// DispatchQueue - work queue for dispatch
@@ -231,10 +231,10 @@ public:
//////////////////////////////////////////////////////////////////////////
/// @brief Dispatches a unit of work
- void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
+ void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
{
SWR_ASSERT(mPfnDispatch != nullptr);
- mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
+ mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
}
void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this.