summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp35
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.h27
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_impl.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h5
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp2
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/knobs.h3
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp10
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h5
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp3
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder.cpp1
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h11
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp57
14 files changed, 135 insertions, 30 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index f1b0dc03352..a6f86b36f98 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -71,6 +71,21 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
pContext->privateStateSize = pCreateInfo->privateStateSize;
+ // initialize callback functions
+ pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
+ pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
+ pContext->pfnTranslateGfxptrForRead = pCreateInfo->pfnTranslateGfxptrForRead;
+ pContext->pfnTranslateGfxptrForWrite = pCreateInfo->pfnTranslateGfxptrForWrite;
+ pContext->pfnMakeGfxPtr = pCreateInfo->pfnMakeGfxPtr;
+ pContext->pfnCreateMemoryContext = pCreateInfo->pfnCreateMemoryContext;
+ pContext->pfnDestroyMemoryContext = pCreateInfo->pfnDestroyMemoryContext;
+ pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
+ pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
+ pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
+
+
+ pContext->hExternalMemory = pCreateInfo->hExternalMemory;
+
pContext->MAX_DRAWS_IN_FLIGHT = KNOB_MAX_DRAWS_IN_FLIGHT;
if (pCreateInfo->MAX_DRAWS_IN_FLIGHT != 0)
{
@@ -169,13 +184,13 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
pContext->threadPool.pThreadData ? pContext->threadPool.pThreadData[i].numaId : 0;
pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(GetCurrentProcess(),
nullptr,
- 32 * sizeof(KILOBYTE),
+ KNOB_WORKER_SCRATCH_SPACE_SIZE,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE,
numaNode);
#else
pContext->ppScratch[i] =
- (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
+ (uint8_t*)AlignedMalloc(KNOB_WORKER_SCRATCH_SPACE_SIZE, KNOB_SIMD_WIDTH * 4);
#endif
#if defined(KNOB_ENABLE_AR)
@@ -200,17 +215,6 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
// initialize hot tile manager
pContext->pHotTileMgr = new HotTileMgr();
- // initialize callback functions
- pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
- pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
- pContext->pfnTranslateGfxptrForRead = pCreateInfo->pfnTranslateGfxptrForRead;
- pContext->pfnTranslateGfxptrForWrite = pCreateInfo->pfnTranslateGfxptrForWrite;
- pContext->pfnMakeGfxPtr = pCreateInfo->pfnMakeGfxPtr;
- pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
- pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
- pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
-
-
// pass pointer to bucket manager back to caller
#ifdef KNOB_ENABLE_RDTSC
pCreateInfo->pBucketMgr = pContext->pBucketMgr;
@@ -1531,7 +1535,9 @@ void SWR_API SwrDiscardRect(HANDLE hContext, uint32_t attachmentMask, const SWR_
void SwrDispatch(HANDLE hContext,
uint32_t threadGroupCountX,
uint32_t threadGroupCountY,
- uint32_t threadGroupCountZ)
+ uint32_t threadGroupCountZ
+
+)
{
if (KNOB_TOSS_DRAW)
{
@@ -1551,6 +1557,7 @@ void SwrDispatch(HANDLE hContext,
pTaskData->threadGroupCountX = threadGroupCountX;
pTaskData->threadGroupCountY = threadGroupCountY;
pTaskData->threadGroupCountZ = threadGroupCountZ;
+
pTaskData->enableThreadDispatch = false;
uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index 8058defb388..93ea0d42535 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -147,14 +147,20 @@ typedef void(SWR_API* PFN_CLEAR_TILE)(HANDLE hPrivateContex
typedef void*(SWR_API* PFN_TRANSLATE_GFXPTR_FOR_READ)(HANDLE hPrivateContext,
gfxptr_t xpAddr,
- bool* pbNullTileAccessed);
+ bool* pbNullTileAccessed,
+ HANDLE hPrivateWorkerData);
typedef void*(SWR_API* PFN_TRANSLATE_GFXPTR_FOR_WRITE)(HANDLE hPrivateContext,
gfxptr_t xpAddr,
- bool* pbNullTileAccessed);
+ bool* pbNullTileAccessed,
+ HANDLE hPrivateWorkerData);
typedef gfxptr_t(SWR_API* PFN_MAKE_GFXPTR)(HANDLE hPrivateContext, void* sysAddr);
+typedef HANDLE(SWR_API* PFN_CREATE_MEMORY_CONTEXT)(HANDLE hExternalMemory);
+
+typedef void(SWR_API* PFN_DESTROY_MEMORY_CONTEXT)(HANDLE hExternalMemory, HANDLE hMemoryContext);
+
//////////////////////////////////////////////////////////////////////////
/// @brief Callback to allow driver to update their copy of streamout write offset.
/// This call is made for any draw operation that has streamout enabled
@@ -219,10 +225,11 @@ struct SWR_API_THREADING_INFO
// Independent of KNOB_MAX_THREADS_PER_CORE.
};
-struct SWR_WORKER_DATA
-{
- HANDLE hArContext; // handle to the archrast context
-};
+//////////////////////////////////////////////////////////////////////////
+/// SWR_CONTEXT
+/// Forward Declaration (see context.h for full definition)
+/// Uses the 'struct' class-key because context.h defines 'struct SWR_CONTEXT';
+/// a mismatched key draws clang -Wmismatched-tags and MSVC warning C4099.
+/////////////////////////////////////////////////////////////////////////
+struct SWR_CONTEXT;
//////////////////////////////////////////////////////////////////////////
/// SWR_WORKER_PRIVATE_STATE
@@ -233,7 +240,7 @@ struct SWR_WORKER_DATA
/////////////////////////////////////////////////////////////////////////
struct SWR_WORKER_PRIVATE_STATE
{
- typedef void(SWR_API* PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
+ typedef void(SWR_API* PFN_WORKER_DATA)(SWR_CONTEXT* pContext, HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
@@ -260,6 +267,8 @@ struct SWR_CREATECONTEXT_INFO
PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
PFN_MAKE_GFXPTR pfnMakeGfxPtr;
+ PFN_CREATE_MEMORY_CONTEXT pfnCreateMemoryContext;
+ PFN_DESTROY_MEMORY_CONTEXT pfnDestroyMemoryContext;
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
@@ -275,6 +284,9 @@ struct SWR_CREATECONTEXT_INFO
// ArchRast event manager.
HANDLE hArEventManager;
+ // handle to external memory, used by per-worker data to create memory contexts
+ HANDLE hExternalMemory;
+
// Input (optional): Threading info that overrides any set KNOB values.
SWR_THREADING_INFO* pThreadInfo;
@@ -588,7 +600,6 @@ SWR_FUNC(void,
uint32_t threadGroupCountY,
uint32_t threadGroupCountZ);
-
/// @note this enum needs to be kept in sync with HOTTILE_STATE!
enum SWR_TILE_STATE
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index 9e74e2cee8e..1bd2e743781 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -1141,7 +1141,6 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
// execute pixel shader
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
- UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
// update stats
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 8891cc881a3..13cb7c8b856 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -535,6 +535,8 @@ struct SWR_CONTEXT
PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
PFN_MAKE_GFXPTR pfnMakeGfxPtr;
+ PFN_CREATE_MEMORY_CONTEXT pfnCreateMemoryContext;
+ PFN_DESTROY_MEMORY_CONTEXT pfnDestroyMemoryContext;
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
@@ -558,6 +560,9 @@ struct SWR_CONTEXT
// ArchRast thread contexts.
HANDLE* pArContext;
+ // handle to external memory, used by per-worker data to create memory contexts
+ HANDLE hExternalMemory;
+
BucketManager *pBucketMgr;
};
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index a27b33d2051..1aa98f49fd7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -578,7 +578,7 @@ static void StreamOut(
{
bool nullTileAccessed = false;
void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(
- GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
+ GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed, pWorkerData);
*((uint32_t*)pWriteOffset) = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h
index 92fbf8840e1..8dab50dab01 100644
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -84,6 +84,9 @@
#define KNOB_GUARDBAND_WIDTH 32768.0f
#define KNOB_GUARDBAND_HEIGHT 32768.0f
+// Scratch space requirements per worker. Currently only used for TGSM sizing for some stages
+#define KNOB_WORKER_SCRATCH_SPACE_SIZE (32 * 1024)
+
///////////////////////////////
// Macro tile configuration
///////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index 44c486c80bf..4f1d8ccff22 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -271,7 +271,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTi
{48, 49, 52, 53, 56, 57, 60, 61},
{50, 51, 54, 55, 58, 59, 62, 63}};
- OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
+ OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc = {};
// pull point information from triangle buffer
// @todo use structs for readability
@@ -287,8 +287,12 @@ void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTi
// mask indices by the maximum valid index for x/y of coveragemap.
uint32_t tX = workDesc.triFlags.coverageMask & 0x7;
uint32_t tY = (workDesc.triFlags.coverageMask >> 4) & 0x7;
- // todo: multisample points?
- triDesc.coverageMask[0] = 1ULL << coverageMap[tY][tX];
+ for (uint32_t i = 0; i < _countof(triDesc.coverageMask); ++i)
+ {
+ triDesc.coverageMask[i] = 1ULL << coverageMap[tY][tX];
+ }
+ triDesc.anyCoveredSamples = triDesc.coverageMask[0];
+ triDesc.innerCoverageMask = triDesc.coverageMask[0];
// no persp divide needed for points
triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 5202e6146a1..25d4fed9578 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -213,6 +213,11 @@ struct SIMDVERTEX_T
typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
};
+struct SWR_WORKER_DATA // worker data record; its only member is the ArchRast context handle
+{
+ HANDLE hArContext; // handle to the archrast context
+};
+
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_STATS
/// @brief Structure passed to shader for stats collection.
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index a0ddd96c61f..987469340d2 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -1216,7 +1216,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
pPool->pThreadData[i].pWorkerPrivateData = pWorkerData;
if (pContext->workerPrivateState.pfnInitWorkerData)
{
- pContext->workerPrivateState.pfnInitWorkerData(pWorkerData, i);
+ pContext->workerPrivateState.pfnInitWorkerData(pContext, pWorkerData, i);
}
pWorkerData = PtrAdd(pWorkerData, perWorkerSize);
}
@@ -1396,7 +1396,7 @@ void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
if (pContext->workerPrivateState.pfnFinishWorkerData)
{
pContext->workerPrivateState.pfnFinishWorkerData(
- pPool->pThreadData[t].pWorkerPrivateData, t);
+ pContext, pPool->pThreadData[t].pWorkerPrivateData, t);
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 0f78bd661a5..74edd4febbc 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -649,7 +649,8 @@ JitCache::JitCache()
int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
{
- return ExecCmd(CmdLine, "", pStdOut, pStdErr);
+
+ return ExecCmd(CmdLine, nullptr, pStdOut, pStdErr);
}
/// Calculate actual directory where module will be cached.
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
index 30481b43208..e7ba0040d9d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
@@ -110,6 +110,7 @@ namespace SwrJit
mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4);
mSimdVectorIntTy = ArrayType::get(mSimdInt32Ty, 4);
mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
+ mSimdVectorTRIntTy = ArrayType::get(mSimdInt32Ty, 5);
}
/// @brief Mark this alloca as temporary to avoid hoisting later on
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
index 6e1d94b9e68..9f2c199464d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -108,6 +108,7 @@ namespace SwrJit
Type* mSimdVectorTy;
Type* mSimdVectorTRTy;
Type* mSimdVectorIntTy;
+ Type* mSimdVectorTRIntTy;
// Built in types: simd16
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 3987a5f3476..616c73b254a 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -51,6 +51,17 @@ Constant* C(const std::initializer_list<Ty>& constList)
}
template <typename Ty> // Ty: element type of the input vector; each element is cast to Ty before conversion
+Constant* C(const std::vector<Ty>& constList) // std::vector overload: returns a ConstantVector with one constant per input element, in order
+{
+ std::vector<Constant*> vConsts; // per-element constants, in input order
+ for (auto i : constList)
+ {
+ vConsts.push_back(C((Ty)i)); // the C() overload taking a single Ty is declared outside this hunk
+ }
+ return ConstantVector::get(vConsts);
+}
+
+template <typename Ty>
Constant* CA(LLVMContext& ctx, ArrayRef<Ty> constList)
{
return ConstantDataArray::get(ctx, constList);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index fe5b48e584b..72704e94e4c 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1103,6 +1103,63 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
}
}
+
+typedef void* (*PFN_TRANSLATEGFXADDRESS_FUNC)(void* pdc, gfxptr_t va, bool* out_pbNullTileAccessed, void* pWorkerData);
+/// @brief Fills outIndices[0..vWidth) with indices of type T read through pfnTranslate, starting at gfx address 'indices'; a lane whose address is at or past lastIndex gets 0.
+template <typename T> // T: storage type of one index element; sizeof(T) is the per-lane address stride
+void GetSimdValidIndicesGfx(gfxptr_t indices, // gfx address of the index for lane 0
+ gfxptr_t lastIndex, // exclusive bound: only addresses strictly below it are read
+ uint32_t vWidth, // number of lanes to fill
+ PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, // callback that turns a gfx address into a pointer this function dereferences
+ void* pdc, // forwarded unchanged as the first argument of pfnTranslate
+ uint32_t* outIndices, // output array with one 32-bit slot per lane; asserted non-null
+ void* pWorkerData) // forwarded unchanged as the last argument of pfnTranslate
+{
+ SWR_ASSERT(outIndices != nullptr);
+
+ gfxptr_t indexPtr = indices;
+ for (int64_t lane = 0; lane < vWidth; lane++)
+ {
+ uint32_t index = 0; // value stored for lanes that fail the bounds check below
+
+ if (indexPtr < lastIndex)
+ {
+ // translate indexPtr and load from it (nullptr is passed for out_pbNullTileAccessed, so no null-tile flag is requested)
+ T* addr = (T*)pfnTranslate(pdc, indexPtr, nullptr, pWorkerData);
+ SWR_ASSERT(addr != nullptr); // NOTE(review): if SWR_ASSERT compiles out in some builds, a null translation is dereferenced on the next line — confirm
+ index = *addr;
+ }
+
+ // store the index, widened to 32 bits, in this lane's slot
+ outIndices[lane] = index;
+
+ indexPtr += sizeof(T); // advance by one element even when the lane was out of range
+ }
+}
+/// @brief Non-template entry point for 8-bit indices: forwards every argument unchanged to GetSimdValidIndicesGfx<uint8_t>.
+void GetSimdValid8bitIndicesGfx(gfxptr_t indices,
+ gfxptr_t lastIndex,
+ uint32_t vWidth,
+ PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate,
+ void* pdc,
+ uint32_t* outIndices,
+ void* pWorkerData)
+{
+ GetSimdValidIndicesGfx<uint8_t>(indices, lastIndex, vWidth, pfnTranslate, pdc, outIndices, pWorkerData); // presumably kept as a plain function so jitted code can call it by address — TODO confirm
+}
+/// @brief Non-template entry point for 16-bit indices: forwards every argument unchanged to GetSimdValidIndicesGfx<uint16_t>.
+void GetSimdValid16bitIndicesGfx(gfxptr_t indices,
+ gfxptr_t lastIndex,
+ uint32_t vWidth,
+ PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate,
+ void* pdc,
+ uint32_t* outIndices,
+ void* pWorkerData)
+{
+ GetSimdValidIndicesGfx<uint16_t>(indices, lastIndex, vWidth, pfnTranslate, pdc, outIndices, pWorkerData); // presumably kept as a plain function so jitted code can call it by address — TODO confirm
+}
+
+
template <typename T>
Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
{