diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 41 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 120 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/pa.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/utils.h | 31 |
5 files changed, 101 insertions, 109 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 06cbf7fb93d..e950e92c874 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -41,6 +41,7 @@ #include "core/threads.h" #include "core/tilemgr.h" #include "core/clip.h" +#include "core/utils.h" #include "common/simdintrin.h" #include "common/os.h" @@ -1029,42 +1030,6 @@ uint32_t MaxVertsPerDraw( return vertsPerDraw; } -// Recursive template used to auto-nest conditionals. Converts dynamic boolean function -// arguments to static template arguments. -template <bool... ArgsB> -struct FEDrawChooser -{ - // Last Arg Terminator - static PFN_FE_WORK_FUNC GetFunc(bool bArg) - { - if (bArg) - { - return ProcessDraw<ArgsB..., true>; - } - - return ProcessDraw<ArgsB..., false>; - } - - // Recursively parse args - template <typename... TArgsT> - static PFN_FE_WORK_FUNC GetFunc(bool bArg, TArgsT... remainingArgs) - { - if (bArg) - { - return FEDrawChooser<ArgsB..., true>::GetFunc(remainingArgs...); - } - - return FEDrawChooser<ArgsB..., false>::GetFunc(remainingArgs...); - } -}; - -// Selector for correct templated Draw front-end function -INLINE -static PFN_FE_WORK_FUNC GetFEDrawFunc(bool IsIndexed, bool HasTessellation, bool HasGeometryShader, bool HasStreamOut, bool RasterizerEnabled) -{ - return FEDrawChooser<>::GetFunc(IsIndexed, HasTessellation, HasGeometryShader, HasStreamOut, RasterizerEnabled); -} - ////////////////////////////////////////////////////////////////////////// /// @brief DrawInstanced @@ -1119,7 +1084,7 @@ void DrawInstanced( InitDraw(pDC, isSplitDraw); pDC->FeWork.type = DRAW; - pDC->FeWork.pfnWork = GetFEDrawFunc( + pDC->FeWork.pfnWork = GetProcessDrawFunc( false, // IsIndexed pState->tsState.tsEnable, pState->gsState.gsEnable, @@ -1252,7 +1217,7 @@ void DrawIndexedInstance( InitDraw(pDC, isSplitDraw); pDC->FeWork.type = DRAW; - pDC->FeWork.pfnWork = GetFEDrawFunc( + pDC->FeWork.pfnWork = GetProcessDrawFunc( true, // IsIndexed pState->tsState.tsEnable, pState->gsState.gsEnable, diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 93869610ff9..5dcd05b78d0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -675,8 +675,8 @@ THREAD SWR_GS_CONTEXT tlsGsContext; /// @param pa - The primitive assembly object. /// @param pGsOut - output stream for GS template < - bool HasStreamOutT, - bool HasRastT> + typename HasStreamOutT, + typename HasRastT> static void GeometryShaderStage( DRAW_CONTEXT *pDC, uint32_t workerId, @@ -759,7 +759,7 @@ static void GeometryShaderStage( // set up new binner and state for the GS output topology PFN_PROCESS_PRIMS pfnClipFunc = nullptr; - if (HasRastT) + if (HasRastT::value) { switch (pState->outputTopology) { @@ -819,7 +819,7 @@ static void GeometryShaderStage( else { // early exit if this stream is not enabled for streamout - if (HasStreamOutT && !state.soState.streamEnable[stream]) + if (HasStreamOutT::value && !state.soState.streamEnable[stream]) { continue; } @@ -842,12 +842,12 @@ static void GeometryShaderStage( { totalPrimsGenerated += gsPa.NumPrims(); - if (HasStreamOutT) + if (HasStreamOutT::value) { StreamOut(pDC, gsPa, workerId, pSoPrimData, stream); } - if (HasRastT && state.soState.streamToRasterizer == stream) + if (HasRastT::value && state.soState.streamToRasterizer == stream) { simdscalari vPrimId; // pull primitiveID from the GS output if available @@ -957,9 +957,9 @@ static void AllocateTessellationData(SWR_CONTEXT* pContext) /// @param pa - The primitive assembly object. /// @param pGsOut - output stream for GS template < - bool HasGeometryShaderT, - bool HasStreamOutT, - bool HasRastT> + typename HasGeometryShaderT, + typename HasStreamOutT, + typename HasRastT> static void TessellationStages( DRAW_CONTEXT *pDC, uint32_t workerId, @@ -995,7 +995,7 @@ static void TessellationStages( SWR_ASSERT(tsCtx); PFN_PROCESS_PRIMS pfnClipFunc = nullptr; - if (HasRastT) + if (HasRastT::value) { switch (tsState.postDSTopology) { @@ -1107,7 +1107,7 @@ static void TessellationStages( while (tessPa.HasWork()) { - if (HasGeometryShaderT) + if (HasGeometryShaderT::value) { GeometryShaderStage<HasStreamOutT, HasRastT>( pDC, workerId, tessPa, pGsOut, pCutBuffer, pCutStreamBuffer, pSoPrimData, @@ -1115,12 +1115,12 @@ static void TessellationStages( } else { - if (HasStreamOutT) + if (HasStreamOutT::value) { StreamOut(pDC, tessPa, workerId, pSoPrimData, 0); } - if (HasRastT) + if (HasRastT::value) { simdvector prim[3]; // Only deal with triangles, lines, or points RDTSC_START(FEPAAssemble); @@ -1149,7 +1149,7 @@ static void TessellationStages( /// @brief FE handler for SwrDraw. /// @tparam IsIndexedT - Is indexed drawing enabled /// @tparam HasTessellationT - Is tessellation enabled -/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled +/// @tparam HasGeometryShaderT::value - Is the geometry shader stage enabled /// @tparam HasStreamOutT - Is stream-out enabled /// @tparam HasRastT - Is rasterization enabled /// @param pContext - pointer to SWR context. @@ -1157,11 +1157,11 @@ static void TessellationStages( /// @param workerId - thread's worker id. /// @param pUserData - Pointer to DRAW_WORK template < - bool IsIndexedT, - bool HasTessellationT, - bool HasGeometryShaderT, - bool HasStreamOutT, - bool HasRastT> + typename IsIndexedT, + typename HasTessellationT, + typename HasGeometryShaderT, + typename HasStreamOutT, + typename HasRastT> void ProcessDraw( SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, @@ -1188,7 +1188,7 @@ void ProcessDraw( uint32_t endVertex = work.numVerts; const int32_t* pLastRequestedIndex = nullptr; - if (IsIndexedT) + if (IsIndexedT::value) { switch (work.type) { @@ -1223,7 +1223,7 @@ void ProcessDraw( vsContext.pVin = &vin; - if (IsIndexedT) + if (IsIndexedT::value) { fetchInfo.BaseVertex = work.baseVertex; @@ -1247,12 +1247,12 @@ void ProcessDraw( void* pGsOut = nullptr; void* pCutBuffer = nullptr; void* pStreamCutBuffer = nullptr; - if (HasGeometryShaderT) + if (HasGeometryShaderT::value) { AllocateGsBuffers(pDC, state, &pGsOut, &pCutBuffer, &pStreamCutBuffer); } - if (HasTessellationT) + if (HasTessellationT::value) { SWR_ASSERT(state.tsState.tsEnable == true); SWR_ASSERT(state.pfnHsFunc != nullptr); @@ -1269,7 +1269,7 @@ void ProcessDraw( // allocate space for streamout input prim data uint32_t* pSoPrimData = nullptr; - if (HasStreamOutT) + if (HasStreamOutT::value) { pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16); @@ -1291,7 +1291,7 @@ void ProcessDraw( simdscalari vIndex; uint32_t i = 0; - if (IsIndexedT) + if (IsIndexedT::value) { fetchInfo.pIndices = work.pIB; } @@ -1309,7 +1309,7 @@ void ProcessDraw( // PaGetNextVsOutput currently has the side effect of updating some PA state machine state. // So we need to keep this outside of (i < endVertex) check. simdmask* pvCutIndices = nullptr; - if (IsIndexedT) + if (IsIndexedT::value) { pvCutIndices = &pa.GetNextVsIndices(); } @@ -1332,7 +1332,7 @@ void ProcessDraw( vsContext.mask = GenerateMask(endVertex - i); // forward cut mask to the PA - if (IsIndexedT) + if (IsIndexedT::value) { *pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask)); } @@ -1372,12 +1372,12 @@ void ProcessDraw( { UPDATE_STAT(IaPrimitives, pa.NumPrims()); - if (HasTessellationT) + if (HasTessellationT::value) { TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>( pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID)); } - else if (HasGeometryShaderT) + else if (HasGeometryShaderT::value) { GeometryShaderStage<HasStreamOutT, HasRastT>( pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID)); @@ -1385,12 +1385,12 @@ void ProcessDraw( else { // If streamout is enabled then stream vertices out to memory. - if (HasStreamOutT) + if (HasStreamOutT::value) { StreamOut(pDC, pa, workerId, pSoPrimData, 0); } - if (HasRastT) + if (HasRastT::value) { SWR_ASSERT(pDC->pState->pfnProcessPrims); pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, @@ -1403,7 +1403,7 @@ void ProcessDraw( } while (pa.NextPrim()); i += KNOB_SIMD_WIDTH; - if (IsIndexedT) + if (IsIndexedT::value) { fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize); } @@ -1417,39 +1417,29 @@ void ProcessDraw( RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId); } -// Explicit Instantiation of all combinations -template void ProcessDraw<false, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, false, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<false, true, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, false, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -template void ProcessDraw<true, true, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); + +struct FEDrawChooser +{ + typedef PFN_FE_WORK_FUNC FuncType; + + template <typename... ArgsB> + static FuncType GetFunc() + { + return ProcessDraw<ArgsB...>; + } +}; + + +// Selector for correct templated Draw front-end function +PFN_FE_WORK_FUNC GetProcessDrawFunc( + bool IsIndexed, + bool HasTessellation, + bool HasGeometryShader, + bool HasStreamOut, + bool HasRasterization) +{ + return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization); +} ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index f92f88c3226..8307c0bd2a7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -28,6 +28,7 @@ ******************************************************************************/ #pragma once #include "context.h" +#include <type_traits> INLINE __m128i fpToFixedPoint(const __m128 vIn) @@ -309,9 +310,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC) uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements); uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts); -// Templated Draw front-end function. All combinations of template parameter values are available -template <bool IsIndexedT, bool HasTessellationT, bool HasGeometryShaderT, bool HasStreamOutT, bool HasRastT> -void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); + +// ProcessDraw front-end function. All combinations of parameter values are available +PFN_FE_WORK_FUNC GetProcessDrawFunc( + bool IsIndexed, + bool HasTessellation, + bool HasGeometryShader, + bool HasStreamOut, + bool HasRasterization); void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index 17f488538d6..067deabb411 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -1146,14 +1146,14 @@ private: // Primitive Assembler factory class, responsible for creating and initializing the correct assembler // based on state. -template <bool IsIndexedT> +template <typename IsIndexedT> struct PA_FACTORY { PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts) : topo(in_topo) { #if KNOB_ENABLE_CUT_AWARE_PA == TRUE const API_STATE& state = GetApiState(pDC); - if ((IsIndexedT && ( + if ((IsIndexedT::value && ( topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST || topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP || topo == TOP_TRIANGLE_LIST || topo == TOP_LINE_LIST_ADJ || @@ -1162,7 +1162,7 @@ struct PA_FACTORY // non-indexed draws with adjacency topologies must use cut-aware PA until we add support // for them in the optimized PA - (!IsIndexedT && ( + (!IsIndexedT::value && ( topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ))) { memset(&indexStore, 0, sizeof(indexStore)); diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h index 60a3a6af19e..63ecd5cfe1b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/utils.h +++ b/src/gallium/drivers/swr/rasterizer/core/utils.h @@ -28,6 +28,7 @@ #pragma once #include <string.h> +#include <type_traits> #include "common/os.h" #include "common/simdintrin.h" #include "common/swr_assert.h" @@ -834,3 +835,33 @@ public: return T(word & ELEMENT_MASK); } }; + +// Recursive template used to auto-nest conditionals. Converts dynamic boolean function +// arguments to static template arguments. +template <typename TermT, typename... ArgsB> +struct TemplateArgUnroller +{ + // Last Arg Terminator + static typename TermT::FuncType GetFunc(bool bArg) + { + if (bArg) + { + return TermT::template GetFunc<ArgsB..., std::true_type>(); + } + + return TermT::template GetFunc<ArgsB..., std::false_type>(); + } + + // Recursively parse args + template <typename... TArgsT> + static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs) + { + if (bArg) + { + return TemplateArgUnroller<TermT, ArgsB..., std::true_type>::GetFunc(remainingArgs...); + } + + return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...); + } +}; + |