diff options
Diffstat (limited to 'src/gallium')
21 files changed, 290 insertions, 261 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h index bbc9538b86d..b00cbf63eba 100644 --- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h +++ b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h @@ -48,7 +48,17 @@ extern THREAD UINT tlsThreadId; class BucketManager { public: - BucketManager() {} + + uint32_t mCurrentFrame; + std::vector<uint32_t> mBucketMap; + bool mBucketsInitialized; + std::string mBucketMgrName; + + + BucketManager(std::string name) : mCurrentFrame(0), mBucketsInitialized(false), mBucketMgrName(name) + { + mBucketMap.clear(); + } ~BucketManager(); // removes all registered thread data diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index f9b86cfbb8d..a043a341059 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -65,9 +65,6 @@ void WakeAllThreads(SWR_CONTEXT* pContext) /// @param pCreateInfo - pointer to creation info. HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo) { - RDTSC_RESET(); - RDTSC_INIT(0); - void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4); memset(pContextMem, 0, sizeof(SWR_CONTEXT)); SWR_CONTEXT* pContext = new (pContextMem) SWR_CONTEXT(); @@ -157,6 +154,12 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo) ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API); #endif +#if defined(KNOB_ENABLE_RDTSC) + pContext->pBucketMgr = new BucketManager(pCreateInfo->contextName); + RDTSC_RESET(pContext->pBucketMgr); + RDTSC_INIT(pContext->pBucketMgr, 0); +#endif + // Allocate scratch space for workers. ///@note We could lazily allocate this but its rather small amount of memory. for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) @@ -205,7 +208,7 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo) // pass pointer to bucket manager back to caller #ifdef KNOB_ENABLE_RDTSC - pCreateInfo->pBucketMgr = &gBucketMgr; + pCreateInfo->pBucketMgr = pContext->pBucketMgr; #endif pCreateInfo->contextSaveSize = sizeof(API_STATE); @@ -277,9 +280,9 @@ void QueueWork(SWR_CONTEXT* pContext) } else { - RDTSC_BEGIN(APIDrawWakeAllThreads, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, APIDrawWakeAllThreads, pDC->drawId); WakeAllThreads(pContext); - RDTSC_END(APIDrawWakeAllThreads, 1); + RDTSC_END(pContext->pBucketMgr, APIDrawWakeAllThreads, 1); } // Set current draw context to NULL so that next state call forces a new draw context to be @@ -300,7 +303,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext) DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false) { - RDTSC_BEGIN(APIGetDrawContext, 0); + RDTSC_BEGIN(pContext->pBucketMgr, APIGetDrawContext, 0); // If current draw context is null then need to obtain a new draw context to use from ring. if (pContext->pCurDrawContext == nullptr) { @@ -389,7 +392,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false) SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC"); } - RDTSC_END(APIGetDrawContext, 0); + RDTSC_END(pContext->pBucketMgr, APIGetDrawContext, 0); return pContext->pCurDrawContext; } @@ -441,6 +444,10 @@ void SwrDestroyContext(HANDLE hContext) #endif } +#if defined(KNOB_ENABLE_RDTSC) + delete pContext->pBucketMgr; +#endif + delete[] pContext->ppScratch; AlignedFree(pContext->pStats); @@ -498,7 +505,7 @@ void SWR_API SwrSync(HANDLE hContext, SWR_CONTEXT* pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - RDTSC_BEGIN(APISync, 0); + RDTSC_BEGIN(pContext->pBucketMgr, APISync, 0); pDC->FeWork.type = SYNC; pDC->FeWork.pfnWork = ProcessSync; @@ -514,7 +521,7 @@ void SWR_API SwrSync(HANDLE hContext, // enqueue QueueDraw(pContext); - RDTSC_END(APISync, 1); + RDTSC_END(pContext->pBucketMgr, APISync, 1); } void SwrStallBE(HANDLE hContext) @@ -529,28 +536,28 @@ void SwrWaitForIdle(HANDLE hContext) { SWR_CONTEXT* pContext = GetContext(hContext); - RDTSC_BEGIN(APIWaitForIdle, 0); + RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0); while (!pContext->dcRing.IsEmpty()) { _mm_pause(); } - RDTSC_END(APIWaitForIdle, 1); + RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1); } void SwrWaitForIdleFE(HANDLE hContext) { SWR_CONTEXT* pContext = GetContext(hContext); - RDTSC_BEGIN(APIWaitForIdle, 0); + RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0); while (pContext->drawsOutstandingFE > 0) { _mm_pause(); } - RDTSC_END(APIWaitForIdle, 1); + RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1); } void SwrSetVertexBuffers(HANDLE hContext, @@ -1172,7 +1179,7 @@ void DrawInstanced(HANDLE hContext, SWR_CONTEXT* pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - RDTSC_BEGIN(APIDraw, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, APIDraw, pDC->drawId); uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology); uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw); @@ -1243,7 +1250,7 @@ void DrawInstanced(HANDLE hContext, pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; - RDTSC_END(APIDraw, numVertices * numInstances); + RDTSC_END(pContext->pBucketMgr, APIDraw, numVertices * numInstances); } ////////////////////////////////////////////////////////////////////////// @@ -1307,7 +1314,7 @@ void DrawIndexedInstance(HANDLE hContext, DRAW_CONTEXT* pDC = GetDrawContext(pContext); API_STATE* pState = &pDC->pState->state; - RDTSC_BEGIN(APIDrawIndexed, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, APIDrawIndexed, pDC->drawId); uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw); @@ -1403,7 +1410,7 @@ void DrawIndexedInstance(HANDLE hContext, pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; - RDTSC_END(APIDrawIndexed, numIndices * numInstances); + RDTSC_END(pContext->pBucketMgr, APIDrawIndexed, numIndices * numInstances); } ////////////////////////////////////////////////////////////////////////// @@ -1529,7 +1536,7 @@ void SwrDispatch(HANDLE hContext, SWR_CONTEXT* pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - RDTSC_BEGIN(APIDispatch, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, APIDispatch, pDC->drawId); AR_API_EVENT( DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ)); pDC->isCompute = true; // This is a compute context. @@ -1546,7 +1553,9 @@ void SwrDispatch(HANDLE hContext, pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE); QueueDispatch(pContext); - RDTSC_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); + RDTSC_END(pContext->pBucketMgr, + APIDispatch, + threadGroupCountX * threadGroupCountY * threadGroupCountZ); } // Deswizzles, converts and stores current contents of the hot tiles to surface @@ -1564,7 +1573,7 @@ void SWR_API SwrStoreTiles(HANDLE hContext, SWR_CONTEXT* pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - RDTSC_BEGIN(APIStoreTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, APIStoreTiles, pDC->drawId); pDC->FeWork.type = STORETILES; pDC->FeWork.pfnWork = ProcessStoreTiles; @@ -1578,7 +1587,7 @@ void SWR_API SwrStoreTiles(HANDLE hContext, AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId)); - RDTSC_END(APIStoreTiles, 1); + RDTSC_END(pContext->pBucketMgr, APIStoreTiles, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1606,7 +1615,7 @@ void SWR_API SwrClearRenderTarget(HANDLE hContext, SWR_CONTEXT* pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, APIClearRenderTarget, pDC->drawId); pDC->FeWork.type = CLEAR; pDC->FeWork.pfnWork = ProcessClear; @@ -1624,7 +1633,7 @@ void SWR_API SwrClearRenderTarget(HANDLE hContext, // enqueue draw QueueDraw(pContext); - RDTSC_END(APIClearRenderTarget, 1); + RDTSC_END(pContext->pBucketMgr, APIClearRenderTarget, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1697,7 +1706,7 @@ void SWR_API SwrEndFrame(HANDLE hContext) DRAW_CONTEXT* pDC = GetDrawContext(pContext); (void)pDC; // var used - RDTSC_ENDFRAME(); + RDTSC_ENDFRAME(pContext->pBucketMgr); AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId)); pContext->frameCount++; diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index c842859ba32..a3f065da2ea 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -277,6 +277,8 @@ struct SWR_CREATECONTEXT_INFO // Input: if set to non-zero value, overrides KNOB value for maximum // number of draws in flight uint32_t MAX_DRAWS_IN_FLIGHT; + + std::string contextName; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index ad358bcc0a5..a435fa35998 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -52,7 +52,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, { SWR_CONTEXT* pContext = pDC->pContext; - RDTSC_BEGIN(BEDispatch, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEDispatch, pDC->drawId); const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData(); SWR_ASSERT(pTaskData != nullptr); @@ -90,7 +90,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); AR_EVENT(CSStats((HANDLE)&csContext.stats)); - RDTSC_END(BEDispatch, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BEDispatch, 1); } ////////////////////////////////////////////////////////////////////////// @@ -119,7 +119,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT* pDC, SWR_CONTEXT* pContext = pDC->pContext; HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; - RDTSC_BEGIN(BEStoreTiles, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStoreTiles, pDC->drawId); SWR_FORMAT srcFormat; switch (attachment) @@ -194,7 +194,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT* pDC, } } } - RDTSC_END(BEStoreTiles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BEStoreTiles, 1); } void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData) @@ -247,9 +247,9 @@ void BackendNullPS(DRAW_CONTEXT* pDC, SWR_TRIANGLE_DESC& work, RenderOutputBuffers& renderBuffers) { - RDTSC_BEGIN(BENullBackend, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BENullBackend, pDC->drawId); ///@todo: handle center multisample pattern - RDTSC_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); @@ -262,7 +262,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC, SWR_PS_CONTEXT psContext; // skip SetupPixelShaderContext(&psContext, ...); // not needed here - RDTSC_END(BESetup, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0); simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y))); @@ -305,7 +305,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC, coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz); } - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample)); @@ -321,7 +321,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC, psContext.vJ.sample); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_END(BEBarycentric, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0); // interpolate user clip distance if available if (state.backendState.clipDistanceMask) @@ -335,7 +335,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC, simdscalar vCoverageMask = _simd_vmask_ps(coverageMask); simdscalar stencilPassMask = vCoverageMask; - RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId); simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, @@ -356,7 +356,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC, vCoverageMask, pStencilSample, stencilPassMask); - RDTSC_END(BEEarlyDepthTest, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0); uint32_t statMask = _simd_movemask_ps(depthPassMask); uint32_t statCount = _mm_popcnt_u32(statMask); @@ -378,7 +378,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC, vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy); } - RDTSC_END(BENullBackend, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BENullBackend, 0); } PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {}; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp index 5750ceac7f0..e772306faec 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp @@ -168,7 +168,7 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason. - RDTSC_BEGIN(BEClear, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId); if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) { @@ -226,13 +226,13 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo pHotTile->state = HOTTILE_CLEAR; } - RDTSC_END(BEClear, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1); } else { // Legacy clear CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData; - RDTSC_BEGIN(BEClear, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId); if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) { @@ -292,7 +292,7 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo pClear->rect); } - RDTSC_END(BEClear, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1); } } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 83d662bd9a7..ad6b78a665f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -755,7 +755,7 @@ struct PixelRateZTestLoop _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz))); } - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(psContext.pBucketManager, BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample)); @@ -778,7 +778,7 @@ struct PixelRateZTestLoop vZ[sample] = state.pfnQuantizeDepth(vZ[sample]); } - RDTSC_END(BEBarycentric, 0); + RDTSC_END(psContext.pBucketManager, BEBarycentric, 0); ///@todo: perspective correct vs non-perspective correct clipping? // if clip distances are enabled, we need to interpolate for each sample @@ -795,7 +795,7 @@ struct PixelRateZTestLoop // ZTest for this sample ///@todo Need to uncomment out this bucket. - // RDTSC_BEGIN(BEDepthBucket, pDC->drawId); + // RDTSC_BEGIN(psContext.pBucketManager, BEDepthBucket, pDC->drawId); depthPassMask[sample] = vCoverageMask[sample]; stencilPassMask[sample] = vCoverageMask[sample]; depthPassMask[sample] = DepthStencilTest(&state, @@ -806,7 +806,7 @@ struct PixelRateZTestLoop vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]); - // RDTSC_END(BEDepthBucket, 0); + // RDTSC_END(psContext.pBucketManager, BEDepthBucket, 0); // early-exit if no pixels passed depth or earlyZ is forced on if (psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample])) @@ -1007,8 +1007,8 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, /// backend - RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId); - RDTSC_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelRateBackend, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); @@ -1029,7 +1029,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, state.colorHottileEnable, renderBuffers); - RDTSC_END(BESetup, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0); PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, @@ -1075,14 +1075,14 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, pCoverageMask, psContext.inputMask, state.blendState.sampleMask); } - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); CalcCentroid<T, false>( &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask); - RDTSC_END(BEBarycentric, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0); if (T::bForcedSampleCount) { @@ -1109,12 +1109,12 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, if (state.psState.usesSourceDepth) { - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId); // interpolate and quantize z psContext.vZ = vplaneps( coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_END(BEBarycentric, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0); } // pixels that are currently active @@ -1122,10 +1122,10 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, psContext.oMask = T::MultisampleT::FullSampleMask(); // execute pixel shader - RDTSC_BEGIN(BEPixelShader, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId); state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext); UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); - RDTSC_END(BEPixelShader, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0); // update stats UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); @@ -1159,7 +1159,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, for (uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++) { - RDTSC_BEGIN(BEOutputMerger, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId); // center pattern does a single coverage/depth/stencil test, standard pattern tests // all samples uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample; @@ -1175,7 +1175,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, if (!_simd_movemask_ps(depthMask)) { // stencil should already have been written in early/lateZ tests - RDTSC_END(BEOutputMerger, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0); continue; } } @@ -1210,10 +1210,10 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]); } - RDTSC_END(BEOutputMerger, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0); } Endtile: - RDTSC_BEGIN(BEEndTile, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId); for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) { @@ -1242,7 +1242,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; - RDTSC_END(BEEndTile, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0); psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); @@ -1252,7 +1252,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } - RDTSC_END(BEPixelRateBackend, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0); } template <uint32_t sampleCountT = SWR_MULTISAMPLE_1X, diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index 9b0b80f766f..04e5e3d58bc 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -45,8 +45,8 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, SWR_TRIANGLE_DESC& work, RenderOutputBuffers& renderBuffers) { - RDTSC_BEGIN(BESampleRateBackend, pDC->drawId); - RDTSC_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESampleRateBackend, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId); void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; const API_STATE& state = GetApiState(pDC); @@ -65,7 +65,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, state.colorHottileEnable, renderBuffers); - RDTSC_END(BESetup, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0); psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y))); psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y))); @@ -95,14 +95,14 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, pCoverageMask, psContext.inputMask, state.blendState.sampleMask); } - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); CalcCentroid<T, false>( &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask); - RDTSC_END(BEBarycentric, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0); for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++) { @@ -128,7 +128,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz); } - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample)); @@ -144,7 +144,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, psContext.vJ.sample); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_END(BEBarycentric, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0); // interpolate user clip distance if available if (state.backendState.clipDistanceMask) @@ -162,7 +162,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, // Early-Z? if (T::bCanEarlyZ) { - RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, @@ -174,7 +174,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - RDTSC_END(BEEarlyDepthTest, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0); // early-exit if no samples passed depth or earlyZ is forced on. if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask)) @@ -201,9 +201,9 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, psContext.activeMask = _simd_castps_si(vCoverageMask); // execute pixel shader - RDTSC_BEGIN(BEPixelShader, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId); state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext); - RDTSC_END(BEPixelShader, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0); // update stats UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); @@ -214,7 +214,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, // late-Z if (!T::bCanEarlyZ) { - RDTSC_BEGIN(BELateDepthTest, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, @@ -226,7 +226,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - RDTSC_END(BELateDepthTest, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) { @@ -251,7 +251,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, UPDATE_STAT_BE(DepthPassCount, statCount); // output merger - RDTSC_BEGIN(BEOutputMerger, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId); OutputMerger8x2(pDC, psContext, @@ -278,7 +278,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, pStencilSample, stencilPassMask); } - RDTSC_END(BEOutputMerger, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0); } work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); } @@ -286,7 +286,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, Endtile: ATTR_UNUSED; - RDTSC_BEGIN(BEEndTile, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId); if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) { @@ -309,7 +309,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; - RDTSC_END(BEEndTile, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0); psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); @@ -319,7 +319,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } - RDTSC_END(BESampleRateBackend, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0); } // Recursive template used to auto-nest conditionals. Converts dynamic enum function diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 46aabcdf34b..2b868269477 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -45,8 +45,8 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, SWR_TRIANGLE_DESC& work, RenderOutputBuffers& renderBuffers) { - RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId); - RDTSC_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESingleSampleBackend, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId); void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; @@ -66,7 +66,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, state.colorHottileEnable, renderBuffers); - RDTSC_END(BESetup, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1); psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y))); psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y))); @@ -114,7 +114,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, pCoverageMask, psContext.inputMask, state.blendState.sampleMask); } - RDTSC_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); @@ -126,7 +126,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_END(BEBarycentric, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 1); // interpolate user clip distance if available if (state.backendState.clipDistanceMask) @@ -144,7 +144,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, // Early-Z? if (T::bCanEarlyZ) { - RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, @@ -156,7 +156,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - RDTSC_END(BEEarlyDepthTest, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0); // early-exit if no pixels passed depth or earlyZ is forced on if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask)) @@ -182,9 +182,9 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, psContext.activeMask = _simd_castps_si(vCoverageMask); // execute pixel shader - RDTSC_BEGIN(BEPixelShader, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId); state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext); - RDTSC_END(BEPixelShader, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0); // update stats UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); @@ -195,7 +195,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, // late-Z if (!T::bCanEarlyZ) { - RDTSC_BEGIN(BELateDepthTest, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, @@ -207,7 +207,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - RDTSC_END(BELateDepthTest, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) { @@ -236,7 +236,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, UPDATE_STAT_BE(DepthPassCount, statCount); // output merger - RDTSC_BEGIN(BEOutputMerger, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId); OutputMerger8x2(pDC, psContext, @@ -263,11 +263,11 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, pStencilBuffer, stencilPassMask); } - RDTSC_END(BEOutputMerger, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0); } Endtile: - RDTSC_BEGIN(BEEndTile, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId); work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) @@ -291,7 +291,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; - RDTSC_END(BEEndTile, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0); psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); @@ -301,7 +301,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } - RDTSC_END(BESingleSampleBackend, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0); } // Recursive template used to auto-nest conditionals. Converts dynamic enum function diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 6dc703c3fa8..dbc387e47e0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -373,7 +373,10 @@ uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox, Integer<SIMD_T> vNegB1 = SIMD_T::mullo_epi32(vBi[1], SIMD_T::set1_epi32(-1)); Integer<SIMD_T> vNegB2 = SIMD_T::mullo_epi32(vBi[2], SIMD_T::set1_epi32(-1)); - RDTSC_EVENT(FEEarlyRastEnter, _mm_popcnt_u32(oneTileMask & triMask), 0); + RDTSC_EVENT(pDC->pContext->pBucketMgr, + FEEarlyRastEnter, + _mm_popcnt_u32(oneTileMask & triMask), + 0); Integer<SIMD_T> vShiftCntrl = EarlyRastHelper<SIMD_T>::InitShiftCntrl(); Integer<SIMD_T> vCwTris = SIMD_T::set1_epi32(cwTrisMask); @@ -639,7 +642,10 @@ uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox, if (triMask ^ oldTriMask) { - RDTSC_EVENT(FEEarlyRastExit, _mm_popcnt_u32(triMask & oneTileMask), 0); + RDTSC_EVENT(pDC->pContext->pBucketMgr, + FEEarlyRastExit, + _mm_popcnt_u32(triMask & oneTileMask), + 0); } return triMask; } @@ -668,7 +674,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC, { const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx); - RDTSC_BEGIN(FEBinTriangles, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinTriangles, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -806,7 +812,10 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC, if (origTriMask ^ triMask) { - RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0); + RDTSC_EVENT(pDC->pContext->pBucketMgr, + FECullZeroAreaAndBackface, + _mm_popcnt_u32(origTriMask ^ triMask), + 0); } AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask)); @@ -917,7 +926,10 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC, if (origTriMask ^ triMask) { - RDTSC_EVENT(FECullBetweenCenters, _mm_popcnt_u32(origTriMask ^ triMask), 0); + RDTSC_EVENT(pDC->pContext->pBucketMgr, + FECullBetweenCenters, + _mm_popcnt_u32(origTriMask ^ triMask), + 0); } } @@ -1017,7 +1029,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC, if (!triMask) { - RDTSC_END(FEBinTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1); return; } } @@ -1029,7 +1041,7 @@ endBinTriangles: if (!triMask) { - RDTSC_END(FEBinTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1); return; } @@ -1065,7 +1077,7 @@ endBinTriangles: BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>( pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); - RDTSC_END(FEBinTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1); return; } else if (rastState.fillMode == SWR_FILLMODE_POINT) @@ -1078,7 +1090,7 @@ endBinTriangles: BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>( pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx); - RDTSC_END(FEBinTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1); return; } @@ -1194,7 +1206,7 @@ endBinTriangles: triMask &= ~(1 << triIndex); } - RDTSC_END(FEBinTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1); } template <typename CT> @@ -1274,7 +1286,7 @@ void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC, Integer<SIMD_T> const& viewportIdx, Integer<SIMD_T> const& rtIdx) { - RDTSC_BEGIN(FEBinPoints, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinPoints, pDC->drawId); Vec4<SIMD_T>& primVerts = prim[0]; @@ -1572,7 +1584,7 @@ void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC, } } - RDTSC_END(FEBinPoints, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinPoints, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1674,7 +1686,7 @@ void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC, { const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx); - RDTSC_BEGIN(FEBinLines, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinLines, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -1867,7 +1879,7 @@ void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC, endBinLines: - RDTSC_END(FEBinLines, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEBinLines, 1); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index 87be5bc119b..c399caf239b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -185,10 +185,10 @@ void ClipRectangles(DRAW_CONTEXT* pDC, simdscalari const& viewportIdx, simdscalari const& rtIdx) { - RDTSC_BEGIN(FEClipRectangles, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId); Clipper<SIMD256, 3> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipRectangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1); } void ClipTriangles(DRAW_CONTEXT* pDC, @@ -200,10 +200,10 @@ void ClipTriangles(DRAW_CONTEXT* pDC, simdscalari const& viewportIdx, simdscalari const& rtIdx) { - RDTSC_BEGIN(FEClipTriangles, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId); Clipper<SIMD256, 3> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1); } void ClipLines(DRAW_CONTEXT* pDC, @@ -215,10 +215,10 @@ void ClipLines(DRAW_CONTEXT* pDC, simdscalari const& viewportIdx, simdscalari const& rtIdx) { - RDTSC_BEGIN(FEClipLines, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId); Clipper<SIMD256, 2> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipLines, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1); } void ClipPoints(DRAW_CONTEXT* pDC, @@ -230,10 +230,10 @@ void ClipPoints(DRAW_CONTEXT* pDC, simdscalari const& viewportIdx, simdscalari const& rtIdx) { - RDTSC_BEGIN(FEClipPoints, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId); Clipper<SIMD256, 1> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipPoints, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1); } #if USE_SIMD16_FRONTEND @@ -246,7 +246,7 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC, simd16scalari const& viewportIdx, simd16scalari const& rtIdx) { - RDTSC_BEGIN(FEClipRectangles, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId); enum { @@ -258,7 +258,7 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC, pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipRectangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1); } void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC, @@ -270,7 +270,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC, simd16scalari const& viewportIdx, simd16scalari const& rtIdx) { - RDTSC_BEGIN(FEClipTriangles, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId); enum { @@ -282,7 +282,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC, pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipTriangles, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1); } void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC, @@ -294,7 +294,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC, simd16scalari const& viewportIdx, simd16scalari const& rtIdx) { - RDTSC_BEGIN(FEClipLines, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId); enum { @@ -306,7 +306,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC, pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipLines, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1); } void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC, @@ -318,7 +318,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC, simd16scalari const& viewportIdx, simd16scalari const& rtIdx) { - RDTSC_BEGIN(FEClipPoints, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId); enum { @@ -330,7 +330,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC, pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - RDTSC_END(FEClipPoints, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1); } #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 33c16538fd9..1965274c82d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -781,7 +781,7 @@ public: if (clipMask) { - RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId); + RDTSC_BEGIN(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute the clipper, which will also // call the binner ClipSimd(prim, @@ -791,7 +791,7 @@ public: primId, viewportIdx, rtIdx); - RDTSC_END(FEGuardbandClip, 1); + RDTSC_END(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, 1); } else if (validMask) { diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 8849e60c91f..d17baea94a0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -40,6 +40,7 @@ #include "core/fifo.hpp" #include "core/knobs.h" #include "common/intrin.h" +#include "common/rdtsc_buckets.h" #include "core/threads.h" #include "ringbuffer.h" #include "archrast/archrast.h" @@ -523,14 +524,14 @@ struct SWR_CONTEXT HotTileMgr* pHotTileMgr; // Callback functions, passed in at create context time - PFN_LOAD_TILE pfnLoadTile; - PFN_STORE_TILE pfnStoreTile; - PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead; - PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite; - PFN_MAKE_GFXPTR pfnMakeGfxPtr; - PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; - PFN_UPDATE_STATS pfnUpdateStats; - PFN_UPDATE_STATS_FE pfnUpdateStatsFE; + PFN_LOAD_TILE pfnLoadTile; + PFN_STORE_TILE pfnStoreTile; + PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead; + PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite; + PFN_MAKE_GFXPTR pfnMakeGfxPtr; + PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; + PFN_UPDATE_STATS pfnUpdateStats; + PFN_UPDATE_STATS_FE pfnUpdateStatsFE; // Global Stats @@ -550,6 +551,8 @@ struct SWR_CONTEXT // ArchRast thread contexts. HANDLE* pArContext; + + BucketManager *pBucketMgr; }; #define UPDATE_STAT_BE(name, count) \ @@ -568,11 +571,11 @@ struct SWR_CONTEXT #define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads] #ifdef KNOB_ENABLE_RDTSC -#define RDTSC_BEGIN(type, drawid) RDTSC_START(type) -#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0) +#define RDTSC_BEGIN(pBucketMgr, type, drawid) RDTSC_START(pBucketMgr, type) +#define RDTSC_END(pBucketMgr, type, count) RDTSC_STOP(pBucketMgr, type, count, 0) #else -#define RDTSC_BEGIN(type, count) -#define RDTSC_END(type, count) +#define RDTSC_BEGIN(pBucketMgr, type, drawid) +#define RDTSC_END(pBucketMgr, type, count) #endif #ifdef KNOB_ENABLE_AR diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 85b2e8d54b0..5eda4d7d870 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -135,7 +135,7 @@ void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, v /// @todo This should go away when we switch this to use compute threading. void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData) { - RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEProcessStoreTiles, pDC->drawId); MacroTileMgr* pTileMgr = pDC->pTileMgr; STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData; @@ -160,7 +160,7 @@ void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t worker } } - RDTSC_END(FEProcessStoreTiles, 0); + RDTSC_END(pContext->pBucketMgr, FEProcessStoreTiles, 0); } ////////////////////////////////////////////////////////////////////////// @@ -175,7 +175,7 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext, uint32_t workerId, void* pUserData) { - RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEProcessInvalidateTiles, pDC->drawId); DISCARD_INVALIDATE_TILES_DESC* pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; MacroTileMgr* pTileMgr = pDC->pTileMgr; @@ -214,7 +214,7 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext, } } - RDTSC_END(FEProcessInvalidateTiles, 0); + RDTSC_END(pContext->pBucketMgr, FEProcessInvalidateTiles, 0); } ////////////////////////////////////////////////////////////////////////// @@ -518,7 +518,7 @@ static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining) static void StreamOut( DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t workerId, uint32_t* pPrimData, uint32_t streamIndex) { - RDTSC_BEGIN(FEStreamout, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEStreamout, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_STREAMOUT_STATE& soState = state.soState; @@ -598,7 +598,7 @@ static void StreamOut( UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); - RDTSC_END(FEStreamout, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEStreamout, 1); } #if USE_SIMD16_FRONTEND @@ -834,7 +834,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC, #endif simdscalari const& primID) { - RDTSC_BEGIN(FEGeometryShader, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEGeometryShader, pDC->drawId); void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; @@ -1178,7 +1178,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC, UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount); UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated); AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim * numInputPrims)); - RDTSC_END(FEGeometryShader, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEGeometryShader, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1372,9 +1372,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC, hsContext.mask = GenerateMask(numPrims); // Run the HS - RDTSC_BEGIN(FEHullShader, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEHullShader, pDC->drawId); state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext); - RDTSC_END(FEHullShader, 0); + RDTSC_END(pDC->pContext->pBucketMgr, FEHullShader, 0); UPDATE_STAT_FE(HsInvocations, numPrims); AR_EVENT(HSStats((HANDLE)&hsContext.stats)); @@ -1385,10 +1385,10 @@ static void TessellationStages(DRAW_CONTEXT* pDC, { // Run Tessellator SWR_TS_TESSELLATED_DATA tsData = {0}; - RDTSC_BEGIN(FETessellation, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId); TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData); AR_EVENT(TessPrimCount(1)); - RDTSC_END(FETessellation, 0); + RDTSC_END(pDC->pContext->pBucketMgr, FETessellation, 0); if (tsData.NumPrimitives == 0) { @@ -1441,9 +1441,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC, { dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); - RDTSC_BEGIN(FEDomainShader, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEDomainShader, pDC->drawId); state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext); - RDTSC_END(FEDomainShader, 0); + RDTSC_END(pDC->pContext->pBucketMgr, FEDomainShader, 0); AR_EVENT(DSStats((HANDLE)&dsContext.stats)); @@ -1524,14 +1524,14 @@ static void TessellationStages(DRAW_CONTEXT* pDC, #else simdvector prim[3]; // Only deal with triangles, lines, or points #endif - RDTSC_BEGIN(FEPAAssemble, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEPAAssemble, pDC->drawId); bool assemble = #if USE_SIMD16_FRONTEND tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16); #else tessPa.Assemble(VERTEX_POSITION_SLOT, prim); #endif - RDTSC_END(FEPAAssemble, 1); + RDTSC_END(pDC->pContext->pBucketMgr, FEPAAssemble, 1); SWR_ASSERT(assemble); SWR_ASSERT(pfnClipFunc); @@ -1663,7 +1663,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo } #endif - RDTSC_BEGIN(FEProcessDraw, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEProcessDraw, pDC->drawId); void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; @@ -1895,7 +1895,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo #endif } // 1. Execute FS/VS for a single SIMD. - RDTSC_BEGIN(FEFetchShader, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId); #if USE_SIMD16_SHADERS state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin); #else @@ -1906,7 +1906,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi); } #endif - RDTSC_END(FEFetchShader, 0); + RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0); // forward fetch generated vertex IDs to the vertex shader #if USE_SIMD16_SHADERS @@ -1950,7 +1950,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo if (!KNOB_TOSS_FETCH) #endif { - RDTSC_BEGIN(FEVertexShader, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId); #if USE_SIMD16_VS state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo); AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats)); @@ -1964,7 +1964,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats)); } #endif - RDTSC_END(FEVertexShader, 0); + RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); } @@ -1975,9 +1975,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo { simd16vector prim_simd16[MAX_NUM_VERTS_PER_PRIM]; - RDTSC_START(FEPAAssemble); + RDTSC_START(pContext->pBucketMgr, FEPAAssemble); bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim_simd16); - RDTSC_STOP(FEPAAssemble, 1, 0); + RDTSC_STOP(pContext->pBucketMgr, FEPAAssemble, 1, 0); #if KNOB_ENABLE_TOSS_POINTS if (!KNOB_TOSS_FETCH) @@ -2190,9 +2190,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo if (i < endVertex) { // 1. Execute FS/VS for a single SIMD. - RDTSC_BEGIN(FEFetchShader, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId); state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout); - RDTSC_END(FEFetchShader, 0); + RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0); // forward fetch generated vertex IDs to the vertex shader vsContext.VertexID = fetchInfo.VertexID; @@ -2212,9 +2212,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo if (!KNOB_TOSS_FETCH) #endif { - RDTSC_BEGIN(FEVertexShader, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId); state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext); - RDTSC_END(FEVertexShader, 0); + RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); AR_EVENT(VSStats((HANDLE)&vsContext.stats)); @@ -2226,9 +2226,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo { simdvector prim[MAX_NUM_VERTS_PER_PRIM]; // PaAssemble returns false if there is not enough verts to assemble. - RDTSC_BEGIN(FEPAAssemble, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, FEPAAssemble, pDC->drawId); bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim); - RDTSC_END(FEPAAssemble, 1); + RDTSC_END(pContext->pBucketMgr, FEPAAssemble, 1); #if KNOB_ENABLE_TOSS_POINTS if (!KNOB_TOSS_FETCH) @@ -2339,7 +2339,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo #endif - RDTSC_END(FEProcessDraw, numPrims * work.numInstances); + RDTSC_END(pContext->pBucketMgr, FEProcessDraw, numPrims * work.numInstances); } struct FEDrawChooser diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index a392035700d..44c486c80bf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -53,7 +53,7 @@ void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, voi #endif // bloat line to two tris and call the triangle rasterizer twice - RDTSC_BEGIN(BERasterizeLine, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -245,7 +245,7 @@ void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, voi pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); } - RDTSC_BEGIN(BERasterizeLine, 1); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, 1); } void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData) @@ -308,9 +308,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTi renderBuffers, triDesc.triFlags.renderTargetArrayIndex); - RDTSC_BEGIN(BEPixelBackend, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId); backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers); - RDTSC_END(BEPixelBackend, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0); } void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData) diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h index 20206eaaaf5..4a0fd0934b0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h @@ -842,10 +842,10 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT> } // not trivial accept or reject, must rasterize full tile - RDTSC_BEGIN(BERasterizePartial, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId); innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>( pDC, startQuadEdgesAdj, pRastEdges); - RDTSC_END(BERasterizePartial, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0); } }; @@ -927,8 +927,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, return; } #endif - RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId); - RDTSC_BEGIN(BETriangleSetup, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeTriangle, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BETriangleSetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -1103,7 +1103,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0); - RDTSC_END(BETriangleSetup, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BETriangleSetup, 0); // update triangle desc uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); @@ -1115,12 +1115,12 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, if (numTilesX == 0 || numTilesY == 0) { - RDTSC_EVENT(BEEmptyTriangle, 1, 0); - RDTSC_END(BERasterizeTriangle, 1); + RDTSC_EVENT(pDC->pContext->pBucketMgr, BEEmptyTriangle, 1, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1); return; } - RDTSC_BEGIN(BEStepSetup, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStepSetup, pDC->drawId); // Step to pixel center of top-left pixel of the triangle bbox // Align intersect bbox (top/left) to raster tile's (top/left). @@ -1232,7 +1232,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, } } - RDTSC_END(BEStepSetup, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEStepSetup, 0); uint32_t tY = minTileY; uint32_t tX = minTileX; @@ -1297,7 +1297,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, { triDesc.innerCoverageMask = 0xffffffffffffffffULL; } - RDTSC_EVENT(BETrivialAccept, 1, 0); + RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialAccept, 1, 0); } else { @@ -1339,11 +1339,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, } // not trivial accept or reject, must rasterize full tile - RDTSC_BEGIN(BERasterizePartial, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId); triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>( pDC, startQuadEdges, rastEdges); - RDTSC_END(BERasterizePartial, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0); triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; @@ -1362,7 +1362,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, { triDesc.coverageMask[sampleNum] = 0; } - RDTSC_EVENT(BETrivialReject, 1, 0); + RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialReject, 1, 0); } } @@ -1389,14 +1389,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // Track rasterized subspans AR_EVENT(RasterTileCount(pDC->drawId, 1)); - RDTSC_BEGIN(BEPixelBackend, pDC->drawId); + RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId); backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers); - RDTSC_END(BEPixelBackend, 0); + RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0); } // step to the next tile in X @@ -1417,7 +1417,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow); } - RDTSC_END(BERasterizeTriangle, 1); + RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1); } // Get pointers to hot tile memory for color RT, depth, stencil diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp index e858a7d599e..6329b2ec98e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp @@ -92,9 +92,3 @@ BUCKET_DESC gCoreBuckets[] = { static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])), "RDTSC Bucket enum and description table size mismatched."); -/// @todo bucketmanager and mapping should probably be a part of the SWR context -std::vector<uint32_t> gBucketMap; -BucketManager gBucketMgr; - -uint32_t gCurrentFrame = 0; -bool gBucketsInitialized = false; diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h index dc20e5be98d..0228275bd47 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h +++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h @@ -100,90 +100,86 @@ enum CORE_BUCKETS NumBuckets }; -void rdtscReset(); -void rdtscInit(int threadId); -void rdtscStart(uint32_t bucketId); -void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId); -void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2); -void rdtscEndFrame(); +void rdtscReset(BucketManager* pBucketMgr); +void rdtscInit(BucketManager* pBucketMgr, int threadId); +void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId); +void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId); +void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2); +void rdtscEndFrame(BucketManager* pBucketMgr); #ifdef KNOB_ENABLE_RDTSC -#define RDTSC_RESET() rdtscReset() -#define RDTSC_INIT(threadId) rdtscInit(threadId) -#define RDTSC_START(bucket) rdtscStart(bucket) -#define RDTSC_STOP(bucket, count, draw) rdtscStop(bucket, count, draw) -#define RDTSC_EVENT(bucket, count1, count2) rdtscEvent(bucket, count1, count2) -#define RDTSC_ENDFRAME() rdtscEndFrame() +#define RDTSC_RESET(pBucketMgr) rdtscReset(pBucketMgr) +#define RDTSC_INIT(pBucketMgr, threadId) rdtscInit(pBucketMgr,threadId) +#define RDTSC_START(pBucketMgr, bucket) rdtscStart(pBucketMgr, bucket) +#define RDTSC_STOP(pBucketMgr, bucket, count, draw) rdtscStop(pBucketMgr, bucket, count, draw) +#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) rdtscEvent(pBucketMgr, bucket, count1, count2) +#define RDTSC_ENDFRAME(pBucketMgr) rdtscEndFrame(pBucketMgr) #else -#define RDTSC_RESET() -#define RDTSC_INIT(threadId) -#define RDTSC_START(bucket) -#define RDTSC_STOP(bucket, count, draw) -#define RDTSC_EVENT(bucket, count1, count2) -#define RDTSC_ENDFRAME() +#define RDTSC_RESET(pBucketMgr) +#define RDTSC_INIT(pBucketMgr, threadId) +#define RDTSC_START(pBucketMgr, bucket) +#define RDTSC_STOP(pBucketMgr, bucket, count, draw) +#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) +#define RDTSC_ENDFRAME(pBucketMgr) #endif -extern std::vector<uint32_t> gBucketMap; -extern BucketManager gBucketMgr; extern BUCKET_DESC gCoreBuckets[]; -extern uint32_t gCurrentFrame; -extern bool gBucketsInitialized; -INLINE void rdtscReset() +INLINE void rdtscReset(BucketManager *pBucketMgr) { - gCurrentFrame = 0; - gBucketMgr.ClearThreads(); + pBucketMgr->mCurrentFrame = 0; + pBucketMgr->ClearThreads(); } -INLINE void rdtscInit(int threadId) +INLINE void rdtscInit(BucketManager* pBucketMgr, int threadId) { // register all the buckets once - if (!gBucketsInitialized && (threadId == 0)) + if (!pBucketMgr->mBucketsInitialized && (threadId == 0)) { - gBucketMap.resize(NumBuckets); + pBucketMgr->mBucketMap.resize(NumBuckets); for (uint32_t i = 0; i < NumBuckets; ++i) { - gBucketMap[i] = gBucketMgr.RegisterBucket(gCoreBuckets[i]); + pBucketMgr->mBucketMap[i] = pBucketMgr->RegisterBucket(gCoreBuckets[i]); } - gBucketsInitialized = true; + pBucketMgr->mBucketsInitialized = true; } std::string name = threadId == 0 ? "API" : "WORKER"; - gBucketMgr.RegisterThread(name); + pBucketMgr->RegisterThread(name); } -INLINE void rdtscStart(uint32_t bucketId) +INLINE void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId) { - uint32_t id = gBucketMap[bucketId]; - gBucketMgr.StartBucket(id); + uint32_t id = pBucketMgr->mBucketMap[bucketId]; + pBucketMgr->StartBucket(id); } -INLINE void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId) +INLINE void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId) { - uint32_t id = gBucketMap[bucketId]; - gBucketMgr.StopBucket(id); + uint32_t id = pBucketMgr->mBucketMap[bucketId]; + pBucketMgr->StopBucket(id); } -INLINE void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2) +INLINE void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2) { - uint32_t id = gBucketMap[bucketId]; - gBucketMgr.AddEvent(id, count1); + uint32_t id = pBucketMgr->mBucketMap[bucketId]; + pBucketMgr->AddEvent(id, count1); } -INLINE void rdtscEndFrame() +INLINE void rdtscEndFrame(BucketManager* pBucketMgr) { - gCurrentFrame++; + pBucketMgr->mCurrentFrame++; - if (gCurrentFrame == KNOB_BUCKETS_START_FRAME && + if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_START_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME) { - gBucketMgr.StartCapture(); + pBucketMgr->StartCapture(); } - if (gCurrentFrame == KNOB_BUCKETS_END_FRAME && + if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_END_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME) { - gBucketMgr.StopCapture(); - gBucketMgr.PrintReport("rdtsc.txt"); + pBucketMgr->StopCapture(); + pBucketMgr->PrintReport("rdtsc.txt"); } } diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index e85144c2582..b6734e2ad0f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -31,6 +31,7 @@ #include "common/formats.h" #include "common/intrin.h" +#include "common/rdtsc_buckets.h" #include <functional> #include <algorithm> @@ -381,6 +382,8 @@ struct SWR_PS_CONTEXT uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. + + BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets. }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index e30c1170568..59e37a488a4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -609,7 +609,7 @@ bool WorkOnFifoBE(SWR_CONTEXT* pContext, { BE_WORK* pWork; - RDTSC_BEGIN(WorkerFoundWork, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, WorkerFoundWork, pDC->drawId); uint32_t numWorkItems = tile->getNumQueued(); SWR_ASSERT(numWorkItems); @@ -630,7 +630,7 @@ bool WorkOnFifoBE(SWR_CONTEXT* pContext, pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); tile->dequeue(); } - RDTSC_END(WorkerFoundWork, numWorkItems); + RDTSC_END(pContext->pBucketMgr, WorkerFoundWork, numWorkItems); _ReadWriteBarrier(); @@ -868,7 +868,7 @@ DWORD workerThreadMain(LPVOID pData) SetCurrentThreadName(threadName); } - RDTSC_INIT(threadId); + RDTSC_INIT(pContext->pBucketMgr, threadId); // Only need offset numa index from base for correct masking uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE; @@ -936,10 +936,10 @@ DWORD workerThreadMain(LPVOID pData) if (IsBEThread) { - RDTSC_BEGIN(WorkerWorkOnFifoBE, 0); + RDTSC_BEGIN(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0); bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask); - RDTSC_END(WorkerWorkOnFifoBE, 0); + RDTSC_END(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0); WorkOnCompute(pContext, workerId, curDrawBE); } diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp index 1ea1c4b1a6f..13f4e37f027 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp @@ -368,7 +368,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, if (pHotTile->state == HOTTILE_INVALID) { - RDTSC_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, @@ -379,15 +379,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - RDTSC_END(BELoadTiles, 0); + RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - RDTSC_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId); // Clear the tile. ClearColorHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - RDTSC_END(BELoadTiles, 0); + RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } colorHottileEnableMask &= ~(1 << rtSlot); } @@ -399,7 +399,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { - RDTSC_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, @@ -410,15 +410,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - RDTSC_END(BELoadTiles, 0); + RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - RDTSC_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId); // Clear the tile. ClearDepthHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - RDTSC_END(BELoadTiles, 0); + RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } } @@ -429,7 +429,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { - RDTSC_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, @@ -440,15 +440,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - RDTSC_END(BELoadTiles, 0); + RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - RDTSC_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId); // Clear the tile. ClearStencilHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - RDTSC_END(BELoadTiles, 0); + RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0); } } } diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index b3e67b9c18f..1975e11a9b5 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -489,8 +489,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT; - SWR_CREATECONTEXT_INFO createInfo; - memset(&createInfo, 0, sizeof(createInfo)); + SWR_CREATECONTEXT_INFO createInfo {0}; + createInfo.privateStateSize = sizeof(swr_draw_context); createInfo.pfnLoadTile = swr_LoadHotTile; createInfo.pfnStoreTile = swr_StoreHotTile; |