summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Tim Rowley <[email protected]> 2016-09-06 12:36:02 -0500
committer: Tim Rowley <[email protected]> 2016-09-19 20:10:19 -0500
commit: 2f86a9577adf5c43e892f899224d0f73ff1d37c2 (patch)
tree: cbf7087434879fcfc4ae269cbeb5015ca3cdfc36 /src/gallium
parent: 04026b43c89c6fdb794650f8c80e356707cc6d69 (diff)
swr: [rasterizer core] Add macros for mapping ArchRast to buckets

Switch all RDTSC_START/STOP macros to use AR_BEGIN/END macros.

Signed-off-by: Tim Rowley <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.cpp 56
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/backend.cpp 154
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/backend.h 18
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.cpp 15
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 8
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 36
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 74
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp 46
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.cpp 14
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp 26
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/tilemgr.h 2
11 files changed, 249 insertions, 200 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 6bdb8f4b410..df87d14ca3e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -46,8 +46,6 @@
#include "common/simdintrin.h"
#include "common/os.h"
-#include "archrast/archrast.h"
-
static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
void SetupDefaultState(SWR_CONTEXT *pContext);
@@ -264,9 +262,9 @@ void QueueWork(SWR_CONTEXT *pContext)
}
else
{
- RDTSC_START(APIDrawWakeAllThreads);
+ AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
WakeAllThreads(pContext);
- RDTSC_STOP(APIDrawWakeAllThreads, 1, 0);
+ AR_API_END(APIDrawWakeAllThreads, 1);
}
// Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
@@ -286,7 +284,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext)
DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
{
- RDTSC_START(APIGetDrawContext);
+ AR_API_BEGIN(APIGetDrawContext, 0);
// If current draw context is null then need to obtain a new draw context to use from ring.
if (pContext->pCurDrawContext == nullptr)
{
@@ -372,7 +370,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
}
- RDTSC_STOP(APIGetDrawContext, 0, 0);
+ AR_API_END(APIGetDrawContext, 0);
return pContext->pCurDrawContext;
}
@@ -418,13 +416,13 @@ void SetupDefaultState(SWR_CONTEXT *pContext)
void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3)
{
- RDTSC_START(APISync);
-
SWR_ASSERT(pfnFunc != nullptr);
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APISync, 0);
+
pDC->FeWork.type = SYNC;
pDC->FeWork.pfnWork = ProcessSync;
@@ -437,35 +435,35 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint
//enqueue
QueueDraw(pContext);
- RDTSC_STOP(APISync, 1, 0);
+ AR_API_END(APISync, 1);
}
void SwrWaitForIdle(HANDLE hContext)
{
SWR_CONTEXT *pContext = GetContext(hContext);
- RDTSC_START(APIWaitForIdle);
+ AR_API_BEGIN(APIWaitForIdle, 0);
while (!pContext->dcRing.IsEmpty())
{
_mm_pause();
}
- RDTSC_STOP(APIWaitForIdle, 1, 0);
+ AR_API_END(APIWaitForIdle, 1);
}
void SwrWaitForIdleFE(HANDLE hContext)
{
SWR_CONTEXT *pContext = GetContext(hContext);
- RDTSC_START(APIWaitForIdle);
+ AR_API_BEGIN(APIWaitForIdle, 0);
while (pContext->drawsOutstandingFE > 0)
{
_mm_pause();
}
- RDTSC_STOP(APIWaitForIdle, 1, 0);
+ AR_API_END(APIWaitForIdle, 1);
}
void SwrSetVertexBuffers(
@@ -1080,11 +1078,11 @@ void DrawInstanced(
return;
}
- RDTSC_START(APIDraw);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIDraw, pDC->drawId);
+
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
uint32_t remainingVerts = numVertices;
@@ -1139,7 +1137,7 @@ void DrawInstanced(
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
- RDTSC_STOP(APIDraw, numVertices * numInstances, 0);
+ AR_API_END(APIDraw, numVertices * numInstances);
}
//////////////////////////////////////////////////////////////////////////
@@ -1200,14 +1198,12 @@ void DrawIndexedInstance(
return;
}
- RDTSC_START(APIDrawIndexed);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
API_STATE* pState = &pDC->pState->state;
- AR_BEGIN(AR_API_CTX, APIDrawIndexed, pDC->drawId);
- AR_EVENT(AR_API_CTX, DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
+ AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
+ AR_API_EVENT(DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
@@ -1280,8 +1276,7 @@ void DrawIndexedInstance(
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
- AR_END(AR_API_CTX, APIDrawIndexed, numIndices * numInstances);
- RDTSC_STOP(APIDrawIndexed, numIndices * numInstances, 0);
+ AR_API_END(APIDrawIndexed, numIndices * numInstances);
}
@@ -1406,10 +1401,11 @@ void SwrDispatch(
return;
}
- RDTSC_START(APIDispatch);
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIDispatch, pDC->drawId);
+
pDC->isCompute = true; // This is a compute context.
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
@@ -1424,7 +1420,7 @@ void SwrDispatch(
pDC->pDispatch->initialize(totalThreadGroups, pTaskData);
QueueDispatch(pContext);
- RDTSC_STOP(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ, 0);
+ AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
}
// Deswizzles, converts and stores current contents of the hot tiles to surface
@@ -1440,11 +1436,11 @@ void SWR_API SwrStoreTiles(
return;
}
- RDTSC_START(APIStoreTiles);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIStoreTiles, pDC->drawId);
+
pDC->FeWork.type = STORETILES;
pDC->FeWork.pfnWork = ProcessStoreTiles;
pDC->FeWork.desc.storeTiles.attachment = attachment;
@@ -1455,7 +1451,7 @@ void SWR_API SwrStoreTiles(
//enqueue
QueueDraw(pContext);
- RDTSC_STOP(APIStoreTiles, 0, 0);
+ AR_API_END(APIStoreTiles, 1);
}
//////////////////////////////////////////////////////////////////////////
@@ -1479,11 +1475,11 @@ void SWR_API SwrClearRenderTarget(
return;
}
- RDTSC_START(APIClearRenderTarget);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIClearRenderTarget, pDC->drawId);
+
CLEAR_FLAGS flags;
flags.bits = 0;
flags.mask = clearMask;
@@ -1503,7 +1499,7 @@ void SWR_API SwrClearRenderTarget(
// enqueue draw
QueueDraw(pContext);
- RDTSC_STOP(APIClearRenderTarget, 0, pDC->drawId);
+ AR_API_END(APIClearRenderTarget, 1);
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 0e92ccf2c88..d3d114ecdb0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -47,10 +47,10 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
/// @param threadGroupId - the linear index for the thread group within the dispatch.
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
{
- RDTSC_START(BEDispatch);
-
SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(BEDispatch, pDC->drawId);
+
const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
SWR_ASSERT(pTaskData != nullptr);
@@ -75,7 +75,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
UPDATE_STAT(CsInvocations, state.totalThreadsInGroup);
- RDTSC_STOP(BEDispatch, 1, 0);
+ AR_END(BEDispatch, 1);
}
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
@@ -180,16 +180,17 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
if (KNOB_FAST_CLEAR)
{
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
- SWR_CONTEXT *pContext = pDC->pContext;
SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
uint32_t numSamples = GetNumSamples(sampleCount);
SWR_ASSERT(pClear->flags.bits != 0); // shouldn't be here without a reason.
- RDTSC_START(BEClear);
+ AR_BEGIN(BEClear, pDC->drawId);
if (pClear->flags.mask & SWR_CLEAR_COLOR)
{
@@ -217,13 +218,13 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
pHotTile->state = HOTTILE_CLEAR;
}
- RDTSC_STOP(BEClear, 0, 0);
+ AR_END(BEClear, 1);
}
else
{
// Legacy clear
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
- RDTSC_START(BEClear);
+ AR_BEGIN(BEClear, pDC->drawId);
if (pClear->flags.mask & SWR_CLEAR_COLOR)
{
@@ -265,17 +266,18 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect);
}
- RDTSC_STOP(BEClear, 0, 0);
+ AR_END(BEClear, 1);
}
}
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
- RDTSC_START(BEStoreTiles);
STORE_TILES_DESC *pDesc = (STORE_TILES_DESC*)pData;
SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(BEStoreTiles, pDC->drawId);
+
#ifdef KNOB_ENABLE_RDTSC
uint32_t numTiles = 0;
#endif
@@ -326,7 +328,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
}
}
- RDTSC_STOP(BEStoreTiles, numTiles, pDC->drawId);
+ AR_END(BEStoreTiles, numTiles);
}
@@ -387,8 +389,10 @@ simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscala
template<typename T>
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BESingleSampleBackend);
- RDTSC_START(BESetup);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BESingleSampleBackend, pDC->drawId);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -423,7 +427,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
pColorBase[rt] = renderBuffers.pColor[rt];
}
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 1);
SWR_PS_CONTEXT psContext;
psContext.pAttribs = work.pAttribs;
@@ -462,7 +466,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, pBlendState->sampleMask);
}
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
// for 1x case, centroid is pixel center
@@ -475,7 +479,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 1);
simdmask clipCoverageMask = coverageMask & MASK;
// interpolate user clip distance if available
@@ -492,10 +496,10 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// Early-Z?
if(T::bCanEarlyZ)
{
- RDTSC_START(BEEarlyDepthTest);
+ AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
+ AR_END(BEEarlyDepthTest, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
@@ -514,20 +518,20 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
- RDTSC_START(BEPixelShader);
+ AR_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- RDTSC_STOP(BEPixelShader, 0, 0);
+ AR_END(BEPixelShader, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
if(!T::bCanEarlyZ)
{
- RDTSC_START(BELateDepthTest);
+ AR_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
- RDTSC_STOP(BELateDepthTest, 0, 0);
+ AR_END(BELateDepthTest, 0);
if(!_simd_movemask_ps(depthPassMask))
{
@@ -543,7 +547,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
UPDATE_STAT(DepthPassCount, statCount);
// output merger
- RDTSC_START(BEOutputMerger);
+ AR_BEGIN(BEOutputMerger, pDC->drawId);
OutputMerger(psContext, pColorBase, 0, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
// do final depth write after all pixel kills
@@ -552,11 +556,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
}
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
}
Endtile:
- RDTSC_START(BEEndTile);
+ AR_BEGIN(BEEndTile, pDC->drawId);
coverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
@@ -569,17 +573,19 @@ Endtile:
{
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
- RDTSC_STOP(BEEndTile, 0, 0);
+ AR_END(BEEndTile, 0);
}
}
- RDTSC_STOP(BESingleSampleBackend, 0, 0);
+ AR_END(BESingleSampleBackend, 0);
}
template<typename T>
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BESampleRateBackend);
- RDTSC_START(BESetup);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BESampleRateBackend, pDC->drawId);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -613,7 +619,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
pColorBase[rt] = renderBuffers.pColor[rt];
}
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 0);
SWR_PS_CONTEXT psContext;
psContext.pAttribs = work.pAttribs;
@@ -643,9 +649,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// pixel center
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
if(T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
@@ -657,7 +663,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
if(T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
if(T::bIsStandardPattern)
{
CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
@@ -668,7 +674,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
psContext.vY.centroid = _simd_add_ps(psContext.vY.UL, _simd_set1_ps(0.5f));
}
CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
}
else
{
@@ -681,7 +687,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
simdmask coverageMask = work.coverageMask[sample] & MASK;
if (coverageMask)
{
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
@@ -691,7 +697,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (rastState.clipDistanceMask)
@@ -711,10 +717,10 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// Early-Z?
if (T::bCanEarlyZ)
{
- RDTSC_START(BEEarlyDepthTest);
+ AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
+ AR_END(BEEarlyDepthTest, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
if (pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
@@ -734,20 +740,20 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
- RDTSC_START(BEPixelShader);
+ AR_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- RDTSC_STOP(BEPixelShader, 0, 0);
+ AR_END(BEPixelShader, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
if (!T::bCanEarlyZ)
{
- RDTSC_START(BELateDepthTest);
+ AR_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- RDTSC_STOP(BELateDepthTest, 0, 0);
+ AR_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
@@ -765,7 +771,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
UPDATE_STAT(DepthPassCount, statCount);
// output merger
- RDTSC_START(BEOutputMerger);
+ AR_BEGIN(BEOutputMerger, pDC->drawId);
OutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
// do final depth write after all pixel kills
@@ -774,11 +780,11 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
}
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
}
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
- RDTSC_START(BEEndTile);
+ AR_BEGIN(BEEndTile, pDC->drawId);
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
@@ -790,17 +796,19 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
{
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
- RDTSC_STOP(BEEndTile, 0, 0);
+ AR_END(BEEndTile, 0);
}
}
- RDTSC_STOP(BESampleRateBackend, 0, 0);
+ AR_END(BESampleRateBackend, 0);
}
template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BEPixelRateBackend);
- RDTSC_START(BESetup);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BEPixelRateBackend, pDC->drawId);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -834,7 +842,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
pColorBase[rt] = renderBuffers.pColor[rt];
}
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 0);
SWR_PS_CONTEXT psContext;
psContext.pAttribs = work.pAttribs;
@@ -852,7 +860,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
psContext.sampleIndex = 0;
- PixelRateZTestLoop<T> PixelRateZTest(pDC, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
+ PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
@@ -868,9 +876,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// set pixel center positions
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
@@ -882,7 +890,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
if(T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
if(T::bIsStandardPattern)
{
CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
@@ -894,7 +902,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
}
CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
}
else
{
@@ -921,11 +929,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
if(pPSState->usesSourceDepth)
{
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
}
// pixels that are currently active
@@ -933,10 +941,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
- RDTSC_START(BEPixelShader);
+ AR_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
- RDTSC_STOP(BEPixelShader, 0, 0);
+ AR_END(BEPixelShader, 0);
// update active lanes to remove any discarded or oMask'd pixels
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
@@ -956,7 +964,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// loop over all samples, broadcasting the results of the PS to all passing pixels
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(pBlendState->sampleCount); sample++)
{
- RDTSC_START(BEOutputMerger);
+ AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
simdscalar coverageMask, depthMask;
@@ -971,7 +979,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
if(!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
continue;
}
}
@@ -987,10 +995,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
}
Endtile:
- RDTSC_START(BEEndTile);
+ AR_BEGIN(BEEndTile, pDC->drawId);
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
@@ -1008,19 +1016,21 @@ Endtile:
{
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
- RDTSC_STOP(BEEndTile, 0, 0);
+ AR_END(BEEndTile, 0);
}
}
- RDTSC_STOP(BEPixelRateBackend, 0, 0);
+ AR_END(BEPixelRateBackend, 0);
}
// optimized backend flow with NULL PS
template<uint32_t sampleCountT>
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BENullBackend);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
- RDTSC_START(BESetup);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
@@ -1043,7 +1053,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 0);
SWR_PS_CONTEXT psContext;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
@@ -1065,7 +1075,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
simdmask coverageMask = work.coverageMask[sample] & MASK;
if (coverageMask)
{
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
@@ -1076,7 +1086,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (rastState.clipDistanceMask)
@@ -1092,12 +1102,12 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
- RDTSC_START(BEEarlyDepthTest);
+ AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
+ AR_END(BEEarlyDepthTest, 0);
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
@@ -1109,7 +1119,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
pStencilBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
}
}
- RDTSC_STOP(BENullBackend, 0, 0);
+ AR_END(BENullBackend, 0);
}
void InitClearTilesTable()
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index fde5a3f8d9f..9d2f317f316 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -432,15 +432,17 @@ INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
template<typename T>
struct PixelRateZTestLoop
{
- PixelRateZTestLoop(DRAW_CONTEXT *DC, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
+ PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
uint8_t*& depthBase, uint8_t*& stencilBase, const uint8_t ClipDistanceMask) :
- work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
+ pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
clipDistanceMask(ClipDistanceMask), pDepthBase(depthBase), pStencilBase(stencilBase) {};
INLINE
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
uint32_t statCount = 0;
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
@@ -454,7 +456,7 @@ struct PixelRateZTestLoop
continue;
}
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
@@ -472,7 +474,7 @@ struct PixelRateZTestLoop
vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
///@todo: perspective correct vs non-perspective correct clipping?
// if clip distances are enabled, we need to interpolate for each sample
@@ -488,13 +490,14 @@ struct PixelRateZTestLoop
uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
// ZTest for this sample
- RDTSC_START(BEDepthBucket);
+ ///@todo Need to uncomment out this bucket.
+ //AR_BEGIN(BEDepthBucket, pDC->drawId);
depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
vZ[sample], pDepthSample, vCoverageMask[sample],
pStencilSample, &stencilPassMask[sample]);
- RDTSC_STOP(BEDepthBucket, 0, 0);
+ //AR_END(BEDepthBucket, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
@@ -525,6 +528,9 @@ struct PixelRateZTestLoop
private:
// functor inputs
+ DRAW_CONTEXT* pDC;
+ uint32_t workerId;
+
const SWR_TRIANGLE_DESC& work;
const BarycentricCoeffs& coeffs;
const API_STATE& state;
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index 21cbb0a0629..7b1e09d16e0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -181,24 +181,27 @@ void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
- RDTSC_START(FEClipTriangles);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<3> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
- RDTSC_STOP(FEClipTriangles, 1, 0);
+ AR_END(FEClipTriangles, 1);
}
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
- RDTSC_START(FEClipLines);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<2> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
- RDTSC_STOP(FEClipLines, 1, 0);
+ AR_END(FEClipLines, 1);
}
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
- RDTSC_START(FEClipPoints);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<1> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
- RDTSC_STOP(FEClipPoints, 1, 0);
+ AR_END(FEClipPoints, 1);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 2f3ce85442a..43bc5222c8d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -501,6 +501,10 @@ public:
// execute the clipper stage
void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
+ SWR_ASSERT(pa.pDC != nullptr);
+
+ SWR_CONTEXT *pContext = pa.pDC->pContext;
+
// set up binner based on PA state
PFN_PROCESS_PRIMS pfnBinner;
switch (pa.binTopology)
@@ -548,11 +552,11 @@ public:
if (clipMask)
{
- RDTSC_START(FEGuardbandClip);
+ AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
- RDTSC_STOP(FEGuardbandClip, 1, 0);
+ AR_END(FEGuardbandClip, 1);
}
else if (validMask)
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 6d63e087bcd..a4dbbc5280b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -42,6 +42,7 @@
#include "common/simdintrin.h"
#include "core/threads.h"
#include "ringbuffer.h"
+#include "archrast/archrast.h"
// x.8 fixed point precision values
#define FIXED_POINT_SHIFT 8
@@ -515,15 +516,30 @@ struct SWR_CONTEXT
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
// ArchRast instrumentation framework
-#ifdef KNOB_ENABLE_AR
-#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
-#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
+#define AR_WORKER_CTX pContext->pArContext[workerId]
+#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads]
-#define AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
-#define AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
-#define AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
+#ifdef KNOB_ENABLE_AR
+ #define _AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
+ #define _AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
+ #define _AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
#else
-#define AR_BEGIN(ctx, type, id)
-#define AR_END(ctx, type, id)
-#define AR_EVENT(ctx, event)
-#endif \ No newline at end of file
+ #ifdef KNOB_ENABLE_RDTSC
+ #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
+ #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0)
+ #else
+ #define _AR_BEGIN(ctx, type, id) (void)ctx
+ #define _AR_END(ctx, type, id)
+ #endif
+ #define _AR_EVENT(ctx, event)
+#endif
+
+// Use these macros for api thread.
+#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id)
+#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count)
+#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
+
+// Use these macros for worker threads.
+#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
+#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
+#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index db470784a5e..decc161f1f5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -130,7 +130,7 @@ void ProcessStoreTiles(
uint32_t workerId,
void *pUserData)
{
- RDTSC_START(FEProcessStoreTiles);
+ AR_BEGIN(FEProcessStoreTiles, pDC->drawId);
MacroTileMgr *pTileMgr = pDC->pTileMgr;
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
@@ -155,7 +155,7 @@ void ProcessStoreTiles(
}
}
- RDTSC_STOP(FEProcessStoreTiles, 0, pDC->drawId);
+ AR_END(FEProcessStoreTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
@@ -171,7 +171,7 @@ void ProcessDiscardInvalidateTiles(
uint32_t workerId,
void *pUserData)
{
- RDTSC_START(FEProcessInvalidateTiles);
+ AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
@@ -210,7 +210,7 @@ void ProcessDiscardInvalidateTiles(
}
}
- RDTSC_STOP(FEProcessInvalidateTiles, 0, pDC->drawId);
+ AR_END(FEProcessInvalidateTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
@@ -542,7 +542,9 @@ static void StreamOut(
uint32_t* pPrimData,
uint32_t streamIndex)
{
- RDTSC_START(FEStreamout);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEStreamout, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_STREAMOUT_STATE &soState = state.soState;
@@ -615,7 +617,7 @@ static void StreamOut(
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
- RDTSC_STOP(FEStreamout, 1, 0);
+ AR_END(FEStreamout, 1);
}
//////////////////////////////////////////////////////////////////////////
@@ -698,7 +700,9 @@ static void GeometryShaderStage(
uint32_t* pSoPrimData,
simdscalari primID)
{
- RDTSC_START(FEGeometryShader);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEGeometryShader, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_GS_STATE* pState = &state.gsState;
@@ -895,7 +899,7 @@ static void GeometryShaderStage(
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
- RDTSC_STOP(FEGeometryShader, 1, 0);
+ AR_END(FEGeometryShader, 1);
}
//////////////////////////////////////////////////////////////////////////
@@ -990,6 +994,7 @@ static void TessellationStages(
uint32_t* pSoPrimData,
simdscalari primID)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
const SWR_TS_STATE& tsState = state.tsState;
@@ -1053,9 +1058,9 @@ static void TessellationStages(
hsContext.mask = GenerateMask(numPrims);
// Run the HS
- RDTSC_START(FEHullShader);
+ AR_BEGIN(FEHullShader, pDC->drawId);
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
- RDTSC_STOP(FEHullShader, 0, 0);
+ AR_END(FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
@@ -1065,9 +1070,9 @@ static void TessellationStages(
{
// Run Tessellator
SWR_TS_TESSELLATED_DATA tsData = { 0 };
- RDTSC_START(FETessellation);
+ AR_BEGIN(FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
- RDTSC_STOP(FETessellation, 0, 0);
+ AR_END(FETessellation, 0);
if (tsData.NumPrimitives == 0)
{
@@ -1107,9 +1112,9 @@ static void TessellationStages(
{
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
- RDTSC_START(FEDomainShader);
+ AR_BEGIN(FEDomainShader, pDC->drawId);
state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
- RDTSC_STOP(FEDomainShader, 0, 0);
+ AR_END(FEDomainShader, 0);
dsInvocations += KNOB_SIMD_WIDTH;
}
@@ -1142,12 +1147,12 @@ static void TessellationStages(
if (HasRastT::value)
{
simdvector prim[3]; // Only deal with triangles, lines, or points
- RDTSC_START(FEPAAssemble);
+ AR_BEGIN(FEPAAssemble, pDC->drawId);
#if SWR_ENABLE_ASSERTS
bool assemble =
#endif
tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
- RDTSC_STOP(FEPAAssemble, 1, 0);
+ AR_END(FEPAAssemble, 1);
SWR_ASSERT(assemble);
SWR_ASSERT(pfnClipFunc);
@@ -1196,7 +1201,7 @@ void ProcessDraw(
}
#endif
- RDTSC_START(FEProcessDraw);
+ AR_BEGIN(FEProcessDraw, pDC->drawId);
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
const API_STATE& state = GetApiState(pDC);
@@ -1334,9 +1339,9 @@ void ProcessDraw(
{
// 1. Execute FS/VS for a single SIMD.
- RDTSC_START(FEFetchShader);
+ AR_BEGIN(FEFetchShader, pDC->drawId);
state.pfnFetchFunc(fetchInfo, vin);
- RDTSC_STOP(FEFetchShader, 0, 0);
+ AR_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
vsContext.VertexID = fetchInfo.VertexID;
@@ -1356,9 +1361,9 @@ void ProcessDraw(
if (!KNOB_TOSS_FETCH)
#endif
{
- RDTSC_START(FEVertexShader);
+ AR_BEGIN(FEVertexShader, pDC->drawId);
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
- RDTSC_STOP(FEVertexShader, 0, 0);
+ AR_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
@@ -1369,9 +1374,9 @@ void ProcessDraw(
{
simdvector prim[MAX_NUM_VERTS_PER_PRIM];
// PaAssemble returns false if there is not enough verts to assemble.
- RDTSC_START(FEPAAssemble);
+ AR_BEGIN(FEPAAssemble, pDC->drawId);
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
- RDTSC_STOP(FEPAAssemble, 1, 0);
+ AR_END(FEPAAssemble, 1);
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
@@ -1428,7 +1433,7 @@ void ProcessDraw(
pa.Reset();
}
- RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId);
+ AR_END(FEProcessDraw, numPrims * work.numInstances);
}
struct FEDrawChooser
@@ -1787,7 +1792,9 @@ void BinTriangles(
simdscalari primID,
simdscalari viewportIdx)
{
- RDTSC_START(FEBinTriangles);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEBinTriangles, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -2168,7 +2175,7 @@ void BinTriangles(
}
endBinTriangles:
- RDTSC_STOP(FEBinTriangles, 1, 0);
+ AR_END(FEBinTriangles, 1);
}
struct FEBinTrianglesChooser
@@ -2204,7 +2211,9 @@ void BinPoints(
simdscalari primID,
simdscalari viewportIdx)
{
- RDTSC_START(FEBinPoints);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEBinPoints, pDC->drawId);
simdvector& primVerts = prim[0];
@@ -2519,10 +2528,7 @@ void BinPoints(
}
}
-
-
-
- RDTSC_STOP(FEBinPoints, 1, 0);
+ AR_END(FEBinPoints, 1);
}
//////////////////////////////////////////////////////////////////////////
@@ -2542,7 +2548,9 @@ void BinLines(
simdscalari primID,
simdscalari viewportIdx)
{
- RDTSC_START(FEBinLines);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEBinLines, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -2765,5 +2773,5 @@ void BinLines(
endBinLines:
- RDTSC_STOP(FEBinLines, 1, 0);
+ AR_END(FEBinLines, 1);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index c9380dac2ff..6d4e50408fc 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -758,7 +758,7 @@ INLINE bool TrivialAcceptTest<std::false_type>(const int mask0, const int mask1,
template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
struct GenerateSVInnerCoverage
{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, EDGE*, double*, uint64_t &){};
+ INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t &){};
};
//////////////////////////////////////////////////////////////////////////
@@ -768,8 +768,10 @@ struct GenerateSVInnerCoverage
template <typename RT>
struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
+ INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, uint32_t workerId, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
double startQuadEdgesAdj[RT::NumEdgesT::value];
for(uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
@@ -777,9 +779,9 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
}
// not trivial accept or reject, must rasterize full tile
- RDTSC_START(BERasterizePartial);
+ AR_BEGIN(BERasterizePartial, pDC->drawId);
innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges);
- RDTSC_STOP(BERasterizePartial, 0, 0);
+ AR_END(BERasterizePartial, 0);
}
};
@@ -835,6 +837,7 @@ struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCov
template <typename RT>
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
@@ -842,9 +845,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
return;
}
#endif
- RDTSC_START(BERasterizeTriangle);
+ AR_BEGIN(BERasterizeTriangle, pDC->drawId);
+ AR_BEGIN(BETriangleSetup, pDC->drawId);
- RDTSC_START(BETriangleSetup);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
@@ -1009,7 +1012,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
- RDTSC_STOP(BETriangleSetup, 0, pDC->drawId);
+ AR_END(BETriangleSetup, 0);
// update triangle desc
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
@@ -1022,11 +1025,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
if (numTilesX == 0 || numTilesY == 0)
{
RDTSC_EVENT(BEEmptyTriangle, 1, 0);
- RDTSC_STOP(BERasterizeTriangle, 1, 0);
+ AR_END(BERasterizeTriangle, 1);
return;
}
- RDTSC_START(BEStepSetup);
+ AR_BEGIN(BEStepSetup, pDC->drawId);
// Step to pixel center of top-left pixel of the triangle bbox
// Align intersect bbox (top/left) to raster tile's (top/left).
@@ -1134,7 +1137,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
}
- RDTSC_STOP(BEStepSetup, 0, pDC->drawId);
+ AR_END(BEStepSetup, 0);
uint32_t tY = minTileY;
uint32_t tX = minTileX;
@@ -1226,14 +1229,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
// not trivial accept or reject, must rasterize full tile
- RDTSC_START(BERasterizePartial);
+ AR_BEGIN(BERasterizePartial, pDC->drawId);
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
- RDTSC_STOP(BERasterizePartial, 0, 0);
+ AR_END(BERasterizePartial, 0);
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
// Output SV InnerCoverage, if needed
- GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
+ GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
}
}
else
@@ -1264,9 +1267,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
}
- RDTSC_START(BEPixelBackend);
+ AR_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers);
- RDTSC_STOP(BEPixelBackend, 0, 0);
+ AR_END(BEPixelBackend, 0);
}
// step to the next tile in X
@@ -1285,7 +1288,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
StepRasterTileY<RT>(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow);
}
- RDTSC_STOP(BERasterizeTriangle, 1, 0);
+ AR_END(BERasterizeTriangle, 1);
}
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
@@ -1420,6 +1423,8 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
{
@@ -1475,9 +1480,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
- RDTSC_START(BEPixelBackend);
+ AR_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
- RDTSC_STOP(BEPixelBackend, 0, 0);
+ AR_END(BEPixelBackend, 0);
}
// Get pointers to hot tile memory for color RT, depth, stencil
@@ -1561,6 +1566,7 @@ INLINE void StepRasterTileY(uint32_t NumRT, RenderOutputBuffers &buffers, Render
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
@@ -1570,7 +1576,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
#endif
// bloat line to two tris and call the triangle rasterizer twice
- RDTSC_START(BERasterizeLine);
+ AR_BEGIN(BERasterizeLine, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
@@ -1763,7 +1769,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
- RDTSC_STOP(BERasterizeLine, 1, 0);
+ AR_END(BERasterizeLine, 1);
}
struct RasterizerChooser
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 24e78123087..446e795fb2b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -501,7 +501,7 @@ void WorkOnFifoBE(
{
BE_WORK *pWork;
- RDTSC_START(WorkerFoundWork);
+ AR_BEGIN(WorkerFoundWork, pDC->drawId);
uint32_t numWorkItems = tile->getNumQueued();
SWR_ASSERT(numWorkItems);
@@ -510,7 +510,7 @@ void WorkOnFifoBE(
SWR_ASSERT(pWork);
if (pWork->type == DRAW)
{
- pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, tileID);
+ pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID);
}
while ((pWork = tile->peek()) != nullptr)
@@ -518,7 +518,7 @@ void WorkOnFifoBE(
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
tile->dequeue();
}
- RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId);
+ AR_END(WorkerFoundWork, numWorkItems);
_ReadWriteBarrier();
@@ -735,12 +735,12 @@ DWORD workerThreadMain(LPVOID pData)
break;
}
- RDTSC_START(WorkerWaitForThreadEvent);
+ AR_BEGIN(WorkerWaitForThreadEvent, 0);
pContext->FifosNotEmpty.wait(lock);
lock.unlock();
- RDTSC_STOP(WorkerWaitForThreadEvent, 0, 0);
+ AR_END(WorkerWaitForThreadEvent, 0);
if (pContext->threadPool.inThreadShutdown)
{
@@ -750,9 +750,9 @@ DWORD workerThreadMain(LPVOID pData)
if (IsBEThread)
{
- RDTSC_START(WorkerWorkOnFifoBE);
+ AR_BEGIN(WorkerWorkOnFifoBE, 0);
WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
- RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
+ AR_END(WorkerWorkOnFifoBE, 0);
WorkOnCompute(pContext, workerId, curDrawBE);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index 1bd1805b52b..bd189abb1a8 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -281,7 +281,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
/// to avoid unnecessary setup every triangle
/// @todo support deferred clear
/// @param pCreateInfo - pointer to creation info.
-void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
+void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
{
const API_STATE& state = GetApiState(pDC);
@@ -301,19 +301,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearColorHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
colorHottileEnableMask &= ~(1 << rtSlot);
}
@@ -324,19 +324,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearDepthHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
}
@@ -346,19 +346,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearStencilHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index 4ec02838ab9..2befe97e7c2 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -291,7 +291,7 @@ public:
}
}
- void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID);
+ void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
uint32_t renderTargetArrayIndex = 0);