diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.h | 21 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/context.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 18 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 16 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/threads.cpp | 21 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_context.cpp | 19 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_query.cpp | 35 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_query.h | 1 |
11 files changed, 95 insertions, 59 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 0797c8ac86b..d6aa80d678f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -144,6 +144,7 @@ HANDLE SwrCreateContext( pContext->pfnClearTile = pCreateInfo->pfnClearTile; pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset; pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats; + pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE; // pass pointer to bucket manager back to caller #ifdef KNOB_ENABLE_RDTSC diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 4ee04dc1368..ed18fe01010 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -95,6 +95,16 @@ typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext, typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS* pStats); +////////////////////////////////////////////////////////////////////////// +/// @brief Callback to allow driver to update their copy of FE stats. +/// @note It's optimal to have a separate callback for FE stats since +/// there is only one DC per FE thread. This means we do not have +/// to sum up the stats across all of the workers. 
+/// @param hPrivateContext - handle to private data +/// @param pStats - pointer to draw stats +typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, + const SWR_STATS_FE* pStats); + class BucketManager; ////////////////////////////////////////////////////////////////////////// @@ -121,11 +131,12 @@ struct SWR_CREATECONTEXT_INFO uint32_t privateStateSize; // Callback functions - PFN_LOAD_TILE pfnLoadTile; - PFN_STORE_TILE pfnStoreTile; - PFN_CLEAR_TILE pfnClearTile; - PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; - PFN_UPDATE_STATS pfnUpdateStats; + PFN_LOAD_TILE pfnLoadTile; + PFN_STORE_TILE pfnStoreTile; + PFN_CLEAR_TILE pfnClearTile; + PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; + PFN_UPDATE_STATS pfnUpdateStats; + PFN_UPDATE_STATS_FE pfnUpdateStatsFE; // Pointer to rdtsc buckets mgr returned to the caller. // Only populated when KNOB_ENABLE_RDTSC is set diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index b2b3bb4e6fd..a2ba76967fe 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -495,7 +495,7 @@ public: // update global pipeline stat SWR_CONTEXT* pContext = this->pDC->pContext; - UPDATE_STAT(CPrimitives, numClippedPrims); + UPDATE_STAT_FE(CPrimitives, numClippedPrims); } // execute the clipper stage @@ -523,7 +523,7 @@ public: // update clipper invocations pipeline stat SWR_CONTEXT* pContext = this->pDC->pContext; uint32_t numInvoc = _mm_popcnt_u32(primMask); - UPDATE_STAT(CInvocations, numInvoc); + UPDATE_STAT_FE(CInvocations, numInvoc); ComputeClipCodes(prim); @@ -559,7 +559,7 @@ public: { // update CPrimitives pipeline state SWR_CONTEXT* pContext = this->pDC->pContext; - UPDATE_STAT(CPrimitives, _mm_popcnt_u32(validMask)); + UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask)); // forward valid prims directly to binner pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId); diff --git 
a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index c478ee9c261..144fcefb208 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -365,7 +365,8 @@ struct DRAW_DYNAMIC_STATE uint32_t SoWriteOffset[4]; bool SoWriteOffsetDirty[4]; - SWR_STATS stats[KNOB_MAX_NUM_THREADS]; + SWR_STATS_FE statsFE; // Only one FE thread per DC. + SWR_STATS stats[KNOB_MAX_NUM_THREADS]; }; // Draw Context @@ -470,11 +471,12 @@ struct SWR_CONTEXT HotTileMgr *pHotTileMgr; // Callback functions, passed in at create context time - PFN_LOAD_TILE pfnLoadTile; - PFN_STORE_TILE pfnStoreTile; - PFN_CLEAR_TILE pfnClearTile; - PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; - PFN_UPDATE_STATS pfnUpdateStats; + PFN_LOAD_TILE pfnLoadTile; + PFN_STORE_TILE pfnStoreTile; + PFN_CLEAR_TILE pfnClearTile; + PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; + PFN_UPDATE_STATS pfnUpdateStats; + PFN_UPDATE_STATS_FE pfnUpdateStatsFE; // Global Stats SWR_STATS stats[KNOB_MAX_NUM_THREADS]; @@ -492,3 +494,4 @@ void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId); void WakeAllThreads(SWR_CONTEXT *pContext); #define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; } +#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; } diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index e32f743de7e..3014c7defc8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -580,8 +580,8 @@ static void StreamOut( } } - UPDATE_STAT(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); - UPDATE_STAT(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); + UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], 
soContext.numPrimStorageNeeded); + UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); RDTSC_STOP(FEStreamout, 1, 0); } @@ -843,8 +843,8 @@ static void GeometryShaderStage( } // update GS pipeline stats - UPDATE_STAT(GsInvocations, numInputPrims * pState->instanceCount); - UPDATE_STAT(GsPrimitives, totalPrimsGenerated); + UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount); + UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated); RDTSC_STOP(FEGeometryShader, 1, 0); } @@ -1009,7 +1009,7 @@ static void TessellationStages( state.pfnHsFunc(GetPrivateState(pDC), &hsContext); RDTSC_STOP(FEHullShader, 0, 0); - UPDATE_STAT(HsInvocations, numPrims); + UPDATE_STAT_FE(HsInvocations, numPrims); const uint32_t* pPrimId = (const uint32_t*)&primID; @@ -1065,7 +1065,7 @@ static void TessellationStages( dsInvocations += KNOB_SIMD_WIDTH; } - UPDATE_STAT(DsInvocations, tsData.NumDomainPoints); + UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints); PA_TESS tessPa( pDC, @@ -1302,7 +1302,7 @@ void ProcessDraw( *pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask)); } - UPDATE_STAT(IaVertices, GetNumInvocations(i, endVertex)); + UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex)); #if KNOB_ENABLE_TOSS_POINTS if (!KNOB_TOSS_FETCH) @@ -1312,7 +1312,7 @@ void ProcessDraw( state.pfnVertexFunc(GetPrivateState(pDC), &vsContext); RDTSC_STOP(FEVertexShader, 0, 0); - UPDATE_STAT(VsInvocations, GetNumInvocations(i, endVertex)); + UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); } } @@ -1335,7 +1335,7 @@ void ProcessDraw( { if (assemble) { - UPDATE_STAT(IaPrimitives, pa.NumPrims()); + UPDATE_STAT_FE(IaPrimitives, pa.NumPrims()); if (HasTessellationT::value) { diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index fdf5d7ef45c..988de75f4d5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -564,17 
+564,27 @@ struct SWR_STATS uint64_t DepthPassCount; // Number of passing depth tests. Not exact. // Pipeline Stats + uint64_t PsInvocations; // Number of Pixel Shader invocations + uint64_t CsInvocations; // Number of Compute Shader invocations + +}; + +////////////////////////////////////////////////////////////////////////// +/// SWR_STATS_FE +/// +/// @brief All statistics generated by FE. +///////////////////////////////////////////////////////////////////////// +struct SWR_STATS_FE +{ uint64_t IaVertices; // Number of Fetch Shader vertices uint64_t IaPrimitives; // Number of PA primitives. uint64_t VsInvocations; // Number of Vertex Shader invocations uint64_t HsInvocations; // Number of Hull Shader invocations uint64_t DsInvocations; // Number of Domain Shader invocations uint64_t GsInvocations; // Number of Geometry Shader invocations - uint64_t PsInvocations; // Number of Pixel Shader invocations - uint64_t CsInvocations; // Number of Compute Shader invocations + uint64_t GsPrimitives; // Number of prims GS outputs. uint64_t CInvocations; // Number of clipper invocations uint64_t CPrimitives; // Number of clipper primitives. - uint64_t GsPrimitives; // Number of prims GS outputs. 
// Streamout Stats uint64_t SoPrimStorageNeeded[4]; diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index fb17af15203..dce23b2486e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -322,23 +322,9 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) { stats.DepthPassCount += dynState.stats[i].DepthPassCount; - stats.IaVertices += dynState.stats[i].IaVertices; - stats.IaPrimitives += dynState.stats[i].IaPrimitives; - stats.VsInvocations += dynState.stats[i].VsInvocations; - stats.HsInvocations += dynState.stats[i].HsInvocations; - stats.DsInvocations += dynState.stats[i].DsInvocations; - stats.GsInvocations += dynState.stats[i].GsInvocations; + stats.PsInvocations += dynState.stats[i].PsInvocations; - stats.CInvocations += dynState.stats[i].CInvocations; stats.CsInvocations += dynState.stats[i].CsInvocations; - stats.CPrimitives += dynState.stats[i].CPrimitives; - stats.GsPrimitives += dynState.stats[i].GsPrimitives; - - for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream) - { - stats.SoPrimStorageNeeded[stream] += dynState.stats[i].SoPrimStorageNeeded[stream]; - stats.SoNumPrimsWritten[stream] += dynState.stats[i].SoNumPrimsWritten[stream]; - } } pContext->pfnUpdateStats(GetPrivateState(pDC), &stats); @@ -560,6 +546,11 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) { _ReadWriteBarrier(); + if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats) + { + pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE); + } + if (pContext->pfnUpdateSoWriteOffset) { for (uint32_t i = 0; i < MAX_SO_BUFFERS; ++i) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 53d2b93089b..15e60cddf0a 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ 
b/src/gallium/drivers/swr/swr_context.cpp @@ -355,15 +355,29 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats) struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx; SWR_STATS *pSwrStats = &ctx->stats; + pSwrStats->DepthPassCount += pStats->DepthPassCount; + pSwrStats->PsInvocations += pStats->PsInvocations; + pSwrStats->CsInvocations += pStats->CsInvocations; +} + +static void +swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats) +{ + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + + if (!pDC) + return; + + struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx; + + SWR_STATS_FE *pSwrStats = &ctx->statsFE; pSwrStats->IaVertices += pStats->IaVertices; pSwrStats->IaPrimitives += pStats->IaPrimitives; pSwrStats->VsInvocations += pStats->VsInvocations; pSwrStats->HsInvocations += pStats->HsInvocations; pSwrStats->DsInvocations += pStats->DsInvocations; pSwrStats->GsInvocations += pStats->GsInvocations; - pSwrStats->PsInvocations += pStats->PsInvocations; - pSwrStats->CsInvocations += pStats->CsInvocations; pSwrStats->CInvocations += pStats->CInvocations; pSwrStats->CPrimitives += pStats->CPrimitives; pSwrStats->GsPrimitives += pStats->GsPrimitives; @@ -389,6 +403,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) createInfo.pfnStoreTile = swr_StoreHotTile; createInfo.pfnClearTile = swr_StoreHotTileClear; createInfo.pfnUpdateStats = swr_UpdateStats; + createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; ctx->swrContext = SwrCreateContext(&createInfo); /* Init Load/Store/ClearTiles Tables */ diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 4133720cbf0..b4553fb171b 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -159,6 +159,7 @@ struct swr_context { struct swr_draw_context swrDC; SWR_STATS stats; + SWR_STATS_FE statsFE; unsigned dirty; /**< Mask of SWR_NEW_x flags */ }; diff --git 
a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp index 35d0e53fb23..c51c529e5f3 100644 --- a/src/gallium/drivers/swr/swr_query.cpp +++ b/src/gallium/drivers/swr/swr_query.cpp @@ -94,6 +94,7 @@ swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq) /* TODO: should fence instead of stalling pipeline */ SwrWaitForIdle(ctx->swrContext); memcpy(&result->core, &ctx->stats, sizeof(result->core)); + memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE)); #if 0 if (!pq->fence) { @@ -150,17 +151,17 @@ swr_get_query_result(struct pipe_context *pipe, result->u64 = end->timestamp - start->timestamp; break; case PIPE_QUERY_PRIMITIVES_GENERATED: - result->u64 = end->core.IaPrimitives - start->core.IaPrimitives; + result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives; break; case PIPE_QUERY_PRIMITIVES_EMITTED: - result->u64 = end->core.SoNumPrimsWritten[index] - - start->core.SoNumPrimsWritten[index]; + result->u64 = end->coreFE.SoNumPrimsWritten[index] + - start->coreFE.SoNumPrimsWritten[index]; break; /* Structures */ case PIPE_QUERY_SO_STATISTICS: { struct pipe_query_data_so_statistics *so_stats = &result->so_statistics; - struct SWR_STATS *start = &pq->start.core; - struct SWR_STATS *end = &pq->end.core; + struct SWR_STATS_FE *start = &pq->start.coreFE; + struct SWR_STATS_FE *end = &pq->end.coreFE; so_stats->num_primitives_written = end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; so_stats->primitives_storage_needed = @@ -176,21 +177,23 @@ swr_get_query_result(struct pipe_context *pipe, &result->pipeline_statistics; struct SWR_STATS *start = &pq->start.core; struct SWR_STATS *end = &pq->end.core; - p_stats->ia_vertices = end->IaVertices - start->IaVertices; - p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives; - p_stats->vs_invocations = end->VsInvocations - start->VsInvocations; - p_stats->gs_invocations = end->GsInvocations - start->GsInvocations; - p_stats->gs_primitives = 
end->GsPrimitives - start->GsPrimitives; - p_stats->c_invocations = end->CPrimitives - start->CPrimitives; - p_stats->c_primitives = end->CPrimitives - start->CPrimitives; + struct SWR_STATS_FE *startFE = &pq->start.coreFE; + struct SWR_STATS_FE *endFE = &pq->end.coreFE; + p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices; + p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives; + p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations; + p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations; + p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives; + p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives; + p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives; p_stats->ps_invocations = end->PsInvocations - start->PsInvocations; - p_stats->hs_invocations = end->HsInvocations - start->HsInvocations; - p_stats->ds_invocations = end->DsInvocations - start->DsInvocations; + p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations; + p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations; p_stats->cs_invocations = end->CsInvocations - start->CsInvocations; } break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { - struct SWR_STATS *start = &pq->start.core; - struct SWR_STATS *end = &pq->end.core; + struct SWR_STATS_FE *start = &pq->start.coreFE; + struct SWR_STATS_FE *end = &pq->end.coreFE; uint64_t num_primitives_written = end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; uint64_t primitives_storage_needed = diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h index 0ab034d397f..931d687b005 100644 --- a/src/gallium/drivers/swr/swr_query.h +++ b/src/gallium/drivers/swr/swr_query.h @@ -29,6 +29,7 @@ struct swr_query_result { SWR_STATS core; + SWR_STATS_FE coreFE; uint64_t timestamp; }; |