summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.h21
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.h6
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h15
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp18
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h16
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp21
-rw-r--r--src/gallium/drivers/swr/swr_context.cpp19
-rw-r--r--src/gallium/drivers/swr/swr_context.h1
-rw-r--r--src/gallium/drivers/swr/swr_query.cpp35
-rw-r--r--src/gallium/drivers/swr/swr_query.h1
11 files changed, 95 insertions, 59 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 0797c8ac86b..d6aa80d678f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -144,6 +144,7 @@ HANDLE SwrCreateContext(
pContext->pfnClearTile = pCreateInfo->pfnClearTile;
pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
+ pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
// pass pointer to bucket manager back to caller
#ifdef KNOB_ENABLE_RDTSC
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index 4ee04dc1368..ed18fe01010 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -95,6 +95,16 @@ typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
const SWR_STATS* pStats);
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback to allow driver to update their copy of FE stats.
+/// @note It's optimal to have a separate callback for FE stats since
+/// there is only one DC per FE thread. This means we do not have
+/// to sum up the stats across all of the workers.
+/// @param hPrivateContext - handle to private data
+/// @param pStats - pointer to draw stats
+typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
+ const SWR_STATS_FE* pStats);
+
class BucketManager;
//////////////////////////////////////////////////////////////////////////
@@ -121,11 +131,12 @@ struct SWR_CREATECONTEXT_INFO
uint32_t privateStateSize;
// Callback functions
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Pointer to rdtsc buckets mgr returned to the caller.
// Only populated when KNOB_ENABLE_RDTSC is set
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index b2b3bb4e6fd..a2ba76967fe 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -495,7 +495,7 @@ public:
// update global pipeline stat
SWR_CONTEXT* pContext = this->pDC->pContext;
- UPDATE_STAT(CPrimitives, numClippedPrims);
+ UPDATE_STAT_FE(CPrimitives, numClippedPrims);
}
// execute the clipper stage
@@ -523,7 +523,7 @@ public:
// update clipper invocations pipeline stat
SWR_CONTEXT* pContext = this->pDC->pContext;
uint32_t numInvoc = _mm_popcnt_u32(primMask);
- UPDATE_STAT(CInvocations, numInvoc);
+ UPDATE_STAT_FE(CInvocations, numInvoc);
ComputeClipCodes(prim);
@@ -559,7 +559,7 @@ public:
{
// update CPrimitives pipeline state
SWR_CONTEXT* pContext = this->pDC->pContext;
- UPDATE_STAT(CPrimitives, _mm_popcnt_u32(validMask));
+ UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index c478ee9c261..144fcefb208 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -365,7 +365,8 @@ struct DRAW_DYNAMIC_STATE
uint32_t SoWriteOffset[4];
bool SoWriteOffsetDirty[4];
- SWR_STATS stats[KNOB_MAX_NUM_THREADS];
+ SWR_STATS_FE statsFE; // Only one FE thread per DC.
+ SWR_STATS stats[KNOB_MAX_NUM_THREADS];
};
// Draw Context
@@ -470,11 +471,12 @@ struct SWR_CONTEXT
HotTileMgr *pHotTileMgr;
// Callback functions, passed in at create context time
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Global Stats
SWR_STATS stats[KNOB_MAX_NUM_THREADS];
@@ -492,3 +494,4 @@ void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
void WakeAllThreads(SWR_CONTEXT *pContext);
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
+#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index e32f743de7e..3014c7defc8 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -580,8 +580,8 @@ static void StreamOut(
}
}
- UPDATE_STAT(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
- UPDATE_STAT(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
+ UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
+ UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
RDTSC_STOP(FEStreamout, 1, 0);
}
@@ -843,8 +843,8 @@ static void GeometryShaderStage(
}
// update GS pipeline stats
- UPDATE_STAT(GsInvocations, numInputPrims * pState->instanceCount);
- UPDATE_STAT(GsPrimitives, totalPrimsGenerated);
+ UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
+ UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
RDTSC_STOP(FEGeometryShader, 1, 0);
}
@@ -1009,7 +1009,7 @@ static void TessellationStages(
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
RDTSC_STOP(FEHullShader, 0, 0);
- UPDATE_STAT(HsInvocations, numPrims);
+ UPDATE_STAT_FE(HsInvocations, numPrims);
const uint32_t* pPrimId = (const uint32_t*)&primID;
@@ -1065,7 +1065,7 @@ static void TessellationStages(
dsInvocations += KNOB_SIMD_WIDTH;
}
- UPDATE_STAT(DsInvocations, tsData.NumDomainPoints);
+ UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints);
PA_TESS tessPa(
pDC,
@@ -1302,7 +1302,7 @@ void ProcessDraw(
*pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask));
}
- UPDATE_STAT(IaVertices, GetNumInvocations(i, endVertex));
+ UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
@@ -1312,7 +1312,7 @@ void ProcessDraw(
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
RDTSC_STOP(FEVertexShader, 0, 0);
- UPDATE_STAT(VsInvocations, GetNumInvocations(i, endVertex));
+ UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
}
@@ -1335,7 +1335,7 @@ void ProcessDraw(
{
if (assemble)
{
- UPDATE_STAT(IaPrimitives, pa.NumPrims());
+ UPDATE_STAT_FE(IaPrimitives, pa.NumPrims());
if (HasTessellationT::value)
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index fdf5d7ef45c..988de75f4d5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -564,17 +564,27 @@ struct SWR_STATS
uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
// Pipeline Stats
+ uint64_t PsInvocations; // Number of Pixel Shader invocations
+ uint64_t CsInvocations; // Number of Compute Shader invocations
+
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// SWR_STATS_FE
+///
+/// @brief All statistics generated by FE.
+/////////////////////////////////////////////////////////////////////////
+struct SWR_STATS_FE
+{
uint64_t IaVertices; // Number of Fetch Shader vertices
uint64_t IaPrimitives; // Number of PA primitives.
uint64_t VsInvocations; // Number of Vertex Shader invocations
uint64_t HsInvocations; // Number of Hull Shader invocations
uint64_t DsInvocations; // Number of Domain Shader invocations
uint64_t GsInvocations; // Number of Geometry Shader invocations
- uint64_t PsInvocations; // Number of Pixel Shader invocations
- uint64_t CsInvocations; // Number of Compute Shader invocations
+ uint64_t GsPrimitives; // Number of prims GS outputs.
uint64_t CInvocations; // Number of clipper invocations
uint64_t CPrimitives; // Number of clipper primitives.
- uint64_t GsPrimitives; // Number of prims GS outputs.
// Streamout Stats
uint64_t SoPrimStorageNeeded[4];
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index fb17af15203..dce23b2486e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -322,23 +322,9 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
stats.DepthPassCount += dynState.stats[i].DepthPassCount;
- stats.IaVertices += dynState.stats[i].IaVertices;
- stats.IaPrimitives += dynState.stats[i].IaPrimitives;
- stats.VsInvocations += dynState.stats[i].VsInvocations;
- stats.HsInvocations += dynState.stats[i].HsInvocations;
- stats.DsInvocations += dynState.stats[i].DsInvocations;
- stats.GsInvocations += dynState.stats[i].GsInvocations;
+
stats.PsInvocations += dynState.stats[i].PsInvocations;
- stats.CInvocations += dynState.stats[i].CInvocations;
stats.CsInvocations += dynState.stats[i].CsInvocations;
- stats.CPrimitives += dynState.stats[i].CPrimitives;
- stats.GsPrimitives += dynState.stats[i].GsPrimitives;
-
- for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
- {
- stats.SoPrimStorageNeeded[stream] += dynState.stats[i].SoPrimStorageNeeded[stream];
- stats.SoNumPrimsWritten[stream] += dynState.stats[i].SoNumPrimsWritten[stream];
- }
}
pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
@@ -560,6 +546,11 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
_ReadWriteBarrier();
+ if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats)
+ {
+ pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE);
+ }
+
if (pContext->pfnUpdateSoWriteOffset)
{
for (uint32_t i = 0; i < MAX_SO_BUFFERS; ++i)
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index 53d2b93089b..15e60cddf0a 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -355,15 +355,29 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
SWR_STATS *pSwrStats = &ctx->stats;
+
pSwrStats->DepthPassCount += pStats->DepthPassCount;
+ pSwrStats->PsInvocations += pStats->PsInvocations;
+ pSwrStats->CsInvocations += pStats->CsInvocations;
+}
+
+static void
+swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
+{
+ swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+
+ if (!pDC)
+ return;
+
+ struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+
+ SWR_STATS_FE *pSwrStats = &ctx->statsFE;
pSwrStats->IaVertices += pStats->IaVertices;
pSwrStats->IaPrimitives += pStats->IaPrimitives;
pSwrStats->VsInvocations += pStats->VsInvocations;
pSwrStats->HsInvocations += pStats->HsInvocations;
pSwrStats->DsInvocations += pStats->DsInvocations;
pSwrStats->GsInvocations += pStats->GsInvocations;
- pSwrStats->PsInvocations += pStats->PsInvocations;
- pSwrStats->CsInvocations += pStats->CsInvocations;
pSwrStats->CInvocations += pStats->CInvocations;
pSwrStats->CPrimitives += pStats->CPrimitives;
pSwrStats->GsPrimitives += pStats->GsPrimitives;
@@ -389,6 +403,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
createInfo.pfnStoreTile = swr_StoreHotTile;
createInfo.pfnClearTile = swr_StoreHotTileClear;
createInfo.pfnUpdateStats = swr_UpdateStats;
+ createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
ctx->swrContext = SwrCreateContext(&createInfo);
/* Init Load/Store/ClearTiles Tables */
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
index 4133720cbf0..b4553fb171b 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -159,6 +159,7 @@ struct swr_context {
struct swr_draw_context swrDC;
SWR_STATS stats;
+ SWR_STATS_FE statsFE;
unsigned dirty; /**< Mask of SWR_NEW_x flags */
};
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index 35d0e53fb23..c51c529e5f3 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -94,6 +94,7 @@ swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
/* TODO: should fence instead of stalling pipeline */
SwrWaitForIdle(ctx->swrContext);
memcpy(&result->core, &ctx->stats, sizeof(result->core));
+ memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE));
#if 0
if (!pq->fence) {
@@ -150,17 +151,17 @@ swr_get_query_result(struct pipe_context *pipe,
result->u64 = end->timestamp - start->timestamp;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
+ result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 = end->core.SoNumPrimsWritten[index]
- - start->core.SoNumPrimsWritten[index];
+ result->u64 = end->coreFE.SoNumPrimsWritten[index]
+ - start->coreFE.SoNumPrimsWritten[index];
break;
/* Structures */
case PIPE_QUERY_SO_STATISTICS: {
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
- struct SWR_STATS *start = &pq->start.core;
- struct SWR_STATS *end = &pq->end.core;
+ struct SWR_STATS_FE *start = &pq->start.coreFE;
+ struct SWR_STATS_FE *end = &pq->end.coreFE;
so_stats->num_primitives_written =
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
so_stats->primitives_storage_needed =
@@ -176,21 +177,23 @@ swr_get_query_result(struct pipe_context *pipe,
&result->pipeline_statistics;
struct SWR_STATS *start = &pq->start.core;
struct SWR_STATS *end = &pq->end.core;
- p_stats->ia_vertices = end->IaVertices - start->IaVertices;
- p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
- p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
- p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
- p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
- p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
- p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
+ struct SWR_STATS_FE *startFE = &pq->start.coreFE;
+ struct SWR_STATS_FE *endFE = &pq->end.coreFE;
+ p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices;
+ p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives;
+ p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations;
+ p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations;
+ p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives;
+ p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives;
+ p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives;
p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
- p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
- p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
+ p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations;
+ p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations;
p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
} break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
- struct SWR_STATS *start = &pq->start.core;
- struct SWR_STATS *end = &pq->end.core;
+ struct SWR_STATS_FE *start = &pq->start.coreFE;
+ struct SWR_STATS_FE *end = &pq->end.coreFE;
uint64_t num_primitives_written =
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
uint64_t primitives_storage_needed =
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
index 0ab034d397f..931d687b005 100644
--- a/src/gallium/drivers/swr/swr_query.h
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -29,6 +29,7 @@
struct swr_query_result {
SWR_STATS core;
+ SWR_STATS_FE coreFE;
uint64_t timestamp;
};