diff options
9 files changed, 105 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index 1f87dbabcd0..12dfc0e3372 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -73,6 +73,18 @@ namespace ArchRast uint32_t rasterTiles = 0; }; + struct CullStats + { + uint32_t degeneratePrimCount = 0; + uint32_t backfacePrimCount = 0; + }; + + struct AlphaStats + { + uint32_t alphaTestCount = 0; + uint32_t alphaBlendCount = 0; + }; + ////////////////////////////////////////////////////////////////////////// /// @brief Event handler that handles API thread events. This is shared /// between the API and its caller (e.g. driver shim) but typically @@ -280,7 +292,12 @@ namespace ArchRast // Rasterized Subspans EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles)); - //Reset Internal Counters + // Alpha Subspans + EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount)); + + // Primitive Culling + EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount)); + mDSSingleSample = {}; mDSSampleRate = {}; mDSCombined = {}; @@ -288,6 +305,8 @@ namespace ArchRast mDSNullPS = {}; rastStats = {}; + mCullStats = {}; + mAlphaStats = {}; mNeedFlush = false; } @@ -327,6 +346,18 @@ namespace ArchRast rastStats.rasterTiles += event.data.rasterTiles; } + virtual void Handle(const CullInfoEvent& event) + { + mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask)); + mCullStats.backfacePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask)); + } + + virtual void Handle(const AlphaInfoEvent& event) + { + mAlphaStats.alphaTestCount += event.data.alphaTestEnable; + mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable; + } + protected: bool 
mNeedFlush; // Per draw stats @@ -340,6 +371,8 @@ namespace ArchRast TEStats mTS = {}; GSStats mGS = {}; RastStats rastStats = {}; + CullStats mCullStats = {}; + AlphaStats mAlphaStats = {}; }; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 7d9a68d502e..deb0373bf5a 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -180,6 +180,7 @@ event LateStencilSampleRate uint64_t failCount; }; +// Total Early-Z counts, SingleSample and SampleRate event EarlyZ { uint32_t drawId; @@ -187,6 +188,7 @@ event EarlyZ uint64_t failCount; }; +// Total LateZ counts, SingleSample and SampleRate event LateZ { uint32_t drawId; @@ -194,6 +196,7 @@ event LateZ uint64_t failCount; }; +// Total EarlyStencil counts, SingleSample and SampleRate event EarlyStencil { uint32_t drawId; @@ -201,6 +204,7 @@ event EarlyStencil uint64_t failCount; }; +// Total LateStencil counts, SingleSample and SampleRate event LateStencil { uint32_t drawId; @@ -302,3 +306,18 @@ event ClipperEvent uint32_t trivialAcceptCount; uint32_t mustClipCount; }; + +event CullEvent +{ + uint32_t drawId; + uint64_t backfacePrimCount; + uint64_t degeneratePrimCount; +}; + +event AlphaEvent +{ + uint32_t drawId; + uint32_t alphaTestCount; + uint32_t alphaBlendCount; +}; + diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index f0a93107e5e..37593befbca 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -120,6 +120,21 @@ event ClipInfoEvent uint32_t clipMask; }; +event CullInfoEvent +{ + uint32_t drawId; + uint64_t degeneratePrimMask; + uint64_t backfacePrimMask; + uint32_t validMask; +}; + +event AlphaInfoEvent +{ + uint32_t drawId; + uint32_t alphaTestEnable; + uint32_t 
alphaBlendEnable; +}; + event DrawInstancedEvent { uint32_t drawId; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 8c539e31dc6..dd349a1d3c8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -709,8 +709,8 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P } // Merge Output to 4x2 SIMD Tile Format -INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask) +INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, + const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -747,6 +747,9 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW } } + // Track alpha events + AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended)); + // final write mask simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask)); @@ -777,8 +780,8 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW #if USE_8x2_TILE_BACKEND // Merge Output to 8x2 SIMD16 Tile Format -INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const 
PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset) +INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, + const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -836,6 +839,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW } } + // Track alpha events + AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended)); + // final write mask simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask)); @@ -1003,9 +1009,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // broadcast the results of the PS to all passing pixels #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else // USE_8x2_TILE_BACKEND - OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId); #endif // USE_8x2_TILE_BACKEND if(!state.psState.forceEarlyZ && 
!T::bForcedSampleCount) diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index 16418f7dc87..4982025a781 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -196,9 +196,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // output merger RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else - OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId); #endif // do final depth write after all pixel kills diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 4cc1ed5266b..452fba13edf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -181,9 +181,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // output merger RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); + 
OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else - OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId); #endif // do final depth write after all pixel kills diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index c9a37cb17ae..d31fd37095d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -773,6 +773,8 @@ void SIMDCALL BinTrianglesImpl( RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0); } + AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask)); + /// Note: these variable initializations must stay above any 'goto endBenTriangles' // compute per tri backface uint32_t frontFaceMask = frontWindingTris; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 8c26ec60a2a..22acbe054a1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -887,6 +887,8 @@ struct SWR_BLEND_CONTEXT simdvector* result; simdscalari* oMask; simdscalari* pMask; + uint32_t isAlphaTested; + uint32_t isAlphaBlended; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index 6b7efbfb6d6..912a88fd00d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ 
b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -557,6 +557,8 @@ struct BlendJit : public Builder ppoMask->setName("ppoMask"); Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask }); ppMask->setName("pMask"); + Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested }); + AlphaTest1->setName("AlphaTest1"); static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format"); Value* dst[4]; @@ -590,12 +592,22 @@ struct BlendJit : public Builder // alpha test if (state.desc.alphaTestEnable) { + // Gather for archrast stats + STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested }); AlphaTest(state, pBlendState, pSrc0Alpha, ppMask); } + else + { + // Gather for archrast stats + STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested }); + } // color blend if (state.blendState.blendEnable) { + // Gather for archrast stats + STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + // clamp sources Clamp(state.format, src); Clamp(state.format, src1); @@ -647,6 +659,11 @@ struct BlendJit : public Builder STORE(result[i], pResult, { 0, i }); } } + else + { + // Gather for archrast stats + STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + } if(state.blendState.logicOpEnable) { |