diff options
Diffstat (limited to 'src')
6 files changed, 541 insertions, 6 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 16b6d3347ac..5bb1c7b337c 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -35,16 +35,289 @@
 namespace ArchRast
 {
     //////////////////////////////////////////////////////////////////////////
+    /// @brief Per-draw pass/fail/total counters for the early and late depth (Z) and stencil tests.
+    struct DepthStencilStats
+    {
+        uint32_t earlyZTestPassCount = 0;
+        uint32_t earlyZTestFailCount = 0;
+        uint32_t lateZTestPassCount = 0;
+        uint32_t lateZTestFailCount = 0;
+        uint32_t earlyStencilTestPassCount = 0;
+        uint32_t earlyStencilTestFailCount = 0;
+        uint32_t lateStencilTestPassCount = 0;
+        uint32_t lateStencilTestFailCount = 0;
+        uint32_t earlyZTestCount = 0;
+        uint32_t lateZTestCount = 0;
+        uint32_t earlyStencilTestCount = 0;
+        uint32_t lateStencilTestCount = 0;
+    };
+
+    struct CStats
+    {
+        uint32_t clippedVerts = 0; // sum over ClipVertexCount events of popcount(primMask) * vertsPerPrim
+    };
+
+    struct TEStats
+    {
+        uint32_t inputPrims = 0; // sum of primCount over TessPrimCount events
+        //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If that holds, it's fine.
+    };
+
+    struct GSStats
+    {
+        uint32_t inputPrimCount = 0;
+        uint32_t primGeneratedCount = 0;
+        uint32_t vertsInput = 0;
+    };
+
+    //////////////////////////////////////////////////////////////////////////
     /// @brief Event handler that saves stat events to event files. This
     ///        handler filters out unwanted events.
     class EventHandlerStatsFile : public EventHandlerFile
     {
     public:
+        DepthStencilStats DSSingleSample = {};
+        DepthStencilStats DSSampleRate = {};
+        DepthStencilStats DSPixelRate = {};
+        DepthStencilStats DSNullPS = {};
+        DepthStencilStats DSOmZ = {};
+        CStats CS = {};
+        TEStats TS = {};
+        GSStats GS = {};
+
         EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
 
         // These are events that we're not interested in saving in stats event files.
         virtual void Handle(Start& event) {}
         virtual void Handle(End& event) {}
+
+        virtual void Handle(EarlyDepthStencilInfoSingleSample& event)
+        {
+            //earlyZ test compute: a lane fails when it is covered but its depth-pass bit is clear (bitwise ~, not logical !)
+            DSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSSingleSample.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+
+            //earlyStencil test compute: same pass/fail split using the stencil-pass bits
+            DSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSSingleSample.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals (which would be re-added on every event)
+            DSOmZ.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSOmZ.earlyZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSOmZ.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+            DSOmZ.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSOmZ.earlyStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSOmZ.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+        }
+
+        virtual void Handle(EarlyDepthStencilInfoSampleRate& event)
+        {
+            //earlyZ test compute: a lane fails when it is covered but its depth-pass bit is clear (bitwise ~, not logical !)
+            DSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSSampleRate.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+
+            //earlyStencil test compute: same pass/fail split using the stencil-pass bits
+            DSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSSampleRate.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals (which would be re-added on every event)
+            DSOmZ.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSOmZ.earlyZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSOmZ.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+            DSOmZ.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSOmZ.earlyStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSOmZ.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+        }
+
+        virtual void Handle(EarlyDepthStencilInfoNullPS& event)
+        {
+            //earlyZ test compute: a lane fails when it is covered but its depth-pass bit is clear (bitwise ~, not logical !)
+            DSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSNullPS.earlyZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSNullPS.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+
+            //earlyStencil test compute: same pass/fail split using the stencil-pass bits
+            DSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSNullPS.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals (which would be re-added on every event)
+            DSOmZ.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSOmZ.earlyZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSOmZ.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+            DSOmZ.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSOmZ.earlyStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSOmZ.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+        }
+
+        virtual void Handle(LateDepthStencilInfoSingleSample& event)
+        {
+            //lateZ test compute: a lane fails when it is covered but its depth-pass bit is clear (bitwise ~, not logical !)
+            DSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSSingleSample.lateZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSSingleSample.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+
+            //lateStencil test compute: same pass/fail split using the stencil-pass bits
+            DSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSSingleSample.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals (which would be re-added on every event)
+            DSOmZ.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSOmZ.lateZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSOmZ.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+            DSOmZ.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSOmZ.lateStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSOmZ.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+        }
+
+        virtual void Handle(LateDepthStencilInfoSampleRate& event)
+        {
+            //lateZ test compute: a lane fails when it is covered but its depth-pass bit is clear (bitwise ~, not logical !)
+            DSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSSampleRate.lateZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSSampleRate.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+
+            //lateStencil test compute: same pass/fail split using the stencil-pass bits
+            DSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSSampleRate.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals (which would be re-added on every event)
+            DSOmZ.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSOmZ.lateZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSOmZ.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+            DSOmZ.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSOmZ.lateStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSOmZ.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+        }
+
+        virtual void Handle(LateDepthStencilInfoNullPS& event)
+        {
+            //lateZ test compute: a lane fails when it is covered but its depth-pass bit is clear (bitwise ~, not logical !)
+            DSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSNullPS.lateZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSNullPS.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+
+            //lateStencil test compute: same pass/fail split using the stencil-pass bits
+            DSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSNullPS.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals (which would be re-added on every event)
+            DSOmZ.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            DSOmZ.lateZTestFailCount += _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            DSOmZ.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask));
+            DSOmZ.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            DSOmZ.lateStencilTestFailCount += _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+            DSOmZ.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask));
+        }
+
+        virtual void Handle(EarlyDepthInfoPixelRate& event)
+        {
+            //earlyZ test compute: tested = popcount(activeLanes), fail = tested - depthPassCount
+            DSPixelRate.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes);
+            DSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
+            DSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals
+            DSOmZ.earlyZTestPassCount += event.data.depthPassCount;
+            DSOmZ.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            DSOmZ.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes);
+        }
+
+
+        virtual void Handle(LateDepthInfoPixelRate& event)
+        {
+            //lateZ test compute: tested = popcount(activeLanes), fail = tested - depthPassCount
+            DSPixelRate.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes);
+            DSPixelRate.lateZTestPassCount += event.data.depthPassCount;
+            DSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+
+            //outputMerger totals: add only this event's counts, not the running per-mode totals
+            DSOmZ.lateZTestPassCount += event.data.depthPassCount;
+            DSOmZ.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            DSOmZ.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes);
+
+        }
+
+
+        virtual void Handle(BackendDrawEndEvent& event)
+        {
+            //singleSample: write the accumulated per-draw counters, tagged with the draw id
+            EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount));
+            EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount));
+            EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount));
+            EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount));
+
+            //sampleRate
+            EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount));
+            EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount));
+            EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount));
+            EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount));
+
+            //pixelRate (depth only; no stencil counters are collected for this mode)
+            EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount));
+            EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount));
+
+
+            //NullPS (only the early counters are written out)
+            EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount));
+            EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount));
+
+            //OmZ: totals accumulated across all of the modes above
+            EventHandlerFile::Handle(EarlyOmZ(event.data.drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount));
+            EventHandlerFile::Handle(EarlyOmStencil(event.data.drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount));
+            EventHandlerFile::Handle(LateOmZ(event.data.drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount));
+            EventHandlerFile::Handle(LateOmStencil(event.data.drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount));
+
+            //Reset internal counters so the next draw starts from zero
+            DSSingleSample = {};
+            DSSampleRate = {};
+            DSPixelRate = {};
+            DSNullPS = {};
+            DSOmZ = {};
+        }
+
+        virtual void Handle(FrontendDrawEndEvent& event)
+        {
+            //Clipper
+            EventHandlerFile::Handle(VertsClipped(event.data.drawId, CS.clippedVerts));
+
+            //Tessellator
+            EventHandlerFile::Handle(TessPrims(event.data.drawId, TS.inputPrims));
+
+            //Geometry Shader
+            EventHandlerFile::Handle(GSInputPrims(event.data.drawId, GS.inputPrimCount));
+            EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, GS.primGeneratedCount));
+            EventHandlerFile::Handle(GSVertsInput(event.data.drawId, GS.vertsInput));
+
+            //Reset internal counters so the next draw starts from zero
+            CS = {};
+            TS = {};
+            GS = {};
+        }
+
+        virtual void Handle(GSPrimInfo& event)
+        {
+            GS.inputPrimCount += event.data.inputPrimCount;
+            GS.primGeneratedCount += event.data.primGeneratedCount;
+            GS.vertsInput += event.data.vertsInput;
+        }
+
+        virtual void Handle(ClipVertexCount& event)
+        {
+            CS.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
+        }
+
+        virtual void Handle(TessPrimCount& event)
+        {
+            TS.inputPrims += event.data.primCount;
+        }
     };
 
     static EventManager* FromHandle(HANDLE hThreadContext)
@@ -68,13 +341,11 @@ namespace ArchRast
 
         if (type == AR_THREAD::API)
         {
-            ThreadStartApiEvent e;
-            pManager->Dispatch(e);
+            pHandler->Handle(ThreadStartApiEvent());
         }
         else
         {
-            ThreadStartWorkerEvent e;
-            pManager->Dispatch(e);
+            pHandler->Handle(ThreadStartWorkerEvent());
         }
 
         pHandler->MarkHeader();
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 107d7a3b31d..95cb79bd69f 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -170,3 +170,258 @@ event BackendStatsEvent
     uint64_t CsInvocations;
 };
 
+
+event EarlyDepthStencilInfoSingleSample
+{
+    uint64_t depthPassMask;
+    uint64_t stencilPassMask;
+    uint64_t coverageMask;
+};
+
+event EarlyDepthStencilInfoSampleRate
+{
+    uint64_t depthPassMask;
+    uint64_t stencilPassMask;
+    uint64_t coverageMask;
+};
+
+event EarlyDepthStencilInfoNullPS
+{
+    uint64_t depthPassMask;
+    uint64_t stencilPassMask;
+    uint64_t coverageMask;
+};
+
+event LateDepthStencilInfoSingleSample
+{
+    uint64_t depthPassMask;
+    uint64_t stencilPassMask;
+    uint64_t coverageMask;
+};
+
+event LateDepthStencilInfoSampleRate
+{
+    uint64_t depthPassMask;
+    uint64_t stencilPassMask;
+    uint64_t coverageMask;
+};
+
+event LateDepthStencilInfoNullPS
+{
+    uint64_t depthPassMask;
+    uint64_t stencilPassMask;
+    uint64_t coverageMask;
+};
+
+event EarlyDepthInfoPixelRate
+{
+    uint64_t depthPassCount;
+    uint64_t activeLanes;
+};
+
+
+event LateDepthInfoPixelRate
+{
+    uint64_t depthPassCount;
+    uint64_t activeLanes;
+};
+
+
+event BackendDrawEndEvent
+{
+    uint32_t drawId;
+};
+
+event FrontendDrawEndEvent
+{
+    uint32_t drawId;
+};
+
+event EarlyZSingleSample
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateZSingleSample
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyStencilSingleSample
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateStencilSingleSample
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyZSampleRate
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateZSampleRate
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyStencilSampleRate
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateStencilSampleRate
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyZNullPS
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyStencilNullPS
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyZPixelRate
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateZPixelRate
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+
+event EarlyOmZ
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event EarlyOmStencil
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateOmZ
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event LateOmStencil
+{
+    uint32_t drawId;
+    uint64_t passCount;
+    uint64_t failCount;
+    uint64_t testCount;
+};
+
+event GSPrimInfo
+{
+    uint64_t inputPrimCount;
+    uint64_t primGeneratedCount;
+    uint64_t vertsInput;
+};
+
+event GSInputPrims
+{
+    uint32_t drawId;
+    uint64_t inputPrimCount;
+};
+
+event GSPrimsGen
+{
+    uint32_t drawId;
+    uint64_t primGeneratedCount;
+};
+
+event GSVertsInput
+{
+    uint32_t drawId;
+    uint64_t vertsInput;
+};
+
+event ClipVertexCount
+{
+    uint64_t vertsPerPrim;
+    uint64_t primMask;
+};
+
+//REMOVE AND REPLACE
+event FlushVertClip
+{
+    uint32_t drawId;
+};
+
+event VertsClipped
+{
+    uint32_t drawId;
+    uint64_t clipCount;
+};
+
+event TessPrimCount
+{
+    uint64_t primCount;
+};
+
+//REMOVE AND REPLACE
+event TessPrimFlush
+{
+    uint32_t drawId;
+};
+
+event TessPrims
+{
+    uint32_t drawId;
+    uint64_t primCount;
+};
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index c5e6b98064d..16c4537b213 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -533,6 +533,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                     AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                      psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
+                    AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); // args follow the events.proto field order: depth, stencil, coverage
                     AR_END(BEEarlyDepthTest, 0);
 
                     // early-exit if no pixels passed depth or earlyZ is forced on
@@ -565,6 +566,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                     AR_BEGIN(BELateDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                      psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
+                    AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); // args follow the events.proto field order: depth, stencil, coverage
                     AR_END(BELateDepthTest, 0);
 
                     if (!_simd_movemask_ps(depthPassMask))
@@ -742,6 +744,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                         AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                          psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
+                        AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); // args follow the events.proto field order: depth, stencil, coverage
                         AR_END(BEEarlyDepthTest, 0);
 
                         // early-exit if no samples passed depth or earlyZ is forced on.
@@ -775,6 +778,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                         AR_BEGIN(BELateDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                          psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
+                        AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); // args follow the events.proto field order: depth, stencil, coverage
                         AR_END(BELateDepthTest, 0);
 
                         if (!_simd_movemask_ps(depthPassMask))
@@ -923,6 +927,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                 {
                     uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
                     UPDATE_STAT_BE(DepthPassCount, depthPassCount);
+                    AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
                 }
 
                 // if we have no covered samples that passed depth at this point, go to next tile
@@ -956,6 +961,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                 {
                     uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
                     UPDATE_STAT_BE(DepthPassCount, depthPassCount);
+                    AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
                 }
 
                 // if we have no covered samples that passed depth at this point, skip OM and go to next tile
@@ -1134,6 +1140,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                 AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                 simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                             psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
+                AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); // args follow the events.proto field order: depth, stencil, coverage
                 DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
                                   pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
                 AR_END(BEEarlyDepthTest, 0);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index a208a36a218..c8dce10c9de 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -853,7 +853,7 @@ static void GeometryShaderStage(
     // update GS pipeline stats
     UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
     UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
-
+    AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims));
     AR_END(FEGeometryShader, 1);
 }
 
@@ -1027,6 +1027,7 @@ static void TessellationStages(
             SWR_TS_TESSELLATED_DATA tsData = { 0 };
             AR_BEGIN(FETessellation, pDC->drawId);
             TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
+            AR_EVENT(TessPrimCount(1));
             AR_END(FETessellation, 0);
 
             if (tsData.NumPrimitives == 0)
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index f7730ffe09f..ee126120697 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -581,6 +581,7 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
             stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
             stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
         ));
+        AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
 
         pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
     }
diff --git a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
index 95c54426b50..abde3c028bd 100644
--- a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
+++ b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
@@ -43,7 +43,7 @@ namespace ArchRast
         virtual ~EventHandler() {}
 
 % for name in protos['event_names']:
-        virtual void Handle(${name}& event) {}
+        virtual void Handle(${name}&& event) {}
 % endfor
     };
 }
|