summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr/rasterizer
diff options
context:
space:
mode:
authorJan Zielinski <[email protected]>2019-08-01 14:30:58 +0200
committerJan Zielinski <[email protected]>2019-08-08 11:15:07 +0200
commit387599a66181958e28483b8819e647d60b7158e8 (patch)
tree5d4412d1b475b09eeeeb525ffd10c42629c7df24 /src/gallium/drivers/swr/rasterizer
parentff75c35846535baf6ff2150b18089a58fd156bbe (diff)
swr/rasterizer: Refactor events collection mechanism
Several improvements and cleanups in events and statistics mechanisms. Reviewed-by: Alok Hota <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer')
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp126
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/events.proto480
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/events_private.proto48
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py108
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py28
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp20
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp2
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h1
10 files changed, 382 insertions, 439 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 03df614da2a..c1d3f2d6138 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -87,81 +87,6 @@ namespace ArchRast
uint32_t alphaBlendCount = 0;
};
- struct MemoryStats
- {
- struct MemoryTrackerKey
- {
- uint64_t address;
- uint64_t mask;
- };
-
- struct MemoryTrackerData
- {
- uint32_t accessCountRead;
- uint32_t accessCountWrite;
- uint32_t totalSizeRead;
- uint32_t totalSizeWrite;
- uint64_t tscMin;
- uint64_t tscMax;
- };
-
- struct AddressRangeComparator
- {
- bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const
- {
- return (a.address & a.mask) < (b.address & b.mask);
- }
- };
-
- typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
- MemoryTrackerMap trackedMemory = {};
-
- void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
- {
- MemoryTrackerKey key;
- key.address = address;
- key.mask = addressMask;
-
- MemoryTrackerMap::iterator i = trackedMemory.lower_bound(key);
- if (i != trackedMemory.end() && !(trackedMemory.key_comp()(key, i->first)))
- {
- // already in map
- if (isRead)
- {
- i->second.accessCountRead++;
- i->second.totalSizeRead += size;
- }
- else
- {
- i->second.accessCountWrite++;
- i->second.totalSizeWrite += size;
- }
- i->second.tscMax = tsc;
- }
- else
- {
- // new entry
- MemoryTrackerData data;
- if (isRead)
- {
- data.accessCountRead = 1;
- data.totalSizeRead = size;
- data.accessCountWrite = 0;
- data.totalSizeWrite = 0;
- }
- else
- {
- data.accessCountRead = 0;
- data.totalSizeRead = 0;
- data.accessCountWrite = 1;
- data.totalSizeWrite = size;
- }
- data.tscMin = tsc;
- data.tscMax = tsc;
- trackedMemory.insert(i, MemoryTrackerMap::value_type(key, data));
- }
- }
- };
//////////////////////////////////////////////////////////////////////////
/// @brief Event handler that handles API thread events. This is shared
@@ -258,17 +183,6 @@ namespace ArchRast
EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
{
memset(mShaderStats, 0, sizeof(mShaderStats));
-
- // compute address mask for memory tracking
- mAddressMask = 0;
- uint64_t addressRangeBytes = 4096;
- while (addressRangeBytes > 0)
- {
- mAddressMask = (mAddressMask << 1) | 1;
- addressRangeBytes = addressRangeBytes >> 1;
- }
- mMemGranularity = mAddressMask + 1;
- mAddressMask = ~mAddressMask;
}
virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
@@ -674,42 +588,6 @@ namespace ArchRast
mGS = {};
}
- virtual void Handle(const MemoryAccessEvent& event)
- {
- uint64_t trackAddr = event.data.ptr;
- uint64_t nextAddr = (trackAddr & mAddressMask);
- uint32_t sizeTracked = 0;
-
- while (sizeTracked < event.data.size)
- {
- nextAddr += mMemGranularity;
- uint32_t size = nextAddr - trackAddr;
- size = std::min(event.data.size, size);
- mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);
- sizeTracked += size;
- trackAddr = nextAddr;
- }
- }
-
- virtual void Handle(const MemoryStatsEndEvent& event)
- {
- MemoryStats::MemoryTrackerMap::iterator i = mMemoryStats.trackedMemory.begin();
- while (i != mMemoryStats.trackedMemory.end())
- {
- MemoryStatsEvent mse(event.data.drawId,
- i->first.address & mAddressMask,
- i->second.accessCountRead,
- i->second.accessCountWrite,
- i->second.totalSizeRead,
- i->second.totalSizeWrite,
- i->second.tscMin,
- i->second.tscMax);
- EventHandlerFile::Handle(mse);
- i++;
- }
- mMemoryStats.trackedMemory.clear();
- }
-
virtual void Handle(const GSPrimInfo& event)
{
mGS.inputPrimCount += event.data.inputPrimCount;
@@ -756,10 +634,6 @@ namespace ArchRast
SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
- MemoryStats mMemoryStats = {};
- uint64_t mAddressMask = 0;
- uint64_t mMemGranularity = 0;
-
};
static EventManager* FromHandle(HANDLE hThreadContext)
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 8a6093f29be..a53089386f6 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -37,449 +37,391 @@ event Framework::ThreadStartWorkerEvent
{
};
-event SwrApi::DrawInfoEvent
+///@brief Used as a helper event to indicate end of frame. Does not guarantee to capture end of frame on all APIs
+event ApiSwr::FrameEndEvent
{
- uint32_t drawId;
- AR_DRAW_TYPE type;
- uint32_t topology;
- uint32_t numVertices;
- uint32_t numIndices;
- int32_t indexOffset;
- int32_t baseVertex;
- uint32_t numInstances;
- uint32_t startInstance;
- uint32_t tsEnable;
- uint32_t gsEnable;
- uint32_t soEnable;
- uint32_t soTopology;
- uint32_t splitId; // Split draw count or id.
+ uint32_t frameId; // current frame id
+ uint32_t nextDrawId; // next draw id (always incremental - does not reset)
};
-event SwrApi::DispatchEvent
+///@brief Synchronization event.
+event ApiSwr::SwrSyncEvent
{
uint32_t drawId;
- uint32_t threadGroupCountX;
- uint32_t threadGroupCountY;
- uint32_t threadGroupCountZ;
};
-event SwrApi::FrameEndEvent
+///@brief Invalidate hot tiles (i.e. tile cache)
+event ApiSwr::SwrInvalidateTilesEvent
{
- uint32_t frameId;
- uint32_t nextDrawId;
+ uint32_t drawId;
};
-///@brief API Stat: Synchonization event.
-event SwrApi::SwrSyncEvent
+///@brief Invalidate and discard hot tiles within pixel region
+event ApiSwr::SwrDiscardRectEvent
{
uint32_t drawId;
};
-///@brief API Stat: Invalidate hot tiles (i.e. tile cache)
-event SwrApi::SwrInvalidateTilesEvent
+///@brief Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache)
+event ApiSwr::SwrStoreTilesEvent
{
uint32_t drawId;
};
-///@brief API Stat: Invalidate and discard hot tiles within pixel region
-event SwrApi::SwrDiscardRectEvent
+event PipelineStats::DrawInfoEvent
{
uint32_t drawId;
+ AR_DRAW_TYPE type; // type of draw (indexed, instanced, etc)
+ uint32_t topology; // topology of draw
+ uint32_t numVertices; // number of vertices for draw
+ uint32_t numIndices; // number of indices for draw
+ int32_t indexOffset; // offset into index buffer
+ int32_t baseVertex; // which vertex to start with
+ uint32_t numInstances; // number of instances to draw
+ uint32_t startInstance; // which instance to start fetching
+ uint32_t tsEnable; // tessellation enabled
+ uint32_t gsEnable; // geometry shader enabled
+ uint32_t soEnable; // stream-out enabled
+ uint32_t soTopology; // topology of stream-out
+ uint32_t splitId; // split draw count or id
};
-///@brief API Stat: Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache)
-event SwrApi::SwrStoreTilesEvent
+event PipelineStats::DispatchEvent
{
uint32_t drawId;
+ uint32_t threadGroupCountX; // num thread groups in X dimension
+ uint32_t threadGroupCountY; // num thread groups in Y dimension
+ uint32_t threadGroupCountZ; // num thread groups in Z dimension
};
-event Pipeline::FrontendStatsEvent
+event PipelineStats::FrontendStatsEvent
{
uint32_t drawId;
- uint64_t counter IaVertices;
- uint64_t counter IaPrimitives;
- uint64_t counter VsInvocations;
- uint64_t counter HsInvocations;
- uint64_t counter DsInvocations;
- uint64_t counter GsInvocations;
- uint64_t counter GsPrimitives;
- uint64_t counter CInvocations;
- uint64_t counter CPrimitives;
- uint64_t counter SoPrimStorageNeeded0;
- uint64_t counter SoPrimStorageNeeded1;
- uint64_t counter SoPrimStorageNeeded2;
- uint64_t counter SoPrimStorageNeeded3;
- uint64_t counter SoNumPrimsWritten0;
- uint64_t counter SoNumPrimsWritten1;
- uint64_t counter SoNumPrimsWritten2;
- uint64_t counter SoNumPrimsWritten3;
+ uint64_t IaVertices;
+ uint64_t IaPrimitives;
+ uint64_t VsInvocations;
+ uint64_t HsInvocations;
+ uint64_t DsInvocations;
+ uint64_t GsInvocations;
+ uint64_t GsPrimitives;
+ uint64_t CInvocations;
+ uint64_t CPrimitives;
+ uint64_t SoPrimStorageNeeded0;
+ uint64_t SoPrimStorageNeeded1;
+ uint64_t SoPrimStorageNeeded2;
+ uint64_t SoPrimStorageNeeded3;
+ uint64_t SoNumPrimsWritten0;
+ uint64_t SoNumPrimsWritten1;
+ uint64_t SoNumPrimsWritten2;
+ uint64_t SoNumPrimsWritten3;
};
-event Pipeline::BackendStatsEvent
+event PipelineStats::BackendStatsEvent
{
uint32_t drawId;
- uint64_t counter DepthPassCount;
- uint64_t counter PsInvocations;
- uint64_t counter CsInvocations;
+ uint64_t DepthPassCount;
+ uint64_t PsInvocations;
+ uint64_t CsInvocations;
};
-event Pipeline::EarlyZSingleSample
+event PipelineStats::EarlyZSingleSample
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateZSingleSample
+event PipelineStats::LateZSingleSample
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyStencilSingleSample
+event PipelineStats::EarlyStencilSingleSample
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateStencilSingleSample
+event PipelineStats::LateStencilSingleSample
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyZSampleRate
+event PipelineStats::EarlyZSampleRate
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateZSampleRate
+event PipelineStats::LateZSampleRate
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyStencilSampleRate
+event PipelineStats::EarlyStencilSampleRate
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateStencilSampleRate
+event PipelineStats::LateStencilSampleRate
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
// Total Early-Z counts, SingleSample and SampleRate
-event Pipeline::EarlyZ
+event PipelineStats::EarlyZ
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
// Total LateZ counts, SingleSample and SampleRate
-event Pipeline::LateZ
+event PipelineStats::LateZ
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
// Total EarlyStencil counts, SingleSample and SampleRate
-event Pipeline::EarlyStencil
+event PipelineStats::EarlyStencil
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
// Total LateStencil counts, SingleSample and SampleRate
-event Pipeline::LateStencil
+event PipelineStats::LateStencil
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyZNullPS
+event PipelineStats::EarlyZNullPS
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyStencilNullPS
+event PipelineStats::EarlyStencilNullPS
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyZPixelRate
+event PipelineStats::EarlyZPixelRate
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateZPixelRate
+event PipelineStats::LateZPixelRate
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyOmZ
+event PipelineStats::EarlyOmZ
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::EarlyOmStencil
+event PipelineStats::EarlyOmStencil
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateOmZ
+event PipelineStats::LateOmZ
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::LateOmStencil
+event PipelineStats::LateOmStencil
{
uint32_t drawId;
- uint64_t counter passCount;
- uint64_t counter failCount;
+ uint64_t passCount;
+ uint64_t failCount;
};
-event Pipeline::GSInputPrims
+event PipelineStats::GSInputPrims
{
uint32_t drawId;
- uint64_t counter inputPrimCount;
+ uint64_t inputPrimCount;
};
-event Pipeline::GSPrimsGen
+event PipelineStats::GSPrimsGen
{
uint32_t drawId;
- uint64_t counter primGeneratedCount;
+ uint64_t primGeneratedCount;
};
-event Pipeline::GSVertsInput
+event PipelineStats::GSVertsInput
{
uint32_t drawId;
- uint64_t counter vertsInput;
+ uint64_t vertsInput;
};
-event Pipeline::TessPrims
+event PipelineStats::TessPrims
{
uint32_t drawId;
- uint64_t counter primCount;
+ uint64_t primCount;
};
-event Pipeline::RasterTiles
+event PipelineStats::RasterTiles
{
uint32_t drawId;
- uint32_t counter rastTileCount;
+ uint32_t rastTileCount;
};
-event Pipeline::ClipperEvent
+event PipelineStats::ClipperEvent
{
uint32_t drawId;
- uint32_t counter trivialRejectCount;
- uint32_t counter trivialAcceptCount;
- uint32_t counter mustClipCount;
+ uint32_t trivialRejectCount;
+ uint32_t trivialAcceptCount;
+ uint32_t mustClipCount;
};
-event Pipeline::CullEvent
+event PipelineStats::CullEvent
{
uint32_t drawId;
- uint64_t counter backfacePrimCount;
- uint64_t counter degeneratePrimCount;
+ uint64_t backfacePrimCount;
+ uint64_t degeneratePrimCount;
};
-event Pipeline::AlphaEvent
+event PipelineStats::AlphaEvent
{
uint32_t drawId;
- uint32_t counter alphaTestCount;
- uint32_t counter alphaBlendCount;
+ uint32_t alphaTestCount;
+ uint32_t alphaBlendCount;
};
-event Shader::VSInfo
+event ShaderStats::VSInfo
{
uint32_t drawId;
- uint32_t counter numInstExecuted;
- uint32_t counter numSampleExecuted;
- uint32_t counter numSampleLExecuted;
- uint32_t counter numSampleBExecuted;
- uint32_t counter numSampleCExecuted;
- uint32_t counter numSampleCLZExecuted;
- uint32_t counter numSampleCDExecuted;
- uint32_t counter numGather4Executed;
- uint32_t counter numGather4CExecuted;
- uint32_t counter numGather4CPOExecuted;
- uint32_t counter numGather4CPOCExecuted;
- uint32_t counter numLodExecuted;
+ uint32_t numInstExecuted;
+ uint32_t numSampleExecuted;
+ uint32_t numSampleLExecuted;
+ uint32_t numSampleBExecuted;
+ uint32_t numSampleCExecuted;
+ uint32_t numSampleCLZExecuted;
+ uint32_t numSampleCDExecuted;
+ uint32_t numGather4Executed;
+ uint32_t numGather4CExecuted;
+ uint32_t numGather4CPOExecuted;
+ uint32_t numGather4CPOCExecuted;
+ uint32_t numLodExecuted;
};
-event Shader::HSInfo
+event ShaderStats::HSInfo
{
uint32_t drawId;
- uint32_t counter numInstExecuted;
- uint32_t counter numSampleExecuted;
- uint32_t counter numSampleLExecuted;
- uint32_t counter numSampleBExecuted;
- uint32_t counter numSampleCExecuted;
- uint32_t counter numSampleCLZExecuted;
- uint32_t counter numSampleCDExecuted;
- uint32_t counter numGather4Executed;
- uint32_t counter numGather4CExecuted;
- uint32_t counter numGather4CPOExecuted;
- uint32_t counter numGather4CPOCExecuted;
- uint32_t counter numLodExecuted;
+ uint32_t numInstExecuted;
+ uint32_t numSampleExecuted;
+ uint32_t numSampleLExecuted;
+ uint32_t numSampleBExecuted;
+ uint32_t numSampleCExecuted;
+ uint32_t numSampleCLZExecuted;
+ uint32_t numSampleCDExecuted;
+ uint32_t numGather4Executed;
+ uint32_t numGather4CExecuted;
+ uint32_t numGather4CPOExecuted;
+ uint32_t numGather4CPOCExecuted;
+ uint32_t numLodExecuted;
};
-event Shader::DSInfo
+event ShaderStats::DSInfo
{
uint32_t drawId;
- uint32_t counter numInstExecuted;
- uint32_t counter numSampleExecuted;
- uint32_t counter numSampleLExecuted;
- uint32_t counter numSampleBExecuted;
- uint32_t counter numSampleCExecuted;
- uint32_t counter numSampleCLZExecuted;
- uint32_t counter numSampleCDExecuted;
- uint32_t counter numGather4Executed;
- uint32_t counter numGather4CExecuted;
- uint32_t counter numGather4CPOExecuted;
- uint32_t counter numGather4CPOCExecuted;
- uint32_t counter numLodExecuted;
+ uint32_t numInstExecuted;
+ uint32_t numSampleExecuted;
+ uint32_t numSampleLExecuted;
+ uint32_t numSampleBExecuted;
+ uint32_t numSampleCExecuted;
+ uint32_t numSampleCLZExecuted;
+ uint32_t numSampleCDExecuted;
+ uint32_t numGather4Executed;
+ uint32_t numGather4CExecuted;
+ uint32_t numGather4CPOExecuted;
+ uint32_t numGather4CPOCExecuted;
+ uint32_t numLodExecuted;
};
-event Shader::GSInfo
+event ShaderStats::GSInfo
{
uint32_t drawId;
- uint32_t counter numInstExecuted;
- uint32_t counter numSampleExecuted;
- uint32_t counter numSampleLExecuted;
- uint32_t counter numSampleBExecuted;
- uint32_t counter numSampleCExecuted;
- uint32_t counter numSampleCLZExecuted;
- uint32_t counter numSampleCDExecuted;
- uint32_t counter numGather4Executed;
- uint32_t counter numGather4CExecuted;
- uint32_t counter numGather4CPOExecuted;
- uint32_t counter numGather4CPOCExecuted;
- uint32_t counter numLodExecuted;
+ uint32_t numInstExecuted;
+ uint32_t numSampleExecuted;
+ uint32_t numSampleLExecuted;
+ uint32_t numSampleBExecuted;
+ uint32_t numSampleCExecuted;
+ uint32_t numSampleCLZExecuted;
+ uint32_t numSampleCDExecuted;
+ uint32_t numGather4Executed;
+ uint32_t numGather4CExecuted;
+ uint32_t numGather4CPOExecuted;
+ uint32_t numGather4CPOCExecuted;
+ uint32_t numLodExecuted;
};
-event Shader::PSInfo
+event ShaderStats::PSInfo
{
uint32_t drawId;
- uint32_t counter numInstExecuted;
- uint32_t counter numSampleExecuted;
- uint32_t counter numSampleLExecuted;
- uint32_t counter numSampleBExecuted;
- uint32_t counter numSampleCExecuted;
- uint32_t counter numSampleCLZExecuted;
- uint32_t counter numSampleCDExecuted;
- uint32_t counter numGather4Executed;
- uint32_t counter numGather4CExecuted;
- uint32_t counter numGather4CPOExecuted;
- uint32_t counter numGather4CPOCExecuted;
- uint32_t counter numLodExecuted;
+ uint32_t numInstExecuted;
+ uint32_t numSampleExecuted;
+ uint32_t numSampleLExecuted;
+ uint32_t numSampleBExecuted;
+ uint32_t numSampleCExecuted;
+ uint32_t numSampleCLZExecuted;
+ uint32_t numSampleCDExecuted;
+ uint32_t numGather4Executed;
+ uint32_t numGather4CExecuted;
+ uint32_t numGather4CPOExecuted;
+ uint32_t numGather4CPOCExecuted;
+ uint32_t numLodExecuted;
};
-event Shader::CSInfo
+event ShaderStats::CSInfo
{
uint32_t drawId;
- uint32_t counter numInstExecuted;
- uint32_t counter numSampleExecuted;
- uint32_t counter numSampleLExecuted;
- uint32_t counter numSampleBExecuted;
- uint32_t counter numSampleCExecuted;
- uint32_t counter numSampleCLZExecuted;
- uint32_t counter numSampleCDExecuted;
- uint32_t counter numGather4Executed;
- uint32_t counter numGather4CExecuted;
- uint32_t counter numGather4CPOExecuted;
- uint32_t counter numGather4CPOCExecuted;
- uint32_t counter numLodExecuted;
-};
-
-event SWTagApi::SWTagEndFrameEvent
-{
- uint64_t frameCount;
- uint32_t renderpassCount;
- uint32_t drawOrDispatchCount;
- uint32_t drawCount;
- uint32_t dispatchCount;
-};
-
-event SWTagApi::SWTagRenderpassEvent
-{
- uint64_t frameCount;
- uint32_t renderpassCount;
- uint32_t drawOrDispatchCount;
- uint32_t drawCount;
- uint32_t dispatchCount;
-};
-
-event SWTagApi::SWTagDrawEvent
-{
- uint64_t frameCount;
- uint32_t renderpassCount;
- uint32_t drawOrDispatchCount;
- uint32_t drawCount;
- uint32_t dispatchCount;
+ uint32_t numInstExecuted;
+ uint32_t numSampleExecuted;
+ uint32_t numSampleLExecuted;
+ uint32_t numSampleBExecuted;
+ uint32_t numSampleCExecuted;
+ uint32_t numSampleCLZExecuted;
+ uint32_t numSampleCDExecuted;
+ uint32_t numGather4Executed;
+ uint32_t numGather4CExecuted;
+ uint32_t numGather4CPOExecuted;
+ uint32_t numGather4CPOCExecuted;
+ uint32_t numLodExecuted;
};
-event SWTagApi::SWTagDispatchEvent
-{
- uint64_t frameCount;
- uint32_t renderpassCount;
- uint32_t drawOrDispatchCount;
- uint32_t drawCount;
- uint32_t dispatchCount;
-};
-
-event SWTagApi::SWTagDriverCallEvent
-{
- char cmd[256];
-};
-
-event SWTag::SWTagFlushEvent
-{
- uint32_t count;
- char reason[256];
- uint32_t type;
-};
-
-event Memory::MemoryStatsEvent
-{
- uint32_t drawId;
- uint64_t baseAddr;
- uint32_t accessCountRead;
- uint32_t accessCountWrite;
- uint32_t totalSizeRead;
- uint32_t totalSizeWrite;
- uint64_t tscMin;
- uint64_t tscMax;
-};
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index da4419a4626..b57d5c4284f 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
@@ -24,68 +24,68 @@
# ArchRast is to not pollute the Rasty code with lots of calculations, etc. that
# are needed to compute per draw statistics, etc.
-event Pipeline::EarlyDepthStencilInfoSingleSample
+event PipelineStats::EarlyDepthStencilInfoSingleSample
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event Pipeline::EarlyDepthStencilInfoSampleRate
+event PipelineStats::EarlyDepthStencilInfoSampleRate
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event Pipeline::EarlyDepthStencilInfoNullPS
+event PipelineStats::EarlyDepthStencilInfoNullPS
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event Pipeline::LateDepthStencilInfoSingleSample
+event PipelineStats::LateDepthStencilInfoSingleSample
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event Pipeline::LateDepthStencilInfoSampleRate
+event PipelineStats::LateDepthStencilInfoSampleRate
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event Pipeline::LateDepthStencilInfoNullPS
+event PipelineStats::LateDepthStencilInfoNullPS
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event Pipeline::EarlyDepthInfoPixelRate
+event PipelineStats::EarlyDepthInfoPixelRate
{
uint64_t depthPassCount;
uint64_t activeLanes;
};
-event Pipeline::LateDepthInfoPixelRate
+event PipelineStats::LateDepthInfoPixelRate
{
uint64_t depthPassCount;
uint64_t activeLanes;
};
-event Pipeline::BackendDrawEndEvent
+event PipelineStats::BackendDrawEndEvent
{
uint32_t drawId;
};
-event Pipeline::FrontendDrawEndEvent
+event PipelineStats::FrontendDrawEndEvent
{
uint32_t drawId;
};
@@ -105,18 +105,18 @@ event Memory::MemoryStatsEndEvent
uint32_t drawId;
};
-event Pipeline::TessPrimCount
+event PipelineStats::TessPrimCount
{
uint64_t primCount;
};
-event Pipeline::RasterTileCount
+event PipelineStats::RasterTileCount
{
uint32_t drawId;
uint64_t rasterTiles;
};
-event Pipeline::GSPrimInfo
+event PipelineStats::GSPrimInfo
{
uint64_t inputPrimCount;
uint64_t primGeneratedCount;
@@ -128,14 +128,14 @@ event Pipeline::GSPrimInfo
// Trivial reject is numInvocations - pop_cnt32(validMask)
// Trivial accept is validMask & ~clipMask
// Must clip count is pop_cnt32(clipMask)
-event Pipeline::ClipInfoEvent
+event PipelineStats::ClipInfoEvent
{
uint32_t numInvocations;
uint32_t validMask;
uint32_t clipMask;
};
-event Pipeline::CullInfoEvent
+event PipelineStats::CullInfoEvent
{
uint32_t drawId;
uint64_t degeneratePrimMask;
@@ -143,14 +143,14 @@ event Pipeline::CullInfoEvent
uint32_t validMask;
};
-event Pipeline::AlphaInfoEvent
+event PipelineStats::AlphaInfoEvent
{
uint32_t drawId;
uint32_t alphaTestEnable;
uint32_t alphaBlendEnable;
};
-event SwrApi::DrawInstancedEvent
+event PipelineStats::DrawInstancedEvent
{
uint32_t drawId;
uint32_t topology;
@@ -165,7 +165,7 @@ event SwrApi::DrawInstancedEvent
uint32_t splitId; // Split draw count or id.
};
-event SwrApi::DrawIndexedInstancedEvent
+event PipelineStats::DrawIndexedInstancedEvent
{
uint32_t drawId;
uint32_t topology;
@@ -181,32 +181,32 @@ event SwrApi::DrawIndexedInstancedEvent
uint32_t splitId; // Split draw count or id.
};
-event Shader::VSStats
+event ShaderStats::VSStats
{
HANDLE hStats; // SWR_SHADER_STATS
};
-event Shader::HSStats
+event ShaderStats::HSStats
{
HANDLE hStats; // SWR_SHADER_STATS
};
-event Shader::DSStats
+event ShaderStats::DSStats
{
HANDLE hStats; // SWR_SHADER_STATS
};
-event Shader::GSStats
+event ShaderStats::GSStats
{
HANDLE hStats; // SWR_SHADER_STATS
};
-event Shader::PSStats
+event ShaderStats::PSStats
{
HANDLE hStats; // SWR_SHADER_STATS
};
-event Shader::CSStats
+event ShaderStats::CSStats
{
HANDLE hStats; // SWR_SHADER_STATS
}; \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
index 44f2af036b9..140a39bd68b 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
@@ -39,14 +39,22 @@ def parse_event_fields(lines, idx, event_dict):
line = lines[idx].rstrip()
idx += 1
- match = re.match(r'(\s*)([\w\*]+)(\s+)(counter\s+)*([\w]+)(\[\d+\])*', line)
+ # ex 1: uint32_t numSampleCLZExecuted; // number of sample_cl_z instructions executed
+ # ex 2: char reason[256]; // size of reason
+ match = re.match(r'^(\s*)([\w\*]+)(\s+)([\w]+)(\[\d+\])*;\s*(\/\/.*)*$', line)
+ # group 1 -
+ # group 2 type
+ # group 3 -
+ # group 4 name
+ # group 5 [array size]
+ # group 6 //comment
if match:
field = {
"type": match.group(2),
- "name": match.group(5),
- "size": int(match.group(6)[1:-1]) if match.group(6) else 1,
- "counter": True if match.group(4) else False
+ "name": match.group(4),
+ "size": int(match.group(5)[1:-1]) if match.group(5) else 1,
+ "desc": match.group(6)[2:].strip() if match.group(6) else "",
}
fields.append(field)
@@ -87,6 +95,53 @@ def parse_protos(files, verbose=False):
"""
Parses a proto file and returns a dictionary of event definitions
"""
+
+ # Protos structure:
+ #
+ # {
+ # "events": {
+ # "defs": { // dict of event definitions where keys are 'group_name::event_name'
+ # ...,
+ # "ApiStat::DrawInfoEvent": {
+ # "id": 3,
+ # "group": "ApiStat",
+ # "name": "DrawInfoEvent", // name of event without 'group_name::' prefix
+ # "desc": "",
+ # "fields": [
+ # {
+ # "type": "uint32_t",
+ # "name": "drawId",
+ # "size": 1,
+ # "desc": "",
+ # },
+ # ...
+ # ]
+ # },
+ # ...
+ # },
+ # "groups": { // dict of groups with lists of event keys
+ # "ApiStat": [
+ # "ApiStat::DispatchEvent",
+ # "ApiStat::DrawInfoEvent",
+ # ...
+ # ],
+ # "Framework": [
+ # "Framework::ThreadStartApiEvent",
+ # "Framework::ThreadStartWorkerEvent",
+ # ...
+ # ],
+ # ...
+ # },
+ # "map": { // map of event ids to match archrast output to event key
+ # "1": "Framework::ThreadStartApiEvent",
+ # "2": "Framework::ThreadStartWorkerEvent",
+ # "3": "ApiStat::DrawInfoEvent",
+ # ...
+ # }
+ # },
+ # "enums": { ... } // enums follow similar defs, map (groups?) structure
+ # }
+
protos = {
'events': {
'defs': {}, # event dictionary containing events with their fields
@@ -111,12 +166,29 @@ def parse_protos(files, verbose=False):
with open(filename, 'r') as f:
lines = f.readlines()
-
+ in_brief = False
+ brief = []
idx = 0
while idx < len(lines):
line = lines[idx].strip()
idx += 1
+ # If currently processing a brief, keep processing or change state
+ if in_brief:
+ match = re.match(r'^\s*\/\/\/\s*(.*)$', line) # i.e. "/// more event desc..."
+ if match:
+ brief.append(match.group(1).strip())
+ continue
+ else:
+ in_brief = False
+
+ # Match event/enum brief
+ match = re.match(r'^\s*\/\/\/\s*@(brief|breif)\s*(.*)$', line) # i.e. "///@brief My event desc..."
+ if match:
+ in_brief = True
+ brief.append(match.group(2).strip())
+ continue
+
# Match event definition
match = re.match(r'event(\s*)(((\w*)::){0,1}(\w+))', line) # i.e. "event SWTag::CounterEvent"
if match:
@@ -124,19 +196,27 @@ def parse_protos(files, verbose=False):
# Parse event attributes
event_key = match.group(2) # i.e. SWTag::CounterEvent
- event_group = match.group(4) if match.group(4) else "" # i.e. SWTag
+ event_group = match.group(4) if match.group(4) else "" # i.e. SWTag
event_name = match.group(5) # i.e. CounterEvent
# Define event attributes
event = {
'id': event_id,
'group': event_group,
- 'name': event_name
+ 'name': event_name,
+ 'desc': ' '.join(brief)
}
+ # Add period at end of event desc if necessary
+ if event["desc"] and event["desc"][-1] != '.':
+ event["desc"] += '.'
+
+ # Reset brief
+ brief = []
# Now add event fields
idx = parse_event_fields(lines, idx, event)
+ # Register event and mapping
protos['events']['defs'][event_key] = event
protos['events']['map'][event_id] = event_key
@@ -152,12 +232,20 @@ def parse_protos(files, verbose=False):
# Define enum attr
enum = {
- 'name': enum_name
+ 'name': enum_name,
+ 'desc': ' '.join(brief)
}
+ # Add period at end of enum desc if necessary
+ if enum["desc"] and enum["desc"][-1] != '.':
+ enum["desc"] += '.'
+
+ # Reset brief
+ brief = []
# Now add enum fields
idx = parse_enums(lines, idx, enum)
+ # Register enum and mapping
protos['enums']['defs'][enum_name] = enum
protos['enums']['map'][enum_id] = enum_name
@@ -174,10 +262,6 @@ def parse_protos(files, verbose=False):
return protos
-def get_sorted_protos(protos):
- protos["groups"]
-
-
def main():
# Parse args...
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
index 351587ad5ca..75eae353ae1 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
@@ -315,6 +315,34 @@ KNOBS = [
'category' : 'perf_adv',
}],
+ ['AR_ENABLE_PIPELINE_STATS', {
+ 'type' : 'bool',
+ 'default' : 'true',
+ 'desc' : ['Enable pipeline stats when using Archrast'],
+ 'category' : 'archrast',
+ }],
+
+ ['AR_ENABLE_SHADER_STATS', {
+ 'type' : 'bool',
+ 'default' : 'true',
+ 'desc' : ['Enable shader stats when using Archrast'],
+ 'category' : 'archrast',
+ }],
+
+ ['AR_ENABLE_SWTAG_DATA', {
+ 'type' : 'bool',
+ 'default' : 'false',
+ 'desc' : ['Enable SWTag data when using Archrast'],
+ 'category' : 'archrast',
+ }],
+
+ ['AR_ENABLE_SWR_EVENTS', {
+ 'type' : 'bool',
+ 'default' : 'true',
+ 'desc' : ['Enable internal SWR events when using Archrast'],
+ 'category' : 'archrast',
+ }],
+
['AR_ENABLE_PIPELINE_EVENTS', {
'type' : 'bool',
'default' : 'true',
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
index 8079b0e187a..3ef99da2249 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
@@ -36,7 +36,14 @@
#include "common/os.h"
#include "core/state.h"
-<% always_enabled_knob_groups = ['', 'Framework', 'SWTagApi', 'SwrApi'] %>
+<%
+ always_enabled_knob_groups = ['Framework', 'SWTagFramework', 'ApiSwr']
+ group_knob_remap_table = {
+ "ShaderStats": "KNOB_AR_ENABLE_SHADER_STATS",
+ "PipelineStats" : "KNOB_AR_ENABLE_PIPELINE_STATS",
+ "SWTagData" : "KNOB_AR_ENABLE_SWTAG_DATA",
+ }
+%>
namespace ArchRast
{
<% sorted_enums = sorted(protos['enums']['defs']) %>
@@ -57,10 +64,12 @@ namespace ArchRast
//////////////////////////////////////////////////////////////////////////
struct Event
{
+ const uint32_t eventId = {0xFFFFFFFF};
Event() {}
virtual ~Event() {}
virtual bool IsEnabled() const { return true; };
+ virtual const uint32_t GetEventId() const = 0;
virtual void Accept(EventHandler* pHandler) const = 0;
};
@@ -94,6 +103,7 @@ namespace ArchRast
struct ${event['name']} : Event
{<%
fields = event['fields'] %>
+ const uint32_t eventId = {${ event['id'] }};
${event['name']}Data data;
// Constructor
@@ -135,8 +145,14 @@ namespace ArchRast
}
virtual void Accept(EventHandler* pHandler) const;
+ inline const uint32_t GetEventId() const { return eventId; }
% if group not in always_enabled_knob_groups:
- <% group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS' %>
+ <%
+ if group in group_knob_remap_table:
+ group_knob_define = group_knob_remap_table[group]
+ else:
+ group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS'
+ %>
bool IsEnabled() const
{
static const bool IsEventEnabled = true; // TODO: Replace with knob for each event
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
index 3f85c88bd7a..6e9fdb52a74 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
@@ -147,9 +147,9 @@ namespace ArchRast
virtual void Handle(const ${event['name']}& event)
{
% if event['num_fields'] == 0:
- Write(${event['id']}, (char*)&event.data, 0);
+ Write(event.eventId, (char*)&event.data, 0);
% else:
- Write(${event['id']}, (char*)&event.data, sizeof(event.data));
+ Write(event.eventId, (char*)&event.data, sizeof(event.data));
% endif
}
% endfor
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
index ba1ad5effee..1ef83ad10d1 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
@@ -67,7 +67,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string& text)
#else
{
// unix style variable replacement
- static std::regex env("\\$\\{([^}]+)\\}");
+ static std::regex env("\\$\\{([^}]+?)\\}");
std::smatch match;
while (std::regex_search(text, match, env))
{
@@ -79,7 +79,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string& text)
}
{
// win32 style variable replacement
- static std::regex env("\\%([^}]+)\\%");
+ static std::regex env("%([^%]+?)%");
std::smatch match;
while (std::regex_search(text, match, env))
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 3090a249692..a0ddd96c61f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -458,8 +458,6 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId,
{
ExecuteCallbacks(pContext, workerId, pDC);
- // Report accumulated memory access stats
- AR_EVENT(MemoryStatsEndEvent(pDC->drawId));
// Cleanup memory allocations
pDC->pArena->Reset(true);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
index d3c732af042..e0bb75cdec9 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
@@ -25,6 +25,7 @@
* @brief Include file for llvm passes
*
******************************************************************************/
+#pragma once
#include "JitManager.h"
#include "builder.h"