summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2017-02-18 00:29:06 -0800
committer Tim Rowley <[email protected]> 2017-03-20 18:04:53 -0500
commit 2cbac00221606fdda6af839afaf64ef649a73f83 (patch)
tree b09569c2afb64fdd083653e8f867d4d2570bc31f /src/gallium
parent 0a36a7cf04d4f0e638e702aee9a03c91cdbc1d1a (diff)
swr: [rasterizer archrast/core/scripts] Fix archrast multithreading issue
Per-pixel stats are cached but were not always being flushed as threads moved from one draw context to the next. Added an explicit flush to allow all archrast objects to flush any cached events.

Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp | 52
-rw-r--r-- src/gallium/drivers/swr/rasterizer/archrast/archrast.h | 1
-rw-r--r-- src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h | 8
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h | 3
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.cpp | 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template | 2
6 files changed, 52 insertions, 16 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index ad4d20c2479..acd0a0f50f3 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -72,7 +72,7 @@ namespace ArchRast
class EventHandlerStatsFile : public EventHandlerFile
{
public:
- EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
+ EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
// These are events that we're not interested in saving in stats event files.
virtual void Handle(const Start& event) {}
@@ -87,6 +87,7 @@ namespace ArchRast
//earlyStencil test compute
mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mNeedFlush = true;
}
virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
@@ -98,6 +99,7 @@ namespace ArchRast
//earlyStencil test compute
mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mNeedFlush = true;
}
virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
@@ -109,6 +111,7 @@ namespace ArchRast
//earlyStencil test compute
mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mNeedFlush = true;
}
virtual void Handle(const LateDepthStencilInfoSingleSample& event)
@@ -120,6 +123,7 @@ namespace ArchRast
//lateStencil test compute
mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mNeedFlush = true;
}
virtual void Handle(const LateDepthStencilInfoSampleRate& event)
@@ -131,6 +135,7 @@ namespace ArchRast
//lateStencil test compute
mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mNeedFlush = true;
}
virtual void Handle(const LateDepthStencilInfoNullPS& event)
@@ -142,6 +147,7 @@ namespace ArchRast
//lateStencil test compute
mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mNeedFlush = true;
}
virtual void Handle(const EarlyDepthInfoPixelRate& event)
@@ -149,6 +155,7 @@ namespace ArchRast
//earlyZ test compute
mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+ mNeedFlush = true;
}
@@ -157,38 +164,43 @@ namespace ArchRast
//lateZ test compute
mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
-
+ mNeedFlush = true;
}
- virtual void Handle(const BackendDrawEndEvent& event)
+ // Flush cached events for this draw
+ virtual void FlushDraw(uint32_t drawId)
{
+ if (mNeedFlush == false) return;
+
//singleSample
- EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
- EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
- EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
+ EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
+ EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
+ EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
+ EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
//sampleRate
- EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
- EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
- EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
+ EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
+ EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
+ EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
+ EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
//pixelRate
- EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
+ EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
+ EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
//NullPS
- EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
- EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
+ EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
+ EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
//Reset Internal Counters
mDSSingleSample = {};
mDSSampleRate = {};
mDSPixelRate = {};
mDSNullPS = {};
+
+ mNeedFlush = false;
}
virtual void Handle(const FrontendDrawEndEvent& event)
@@ -228,7 +240,7 @@ namespace ArchRast
}
protected:
-
+ bool mNeedFlush;
// Per draw stats
DepthStencilStats mDSSingleSample = {};
DepthStencilStats mDSSampleRate = {};
@@ -294,4 +306,12 @@ namespace ArchRast
pManager->Dispatch(event);
}
+ // Flush for this thread.
+ void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
+ {
+ EventManager* pManager = FromHandle(hThreadContext);
+ SWR_ASSERT(pManager != nullptr);
+
+ pManager->FlushDraw(drawId);
+ }
}
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
index 4783144fcb5..c0f9d6a8194 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
@@ -43,5 +43,6 @@ namespace ArchRast
// Dispatch event for this thread.
void Dispatch(HANDLE hThreadContext, Event& event);
+ void FlushDraw(HANDLE hThreadContext, uint32_t drawId);
};
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h b/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
index 78ba8f3e2d7..88edc03f4f4 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
+++ b/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
@@ -69,6 +69,14 @@ namespace ArchRast
event.Accept(pHandler);
}
}
+
+ void FlushDraw(uint32_t drawId)
+ {
+ for (auto pHandler : mHandlers)
+ {
+ pHandler->FlushDraw(drawId);
+ }
+ }
private:
// Handlers stay registered for life
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index e937a631b9e..9da7962826c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -525,6 +525,7 @@ struct SWR_CONTEXT
#define _AR_BEGIN(ctx, type, id) ArchRast::Dispatch(ctx, ArchRast::Start(ArchRast::type, id))
#define _AR_END(ctx, type, count) ArchRast::Dispatch(ctx, ArchRast::End(ArchRast::type, count))
#define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event)
+ #define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id)
#else
#ifdef KNOB_ENABLE_RDTSC
#define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
@@ -534,6 +535,7 @@ struct SWR_CONTEXT
#define _AR_END(ctx, type, id)
#endif
#define _AR_EVENT(ctx, event)
+ #define _AR_FLUSH(ctx, id)
#endif
// Use these macros for api thread.
@@ -545,3 +547,4 @@ struct SWR_CONTEXT
#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
+#define AR_FLUSH(id) _AR_FLUSH(AR_WORKER_CTX, id)
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index e11291bb83e..0f6c94c6550 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -396,6 +396,8 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId,
int32_t result = InterlockedDecrement((volatile LONG*)&pDC->threadsDone);
SWR_ASSERT(result >= 0);
+ AR_FLUSH(pDC->drawId);
+
if (result == 0)
{
ExecuteCallbacks(pContext, workerId, pDC);
diff --git a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
index e6cacd75a27..cfed2aded0c 100644
--- a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
+++ b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
@@ -42,6 +42,8 @@ namespace ArchRast
EventHandler() {}
virtual ~EventHandler() {}
+ virtual void FlushDraw(uint32_t drawId) {}
+
% for name in protos['event_names']:
virtual void Handle(const ${name}& event) {}
% endfor