diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/arena.h | 104 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/threads.cpp | 5 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/threads.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/utils.cpp | 5 |
5 files changed, 110 insertions, 10 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index c3c603d294c..453d0295b54 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -189,7 +189,7 @@ void QueueWork(SWR_CONTEXT *pContext) if (IsDraw) { - std::unordered_set<uint32_t> lockedTiles; + static TileSet lockedTiles; uint64_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId }; WorkOnFifoFE(pContext, 0, curDraw[0], 0); WorkOnFifoBE(pContext, 0, curDraw[1], lockedTiles); diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h index b6b4d829576..4cdb728e1ef 100644 --- a/src/gallium/drivers/swr/rasterizer/core/arena.h +++ b/src/gallium/drivers/swr/rasterizer/core/arena.h @@ -33,6 +33,9 @@ #pragma once #include <mutex> +#include <algorithm> +#include <atomic> +#include "core/utils.h" class DefaultAllocator { @@ -48,7 +51,7 @@ public: } }; -template<typename T = DefaultAllocator> +template<typename MutexT = std::mutex, typename T = DefaultAllocator> class TArena { public: @@ -79,7 +82,7 @@ public: } static const size_t ArenaBlockSize = 1024 * 1024; - size_t blockSize = std::max(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize)); + size_t blockSize = std::max<size_t>(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize)); // Add in one BLOCK_ALIGN unit to store ArenaBlock in. blockSize = AlignUp(blockSize + BLOCK_ALIGN, BLOCK_ALIGN); @@ -111,8 +114,9 @@ public: { void* pAlloc = nullptr; - std::unique_lock<std::mutex> l(m_mutex); + m_mutex.lock(); pAlloc = AllocAligned(size, align); + m_mutex.unlock(); return pAlloc; } @@ -121,8 +125,9 @@ public: { void* pAlloc = nullptr; - std::unique_lock<std::mutex> l(m_mutex); + m_mutex.lock(); pAlloc = Alloc(size); + m_mutex.unlock(); return pAlloc; } @@ -175,7 +180,96 @@ private: size_t m_size = 0; /// @note Mutex is only used by sync allocation functions. - std::mutex m_mutex; + MutexT m_mutex; }; typedef TArena<> Arena; + +struct NullMutex +{ + void lock() {} + void unlock() {} +}; + +// Ref counted Arena for ArenaAllocator +// NOT THREAD SAFE!! +struct RefArena : TArena<NullMutex> +{ + uint32_t AddRef() { return ++m_refCount; } + uint32_t Release() { if (--m_refCount) { return m_refCount; } delete this; return 0; } + + void* allocate(std::size_t n) + { + ++m_numAllocations; + return Alloc(n); + } + + void deallocate(void* p) { --m_numAllocations; } + void clear() { SWR_ASSERT(0 == m_numAllocations); Reset(); } + +private: + uint32_t m_refCount = 0; + uint32_t m_numAllocations = 0; +}; + +#if 0 // THIS DOESN'T WORK!!! +// Arena based replacement for std::allocator +template <typename T> +struct ArenaAllocator +{ + typedef T value_type; + ArenaAllocator() + { + m_pArena = new RefArena(); + m_pArena->AddRef(); + } + ~ArenaAllocator() + { + m_pArena->Release(); m_pArena = nullptr; + } + ArenaAllocator(const ArenaAllocator& copy) + { + m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef(); + } + + + template <class U> ArenaAllocator(const ArenaAllocator<U>& copy) + { + m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef(); + } + T* allocate(std::size_t n) + { +#if defined(_DEBUG) + char buf[32]; + sprintf_s(buf, "Alloc: %lld\n", n); + OutputDebugStringA(buf); +#endif + void* p = m_pArena->allocate(n * sizeof(T)); + return static_cast<T*>(p); + } + void deallocate(T* p, std::size_t n) + { +#if defined(_DEBUG) + char buf[32]; + sprintf_s(buf, "Dealloc: %lld\n", n); + OutputDebugStringA(buf); +#endif + m_pArena->deallocate(p); + } + void clear() { m_pArena->clear(); } + + RefArena* m_pArena = nullptr; +}; + +template <class T, class U> +bool operator== (const ArenaAllocator<T>&, const ArenaAllocator<U>&) +{ + return true; +} + +template <class T, class U> +bool operator!= (const ArenaAllocator<T>&, const ArenaAllocator<U>&) +{ + return false; +} +#endif diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 57408049d03..ff25e82f0fe 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -24,7 +24,6 @@ #include <stdio.h> #include <thread> #include <algorithm> -#include <unordered_set> #include <float.h> #include <vector> #include <utility> @@ -345,7 +344,7 @@ void WorkOnFifoBE( SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, - std::unordered_set<uint32_t>& lockedTiles) + TileSet& lockedTiles) { // Find the first incomplete draw that has pending work. If no such draw is found then // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE. @@ -550,7 +549,7 @@ DWORD workerThreadMain(LPVOID pData) // Track tiles locked by other threads. If we try to lock a macrotile and find its already // locked then we'll add it to this list so that we don't try and lock it again. - std::unordered_set<uint32_t> lockedTiles; + TileSet lockedTiles; // each worker has the ability to work on any of the queued draws as long as certain // conditions are met. the data associated diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h index ec0b735a4ec..6b37e3ac179 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.h +++ b/src/gallium/drivers/swr/rasterizer/core/threads.h @@ -54,10 +54,12 @@ struct THREAD_POOL THREAD_DATA *pThreadData; }; +typedef std::unordered_set<uint32_t> TileSet; + void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool); void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool); // Expose FE and BE worker functions to the API thread if single threaded void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, int numaNode); -void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, std::unordered_set<uint32_t> &usedTiles); +void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles); void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE); diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.cpp b/src/gallium/drivers/swr/rasterizer/core/utils.cpp index f36452f2cec..a1d665e77cc 100644 --- a/src/gallium/drivers/swr/rasterizer/core/utils.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/utils.cpp @@ -27,6 +27,11 @@ ******************************************************************************/ #if defined(_WIN32) +#if defined(NOMINMAX) +// GDI Plus requires non-std min / max macros be defined :( +#undef NOMINMAX +#endif + #include<Windows.h> #include <Gdiplus.h> #include <Gdiplusheaders.h> |