author     Tim Rowley <[email protected]>    2016-03-21 11:15:32 -0600
committer  Tim Rowley <[email protected]>    2016-03-25 14:45:39 -0500
commit     ec9d4c4b372df773e4453c228b938e7c6c526c4c (patch)
tree       479971ee97a8f57aa346f73870cce8e1045d5cf6
parent     12ce9d9aa1819c0d7fb969b459a070c3cc9a617f (diff)
swr: [rasterizer core] Globally cache allocated arena blocks for fast re-allocation.
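
Each draw/state arena previously owned its block bookkeeping and released
memory straight back to the heap. This patch hoists the ArenaBlock header out
of TArena, parameterizes Arena on its allocator, and adds a per-context
CachingAllocator that keeps freed blocks on a size-sorted free list, so the
next draw reuses blocks instead of round-tripping through the system
allocator.

A minimal sketch of the intended flow, using only names this patch
introduces (driver plumbing elided):

    CachingAllocator allocator;              // one per SWR_CONTEXT
    CachingArena arena(allocator);           // one per dcRing/dsRing entry

    void* p = arena.AllocAligned(1024, 16);  // bump-allocates from cur block
    arena.Reset(true);                       // blocks return to 'allocator',
                                             // not to the system heap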
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/api.cpp         6
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/arena.h        58
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/context.h     120
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/fifo.hpp        6
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/frontend.cpp   10
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/threads.cpp     2
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp     2
-rw-r--r--   src/gallium/drivers/swr/rasterizer/core/tilemgr.h      10
8 files changed, 168 insertions(+), 46 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 453d0295b54..6ebb3f87f7a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -66,11 +66,11 @@ HANDLE SwrCreateContext(
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
{
- pContext->dcRing[dc].pArena = new Arena();
+ pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
pContext->dcRing[dc].pTileMgr = new MacroTileMgr(*(pContext->dcRing[dc].pArena));
pContext->dcRing[dc].pDispatch = new DispatchQueue(); /// @todo Could lazily allocate this if Dispatch seen.
- pContext->dsRing[dc].pArena = new Arena();
+ pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
}
if (!KNOB_SINGLE_THREADED)
@@ -252,7 +252,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
- Arena& stateArena = *(pCurDrawContext->pState->pArena);
+ auto& stateArena = *(pCurDrawContext->pState->pArena);
// Copy previous state to current state.
if (pContext->pPrevDrawContext)
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
index 4cdb728e1ef..71fb258f4d4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -51,6 +51,16 @@ public:
}
};
+static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
+
+struct ArenaBlock
+{
+ void* pMem = nullptr;
+ size_t blockSize = 0;
+ ArenaBlock* pNext = nullptr;
+};
+static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
+
template<typename MutexT = std::mutex, typename T = DefaultAllocator>
class TArena
{
@@ -67,12 +77,12 @@ public:
if (m_pCurBlock)
{
ArenaBlock* pCurBlock = m_pCurBlock;
- pCurBlock->offset = AlignUp(pCurBlock->offset, align);
+ m_offset = AlignUp(m_offset, align);
- if ((pCurBlock->offset + size) <= pCurBlock->blockSize)
+ if ((m_offset + size) <= pCurBlock->blockSize)
{
- void* pMem = PtrAdd(pCurBlock->pMem, pCurBlock->offset);
- pCurBlock->offset += size;
+ void* pMem = PtrAdd(pCurBlock->pMem, m_offset);
+ m_offset += size;
m_size += size;
return pMem;
}
@@ -85,21 +95,21 @@ public:
size_t blockSize = std::max<size_t>(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
// Add in one BLOCK_ALIGN unit to store ArenaBlock in.
- blockSize = AlignUp(blockSize + BLOCK_ALIGN, BLOCK_ALIGN);
+ blockSize = AlignUp(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN);
- void *pMem = m_allocator.AllocateAligned(blockSize, BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
+ void *pMem = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
SWR_ASSERT(pMem != nullptr);
ArenaBlock* pNewBlock = new (pMem) ArenaBlock();
if (pNewBlock != nullptr)
{
+ m_offset = 0;
pNewBlock->pNext = m_pCurBlock;
m_pCurBlock = pNewBlock;
- m_pCurBlock->pMem = PtrAdd(pMem, BLOCK_ALIGN);
- m_pCurBlock->blockSize = blockSize - BLOCK_ALIGN;
-
+ m_pCurBlock->pMem = PtrAdd(pMem, ARENA_BLOCK_ALIGN);
+ m_pCurBlock->blockSize = blockSize - ARENA_BLOCK_ALIGN;
}
return AllocAligned(size, align);
@@ -134,10 +144,10 @@ public:
void Reset(bool removeAll = false)
{
+ m_offset = 0;
+
if (m_pCurBlock)
{
- m_pCurBlock->offset = 0;
-
ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
m_pCurBlock->pNext = nullptr;
while (pUsedBlocks)
@@ -162,28 +172,20 @@ public:
private:
- static const size_t BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
+ ArenaBlock* m_pCurBlock = nullptr;
+ size_t m_offset = 0;
+ size_t m_size = 0;
+
+ /// @note Mutex is only used by sync allocation functions.
+ MutexT m_mutex;
DefaultAllocator m_defAllocator;
T& m_allocator;
-
- struct ArenaBlock
- {
- void* pMem = nullptr;
- size_t blockSize = 0;
- size_t offset = 0;
- ArenaBlock* pNext = nullptr;
- };
- static_assert(sizeof(ArenaBlock) <= BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
-
- ArenaBlock* m_pCurBlock = nullptr;
- size_t m_size = 0;
-
- /// @note Mutex is only used by sync allocation functions.
- MutexT m_mutex;
};
-typedef TArena<> Arena;
+template<typename T>
+using Arena = TArena<std::mutex, T>;
+using StdArena = Arena<DefaultAllocator>;
struct NullMutex
{
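
The load-bearing change in arena.h: the ArenaBlock header now lives in-band,
placement-new'd at the base of every backing allocation, with user memory
starting one ARENA_BLOCK_ALIGN unit in. That is what later lets
CachingAllocator::Free() recover full bookkeeping from a raw base pointer,
and the static_assert guarantees the header fits inside that alignment unit.
The bump offset also moves from each block into the arena itself (m_offset),
since only the current block is ever bumped. Condensed from the hunk above,
where 'request' stands in for the caller's size (AlignUp and PtrAdd are the
patch's own helpers):

    size_t blockSize = AlignUp(request + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN);
    void*  pMem      = allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN);

    ArenaBlock* pBlock = new (pMem) ArenaBlock();         // header in-band
    pBlock->pMem       = PtrAdd(pMem, ARENA_BLOCK_ALIGN); // user memory next
    pBlock->blockSize  = blockSize - ARENA_BLOCK_ALIGN;   // usable bytes only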
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index ed972fa5478..6240b2e08d3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -360,6 +360,120 @@ struct BACKEND_FUNCS
PFN_OUTPUT_MERGER pfnOutputMerger;
};
+// Caching Allocator for Arena
+struct CachingAllocator : DefaultAllocator
+{
+ void* AllocateAligned(size_t size, size_t align)
+ {
+ SWR_ASSERT(size >= sizeof(ArenaBlock));
+
+ {
+ // search cached blocks
+ std::lock_guard<std::mutex> l(m_mutex);
+ ArenaBlock* pPrevBlock = &m_cachedBlocks;
+ ArenaBlock* pBlock = m_cachedBlocks.pNext;
+ ArenaBlock* pPotentialBlock = nullptr;
+ ArenaBlock* pPotentialPrev = nullptr;
+
+ while (pBlock)
+ {
+ if (pBlock->blockSize >= (size - ARENA_BLOCK_ALIGN))
+ {
+ if (pBlock == AlignUp(pBlock, align))
+ {
+ if (pBlock->blockSize == size)
+ {
+ // Won't find a better match
+ break;
+ }
+
+ // We could use this as it is larger than we wanted, but
+ // continue to search for a better match
+ pPotentialBlock = pBlock;
+ pPotentialPrev = pPrevBlock;
+ }
+ }
+ else
+ {
+ // Blocks are sorted by size (biggest first)
+ // So, if we get here, there are no blocks
+ // large enough, fall through to allocation.
+ pBlock = nullptr;
+ break;
+ }
+
+ pPrevBlock = pBlock;
+ pBlock = pBlock->pNext;
+ }
+
+ if (!pBlock)
+ {
+ // Couldn't find an exact match, use next biggest size
+ pBlock = pPotentialBlock;
+ pPrevBlock = pPotentialPrev;
+ }
+
+ if (pBlock)
+ {
+ SWR_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
+ pPrevBlock->pNext = pBlock->pNext;
+ pBlock->pNext = nullptr;
+
+ return pBlock;
+ }
+ }
+
+ return this->DefaultAllocator::AllocateAligned(size, align);
+ }
+
+ void Free(void* pMem)
+ {
+ if (pMem)
+ {
+ ArenaBlock* pNewBlock = reinterpret_cast<ArenaBlock*>(pMem);
+ SWR_ASSERT(pNewBlock->blockSize >= 0 && pNewBlock->pMem != nullptr);
+
+ std::unique_lock<std::mutex> l(m_mutex);
+ ArenaBlock* pPrevBlock = &m_cachedBlocks;
+ ArenaBlock* pBlock = m_cachedBlocks.pNext;
+
+ while (pBlock)
+ {
+ if (pNewBlock->blockSize >= pBlock->blockSize)
+ {
+ // Insert here
+ break;
+ }
+ pPrevBlock = pBlock;
+ pBlock = pBlock->pNext;
+ }
+
+ // Insert into list
+ SWR_ASSERT(pPrevBlock);
+ pPrevBlock->pNext = pNewBlock;
+ pNewBlock->pNext = pBlock;
+ }
+ }
+
+ ~CachingAllocator()
+ {
+ // Free all cached blocks
+ ArenaBlock* pBlock = m_cachedBlocks.pNext;
+ while (pBlock)
+ {
+ ArenaBlock* pNext = pBlock->pNext;
+ this->DefaultAllocator::Free(pBlock);
+ pBlock = pNext;
+ }
+ }
+
+ ArenaBlock m_cachedBlocks;
+ std::mutex m_mutex;
+
+};
+
+using CachingArena = Arena<CachingAllocator>;
+
// Draw State
struct DRAW_STATE
{
@@ -371,7 +485,7 @@ struct DRAW_STATE
BACKEND_FUNCS backendFuncs;
PFN_PROCESS_PRIMS pfnProcessPrims;
- Arena* pArena; // This should only be used by API thread.
+ CachingArena* pArena; // This should only be used by API thread.
};
// Draw Context
@@ -398,7 +512,7 @@ struct DRAW_CONTEXT
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
DRAW_STATE* pState;
- Arena* pArena;
+ CachingArena* pArena;
uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
};
@@ -476,6 +590,8 @@ struct SWR_CONTEXT
// Scratch space for workers.
uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
+
+ CachingAllocator cachingArenaAllocator;
};
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
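
CachingAllocator keeps its free blocks on a singly linked list sorted
largest-first, which is what lets AllocateAligned() early-out on an exact
size match and stop searching at the first block that is too small. The same
invariant in isolation, with hypothetical stand-in types (the real search
also tracks the predecessor for unlinking and rejects misaligned blocks):

    struct Node { size_t size; Node* next; };  // stand-in for ArenaBlock

    // Best fit over a largest-first list: each fitting node is a tighter
    // fit than the last, and the first too-small node ends the search.
    Node* BestFit(Node* head, size_t want)
    {
        Node* candidate = nullptr;
        for (Node* n = head; n; n = n->next)
        {
            if (n->size < want) break;         // nothing later can fit
            candidate = n;                     // best so far
            if (n->size == want) break;        // exact match; stop
        }
        return candidate;
    }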
diff --git a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
index 7e556012e6b..ccf0b70544f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
+++ b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
@@ -49,7 +49,8 @@ struct QUEUE
static const uint32_t mBlockSizeShift = 6;
static const uint32_t mBlockSize = 1 << mBlockSizeShift;
- void clear(Arena& arena)
+ template <typename ArenaT>
+ void clear(ArenaT& arena)
{
mHead = 0;
mTail = 0;
@@ -102,7 +103,8 @@ struct QUEUE
mNumEntries --;
}
- bool enqueue_try_nosync(Arena& arena, const T* entry)
+ template <typename ArenaT>
+ bool enqueue_try_nosync(ArenaT& arena, const T* entry)
{
memcpy(&mCurBlock[mTail], entry, sizeof(T));
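
fifo.hpp stops naming a concrete arena type: clear() and enqueue_try_nosync()
become member templates, so the queue works with any arena exposing the same
allocation interface. A hedged usage sketch (QUEUE's template parameter and
the allocator wiring are assumed from the other hunks):

    QUEUE<BE_WORK> fifo;
    CachingArena arena(pContext->cachingArenaAllocator);
    fifo.clear(arena);              // ArenaT deduced as CachingArena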
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index e780ffbf175..36721e00beb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -881,7 +881,7 @@ static void GeometryShaderStage(
static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state, void** ppGsOut, void** ppCutBuffer,
void **ppStreamCutBuffer)
{
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
SWR_ASSERT(state.gsState.gsEnable);
// allocate arena space to hold GS output verts
@@ -1813,7 +1813,7 @@ void BinTriangles(
work.pfnWork = gRasterizerTable[rastState.scissorEnable][SWR_MULTISAMPLE_1X];
}
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
@@ -1985,7 +1985,7 @@ void BinPoints(
work.pfnWork = RasterizeSimplePoint;
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store attributes
@@ -2119,7 +2119,7 @@ void BinPoints(
work.pfnWork = RasterizeTriPoint;
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
@@ -2336,7 +2336,7 @@ void BinLines(
work.pfnWork = RasterizeLine;
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index ff25e82f0fe..ce8646fb28d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -290,7 +290,7 @@ INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
_ReadWriteBarrier();
// Cleanup memory allocations
- pDC->pArena->Reset();
+ pDC->pArena->Reset(true);
pDC->pTileMgr->initialize();
pContext->dcRing.Dequeue(); // Remove from tail
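
Draw-context cleanup now resets with removeAll = true. Only the call site is
in the diff context; a plausible shape for the corresponding branch inside
TArena::Reset, marked as an assumption since the hunk does not show it:

    if (removeAll)                        // assumed; branch not in the hunk
    {
        m_allocator.Free(m_pCurBlock);    // base pointer is the ArenaBlock
        m_pCurBlock = nullptr;            // itself, so the cache re-lists it
    }

The net effect is that an idle dcRing slot pins no arena memory; its blocks
sit in the shared cachingArenaAllocator where any draw context can claim them.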
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index f3c24dacb48..89c779e04d9 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -56,7 +56,7 @@ void DispatchQueue::operator delete(void *p)
_aligned_free(p);
}
-MacroTileMgr::MacroTileMgr(Arena& arena) : mArena(arena)
+MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index f3e1373b00f..cf9d2fea32a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -59,7 +59,8 @@ struct MacroTileQueue
//////////////////////////////////////////////////////////////////////////
/// @brief Clear fifo and unlock it.
- void clear(Arena& arena)
+ template <typename ArenaT>
+ void clear(ArenaT& arena)
{
mFifo.clear(arena);
}
@@ -71,7 +72,8 @@ struct MacroTileQueue
return mFifo.peek();
}
- bool enqueue_try_nosync(Arena& arena, const BE_WORK* entry)
+ template <typename ArenaT>
+ bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
{
return mFifo.enqueue_try_nosync(arena, entry);
}
@@ -104,7 +106,7 @@ private:
class MacroTileMgr
{
public:
- MacroTileMgr(Arena& arena);
+ MacroTileMgr(CachingArena& arena);
~MacroTileMgr()
{
for (auto &tile : mTiles)
@@ -142,7 +144,7 @@ public:
void operator delete (void *p);
private:
- Arena& mArena;
+ CachingArena& mArena;
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
// Any tile that has work queued to it is a dirty tile.