diff options
author | Jan Zielinski <[email protected]> | 2019-07-26 16:43:58 +0200 |
---|---|---|
committer | Jan Zielinski <[email protected]> | 2019-08-08 10:16:20 +0200 |
commit | 4f04f260d93268c35d7e447006607fe9f9e35895 (patch) | |
tree | 4d030cacc473112300c9cbd2390087acbb2369e4 /src/gallium/drivers/swr/rasterizer | |
parent | 365ad367f11692c1637ce4c0c4d06f5a4d4bcf38 (diff) |
swr/rasterizer: enable size accumulation in mem stats
A small refactoring is also performed, including renaming the JIT_MEM_CLIENT enum to MEM_CLIENT.
Reviewed-by: Alok Hota <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer')
11 files changed, 128 insertions, 104 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index 06e0f616f70..ba99391ae76 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -98,6 +98,8 @@ namespace ArchRast { uint32_t accessCountRead; uint32_t accessCountWrite; + uint32_t totalSizeRead; + uint32_t totalSizeWrite; uint64_t tscMin; uint64_t tscMax; }; @@ -113,7 +115,7 @@ namespace ArchRast typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap; MemoryTrackerMap trackedMemory = {}; - void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc) + void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size) { MemoryTrackerKey key; key.address = address; @@ -126,10 +128,12 @@ namespace ArchRast if (isRead) { i->second.accessCountRead++; + i->second.totalSizeRead += size; } else { i->second.accessCountWrite++; + i->second.totalSizeWrite += size; } i->second.tscMax = tsc; } @@ -140,12 +144,16 @@ namespace ArchRast if (isRead) { data.accessCountRead = 1; + data.totalSizeRead = size; data.accessCountWrite = 0; + data.totalSizeWrite = 0; } else { data.accessCountRead = 0; + data.totalSizeRead = 0; data.accessCountWrite = 1; + data.totalSizeWrite = size; } data.tscMin = tsc; data.tscMax = tsc; @@ -258,6 +266,7 @@ namespace ArchRast mAddressMask = (mAddressMask << 1) | 1; addressRangeBytes = addressRangeBytes >> 1; } + mMemGranularity = mAddressMask + 1; mAddressMask = ~mAddressMask; } @@ -666,7 +675,19 @@ namespace ArchRast virtual void Handle(const MemoryAccessEvent& event) { - mMemoryStats.TrackMemoryAccess(event.data.ptr, mAddressMask, event.data.isRead, event.data.tsc); + uint64_t trackAddr = event.data.ptr; + uint64_t nextAddr = (trackAddr & mAddressMask); + uint32_t sizeTracked = 0; + + while (sizeTracked < event.data.size) + { + 
nextAddr += mMemGranularity; + uint32_t size = nextAddr - trackAddr; + size = std::min(event.data.size, size); + mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size); + sizeTracked += size; + trackAddr = nextAddr; + } } virtual void Handle(const MemoryStatsEndEvent& event) @@ -678,6 +699,8 @@ namespace ArchRast i->first.address & mAddressMask, i->second.accessCountRead, i->second.accessCountWrite, + i->second.totalSizeRead, + i->second.totalSizeWrite, i->second.tscMin, i->second.tscMax); EventHandlerFile::Handle(mse); @@ -734,6 +757,7 @@ namespace ArchRast MemoryStats mMemoryStats = {}; uint64_t mAddressMask = 0; + uint64_t mMemGranularity = 0; }; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 1618e5faa4a..471bd0e286a 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -480,6 +480,8 @@ event MemoryStatsEvent uint64_t baseAddr; uint32_t accessCountRead; uint32_t accessCountWrite; + uint32_t totalSizeRead; + uint32_t totalSizeWrite; uint64_t tscMin; uint64_t tscMax; }; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index a435fa35998..8cf50879726 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -233,7 +233,17 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC, numSamples); if (pHotTile) { - pHotTile->state = (HOTTILE_STATE)pDesc->newTileState; + HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;; + if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR) + { + if (newState == HOTTILE_INVALID) + { + // This is OK for APIs that explicitly allow discards + // (for e.g. 
depth / stencil data) + //SWR_INVALID("Discarding valid data!"); + } + } + pHotTile->state = newState; } } } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index 21e3d47cf9d..5f359ed2113 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -32,7 +32,6 @@ #include "common/rdtsc_buckets.h" #include "builder_gfx_mem.h" - namespace SwrJit { using namespace llvm; @@ -45,20 +44,18 @@ namespace SwrJit mpfnTrackMemAccess = nullptr; mpParamSimDC = nullptr; mpWorkerData = nullptr; - } void BuilderGfxMem::NotifyPrivateContextSet() { } - void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) + void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage) { - SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL), + SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); } - ////////////////////////////////////////////////////////////////////////// /// @brief Generate a masked gather operation in LLVM IR. 
If not /// supported on the underlying platform, emulate it with loads @@ -72,7 +69,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer if (pBase->getType() == mInt64Ty) @@ -97,9 +94,8 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { - // address may be coming in as 64bit int now so get the pointer if (pBase->getType() == mInt64Ty) { @@ -111,19 +107,17 @@ namespace SwrJit } void BuilderGfxMem::SCATTERPS( - Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage) + Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) { - // address may be coming in as 64bit int now so get the pointer if (pDst->getType() == mInt64Ty) { pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0)); } - Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage); + Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage); } - Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset) { return ADD(base, offset); @@ -159,7 +153,6 @@ namespace SwrJit SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified."); - // address may be coming in as 64bit int now so get the pointer if (Ptr->getType() == mInt64Ty) { @@ -169,7 +162,7 @@ namespace SwrJit return Ptr; } - void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead) + void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead) { #if defined(KNOB_ENABLE_AR) if (!KNOB_TRACK_MEMORY_WORKING_SET) @@ -216,7 +209,7 @@ namespace SwrJit return; } - LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); 
TrackerHelper(Ptr, Ty, usage, true); @@ -225,7 +218,7 @@ namespace SwrJit return Builder::LOAD(Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); @@ -234,9 +227,8 @@ namespace SwrJit return Builder::LOAD(Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD( - Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); @@ -249,7 +241,7 @@ namespace SwrJit const std::initializer_list<uint32_t>& offset, const llvm::Twine& name, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertGFXMemoryParams(BasePtr, usage); @@ -274,14 +266,13 @@ namespace SwrJit return LOAD(BasePtr, name, Ty, usage); } - CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr, unsigned Align, Value* Mask, Value* PassThru, const Twine& Name, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); @@ -291,7 +282,7 @@ namespace SwrJit } StoreInst* - BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage) + BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, false); @@ -304,7 +295,7 @@ namespace SwrJit Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertGFXMemoryParams(BasePtr, usage); TrackerHelper(BasePtr, Ty, usage, false); @@ -314,7 +305,7 @@ namespace SwrJit } CallInst* BuilderGfxMem::MASKED_STORE( - Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, JIT_MEM_CLIENT usage) + Value* Val, Value* Ptr, unsigned Align, Value* 
Mask, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); @@ -327,7 +318,7 @@ namespace SwrJit Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy, const Twine& Name, - JIT_MEM_CLIENT /* usage */) + MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { @@ -339,7 +330,7 @@ namespace SwrJit Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy, const Twine& Name, - JIT_MEM_CLIENT /* usage */) + MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index 52bd3ac226c..b6e8ed1d760 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -51,22 +51,21 @@ namespace SwrJit virtual LoadInst* LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list<uint32_t>& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -74,61 +73,57 @@ namespace SwrJit Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + 
+ virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - - virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = 
JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); protected: - void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage); + void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage); virtual void NotifyPrivateContextSet(); virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); Value* TranslationHelper(Value* Ptr, Type* Ty); - void TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead); - + void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead); FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; } Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; } Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; } Value* GetParamSimDC() { return mpParamSimDC; } - Value* mpWorkerData; private: diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index 267c5442d2a..b32686c7583 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -34,7 +34,7 @@ namespace SwrJit { - void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage) + void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage) { SWR_ASSERT( ptr->getType() != mInt64Ty, @@ -93,26 +93,26 @@ namespace SwrJit return IN_BOUNDS_GEP(ptr, indices); } - LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* 
Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ty, Ptr, Name); } LoadInst* - Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, isVolatile, Name); @@ -122,7 +122,7 @@ namespace SwrJit const std::initializer_list<uint32_t>& indices, const llvm::Twine& name, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { std::vector<Value*> valIndices; for (auto i : indices) @@ -141,7 +141,7 @@ namespace SwrJit } StoreInst* - Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage) + Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage) { std::vector<Value*> valIndices; for (auto i : indices) @@ -186,7 +186,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); @@ -206,7 +206,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); @@ -243,7 +243,7 @@ namespace SwrJit Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32) @@ -262,7 +262,7 @@ namespace SwrJit Value* vMask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { switch (info.bpp / info.numComps) { @@ -336,7 +336,7 @@ namespace SwrJit Value* vMask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage) + 
MEM_CLIENT usage) { switch (info.bpp / info.numComps) { @@ -643,7 +643,7 @@ namespace SwrJit /// @param vOffsets - vector of byte offsets from pDst /// @param vMask - mask of valid lanes void Builder::SCATTERPS( - Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage) + Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) { AssertMemoryUsageParams(pDst, usage); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index ccf42c8dab0..49e132e3756 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -30,7 +30,7 @@ #pragma once public: -enum class JIT_MEM_CLIENT +enum class MEM_CLIENT { MEM_CLIENT_INTERNAL, GFX_MEM_CLIENT_FETCH, @@ -41,7 +41,7 @@ enum class JIT_MEM_CLIENT protected: virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); -void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage); +void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage); public: virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); @@ -57,23 +57,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList) Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList); virtual LoadInst* - LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* - LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", 
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list<uint32_t>& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -81,19 +81,19 @@ virtual CallInst* MASKED_LOAD(Value* Ptr, Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL) + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) { return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name); } -virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL) +virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) { return IRB()->CreateStore(Val, Ptr, isVolatile); } -virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); +virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); -virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL) +virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) { return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask); } @@ 
-112,14 +112,14 @@ void Gather4(const SWR_FORMAT format, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4PS(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -127,14 +127,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4DD(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -142,7 +142,7 @@ void GATHER4DD(const SWR_FORMAT_INFO& info, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); @@ -152,7 +152,7 @@ virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info, Value* vGatherInput, diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index ed6cac04d01..5b06de352dc 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -774,14 +774,15 @@ namespace 
SwrJit { SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); Value* fixed = nullptr; -#if 0 - // This doesn't work for negative numbers!! + +#if 0 // This doesn't work for negative numbers!! { fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), C(_MM_FROUND_TO_NEAREST_INT)), mSimdInt32Ty); } -#else + else +#endif { // Do round to nearest int on fractional bits first // Not entirely perfect for negative numbers, but close enough @@ -804,7 +805,7 @@ namespace SwrJit fixed = ASHR(vFixed, vExtraBits, name); } -#endif + return fixed; } @@ -845,8 +846,7 @@ namespace SwrJit { SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); Value* fixed = nullptr; -#if 1 - // KNOB_SIM_FAST_MATH? Below works correctly from a precision +#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision // standpoint... { fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 8601d0529bc..fe5b48e584b 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -205,7 +205,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) ? 
vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH) + MEM_CLIENT::GFX_MEM_CLIENT_FETCH) : vIndices = GetSimdValid32bitIndices(indices, pLastIndex); break; // incoming type is already 32bit int default: @@ -382,7 +382,7 @@ void FetchJit::CreateGatherOddFormats( if (info.bpp == 32) { pGather = - GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } else { @@ -416,7 +416,7 @@ void FetchJit::CreateGatherOddFormats( { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0)); Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); - STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); + STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } @@ -424,7 +424,7 @@ void FetchJit::CreateGatherOddFormats( { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); - STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); + STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } break; @@ -434,12 +434,12 @@ void FetchJit::CreateGatherOddFormats( // First 16-bits of data Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); - STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); + STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); // Last 8-bits of data pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0)); - xpSrc = ADD(xpSrc, C(2)); - STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); + xpSrc = ADD(xpSrc, C((int64_t)2)); + STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } @@ -750,7 +750,7 @@ 
void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask); + vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -763,7 +763,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, // offset base to the next components(zw) in the vertex to gather pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); - vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask); + vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -811,7 +811,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, vNewOffsets, vGatherMask, 1, - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } else { @@ -957,7 +957,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, vOffsets, vGatherMask, 1, - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of an 8x32bit integer gather for 8bit components // 256i - 0 1 2 3 4 5 6 7 // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw @@ -991,7 +991,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, vOffsets, vGatherMask, 1, - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. 
result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -1009,7 +1009,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, vOffsets, vGatherMask, 1, - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1050,7 +1050,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, vOffsets, vGatherMask, 1, - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + MEM_CLIENT::GFX_MEM_CLIENT_FETCH); if (conversionType == CONVERT_USCALED) { @@ -1147,7 +1147,7 @@ Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex) // if valid, load the index. if not, load 0 from the stack Value* pValid = SELECT(mask, pIndex, pZeroIndex); - Value* index = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + Value* index = LOAD(pValid, "valid index", Ty, MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // zero extended index to 32 bits and insert into the correct simd lane index = Z_EXT(index, mInt32Ty); @@ -1222,7 +1222,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex) VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), - JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); + MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h index cc986a78e0a..dcb051c3b53 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h @@ -83,6 +83,8 @@ void JITCALL JitDestroyContext(HANDLE hJitContext); /// @param output - Output containing information about JIT shader ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& 
input); +ShaderInfo* JITCALL JitGetShader(HANDLE hJitContext, const char* name); + ////////////////////////////////////////////////////////////////////////// /// @brief JIT destroy shader. /// @param hJitContext - Jit Context diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp index c47acf73228..13e70a7f90a 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp @@ -155,7 +155,7 @@ struct StreamOutJit : public BuilderGfxMem // cast mask to <4xi1> Value* mask = ToMask(packedMask); - MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT); + MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT); } // increment SO buffer @@ -223,7 +223,7 @@ struct StreamOutJit : public BuilderGfxMem Value* pBuf = getSOBuffer(pSoCtx, b); Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer}); Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset}); - pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0)); + pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0)); pOutBufferStartVertex[b] = pOutBuffer[b]; outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch}); |