summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorJan Zielinski <[email protected]>2019-07-26 16:43:58 +0200
committerJan Zielinski <[email protected]>2019-08-08 10:16:20 +0200
commit4f04f260d93268c35d7e447006607fe9f9e35895 (patch)
tree4d030cacc473112300c9cbd2390087acbb2369e4 /src/gallium/drivers
parent365ad367f11692c1637ce4c0c4d06f5a4d4bcf38 (diff)
swr/rasterizer: enable size accumulation in mem stats
Small refactoring is also performed Reviewed-by: Alok Hota <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp28
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/events.proto2
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp12
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp43
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h37
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp26
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h34
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp12
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp32
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/jit_api.h2
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp4
11 files changed, 128 insertions, 104 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 06e0f616f70..ba99391ae76 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -98,6 +98,8 @@ namespace ArchRast
{
uint32_t accessCountRead;
uint32_t accessCountWrite;
+ uint32_t totalSizeRead;
+ uint32_t totalSizeWrite;
uint64_t tscMin;
uint64_t tscMax;
};
@@ -113,7 +115,7 @@ namespace ArchRast
typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
MemoryTrackerMap trackedMemory = {};
- void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc)
+ void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
{
MemoryTrackerKey key;
key.address = address;
@@ -126,10 +128,12 @@ namespace ArchRast
if (isRead)
{
i->second.accessCountRead++;
+ i->second.totalSizeRead += size;
}
else
{
i->second.accessCountWrite++;
+ i->second.totalSizeWrite += size;
}
i->second.tscMax = tsc;
}
@@ -140,12 +144,16 @@ namespace ArchRast
if (isRead)
{
data.accessCountRead = 1;
+ data.totalSizeRead = size;
data.accessCountWrite = 0;
+ data.totalSizeWrite = 0;
}
else
{
data.accessCountRead = 0;
+ data.totalSizeRead = 0;
data.accessCountWrite = 1;
+ data.totalSizeWrite = size;
}
data.tscMin = tsc;
data.tscMax = tsc;
@@ -258,6 +266,7 @@ namespace ArchRast
mAddressMask = (mAddressMask << 1) | 1;
addressRangeBytes = addressRangeBytes >> 1;
}
+ mMemGranularity = mAddressMask + 1;
mAddressMask = ~mAddressMask;
}
@@ -666,7 +675,19 @@ namespace ArchRast
virtual void Handle(const MemoryAccessEvent& event)
{
- mMemoryStats.TrackMemoryAccess(event.data.ptr, mAddressMask, event.data.isRead, event.data.tsc);
+ uint64_t trackAddr = event.data.ptr;
+ uint64_t nextAddr = (trackAddr & mAddressMask);
+ uint32_t sizeTracked = 0;
+
+ while (sizeTracked < event.data.size)
+ {
+ nextAddr += mMemGranularity;
+ uint32_t size = nextAddr - trackAddr;
+ size = std::min(event.data.size, size);
+ mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);
+ sizeTracked += size;
+ trackAddr = nextAddr;
+ }
}
virtual void Handle(const MemoryStatsEndEvent& event)
@@ -678,6 +699,8 @@ namespace ArchRast
i->first.address & mAddressMask,
i->second.accessCountRead,
i->second.accessCountWrite,
+ i->second.totalSizeRead,
+ i->second.totalSizeWrite,
i->second.tscMin,
i->second.tscMax);
EventHandlerFile::Handle(mse);
@@ -734,6 +757,7 @@ namespace ArchRast
MemoryStats mMemoryStats = {};
uint64_t mAddressMask = 0;
+ uint64_t mMemGranularity = 0;
};
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 1618e5faa4a..471bd0e286a 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -480,6 +480,8 @@ event MemoryStatsEvent
uint64_t baseAddr;
uint32_t accessCountRead;
uint32_t accessCountWrite;
+ uint32_t totalSizeRead;
+ uint32_t totalSizeWrite;
uint64_t tscMin;
uint64_t tscMax;
};
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index a435fa35998..8cf50879726 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -233,7 +233,17 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
numSamples);
if (pHotTile)
{
- pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
+ HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;;
+ if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR)
+ {
+ if (newState == HOTTILE_INVALID)
+ {
+ // This is OK for APIs that explicitly allow discards
+ // (for e.g. depth / stencil data)
+ //SWR_INVALID("Discarding valid data!");
+ }
+ }
+ pHotTile->state = newState;
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
index 21e3d47cf9d..5f359ed2113 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
@@ -32,7 +32,6 @@
#include "common/rdtsc_buckets.h"
#include "builder_gfx_mem.h"
-
namespace SwrJit
{
using namespace llvm;
@@ -45,20 +44,18 @@ namespace SwrJit
mpfnTrackMemAccess = nullptr;
mpParamSimDC = nullptr;
mpWorkerData = nullptr;
-
}
void BuilderGfxMem::NotifyPrivateContextSet()
{
}
- void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
+ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
{
- SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL),
+ SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
}
-
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
@@ -72,7 +69,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
@@ -97,9 +94,8 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
-
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
{
@@ -111,19 +107,17 @@ namespace SwrJit
}
void BuilderGfxMem::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
-
// address may be coming in as 64bit int now so get the pointer
if (pDst->getType() == mInt64Ty)
{
pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
}
- Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage);
+ Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
}
-
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return ADD(base, offset);
@@ -159,7 +153,6 @@ namespace SwrJit
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
-
// address may be coming in as 64bit int now so get the pointer
if (Ptr->getType() == mInt64Ty)
{
@@ -169,7 +162,7 @@ namespace SwrJit
return Ptr;
}
- void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead)
+ void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
{
#if defined(KNOB_ENABLE_AR)
if (!KNOB_TRACK_MEMORY_WORKING_SET)
@@ -216,7 +209,7 @@ namespace SwrJit
return;
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@@ -225,7 +218,7 @@ namespace SwrJit
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@@ -234,9 +227,8 @@ namespace SwrJit
return Builder::LOAD(Ptr, Name);
}
-
LoadInst* BuilderGfxMem::LOAD(
- Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@@ -249,7 +241,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
@@ -274,14 +266,13 @@ namespace SwrJit
return LOAD(BasePtr, name, Ty, usage);
}
-
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* Mask,
Value* PassThru,
const Twine& Name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@@ -291,7 +282,7 @@ namespace SwrJit
}
StoreInst*
- BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage)
+ BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, false);
@@ -304,7 +295,7 @@ namespace SwrJit
Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
TrackerHelper(BasePtr, Ty, usage, false);
@@ -314,7 +305,7 @@ namespace SwrJit
}
CallInst* BuilderGfxMem::MASKED_STORE(
- Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, JIT_MEM_CLIENT usage)
+ Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
@@ -327,7 +318,7 @@ namespace SwrJit
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- JIT_MEM_CLIENT /* usage */)
+ MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
@@ -339,7 +330,7 @@ namespace SwrJit
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- JIT_MEM_CLIENT /* usage */)
+ MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
index 52bd3ac226c..b6e8ed1d760 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
@@ -51,22 +51,21 @@ namespace SwrJit
virtual LoadInst* LOAD(Value* Ptr,
const char* Name,
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@@ -74,61 +73,57 @@ namespace SwrJit
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
protected:
- void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
+ void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
Value* TranslationHelper(Value* Ptr, Type* Ty);
- void TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead);
-
+ void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value* GetParamSimDC() { return mpParamSimDC; }
-
Value* mpWorkerData;
private:
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 267c5442d2a..b32686c7583 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -34,7 +34,7 @@
namespace SwrJit
{
- void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
+ void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
{
SWR_ASSERT(
ptr->getType() != mInt64Ty,
@@ -93,26 +93,26 @@ namespace SwrJit
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
LoadInst*
- Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
@@ -122,7 +122,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& indices,
const llvm::Twine& name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@@ -141,7 +141,7 @@ namespace SwrJit
}
StoreInst*
- Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage)
+ Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@@ -186,7 +186,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@@ -206,7 +206,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@@ -243,7 +243,7 @@ namespace SwrJit
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
@@ -262,7 +262,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -336,7 +336,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -643,7 +643,7 @@ namespace SwrJit
/// @param vOffsets - vector of byte offsets from pDst
/// @param vMask - mask of valid lanes
void Builder::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
AssertMemoryUsageParams(pDst, usage);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index ccf42c8dab0..49e132e3756 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -30,7 +30,7 @@
#pragma once
public:
-enum class JIT_MEM_CLIENT
+enum class MEM_CLIENT
{
MEM_CLIENT_INTERNAL,
GFX_MEM_CLIENT_FETCH,
@@ -41,7 +41,7 @@ enum class JIT_MEM_CLIENT
protected:
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
public:
virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
@@ -57,23 +57,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
virtual LoadInst*
- LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst*
- LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@@ -81,19 +81,19 @@ virtual CallInst* MASKED_LOAD(Value* Ptr,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
-virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateStore(Val, Ptr, isVolatile);
}
-virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
}
@@ -112,14 +112,14 @@ void Gather4(const SWR_FORMAT format,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@@ -127,14 +127,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@@ -142,7 +142,7 @@ void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
@@ -152,7 +152,7 @@ virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
Value* vGatherInput,
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index ed6cac04d01..5b06de352dc 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -774,14 +774,15 @@ namespace SwrJit
{
SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
Value* fixed = nullptr;
-#if 0
- // This doesn't work for negative numbers!!
+
+#if 0 // This doesn't work for negative numbers!!
{
fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
C(_MM_FROUND_TO_NEAREST_INT)),
mSimdInt32Ty);
}
-#else
+ else
+#endif
{
// Do round to nearest int on fractional bits first
// Not entirely perfect for negative numbers, but close enough
@@ -804,7 +805,7 @@ namespace SwrJit
fixed = ASHR(vFixed, vExtraBits, name);
}
-#endif
+
return fixed;
}
@@ -845,8 +846,7 @@ namespace SwrJit
{
SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
Value* fixed = nullptr;
-#if 1
- // KNOB_SIM_FAST_MATH? Below works correctly from a precision
+#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision
// standpoint...
{
fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 8601d0529bc..fe5b48e584b 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -205,7 +205,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
? vIndices = LOAD(indices,
"",
PointerType::get(mSimdInt32Ty, 0),
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
break; // incoming type is already 32bit int
default:
@@ -382,7 +382,7 @@ void FetchJit::CreateGatherOddFormats(
if (info.bpp == 32)
{
pGather =
- GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
else
{
@@ -416,7 +416,7 @@ void FetchJit::CreateGatherOddFormats(
{
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
- STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
@@ -424,7 +424,7 @@ void FetchJit::CreateGatherOddFormats(
{
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
- STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
break;
@@ -434,12 +434,12 @@ void FetchJit::CreateGatherOddFormats(
// First 16-bits of data
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
- STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
// Last 8-bits of data
pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0));
- xpSrc = ADD(xpSrc, C(2));
- STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ xpSrc = ADD(xpSrc, C((int64_t)2));
+ STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
@@ -750,7 +750,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
// if we have at least one component out of x or y to fetch
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
- vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
+ vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -763,7 +763,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
// offset base to the next components(zw) in the vertex to gather
pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
- vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
+ vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@@ -811,7 +811,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vNewOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
else
{
@@ -957,7 +957,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
@@ -991,7 +991,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -1009,7 +1009,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@@ -1050,7 +1050,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
if (conversionType == CONVERT_USCALED)
{
@@ -1147,7 +1147,7 @@ Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
// if valid, load the index. if not, load 0 from the stack
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value* index = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ Value* index = LOAD(pValid, "valid index", Ty, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// zero extended index to 32 bits and insert into the correct simd lane
index = Z_EXT(index, mInt32Ty);
@@ -1222,7 +1222,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
VIMMED1(0),
"vIndices",
PointerType::get(mSimdInt32Ty, 0),
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
index cc986a78e0a..dcb051c3b53 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
@@ -83,6 +83,8 @@ void JITCALL JitDestroyContext(HANDLE hJitContext);
/// @param output - Output containing information about JIT shader
ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
+ShaderInfo* JITCALL JitGetShader(HANDLE hJitContext, const char* name);
+
//////////////////////////////////////////////////////////////////////////
/// @brief JIT destroy shader.
/// @param hJitContext - Jit Context
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index c47acf73228..13e70a7f90a 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -155,7 +155,7 @@ struct StreamOutJit : public BuilderGfxMem
// cast mask to <4xi1>
Value* mask = ToMask(packedMask);
- MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
+ MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
}
// increment SO buffer
@@ -223,7 +223,7 @@ struct StreamOutJit : public BuilderGfxMem
Value* pBuf = getSOBuffer(pSoCtx, b);
Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
- pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
+ pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
pOutBufferStartVertex[b] = pOutBuffer[b];
outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});