From 81371a59093d59963a43b7f1becbed9d3c657e45 Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Thu, 5 Apr 2018 12:08:15 -0500 Subject: swr/rast: Change gfx pointers to gfxptr_t Changing type to gfxptr for indices and related changes to fetch and mem builder code. Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 4 +- src/gallium/drivers/swr/rasterizer/core/api.cpp | 8 +- src/gallium/drivers/swr/rasterizer/core/context.h | 2 +- .../drivers/swr/rasterizer/core/frontend.cpp | 40 ++++----- src/gallium/drivers/swr/rasterizer/core/state.h | 6 +- .../swr/rasterizer/jitter/builder_gfx_mem.cpp | 80 +++++++++++++++-- .../swr/rasterizer/jitter/builder_gfx_mem.h | 24 ++++-- .../drivers/swr/rasterizer/jitter/builder_mem.cpp | 35 ++++++-- .../drivers/swr/rasterizer/jitter/builder_mem.h | 23 +++-- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 99 ++++++++++++---------- src/gallium/drivers/swr/swr_state.cpp | 2 +- 11 files changed, 220 insertions(+), 103 deletions(-) (limited to 'src/gallium/drivers/swr') diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index bdd785a155d..2636e60ae9a 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -162,7 +162,9 @@ def parse_ir_builder(input_file): if (func_name == 'CreateInsertNUWNSWBinOp' or func_name == 'CreateMaskedIntrinsic' or func_name == 'CreateAlignmentAssumptionHelper' or - func_name == 'CreateLoad'): + func_name == 'CreateGEP' or + func_name == 'CreateLoad' or + func_name == 'CreateMaskedLoad'): ignore = True # Convert CamelCase to CAMEL_CASE diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 53bd2d28555..3141db69ef1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ 
-1321,8 +1321,8 @@ void DrawIndexedInstance( } int draw = 0; - uint8_t *pIB = (uint8_t*)pState->indexBuffer.pIndices; - pIB += (uint64_t)indexOffset * (uint64_t)indexSize; + gfxptr_t xpIB = pState->indexBuffer.xpIndices; + xpIB += (uint64_t)indexOffset * (uint64_t)indexSize; pState->topology = topology; pState->forceFront = false; @@ -1360,7 +1360,7 @@ void DrawIndexedInstance( pDC->pState->pfnProcessPrims != nullptr); pDC->FeWork.desc.draw.pDC = pDC; pDC->FeWork.desc.draw.numIndices = numIndicesForDraw; - pDC->FeWork.desc.draw.pIB = (int*)pIB; + pDC->FeWork.desc.draw.xpIB = xpIB; pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format; pDC->FeWork.desc.draw.numInstances = numInstances; @@ -1376,7 +1376,7 @@ void DrawIndexedInstance( AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndicesForDraw, indexOffset, baseVertex, numInstances, startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw)); - pIB += maxIndicesPerDraw * indexSize; + xpIB += maxIndicesPerDraw * indexSize; remainingIndices -= numIndicesForDraw; draw++; } diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 489aa7862cb..7bc69f50723 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -176,7 +176,7 @@ struct DRAW_WORK }; union { - const int32_t* pIB; // DrawIndexed: App supplied indices + gfxptr_t xpIB; // DrawIndexed: App supplied int32 indices uint32_t startVertex; // Draw: Starting vertex in VB to render from. 
}; int32_t baseVertex; diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 20768599120..30c2e7bab51 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1527,28 +1527,24 @@ void ProcessDraw( uint32_t indexSize = 0; uint32_t endVertex = work.numVerts; - const int32_t* pLastRequestedIndex = nullptr; + gfxptr_t xpLastRequestedIndex = 0; if (IsIndexedT::value) { switch (work.type) { case R32_UINT: indexSize = sizeof(uint32_t); - pLastRequestedIndex = &(work.pIB[endVertex]); break; case R16_UINT: indexSize = sizeof(uint16_t); - // nasty address offset to last index - pLastRequestedIndex = (int32_t*)(&(((uint16_t*)work.pIB)[endVertex])); break; case R8_UINT: indexSize = sizeof(uint8_t); - // nasty address offset to last index - pLastRequestedIndex = (int32_t*)(&(((uint8_t*)work.pIB)[endVertex])); break; default: SWR_INVALID("Invalid work.type: %d", work.type); } + xpLastRequestedIndex = work.xpIB + endVertex * indexSize; } else { @@ -1660,10 +1656,10 @@ void ProcessDraw( // if the entire index buffer isn't being consumed, set the last index // so that fetches < a SIMD wide will be masked off - fetchInfo_lo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size); - if (pLastRequestedIndex < fetchInfo_lo.pLastIndex) + fetchInfo_lo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size; + if (xpLastRequestedIndex < fetchInfo_lo.xpLastIndex) { - fetchInfo_lo.pLastIndex = pLastRequestedIndex; + fetchInfo_lo.xpLastIndex = xpLastRequestedIndex; } } else @@ -1683,15 +1679,15 @@ void ProcessDraw( if (IsIndexedT::value) { - fetchInfo_lo.pIndices = work.pIB; - fetchInfo_hi.pIndices = (int32_t *)((uint8_t *)fetchInfo_lo.pIndices + KNOB_SIMD_WIDTH * indexSize); // 1/2 of KNOB_SIMD16_WIDTH + fetchInfo_lo.xpIndices = work.xpIB; + fetchInfo_hi.xpIndices = fetchInfo_lo.xpIndices + 
KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH } else { vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale); - fetchInfo_lo.pIndices = (const int32_t *)&vIndex; - fetchInfo_hi.pIndices = (const int32_t *)&vIndex + KNOB_SIMD_WIDTH; // 1/2 of KNOB_SIMD16_WIDTH + fetchInfo_lo.xpIndices = (gfxptr_t)&vIndex; + fetchInfo_hi.xpIndices = (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH } fetchInfo_lo.CurInstance = instanceNum; @@ -1725,18 +1721,18 @@ void ProcessDraw( { if (!IsIndexedT::value) { - fetchInfo_lo.pLastIndex = fetchInfo_lo.pIndices; + fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices; uint32_t offset; offset = std::min(endVertex-i, (uint32_t) KNOB_SIMD16_WIDTH); #if USE_SIMD16_SHADERS offset *= 4; // convert from index to address - fetchInfo_lo.pLastIndex += offset; + fetchInfo_lo.xpLastIndex += offset; #else - fetchInfo_lo.pLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address + fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address uint32_t offset2 = std::min(offset, (uint32_t) KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH; assert(offset >= 0); - fetchInfo_hi.pLastIndex = fetchInfo_hi.pIndices; - fetchInfo_hi.pLastIndex += offset2 * 4; // * 4 for converting index to address + fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices; + fetchInfo_hi.xpLastIndex += offset2 * 4; // * 4 for converting index to address #endif } // 1. Execute FS/VS for a single SIMD. 
@@ -1919,8 +1915,8 @@ void ProcessDraw( if (IsIndexedT::value) { - fetchInfo_lo.pIndices = (int32_t *)((uint8_t*)fetchInfo_lo.pIndices + KNOB_SIMD16_WIDTH * indexSize); - fetchInfo_hi.pIndices = (int32_t *)((uint8_t*)fetchInfo_hi.pIndices + KNOB_SIMD16_WIDTH * indexSize); + fetchInfo_lo.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD16_WIDTH * indexSize; + fetchInfo_hi.xpIndices = fetchInfo_hi.xpIndices + KNOB_SIMD16_WIDTH * indexSize; } else { @@ -1948,9 +1944,9 @@ void ProcessDraw( // if the entire index buffer isn't being consumed, set the last index // so that fetches < a SIMD wide will be masked off fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size); - if (pLastRequestedIndex < fetchInfo.pLastIndex) + if (xpLastRequestedIndex < fetchInfo.pLastIndex) { - fetchInfo.pLastIndex = pLastRequestedIndex; + fetchInfo.pLastIndex = xpLastRequestedIndex; } } else diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 92334469ed6..cdb30f60fdf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -582,7 +582,7 @@ struct SWR_VERTEX_BUFFER_STATE struct SWR_INDEX_BUFFER_STATE { - const void *pIndices; + gfxptr_t xpIndices; // Format type for indices (e.g. UINT16, UINT32, etc.) 
SWR_FORMAT format; // @llvm_enum uint32_t size; @@ -598,8 +598,8 @@ struct SWR_INDEX_BUFFER_STATE struct SWR_FETCH_CONTEXT { const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers - const int32_t* pIndices; // IN: pointer to index buffer for indexed draws - const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking + gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws + gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking uint32_t CurInstance; // IN: current instance uint32_t BaseVertex; // IN: base vertex uint32_t StartVertex; // IN: start vertex diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index 44fe776d340..6ecd96978dd 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -40,6 +40,7 @@ namespace SwrJit BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) : Builder(pJitMgr) { + mpTranslationFuncTy = nullptr; mpfnTranslateGfxAddress = nullptr; mpParamSimDC = nullptr; @@ -51,8 +52,7 @@ namespace SwrJit void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) { - SWR_ASSERT(ptr->getType() == mInt64Ty, "GFX addresses must be gfxptr_t and not converted to system pointers."); - SWR_ASSERT(usage != MEM_CLIENT_INTERNAL, "Internal memory should not go through the translation path and should not be gfxptr_t."); + SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); } ////////////////////////////////////////////////////////////////////////// @@ -106,32 +106,94 @@ namespace SwrJit return ADD(base, offset); } - LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage) + Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name) { - // the 64 bit gfx 
pointers are not yet propagated up the stack - // so there is some casting in here and the test for type is not yet enabled + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ptr, Idx, nullptr, Name); + } + + Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name) + { + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ty, Ptr, Idx, Name); + } + + Value *BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list &indexList, Type *Ty) + { + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ptr, indexList); + } + + Value *BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list &indexList, Type *Ty) + { + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ptr, indexList); + } + + Value* BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty) + { + SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified."); + + + // address may be coming in as 64bit int now so get the pointer + if (Ptr->getType() == mInt64Ty) + { + Ptr = INT_TO_PTR(Ptr, Ty); + } + + return Ptr; + } + + LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage) + { + AssertGFXMemoryParams(Ptr, usage); + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage) { + AssertGFXMemoryParams(Ptr, usage); + + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ptr, Name); } LoadInst* BuilderGfxMem::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage) { + AssertGFXMemoryParams(Ptr, usage); + + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ty, Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, 
JIT_MEM_CLIENT usage) { + AssertGFXMemoryParams(Ptr, usage); + + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ptr, isVolatile, Name); } - LoadInst *BuilderGfxMem::LOAD(Value *BasePtr, const std::initializer_list &offset, const llvm::Twine& name, JIT_MEM_CLIENT usage) + LoadInst *BuilderGfxMem::LOAD(Value *BasePtr, const std::initializer_list &offset, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage) { - return Builder::LOAD(BasePtr, offset, name); + AssertGFXMemoryParams(BasePtr, usage); + + // This call is just a pass through to the base class. + // It needs to be here to compile due to the combination of virtual overrides and signature overloads. + // It doesn't do anything meaningful because the implementation in the base class is going to call + // another version of LOAD inside itself where the actual per offset translation will take place + // and we can't just translate the BasePtr once, each address needs individual translation. + return Builder::LOAD(BasePtr, offset, name, Ty, usage); + } + + CallInst* BuilderGfxMem::MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage) + { + AssertGFXMemoryParams(Ptr, usage); + + Ptr = TranslationHelper(Ptr, Ty); + return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage); } Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index ab53583c61c..f8ec0acdec3 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -41,11 +41,18 @@ namespace SwrJit BuilderGfxMem(JitManager* pJitMgr); virtual ~BuilderGfxMem() {} - virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = 
MEM_CLIENT_INTERNAL); + virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = ""); + virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = ""); + virtual Value *GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr); + virtual Value *GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr); + + virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + + virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); @@ -62,8 +69,15 @@ namespace SwrJit virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset); + Value* TranslationHelper(Value *Ptr, Type *Ty); + + FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; } + Value* GetTranslationFunction() { return mpfnTranslateGfxAddress; } + Value* GetParamSimDC()
{ return mpParamSimDC; } + private: - + + FunctionType* mpTranslationFuncTy; Value* mpfnTranslateGfxAddress; Value* mpParamSimDC; }; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index 4be5f29061e..c5f0b2b9fe0 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -41,7 +41,17 @@ namespace SwrJit SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem."); } - Value *Builder::GEP(Value* ptr, const std::initializer_list &indexList) + Value *Builder::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name) + { + return IRB()->CreateGEP(Ptr, Idx, Name); + } + + Value *Builder::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name) + { + return IRB()->CreateGEP(Ty, Ptr, Idx, Name); + } + + Value *Builder::GEP(Value* ptr, const std::initializer_list &indexList, Type *Ty) { std::vector indices; for (auto i : indexList) @@ -49,7 +59,7 @@ namespace SwrJit return GEPA(ptr, indices); } - Value *Builder::GEP(Value* ptr, const std::initializer_list &indexList) + Value *Builder::GEP(Value* ptr, const std::initializer_list &indexList, Type *Ty) { std::vector indices; for (auto i : indexList) @@ -57,6 +67,16 @@ namespace SwrJit return GEPA(ptr, indices); } + Value *Builder::GEPA(Value *Ptr, ArrayRef IdxList, const Twine &Name) + { + return IRB()->CreateGEP(Ptr, IdxList, Name); + } + + Value *Builder::GEPA(Type *Ty, Value *Ptr, ArrayRef IdxList, const Twine &Name) + { + return IRB()->CreateGEP(Ty, Ptr, IdxList, Name); + } + Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list &indexList) { std::vector indices; @@ -73,13 +93,13 @@ namespace SwrJit return IN_BOUNDS_GEP(ptr, indices); } - LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value *Ptr, const char *Name, Type *Ty, 
JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); @@ -91,19 +111,18 @@ namespace SwrJit return IRB()->CreateLoad(Ty, Ptr, Name); } - LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, isVolatile, Name); } - LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage) + LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list &indices, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage) { - AssertMemoryUsageParams(basePtr, usage); std::vector valIndices; for (auto i : indices) valIndices.push_back(C(i)); - return LOAD(GEPA(basePtr, valIndices), name); + return Builder::LOAD(GEPA(basePtr, valIndices), name); } LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list &indices, const llvm::Twine& name) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index 5ca96e7d86c..f229da38a94 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -45,16 +45,27 @@ void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage); public: -Value *GEP(Value* ptr, const std::initializer_list &indexList); -Value *GEP(Value* ptr, const std::initializer_list &indexList); +virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = ""); +virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = ""); +virtual 
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr); +virtual Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr); + +Value *GEPA(Value *Ptr, ArrayRef<Value*> IdxList, const Twine &Name = ""); +Value *GEPA(Type *Ty, Value *Ptr, ArrayRef<Value*> IdxList, const Twine &Name = ""); + Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList); Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList); -virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); -virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); +virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); +virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); -virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); -virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); +virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); +virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + +virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL) +{ + return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name); +} LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = ""); StoreInst
*STORE(Value *Val, Value *BasePtr, const std::initializer_list &offset); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index c6ff6a8b8ac..0fbfd211ef7 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -133,11 +133,11 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) streams->setName("pStreams"); // SWR_FETCH_CONTEXT::pIndices - Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pIndices}); + Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpIndices}); indices->setName("pIndices"); // SWR_FETCH_CONTEXT::pLastIndex - Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pLastIndex}); + Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpLastIndex}); pLastIndex->setName("pLastIndex"); Value* vIndices; @@ -152,12 +152,10 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) } else { - pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0)); vIndices = GetSimdValid8bitIndices(indices, pLastIndex); } break; case R16_UINT: - indices = BITCAST(indices, Type::getInt16PtrTy(JM()->mContext, 0)); if(fetchState.bDisableIndexOOBCheck) { vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0}); @@ -165,12 +163,11 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) } else { - pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0)); vIndices = GetSimdValid16bitIndices(indices, pLastIndex); } break; case R32_UINT: - (fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(BITCAST(indices, PointerType::get(mSimdInt32Ty,0)),{(uint32_t)0}) + (fetchState.bDisableIndexOOBCheck) ? 
vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH) : vIndices = GetSimdValid32bitIndices(indices, pLastIndex); break; // incoming type is already 32bit int default: @@ -967,6 +964,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, } } +typedef void*(*PFN_TRANSLATEGFXADDRESS_FUNC)(void* pdc, gfxptr_t va); +extern "C" void GetSimdValid8bitIndicesGfx(gfxptr_t indices, gfxptr_t lastIndex, uint32_t vWidth, PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, void* pdc, uint32_t* outIndices); +extern "C" void GetSimdValid16bitIndicesGfx(gfxptr_t indices, gfxptr_t lastIndex, uint32_t vWidth, PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, void* pdc, uint32_t* outIndices); + ////////////////////////////////////////////////////////////////////////// /// @brief Loads a simd of valid indices. OOB indices are set to 0 /// *Note* have to do 16bit index checking in scalar until we have AVX-512 @@ -975,30 +976,36 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, /// @param pLastIndex - pointer to last valid index Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex) { - // can fit 2 16 bit integers per vWidth lane - Value* vIndices = VUNDEF_I(); + SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters."); - // store 0 index on stack to be used to conditionally load from if index address is OOB - Value* pZeroIndex = ALLOCA(mInt8Ty); - STORE(C((uint8_t)0), pZeroIndex); + Value* vIndices = VUNDEF_I(); - // Load a SIMD of index pointers - for(int64_t lane = 0; lane < mVWidth; lane++) { - // Calculate the address of the requested index - Value *pIndex = GEP(pIndices, C(lane)); + // store 0 index on stack to be used to conditionally load from if index address is OOB + Value* pZeroIndex = ALLOCA(mInt8Ty); + STORE(C((uint8_t)0), pZeroIndex); + + // Load a SIMD of index pointers + for (int64_t lane = 0; lane < 
mVWidth; lane++) + { + // Calculate the address of the requested index + Value *pIndex = GEP(pIndices, C(lane), mInt8PtrTy); - // check if the address is less than the max index, - Value* mask = ICMP_ULT(pIndex, pLastIndex); + pLastIndex = INT_TO_PTR(pLastIndex, mInt8PtrTy); - // if valid, load the index. if not, load 0 from the stack - Value* pValid = SELECT(mask, pIndex, pZeroIndex); - Value *index = LOAD(pValid, "valid index"); + // check if the address is less than the max index, + Value* mask = ICMP_ULT(pIndex, pLastIndex); - // zero extended index to 32 bits and insert into the correct simd lane - index = Z_EXT(index, mInt32Ty); - vIndices = VINSERT(vIndices, index, lane); + // if valid, load the index. if not, load 0 from the stack + Value* pValid = SELECT(mask, pIndex, pZeroIndex); + Value *index = LOAD(pValid, "valid index", PointerType::get(mInt8Ty, 0), GFX_MEM_CLIENT_FETCH); + + // zero extended index to 32 bits and insert into the correct simd lane + index = Z_EXT(index, mInt32Ty); + vIndices = VINSERT(vIndices, index, lane); + } } + return vIndices; } @@ -1010,30 +1017,36 @@ Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex) /// @param pLastIndex - pointer to last valid index Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex) { - // can fit 2 16 bit integers per vWidth lane - Value* vIndices = VUNDEF_I(); + SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters."); - // store 0 index on stack to be used to conditionally load from if index address is OOB - Value* pZeroIndex = ALLOCA(mInt16Ty); - STORE(C((uint16_t)0), pZeroIndex); + Value* vIndices = VUNDEF_I(); - // Load a SIMD of index pointers - for(int64_t lane = 0; lane < mVWidth; lane++) { - // Calculate the address of the requested index - Value *pIndex = GEP(pIndices, C(lane)); + // store 0 index on stack to be used to conditionally load from if index address is OOB 
+ Value* pZeroIndex = ALLOCA(mInt16Ty); + STORE(C((uint16_t)0), pZeroIndex); - // check if the address is less than the max index, - Value* mask = ICMP_ULT(pIndex, pLastIndex); + // Load a SIMD of index pointers + for (int64_t lane = 0; lane < mVWidth; lane++) + { + // Calculate the address of the requested index + Value *pIndex = GEP(pIndices, C(lane), mInt16PtrTy); + + pLastIndex = INT_TO_PTR(pLastIndex, mInt16PtrTy); + + // check if the address is less than the max index, + Value* mask = ICMP_ULT(pIndex, pLastIndex); - // if valid, load the index. if not, load 0 from the stack - Value* pValid = SELECT(mask, pIndex, pZeroIndex); - Value *index = LOAD(pValid, "valid index", GFX_MEM_CLIENT_FETCH); + // if valid, load the index. if not, load 0 from the stack + Value* pValid = SELECT(mask, pIndex, pZeroIndex); + Value *index = LOAD(pValid, "valid index", PointerType::get(mInt16Ty, 0), GFX_MEM_CLIENT_FETCH); - // zero extended index to 32 bits and insert into the correct simd lane - index = Z_EXT(index, mInt32Ty); - vIndices = VINSERT(vIndices, index, lane); + // zero extended index to 32 bits and insert into the correct simd lane + index = Z_EXT(index, mInt32Ty); + vIndices = VINSERT(vIndices, index, lane); + } } + return vIndices; } @@ -1045,8 +1058,8 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex) { DataLayout dL(JM()->mpCurrentModule); unsigned int ptrSize = dL.getPointerSize() * 8; // ptr size in bits - Value* iLastIndex = PTR_TO_INT(pLastIndex, Type::getIntNTy(JM()->mContext, ptrSize)); - Value* iIndices = PTR_TO_INT(pIndices, Type::getIntNTy(JM()->mContext, ptrSize)); + Value* iLastIndex = pLastIndex; + Value* iIndices = pIndices; // get the number of indices left in the buffer (endPtr - curPtr) / sizeof(index) Value* numIndicesLeft = SUB(iLastIndex,iIndices); @@ -1918,7 +1931,7 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con #endif // outputElt * 4 = offsetting by the size of a simdvertex // 
+ c offsets to a 32bit x vWidth row within the current vertex - Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), "destGEP"); + Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), nullptr, "destGEP"); STORE(vVertexElements[c], dest); } } diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index d11323ef2e7..c5d755d7200 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1375,7 +1375,7 @@ swr_update_derived(struct pipe_context *pipe, SWR_INDEX_BUFFER_STATE swrIndexBuffer; swrIndexBuffer.format = swr_convert_index_type(info.index_size); - swrIndexBuffer.pIndices = p_data; + swrIndexBuffer.xpIndices = (gfxptr_t) p_data; swrIndexBuffer.size = size; ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer); -- cgit v1.2.3