diff options
Diffstat (limited to 'src/gallium/drivers')
4 files changed, 260 insertions, 253 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 5bacf551261..0312fc47fb6 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -59,7 +59,7 @@ using namespace SwrJit; ////////////////////////////////////////////////////////////////////////// /// @brief Contructor for JitManager. /// @param simdWidth - SIMD width to be used in generated program. -JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) : +JitManager::JitManager(uint32_t simdWidth, const char* arch, const char* core) : mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch) { @@ -153,7 +153,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) : } #if LLVM_USE_INTEL_JITEVENTS - JITEventListener *vTune = JITEventListener::createIntelJITEventListener(); + JITEventListener* vTune = JITEventListener::createIntelJITEventListener(); mpExec->RegisterJITEventListener(vTune); #endif @@ -163,7 +163,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) : #else // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out); #endif - std::vector<Type *> fsArgs; + std::vector<Type*> fsArgs; // llvm5 is picky and does not take a void * type fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0)); @@ -212,21 +212,21 @@ void JitManager::SetupNewModule() } -DIType * -JitManager::CreateDebugStructType(StructType * pType, - const std::string & name, - DIFile * pFile, +DIType* +JitManager::CreateDebugStructType(StructType* pType, + const std::string& name, + DIFile* pFile, uint32_t lineNum, - const std::vector<std::pair<std::string, uint32_t>> &members) + const std::vector<std::pair<std::string, uint32_t>>& members) { - DIBuilder builder(*mpCurrentModule); - SmallVector<Metadata *, 8> 
ElemTypes; - DataLayout DL = DataLayout(mpCurrentModule); - uint32_t size = DL.getTypeAllocSizeInBits(pType); - uint32_t alignment = DL.getABITypeAlignment(pType); - DINode::DIFlags flags = DINode::DIFlags::FlagPublic; - - DICompositeType *pDIStructTy = builder.createStructType(pFile, + DIBuilder builder(*mpCurrentModule); + SmallVector<Metadata*, 8> ElemTypes; + DataLayout DL = DataLayout(mpCurrentModule); + uint32_t size = DL.getTypeAllocSizeInBits(pType); + uint32_t alignment = DL.getABITypeAlignment(pType); + DINode::DIFlags flags = DINode::DIFlags::FlagPublic; + + DICompositeType* pDIStructTy = builder.createStructType(pFile, name, pFile, lineNum, @@ -240,14 +240,14 @@ JitManager::CreateDebugStructType(StructType * mDebugStructMap[pType] = pDIStructTy; uint32_t idx = 0; - for (auto &elem : pType->elements()) + for (auto& elem : pType->elements()) { std::string name = members[idx].first; uint32_t lineNum = members[idx].second; size = DL.getTypeAllocSizeInBits(elem); alignment = DL.getABITypeAlignment(elem); uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx); - llvm::DIType *pDebugTy = GetDebugType(elem); + llvm::DIType* pDebugTy = GetDebugType(elem); ElemTypes.push_back(builder.createMemberType( pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy)); @@ -258,22 +258,22 @@ JitManager::CreateDebugStructType(StructType * return pDIStructTy; } -DIType *JitManager::GetDebugArrayType(Type *pTy) +DIType* JitManager::GetDebugArrayType(Type* pTy) { DIBuilder builder(*mpCurrentModule); DataLayout DL = DataLayout(mpCurrentModule); - ArrayType *pArrayTy = cast<ArrayType>(pTy); + ArrayType* pArrayTy = cast<ArrayType>(pTy); uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy); uint32_t alignment = DL.getABITypeAlignment(pArrayTy); - SmallVector<Metadata *, 8> Elems; + SmallVector<Metadata*, 8> Elems; Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements())); return builder.createArrayType( size, alignment, 
GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems)); } // Create a DIType from llvm Type -DIType *JitManager::GetDebugType(Type *pTy) +DIType* JitManager::GetDebugType(Type* pTy) { DIBuilder builder(*mpCurrentModule); Type::TypeID id = pTy->getTypeID(); @@ -317,17 +317,17 @@ DIType *JitManager::GetDebugType(Type *pTy) } // Create a DISubroutineType from an llvm FunctionType -DIType *JitManager::GetDebugFunctionType(Type *pTy) +DIType* JitManager::GetDebugFunctionType(Type* pTy) { - SmallVector<Metadata *, 8> ElemTypes; - FunctionType * pFuncTy = cast<FunctionType>(pTy); - DIBuilder builder(*mpCurrentModule); + SmallVector<Metadata*, 8> ElemTypes; + FunctionType* pFuncTy = cast<FunctionType>(pTy); + DIBuilder builder(*mpCurrentModule); // Add result type ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType())); // Add arguments - for (auto &param : pFuncTy->params()) + for (auto& param : pFuncTy->params()) { ElemTypes.push_back(GetDebugType(param)); } @@ -335,10 +335,10 @@ DIType *JitManager::GetDebugFunctionType(Type *pTy) return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes)); } -DIType *JitManager::GetDebugIntegerType(Type *pTy) +DIType* JitManager::GetDebugIntegerType(Type* pTy) { DIBuilder builder(*mpCurrentModule); - IntegerType *pIntTy = cast<IntegerType>(pTy); + IntegerType* pIntTy = cast<IntegerType>(pTy); switch (pIntTy->getBitWidth()) { case 1: @@ -365,14 +365,14 @@ DIType *JitManager::GetDebugIntegerType(Type *pTy) return nullptr; } -DIType *JitManager::GetDebugVectorType(Type *pTy) +DIType* JitManager::GetDebugVectorType(Type* pTy) { - DIBuilder builder(*mpCurrentModule); - VectorType * pVecTy = cast<VectorType>(pTy); - DataLayout DL = DataLayout(mpCurrentModule); - uint32_t size = DL.getTypeAllocSizeInBits(pVecTy); - uint32_t alignment = DL.getABITypeAlignment(pVecTy); - SmallVector<Metadata *, 1> Elems; + DIBuilder builder(*mpCurrentModule); + VectorType* pVecTy = cast<VectorType>(pTy); + DataLayout DL = 
DataLayout(mpCurrentModule); + uint32_t size = DL.getTypeAllocSizeInBits(pVecTy); + uint32_t alignment = DL.getABITypeAlignment(pVecTy); + SmallVector<Metadata*, 1> Elems; Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements())); return builder.createVectorType(size, @@ -385,7 +385,7 @@ DIType *JitManager::GetDebugVectorType(Type *pTy) /// @brief Dump function x86 assembly to file. /// @note This should only be called after the module has been jitted to x86 and the /// module will not be further accessed. -void JitManager::DumpAsm(Function *pFunction, const char *fileName) +void JitManager::DumpAsm(Function* pFunction, const char* fileName) { if (KNOB_DUMP_SHADER_IR) { @@ -393,15 +393,15 @@ void JitManager::DumpAsm(Function *pFunction, const char *fileName) DWORD pid = GetCurrentProcessId(); char procname[MAX_PATH]; GetModuleFileNameA(NULL, procname, MAX_PATH); - const char * pBaseName = strrchr(procname, '\\'); + const char* pBaseName = strrchr(procname, '\\'); std::stringstream outDir; outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends; CreateDirectoryPath(outDir.str().c_str()); #endif std::error_code EC; - Module * pModule = pFunction->getParent(); - const char * funcName = pFunction->getName().data(); + Module* pModule = pFunction->getParent(); + const char* funcName = pFunction->getName().data(); char fName[256]; #if defined(_WIN32) sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName); @@ -411,11 +411,12 @@ void JitManager::DumpAsm(Function *pFunction, const char *fileName) raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None); - legacy::PassManager *pMPasses = new legacy::PassManager(); - auto * pTarget = mpExec->getTargetMachine(); + legacy::PassManager* pMPasses = new legacy::PassManager(); + auto* pTarget = mpExec->getTargetMachine(); pTarget->Options.MCOptions.AsmVerbose = true; #if LLVM_VERSION_MAJOR >= 7 - pTarget->addPassesToEmitFile(*pMPasses, filestream, nullptr, 
TargetMachine::CGFT_AssemblyFile); + pTarget->addPassesToEmitFile( + *pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile); #else pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile); #endif @@ -431,7 +432,7 @@ std::string JitManager::GetOutputDir() DWORD pid = GetCurrentProcessId(); char procname[MAX_PATH]; GetModuleFileNameA(NULL, procname, MAX_PATH); - const char * pBaseName = strrchr(procname, '\\'); + const char* pBaseName = strrchr(procname, '\\'); std::stringstream outDir; outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid; CreateDirectoryPath(outDir.str().c_str()); @@ -442,14 +443,14 @@ std::string JitManager::GetOutputDir() ////////////////////////////////////////////////////////////////////////// /// @brief Dump function to file. -void JitManager::DumpToFile(Module *M, const char *fileName) +void JitManager::DumpToFile(Module* M, const char* fileName) { if (KNOB_DUMP_SHADER_IR) { std::string outDir = GetOutputDir(); std::error_code EC; - const char * funcName = M->getName().data(); + const char* funcName = M->getName().data(); char fName[256]; #if defined(_WIN32) sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName); @@ -464,14 +465,14 @@ void JitManager::DumpToFile(Module *M, const char *fileName) ////////////////////////////////////////////////////////////////////////// /// @brief Dump function to file. 
-void JitManager::DumpToFile(Function *f, const char *fileName) +void JitManager::DumpToFile(Function* f, const char* fileName) { if (KNOB_DUMP_SHADER_IR) { std::string outDir = GetOutputDir(); std::error_code EC; - const char * funcName = f->getName().data(); + const char* funcName = f->getName().data(); char fName[256]; #if defined(_WIN32) sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName); @@ -489,7 +490,7 @@ void JitManager::DumpToFile(Function *f, const char *fileName) fd.flush(); raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text); - WriteGraph(fd_cfg, (const Function *)f); + WriteGraph(fd_cfg, (const Function*)f); fd_cfg.flush(); } @@ -501,7 +502,7 @@ bool g_DllActive = true; ////////////////////////////////////////////////////////////////////////// /// @brief Create JIT context. /// @param simdWidth - SIMD width to be used in generated program. -HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char *arch, const char *core) +HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core) { return new JitManager(targetSimdWidth, arch, core); } @@ -512,7 +513,7 @@ void JITCALL JitDestroyContext(HANDLE hJitContext) { if (g_DllActive) { - delete reinterpret_cast<JitManager *>(hJitContext); + delete reinterpret_cast<JitManager*>(hJitContext); } } } @@ -528,8 +529,8 @@ struct JitCacheFileHeader { void Init(uint32_t llCRC, uint32_t objCRC, - const std::string &moduleID, - const std::string &cpu, + const std::string& moduleID, + const std::string& cpu, uint32_t optLevel, uint64_t objSize) { @@ -545,7 +546,7 @@ struct JitCacheFileHeader bool - IsValid(uint32_t llCRC, const std::string &moduleID, const std::string &cpu, uint32_t optLevel) + IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel) { if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) || (m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel)) @@ -576,7 +577,7 @@ private: static const 
size_t JC_STR_MAX_LEN = 32; static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) | (LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) | - ((sizeof(void *) > sizeof(uint32_t)) ? 1 : 0); + ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0); uint64_t m_MagicNumber = JC_MAGIC_NUMBER; uint64_t m_objSize = 0; @@ -588,7 +589,7 @@ private: char m_Cpu[JC_STR_MAX_LEN] = {}; }; -static inline uint32_t ComputeModuleCRC(const llvm::Module *M) +static inline uint32_t ComputeModuleCRC(const llvm::Module* M) { std::string bitcodeBuffer; raw_string_ostream bitcodeStream(bitcodeBuffer); @@ -611,7 +612,7 @@ JitCache::JitCache() #if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__) if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0) { - char *homedir; + char* homedir; if (!(homedir = getenv("HOME"))) { homedir = getpwuid(getuid())->pw_dir; @@ -626,15 +627,15 @@ JitCache::JitCache() } } -int ExecUnhookedProcess(const std::string &CmdLine, std::string *pStdOut, std::string *pStdErr) +int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr) { return ExecCmd(CmdLine, "", pStdOut, pStdErr); } /// notifyObjectCompiled - Provides a pointer to compiled code for Module M. 
-void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) +void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) { - const std::string &moduleID = M->getModuleIdentifier(); + const std::string& moduleID = M->getModuleIdentifier(); if (!moduleID.length()) { return; @@ -671,7 +672,7 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize()); - fileObj.write((const char *)&header, sizeof(header)); + fileObj.write((const char*)&header, sizeof(header)); fileObj.flush(); } } @@ -679,9 +680,9 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef /// Returns a pointer to a newly allocated MemoryBuffer that contains the /// object which corresponds with Module M, or 0 if an object is not /// available. -std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M) +std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M) { - const std::string &moduleID = M->getModuleIdentifier(); + const std::string& moduleID = M->getModuleIdentifier(); mCurrentModuleCRC = ComputeModuleCRC(M); if (!moduleID.length()) @@ -700,8 +701,8 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M) llvm::SmallString<MAX_PATH> objFilePath = filePath; objFilePath += JIT_OBJ_EXT; - FILE *fpObjIn = nullptr; - FILE *fpIn = fopen(filePath.c_str(), "rb"); + FILE* fpObjIn = nullptr; + FILE* fpIn = fopen(filePath.c_str(), "rb"); if (!fpIn) { return nullptr; @@ -732,7 +733,7 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M) #else pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize())); #endif - if (!fread(const_cast<char *>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn)) + if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn)) { pBuf 
= nullptr; break; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index df11914db13..c68f3b9a619 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -37,12 +37,12 @@ namespace SwrJit { using namespace llvm; - BuilderGfxMem::BuilderGfxMem(JitManager *pJitMgr) : Builder(pJitMgr) + BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) : Builder(pJitMgr) { - mpTranslationFuncTy = nullptr; - mpfnTranslateGfxAddressForRead = nullptr; + mpTranslationFuncTy = nullptr; + mpfnTranslateGfxAddressForRead = nullptr; mpfnTranslateGfxAddressForWrite = nullptr; - mpParamSimDC = nullptr; + mpParamSimDC = nullptr; } @@ -50,7 +50,7 @@ namespace SwrJit { } - void BuilderGfxMem::AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage) + void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) { SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); @@ -65,10 +65,10 @@ namespace SwrJit /// @param vIndices - SIMD wide value of VB byte offsets /// @param vMask - SIMD wide mask that controls whether to access memory or the src values /// @param scale - value to scale indices by - Value *BuilderGfxMem::GATHERPS(Value * vSrc, - Value * pBase, - Value * vIndices, - Value * vMask, + Value* BuilderGfxMem::GATHERPS(Value* vSrc, + Value* pBase, + Value* vIndices, + Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage) { @@ -78,7 +78,7 @@ namespace SwrJit pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0)); } - Value *vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale); + Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale); return vGather; } @@ -90,10 +90,10 @@ namespace SwrJit /// @param vIndices - SIMD wide value of VB byte offsets /// @param vMask - SIMD wide mask that controls whether 
to access memory or the src values /// @param scale - value to scale indices by - Value *BuilderGfxMem::GATHERDD(Value * vSrc, - Value * pBase, - Value * vIndices, - Value * vMask, + Value* BuilderGfxMem::GATHERDD(Value* vSrc, + Value* pBase, + Value* vIndices, + Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage) { @@ -104,7 +104,7 @@ namespace SwrJit pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0)); } - Value *vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale); + Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale); return vGather; } @@ -122,37 +122,37 @@ namespace SwrJit } - Value *BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset) + Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset) { return ADD(base, offset); } - Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name) + Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) { Ptr = TranslationHelper(Ptr, Ty); return Builder::GEP(Ptr, Idx, nullptr, Name); } - Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name) + Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name) { Ptr = TranslationHelper(Ptr, Ty); return Builder::GEP(Ty, Ptr, Idx, Name); } - Value *BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty) + Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty) { Ptr = TranslationHelper(Ptr, Ty); return Builder::GEP(Ptr, indexList); } - Value * - BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty) + Value* + BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty) { Ptr = TranslationHelper(Ptr, Ty); return Builder::GEP(Ptr, indexList); } - Value *BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty) + Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty) { 
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified."); @@ -167,7 +167,7 @@ namespace SwrJit return Ptr; } - LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); @@ -175,7 +175,7 @@ namespace SwrJit return Builder::LOAD(Ptr, Name); } - LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); @@ -183,8 +183,9 @@ namespace SwrJit return Builder::LOAD(Ptr, Name); } - LoadInst *BuilderGfxMem::LOAD( - Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage) + + LoadInst* BuilderGfxMem::LOAD( + Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); @@ -192,10 +193,10 @@ namespace SwrJit return Builder::LOAD(Ptr, isVolatile, Name); } - LoadInst *BuilderGfxMem::LOAD(Value * BasePtr, - const std::initializer_list<uint32_t> &offset, - const llvm::Twine & name, - Type * Ty, + LoadInst* BuilderGfxMem::LOAD(Value* BasePtr, + const std::initializer_list<uint32_t>& offset, + const llvm::Twine& name, + Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(BasePtr, usage); @@ -207,7 +208,7 @@ namespace SwrJit BasePtr = INT_TO_PTR(BasePtr, Ty, name); bNeedTranslation = true; } - std::vector<Value *> valIndices; + std::vector<Value*> valIndices; for (auto i : offset) { valIndices.push_back(C(i)); @@ -221,12 +222,13 @@ namespace SwrJit return LOAD(BasePtr, name, Ty, usage); } - CallInst *BuilderGfxMem::MASKED_LOAD(Value * Ptr, + + CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr, unsigned Align, - Value * Mask, - Value * PassThru, - const Twine & Name, - Type * Ty, + Value* Mask, + Value* PassThru, + 
const Twine& Name, + Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); @@ -235,10 +237,10 @@ namespace SwrJit return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage); } - Value *BuilderGfxMem::TranslateGfxAddressForRead(Value * xpGfxAddress, - Type * PtrTy, - const Twine &Name, - JIT_MEM_CLIENT /* usage */) + Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress, + Type* PtrTy, + const Twine& Name, + JIT_MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { @@ -247,10 +249,10 @@ namespace SwrJit return INT_TO_PTR(xpGfxAddress, PtrTy, Name); } - Value *BuilderGfxMem::TranslateGfxAddressForWrite(Value * xpGfxAddress, - Type * PtrTy, - const Twine &Name, - JIT_MEM_CLIENT /* usage */) + Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress, + Type* PtrTy, + const Twine& Name, + JIT_MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index dd20c06afef..aefbbef9fba 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -38,53 +38,54 @@ namespace SwrJit class BuilderGfxMem : public Builder { public: - BuilderGfxMem(JitManager *pJitMgr); + BuilderGfxMem(JitManager* pJitMgr); virtual ~BuilderGfxMem() {} - virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = ""); - virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = ""); - virtual Value * - GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty = nullptr); - virtual Value * - GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr); + virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); + virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); + virtual Value* + GEP(Value* Ptr, const 
std::initializer_list<Value*>& indexList, Type* Ty = nullptr); + virtual Value* + GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr); - virtual LoadInst *LOAD(Value * Ptr, - const char * Name, - Type * Ty = nullptr, + virtual LoadInst* LOAD(Value* Ptr, + const char* Name, + Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual LoadInst *LOAD(Value * Ptr, - const Twine & Name = "", - Type * Ty = nullptr, + virtual LoadInst* LOAD(Value* Ptr, + const Twine& Name = "", + Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual LoadInst *LOAD(Value * Ptr, + virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, - const Twine & Name = "", - Type * Ty = nullptr, + const Twine& Name = "", + Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual LoadInst *LOAD(Value * BasePtr, - const std::initializer_list<uint32_t> &offset, - const llvm::Twine & Name = "", - Type * Ty = nullptr, + virtual LoadInst* LOAD(Value* BasePtr, + const std::initializer_list<uint32_t>& offset, + const llvm::Twine& Name = "", + Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual CallInst *MASKED_LOAD(Value * Ptr, + + virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, - Value * Mask, - Value * PassThru = nullptr, - const Twine & Name = "", - Type * Ty = nullptr, + Value* Mask, + Value* PassThru = nullptr, + const Twine& Name = "", + Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual Value *GATHERPS(Value * src, - Value * pBase, - Value * indices, - Value * mask, + virtual Value* GATHERPS(Value* src, + Value* pBase, + Value* indices, + Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - virtual Value *GATHERDD(Value * src, - Value * pBase, - Value * indices, - Value * mask, + virtual Value* GATHERDD(Value* src, + Value* pBase, + Value* indices, + Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); @@ -95,35 
+96,35 @@ namespace SwrJit JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - Value *TranslateGfxAddressForRead(Value * xpGfxAddress, - Type * PtrTy = nullptr, - const Twine & Name = "", + Value* TranslateGfxAddressForRead(Value* xpGfxAddress, + Type* PtrTy = nullptr, + const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - Value *TranslateGfxAddressForWrite(Value * xpGfxAddress, - Type * PtrTy = nullptr, - const Twine & Name = "", + Value* TranslateGfxAddressForWrite(Value* xpGfxAddress, + Type* PtrTy = nullptr, + const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); protected: - void AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage); + void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage); virtual void NotifyPrivateContextSet(); - virtual Value *OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset); + virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); - Value *TranslationHelper(Value *Ptr, Type *Ty); + Value* TranslationHelper(Value* Ptr, Type* Ty); - FunctionType *GetTranslationFunctionType() { return mpTranslationFuncTy; } - Value * GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; } - Value * GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; } - Value * GetParamSimDC() { return mpParamSimDC; } + FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; } + Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; } + Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; } + Value* GetParamSimDC() { return mpParamSimDC; } private: - FunctionType *mpTranslationFuncTy; - Value * mpfnTranslateGfxAddressForRead; - Value * mpfnTranslateGfxAddressForWrite; - Value * mpParamSimDC; + FunctionType* mpTranslationFuncTy; + Value* mpfnTranslateGfxAddressForRead; + Value* mpfnTranslateGfxAddressForWrite; + Value* mpParamSimDC; }; } // namespace SwrJit diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index c561c8076df..7605823c04d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -37,7 +37,7 @@ namespace llvm { // foward declare the initializer - void initializeLowerX86Pass(PassRegistry &); + void initializeLowerX86Pass(PassRegistry&); } // namespace llvm namespace SwrJit @@ -60,7 +60,7 @@ namespace SwrJit struct LowerX86; - typedef std::function<Instruction *(LowerX86 *, TargetArch, TargetWidth, CallInst *)> EmuFunc; + typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc; struct X86Intrinsic { @@ -83,22 +83,22 @@ namespace SwrJit }; // Forward decls - Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst); - Instruction * - VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst); - Instruction * - VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst); - Instruction * - VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst); - Instruction * - VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst); - Instruction * - VCONVERT_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst); - - Instruction *DOUBLE_EMU(LowerX86 * pThis, + Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); + Instruction* + VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); + Instruction* + VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); + Instruction* + VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); + Instruction* + VHSUB_EMU(LowerX86* pThis, TargetArch arch, 
TargetWidth width, CallInst* pCallInst); + Instruction* + VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); + + Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, - CallInst * pCallInst, + CallInst* pCallInst, Intrinsic::ID intrin); static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1; @@ -188,7 +188,7 @@ namespace SwrJit struct LowerX86 : public FunctionPass { - LowerX86(Builder *b = nullptr) : FunctionPass(ID), B(b) + LowerX86(Builder* b = nullptr) : FunctionPass(ID), B(b) { initializeLowerX86Pass(*PassRegistry::getPassRegistry()); @@ -216,12 +216,12 @@ namespace SwrJit // across all intrinsics, and will have to be rethought. Probably need something // similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed // intrinsic. - void GetRequestedWidthAndType(CallInst * pCallInst, + void GetRequestedWidthAndType(CallInst* pCallInst, const StringRef intrinName, - TargetWidth * pWidth, - Type ** pTy) + TargetWidth* pWidth, + Type** pTy) { - Type *pVecTy = pCallInst->getType(); + Type* pVecTy = pCallInst->getType(); // Check for intrinsic specific types // VCVTPD2PS type comes from src, not dst @@ -232,7 +232,7 @@ namespace SwrJit if (!pVecTy->isVectorTy()) { - for (auto &op : pCallInst->arg_operands()) + for (auto& op : pCallInst->arg_operands()) { if (op.get()->getType()->isVectorTy()) { @@ -260,7 +260,7 @@ namespace SwrJit *pTy = pVecTy->getScalarType(); } - Value *GetZeroVec(TargetWidth width, Type *pTy) + Value* GetZeroVec(TargetWidth width, Type* pTy) { uint32_t numElem = 0; switch (width) @@ -278,9 +278,9 @@ namespace SwrJit return ConstantVector::getNullValue(VectorType::get(pTy, numElem)); } - Value *GetMask(TargetWidth width) + Value* GetMask(TargetWidth width) { - Value *mask; + Value* mask; switch (width) { case W256: @@ -296,18 +296,18 @@ namespace SwrJit } // Convert <N x i1> mask to <N x i32> x86 mask - Value *VectorMask(Value *vi1Mask) + Value* VectorMask(Value* 
vi1Mask) { uint32_t numElem = vi1Mask->getType()->getVectorNumElements(); return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem)); } - Instruction *ProcessIntrinsicAdvanced(CallInst *pCallInst) + Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst) { - Function * pFunc = pCallInst->getCalledFunction(); - auto & intrinsic = intrinsicMap2[mTarget][pFunc->getName()]; + Function* pFunc = pCallInst->getCalledFunction(); + auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()]; TargetWidth vecWidth; - Type * pElemTy; + Type* pElemTy; GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy); // Check if there is a native intrinsic for this instruction @@ -323,9 +323,9 @@ namespace SwrJit } else if (id != Intrinsic::not_intrinsic) { - Function *pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id); - SmallVector<Value *, 8> args; - for (auto &arg : pCallInst->arg_operands()) + Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id); + SmallVector<Value*, 8> args; + for (auto& arg : pCallInst->arg_operands()) { args.push_back(arg.get()); } @@ -361,9 +361,9 @@ namespace SwrJit return nullptr; } - Instruction *ProcessIntrinsic(CallInst *pCallInst) + Instruction* ProcessIntrinsic(CallInst* pCallInst) { - Function *pFunc = pCallInst->getCalledFunction(); + Function* pFunc = pCallInst->getCalledFunction(); // Forward to the advanced support if found if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end()) @@ -376,11 +376,11 @@ namespace SwrJit pFunc->getName()); Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()]; - Function * pX86IntrinFunc = + Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic); - SmallVector<Value *, 8> args; - for (auto &arg : pCallInst->arg_operands()) + SmallVector<Value*, 8> args; + for (auto& arg : pCallInst->arg_operands()) { args.push_back(arg.get()); } @@ -390,23 +390,23 @@ namespace SwrJit 
////////////////////////////////////////////////////////////////////////// /// @brief LLVM funtion pass run method. /// @param f- The function we're working on with this pass. - virtual bool runOnFunction(Function &F) + virtual bool runOnFunction(Function& F) { - std::vector<Instruction *> toRemove; + std::vector<Instruction*> toRemove; - for (auto &BB : F.getBasicBlockList()) + for (auto& BB : F.getBasicBlockList()) { - for (auto &I : BB.getInstList()) + for (auto& I : BB.getInstList()) { - if (CallInst *pCallInst = dyn_cast<CallInst>(&I)) + if (CallInst* pCallInst = dyn_cast<CallInst>(&I)) { - Function *pFunc = pCallInst->getCalledFunction(); + Function* pFunc = pCallInst->getCalledFunction(); if (pFunc) { if (pFunc->getName().startswith("meta.intrinsic")) { B->IRB()->SetInsertPoint(&I); - Instruction *pReplace = ProcessIntrinsic(pCallInst); + Instruction* pReplace = ProcessIntrinsic(pCallInst); SWR_ASSERT(pReplace); toRemove.push_back(pCallInst); pCallInst->replaceAllUsesWith(pReplace); @@ -416,7 +416,7 @@ namespace SwrJit } } - for (auto *pInst : toRemove) + for (auto* pInst : toRemove) { pInst->eraseFromParent(); } @@ -426,11 +426,11 @@ namespace SwrJit return true; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} + virtual void getAnalysisUsage(AnalysisUsage& AU) const {} - JitManager *JM() { return B->JM(); } + JitManager* JM() { return B->JM(); } - Builder *B; + Builder* B; TargetArch mTarget; @@ -439,24 +439,24 @@ namespace SwrJit char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID. 
- FunctionPass *createLowerX86Pass(Builder *b) { return new LowerX86(b); } + FunctionPass* createLowerX86Pass(Builder* b) { return new LowerX86(b); } - Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst) + Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) { SWR_ASSERT(false, "Unimplemented intrinsic emulation."); return nullptr; } - Instruction *VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst) + Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) { // Only need vperm emulation for AVX SWR_ASSERT(arch == AVX); - Builder *B = pThis->B; + Builder* B = pThis->B; auto v32A = pCallInst->getArgOperand(0); auto vi32Index = pCallInst->getArgOperand(1); - Value *v32Result; + Value* v32Result; if (isa<Constant>(vi32Index)) { // Can use llvm shuffle vector directly with constant shuffle indices @@ -475,10 +475,10 @@ namespace SwrJit return cast<Instruction>(v32Result); } - Instruction * - VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst) + Instruction* + VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) { - Builder *B = pThis->B; + Builder* B = pThis->B; auto vSrc = pCallInst->getArgOperand(0); auto pBase = pCallInst->getArgOperand(1); auto vi32Indices = pCallInst->getArgOperand(2); @@ -489,7 +489,7 @@ namespace SwrJit uint32_t numElem = vSrc->getType()->getVectorNumElements(); auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty); auto srcTy = vSrc->getType()->getVectorElementType(); - Value * v32Gather; + Value* v32Gather; if (arch == AVX) { // Full emulation for AVX @@ -518,7 +518,7 @@ namespace SwrJit } else if (arch == AVX2 || (arch == AVX512 && width == W256)) { - Function *pX86IntrinFunc; + Function* pX86IntrinFunc; if (srcTy == B->mFP32Ty) { pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, @@ -555,14 +555,14 @@ namespace 
SwrJit VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements())); v64Mask = B->BITCAST(v64Mask, vSrc->getType()); - Value *src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3})); - Value *src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7})); + Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3})); + Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7})); - Value *indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3})); - Value *indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7})); + Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3})); + Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7})); - Value *mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3})); - Value *mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7})); + Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3})); + Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7})); src0 = B->BITCAST( src0, @@ -570,7 +570,7 @@ namespace SwrJit mask0 = B->BITCAST( mask0, VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements())); - Value *gather0 = + Value* gather0 = B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale}); src1 = B->BITCAST( src1, @@ -578,7 +578,7 @@ namespace SwrJit mask1 = B->BITCAST( mask1, VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements())); - Value *gather1 = + Value* gather1 = B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale}); v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7})); @@ -589,18 +589,18 @@ namespace SwrJit // Double pump 8-wide for 32bit elements auto v32Mask = pThis->VectorMask(vi1Mask); v32Mask = B->BITCAST(v32Mask, vSrc->getType()); - Value *src0 = B->EXTRACT_16(vSrc, 0); - Value *src1 = B->EXTRACT_16(vSrc, 1); + Value* src0 = B->EXTRACT_16(vSrc, 0); + Value* src1 = B->EXTRACT_16(vSrc, 1); - Value *indices0 = B->EXTRACT_16(vi32Indices, 0); - Value *indices1 = B->EXTRACT_16(vi32Indices, 
1); + Value* indices0 = B->EXTRACT_16(vi32Indices, 0); + Value* indices1 = B->EXTRACT_16(vi32Indices, 1); - Value *mask0 = B->EXTRACT_16(v32Mask, 0); - Value *mask1 = B->EXTRACT_16(v32Mask, 1); + Value* mask0 = B->EXTRACT_16(v32Mask, 0); + Value* mask1 = B->EXTRACT_16(v32Mask, 1); - Value *gather0 = + Value* gather0 = B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale}); - Value *gather1 = + Value* gather1 = B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale}); v32Gather = B->JOIN_16(gather0, gather1); @@ -609,8 +609,8 @@ namespace SwrJit } else if (arch == AVX512) { - Value * iMask; - Function *pX86IntrinFunc; + Value* iMask; + Function* pX86IntrinFunc; if (srcTy == B->mFP32Ty) { pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, @@ -643,8 +643,8 @@ namespace SwrJit // No support for vroundps in avx512 (it is available in kncni), so emulate with avx // instructions - Instruction * - VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst) + Instruction* + VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) { SWR_ASSERT(arch == AVX512); @@ -676,22 +676,25 @@ namespace SwrJit return nullptr; } - Instruction *VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) + Instruction* + VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) { SWR_ASSERT(arch == AVX512); - auto B = pThis->B; + auto B = pThis->B; auto vf32Src = pCallInst->getOperand(0); if (width == W256) { - auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256); + auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, + Intrinsic::x86_avx_round_ps_256); return cast<Instruction>(B->FP_TRUNC(vf32SrcRound, B->mFP32Ty)); } else if (width == W512) { // 512 can use intrinsic - auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_mask_cvtpd2ps_512); 
+ auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, + Intrinsic::x86_avx512_mask_cvtpd2ps_512); return cast<Instruction>(B->CALL(pfnFunc, vf32Src)); } else @@ -703,7 +706,7 @@ namespace SwrJit } // No support for hsub in AVX512 - Instruction *VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst) + Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) { SWR_ASSERT(arch == AVX512); @@ -734,27 +737,27 @@ namespace SwrJit // Double pump input using Intrin template arg. This blindly extracts lower and upper 256 from // each vector argument and calls the 256 wide intrinsic, then merges the results to 512 wide - Instruction *DOUBLE_EMU(LowerX86 * pThis, + Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, - CallInst * pCallInst, + CallInst* pCallInst, Intrinsic::ID intrin) { auto B = pThis->B; SWR_ASSERT(width == W512); - Value * result[2]; - Function *pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin); + Value* result[2]; + Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin); for (uint32_t i = 0; i < 2; ++i) { - SmallVector<Value *, 8> args; - for (auto &arg : pCallInst->arg_operands()) + SmallVector<Value*, 8> args; + for (auto& arg : pCallInst->arg_operands()) { auto argType = arg.get()->getType(); if (argType->isVectorTy()) { uint32_t vecWidth = argType->getVectorNumElements(); - Value * lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2); - Value * argToPush = B->VSHUFFLE( + Value* lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2); + Value* argToPush = B->VSHUFFLE( arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes); args.push_back(argToPush); } @@ -776,7 +779,7 @@ namespace SwrJit { vecWidth = 2; } - Value *lanes = B->CInc<int>(0, vecWidth); + Value* lanes = B->CInc<int>(0, vecWidth); return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes)); } |