Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--  src/gallium/drivers/radeon/AMDIL.h                  |    2
-rw-r--r--  src/gallium/drivers/radeon/AMDIL7XXDevice.cpp       |    8
-rw-r--r--  src/gallium/drivers/radeon/AMDIL7XXDevice.h         |    2
-rw-r--r--  src/gallium/drivers/radeon/AMDILDevice.h            |    5
-rw-r--r--  src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp |    8
-rw-r--r--  src/gallium/drivers/radeon/AMDILEvergreenDevice.h   |    2
-rw-r--r--  src/gallium/drivers/radeon/AMDILPointerManager.cpp  | 2551
-rw-r--r--  src/gallium/drivers/radeon/AMDILPointerManager.h    |  209
-rw-r--r--  src/gallium/drivers/radeon/AMDILTargetMachine.cpp   |    1
-rw-r--r--  src/gallium/drivers/radeon/Makefile.sources         |    1
10 files changed, 0 insertions, 2789 deletions
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
index cc6590c82a9..85d7e03930c 100644
--- a/src/gallium/drivers/radeon/AMDIL.h
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -103,8 +103,6 @@ FunctionPass*
 /// Pre regalloc passes.
 FunctionPass*
-  createAMDILPointerManager(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
-FunctionPass*
   createAMDILMachinePeephole(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
 /// Pre emit passes.
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
index df81c44f288..b89c22b1404 100644
--- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
@@ -12,7 +12,6 @@
 #endif
 #include "AMDILDevice.h"
 #include "AMDILIOExpansion.h"
-#include "AMDILPointerManager.h"
 
 using namespace llvm;
 
@@ -110,13 +109,6 @@ AMDIL7XXDevice::getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const
 #endif
 }
 
-FunctionPass*
-AMDIL7XXDevice::getPointerManager(
-    TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const
-{
-  return new AMDILPointerManager(TM AMDIL_OPT_LEVEL_VAR);
-}
-
 AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
 {
   setCaps();
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
index 87238e96006..edaf84a3282 100644
--- a/src/gallium/drivers/radeon/AMDIL7XXDevice.h
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
@@ -43,8 +43,6 @@ public:
   getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
   AsmPrinter*
   getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const;
-  FunctionPass*
-  getPointerManager(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
 protected:
   virtual void setCaps();
diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h
index 338212101b4..88f8b306b0d 100644
--- a/src/gallium/drivers/radeon/AMDILDevice.h
+++ b/src/gallium/drivers/radeon/AMDILDevice.h
@@ -93,11 +93,6 @@ public:
   virtual AsmPrinter*
   getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const = 0;
-  // Interface to get the Pointer manager pass for each device.
-  virtual FunctionPass*
-  getPointerManager(TargetMachine& AMDIL_OPT_LEVEL_DECL) const = 0;
-
-
   // API utilizing more detailed capabilities of each family of
   // cards. If a capability is supported, then either usesHardware or
   // usesSoftware returned true.  If usesHardware returned true, then
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
index 1af28063da6..19c42bd9daa 100644
--- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
@@ -11,7 +11,6 @@
 #include "AMDILEGAsmPrinter.h"
 #endif
 #include "AMDILIOExpansion.h"
-#include "AMDILPointerManager.h"
 
 using namespace llvm;
 
@@ -154,13 +153,6 @@ AMDILEvergreenDevice::getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const
 #endif
 }
 
-FunctionPass*
-AMDILEvergreenDevice::getPointerManager(
-    TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const
-{
-  return new AMDILEGPointerManager(TM AMDIL_OPT_LEVEL_VAR);
-}
-
 AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
   : AMDILEvergreenDevice(ST)
 {
   setCaps();
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
index 726b479c7ea..b2e0a6a6cf2 100644
--- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
@@ -44,8 +44,6 @@ public:
   getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
   virtual AsmPrinter*
   getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const;
-  virtual FunctionPass*
-  getPointerManager(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
 protected:
   virtual void setCaps();
 }; // AMDILEvergreenDevice
diff --git a/src/gallium/drivers/radeon/AMDILPointerManager.cpp b/src/gallium/drivers/radeon/AMDILPointerManager.cpp
deleted file mode 100644
index 9cac61cb718..00000000000
--- a/src/gallium/drivers/radeon/AMDILPointerManager.cpp
+++ /dev/null
@@ -1,2551 +0,0 @@
-//===-------- AMDILPointerManager.cpp - Manage Pointers for HW-------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-// Implementation for the AMDILPointerManager classes. See header file for
-// more documentation of class.
-// TODO: This fails when function calls are enabled, must always be inlined -//===----------------------------------------------------------------------===// -#include "AMDILPointerManager.h" -#include "AMDILCompilerErrors.h" -#include "AMDILDeviceInfo.h" -#include "AMDILGlobalManager.h" -#include "AMDILKernelManager.h" -#include "AMDILMachineFunctionInfo.h" -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/Twine.h" -#include "llvm/ADT/ValueMap.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instructions.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" -#include "llvm/Support/FormattedStream.h" - -#include <stdio.h> -using namespace llvm; -char AMDILPointerManager::ID = 0; -namespace llvm { - FunctionPass* - createAMDILPointerManager(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) - { - return tm.getSubtarget<AMDILSubtarget>() - .device()->getPointerManager(tm AMDIL_OPT_LEVEL_VAR); - } -} - -AMDILPointerManager::AMDILPointerManager( - TargetMachine &tm - AMDIL_OPT_LEVEL_DECL) : - MachineFunctionPass(ID), - TM(tm) -{ - mDebug = DEBUGME; - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); -} - -AMDILPointerManager::~AMDILPointerManager() -{ -} - -const char* -AMDILPointerManager::getPassName() const -{ - return "AMD IL Default Pointer Manager Pass"; -} - -void -AMDILPointerManager::getAnalysisUsage(AnalysisUsage &AU) const -{ - AU.setPreservesAll(); - AU.addRequiredID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AMDILEGPointerManager::AMDILEGPointerManager( - TargetMachine &tm - AMDIL_OPT_LEVEL_DECL) : - AMDILPointerManager(tm AMDIL_OPT_LEVEL_VAR), - TM(tm) -{ -} - -AMDILEGPointerManager::~AMDILEGPointerManager() -{ -} -std::string -findSamplerName(MachineInstr* MI, - FIPMap &FIToPtrMap, - RVPVec &lookupTable, - const TargetMachine *TM) -{ - std::string sampler = "unknown"; - assert(MI->getNumOperands() == 5 && "Only an " - "image read instruction with 5 arguments can " - "have a sampler."); - assert(MI->getOperand(3).isReg() && - "Argument 3 must be a register to call this function"); - unsigned reg = MI->getOperand(3).getReg(); - // If this register points to an argument, then - // we can return the argument name. - if (lookupTable[reg].second && dyn_cast<Argument>(lookupTable[reg].second)) { - return lookupTable[reg].second->getName(); - } - // Otherwise the sampler is coming from memory somewhere. - // If the sampler memory location can be tracked, then - // we ascertain the sampler name that way. - // The most common case is when optimizations are disabled - // or mem2reg is not enabled, then the sampler when it is - // an argument is passed through the frame index. - - // In the optimized case, the instruction that defined - // register from operand #3 is a private load. 
- MachineRegisterInfo ®Info = MI->getParent()->getParent()->getRegInfo(); - assert(!regInfo.def_empty(reg) - && "We don't have any defs of this register, but we aren't an argument!"); - MachineOperand *defOp = regInfo.getRegUseDefListHead(reg); - MachineInstr *defMI = defOp->getParent(); - if (isPrivateInst(TM->getInstrInfo(), defMI) && isLoadInst(TM->getInstrInfo(), defMI)) { - if (defMI->getOperand(1).isFI()) { - RegValPair &fiRVP = FIToPtrMap[reg]; - if (fiRVP.second && dyn_cast<Argument>(fiRVP.second)) { - return fiRVP.second->getName(); - } else { - // FIXME: Fix the case where the value stored is not a kernel argument. - assert(!"Found a private load of a sampler where the value isn't an argument!"); - } - } else { - // FIXME: Fix the case where someone dynamically loads a sampler value - // from private memory. This is problematic because we need to know the - // sampler value at compile time and if it is dynamically loaded, we won't - // know what sampler value to use. - assert(!"Found a private load of a sampler that isn't from a frame index!"); - } - } else { - // FIXME: Handle the case where the def is neither a private instruction - // and not a load instruction. This shouldn't occur, but putting an assertion - // just to make sure that it doesn't. - assert(!"Found a case which we don't handle."); - } - return sampler; -} - -const char* -AMDILEGPointerManager::getPassName() const -{ - return "AMD IL EG Pointer Manager Pass"; -} - -// Helper function to determine if the current pointer is from the -// local, region or private address spaces. - static bool -isLRPInst(MachineInstr *MI, - const AMDILTargetMachine *ATM) -{ - const AMDILSubtarget *STM - = ATM->getSubtargetImpl(); - if (!MI) { - return false; - } - if ((isRegionInst(ATM->getInstrInfo(), MI) - && STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) - || (isLocalInst(ATM->getInstrInfo(), MI) - && STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) - || (isPrivateInst(ATM->getInstrInfo(), MI) - && STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem))) { - return true; - } - return false; -} - -/// Helper function to determine if the I/O instruction uses -/// global device memory or not. 
-static bool -usesGlobal( - const AMDILTargetMachine *ATM, - MachineInstr *MI) { - const AMDILSubtarget *STM - = ATM->getSubtargetImpl(); - switch(MI->getOpcode()) { - ExpandCaseToAllTypes(AMDIL::GLOBALSTORE); - ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE); - ExpandCaseToAllTypes(AMDIL::GLOBALLOAD); - ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD); - return true; - ExpandCaseToAllTypes(AMDIL::REGIONLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONSTORE); - ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE); - return !STM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - ExpandCaseToAllTypes(AMDIL::LOCALLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALSTORE); - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE); - return !STM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - ExpandCaseToAllTypes(AMDIL::CPOOLLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD); - return !STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); - ExpandCaseToAllTypes(AMDIL::PRIVATELOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATESTORE); - ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE); - return !STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem); - default: - return false; - } - return false; -} - -// Helper function that allocates the default resource ID for the -// respective I/O types. -static void -allocateDefaultID( - const AMDILTargetMachine *ATM, - AMDILAS::InstrResEnc &curRes, - MachineInstr *MI, - bool mDebug) -{ - AMDILMachineFunctionInfo *mMFI = - MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>(); - const AMDILSubtarget *STM - = ATM->getSubtargetImpl(); - if (mDebug) { - dbgs() << "Assigning instruction to default ID. Inst:"; - MI->dump(); - } - // If we use global memory, lets set the Operand to - // the ARENA_UAV_ID. 
- if (usesGlobal(ATM, MI)) { - curRes.bits.ResourceID = - STM->device()->getResourceID(AMDILDevice::GLOBAL_ID); - if (isAtomicInst(ATM->getInstrInfo(), MI)) { - MI->getOperand(MI->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - AMDILKernelManager *KM = STM->getKernelManager(); - if (curRes.bits.ResourceID == 8 - && !STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { - KM->setUAVID(NULL, curRes.bits.ResourceID); - mMFI->uav_insert(curRes.bits.ResourceID); - } - } else if (isPrivateInst(ATM->getInstrInfo(), MI)) { - curRes.bits.ResourceID = - STM->device()->getResourceID(AMDILDevice::SCRATCH_ID); - } else if (isLocalInst(ATM->getInstrInfo(), MI) || isLocalAtomic(ATM->getInstrInfo(), MI)) { - curRes.bits.ResourceID = - STM->device()->getResourceID(AMDILDevice::LDS_ID); - AMDILMachineFunctionInfo *mMFI = - MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>(); - mMFI->setUsesLocal(); - if (isAtomicInst(ATM->getInstrInfo(), MI)) { - assert(curRes.bits.ResourceID && "Atomic resource ID " - "cannot be zero!"); - MI->getOperand(MI->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - } else if (isRegionInst(ATM->getInstrInfo(), MI) || isRegionAtomic(ATM->getInstrInfo(), MI)) { - curRes.bits.ResourceID = - STM->device()->getResourceID(AMDILDevice::GDS_ID); - AMDILMachineFunctionInfo *mMFI = - MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>(); - mMFI->setUsesRegion(); - if (isAtomicInst(ATM->getInstrInfo(), MI)) { - assert(curRes.bits.ResourceID && "Atomic resource ID " - "cannot be zero!"); - (MI)->getOperand((MI)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - } else if (isConstantInst(ATM->getInstrInfo(), MI)) { - // If we are unknown constant instruction and the base pointer is known. - // Set the resource ID accordingly, otherwise use the default constant ID. - // FIXME: this should not require the base pointer to know what constant - // it is from. - AMDILGlobalManager *GM = STM->getGlobalManager(); - MachineFunction *MF = MI->getParent()->getParent(); - if (GM->isKernel(MF->getFunction()->getName())) { - const kernel &krnl = GM->getKernel(MF->getFunction()->getName()); - const Value *V = getBasePointerValue(MI); - if (V && !dyn_cast<AllocaInst>(V)) { - curRes.bits.ResourceID = GM->getConstPtrCB(krnl, V->getName()); - curRes.bits.HardwareInst = 1; - } else if (V && dyn_cast<AllocaInst>(V)) { - // FIXME: Need a better way to fix this. Requires a rewrite of how - // we lower global addresses to various address spaces. - // So for now, lets assume that there is only a single - // constant buffer that can be accessed from a load instruction - // that is derived from an alloca instruction. 
- curRes.bits.ResourceID = 2; - curRes.bits.HardwareInst = 1; - } else { - if (isStoreInst(ATM->getInstrInfo(), MI)) { - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; - MI->dump(); - } - curRes.bits.ByteStore = 1; - } - curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::CONSTANT_ID); - } - } else { - if (isStoreInst(ATM->getInstrInfo(), MI)) { - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; - MI->dump(); - } - curRes.bits.ByteStore = 1; - } - curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID); - AMDILKernelManager *KM = STM->getKernelManager(); - KM->setUAVID(NULL, curRes.bits.ResourceID); - mMFI->uav_insert(curRes.bits.ResourceID); - } - } else if (isAppendInst(ATM->getInstrInfo(), MI)) { - unsigned opcode = MI->getOpcode(); - if (opcode == AMDIL::APPEND_ALLOC - || opcode == AMDIL::APPEND_ALLOC_NORET) { - curRes.bits.ResourceID = 1; - } else { - curRes.bits.ResourceID = 2; - } - } - setAsmPrinterFlags(MI, curRes); -} - -// Function that parses the arguments and updates the lookupTable with the -// pointer -> register mapping. This function also checks for cacheable -// pointers and updates the CacheableSet with the arguments that -// can be cached based on the readonlypointer annotation. The final -// purpose of this function is to update the imageSet and counterSet -// with all pointers that are either images or atomic counters. -uint32_t -parseArguments(MachineFunction &MF, - RVPVec &lookupTable, - const AMDILTargetMachine *ATM, - CacheableSet &cacheablePtrs, - ImageSet &imageSet, - AppendSet &counterSet, - bool mDebug) -{ - const AMDILSubtarget *STM - = ATM->getSubtargetImpl(); - uint32_t writeOnlyImages = 0; - uint32_t readOnlyImages = 0; - std::string cachedKernelName = "llvm.readonlypointer.annotations."; - cachedKernelName.append(MF.getFunction()->getName()); - GlobalVariable *GV = MF.getFunction()->getParent() - ->getGlobalVariable(cachedKernelName); - unsigned cbNum = 0; - unsigned regNum = AMDIL::R1; - AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>(); - for (Function::const_arg_iterator I = MF.getFunction()->arg_begin(), - E = MF.getFunction()->arg_end(); I != E; ++I) { - const Argument *curArg = I; - if (mDebug) { - dbgs() << "Argument: "; - curArg->dump(); - } - Type *curType = curArg->getType(); - // We are either a scalar or vector type that - // is passed by value that is not a opaque/struct - // type. We just need to increment regNum - // the correct number of times to match the number - // of registers that it takes up. - if (curType->isFPOrFPVectorTy() || - curType->isIntOrIntVectorTy()) { - // We are scalar, so increment once and - // move on - if (!curType->isVectorTy()) { - lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg); - ++regNum; - ++cbNum; - continue; - } - VectorType *VT = dyn_cast<VectorType>(curType); - // We are a vector type. If we are 64bit type, then - // we increment length / 2 times, otherwise we - // increment length / 4 times. The only corner case - // is with vec3 where the vector gets scalarized and - // therefor we need a loop count of 3. 
- size_t loopCount = VT->getNumElements(); - if (loopCount != 3) { - if (VT->getScalarSizeInBits() == 64) { - loopCount = loopCount >> 1; - } else { - loopCount = (loopCount + 2) >> 2; - } - cbNum += loopCount; - } else { - cbNum++; - } - while (loopCount--) { - lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg); - ++regNum; - } - } else if (curType->isPointerTy()) { - Type *CT = dyn_cast<PointerType>(curType)->getElementType(); - const StructType *ST = dyn_cast<StructType>(CT); - if (ST && ST->isOpaque()) { - StringRef name = ST->getName(); - bool i1d_type = name == "struct._image1d_t"; - bool i1da_type = name == "struct._image1d_array_t"; - bool i1db_type = name == "struct._image1d_buffer_t"; - bool i2d_type = name == "struct._image2d_t"; - bool i2da_type = name == "struct._image2d_array_t"; - bool i3d_type = name == "struct._image3d_t"; - bool c32_type = name == "struct._counter32_t"; - bool c64_type = name == "struct._counter64_t"; - if (i2d_type || i3d_type || i2da_type || - i1d_type || i1db_type || i1da_type) { - imageSet.insert(I); - uint32_t imageNum = readOnlyImages + writeOnlyImages; - if (STM->getGlobalManager() - ->isReadOnlyImage(MF.getFunction()->getName(), imageNum)) { - if (mDebug) { - dbgs() << "Pointer: '" << curArg->getName() - << "' is a read only image # " << readOnlyImages << "!\n"; - } - // We store the cbNum along with the image number so that we can - // correctly encode the 'info' intrinsics. - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - ((cbNum << 16 | readOnlyImages++), curArg); - } else if (STM->getGlobalManager() - ->isWriteOnlyImage(MF.getFunction()->getName(), imageNum)) { - if (mDebug) { - dbgs() << "Pointer: '" << curArg->getName() - << "' is a write only image # " << writeOnlyImages << "!\n"; - } - // We store the cbNum along with the image number so that we can - // correctly encode the 'info' intrinsics. - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - ((cbNum << 16 | writeOnlyImages++), curArg); - } else { - assert(!"Read/Write images are not supported!"); - } - ++regNum; - cbNum += 2; - continue; - } else if (c32_type || c64_type) { - if (mDebug) { - dbgs() << "Pointer: '" << curArg->getName() - << "' is a " << (c32_type ? 
"32" : "64") - << " bit atomic counter type!\n"; - } - counterSet.push_back(I); - } - } - - if (STM->device()->isSupported(AMDILDeviceInfo::CachedMem) - && GV && GV->hasInitializer()) { - const ConstantArray *nameArray - = dyn_cast_or_null<ConstantArray>(GV->getInitializer()); - if (nameArray) { - for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) { - const GlobalVariable *gV= dyn_cast_or_null<GlobalVariable>( - nameArray->getOperand(x)->getOperand(0)); - const ConstantDataArray *argName = - dyn_cast_or_null<ConstantDataArray>(gV->getInitializer()); - if (!argName) { - continue; - } - std::string argStr = argName->getAsString(); - std::string curStr = curArg->getName(); - if (!strcmp(argStr.data(), curStr.data())) { - if (mDebug) { - dbgs() << "Pointer: '" << curArg->getName() - << "' is cacheable!\n"; - } - cacheablePtrs.insert(curArg); - } - } - } - } - uint32_t as = dyn_cast<PointerType>(curType)->getAddressSpace(); - // Handle the case where the kernel argument is a pointer - if (mDebug) { - dbgs() << "Pointer: " << curArg->getName() << " is assigned "; - if (as == AMDILAS::GLOBAL_ADDRESS) { - dbgs() << "uav " << STM->device() - ->getResourceID(AMDILDevice::GLOBAL_ID); - } else if (as == AMDILAS::PRIVATE_ADDRESS) { - dbgs() << "scratch " << STM->device() - ->getResourceID(AMDILDevice::SCRATCH_ID); - } else if (as == AMDILAS::LOCAL_ADDRESS) { - dbgs() << "lds " << STM->device() - ->getResourceID(AMDILDevice::LDS_ID); - } else if (as == AMDILAS::CONSTANT_ADDRESS) { - dbgs() << "cb " << STM->device() - ->getResourceID(AMDILDevice::CONSTANT_ID); - } else if (as == AMDILAS::REGION_ADDRESS) { - dbgs() << "gds " << STM->device() - ->getResourceID(AMDILDevice::GDS_ID); - } else { - assert(!"Found an address space that we don't support!"); - } - dbgs() << " @ register " << regNum << ". Inst: "; - curArg->dump(); - } - switch (as) { - default: - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg); - break; - case AMDILAS::LOCAL_ADDRESS: - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - (STM->device()->getResourceID(AMDILDevice::LDS_ID), curArg); - mMFI->setHasLocalArg(); - break; - case AMDILAS::REGION_ADDRESS: - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - (STM->device()->getResourceID(AMDILDevice::GDS_ID), curArg); - mMFI->setHasRegionArg(); - break; - case AMDILAS::CONSTANT_ADDRESS: - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - (STM->device()->getResourceID(AMDILDevice::CONSTANT_ID), curArg); - break; - case AMDILAS::PRIVATE_ADDRESS: - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - (STM->device()->getResourceID(AMDILDevice::SCRATCH_ID), curArg); - break; - } - // In this case we need to increment it once. - ++regNum; - ++cbNum; - } else { - // Is anything missing that is legal in CL? - assert(0 && "Current type is not supported!"); - lookupTable[regNum] = std::make_pair<unsigned, const Value*> - (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg); - ++regNum; - ++cbNum; - } - } - return writeOnlyImages; -} -// The call stack is interesting in that even in SSA form, it assigns -// registers to the same value's over and over again. So we need to -// ignore the values that are assigned and just deal with the input -// and return registers. 
-static void -parseCall( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - RVPVec &lookupTable, - MachineBasicBlock::iterator &mBegin, - MachineBasicBlock::iterator mEnd, - bool mDebug) -{ - SmallVector<unsigned, 8> inputRegs; - AMDILAS::InstrResEnc curRes; - if (mDebug) { - dbgs() << "Parsing Call Stack Start.\n"; - } - MachineBasicBlock::iterator callInst = mBegin; - MachineInstr *CallMI = callInst; - getAsmPrinterFlags(CallMI, curRes); - MachineInstr *MI = --mBegin; - unsigned reg = AMDIL::R1; - // First we need to check the input registers. - do { - // We stop if we hit the beginning of the call stack - // adjustment. - if (MI->getOpcode() == AMDIL::ADJCALLSTACKDOWN - || MI->getOpcode() == AMDIL::ADJCALLSTACKUP - || MI->getNumOperands() != 2 - || !MI->getOperand(0).isReg()) { - break; - } - reg = MI->getOperand(0).getReg(); - if (MI->getOperand(1).isReg()) { - unsigned reg1 = MI->getOperand(1).getReg(); - inputRegs.push_back(reg1); - if (lookupTable[reg1].second) { - curRes.bits.PointerPath = 1; - } - } - lookupTable.erase(reg); - if ((signed)reg < 0 - || mBegin == CallMI->getParent()->begin()) { - break; - } - MI = --mBegin; - } while (1); - mBegin = callInst; - MI = ++mBegin; - // If the next registers operand 1 is not a register or that register - // is not R1, then we don't have any return values. - if (MI->getNumOperands() == 2 - && MI->getOperand(1).isReg() - && MI->getOperand(1).getReg() == AMDIL::R1) { - // Next we check the output register. - reg = MI->getOperand(0).getReg(); - // Now we link the inputs to the output. - for (unsigned x = 0; x < inputRegs.size(); ++x) { - if (lookupTable[inputRegs[x]].second) { - curRes.bits.PointerPath = 1; - lookupTable[reg] = lookupTable[inputRegs[x]]; - InstToPtrMap[CallMI].insert( - lookupTable[reg].second); - break; - } - } - lookupTable.erase(MI->getOperand(1).getReg()); - } - setAsmPrinterFlags(CallMI, curRes); - if (mDebug) { - dbgs() << "Parsing Call Stack End.\n"; - } - return; -} - -// Detect if the current instruction conflicts with another instruction -// and add the instruction to the correct location accordingly. -static void -detectConflictInst( - MachineInstr *MI, - AMDILAS::InstrResEnc &curRes, - RVPVec &lookupTable, - InstPMap &InstToPtrMap, - bool isLoadStore, - unsigned reg, - unsigned dstReg, - bool mDebug) -{ - // If the instruction does not have a point path flag - // associated with it, then we know that no other pointer - // hits this instruciton. - if (!curRes.bits.PointerPath) { - if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) { - curRes.bits.PointerPath = 1; - } - // We don't want to transfer to the register number - // between load/store because the load dest can be completely - // different pointer path and the store doesn't have a real - // destination register. 
- if (!isLoadStore) { - if (mDebug) { - if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) { - dbgs() << "Pointer: " << lookupTable[reg].second->getName(); - assert(dyn_cast<PointerType>(lookupTable[reg].second->getType()) - && "Must be a pointer type for an instruction!"); - switch (dyn_cast<PointerType>( - lookupTable[reg].second->getType())->getAddressSpace()) - { - case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break; - case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break; - case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break; - case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break; - case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break; - - } - dbgs() << lookupTable[reg].first << " Reg: " << reg - << " assigned to reg " << dstReg << ". Inst: "; - MI->dump(); - } - } - // We don't want to do any copies if the register is not virtual - // as it is the result of a CALL. ParseCallInst handles the - // case where the input and output need to be linked up - // if it occurs. The easiest way to check for virtual - // is to check the top bit. - lookupTable[dstReg] = lookupTable[reg]; - } - } else { - if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) { - // Otherwise we have a conflict between two pointers somehow. - curRes.bits.ConflictPtr = 1; - if (mDebug) { - dbgs() << "Pointer: " << lookupTable[reg].second->getName(); - assert(dyn_cast<PointerType>(lookupTable[reg].second->getType()) - && "Must be a pointer type for a conflict instruction!"); - switch (dyn_cast<PointerType>( - lookupTable[reg].second->getType())->getAddressSpace()) - { - case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break; - case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break; - case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break; - case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break; - case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break; - - } - dbgs() << lookupTable[reg].first << " Reg: " << reg; - if (InstToPtrMap[MI].size() > 1) { - dbgs() << " conflicts with:\n "; - for (PtrSet::iterator psib = InstToPtrMap[MI].begin(), - psie = InstToPtrMap[MI].end(); psib != psie; ++psib) { - dbgs() << "\t\tPointer: " << (*psib)->getName() << " "; - assert(dyn_cast<PointerType>((*psib)->getType()) - && "Must be a pointer type for a conflict instruction!"); - (*psib)->dump(); - } - } else { - dbgs() << "."; - } - dbgs() << " Inst: "; - MI->dump(); - } - } - // Add the conflicting values to the pointer set for the instruction - InstToPtrMap[MI].insert(lookupTable[reg].second); - // We don't want to add the destination register if - // we are a load or store. - if (!isLoadStore) { - InstToPtrMap[MI].insert(lookupTable[dstReg].second); - } - } - setAsmPrinterFlags(MI, curRes); -} - -// In this case we want to handle a load instruction. 
-static void -parseLoadInst( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - FIPMap &FIToPtrMap, - RVPVec &lookupTable, - CPoolSet &cpool, - BlockCacheableInfo &bci, - MachineInstr *MI, - bool mDebug) -{ - assert(isLoadInst(ATM->getInstrInfo(), MI) && "Only a load instruction can be parsed by " - "the parseLoadInst function."); - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - unsigned dstReg = MI->getOperand(0).getReg(); - unsigned idx = 0; - const Value *basePtr = NULL; - if (MI->getOperand(1).isReg()) { - idx = MI->getOperand(1).getReg(); - basePtr = lookupTable[idx].second; - // If we don't know what value the register - // is assigned to, then we need to special case - // this instruction. - } else if (MI->getOperand(1).isFI()) { - idx = MI->getOperand(1).getIndex(); - lookupTable[dstReg] = FIToPtrMap[idx]; - } else if (MI->getOperand(1).isCPI()) { - cpool.insert(MI); - } - // If we are a hardware local, then we don't need to track as there - // is only one resource ID that we need to know about, so we - // map it using allocateDefaultID, which maps it to the default. - // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS. - if (isLRPInst(MI, ATM) || !basePtr) { - allocateDefaultID(ATM, curRes, MI, mDebug); - return; - } - // We have a load instruction so we map this instruction - // to the pointer and insert it into the set of known - // load instructions. - InstToPtrMap[MI].insert(basePtr); - PtrToInstMap[basePtr].push_back(MI); - - if (isGlobalInst(ATM->getInstrInfo(), MI)) { - // Add to the cacheable set for the block. If there was a store earlier - // in the block, this call won't actually add it to the cacheable set. - bci.addPossiblyCacheableInst(ATM, MI); - } - - if (mDebug) { - dbgs() << "Assigning instruction to pointer "; - dbgs() << basePtr->getName() << ". Inst: "; - MI->dump(); - } - detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, - idx, dstReg, mDebug); -} - -// In this case we want to handle a store instruction. -static void -parseStoreInst( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - FIPMap &FIToPtrMap, - RVPVec &lookupTable, - CPoolSet &cpool, - BlockCacheableInfo &bci, - MachineInstr *MI, - ByteSet &bytePtrs, - ConflictSet &conflictPtrs, - bool mDebug) -{ - assert(isStoreInst(ATM->getInstrInfo(), MI) && "Only a store instruction can be parsed by " - "the parseStoreInst function."); - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - unsigned dstReg = MI->getOperand(0).getReg(); - - // If the data part of the store instruction is known to - // be a pointer, then we need to mark this pointer as being - // a byte pointer. This is the conservative case that needs - // to be handled correctly. - if (lookupTable[dstReg].second && lookupTable[dstReg].first != ~0U) { - curRes.bits.ConflictPtr = 1; - if (mDebug) { - dbgs() << "Found a case where the pointer is being stored!\n"; - MI->dump(); - dbgs() << "Pointer is "; - lookupTable[dstReg].second->print(dbgs()); - dbgs() << "\n"; - } - //PtrToInstMap[lookupTable[dstReg].second].push_back(MI); - if (lookupTable[dstReg].second->getType()->isPointerTy()) { - conflictPtrs.insert(lookupTable[dstReg].second); - } - } - - // Before we go through the special cases, for the cacheable information - // all we care is if the store if global or not. - if (!isLRPInst(MI, ATM)) { - bci.setReachesExit(); - } - - // If the address is not a register address, - // then we need to lower it as an unknown id. 
- if (!MI->getOperand(1).isReg()) { - if (MI->getOperand(1).isCPI()) { - if (mDebug) { - dbgs() << "Found an instruction with a CPI index #" - << MI->getOperand(1).getIndex() << "!\n"; - } - cpool.insert(MI); - } else if (MI->getOperand(1).isFI()) { - if (mDebug) { - dbgs() << "Found an instruction with a frame index #" - << MI->getOperand(1).getIndex() << "!\n"; - } - // If we are a frame index and we are storing a pointer there, lets - // go ahead and assign the pointer to the location within the frame - // index map so that we can get the value out later. - FIToPtrMap[MI->getOperand(1).getIndex()] = lookupTable[dstReg]; - } - - allocateDefaultID(ATM, curRes, MI, mDebug); - return; - } - unsigned reg = MI->getOperand(1).getReg(); - // If we don't know what value the register - // is assigned to, then we need to special case - // this instruction. - if (!lookupTable[reg].second) { - allocateDefaultID(ATM, curRes, MI, mDebug); - return; - } - // const Value *basePtr = lookupTable[reg].second; - // If we are a hardware local, then we don't need to track as there - // is only one resource ID that we need to know about, so we - // map it using allocateDefaultID, which maps it to the default. - // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS. - if (isLRPInst(MI, ATM)) { - allocateDefaultID(ATM, curRes, MI, mDebug); - return; - } - - // We have a store instruction so we map this instruction - // to the pointer and insert it into the set of known - // store instructions. - InstToPtrMap[MI].insert(lookupTable[reg].second); - PtrToInstMap[lookupTable[reg].second].push_back(MI); - uint16_t RegClass = MI->getDesc().OpInfo[0].RegClass; - switch (RegClass) { - default: - break; - case AMDIL::GPRI8RegClassID: - case AMDIL::GPRV2I8RegClassID: - case AMDIL::GPRI16RegClassID: - if (usesGlobal(ATM, MI)) { - if (mDebug) { - dbgs() << "Annotating instruction as Byte Store. Inst: "; - MI->dump(); - } - curRes.bits.ByteStore = 1; - setAsmPrinterFlags(MI, curRes); - const PointerType *PT = dyn_cast<PointerType>( - lookupTable[reg].second->getType()); - if (PT) { - bytePtrs.insert(lookupTable[reg].second); - } - } - break; - }; - // If we are a truncating store, then we need to determine the - // size of the pointer that we are truncating to, and if we - // are less than 32 bits, we need to mark the pointer as a - // byte store pointer. - switch (MI->getOpcode()) { - case AMDIL::GLOBALTRUNCSTORE_i16i8: - case AMDIL::GLOBALTRUNCSTORE_v2i16i8: - case AMDIL::GLOBALTRUNCSTORE_i32i8: - case AMDIL::GLOBALTRUNCSTORE_v2i32i8: - case AMDIL::GLOBALTRUNCSTORE_i64i8: - case AMDIL::GLOBALTRUNCSTORE_v2i64i8: - case AMDIL::GLOBALTRUNCSTORE_i32i16: - case AMDIL::GLOBALTRUNCSTORE_i64i16: - case AMDIL::GLOBALSTORE_i8: - case AMDIL::GLOBALSTORE_i16: - curRes.bits.ByteStore = 1; - setAsmPrinterFlags(MI, curRes); - bytePtrs.insert(lookupTable[reg].second); - break; - default: - break; - } - - if (mDebug) { - dbgs() << "Assigning instruction to pointer "; - dbgs() << lookupTable[reg].second->getName() << ". Inst: "; - MI->dump(); - } - detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, - reg, dstReg, mDebug); -} - -// In this case we want to handle an atomic instruction. 
-static void -parseAtomicInst( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - RVPVec &lookupTable, - BlockCacheableInfo &bci, - MachineInstr *MI, - ByteSet &bytePtrs, - bool mDebug) -{ - assert(isAtomicInst(ATM->getInstrInfo(), MI) && "Only an atomic instruction can be parsed by " - "the parseAtomicInst function."); - AMDILAS::InstrResEnc curRes; - unsigned dstReg = MI->getOperand(0).getReg(); - unsigned reg = 0; - getAsmPrinterFlags(MI, curRes); - unsigned numOps = MI->getNumOperands(); - bool found = false; - while (--numOps) { - MachineOperand &Op = MI->getOperand(numOps); - if (!Op.isReg()) { - continue; - } - reg = Op.getReg(); - // If the register is not known to be owned by a pointer - // then we can ignore it - if (!lookupTable[reg].second) { - continue; - } - // if the pointer is known to be local, region or private, then we - // can ignore it. Although there are no private atomics, we still - // do this check so we don't have to write a new function to check - // for only local and region. - if (isLRPInst(MI, ATM)) { - continue; - } - found = true; - InstToPtrMap[MI].insert(lookupTable[reg].second); - PtrToInstMap[lookupTable[reg].second].push_back(MI); - - // We now know we have an atomic operation on global memory. - // This is a store so must update the cacheable information. - bci.setReachesExit(); - - // Only do if have SC with arena atomic bug fix (EPR 326883). - // TODO: enable once SC with EPR 326883 has been promoted to CAL. - if (ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_150) { - // Force pointers that are used by atomics to be in the arena. - // If they were allowed to be accessed as RAW they would cause - // all access to use the slow complete path. - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on atomic instruction: "; - MI->dump(); - } - curRes.bits.ByteStore = 1; - bytePtrs.insert(lookupTable[reg].second); - } - - if (mDebug) { - dbgs() << "Assigning instruction to pointer "; - dbgs() << lookupTable[reg].second->getName() << ". Inst: "; - MI->dump(); - } - detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, - reg, dstReg, mDebug); - } - if (!found) { - allocateDefaultID(ATM, curRes, MI, mDebug); - } -} -// In this case we want to handle a counter instruction. -static void -parseAppendInst( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - RVPVec &lookupTable, - MachineInstr *MI, - bool mDebug) -{ - assert(isAppendInst(ATM->getInstrInfo(), MI) && "Only an atomic counter instruction can be " - "parsed by the parseAppendInst function."); - AMDILAS::InstrResEnc curRes; - unsigned dstReg = MI->getOperand(0).getReg(); - unsigned reg = MI->getOperand(1).getReg(); - getAsmPrinterFlags(MI, curRes); - // If the register is not known to be owned by a pointer - // then we set it to the default - if (!lookupTable[reg].second) { - allocateDefaultID(ATM, curRes, MI, mDebug); - return; - } - InstToPtrMap[MI].insert(lookupTable[reg].second); - PtrToInstMap[lookupTable[reg].second].push_back(MI); - if (mDebug) { - dbgs() << "Assigning instruction to pointer "; - dbgs() << lookupTable[reg].second->getName() << ". Inst: "; - MI->dump(); - } - detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, - reg, dstReg, mDebug); -} -// In this case we want to handle an Image instruction. 
-static void -parseImageInst( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - FIPMap &FIToPtrMap, - RVPVec &lookupTable, - MachineInstr *MI, - bool mDebug) -{ - assert(isImageInst(ATM->getInstrInfo(), MI) && "Only an image instruction can be " - "parsed by the parseImageInst function."); - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - // AMDILKernelManager *km = - // (AMDILKernelManager *)ATM->getSubtargetImpl()->getKernelManager(); - AMDILMachineFunctionInfo *mMFI = MI->getParent()->getParent() - ->getInfo<AMDILMachineFunctionInfo>(); - if (MI->getOpcode() == AMDIL::IMAGE2D_WRITE - || MI->getOpcode() == AMDIL::IMAGE3D_WRITE) { - unsigned dstReg = MI->getOperand(0).getReg(); - curRes.bits.ResourceID = lookupTable[dstReg].first & 0xFFFF; - curRes.bits.isImage = 1; - InstToPtrMap[MI].insert(lookupTable[dstReg].second); - PtrToInstMap[lookupTable[dstReg].second].push_back(MI); - if (mDebug) { - dbgs() << "Assigning instruction to pointer "; - dbgs() << lookupTable[dstReg].second->getName() << ". Inst: "; - MI->dump(); - } - } else { - // unsigned dstReg = MI->getOperand(0).getReg(); - unsigned reg = MI->getOperand(1).getReg(); - - // If the register is not known to be owned by a pointer - // then we set it to the default - if (!lookupTable[reg].second) { - assert(!"This should not happen for images!"); - allocateDefaultID(ATM, curRes, MI, mDebug); - return; - } - InstToPtrMap[MI].insert(lookupTable[reg].second); - PtrToInstMap[lookupTable[reg].second].push_back(MI); - if (mDebug) { - dbgs() << "Assigning instruction to pointer "; - dbgs() << lookupTable[reg].second->getName() << ". Inst: "; - MI->dump(); - } - switch (MI->getOpcode()) { - case AMDIL::IMAGE2D_READ: - case AMDIL::IMAGE2D_READ_UNNORM: - case AMDIL::IMAGE3D_READ: - case AMDIL::IMAGE3D_READ_UNNORM: - curRes.bits.ResourceID = lookupTable[reg].first & 0xFFFF; - if (MI->getOperand(3).isReg()) { - // Our sampler is not a literal value. - char buffer[256]; - memset(buffer, 0, sizeof(buffer)); - std::string sampler_name = ""; - unsigned reg = MI->getOperand(3).getReg(); - if (lookupTable[reg].second) { - sampler_name = lookupTable[reg].second->getName(); - } - if (sampler_name.empty()) { - sampler_name = findSamplerName(MI, lookupTable, FIToPtrMap, ATM); - } - uint32_t val = mMFI->addSampler(sampler_name, ~0U); - if (mDebug) { - dbgs() << "Mapping kernel sampler " << sampler_name - << " to sampler number " << val << " for Inst:\n"; - MI->dump(); - } - MI->getOperand(3).ChangeToImmediate(val); - } else { - // Our sampler is known at runtime as a literal, lets make sure - // that the metadata for it is known. 
- char buffer[256]; - memset(buffer, 0, sizeof(buffer)); - sprintf(buffer,"_%d", (int32_t)MI->getOperand(3).getImm()); - std::string sampler_name = std::string("unknown") + std::string(buffer); - uint32_t val = mMFI->addSampler(sampler_name, MI->getOperand(3).getImm()); - if (mDebug) { - dbgs() << "Mapping internal sampler " << sampler_name - << " to sampler number " << val << " for Inst:\n"; - MI->dump(); - } - MI->getOperand(3).setImm(val); - } - break; - case AMDIL::IMAGE2D_INFO0: - case AMDIL::IMAGE3D_INFO0: - curRes.bits.ResourceID = lookupTable[reg].first >> 16; - break; - case AMDIL::IMAGE2D_INFO1: - case AMDIL::IMAGE2DA_INFO1: - curRes.bits.ResourceID = (lookupTable[reg].first >> 16) + 1; - break; - }; - curRes.bits.isImage = 1; - } - setAsmPrinterFlags(MI, curRes); -} -// This case handles the rest of the instructions -static void -parseInstruction( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - RVPVec &lookupTable, - CPoolSet &cpool, - MachineInstr *MI, - bool mDebug) -{ - assert(!isAtomicInst(ATM->getInstrInfo(), MI) && !isStoreInst(ATM->getInstrInfo(), MI) && !isLoadInst(ATM->getInstrInfo(), MI) && - !isAppendInst(ATM->getInstrInfo(), MI) && !isImageInst(ATM->getInstrInfo(), MI) && - "Atomic/Load/Store/Append/Image insts should not be handled here!"); - unsigned numOps = MI->getNumOperands(); - // If we don't have any operands, we can skip this instruction - if (!numOps) { - return; - } - // if the dst operand is not a register, then we can skip - // this instruction. That is because we are probably a branch - // or jump instruction. - if (!MI->getOperand(0).isReg()) { - return; - } - // If we are a LOADCONST_i32, we might be a sampler, so we need - // to propogate the LOADCONST to IMAGE[2|3]D_READ instructions. - if (MI->getOpcode() == AMDIL::LOADCONST_i32) { - uint32_t val = MI->getOperand(1).getImm(); - MachineOperand* oldPtr = &MI->getOperand(0); - MachineOperand* moPtr = oldPtr->getNextOperandForReg(); - while (moPtr) { - oldPtr = moPtr; - moPtr = oldPtr->getNextOperandForReg(); - switch (oldPtr->getParent()->getOpcode()) { - default: - break; - case AMDIL::IMAGE2D_READ: - case AMDIL::IMAGE2D_READ_UNNORM: - case AMDIL::IMAGE3D_READ: - case AMDIL::IMAGE3D_READ_UNNORM: - if (mDebug) { - dbgs() << "Found a constant sampler for image read inst: "; - oldPtr->getParent()->print(dbgs()); - } - oldPtr->ChangeToImmediate(val); - break; - } - } - } - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - unsigned dstReg = MI->getOperand(0).getReg(); - unsigned reg = 0; - while (--numOps) { - MachineOperand &Op = MI->getOperand(numOps); - // if the operand is not a register, then we can ignore it - if (!Op.isReg()) { - if (Op.isCPI()) { - cpool.insert(MI); - } - continue; - } - reg = Op.getReg(); - // If the register is not known to be owned by a pointer - // then we can ignore it - if (!lookupTable[reg].second) { - continue; - } - detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, false, - reg, dstReg, mDebug); - - } -} - -// This function parses the basic block and based on the instruction type, -// calls the function to finish parsing the instruction. 
-static void -parseBasicBlock( - const AMDILTargetMachine *ATM, - MachineBasicBlock *MB, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - FIPMap &FIToPtrMap, - RVPVec &lookupTable, - ByteSet &bytePtrs, - ConflictSet &conflictPtrs, - CPoolSet &cpool, - BlockCacheableInfo &bci, - bool mDebug) -{ - for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end(); - mbb != mbe; ++mbb) { - MachineInstr *MI = mbb; - if (MI->getOpcode() == AMDIL::CALL) { - parseCall(ATM, InstToPtrMap, PtrToInstMap, lookupTable, - mbb, mbe, mDebug); - } - else if (isLoadInst(ATM->getInstrInfo(), MI)) { - parseLoadInst(ATM, InstToPtrMap, PtrToInstMap, - FIToPtrMap, lookupTable, cpool, bci, MI, mDebug); - } else if (isStoreInst(ATM->getInstrInfo(), MI)) { - parseStoreInst(ATM, InstToPtrMap, PtrToInstMap, - FIToPtrMap, lookupTable, cpool, bci, MI, bytePtrs, conflictPtrs, mDebug); - } else if (isAtomicInst(ATM->getInstrInfo(), MI)) { - parseAtomicInst(ATM, InstToPtrMap, PtrToInstMap, - lookupTable, bci, MI, bytePtrs, mDebug); - } else if (isAppendInst(ATM->getInstrInfo(), MI)) { - parseAppendInst(ATM, InstToPtrMap, PtrToInstMap, - lookupTable, MI, mDebug); - } else if (isImageInst(ATM->getInstrInfo(), MI)) { - parseImageInst(ATM, InstToPtrMap, PtrToInstMap, - FIToPtrMap, lookupTable, MI, mDebug); - } else { - parseInstruction(ATM, InstToPtrMap, PtrToInstMap, - lookupTable, cpool, MI, mDebug); - } - } -} - -// Follows the Reverse Post Order Traversal of the basic blocks to -// determine which order to parse basic blocks in. -void -parseFunction( - const AMDILPointerManager *PM, - const AMDILTargetMachine *ATM, - MachineFunction &MF, - InstPMap &InstToPtrMap, - PtrIMap &PtrToInstMap, - FIPMap &FIToPtrMap, - RVPVec &lookupTable, - ByteSet &bytePtrs, - ConflictSet &conflictPtrs, - CPoolSet &cpool, - MBBCacheableMap &mbbCacheable, - bool mDebug) -{ - if (mDebug) { - MachineDominatorTree *dominatorTree = &PM - ->getAnalysis<MachineDominatorTree>(); - dominatorTree->dump(); - } - - std::list<MachineBasicBlock*> prop_worklist; - - ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); - for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator - curBlock = RPOT.begin(), endBlock = RPOT.end(); - curBlock != endBlock; ++curBlock) { - MachineBasicBlock *MB = (*curBlock); - BlockCacheableInfo &bci = mbbCacheable[MB]; - for (MachineBasicBlock::pred_iterator mbbit = MB->pred_begin(), - mbbitend = MB->pred_end(); - mbbit != mbbitend; - mbbit++) { - MBBCacheableMap::const_iterator mbbcmit = mbbCacheable.find(*mbbit); - if (mbbcmit != mbbCacheable.end() && - mbbcmit->second.storeReachesExit()) { - bci.setReachesTop(); - break; - } - } - - if (mDebug) { - dbgs() << "[BlockOrdering] Parsing CurrentBlock: " - << MB->getNumber() << "\n"; - } - parseBasicBlock(ATM, MB, InstToPtrMap, PtrToInstMap, - FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool, bci, mDebug); - - if (bci.storeReachesExit()) - prop_worklist.push_back(MB); - - if (mDebug) { - dbgs() << "BCI info: Top: " << bci.storeReachesTop() << " Exit: " - << bci.storeReachesExit() << "\n Instructions:\n"; - for (CacheableInstrSet::const_iterator cibit = bci.cacheableBegin(), - cibitend = bci.cacheableEnd(); - cibit != cibitend; - cibit++) - { - (*cibit)->dump(); - } - } - } - - // This loop pushes any "storeReachesExit" flags into successor - // blocks until the flags have been fully propagated. This will - // ensure that blocks that have reachable stores due to loops - // are labeled appropriately. 
- while (!prop_worklist.empty()) { - MachineBasicBlock *wlb = prop_worklist.front(); - prop_worklist.pop_front(); - for (MachineBasicBlock::succ_iterator mbbit = wlb->succ_begin(), - mbbitend = wlb->succ_end(); - mbbit != mbbitend; - mbbit++) - { - BlockCacheableInfo &blockCache = mbbCacheable[*mbbit]; - if (!blockCache.storeReachesTop()) { - blockCache.setReachesTop(); - prop_worklist.push_back(*mbbit); - } - if (mDebug) { - dbgs() << "BCI Prop info: " << (*mbbit)->getNumber() << " Top: " - << blockCache.storeReachesTop() << " Exit: " - << blockCache.storeReachesExit() - << "\n"; - } - } - } -} - -// Helper function that dumps to dbgs() information about -// a pointer set. - void -dumpPointers(AppendSet &Ptrs, const char *str) -{ - if (Ptrs.empty()) { - return; - } - dbgs() << "[Dump]" << str << " found: " << "\n"; - for (AppendSet::iterator sb = Ptrs.begin(); - sb != Ptrs.end(); ++sb) { - (*sb)->dump(); - } - dbgs() << "\n"; -} -// Helper function that dumps to dbgs() information about -// a pointer set. - void -dumpPointers(PtrSet &Ptrs, const char *str) -{ - if (Ptrs.empty()) { - return; - } - dbgs() << "[Dump]" << str << " found: " << "\n"; - for (PtrSet::iterator sb = Ptrs.begin(); - sb != Ptrs.end(); ++sb) { - (*sb)->dump(); - } - dbgs() << "\n"; -} -// Function that detects all the conflicting pointers and adds -// the pointers that are detected to the conflict set, otherwise -// they are added to the raw or byte set based on their usage. -void -detectConflictingPointers( - const AMDILTargetMachine *ATM, - InstPMap &InstToPtrMap, - ByteSet &bytePtrs, - RawSet &rawPtrs, - ConflictSet &conflictPtrs, - bool mDebug) -{ - if (InstToPtrMap.empty()) { - return; - } - PtrSet aliasedPtrs; - const AMDILSubtarget *STM = ATM->getSubtargetImpl(); - for (InstPMap::iterator - mapIter = InstToPtrMap.begin(), iterEnd = InstToPtrMap.end(); - mapIter != iterEnd; ++mapIter) { - if (mDebug) { - dbgs() << "Instruction: "; - (mapIter)->first->dump(); - } - MachineInstr* MI = mapIter->first; - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - if (curRes.bits.isImage) { - continue; - } - bool byte = false; - // We might have a case where more than 1 pointers is going to the same - // I/O instruction - if (mDebug) { - dbgs() << "Base Pointer[s]:\n"; - } - for (PtrSet::iterator cfIter = mapIter->second.begin(), - cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) { - if (mDebug) { - (*cfIter)->dump(); - } - if (bytePtrs.count(*cfIter)) { - if (mDebug) { - dbgs() << "Byte pointer found!\n"; - } - byte = true; - break; - } - } - if (byte) { - for (PtrSet::iterator cfIter = mapIter->second.begin(), - cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) { - const Value *ptr = (*cfIter); - if (isLRPInst(mapIter->first, ATM)) { - // We don't need to deal with pointers to local/region/private - // memory regions - continue; - } - if (mDebug) { - dbgs() << "Adding pointer " << (ptr)->getName() - << " to byte set!\n"; - } - const PointerType *PT = dyn_cast<PointerType>(ptr->getType()); - if (PT) { - bytePtrs.insert(ptr); - } - } - } else { - for (PtrSet::iterator cfIter = mapIter->second.begin(), - cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) { - const Value *ptr = (*cfIter); - // bool aliased = false; - if (isLRPInst(mapIter->first, ATM)) { - // We don't need to deal with pointers to local/region/private - // memory regions - continue; - } - const Argument *arg = dyn_cast_or_null<Argument>(*cfIter); - if (!arg) { - continue; - } - if 
(!STM->device()->isSupported(AMDILDeviceInfo::NoAlias) - && !arg->hasNoAliasAttr()) { - if (mDebug) { - dbgs() << "Possible aliased pointer found!\n"; - } - aliasedPtrs.insert(ptr); - } - if (mapIter->second.size() > 1) { - if (mDebug) { - dbgs() << "Adding pointer " << ptr->getName() - << " to conflict set!\n"; - } - const PointerType *PT = dyn_cast<PointerType>(ptr->getType()); - if (PT) { - conflictPtrs.insert(ptr); - } - } - if (mDebug) { - dbgs() << "Adding pointer " << ptr->getName() - << " to raw set!\n"; - } - const PointerType *PT = dyn_cast<PointerType>(ptr->getType()); - if (PT) { - rawPtrs.insert(ptr); - } - } - } - if (mDebug) { - dbgs() << "\n"; - } - } - // If we have any aliased pointers and byte pointers exist, - // then make sure that all of the aliased pointers are - // part of the byte pointer set. - if (!bytePtrs.empty()) { - for (PtrSet::iterator aIter = aliasedPtrs.begin(), - aEnd = aliasedPtrs.end(); aIter != aEnd; ++aIter) { - if (mDebug) { - dbgs() << "Moving " << (*aIter)->getName() - << " from raw to byte.\n"; - } - bytePtrs.insert(*aIter); - rawPtrs.erase(*aIter); - } - } -} -// Function that detects aliased constant pool operations. -void -detectAliasedCPoolOps( - TargetMachine &TM, - CPoolSet &cpool, - bool mDebug - ) -{ - const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); - if (mDebug && !cpool.empty()) { - dbgs() << "Instructions w/ CPool Ops: \n"; - } - // The algorithm for detecting aliased cpool is as follows. - // For each instruction that has a cpool argument - // follow def-use chain - // if instruction is a load and load is a private load, - // switch to constant pool load - for (CPoolSet::iterator cpb = cpool.begin(), cpe = cpool.end(); - cpb != cpe; ++cpb) { - if (mDebug) { - (*cpb)->dump(); - } - std::queue<MachineInstr*> queue; - std::set<MachineInstr*> visited; - queue.push(*cpb); - MachineInstr *cur; - while (!queue.empty()) { - cur = queue.front(); - queue.pop(); - if (visited.count(cur)) { - continue; - } - if (isLoadInst(TM.getInstrInfo(), cur) && isPrivateInst(TM.getInstrInfo(), cur)) { - // If we are a private load and the register is - // used in the address register, we need to - // switch from private to constant pool load. - if (mDebug) { - dbgs() << "Found an instruction that is a private load " - << "but should be a constant pool load.\n"; - cur->print(dbgs()); - dbgs() << "\n"; - } - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(cur, curRes); - curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID); - curRes.bits.ConflictPtr = 1; - setAsmPrinterFlags(cur, curRes); - cur->setDesc(TM.getInstrInfo()->get( - (cur->getOpcode() - AMDIL::PRIVATEAEXTLOAD_f32) - + AMDIL::CPOOLAEXTLOAD_f32)); - } else { - if (cur->getOperand(0).isReg()) { - MachineOperand* ptr = cur->getOperand(0).getNextOperandForReg(); - while (ptr && !ptr->isDef() && ptr->isReg()) { - queue.push(ptr->getParent()); - ptr = ptr->getNextOperandForReg(); - } - } - } - visited.insert(cur); - } - } -} -// Function that detects fully cacheable pointers. Fully cacheable pointers -// are pointers that have no writes to them and -fno-alias is specified. 
-void -detectFullyCacheablePointers( - const AMDILTargetMachine *ATM, - PtrIMap &PtrToInstMap, - RawSet &rawPtrs, - CacheableSet &cacheablePtrs, - ConflictSet &conflictPtrs, - bool mDebug - ) -{ - if (PtrToInstMap.empty()) { - return; - } - const AMDILSubtarget *STM - = ATM->getSubtargetImpl(); - // 4XXX hardware doesn't support cached uav opcodes and we assume - // no aliasing for this to work. Also in debug mode we don't do - // any caching. - if (STM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX - || !STM->device()->isSupported(AMDILDeviceInfo::CachedMem)) { - return; - } - if (STM->device()->isSupported(AMDILDeviceInfo::NoAlias)) { - for (PtrIMap::iterator mapIter = PtrToInstMap.begin(), - iterEnd = PtrToInstMap.end(); mapIter != iterEnd; ++mapIter) { - if (mDebug) { - dbgs() << "Instruction: "; - mapIter->first->dump(); - } - // Skip the pointer if we have already detected it. - if (cacheablePtrs.count(mapIter->first)) { - continue; - } - bool cacheable = true; - for (std::vector<MachineInstr*>::iterator - miBegin = mapIter->second.begin(), - miEnd = mapIter->second.end(); miBegin != miEnd; ++miBegin) { - if (isStoreInst(ATM->getInstrInfo(), *miBegin) || - isImageInst(ATM->getInstrInfo(), *miBegin) || - isAtomicInst(ATM->getInstrInfo(), *miBegin)) { - cacheable = false; - break; - } - } - // we aren't cacheable, so lets move on to the next instruction - if (!cacheable) { - continue; - } - // If we are in the conflict set, lets move to the next instruction - // FIXME: we need to check to see if the pointers that conflict with - // the current pointer are also cacheable. If they are, then add them - // to the cacheable list and not fail. - if (conflictPtrs.count(mapIter->first)) { - continue; - } - // Otherwise if we have no stores and no conflicting pointers, we can - // be added to the cacheable set. - if (mDebug) { - dbgs() << "Adding pointer " << mapIter->first->getName(); - dbgs() << " to cached set!\n"; - } - const PointerType *PT = dyn_cast<PointerType>(mapIter->first->getType()); - if (PT) { - cacheablePtrs.insert(mapIter->first); - } - } - } -} - -// Are any of the pointers in PtrSet also in the BytePtrs or the CachePtrs? -static bool -ptrSetIntersectsByteOrCache( - PtrSet &cacheSet, - ByteSet &bytePtrs, - CacheableSet &cacheablePtrs - ) -{ - for (PtrSet::const_iterator psit = cacheSet.begin(), - psitend = cacheSet.end(); - psit != psitend; - psit++) { - if (bytePtrs.find(*psit) != bytePtrs.end() || - cacheablePtrs.find(*psit) != cacheablePtrs.end()) { - return true; - } - } - return false; -} - -// Function that detects which instructions are cacheable even if -// all instructions of the pointer are not cacheable. The resulting -// set of instructions will not contain Ptrs that are in the cacheable -// ptr set (under the assumption they will get marked cacheable already) -// or pointers in the byte set, since they are not cacheable. 
-void -detectCacheableInstrs( - MBBCacheableMap &bbCacheable, - InstPMap &InstToPtrMap, - CacheableSet &cacheablePtrs, - ByteSet &bytePtrs, - CacheableInstrSet &cacheableSet, - bool mDebug - ) - -{ - for (MBBCacheableMap::const_iterator mbbcit = bbCacheable.begin(), - mbbcitend = bbCacheable.end(); - mbbcit != mbbcitend; - mbbcit++) { - for (CacheableInstrSet::const_iterator bciit - = mbbcit->second.cacheableBegin(), - bciitend - = mbbcit->second.cacheableEnd(); - bciit != bciitend; - bciit++) { - if (!ptrSetIntersectsByteOrCache(InstToPtrMap[*bciit], - bytePtrs, - cacheablePtrs)) { - cacheableSet.insert(*bciit); - } - } - } -} -// This function annotates the cacheable pointers with the -// CacheableRead bit. The cacheable read bit is set -// when the number of write images is not equal to the max -// or if the default RAW_UAV_ID is equal to 11. The first -// condition means that there is a raw uav between 0 and 7 -// that is available for cacheable reads and the second -// condition means that UAV 11 is available for cacheable -// reads. -void -annotateCacheablePtrs( - TargetMachine &TM, - PtrIMap &PtrToInstMap, - CacheableSet &cacheablePtrs, - ByteSet &bytePtrs, - uint32_t numWriteImages, - bool mDebug) -{ - const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); - // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager(); - PtrSet::iterator siBegin, siEnd; - std::vector<MachineInstr*>::iterator miBegin, miEnd; - AMDILMachineFunctionInfo *mMFI = NULL; - // First we can check the cacheable pointers - for (siBegin = cacheablePtrs.begin(), siEnd = cacheablePtrs.end(); - siBegin != siEnd; ++siBegin) { - assert(!bytePtrs.count(*siBegin) && "Found a cacheable pointer " - "that also exists as a byte pointer!"); - for (miBegin = PtrToInstMap[*siBegin].begin(), - miEnd = PtrToInstMap[*siBegin].end(); - miBegin != miEnd; ++miBegin) { - if (mDebug) { - dbgs() << "Annotating pointer as cacheable. Inst: "; - (*miBegin)->dump(); - } - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - assert(!curRes.bits.ByteStore && "No cacheable pointers should have the " - "byte Store flag set!"); - // If UAV11 is enabled, then we can enable cached reads. - if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) { - curRes.bits.CacheableRead = 1; - curRes.bits.ResourceID = 11; - setAsmPrinterFlags(*miBegin, curRes); - if (!mMFI) { - mMFI = (*miBegin)->getParent()->getParent() - ->getInfo<AMDILMachineFunctionInfo>(); - } - mMFI->uav_insert(curRes.bits.ResourceID); - } - } - } -} - -// A byte pointer is a pointer that along the pointer path has a -// byte store assigned to it. 
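The annotation routines above all work the same way: they pack per-instruction flags (resource ID, byte-store, cacheable-read, and so on) into the asm-printer flag word and write it back with setAsmPrinterFlags. The sketch below shows that bitfield-packing idea in isolation; the field names echo the code above, but the widths and layout are assumptions for illustration, not the driver's actual InstrResEnc encoding.

// Illustrative packing of per-instruction resource flags into a 16-bit word.
#include <cstdint>
#include <cstdio>

union ResEnc {
  struct {
    uint16_t ResourceID    : 10; // which UAV/CB the access targets
    uint16_t ByteStore     : 1;  // needs the byte-addressable (arena) path
    uint16_t CacheableRead : 1;  // read may go through the cached path
    uint16_t ConflictPtr   : 1;  // pointer aliases another base pointer
    uint16_t HardwareInst  : 1;  // lowered to a hardware constant fetch
  } bits;
  uint16_t u16all;
};

int main() {
  ResEnc r{};                 // start with all flags cleared
  r.bits.ResourceID = 11;     // e.g. the cacheable raw UAV
  r.bits.CacheableRead = 1;
  std::printf("flags = 0x%04x\n", r.u16all);
}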
-void -annotateBytePtrs( - TargetMachine &TM, - PtrIMap &PtrToInstMap, - ByteSet &bytePtrs, - RawSet &rawPtrs, - bool mDebug - ) -{ - const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); - AMDILKernelManager *KM = STM->getKernelManager(); - PtrSet::iterator siBegin, siEnd; - std::vector<MachineInstr*>::iterator miBegin, miEnd; - uint32_t arenaID = STM->device() - ->getResourceID(AMDILDevice::ARENA_UAV_ID); - if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { - arenaID = ARENA_SEGMENT_RESERVED_UAVS + 1; - } - AMDILMachineFunctionInfo *mMFI = NULL; - for (siBegin = bytePtrs.begin(), siEnd = bytePtrs.end(); - siBegin != siEnd; ++siBegin) { - const Value* val = (*siBegin); - const PointerType *PT = dyn_cast<PointerType>(val->getType()); - if (!PT) { - continue; - } - const Argument *curArg = dyn_cast<Argument>(val); - assert(!rawPtrs.count(*siBegin) && "Found a byte pointer " - "that also exists as a raw pointer!"); - bool arenaInc = false; - for (miBegin = PtrToInstMap[*siBegin].begin(), - miEnd = PtrToInstMap[*siBegin].end(); - miBegin != miEnd; ++miBegin) { - if (mDebug) { - dbgs() << "Annotating pointer as arena. Inst: "; - (*miBegin)->dump(); - } - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - - if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) - && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) { - // If hardware constant mem is enabled, then we need to - // get the constant pointer CB number and use that to specify - // the resource ID. - AMDILGlobalManager *GM = STM->getGlobalManager(); - const StringRef funcName = (*miBegin)->getParent()->getParent() - ->getFunction()->getName(); - if (GM->isKernel(funcName)) { - const kernel &krnl = GM->getKernel(funcName); - curRes.bits.ResourceID = GM->getConstPtrCB(krnl, - (*siBegin)->getName()); - curRes.bits.HardwareInst = 1; - } else { - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::CONSTANT_ID); - } - } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem) - && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) { - // If hardware local mem is enabled, get the local mem ID from - // the device to use as the ResourceID - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::LDS_ID); - if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { - assert(curRes.bits.ResourceID && "Atomic resource ID " - "cannot be non-zero!"); - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem) - && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) { - // If hardware region mem is enabled, get the gds mem ID from - // the device to use as the ResourceID - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::GDS_ID); - if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { - assert(curRes.bits.ResourceID && "Atomic resource ID " - "cannot be non-zero!"); - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) - && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::SCRATCH_ID); - } else { - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; - (*miBegin)->print(dbgs()); - } - curRes.bits.ByteStore = 1; - curRes.bits.ResourceID = (curArg && curArg->hasNoAliasAttr()) ? 
arenaID - : STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); - if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { - arenaInc = true; - } - if (isAtomicInst(TM.getInstrInfo(), *miBegin) && - STM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) { - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - // If we are an arena instruction, we need to switch the atomic opcode - // from the global version to the arena version. - MachineInstr *MI = *miBegin; - MI->setDesc( - TM.getInstrInfo()->get( - (MI->getOpcode() - AMDIL::ATOM_G_ADD) + AMDIL::ATOM_A_ADD)); - } - if (mDebug) { - dbgs() << "Annotating pointer as arena. Inst: "; - (*miBegin)->dump(); - } - } - setAsmPrinterFlags(*miBegin, curRes); - KM->setUAVID(*siBegin, curRes.bits.ResourceID); - if (!mMFI) { - mMFI = (*miBegin)->getParent()->getParent() - ->getInfo<AMDILMachineFunctionInfo>(); - } - mMFI->uav_insert(curRes.bits.ResourceID); - } - if (arenaInc) { - ++arenaID; - } - } -} -// An append pointer is a opaque object that has append instructions -// in its path. -void -annotateAppendPtrs( - TargetMachine &TM, - PtrIMap &PtrToInstMap, - AppendSet &appendPtrs, - bool mDebug) -{ - unsigned currentCounter = 0; - // const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); - // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager(); - MachineFunction *MF = NULL; - for (AppendSet::iterator asBegin = appendPtrs.begin(), - asEnd = appendPtrs.end(); asBegin != asEnd; ++asBegin) - { - bool usesWrite = false; - bool usesRead = false; - const Value* curVal = *asBegin; - if (mDebug) { - dbgs() << "Counter: " << curVal->getName() - << " assigned the counter " << currentCounter << "\n"; - } - for (std::vector<MachineInstr*>::iterator - miBegin = PtrToInstMap[curVal].begin(), - miEnd = PtrToInstMap[curVal].end(); miBegin != miEnd; ++miBegin) { - MachineInstr *MI = *miBegin; - if (!MF) { - MF = MI->getParent()->getParent(); - } - unsigned opcode = MI->getOpcode(); - switch (opcode) { - default: - if (mDebug) { - dbgs() << "Skipping instruction: "; - MI->dump(); - } - break; - case AMDIL::APPEND_ALLOC: - case AMDIL::APPEND_ALLOC_NORET: - usesWrite = true; - MI->getOperand(1).ChangeToImmediate(currentCounter); - if (mDebug) { - dbgs() << "Assing to counter " << currentCounter << " Inst: "; - MI->dump(); - } - break; - case AMDIL::APPEND_CONSUME: - case AMDIL::APPEND_CONSUME_NORET: - usesRead = true; - MI->getOperand(1).ChangeToImmediate(currentCounter); - if (mDebug) { - dbgs() << "Assing to counter " << currentCounter << " Inst: "; - MI->dump(); - } - break; - }; - } - if (usesWrite && usesRead && MF) { - MF->getInfo<AMDILMachineFunctionInfo>()->addErrorMsg( - amd::CompilerErrorMessage[INCORRECT_COUNTER_USAGE]); - } - ++currentCounter; - } -} -// A raw pointer is any pointer that does not have byte store in its path. -static void -annotateRawPtrs( - TargetMachine &TM, - PtrIMap &PtrToInstMap, - RawSet &rawPtrs, - ByteSet &bytePtrs, - uint32_t numWriteImages, - bool mDebug - ) -{ - const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); - AMDILKernelManager *KM = STM->getKernelManager(); - PtrSet::iterator siBegin, siEnd; - std::vector<MachineInstr*>::iterator miBegin, miEnd; - AMDILMachineFunctionInfo *mMFI = NULL; - - // Now all of the raw pointers will go to the raw uav. 
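The append-counter handling above assigns each counter pointer the next sequential counter ID and reports an error when a single counter is used for both alloc and consume. A toy restatement of that bookkeeping, with every name invented for illustration:

// Toy sketch of the counter numbering in annotateAppendPtrs.
#include <cstdio>
#include <string>
#include <vector>

enum class CounterOp { Alloc, Consume };
struct CounterUse { std::string counter; CounterOp op; };

static bool numberCounters(const std::vector<std::vector<CounterUse>> &perCounter) {
  bool ok = true;
  unsigned nextId = 0;
  for (const auto &uses : perCounter) {
    bool reads = false, writes = false;
    for (const CounterUse &u : uses) {
      (u.op == CounterOp::Alloc ? writes : reads) = true;
      std::printf("%s -> counter %u\n", u.counter.c_str(), nextId);
    }
    if (reads && writes)
      ok = false;                    // mixed alloc/consume on one counter
    ++nextId;
  }
  return ok;
}

int main() {
  std::vector<std::vector<CounterUse>> counters = {
    {{"c0", CounterOp::Alloc}},
    {{"c1", CounterOp::Alloc}, {"c1", CounterOp::Consume}}};
  std::printf(numberCounters(counters) ? "ok\n" : "counter misuse\n");
}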
- for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end(); - siBegin != siEnd; ++siBegin) { - const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); - if (!PT) { - continue; - } - assert(!bytePtrs.count(*siBegin) && "Found a raw pointer " - " that also exists as a byte pointers!"); - for (miBegin = PtrToInstMap[*siBegin].begin(), - miEnd = PtrToInstMap[*siBegin].end(); - miBegin != miEnd; ++miBegin) { - if (mDebug) { - dbgs() << "Annotating pointer as raw. Inst: "; - (*miBegin)->dump(); - } - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - if (!curRes.bits.ConflictPtr) { - assert(!curRes.bits.ByteStore - && "Found a instruction that is marked as " - "raw but has a byte store bit set!"); - } else if (curRes.bits.ConflictPtr) { - if (curRes.bits.ByteStore) { - curRes.bits.ByteStore = 0; - } - } - if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) - && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) { - // If hardware constant mem is enabled, then we need to - // get the constant pointer CB number and use that to specify - // the resource ID. - AMDILGlobalManager *GM = STM->getGlobalManager(); - const StringRef funcName = (*miBegin)->getParent()->getParent() - ->getFunction()->getName(); - if (GM->isKernel(funcName)) { - const kernel &krnl = GM->getKernel(funcName); - curRes.bits.ResourceID = GM->getConstPtrCB(krnl, - (*siBegin)->getName()); - curRes.bits.HardwareInst = 1; - } else { - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::CONSTANT_ID); - } - } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem) - && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) { - // If hardware local mem is enabled, get the local mem ID from - // the device to use as the ResourceID - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::LDS_ID); - if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { - assert(curRes.bits.ResourceID && "Atomic resource ID " - "cannot be non-zero!"); - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem) - && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) { - // If hardware region mem is enabled, get the gds mem ID from - // the device to use as the ResourceID - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::GDS_ID); - if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { - assert(curRes.bits.ResourceID && "Atomic resource ID " - "cannot be non-zero!"); - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - } - } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) - && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::SCRATCH_ID); - } else if (!STM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { - // If multi uav is enabled, then the resource ID is either the - // number of write images that are available or the device - // raw uav id if it is 11. 
- if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) > - STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::RAW_UAV_ID); - } else if (numWriteImages != OPENCL_MAX_WRITE_IMAGES) { - if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) - < numWriteImages) { - curRes.bits.ResourceID = numWriteImages; - } else { - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::RAW_UAV_ID); - } - } else { - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; - (*miBegin)->print(dbgs()); - } - curRes.bits.ByteStore = 1; - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::ARENA_UAV_ID); - } - if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - if (curRes.bits.ResourceID - == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - assert(0 && "Found an atomic instruction that has " - "an arena uav id!"); - } - } - KM->setUAVID(*siBegin, curRes.bits.ResourceID); - if (!mMFI) { - mMFI = (*miBegin)->getParent()->getParent() - ->getInfo<AMDILMachineFunctionInfo>(); - } - mMFI->uav_insert(curRes.bits.ResourceID); - } - setAsmPrinterFlags(*miBegin, curRes); - } - } - -} - -void -annotateCacheableInstrs( - TargetMachine &TM, - CacheableInstrSet &cacheableSet, - bool mDebug) -{ - const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); - // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager(); - - CacheableInstrSet::iterator miBegin, miEnd; - - for (miBegin = cacheableSet.begin(), - miEnd = cacheableSet.end(); - miBegin != miEnd; ++miBegin) { - if (mDebug) { - dbgs() << "Annotating instr as cacheable. Inst: "; - (*miBegin)->dump(); - } - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - // If UAV11 is enabled, then we can enable cached reads. - if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) { - curRes.bits.CacheableRead = 1; - curRes.bits.ResourceID = 11; - setAsmPrinterFlags(*miBegin, curRes); - } - } -} - -// Annotate the instructions along various pointer paths. The paths that -// are handled are the raw, byte and cacheable pointer paths. -static void -annotatePtrPath( - TargetMachine &TM, - PtrIMap &PtrToInstMap, - RawSet &rawPtrs, - ByteSet &bytePtrs, - CacheableSet &cacheablePtrs, - uint32_t numWriteImages, - bool mDebug - ) -{ - if (PtrToInstMap.empty()) { - return; - } - // First we can check the cacheable pointers - annotateCacheablePtrs(TM, PtrToInstMap, cacheablePtrs, - bytePtrs, numWriteImages, mDebug); - - // Next we annotate the byte pointers - annotateBytePtrs(TM, PtrToInstMap, bytePtrs, rawPtrs, mDebug); - - // Next we annotate the raw pointers - annotateRawPtrs(TM, PtrToInstMap, rawPtrs, bytePtrs, - numWriteImages, mDebug); -} -// Allocate MultiUAV pointer ID's for the raw/conflict pointers. -static void -allocateMultiUAVPointers( - MachineFunction &MF, - const AMDILTargetMachine *ATM, - PtrIMap &PtrToInstMap, - RawSet &rawPtrs, - ConflictSet &conflictPtrs, - CacheableSet &cacheablePtrs, - uint32_t numWriteImages, - bool mDebug) -{ - if (PtrToInstMap.empty()) { - return; - } - AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>(); - uint32_t curUAV = numWriteImages; - bool increment = true; - const AMDILSubtarget *STM - = ATM->getSubtargetImpl(); - // If the RAW_UAV_ID is a value that is larger than the max number of write - // images, then we use that UAV ID. 
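When MultiUAV is not in play, the resource-ID choice above follows a small priority order: use the dedicated raw UAV if it sits above the arena UAV, otherwise steer around the write-image slots, otherwise fall back to the arena UAV and force the byte path. A condensed sketch of that decision, where kMaxWriteImages and the parameter names are stand-ins for the driver's values:

// Condensed raw-pointer UAV selection when MultiUAV is off.
#include <cstdint>
#include <cstdio>

constexpr uint32_t kMaxWriteImages = 8; // stand-in for OPENCL_MAX_WRITE_IMAGES

struct UavChoice { uint32_t id; bool byteStore; };

static UavChoice pickRawUav(uint32_t rawUav, uint32_t arenaUav,
                            uint32_t numWriteImages) {
  if (rawUav > arenaUav)
    return {rawUav, false};                 // dedicated raw UAV is available
  if (numWriteImages != kMaxWriteImages)    // some UAV slots are free
    return {rawUav < numWriteImages ? numWriteImages : rawUav, false};
  return {arenaUav, true};                  // everything else: arena + byte path
}

int main() {
  UavChoice c = pickRawUav(/*rawUav=*/11, /*arenaUav=*/8, /*numWriteImages=*/2);
  std::printf("uav %u byteStore=%d\n", c.id, c.byteStore);
}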
- if (numWriteImages >= OPENCL_MAX_WRITE_IMAGES) { - curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - increment = false; - } - AMDILKernelManager *KM = STM->getKernelManager(); - PtrSet::iterator siBegin, siEnd; - std::vector<MachineInstr*>::iterator miBegin, miEnd; - // First lets handle the raw pointers. - for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end(); - siBegin != siEnd; ++siBegin) { - assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type " - "to be processed at this point!"); - const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); - if (conflictPtrs.count(*siBegin) || !PT) { - continue; - } - // We only want to process global address space pointers - if (PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) { - if ((PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS - && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) - || (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS - && STM->device()->usesSoftware(AMDILDeviceInfo::ConstantMem)) - || (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS - && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))) { - // If we are using software emulated hardware features, then - // we need to specify that they use the raw uav and not - // zero-copy uav. The easiest way to do this is to assume they - // conflict with another pointer. Any pointer that conflicts - // with another pointer is assigned to the raw uav or the - // arena uav if no raw uav exists. - const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); - if (PT) { - conflictPtrs.insert(*siBegin); - } - } - if (PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - if (STM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) { - const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); - if (PT) { - conflictPtrs.insert(*siBegin); - } - } else { - if (mDebug) { - dbgs() << "Scratch Pointer '" << (*siBegin)->getName() - << "' being assigned uav "<< - STM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "\n"; - } - for (miBegin = PtrToInstMap[*siBegin].begin(), - miEnd = PtrToInstMap[*siBegin].end(); - miBegin != miEnd; ++miBegin) { - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - curRes.bits.ResourceID = STM->device() - ->getResourceID(AMDILDevice::SCRATCH_ID); - if (mDebug) { - dbgs() << "Updated instruction to bitmask "; - dbgs().write_hex(curRes.u16all); - dbgs() << " with ResID " << curRes.bits.ResourceID; - dbgs() << ". Inst: "; - (*miBegin)->dump(); - } - setAsmPrinterFlags((*miBegin), curRes); - KM->setUAVID(*siBegin, curRes.bits.ResourceID); - mMFI->uav_insert(curRes.bits.ResourceID); - } - } - } - continue; - } - // If more than just UAV 11 is cacheable, then we can remove - // this check. - if (cacheablePtrs.count(*siBegin)) { - if (mDebug) { - dbgs() << "Raw Pointer '" << (*siBegin)->getName() - << "' is cacheable, not allocating a multi-uav for it!\n"; - } - continue; - } - if (mDebug) { - dbgs() << "Raw Pointer '" << (*siBegin)->getName() - << "' being assigned uav " << curUAV << "\n"; - } - if (PtrToInstMap[*siBegin].empty()) { - KM->setUAVID(*siBegin, curUAV); - mMFI->uav_insert(curUAV); - } - // For all instructions here, we are going to set the new UAV to the curUAV - // number and not the value that it currently is set to. 
- for (miBegin = PtrToInstMap[*siBegin].begin(), - miEnd = PtrToInstMap[*siBegin].end(); - miBegin != miEnd; ++miBegin) { - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - curRes.bits.ResourceID = curUAV; - if (isAtomicInst(ATM->getInstrInfo(), *miBegin)) { - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - if (curRes.bits.ResourceID - == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - assert(0 && "Found an atomic instruction that has " - "an arena uav id!"); - } - } - if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; - (*miBegin)->print(dbgs()); - } - curRes.bits.ByteStore = 1; - curRes.bits.CacheableRead = 0; - } - if (mDebug) { - dbgs() << "Updated instruction to bitmask "; - dbgs().write_hex(curRes.u16all); - dbgs() << " with ResID " << curRes.bits.ResourceID; - dbgs() << ". Inst: "; - (*miBegin)->dump(); - } - setAsmPrinterFlags(*miBegin, curRes); - KM->setUAVID(*siBegin, curRes.bits.ResourceID); - mMFI->uav_insert(curRes.bits.ResourceID); - } - // If we make it here, we can increment the uav counter if we are less - // than the max write image count. Otherwise we set it to the default - // UAV and leave it. - if (increment && curUAV < (OPENCL_MAX_WRITE_IMAGES - 1)) { - ++curUAV; - } else { - curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - increment = false; - } - } - if (numWriteImages == 8) { - curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - } - // Now lets handle the conflict pointers - for (siBegin = conflictPtrs.begin(), siEnd = conflictPtrs.end(); - siBegin != siEnd; ++siBegin) { - assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type " - "to be processed at this point!"); - const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); - // We only want to process global address space pointers - if (!PT || PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) { - continue; - } - if (mDebug) { - dbgs() << "Conflict Pointer '" << (*siBegin)->getName() - << "' being assigned uav " << curUAV << "\n"; - } - if (PtrToInstMap[*siBegin].empty()) { - KM->setUAVID(*siBegin, curUAV); - mMFI->uav_insert(curUAV); - } - for (miBegin = PtrToInstMap[*siBegin].begin(), - miEnd = PtrToInstMap[*siBegin].end(); - miBegin != miEnd; ++miBegin) { - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(*miBegin, curRes); - curRes.bits.ResourceID = curUAV; - if (isAtomicInst(ATM->getInstrInfo(), *miBegin)) { - (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) - .setImm(curRes.bits.ResourceID); - if (curRes.bits.ResourceID - == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - assert(0 && "Found an atomic instruction that has " - "an arena uav id!"); - } - } - if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - if (mDebug) { - dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; - (*miBegin)->print(dbgs()); - } - curRes.bits.ByteStore = 1; - } - if (mDebug) { - dbgs() << "Updated instruction to bitmask "; - dbgs().write_hex(curRes.u16all); - dbgs() << " with ResID " << curRes.bits.ResourceID; - dbgs() << ". 
Inst: "; - (*miBegin)->dump(); - } - setAsmPrinterFlags(*miBegin, curRes); - KM->setUAVID(*siBegin, curRes.bits.ResourceID); - mMFI->uav_insert(curRes.bits.ResourceID); - } - } -} -// The first thing we should do is to allocate the default -// ID for each load/store/atomic instruction so that -// it is correctly allocated. Everything else after this -// is just an optimization to more efficiently allocate -// resource ID's. -void -allocateDefaultIDs( - const AMDILTargetMachine *ATM, - MachineFunction &MF, - bool mDebug) -{ - for (MachineFunction::iterator mfBegin = MF.begin(), - mfEnd = MF.end(); mfBegin != mfEnd; ++mfBegin) { - MachineBasicBlock *MB = mfBegin; - for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end(); - mbb != mbe; ++mbb) { - MachineInstr *MI = mbb; - if (isLoadInst(ATM->getInstrInfo(), MI) - || isStoreInst(ATM->getInstrInfo(), MI) - || isAtomicInst(ATM->getInstrInfo(), MI)) { - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - allocateDefaultID(ATM, curRes, MI, mDebug); - } - } - } -} - - bool -AMDILEGPointerManager::runOnMachineFunction(MachineFunction &MF) -{ - bool changed = false; - const AMDILTargetMachine *ATM - = reinterpret_cast<const AMDILTargetMachine*>(&TM); - AMDILMachineFunctionInfo *mMFI = - MF.getInfo<AMDILMachineFunctionInfo>(); - if (mDebug) { - dbgs() << getPassName() << "\n"; - dbgs() << MF.getFunction()->getName() << "\n"; - MF.dump(); - } - // Start out by allocating the default ID's to all instructions in the - // function. - allocateDefaultIDs(ATM, MF, mDebug); - - // A set of all pointers are tracked in this map and - // if multiple pointers are detected, they go to the same - // set. - PtrIMap PtrToInstMap; - - // All of the instructions that are loads, stores or pointer - // conflicts are tracked in the map with a set of all values - // that reference the instruction stored. - InstPMap InstToPtrMap; - - // In order to track across stack entries, we need a map between a - // frame index and a pointer. That way when we load from a frame - // index, we know what pointer was stored to the frame index. - FIPMap FIToPtrMap; - - // Set of all the pointers that are byte pointers. Byte pointers - // are required to have their instructions go to the arena. - ByteSet bytePtrs; - - // Set of all the pointers that are cacheable. All of the cache pointers - // are required to go to a raw uav and cannot go to arena. - CacheableSet cacheablePtrs; - - // Set of all the pointers that go into a raw buffer. A pointer can - // exist in either rawPtrs or bytePtrs but not both. - RawSet rawPtrs; - - // Set of all the pointers that end up having a conflicting instruction - // somewhere in the pointer path. - ConflictSet conflictPtrs; - - // Set of all pointers that are images - ImageSet images; - - // Set of all pointers that are counters - AppendSet counters; - - // Set of all pointers that load from a constant pool - CPoolSet cpool; - - // Mapping from BB to infomation about the cacheability of the - // global load instructions in it. - MBBCacheableMap bbCacheable; - - // A set of load instructions that are cacheable - // even if all the load instructions of the ptr are not. - CacheableInstrSet cacheableSet; - - // The lookup table holds all of the registers that - // are used as we assign pointers values to them. - // If two pointers collide on the lookup table, then - // we assign them to the same UAV. If one of the - // pointers is byte addressable, then we assign - // them to arena, otherwise we assign them to raw. 
- RVPVec lookupTable; - - // First we need to go through all of the arguments and assign the - // live in registers to the lookup table and the pointer mapping. - uint32_t numWriteImages = parseArguments(MF, lookupTable, ATM, - cacheablePtrs, images, counters, mDebug); - - // Lets do some error checking on the results of the parsing. - if (counters.size() > OPENCL_MAX_NUM_ATOMIC_COUNTERS) { - mMFI->addErrorMsg( - amd::CompilerErrorMessage[INSUFFICIENT_COUNTER_RESOURCES]); - } - if (numWriteImages > OPENCL_MAX_WRITE_IMAGES - || (images.size() - numWriteImages > OPENCL_MAX_READ_IMAGES)) { - mMFI->addErrorMsg( - amd::CompilerErrorMessage[INSUFFICIENT_IMAGE_RESOURCES]); - } - - // Now lets parse all of the instructions and update our - // lookup tables. - parseFunction(this, ATM, MF, InstToPtrMap, PtrToInstMap, - FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool, - bbCacheable, mDebug); - - // We need to go over our pointer map and find all the conflicting - // pointers that have byte stores and put them in the bytePtr map. - // All conflicting pointers that don't have byte stores go into - // the rawPtr map. - detectConflictingPointers(ATM, InstToPtrMap, bytePtrs, rawPtrs, - conflictPtrs, mDebug); - - // The next step is to detect whether the pointer should be added to - // the fully cacheable set or not. A pointer is marked as cacheable if - // no store instruction exists. - detectFullyCacheablePointers(ATM, PtrToInstMap, rawPtrs, - cacheablePtrs, conflictPtrs, mDebug); - - // Disable partially cacheable for now when multiUAV is on. - // SC versions before SC139 have a bug that generates incorrect - // addressing for some cached accesses. - if (!ATM->getSubtargetImpl() - ->device()->isSupported(AMDILDeviceInfo::MultiUAV) && - ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_139) { - // Now we take the set of loads that have no reachable stores and - // create a list of additional instructions (those that aren't already - // in a cacheablePtr set) that are safe to mark as cacheable. - detectCacheableInstrs(bbCacheable, InstToPtrMap, cacheablePtrs, - bytePtrs, cacheableSet, mDebug); - - // Annotate the additional instructions computed above as cacheable. - // Note that this should not touch any instructions annotated in - // annotatePtrPath. - annotateCacheableInstrs(TM, cacheableSet, mDebug); - } - - // Now that we have detected everything we need to detect, lets go through an - // annotate the instructions along the pointer path for each of the - // various pointer types. - annotatePtrPath(TM, PtrToInstMap, rawPtrs, bytePtrs, - cacheablePtrs, numWriteImages, mDebug); - - // Annotate the atomic counter path if any exists. - annotateAppendPtrs(TM, PtrToInstMap, counters, mDebug); - - // If we support MultiUAV, then we need to determine how - // many write images exist so that way we know how many UAV are - // left to allocate to buffers. - if (ATM->getSubtargetImpl() - ->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { - // We now have (OPENCL_MAX_WRITE_IMAGES - numPtrs) buffers open for - // multi-uav allocation. - allocateMultiUAVPointers(MF, ATM, PtrToInstMap, rawPtrs, - conflictPtrs, cacheablePtrs, numWriteImages, mDebug); - } - - // The last step is to detect if we have any alias constant pool operations. - // This is not likely, but does happen on occasion with double precision - // operations. 
- detectAliasedCPoolOps(TM, cpool, mDebug); - if (mDebug) { - dumpPointers(bytePtrs, "Byte Store Ptrs"); - dumpPointers(rawPtrs, "Raw Ptrs"); - dumpPointers(cacheablePtrs, "Cache Load Ptrs"); - dumpPointers(counters, "Atomic Counters"); - dumpPointers(images, "Images"); - } - return changed; -} - -// The default pointer manager just assigns the default ID's to -// each load/store instruction and does nothing else. This is -// the pointer manager for the 7XX series of cards. - bool -AMDILPointerManager::runOnMachineFunction(MachineFunction &MF) -{ - bool changed = false; - const AMDILTargetMachine *ATM - = reinterpret_cast<const AMDILTargetMachine*>(&TM); - if (mDebug) { - dbgs() << getPassName() << "\n"; - dbgs() << MF.getFunction()->getName() << "\n"; - MF.dump(); - } - // On the 7XX we don't have to do any special processing, so we - // can just allocate the default ID and be done with it. - allocateDefaultIDs(ATM, MF, mDebug); - return changed; -} diff --git a/src/gallium/drivers/radeon/AMDILPointerManager.h b/src/gallium/drivers/radeon/AMDILPointerManager.h deleted file mode 100644 index 2c471fb4d65..00000000000 --- a/src/gallium/drivers/radeon/AMDILPointerManager.h +++ /dev/null @@ -1,209 +0,0 @@ -//===-------- AMDILPointerManager.h - Manage Pointers for HW ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// The AMDIL Pointer Manager is a class that does all the checking for -// different pointer characteristics. Pointers have attributes that need -// to be attached to them in order to correctly codegen them efficiently. -// This class will analyze the pointers of a function and then traverse the uses -// of the pointers and determine if a pointer can be cached, should belong in -// the arena, and what UAV it should belong to. There are seperate classes for -// each unique generation of devices. This pass only works in SSA form. -//===----------------------------------------------------------------------===// -#ifndef _AMDIL_POINTER_MANAGER_H_ -#define _AMDIL_POINTER_MANAGER_H_ -#undef DEBUG_TYPE -#undef DEBUGME -#define DEBUG_TYPE "PointerManager" -#if !defined(NDEBUG) -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME (false) -#endif -#include "AMDIL.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetMachine.h" - -#include <list> -#include <map> -#include <queue> -#include <set> - -namespace llvm { - class Value; - class MachineBasicBlock; - // Typedefing the multiple different set types to that it is - // easier to read what each set is supposed to handle. This - // also allows it easier to track which set goes to which - // argument in a function call. - typedef std::set<const Value*> PtrSet; - - // A Byte set is the set of all base pointers that must - // be allocated to the arena path. - typedef PtrSet ByteSet; - - // A Raw set is the set of all base pointers that can be - // allocated to the raw path. 
- typedef PtrSet RawSet; - - // A cacheable set is the set of all base pointers that - // are deamed cacheable based on annotations or - // compiler options. - typedef PtrSet CacheableSet; - - // A conflict set is a set of all base pointers whose - // use/def chains conflict with another base pointer. - typedef PtrSet ConflictSet; - - // An image set is a set of all read/write only image pointers. - typedef PtrSet ImageSet; - - // An append set is a set of atomic counter base pointers - typedef std::vector<const Value*> AppendSet; - - // A ConstantSet is a set of constant pool instructions - typedef std::set<MachineInstr*> CPoolSet; - - // A CacheableInstSet set is a set of instructions that are cachable - // even if the pointer is not generally cacheable. - typedef std::set<MachineInstr*> CacheableInstrSet; - - // A pair that maps a virtual register to the equivalent base - // pointer value that it was derived from. - typedef std::pair<unsigned, const Value*> RegValPair; - - // A map that maps between the base pointe rvalue and an array - // of instructions that are part of the pointer chain. A pointer - // chain is a recursive def/use chain of all instructions that don't - // store data to memory unless the pointer is the data being stored. - typedef std::map<const Value*, std::vector<MachineInstr*> > PtrIMap; - - // A map that holds a set of all base pointers that are used in a machine - // instruction. This helps to detect when conflict pointers are found - // such as when pointer subtraction occurs. - typedef std::map<MachineInstr*, PtrSet> InstPMap; - - // A map that holds the frame index to RegValPair so that writes of - // pointers to the stack can be tracked. - typedef std::map<unsigned, RegValPair > FIPMap; - - // A small vector impl that holds all of the register to base pointer - // mappings for a given function. - typedef std::map<unsigned, RegValPair> RVPVec; - - - - // The default pointer manager. This handles pointer - // resource allocation for default ID's only. - // There is no special processing. - class AMDILPointerManager : public MachineFunctionPass - { - public: - AMDILPointerManager( - TargetMachine &tm - AMDIL_OPT_LEVEL_DECL); - virtual ~AMDILPointerManager(); - virtual const char* - getPassName() const; - virtual bool - runOnMachineFunction(MachineFunction &F); - virtual void - getAnalysisUsage(AnalysisUsage &AU) const; - static char ID; - protected: - bool mDebug; - private: - TargetMachine &TM; - }; // class AMDILPointerManager - - // The pointer manager for Evergreen and Northern Island - // devices. This pointer manager allocates and trackes - // cached memory, arena resources, raw resources and - // whether multi-uav is utilized or not. - class AMDILEGPointerManager : public AMDILPointerManager - { - public: - AMDILEGPointerManager( - TargetMachine &tm - AMDIL_OPT_LEVEL_DECL); - virtual ~AMDILEGPointerManager(); - virtual const char* - getPassName() const; - virtual bool - runOnMachineFunction(MachineFunction &F); - private: - TargetMachine &TM; - }; // class AMDILEGPointerManager - - // Information related to the cacheability of instructions in a basic block. - // This is used during the parse phase of the pointer algorithm to track - // the reachability of stores within a basic block. 
- class BlockCacheableInfo { - public: - BlockCacheableInfo() : - mStoreReachesTop(false), - mStoreReachesExit(false), - mCacheableSet() - {}; - - bool storeReachesTop() const { return mStoreReachesTop; } - bool storeReachesExit() const { return mStoreReachesExit; } - CacheableInstrSet::const_iterator - cacheableBegin() const { return mCacheableSet.begin(); } - CacheableInstrSet::const_iterator - cacheableEnd() const { return mCacheableSet.end(); } - - // mark the block as having a global store that reaches it. This - // will also set the store reaches exit flag, and clear the list - // of loads (since they are now reachable by a store.) - bool setReachesTop() { - bool changedExit = !mStoreReachesExit; - - if (!mStoreReachesTop) - mCacheableSet.clear(); - - mStoreReachesTop = true; - mStoreReachesExit = true; - return changedExit; - } - - // Mark the block as having a store that reaches the exit of the - // block. - void setReachesExit() { - mStoreReachesExit = true; - } - - // If the top or the exit of the block are not marked as reachable - // by a store, add the load to the list of cacheable loads. - void addPossiblyCacheableInst(const TargetMachine * tm, MachineInstr *load) { - // By definition, if store reaches top, then store reaches exit. - // So, we only test for exit here. - // If we have a volatile load we cannot cache it. - if (mStoreReachesExit || isVolatileInst(tm->getInstrInfo(), load)) { - return; - } - - mCacheableSet.insert(load); - } - - private: - bool mStoreReachesTop; // Does a global store reach the top of this block? - bool mStoreReachesExit;// Does a global store reach the exit of this block? - CacheableInstrSet mCacheableSet; // The set of loads in the block not - // reachable by a global store. - }; - // Map from MachineBasicBlock to it's cacheable load info. - typedef std::map<MachineBasicBlock*, BlockCacheableInfo> MBBCacheableMap; -} // end llvm namespace -#endif // _AMDIL_POINTER_MANAGER_H_ diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp index 6146dded3aa..cc565081e10 100644 --- a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp @@ -173,7 +173,6 @@ bool AMDILPassConfig::addPreRegAlloc() } PM.add(createAMDILMachinePeephole(*TM)); - PM.add(createAMDILPointerManager(*TM)); return false; } diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index fad207a6d9f..138b562fb07 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -46,7 +46,6 @@ CPP_SOURCES := \ AMDILModuleInfo.cpp \ AMDILNIDevice.cpp \ AMDILPeepholeOptimizer.cpp \ - AMDILPointerManager.cpp \ AMDILPrintfConvert.cpp \ AMDILRegisterInfo.cpp \ AMDILSIDevice.cpp \ |
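As a closing illustration of the BlockCacheableInfo record deleted above: while a block is scanned, loads seen before any global store remain candidates for cached reads, and a store clears that property for everything after it. The toy re-typing below captures only that behaviour within one block and is not the driver's code.

// Toy single-block scan mirroring the BlockCacheableInfo semantics.
#include <cstddef>
#include <cstdio>
#include <set>

struct Inst { int id; bool isGlobalStore; bool isVolatile; };

class BlockCacheability {
  bool storeReachesExit = false;
  std::set<int> cacheableLoads;
public:
  void visit(const Inst &i) {
    if (i.isGlobalStore) { storeReachesExit = true; return; }
    if (!storeReachesExit && !i.isVolatile)
      cacheableLoads.insert(i.id);   // load not yet reachable by a store
  }
  std::size_t numCacheable() const { return cacheableLoads.size(); }
};

int main() {
  BlockCacheability bb;
  Inst prog[] = {{0, false, false}, {1, true, false}, {2, false, false}};
  for (const Inst &i : prog) bb.visit(i);
  std::printf("cacheable loads: %zu\n", bb.numCacheable()); // 1 (only inst 0)
}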