diff options
Diffstat (limited to 'src/gallium/drivers/radeon/AMDILPointerManager.cpp')
-rw-r--r-- | src/gallium/drivers/radeon/AMDILPointerManager.cpp | 2551 |
1 files changed, 2551 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/AMDILPointerManager.cpp b/src/gallium/drivers/radeon/AMDILPointerManager.cpp new file mode 100644 index 00000000000..9cac61cb718 --- /dev/null +++ b/src/gallium/drivers/radeon/AMDILPointerManager.cpp @@ -0,0 +1,2551 @@ +//===-------- AMDILPointerManager.cpp - Manage Pointers for HW-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// Implementation for the AMDILPointerManager classes. See header file for +// more documentation of class. +// TODO: This fails when function calls are enabled, must always be inlined +//===----------------------------------------------------------------------===// +#include "AMDILPointerManager.h" +#include "AMDILCompilerErrors.h" +#include "AMDILDeviceInfo.h" +#include "AMDILGlobalManager.h" +#include "AMDILKernelManager.h" +#include "AMDILMachineFunctionInfo.h" +#include "AMDILTargetMachine.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalValue.h" +#include "llvm/Instructions.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Support/FormattedStream.h" + +#include <stdio.h> +using namespace llvm; +char AMDILPointerManager::ID = 0; +namespace llvm { + FunctionPass* + createAMDILPointerManager(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + { + return tm.getSubtarget<AMDILSubtarget>() + .device()->getPointerManager(tm AMDIL_OPT_LEVEL_VAR); + } +} + +AMDILPointerManager::AMDILPointerManager( + TargetMachine &tm + AMDIL_OPT_LEVEL_DECL) : + MachineFunctionPass(ID), + TM(tm) +{ + mDebug = DEBUGME; + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); +} + +AMDILPointerManager::~AMDILPointerManager() +{ +} + +const char* +AMDILPointerManager::getPassName() const +{ + return "AMD IL Default Pointer Manager Pass"; +} + +void +AMDILPointerManager::getAnalysisUsage(AnalysisUsage &AU) const +{ + AU.setPreservesAll(); + AU.addRequiredID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AMDILEGPointerManager::AMDILEGPointerManager( + TargetMachine &tm + AMDIL_OPT_LEVEL_DECL) : + AMDILPointerManager(tm AMDIL_OPT_LEVEL_VAR), + TM(tm) +{ +} + +AMDILEGPointerManager::~AMDILEGPointerManager() +{ +} +std::string +findSamplerName(MachineInstr* MI, + FIPMap &FIToPtrMap, + RVPVec &lookupTable, + const TargetMachine *TM) +{ + std::string sampler = "unknown"; + assert(MI->getNumOperands() == 5 && "Only an " + "image read instruction with 5 arguments can " + "have a sampler."); + assert(MI->getOperand(3).isReg() && + "Argument 3 must be a register to call this function"); + unsigned reg = MI->getOperand(3).getReg(); + // If this register points to an argument, then + // we can return the argument name. + if (lookupTable[reg].second && dyn_cast<Argument>(lookupTable[reg].second)) { + return lookupTable[reg].second->getName(); + } + // Otherwise the sampler is coming from memory somewhere. + // If the sampler memory location can be tracked, then + // we ascertain the sampler name that way. + // The most common case is when optimizations are disabled + // or mem2reg is not enabled, then the sampler when it is + // an argument is passed through the frame index. + + // In the optimized case, the instruction that defined + // register from operand #3 is a private load. + MachineRegisterInfo ®Info = MI->getParent()->getParent()->getRegInfo(); + assert(!regInfo.def_empty(reg) + && "We don't have any defs of this register, but we aren't an argument!"); + MachineOperand *defOp = regInfo.getRegUseDefListHead(reg); + MachineInstr *defMI = defOp->getParent(); + if (isPrivateInst(TM->getInstrInfo(), defMI) && isLoadInst(TM->getInstrInfo(), defMI)) { + if (defMI->getOperand(1).isFI()) { + RegValPair &fiRVP = FIToPtrMap[reg]; + if (fiRVP.second && dyn_cast<Argument>(fiRVP.second)) { + return fiRVP.second->getName(); + } else { + // FIXME: Fix the case where the value stored is not a kernel argument. + assert(!"Found a private load of a sampler where the value isn't an argument!"); + } + } else { + // FIXME: Fix the case where someone dynamically loads a sampler value + // from private memory. This is problematic because we need to know the + // sampler value at compile time and if it is dynamically loaded, we won't + // know what sampler value to use. + assert(!"Found a private load of a sampler that isn't from a frame index!"); + } + } else { + // FIXME: Handle the case where the def is neither a private instruction + // and not a load instruction. This shouldn't occur, but putting an assertion + // just to make sure that it doesn't. + assert(!"Found a case which we don't handle."); + } + return sampler; +} + +const char* +AMDILEGPointerManager::getPassName() const +{ + return "AMD IL EG Pointer Manager Pass"; +} + +// Helper function to determine if the current pointer is from the +// local, region or private address spaces. + static bool +isLRPInst(MachineInstr *MI, + const AMDILTargetMachine *ATM) +{ + const AMDILSubtarget *STM + = ATM->getSubtargetImpl(); + if (!MI) { + return false; + } + if ((isRegionInst(ATM->getInstrInfo(), MI) + && STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) + || (isLocalInst(ATM->getInstrInfo(), MI) + && STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) + || (isPrivateInst(ATM->getInstrInfo(), MI) + && STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem))) { + return true; + } + return false; +} + +/// Helper function to determine if the I/O instruction uses +/// global device memory or not. +static bool +usesGlobal( + const AMDILTargetMachine *ATM, + MachineInstr *MI) { + const AMDILSubtarget *STM + = ATM->getSubtargetImpl(); + switch(MI->getOpcode()) { + ExpandCaseToAllTypes(AMDIL::GLOBALSTORE); + ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE); + ExpandCaseToAllTypes(AMDIL::GLOBALLOAD); + ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD); + ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD); + ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD); + return true; + ExpandCaseToAllTypes(AMDIL::REGIONLOAD); + ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD); + ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD); + ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD); + ExpandCaseToAllTypes(AMDIL::REGIONSTORE); + ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE); + return !STM->device()->usesHardware(AMDILDeviceInfo::RegionMem); + ExpandCaseToAllTypes(AMDIL::LOCALLOAD); + ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD); + ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD); + ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD); + ExpandCaseToAllTypes(AMDIL::LOCALSTORE); + ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE); + return !STM->device()->usesHardware(AMDILDeviceInfo::LocalMem); + ExpandCaseToAllTypes(AMDIL::CPOOLLOAD); + ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD); + ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD); + ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD); + ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD); + ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD); + ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD); + ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD); + return !STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); + ExpandCaseToAllTypes(AMDIL::PRIVATELOAD); + ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD); + ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD); + ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD); + ExpandCaseToAllTypes(AMDIL::PRIVATESTORE); + ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE); + return !STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem); + default: + return false; + } + return false; +} + +// Helper function that allocates the default resource ID for the +// respective I/O types. +static void +allocateDefaultID( + const AMDILTargetMachine *ATM, + AMDILAS::InstrResEnc &curRes, + MachineInstr *MI, + bool mDebug) +{ + AMDILMachineFunctionInfo *mMFI = + MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>(); + const AMDILSubtarget *STM + = ATM->getSubtargetImpl(); + if (mDebug) { + dbgs() << "Assigning instruction to default ID. Inst:"; + MI->dump(); + } + // If we use global memory, lets set the Operand to + // the ARENA_UAV_ID. + if (usesGlobal(ATM, MI)) { + curRes.bits.ResourceID = + STM->device()->getResourceID(AMDILDevice::GLOBAL_ID); + if (isAtomicInst(ATM->getInstrInfo(), MI)) { + MI->getOperand(MI->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + AMDILKernelManager *KM = STM->getKernelManager(); + if (curRes.bits.ResourceID == 8 + && !STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { + KM->setUAVID(NULL, curRes.bits.ResourceID); + mMFI->uav_insert(curRes.bits.ResourceID); + } + } else if (isPrivateInst(ATM->getInstrInfo(), MI)) { + curRes.bits.ResourceID = + STM->device()->getResourceID(AMDILDevice::SCRATCH_ID); + } else if (isLocalInst(ATM->getInstrInfo(), MI) || isLocalAtomic(ATM->getInstrInfo(), MI)) { + curRes.bits.ResourceID = + STM->device()->getResourceID(AMDILDevice::LDS_ID); + AMDILMachineFunctionInfo *mMFI = + MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>(); + mMFI->setUsesLocal(); + if (isAtomicInst(ATM->getInstrInfo(), MI)) { + assert(curRes.bits.ResourceID && "Atomic resource ID " + "cannot be zero!"); + MI->getOperand(MI->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + } else if (isRegionInst(ATM->getInstrInfo(), MI) || isRegionAtomic(ATM->getInstrInfo(), MI)) { + curRes.bits.ResourceID = + STM->device()->getResourceID(AMDILDevice::GDS_ID); + AMDILMachineFunctionInfo *mMFI = + MI->getParent()->getParent()->getInfo<AMDILMachineFunctionInfo>(); + mMFI->setUsesRegion(); + if (isAtomicInst(ATM->getInstrInfo(), MI)) { + assert(curRes.bits.ResourceID && "Atomic resource ID " + "cannot be zero!"); + (MI)->getOperand((MI)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + } else if (isConstantInst(ATM->getInstrInfo(), MI)) { + // If we are unknown constant instruction and the base pointer is known. + // Set the resource ID accordingly, otherwise use the default constant ID. + // FIXME: this should not require the base pointer to know what constant + // it is from. + AMDILGlobalManager *GM = STM->getGlobalManager(); + MachineFunction *MF = MI->getParent()->getParent(); + if (GM->isKernel(MF->getFunction()->getName())) { + const kernel &krnl = GM->getKernel(MF->getFunction()->getName()); + const Value *V = getBasePointerValue(MI); + if (V && !dyn_cast<AllocaInst>(V)) { + curRes.bits.ResourceID = GM->getConstPtrCB(krnl, V->getName()); + curRes.bits.HardwareInst = 1; + } else if (V && dyn_cast<AllocaInst>(V)) { + // FIXME: Need a better way to fix this. Requires a rewrite of how + // we lower global addresses to various address spaces. + // So for now, lets assume that there is only a single + // constant buffer that can be accessed from a load instruction + // that is derived from an alloca instruction. + curRes.bits.ResourceID = 2; + curRes.bits.HardwareInst = 1; + } else { + if (isStoreInst(ATM->getInstrInfo(), MI)) { + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; + MI->dump(); + } + curRes.bits.ByteStore = 1; + } + curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::CONSTANT_ID); + } + } else { + if (isStoreInst(ATM->getInstrInfo(), MI)) { + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; + MI->dump(); + } + curRes.bits.ByteStore = 1; + } + curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID); + AMDILKernelManager *KM = STM->getKernelManager(); + KM->setUAVID(NULL, curRes.bits.ResourceID); + mMFI->uav_insert(curRes.bits.ResourceID); + } + } else if (isAppendInst(ATM->getInstrInfo(), MI)) { + unsigned opcode = MI->getOpcode(); + if (opcode == AMDIL::APPEND_ALLOC + || opcode == AMDIL::APPEND_ALLOC_NORET) { + curRes.bits.ResourceID = 1; + } else { + curRes.bits.ResourceID = 2; + } + } + setAsmPrinterFlags(MI, curRes); +} + +// Function that parses the arguments and updates the lookupTable with the +// pointer -> register mapping. This function also checks for cacheable +// pointers and updates the CacheableSet with the arguments that +// can be cached based on the readonlypointer annotation. The final +// purpose of this function is to update the imageSet and counterSet +// with all pointers that are either images or atomic counters. +uint32_t +parseArguments(MachineFunction &MF, + RVPVec &lookupTable, + const AMDILTargetMachine *ATM, + CacheableSet &cacheablePtrs, + ImageSet &imageSet, + AppendSet &counterSet, + bool mDebug) +{ + const AMDILSubtarget *STM + = ATM->getSubtargetImpl(); + uint32_t writeOnlyImages = 0; + uint32_t readOnlyImages = 0; + std::string cachedKernelName = "llvm.readonlypointer.annotations."; + cachedKernelName.append(MF.getFunction()->getName()); + GlobalVariable *GV = MF.getFunction()->getParent() + ->getGlobalVariable(cachedKernelName); + unsigned cbNum = 0; + unsigned regNum = AMDIL::R1; + AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>(); + for (Function::const_arg_iterator I = MF.getFunction()->arg_begin(), + E = MF.getFunction()->arg_end(); I != E; ++I) { + const Argument *curArg = I; + if (mDebug) { + dbgs() << "Argument: "; + curArg->dump(); + } + Type *curType = curArg->getType(); + // We are either a scalar or vector type that + // is passed by value that is not a opaque/struct + // type. We just need to increment regNum + // the correct number of times to match the number + // of registers that it takes up. + if (curType->isFPOrFPVectorTy() || + curType->isIntOrIntVectorTy()) { + // We are scalar, so increment once and + // move on + if (!curType->isVectorTy()) { + lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg); + ++regNum; + ++cbNum; + continue; + } + VectorType *VT = dyn_cast<VectorType>(curType); + // We are a vector type. If we are 64bit type, then + // we increment length / 2 times, otherwise we + // increment length / 4 times. The only corner case + // is with vec3 where the vector gets scalarized and + // therefor we need a loop count of 3. + size_t loopCount = VT->getNumElements(); + if (loopCount != 3) { + if (VT->getScalarSizeInBits() == 64) { + loopCount = loopCount >> 1; + } else { + loopCount = (loopCount + 2) >> 2; + } + cbNum += loopCount; + } else { + cbNum++; + } + while (loopCount--) { + lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg); + ++regNum; + } + } else if (curType->isPointerTy()) { + Type *CT = dyn_cast<PointerType>(curType)->getElementType(); + const StructType *ST = dyn_cast<StructType>(CT); + if (ST && ST->isOpaque()) { + StringRef name = ST->getName(); + bool i1d_type = name == "struct._image1d_t"; + bool i1da_type = name == "struct._image1d_array_t"; + bool i1db_type = name == "struct._image1d_buffer_t"; + bool i2d_type = name == "struct._image2d_t"; + bool i2da_type = name == "struct._image2d_array_t"; + bool i3d_type = name == "struct._image3d_t"; + bool c32_type = name == "struct._counter32_t"; + bool c64_type = name == "struct._counter64_t"; + if (i2d_type || i3d_type || i2da_type || + i1d_type || i1db_type || i1da_type) { + imageSet.insert(I); + uint32_t imageNum = readOnlyImages + writeOnlyImages; + if (STM->getGlobalManager() + ->isReadOnlyImage(MF.getFunction()->getName(), imageNum)) { + if (mDebug) { + dbgs() << "Pointer: '" << curArg->getName() + << "' is a read only image # " << readOnlyImages << "!\n"; + } + // We store the cbNum along with the image number so that we can + // correctly encode the 'info' intrinsics. + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + ((cbNum << 16 | readOnlyImages++), curArg); + } else if (STM->getGlobalManager() + ->isWriteOnlyImage(MF.getFunction()->getName(), imageNum)) { + if (mDebug) { + dbgs() << "Pointer: '" << curArg->getName() + << "' is a write only image # " << writeOnlyImages << "!\n"; + } + // We store the cbNum along with the image number so that we can + // correctly encode the 'info' intrinsics. + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + ((cbNum << 16 | writeOnlyImages++), curArg); + } else { + assert(!"Read/Write images are not supported!"); + } + ++regNum; + cbNum += 2; + continue; + } else if (c32_type || c64_type) { + if (mDebug) { + dbgs() << "Pointer: '" << curArg->getName() + << "' is a " << (c32_type ? "32" : "64") + << " bit atomic counter type!\n"; + } + counterSet.push_back(I); + } + } + + if (STM->device()->isSupported(AMDILDeviceInfo::CachedMem) + && GV && GV->hasInitializer()) { + const ConstantArray *nameArray + = dyn_cast_or_null<ConstantArray>(GV->getInitializer()); + if (nameArray) { + for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) { + const GlobalVariable *gV= dyn_cast_or_null<GlobalVariable>( + nameArray->getOperand(x)->getOperand(0)); + const ConstantDataArray *argName = + dyn_cast_or_null<ConstantDataArray>(gV->getInitializer()); + if (!argName) { + continue; + } + std::string argStr = argName->getAsString(); + std::string curStr = curArg->getName(); + if (!strcmp(argStr.data(), curStr.data())) { + if (mDebug) { + dbgs() << "Pointer: '" << curArg->getName() + << "' is cacheable!\n"; + } + cacheablePtrs.insert(curArg); + } + } + } + } + uint32_t as = dyn_cast<PointerType>(curType)->getAddressSpace(); + // Handle the case where the kernel argument is a pointer + if (mDebug) { + dbgs() << "Pointer: " << curArg->getName() << " is assigned "; + if (as == AMDILAS::GLOBAL_ADDRESS) { + dbgs() << "uav " << STM->device() + ->getResourceID(AMDILDevice::GLOBAL_ID); + } else if (as == AMDILAS::PRIVATE_ADDRESS) { + dbgs() << "scratch " << STM->device() + ->getResourceID(AMDILDevice::SCRATCH_ID); + } else if (as == AMDILAS::LOCAL_ADDRESS) { + dbgs() << "lds " << STM->device() + ->getResourceID(AMDILDevice::LDS_ID); + } else if (as == AMDILAS::CONSTANT_ADDRESS) { + dbgs() << "cb " << STM->device() + ->getResourceID(AMDILDevice::CONSTANT_ID); + } else if (as == AMDILAS::REGION_ADDRESS) { + dbgs() << "gds " << STM->device() + ->getResourceID(AMDILDevice::GDS_ID); + } else { + assert(!"Found an address space that we don't support!"); + } + dbgs() << " @ register " << regNum << ". Inst: "; + curArg->dump(); + } + switch (as) { + default: + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg); + break; + case AMDILAS::LOCAL_ADDRESS: + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + (STM->device()->getResourceID(AMDILDevice::LDS_ID), curArg); + mMFI->setHasLocalArg(); + break; + case AMDILAS::REGION_ADDRESS: + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + (STM->device()->getResourceID(AMDILDevice::GDS_ID), curArg); + mMFI->setHasRegionArg(); + break; + case AMDILAS::CONSTANT_ADDRESS: + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + (STM->device()->getResourceID(AMDILDevice::CONSTANT_ID), curArg); + break; + case AMDILAS::PRIVATE_ADDRESS: + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + (STM->device()->getResourceID(AMDILDevice::SCRATCH_ID), curArg); + break; + } + // In this case we need to increment it once. + ++regNum; + ++cbNum; + } else { + // Is anything missing that is legal in CL? + assert(0 && "Current type is not supported!"); + lookupTable[regNum] = std::make_pair<unsigned, const Value*> + (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg); + ++regNum; + ++cbNum; + } + } + return writeOnlyImages; +} +// The call stack is interesting in that even in SSA form, it assigns +// registers to the same value's over and over again. So we need to +// ignore the values that are assigned and just deal with the input +// and return registers. +static void +parseCall( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + RVPVec &lookupTable, + MachineBasicBlock::iterator &mBegin, + MachineBasicBlock::iterator mEnd, + bool mDebug) +{ + SmallVector<unsigned, 8> inputRegs; + AMDILAS::InstrResEnc curRes; + if (mDebug) { + dbgs() << "Parsing Call Stack Start.\n"; + } + MachineBasicBlock::iterator callInst = mBegin; + MachineInstr *CallMI = callInst; + getAsmPrinterFlags(CallMI, curRes); + MachineInstr *MI = --mBegin; + unsigned reg = AMDIL::R1; + // First we need to check the input registers. + do { + // We stop if we hit the beginning of the call stack + // adjustment. + if (MI->getOpcode() == AMDIL::ADJCALLSTACKDOWN + || MI->getOpcode() == AMDIL::ADJCALLSTACKUP + || MI->getNumOperands() != 2 + || !MI->getOperand(0).isReg()) { + break; + } + reg = MI->getOperand(0).getReg(); + if (MI->getOperand(1).isReg()) { + unsigned reg1 = MI->getOperand(1).getReg(); + inputRegs.push_back(reg1); + if (lookupTable[reg1].second) { + curRes.bits.PointerPath = 1; + } + } + lookupTable.erase(reg); + if ((signed)reg < 0 + || mBegin == CallMI->getParent()->begin()) { + break; + } + MI = --mBegin; + } while (1); + mBegin = callInst; + MI = ++mBegin; + // If the next registers operand 1 is not a register or that register + // is not R1, then we don't have any return values. + if (MI->getNumOperands() == 2 + && MI->getOperand(1).isReg() + && MI->getOperand(1).getReg() == AMDIL::R1) { + // Next we check the output register. + reg = MI->getOperand(0).getReg(); + // Now we link the inputs to the output. + for (unsigned x = 0; x < inputRegs.size(); ++x) { + if (lookupTable[inputRegs[x]].second) { + curRes.bits.PointerPath = 1; + lookupTable[reg] = lookupTable[inputRegs[x]]; + InstToPtrMap[CallMI].insert( + lookupTable[reg].second); + break; + } + } + lookupTable.erase(MI->getOperand(1).getReg()); + } + setAsmPrinterFlags(CallMI, curRes); + if (mDebug) { + dbgs() << "Parsing Call Stack End.\n"; + } + return; +} + +// Detect if the current instruction conflicts with another instruction +// and add the instruction to the correct location accordingly. +static void +detectConflictInst( + MachineInstr *MI, + AMDILAS::InstrResEnc &curRes, + RVPVec &lookupTable, + InstPMap &InstToPtrMap, + bool isLoadStore, + unsigned reg, + unsigned dstReg, + bool mDebug) +{ + // If the instruction does not have a point path flag + // associated with it, then we know that no other pointer + // hits this instruciton. + if (!curRes.bits.PointerPath) { + if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) { + curRes.bits.PointerPath = 1; + } + // We don't want to transfer to the register number + // between load/store because the load dest can be completely + // different pointer path and the store doesn't have a real + // destination register. + if (!isLoadStore) { + if (mDebug) { + if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) { + dbgs() << "Pointer: " << lookupTable[reg].second->getName(); + assert(dyn_cast<PointerType>(lookupTable[reg].second->getType()) + && "Must be a pointer type for an instruction!"); + switch (dyn_cast<PointerType>( + lookupTable[reg].second->getType())->getAddressSpace()) + { + case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break; + case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break; + case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break; + case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break; + case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break; + + } + dbgs() << lookupTable[reg].first << " Reg: " << reg + << " assigned to reg " << dstReg << ". Inst: "; + MI->dump(); + } + } + // We don't want to do any copies if the register is not virtual + // as it is the result of a CALL. ParseCallInst handles the + // case where the input and output need to be linked up + // if it occurs. The easiest way to check for virtual + // is to check the top bit. + lookupTable[dstReg] = lookupTable[reg]; + } + } else { + if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) { + // Otherwise we have a conflict between two pointers somehow. + curRes.bits.ConflictPtr = 1; + if (mDebug) { + dbgs() << "Pointer: " << lookupTable[reg].second->getName(); + assert(dyn_cast<PointerType>(lookupTable[reg].second->getType()) + && "Must be a pointer type for a conflict instruction!"); + switch (dyn_cast<PointerType>( + lookupTable[reg].second->getType())->getAddressSpace()) + { + case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break; + case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break; + case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break; + case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break; + case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break; + + } + dbgs() << lookupTable[reg].first << " Reg: " << reg; + if (InstToPtrMap[MI].size() > 1) { + dbgs() << " conflicts with:\n "; + for (PtrSet::iterator psib = InstToPtrMap[MI].begin(), + psie = InstToPtrMap[MI].end(); psib != psie; ++psib) { + dbgs() << "\t\tPointer: " << (*psib)->getName() << " "; + assert(dyn_cast<PointerType>((*psib)->getType()) + && "Must be a pointer type for a conflict instruction!"); + (*psib)->dump(); + } + } else { + dbgs() << "."; + } + dbgs() << " Inst: "; + MI->dump(); + } + } + // Add the conflicting values to the pointer set for the instruction + InstToPtrMap[MI].insert(lookupTable[reg].second); + // We don't want to add the destination register if + // we are a load or store. + if (!isLoadStore) { + InstToPtrMap[MI].insert(lookupTable[dstReg].second); + } + } + setAsmPrinterFlags(MI, curRes); +} + +// In this case we want to handle a load instruction. +static void +parseLoadInst( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + FIPMap &FIToPtrMap, + RVPVec &lookupTable, + CPoolSet &cpool, + BlockCacheableInfo &bci, + MachineInstr *MI, + bool mDebug) +{ + assert(isLoadInst(ATM->getInstrInfo(), MI) && "Only a load instruction can be parsed by " + "the parseLoadInst function."); + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(MI, curRes); + unsigned dstReg = MI->getOperand(0).getReg(); + unsigned idx = 0; + const Value *basePtr = NULL; + if (MI->getOperand(1).isReg()) { + idx = MI->getOperand(1).getReg(); + basePtr = lookupTable[idx].second; + // If we don't know what value the register + // is assigned to, then we need to special case + // this instruction. + } else if (MI->getOperand(1).isFI()) { + idx = MI->getOperand(1).getIndex(); + lookupTable[dstReg] = FIToPtrMap[idx]; + } else if (MI->getOperand(1).isCPI()) { + cpool.insert(MI); + } + // If we are a hardware local, then we don't need to track as there + // is only one resource ID that we need to know about, so we + // map it using allocateDefaultID, which maps it to the default. + // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS. + if (isLRPInst(MI, ATM) || !basePtr) { + allocateDefaultID(ATM, curRes, MI, mDebug); + return; + } + // We have a load instruction so we map this instruction + // to the pointer and insert it into the set of known + // load instructions. + InstToPtrMap[MI].insert(basePtr); + PtrToInstMap[basePtr].push_back(MI); + + if (isGlobalInst(ATM->getInstrInfo(), MI)) { + // Add to the cacheable set for the block. If there was a store earlier + // in the block, this call won't actually add it to the cacheable set. + bci.addPossiblyCacheableInst(ATM, MI); + } + + if (mDebug) { + dbgs() << "Assigning instruction to pointer "; + dbgs() << basePtr->getName() << ". Inst: "; + MI->dump(); + } + detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, + idx, dstReg, mDebug); +} + +// In this case we want to handle a store instruction. +static void +parseStoreInst( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + FIPMap &FIToPtrMap, + RVPVec &lookupTable, + CPoolSet &cpool, + BlockCacheableInfo &bci, + MachineInstr *MI, + ByteSet &bytePtrs, + ConflictSet &conflictPtrs, + bool mDebug) +{ + assert(isStoreInst(ATM->getInstrInfo(), MI) && "Only a store instruction can be parsed by " + "the parseStoreInst function."); + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(MI, curRes); + unsigned dstReg = MI->getOperand(0).getReg(); + + // If the data part of the store instruction is known to + // be a pointer, then we need to mark this pointer as being + // a byte pointer. This is the conservative case that needs + // to be handled correctly. + if (lookupTable[dstReg].second && lookupTable[dstReg].first != ~0U) { + curRes.bits.ConflictPtr = 1; + if (mDebug) { + dbgs() << "Found a case where the pointer is being stored!\n"; + MI->dump(); + dbgs() << "Pointer is "; + lookupTable[dstReg].second->print(dbgs()); + dbgs() << "\n"; + } + //PtrToInstMap[lookupTable[dstReg].second].push_back(MI); + if (lookupTable[dstReg].second->getType()->isPointerTy()) { + conflictPtrs.insert(lookupTable[dstReg].second); + } + } + + // Before we go through the special cases, for the cacheable information + // all we care is if the store if global or not. + if (!isLRPInst(MI, ATM)) { + bci.setReachesExit(); + } + + // If the address is not a register address, + // then we need to lower it as an unknown id. + if (!MI->getOperand(1).isReg()) { + if (MI->getOperand(1).isCPI()) { + if (mDebug) { + dbgs() << "Found an instruction with a CPI index #" + << MI->getOperand(1).getIndex() << "!\n"; + } + cpool.insert(MI); + } else if (MI->getOperand(1).isFI()) { + if (mDebug) { + dbgs() << "Found an instruction with a frame index #" + << MI->getOperand(1).getIndex() << "!\n"; + } + // If we are a frame index and we are storing a pointer there, lets + // go ahead and assign the pointer to the location within the frame + // index map so that we can get the value out later. + FIToPtrMap[MI->getOperand(1).getIndex()] = lookupTable[dstReg]; + } + + allocateDefaultID(ATM, curRes, MI, mDebug); + return; + } + unsigned reg = MI->getOperand(1).getReg(); + // If we don't know what value the register + // is assigned to, then we need to special case + // this instruction. + if (!lookupTable[reg].second) { + allocateDefaultID(ATM, curRes, MI, mDebug); + return; + } + // const Value *basePtr = lookupTable[reg].second; + // If we are a hardware local, then we don't need to track as there + // is only one resource ID that we need to know about, so we + // map it using allocateDefaultID, which maps it to the default. + // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS. + if (isLRPInst(MI, ATM)) { + allocateDefaultID(ATM, curRes, MI, mDebug); + return; + } + + // We have a store instruction so we map this instruction + // to the pointer and insert it into the set of known + // store instructions. + InstToPtrMap[MI].insert(lookupTable[reg].second); + PtrToInstMap[lookupTable[reg].second].push_back(MI); + uint16_t RegClass = MI->getDesc().OpInfo[0].RegClass; + switch (RegClass) { + default: + break; + case AMDIL::GPRI8RegClassID: + case AMDIL::GPRV2I8RegClassID: + case AMDIL::GPRI16RegClassID: + if (usesGlobal(ATM, MI)) { + if (mDebug) { + dbgs() << "Annotating instruction as Byte Store. Inst: "; + MI->dump(); + } + curRes.bits.ByteStore = 1; + setAsmPrinterFlags(MI, curRes); + const PointerType *PT = dyn_cast<PointerType>( + lookupTable[reg].second->getType()); + if (PT) { + bytePtrs.insert(lookupTable[reg].second); + } + } + break; + }; + // If we are a truncating store, then we need to determine the + // size of the pointer that we are truncating to, and if we + // are less than 32 bits, we need to mark the pointer as a + // byte store pointer. + switch (MI->getOpcode()) { + case AMDIL::GLOBALTRUNCSTORE_i16i8: + case AMDIL::GLOBALTRUNCSTORE_v2i16i8: + case AMDIL::GLOBALTRUNCSTORE_i32i8: + case AMDIL::GLOBALTRUNCSTORE_v2i32i8: + case AMDIL::GLOBALTRUNCSTORE_i64i8: + case AMDIL::GLOBALTRUNCSTORE_v2i64i8: + case AMDIL::GLOBALTRUNCSTORE_i32i16: + case AMDIL::GLOBALTRUNCSTORE_i64i16: + case AMDIL::GLOBALSTORE_i8: + case AMDIL::GLOBALSTORE_i16: + curRes.bits.ByteStore = 1; + setAsmPrinterFlags(MI, curRes); + bytePtrs.insert(lookupTable[reg].second); + break; + default: + break; + } + + if (mDebug) { + dbgs() << "Assigning instruction to pointer "; + dbgs() << lookupTable[reg].second->getName() << ". Inst: "; + MI->dump(); + } + detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, + reg, dstReg, mDebug); +} + +// In this case we want to handle an atomic instruction. +static void +parseAtomicInst( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + RVPVec &lookupTable, + BlockCacheableInfo &bci, + MachineInstr *MI, + ByteSet &bytePtrs, + bool mDebug) +{ + assert(isAtomicInst(ATM->getInstrInfo(), MI) && "Only an atomic instruction can be parsed by " + "the parseAtomicInst function."); + AMDILAS::InstrResEnc curRes; + unsigned dstReg = MI->getOperand(0).getReg(); + unsigned reg = 0; + getAsmPrinterFlags(MI, curRes); + unsigned numOps = MI->getNumOperands(); + bool found = false; + while (--numOps) { + MachineOperand &Op = MI->getOperand(numOps); + if (!Op.isReg()) { + continue; + } + reg = Op.getReg(); + // If the register is not known to be owned by a pointer + // then we can ignore it + if (!lookupTable[reg].second) { + continue; + } + // if the pointer is known to be local, region or private, then we + // can ignore it. Although there are no private atomics, we still + // do this check so we don't have to write a new function to check + // for only local and region. + if (isLRPInst(MI, ATM)) { + continue; + } + found = true; + InstToPtrMap[MI].insert(lookupTable[reg].second); + PtrToInstMap[lookupTable[reg].second].push_back(MI); + + // We now know we have an atomic operation on global memory. + // This is a store so must update the cacheable information. + bci.setReachesExit(); + + // Only do if have SC with arena atomic bug fix (EPR 326883). + // TODO: enable once SC with EPR 326883 has been promoted to CAL. + if (ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_150) { + // Force pointers that are used by atomics to be in the arena. + // If they were allowed to be accessed as RAW they would cause + // all access to use the slow complete path. + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on atomic instruction: "; + MI->dump(); + } + curRes.bits.ByteStore = 1; + bytePtrs.insert(lookupTable[reg].second); + } + + if (mDebug) { + dbgs() << "Assigning instruction to pointer "; + dbgs() << lookupTable[reg].second->getName() << ". Inst: "; + MI->dump(); + } + detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, + reg, dstReg, mDebug); + } + if (!found) { + allocateDefaultID(ATM, curRes, MI, mDebug); + } +} +// In this case we want to handle a counter instruction. +static void +parseAppendInst( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + RVPVec &lookupTable, + MachineInstr *MI, + bool mDebug) +{ + assert(isAppendInst(ATM->getInstrInfo(), MI) && "Only an atomic counter instruction can be " + "parsed by the parseAppendInst function."); + AMDILAS::InstrResEnc curRes; + unsigned dstReg = MI->getOperand(0).getReg(); + unsigned reg = MI->getOperand(1).getReg(); + getAsmPrinterFlags(MI, curRes); + // If the register is not known to be owned by a pointer + // then we set it to the default + if (!lookupTable[reg].second) { + allocateDefaultID(ATM, curRes, MI, mDebug); + return; + } + InstToPtrMap[MI].insert(lookupTable[reg].second); + PtrToInstMap[lookupTable[reg].second].push_back(MI); + if (mDebug) { + dbgs() << "Assigning instruction to pointer "; + dbgs() << lookupTable[reg].second->getName() << ". Inst: "; + MI->dump(); + } + detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true, + reg, dstReg, mDebug); +} +// In this case we want to handle an Image instruction. +static void +parseImageInst( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + FIPMap &FIToPtrMap, + RVPVec &lookupTable, + MachineInstr *MI, + bool mDebug) +{ + assert(isImageInst(ATM->getInstrInfo(), MI) && "Only an image instruction can be " + "parsed by the parseImageInst function."); + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(MI, curRes); + // AMDILKernelManager *km = + // (AMDILKernelManager *)ATM->getSubtargetImpl()->getKernelManager(); + AMDILMachineFunctionInfo *mMFI = MI->getParent()->getParent() + ->getInfo<AMDILMachineFunctionInfo>(); + if (MI->getOpcode() == AMDIL::IMAGE2D_WRITE + || MI->getOpcode() == AMDIL::IMAGE3D_WRITE) { + unsigned dstReg = MI->getOperand(0).getReg(); + curRes.bits.ResourceID = lookupTable[dstReg].first & 0xFFFF; + curRes.bits.isImage = 1; + InstToPtrMap[MI].insert(lookupTable[dstReg].second); + PtrToInstMap[lookupTable[dstReg].second].push_back(MI); + if (mDebug) { + dbgs() << "Assigning instruction to pointer "; + dbgs() << lookupTable[dstReg].second->getName() << ". Inst: "; + MI->dump(); + } + } else { + // unsigned dstReg = MI->getOperand(0).getReg(); + unsigned reg = MI->getOperand(1).getReg(); + + // If the register is not known to be owned by a pointer + // then we set it to the default + if (!lookupTable[reg].second) { + assert(!"This should not happen for images!"); + allocateDefaultID(ATM, curRes, MI, mDebug); + return; + } + InstToPtrMap[MI].insert(lookupTable[reg].second); + PtrToInstMap[lookupTable[reg].second].push_back(MI); + if (mDebug) { + dbgs() << "Assigning instruction to pointer "; + dbgs() << lookupTable[reg].second->getName() << ". Inst: "; + MI->dump(); + } + switch (MI->getOpcode()) { + case AMDIL::IMAGE2D_READ: + case AMDIL::IMAGE2D_READ_UNNORM: + case AMDIL::IMAGE3D_READ: + case AMDIL::IMAGE3D_READ_UNNORM: + curRes.bits.ResourceID = lookupTable[reg].first & 0xFFFF; + if (MI->getOperand(3).isReg()) { + // Our sampler is not a literal value. + char buffer[256]; + memset(buffer, 0, sizeof(buffer)); + std::string sampler_name = ""; + unsigned reg = MI->getOperand(3).getReg(); + if (lookupTable[reg].second) { + sampler_name = lookupTable[reg].second->getName(); + } + if (sampler_name.empty()) { + sampler_name = findSamplerName(MI, lookupTable, FIToPtrMap, ATM); + } + uint32_t val = mMFI->addSampler(sampler_name, ~0U); + if (mDebug) { + dbgs() << "Mapping kernel sampler " << sampler_name + << " to sampler number " << val << " for Inst:\n"; + MI->dump(); + } + MI->getOperand(3).ChangeToImmediate(val); + } else { + // Our sampler is known at runtime as a literal, lets make sure + // that the metadata for it is known. + char buffer[256]; + memset(buffer, 0, sizeof(buffer)); + sprintf(buffer,"_%d", (int32_t)MI->getOperand(3).getImm()); + std::string sampler_name = std::string("unknown") + std::string(buffer); + uint32_t val = mMFI->addSampler(sampler_name, MI->getOperand(3).getImm()); + if (mDebug) { + dbgs() << "Mapping internal sampler " << sampler_name + << " to sampler number " << val << " for Inst:\n"; + MI->dump(); + } + MI->getOperand(3).setImm(val); + } + break; + case AMDIL::IMAGE2D_INFO0: + case AMDIL::IMAGE3D_INFO0: + curRes.bits.ResourceID = lookupTable[reg].first >> 16; + break; + case AMDIL::IMAGE2D_INFO1: + case AMDIL::IMAGE2DA_INFO1: + curRes.bits.ResourceID = (lookupTable[reg].first >> 16) + 1; + break; + }; + curRes.bits.isImage = 1; + } + setAsmPrinterFlags(MI, curRes); +} +// This case handles the rest of the instructions +static void +parseInstruction( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + RVPVec &lookupTable, + CPoolSet &cpool, + MachineInstr *MI, + bool mDebug) +{ + assert(!isAtomicInst(ATM->getInstrInfo(), MI) && !isStoreInst(ATM->getInstrInfo(), MI) && !isLoadInst(ATM->getInstrInfo(), MI) && + !isAppendInst(ATM->getInstrInfo(), MI) && !isImageInst(ATM->getInstrInfo(), MI) && + "Atomic/Load/Store/Append/Image insts should not be handled here!"); + unsigned numOps = MI->getNumOperands(); + // If we don't have any operands, we can skip this instruction + if (!numOps) { + return; + } + // if the dst operand is not a register, then we can skip + // this instruction. That is because we are probably a branch + // or jump instruction. + if (!MI->getOperand(0).isReg()) { + return; + } + // If we are a LOADCONST_i32, we might be a sampler, so we need + // to propogate the LOADCONST to IMAGE[2|3]D_READ instructions. + if (MI->getOpcode() == AMDIL::LOADCONST_i32) { + uint32_t val = MI->getOperand(1).getImm(); + MachineOperand* oldPtr = &MI->getOperand(0); + MachineOperand* moPtr = oldPtr->getNextOperandForReg(); + while (moPtr) { + oldPtr = moPtr; + moPtr = oldPtr->getNextOperandForReg(); + switch (oldPtr->getParent()->getOpcode()) { + default: + break; + case AMDIL::IMAGE2D_READ: + case AMDIL::IMAGE2D_READ_UNNORM: + case AMDIL::IMAGE3D_READ: + case AMDIL::IMAGE3D_READ_UNNORM: + if (mDebug) { + dbgs() << "Found a constant sampler for image read inst: "; + oldPtr->getParent()->print(dbgs()); + } + oldPtr->ChangeToImmediate(val); + break; + } + } + } + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(MI, curRes); + unsigned dstReg = MI->getOperand(0).getReg(); + unsigned reg = 0; + while (--numOps) { + MachineOperand &Op = MI->getOperand(numOps); + // if the operand is not a register, then we can ignore it + if (!Op.isReg()) { + if (Op.isCPI()) { + cpool.insert(MI); + } + continue; + } + reg = Op.getReg(); + // If the register is not known to be owned by a pointer + // then we can ignore it + if (!lookupTable[reg].second) { + continue; + } + detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, false, + reg, dstReg, mDebug); + + } +} + +// This function parses the basic block and based on the instruction type, +// calls the function to finish parsing the instruction. +static void +parseBasicBlock( + const AMDILTargetMachine *ATM, + MachineBasicBlock *MB, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + FIPMap &FIToPtrMap, + RVPVec &lookupTable, + ByteSet &bytePtrs, + ConflictSet &conflictPtrs, + CPoolSet &cpool, + BlockCacheableInfo &bci, + bool mDebug) +{ + for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end(); + mbb != mbe; ++mbb) { + MachineInstr *MI = mbb; + if (MI->getOpcode() == AMDIL::CALL) { + parseCall(ATM, InstToPtrMap, PtrToInstMap, lookupTable, + mbb, mbe, mDebug); + } + else if (isLoadInst(ATM->getInstrInfo(), MI)) { + parseLoadInst(ATM, InstToPtrMap, PtrToInstMap, + FIToPtrMap, lookupTable, cpool, bci, MI, mDebug); + } else if (isStoreInst(ATM->getInstrInfo(), MI)) { + parseStoreInst(ATM, InstToPtrMap, PtrToInstMap, + FIToPtrMap, lookupTable, cpool, bci, MI, bytePtrs, conflictPtrs, mDebug); + } else if (isAtomicInst(ATM->getInstrInfo(), MI)) { + parseAtomicInst(ATM, InstToPtrMap, PtrToInstMap, + lookupTable, bci, MI, bytePtrs, mDebug); + } else if (isAppendInst(ATM->getInstrInfo(), MI)) { + parseAppendInst(ATM, InstToPtrMap, PtrToInstMap, + lookupTable, MI, mDebug); + } else if (isImageInst(ATM->getInstrInfo(), MI)) { + parseImageInst(ATM, InstToPtrMap, PtrToInstMap, + FIToPtrMap, lookupTable, MI, mDebug); + } else { + parseInstruction(ATM, InstToPtrMap, PtrToInstMap, + lookupTable, cpool, MI, mDebug); + } + } +} + +// Follows the Reverse Post Order Traversal of the basic blocks to +// determine which order to parse basic blocks in. +void +parseFunction( + const AMDILPointerManager *PM, + const AMDILTargetMachine *ATM, + MachineFunction &MF, + InstPMap &InstToPtrMap, + PtrIMap &PtrToInstMap, + FIPMap &FIToPtrMap, + RVPVec &lookupTable, + ByteSet &bytePtrs, + ConflictSet &conflictPtrs, + CPoolSet &cpool, + MBBCacheableMap &mbbCacheable, + bool mDebug) +{ + if (mDebug) { + MachineDominatorTree *dominatorTree = &PM + ->getAnalysis<MachineDominatorTree>(); + dominatorTree->dump(); + } + + std::list<MachineBasicBlock*> prop_worklist; + + ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); + for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator + curBlock = RPOT.begin(), endBlock = RPOT.end(); + curBlock != endBlock; ++curBlock) { + MachineBasicBlock *MB = (*curBlock); + BlockCacheableInfo &bci = mbbCacheable[MB]; + for (MachineBasicBlock::pred_iterator mbbit = MB->pred_begin(), + mbbitend = MB->pred_end(); + mbbit != mbbitend; + mbbit++) { + MBBCacheableMap::const_iterator mbbcmit = mbbCacheable.find(*mbbit); + if (mbbcmit != mbbCacheable.end() && + mbbcmit->second.storeReachesExit()) { + bci.setReachesTop(); + break; + } + } + + if (mDebug) { + dbgs() << "[BlockOrdering] Parsing CurrentBlock: " + << MB->getNumber() << "\n"; + } + parseBasicBlock(ATM, MB, InstToPtrMap, PtrToInstMap, + FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool, bci, mDebug); + + if (bci.storeReachesExit()) + prop_worklist.push_back(MB); + + if (mDebug) { + dbgs() << "BCI info: Top: " << bci.storeReachesTop() << " Exit: " + << bci.storeReachesExit() << "\n Instructions:\n"; + for (CacheableInstrSet::const_iterator cibit = bci.cacheableBegin(), + cibitend = bci.cacheableEnd(); + cibit != cibitend; + cibit++) + { + (*cibit)->dump(); + } + } + } + + // This loop pushes any "storeReachesExit" flags into successor + // blocks until the flags have been fully propagated. This will + // ensure that blocks that have reachable stores due to loops + // are labeled appropriately. + while (!prop_worklist.empty()) { + MachineBasicBlock *wlb = prop_worklist.front(); + prop_worklist.pop_front(); + for (MachineBasicBlock::succ_iterator mbbit = wlb->succ_begin(), + mbbitend = wlb->succ_end(); + mbbit != mbbitend; + mbbit++) + { + BlockCacheableInfo &blockCache = mbbCacheable[*mbbit]; + if (!blockCache.storeReachesTop()) { + blockCache.setReachesTop(); + prop_worklist.push_back(*mbbit); + } + if (mDebug) { + dbgs() << "BCI Prop info: " << (*mbbit)->getNumber() << " Top: " + << blockCache.storeReachesTop() << " Exit: " + << blockCache.storeReachesExit() + << "\n"; + } + } + } +} + +// Helper function that dumps to dbgs() information about +// a pointer set. + void +dumpPointers(AppendSet &Ptrs, const char *str) +{ + if (Ptrs.empty()) { + return; + } + dbgs() << "[Dump]" << str << " found: " << "\n"; + for (AppendSet::iterator sb = Ptrs.begin(); + sb != Ptrs.end(); ++sb) { + (*sb)->dump(); + } + dbgs() << "\n"; +} +// Helper function that dumps to dbgs() information about +// a pointer set. + void +dumpPointers(PtrSet &Ptrs, const char *str) +{ + if (Ptrs.empty()) { + return; + } + dbgs() << "[Dump]" << str << " found: " << "\n"; + for (PtrSet::iterator sb = Ptrs.begin(); + sb != Ptrs.end(); ++sb) { + (*sb)->dump(); + } + dbgs() << "\n"; +} +// Function that detects all the conflicting pointers and adds +// the pointers that are detected to the conflict set, otherwise +// they are added to the raw or byte set based on their usage. +void +detectConflictingPointers( + const AMDILTargetMachine *ATM, + InstPMap &InstToPtrMap, + ByteSet &bytePtrs, + RawSet &rawPtrs, + ConflictSet &conflictPtrs, + bool mDebug) +{ + if (InstToPtrMap.empty()) { + return; + } + PtrSet aliasedPtrs; + const AMDILSubtarget *STM = ATM->getSubtargetImpl(); + for (InstPMap::iterator + mapIter = InstToPtrMap.begin(), iterEnd = InstToPtrMap.end(); + mapIter != iterEnd; ++mapIter) { + if (mDebug) { + dbgs() << "Instruction: "; + (mapIter)->first->dump(); + } + MachineInstr* MI = mapIter->first; + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(MI, curRes); + if (curRes.bits.isImage) { + continue; + } + bool byte = false; + // We might have a case where more than 1 pointers is going to the same + // I/O instruction + if (mDebug) { + dbgs() << "Base Pointer[s]:\n"; + } + for (PtrSet::iterator cfIter = mapIter->second.begin(), + cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) { + if (mDebug) { + (*cfIter)->dump(); + } + if (bytePtrs.count(*cfIter)) { + if (mDebug) { + dbgs() << "Byte pointer found!\n"; + } + byte = true; + break; + } + } + if (byte) { + for (PtrSet::iterator cfIter = mapIter->second.begin(), + cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) { + const Value *ptr = (*cfIter); + if (isLRPInst(mapIter->first, ATM)) { + // We don't need to deal with pointers to local/region/private + // memory regions + continue; + } + if (mDebug) { + dbgs() << "Adding pointer " << (ptr)->getName() + << " to byte set!\n"; + } + const PointerType *PT = dyn_cast<PointerType>(ptr->getType()); + if (PT) { + bytePtrs.insert(ptr); + } + } + } else { + for (PtrSet::iterator cfIter = mapIter->second.begin(), + cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) { + const Value *ptr = (*cfIter); + // bool aliased = false; + if (isLRPInst(mapIter->first, ATM)) { + // We don't need to deal with pointers to local/region/private + // memory regions + continue; + } + const Argument *arg = dyn_cast_or_null<Argument>(*cfIter); + if (!arg) { + continue; + } + if (!STM->device()->isSupported(AMDILDeviceInfo::NoAlias) + && !arg->hasNoAliasAttr()) { + if (mDebug) { + dbgs() << "Possible aliased pointer found!\n"; + } + aliasedPtrs.insert(ptr); + } + if (mapIter->second.size() > 1) { + if (mDebug) { + dbgs() << "Adding pointer " << ptr->getName() + << " to conflict set!\n"; + } + const PointerType *PT = dyn_cast<PointerType>(ptr->getType()); + if (PT) { + conflictPtrs.insert(ptr); + } + } + if (mDebug) { + dbgs() << "Adding pointer " << ptr->getName() + << " to raw set!\n"; + } + const PointerType *PT = dyn_cast<PointerType>(ptr->getType()); + if (PT) { + rawPtrs.insert(ptr); + } + } + } + if (mDebug) { + dbgs() << "\n"; + } + } + // If we have any aliased pointers and byte pointers exist, + // then make sure that all of the aliased pointers are + // part of the byte pointer set. + if (!bytePtrs.empty()) { + for (PtrSet::iterator aIter = aliasedPtrs.begin(), + aEnd = aliasedPtrs.end(); aIter != aEnd; ++aIter) { + if (mDebug) { + dbgs() << "Moving " << (*aIter)->getName() + << " from raw to byte.\n"; + } + bytePtrs.insert(*aIter); + rawPtrs.erase(*aIter); + } + } +} +// Function that detects aliased constant pool operations. +void +detectAliasedCPoolOps( + TargetMachine &TM, + CPoolSet &cpool, + bool mDebug + ) +{ + const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + if (mDebug && !cpool.empty()) { + dbgs() << "Instructions w/ CPool Ops: \n"; + } + // The algorithm for detecting aliased cpool is as follows. + // For each instruction that has a cpool argument + // follow def-use chain + // if instruction is a load and load is a private load, + // switch to constant pool load + for (CPoolSet::iterator cpb = cpool.begin(), cpe = cpool.end(); + cpb != cpe; ++cpb) { + if (mDebug) { + (*cpb)->dump(); + } + std::queue<MachineInstr*> queue; + std::set<MachineInstr*> visited; + queue.push(*cpb); + MachineInstr *cur; + while (!queue.empty()) { + cur = queue.front(); + queue.pop(); + if (visited.count(cur)) { + continue; + } + if (isLoadInst(TM.getInstrInfo(), cur) && isPrivateInst(TM.getInstrInfo(), cur)) { + // If we are a private load and the register is + // used in the address register, we need to + // switch from private to constant pool load. + if (mDebug) { + dbgs() << "Found an instruction that is a private load " + << "but should be a constant pool load.\n"; + cur->print(dbgs()); + dbgs() << "\n"; + } + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(cur, curRes); + curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID); + curRes.bits.ConflictPtr = 1; + setAsmPrinterFlags(cur, curRes); + cur->setDesc(TM.getInstrInfo()->get( + (cur->getOpcode() - AMDIL::PRIVATEAEXTLOAD_f32) + + AMDIL::CPOOLAEXTLOAD_f32)); + } else { + if (cur->getOperand(0).isReg()) { + MachineOperand* ptr = cur->getOperand(0).getNextOperandForReg(); + while (ptr && !ptr->isDef() && ptr->isReg()) { + queue.push(ptr->getParent()); + ptr = ptr->getNextOperandForReg(); + } + } + } + visited.insert(cur); + } + } +} +// Function that detects fully cacheable pointers. Fully cacheable pointers +// are pointers that have no writes to them and -fno-alias is specified. +void +detectFullyCacheablePointers( + const AMDILTargetMachine *ATM, + PtrIMap &PtrToInstMap, + RawSet &rawPtrs, + CacheableSet &cacheablePtrs, + ConflictSet &conflictPtrs, + bool mDebug + ) +{ + if (PtrToInstMap.empty()) { + return; + } + const AMDILSubtarget *STM + = ATM->getSubtargetImpl(); + // 4XXX hardware doesn't support cached uav opcodes and we assume + // no aliasing for this to work. Also in debug mode we don't do + // any caching. + if (STM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX + || !STM->device()->isSupported(AMDILDeviceInfo::CachedMem)) { + return; + } + if (STM->device()->isSupported(AMDILDeviceInfo::NoAlias)) { + for (PtrIMap::iterator mapIter = PtrToInstMap.begin(), + iterEnd = PtrToInstMap.end(); mapIter != iterEnd; ++mapIter) { + if (mDebug) { + dbgs() << "Instruction: "; + mapIter->first->dump(); + } + // Skip the pointer if we have already detected it. + if (cacheablePtrs.count(mapIter->first)) { + continue; + } + bool cacheable = true; + for (std::vector<MachineInstr*>::iterator + miBegin = mapIter->second.begin(), + miEnd = mapIter->second.end(); miBegin != miEnd; ++miBegin) { + if (isStoreInst(ATM->getInstrInfo(), *miBegin) || + isImageInst(ATM->getInstrInfo(), *miBegin) || + isAtomicInst(ATM->getInstrInfo(), *miBegin)) { + cacheable = false; + break; + } + } + // we aren't cacheable, so lets move on to the next instruction + if (!cacheable) { + continue; + } + // If we are in the conflict set, lets move to the next instruction + // FIXME: we need to check to see if the pointers that conflict with + // the current pointer are also cacheable. If they are, then add them + // to the cacheable list and not fail. + if (conflictPtrs.count(mapIter->first)) { + continue; + } + // Otherwise if we have no stores and no conflicting pointers, we can + // be added to the cacheable set. + if (mDebug) { + dbgs() << "Adding pointer " << mapIter->first->getName(); + dbgs() << " to cached set!\n"; + } + const PointerType *PT = dyn_cast<PointerType>(mapIter->first->getType()); + if (PT) { + cacheablePtrs.insert(mapIter->first); + } + } + } +} + +// Are any of the pointers in PtrSet also in the BytePtrs or the CachePtrs? +static bool +ptrSetIntersectsByteOrCache( + PtrSet &cacheSet, + ByteSet &bytePtrs, + CacheableSet &cacheablePtrs + ) +{ + for (PtrSet::const_iterator psit = cacheSet.begin(), + psitend = cacheSet.end(); + psit != psitend; + psit++) { + if (bytePtrs.find(*psit) != bytePtrs.end() || + cacheablePtrs.find(*psit) != cacheablePtrs.end()) { + return true; + } + } + return false; +} + +// Function that detects which instructions are cacheable even if +// all instructions of the pointer are not cacheable. The resulting +// set of instructions will not contain Ptrs that are in the cacheable +// ptr set (under the assumption they will get marked cacheable already) +// or pointers in the byte set, since they are not cacheable. +void +detectCacheableInstrs( + MBBCacheableMap &bbCacheable, + InstPMap &InstToPtrMap, + CacheableSet &cacheablePtrs, + ByteSet &bytePtrs, + CacheableInstrSet &cacheableSet, + bool mDebug + ) + +{ + for (MBBCacheableMap::const_iterator mbbcit = bbCacheable.begin(), + mbbcitend = bbCacheable.end(); + mbbcit != mbbcitend; + mbbcit++) { + for (CacheableInstrSet::const_iterator bciit + = mbbcit->second.cacheableBegin(), + bciitend + = mbbcit->second.cacheableEnd(); + bciit != bciitend; + bciit++) { + if (!ptrSetIntersectsByteOrCache(InstToPtrMap[*bciit], + bytePtrs, + cacheablePtrs)) { + cacheableSet.insert(*bciit); + } + } + } +} +// This function annotates the cacheable pointers with the +// CacheableRead bit. The cacheable read bit is set +// when the number of write images is not equal to the max +// or if the default RAW_UAV_ID is equal to 11. The first +// condition means that there is a raw uav between 0 and 7 +// that is available for cacheable reads and the second +// condition means that UAV 11 is available for cacheable +// reads. +void +annotateCacheablePtrs( + TargetMachine &TM, + PtrIMap &PtrToInstMap, + CacheableSet &cacheablePtrs, + ByteSet &bytePtrs, + uint32_t numWriteImages, + bool mDebug) +{ + const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager(); + PtrSet::iterator siBegin, siEnd; + std::vector<MachineInstr*>::iterator miBegin, miEnd; + AMDILMachineFunctionInfo *mMFI = NULL; + // First we can check the cacheable pointers + for (siBegin = cacheablePtrs.begin(), siEnd = cacheablePtrs.end(); + siBegin != siEnd; ++siBegin) { + assert(!bytePtrs.count(*siBegin) && "Found a cacheable pointer " + "that also exists as a byte pointer!"); + for (miBegin = PtrToInstMap[*siBegin].begin(), + miEnd = PtrToInstMap[*siBegin].end(); + miBegin != miEnd; ++miBegin) { + if (mDebug) { + dbgs() << "Annotating pointer as cacheable. Inst: "; + (*miBegin)->dump(); + } + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + assert(!curRes.bits.ByteStore && "No cacheable pointers should have the " + "byte Store flag set!"); + // If UAV11 is enabled, then we can enable cached reads. + if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) { + curRes.bits.CacheableRead = 1; + curRes.bits.ResourceID = 11; + setAsmPrinterFlags(*miBegin, curRes); + if (!mMFI) { + mMFI = (*miBegin)->getParent()->getParent() + ->getInfo<AMDILMachineFunctionInfo>(); + } + mMFI->uav_insert(curRes.bits.ResourceID); + } + } + } +} + +// A byte pointer is a pointer that along the pointer path has a +// byte store assigned to it. +void +annotateBytePtrs( + TargetMachine &TM, + PtrIMap &PtrToInstMap, + ByteSet &bytePtrs, + RawSet &rawPtrs, + bool mDebug + ) +{ + const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + AMDILKernelManager *KM = STM->getKernelManager(); + PtrSet::iterator siBegin, siEnd; + std::vector<MachineInstr*>::iterator miBegin, miEnd; + uint32_t arenaID = STM->device() + ->getResourceID(AMDILDevice::ARENA_UAV_ID); + if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { + arenaID = ARENA_SEGMENT_RESERVED_UAVS + 1; + } + AMDILMachineFunctionInfo *mMFI = NULL; + for (siBegin = bytePtrs.begin(), siEnd = bytePtrs.end(); + siBegin != siEnd; ++siBegin) { + const Value* val = (*siBegin); + const PointerType *PT = dyn_cast<PointerType>(val->getType()); + if (!PT) { + continue; + } + const Argument *curArg = dyn_cast<Argument>(val); + assert(!rawPtrs.count(*siBegin) && "Found a byte pointer " + "that also exists as a raw pointer!"); + bool arenaInc = false; + for (miBegin = PtrToInstMap[*siBegin].begin(), + miEnd = PtrToInstMap[*siBegin].end(); + miBegin != miEnd; ++miBegin) { + if (mDebug) { + dbgs() << "Annotating pointer as arena. Inst: "; + (*miBegin)->dump(); + } + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + + if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) + && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) { + // If hardware constant mem is enabled, then we need to + // get the constant pointer CB number and use that to specify + // the resource ID. + AMDILGlobalManager *GM = STM->getGlobalManager(); + const StringRef funcName = (*miBegin)->getParent()->getParent() + ->getFunction()->getName(); + if (GM->isKernel(funcName)) { + const kernel &krnl = GM->getKernel(funcName); + curRes.bits.ResourceID = GM->getConstPtrCB(krnl, + (*siBegin)->getName()); + curRes.bits.HardwareInst = 1; + } else { + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::CONSTANT_ID); + } + } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem) + && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) { + // If hardware local mem is enabled, get the local mem ID from + // the device to use as the ResourceID + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::LDS_ID); + if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { + assert(curRes.bits.ResourceID && "Atomic resource ID " + "cannot be non-zero!"); + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem) + && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) { + // If hardware region mem is enabled, get the gds mem ID from + // the device to use as the ResourceID + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::GDS_ID); + if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { + assert(curRes.bits.ResourceID && "Atomic resource ID " + "cannot be non-zero!"); + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) + && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::SCRATCH_ID); + } else { + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; + (*miBegin)->print(dbgs()); + } + curRes.bits.ByteStore = 1; + curRes.bits.ResourceID = (curArg && curArg->hasNoAliasAttr()) ? arenaID + : STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); + if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { + arenaInc = true; + } + if (isAtomicInst(TM.getInstrInfo(), *miBegin) && + STM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) { + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + // If we are an arena instruction, we need to switch the atomic opcode + // from the global version to the arena version. + MachineInstr *MI = *miBegin; + MI->setDesc( + TM.getInstrInfo()->get( + (MI->getOpcode() - AMDIL::ATOM_G_ADD) + AMDIL::ATOM_A_ADD)); + } + if (mDebug) { + dbgs() << "Annotating pointer as arena. Inst: "; + (*miBegin)->dump(); + } + } + setAsmPrinterFlags(*miBegin, curRes); + KM->setUAVID(*siBegin, curRes.bits.ResourceID); + if (!mMFI) { + mMFI = (*miBegin)->getParent()->getParent() + ->getInfo<AMDILMachineFunctionInfo>(); + } + mMFI->uav_insert(curRes.bits.ResourceID); + } + if (arenaInc) { + ++arenaID; + } + } +} +// An append pointer is a opaque object that has append instructions +// in its path. +void +annotateAppendPtrs( + TargetMachine &TM, + PtrIMap &PtrToInstMap, + AppendSet &appendPtrs, + bool mDebug) +{ + unsigned currentCounter = 0; + // const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager(); + MachineFunction *MF = NULL; + for (AppendSet::iterator asBegin = appendPtrs.begin(), + asEnd = appendPtrs.end(); asBegin != asEnd; ++asBegin) + { + bool usesWrite = false; + bool usesRead = false; + const Value* curVal = *asBegin; + if (mDebug) { + dbgs() << "Counter: " << curVal->getName() + << " assigned the counter " << currentCounter << "\n"; + } + for (std::vector<MachineInstr*>::iterator + miBegin = PtrToInstMap[curVal].begin(), + miEnd = PtrToInstMap[curVal].end(); miBegin != miEnd; ++miBegin) { + MachineInstr *MI = *miBegin; + if (!MF) { + MF = MI->getParent()->getParent(); + } + unsigned opcode = MI->getOpcode(); + switch (opcode) { + default: + if (mDebug) { + dbgs() << "Skipping instruction: "; + MI->dump(); + } + break; + case AMDIL::APPEND_ALLOC: + case AMDIL::APPEND_ALLOC_NORET: + usesWrite = true; + MI->getOperand(1).ChangeToImmediate(currentCounter); + if (mDebug) { + dbgs() << "Assing to counter " << currentCounter << " Inst: "; + MI->dump(); + } + break; + case AMDIL::APPEND_CONSUME: + case AMDIL::APPEND_CONSUME_NORET: + usesRead = true; + MI->getOperand(1).ChangeToImmediate(currentCounter); + if (mDebug) { + dbgs() << "Assing to counter " << currentCounter << " Inst: "; + MI->dump(); + } + break; + }; + } + if (usesWrite && usesRead && MF) { + MF->getInfo<AMDILMachineFunctionInfo>()->addErrorMsg( + amd::CompilerErrorMessage[INCORRECT_COUNTER_USAGE]); + } + ++currentCounter; + } +} +// A raw pointer is any pointer that does not have byte store in its path. +static void +annotateRawPtrs( + TargetMachine &TM, + PtrIMap &PtrToInstMap, + RawSet &rawPtrs, + ByteSet &bytePtrs, + uint32_t numWriteImages, + bool mDebug + ) +{ + const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + AMDILKernelManager *KM = STM->getKernelManager(); + PtrSet::iterator siBegin, siEnd; + std::vector<MachineInstr*>::iterator miBegin, miEnd; + AMDILMachineFunctionInfo *mMFI = NULL; + + // Now all of the raw pointers will go to the raw uav. + for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end(); + siBegin != siEnd; ++siBegin) { + const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); + if (!PT) { + continue; + } + assert(!bytePtrs.count(*siBegin) && "Found a raw pointer " + " that also exists as a byte pointers!"); + for (miBegin = PtrToInstMap[*siBegin].begin(), + miEnd = PtrToInstMap[*siBegin].end(); + miBegin != miEnd; ++miBegin) { + if (mDebug) { + dbgs() << "Annotating pointer as raw. Inst: "; + (*miBegin)->dump(); + } + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + if (!curRes.bits.ConflictPtr) { + assert(!curRes.bits.ByteStore + && "Found a instruction that is marked as " + "raw but has a byte store bit set!"); + } else if (curRes.bits.ConflictPtr) { + if (curRes.bits.ByteStore) { + curRes.bits.ByteStore = 0; + } + } + if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) + && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) { + // If hardware constant mem is enabled, then we need to + // get the constant pointer CB number and use that to specify + // the resource ID. + AMDILGlobalManager *GM = STM->getGlobalManager(); + const StringRef funcName = (*miBegin)->getParent()->getParent() + ->getFunction()->getName(); + if (GM->isKernel(funcName)) { + const kernel &krnl = GM->getKernel(funcName); + curRes.bits.ResourceID = GM->getConstPtrCB(krnl, + (*siBegin)->getName()); + curRes.bits.HardwareInst = 1; + } else { + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::CONSTANT_ID); + } + } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem) + && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) { + // If hardware local mem is enabled, get the local mem ID from + // the device to use as the ResourceID + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::LDS_ID); + if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { + assert(curRes.bits.ResourceID && "Atomic resource ID " + "cannot be non-zero!"); + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem) + && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) { + // If hardware region mem is enabled, get the gds mem ID from + // the device to use as the ResourceID + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::GDS_ID); + if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { + assert(curRes.bits.ResourceID && "Atomic resource ID " + "cannot be non-zero!"); + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + } + } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) + && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::SCRATCH_ID); + } else if (!STM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { + // If multi uav is enabled, then the resource ID is either the + // number of write images that are available or the device + // raw uav id if it is 11. + if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) > + STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::RAW_UAV_ID); + } else if (numWriteImages != OPENCL_MAX_WRITE_IMAGES) { + if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) + < numWriteImages) { + curRes.bits.ResourceID = numWriteImages; + } else { + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::RAW_UAV_ID); + } + } else { + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; + (*miBegin)->print(dbgs()); + } + curRes.bits.ByteStore = 1; + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::ARENA_UAV_ID); + } + if (isAtomicInst(TM.getInstrInfo(), *miBegin)) { + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + if (curRes.bits.ResourceID + == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { + assert(0 && "Found an atomic instruction that has " + "an arena uav id!"); + } + } + KM->setUAVID(*siBegin, curRes.bits.ResourceID); + if (!mMFI) { + mMFI = (*miBegin)->getParent()->getParent() + ->getInfo<AMDILMachineFunctionInfo>(); + } + mMFI->uav_insert(curRes.bits.ResourceID); + } + setAsmPrinterFlags(*miBegin, curRes); + } + } + +} + +void +annotateCacheableInstrs( + TargetMachine &TM, + CacheableInstrSet &cacheableSet, + bool mDebug) +{ + const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + // AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager(); + + CacheableInstrSet::iterator miBegin, miEnd; + + for (miBegin = cacheableSet.begin(), + miEnd = cacheableSet.end(); + miBegin != miEnd; ++miBegin) { + if (mDebug) { + dbgs() << "Annotating instr as cacheable. Inst: "; + (*miBegin)->dump(); + } + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + // If UAV11 is enabled, then we can enable cached reads. + if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) { + curRes.bits.CacheableRead = 1; + curRes.bits.ResourceID = 11; + setAsmPrinterFlags(*miBegin, curRes); + } + } +} + +// Annotate the instructions along various pointer paths. The paths that +// are handled are the raw, byte and cacheable pointer paths. +static void +annotatePtrPath( + TargetMachine &TM, + PtrIMap &PtrToInstMap, + RawSet &rawPtrs, + ByteSet &bytePtrs, + CacheableSet &cacheablePtrs, + uint32_t numWriteImages, + bool mDebug + ) +{ + if (PtrToInstMap.empty()) { + return; + } + // First we can check the cacheable pointers + annotateCacheablePtrs(TM, PtrToInstMap, cacheablePtrs, + bytePtrs, numWriteImages, mDebug); + + // Next we annotate the byte pointers + annotateBytePtrs(TM, PtrToInstMap, bytePtrs, rawPtrs, mDebug); + + // Next we annotate the raw pointers + annotateRawPtrs(TM, PtrToInstMap, rawPtrs, bytePtrs, + numWriteImages, mDebug); +} +// Allocate MultiUAV pointer ID's for the raw/conflict pointers. +static void +allocateMultiUAVPointers( + MachineFunction &MF, + const AMDILTargetMachine *ATM, + PtrIMap &PtrToInstMap, + RawSet &rawPtrs, + ConflictSet &conflictPtrs, + CacheableSet &cacheablePtrs, + uint32_t numWriteImages, + bool mDebug) +{ + if (PtrToInstMap.empty()) { + return; + } + AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>(); + uint32_t curUAV = numWriteImages; + bool increment = true; + const AMDILSubtarget *STM + = ATM->getSubtargetImpl(); + // If the RAW_UAV_ID is a value that is larger than the max number of write + // images, then we use that UAV ID. + if (numWriteImages >= OPENCL_MAX_WRITE_IMAGES) { + curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); + increment = false; + } + AMDILKernelManager *KM = STM->getKernelManager(); + PtrSet::iterator siBegin, siEnd; + std::vector<MachineInstr*>::iterator miBegin, miEnd; + // First lets handle the raw pointers. + for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end(); + siBegin != siEnd; ++siBegin) { + assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type " + "to be processed at this point!"); + const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); + if (conflictPtrs.count(*siBegin) || !PT) { + continue; + } + // We only want to process global address space pointers + if (PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) { + if ((PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS + && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) + || (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS + && STM->device()->usesSoftware(AMDILDeviceInfo::ConstantMem)) + || (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS + && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))) { + // If we are using software emulated hardware features, then + // we need to specify that they use the raw uav and not + // zero-copy uav. The easiest way to do this is to assume they + // conflict with another pointer. Any pointer that conflicts + // with another pointer is assigned to the raw uav or the + // arena uav if no raw uav exists. + const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); + if (PT) { + conflictPtrs.insert(*siBegin); + } + } + if (PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + if (STM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) { + const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); + if (PT) { + conflictPtrs.insert(*siBegin); + } + } else { + if (mDebug) { + dbgs() << "Scratch Pointer '" << (*siBegin)->getName() + << "' being assigned uav "<< + STM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "\n"; + } + for (miBegin = PtrToInstMap[*siBegin].begin(), + miEnd = PtrToInstMap[*siBegin].end(); + miBegin != miEnd; ++miBegin) { + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + curRes.bits.ResourceID = STM->device() + ->getResourceID(AMDILDevice::SCRATCH_ID); + if (mDebug) { + dbgs() << "Updated instruction to bitmask "; + dbgs().write_hex(curRes.u16all); + dbgs() << " with ResID " << curRes.bits.ResourceID; + dbgs() << ". Inst: "; + (*miBegin)->dump(); + } + setAsmPrinterFlags((*miBegin), curRes); + KM->setUAVID(*siBegin, curRes.bits.ResourceID); + mMFI->uav_insert(curRes.bits.ResourceID); + } + } + } + continue; + } + // If more than just UAV 11 is cacheable, then we can remove + // this check. + if (cacheablePtrs.count(*siBegin)) { + if (mDebug) { + dbgs() << "Raw Pointer '" << (*siBegin)->getName() + << "' is cacheable, not allocating a multi-uav for it!\n"; + } + continue; + } + if (mDebug) { + dbgs() << "Raw Pointer '" << (*siBegin)->getName() + << "' being assigned uav " << curUAV << "\n"; + } + if (PtrToInstMap[*siBegin].empty()) { + KM->setUAVID(*siBegin, curUAV); + mMFI->uav_insert(curUAV); + } + // For all instructions here, we are going to set the new UAV to the curUAV + // number and not the value that it currently is set to. + for (miBegin = PtrToInstMap[*siBegin].begin(), + miEnd = PtrToInstMap[*siBegin].end(); + miBegin != miEnd; ++miBegin) { + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + curRes.bits.ResourceID = curUAV; + if (isAtomicInst(ATM->getInstrInfo(), *miBegin)) { + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + if (curRes.bits.ResourceID + == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { + assert(0 && "Found an atomic instruction that has " + "an arena uav id!"); + } + } + if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; + (*miBegin)->print(dbgs()); + } + curRes.bits.ByteStore = 1; + curRes.bits.CacheableRead = 0; + } + if (mDebug) { + dbgs() << "Updated instruction to bitmask "; + dbgs().write_hex(curRes.u16all); + dbgs() << " with ResID " << curRes.bits.ResourceID; + dbgs() << ". Inst: "; + (*miBegin)->dump(); + } + setAsmPrinterFlags(*miBegin, curRes); + KM->setUAVID(*siBegin, curRes.bits.ResourceID); + mMFI->uav_insert(curRes.bits.ResourceID); + } + // If we make it here, we can increment the uav counter if we are less + // than the max write image count. Otherwise we set it to the default + // UAV and leave it. + if (increment && curUAV < (OPENCL_MAX_WRITE_IMAGES - 1)) { + ++curUAV; + } else { + curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); + increment = false; + } + } + if (numWriteImages == 8) { + curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); + } + // Now lets handle the conflict pointers + for (siBegin = conflictPtrs.begin(), siEnd = conflictPtrs.end(); + siBegin != siEnd; ++siBegin) { + assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type " + "to be processed at this point!"); + const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType()); + // We only want to process global address space pointers + if (!PT || PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) { + continue; + } + if (mDebug) { + dbgs() << "Conflict Pointer '" << (*siBegin)->getName() + << "' being assigned uav " << curUAV << "\n"; + } + if (PtrToInstMap[*siBegin].empty()) { + KM->setUAVID(*siBegin, curUAV); + mMFI->uav_insert(curUAV); + } + for (miBegin = PtrToInstMap[*siBegin].begin(), + miEnd = PtrToInstMap[*siBegin].end(); + miBegin != miEnd; ++miBegin) { + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(*miBegin, curRes); + curRes.bits.ResourceID = curUAV; + if (isAtomicInst(ATM->getInstrInfo(), *miBegin)) { + (*miBegin)->getOperand((*miBegin)->getNumOperands()-1) + .setImm(curRes.bits.ResourceID); + if (curRes.bits.ResourceID + == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { + assert(0 && "Found an atomic instruction that has " + "an arena uav id!"); + } + } + if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { + if (mDebug) { + dbgs() << __LINE__ << ": Setting byte store bit on instruction: "; + (*miBegin)->print(dbgs()); + } + curRes.bits.ByteStore = 1; + } + if (mDebug) { + dbgs() << "Updated instruction to bitmask "; + dbgs().write_hex(curRes.u16all); + dbgs() << " with ResID " << curRes.bits.ResourceID; + dbgs() << ". Inst: "; + (*miBegin)->dump(); + } + setAsmPrinterFlags(*miBegin, curRes); + KM->setUAVID(*siBegin, curRes.bits.ResourceID); + mMFI->uav_insert(curRes.bits.ResourceID); + } + } +} +// The first thing we should do is to allocate the default +// ID for each load/store/atomic instruction so that +// it is correctly allocated. Everything else after this +// is just an optimization to more efficiently allocate +// resource ID's. +void +allocateDefaultIDs( + const AMDILTargetMachine *ATM, + MachineFunction &MF, + bool mDebug) +{ + for (MachineFunction::iterator mfBegin = MF.begin(), + mfEnd = MF.end(); mfBegin != mfEnd; ++mfBegin) { + MachineBasicBlock *MB = mfBegin; + for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end(); + mbb != mbe; ++mbb) { + MachineInstr *MI = mbb; + if (isLoadInst(ATM->getInstrInfo(), MI) + || isStoreInst(ATM->getInstrInfo(), MI) + || isAtomicInst(ATM->getInstrInfo(), MI)) { + AMDILAS::InstrResEnc curRes; + getAsmPrinterFlags(MI, curRes); + allocateDefaultID(ATM, curRes, MI, mDebug); + } + } + } +} + + bool +AMDILEGPointerManager::runOnMachineFunction(MachineFunction &MF) +{ + bool changed = false; + const AMDILTargetMachine *ATM + = reinterpret_cast<const AMDILTargetMachine*>(&TM); + AMDILMachineFunctionInfo *mMFI = + MF.getInfo<AMDILMachineFunctionInfo>(); + if (mDebug) { + dbgs() << getPassName() << "\n"; + dbgs() << MF.getFunction()->getName() << "\n"; + MF.dump(); + } + // Start out by allocating the default ID's to all instructions in the + // function. + allocateDefaultIDs(ATM, MF, mDebug); + + // A set of all pointers are tracked in this map and + // if multiple pointers are detected, they go to the same + // set. + PtrIMap PtrToInstMap; + + // All of the instructions that are loads, stores or pointer + // conflicts are tracked in the map with a set of all values + // that reference the instruction stored. + InstPMap InstToPtrMap; + + // In order to track across stack entries, we need a map between a + // frame index and a pointer. That way when we load from a frame + // index, we know what pointer was stored to the frame index. + FIPMap FIToPtrMap; + + // Set of all the pointers that are byte pointers. Byte pointers + // are required to have their instructions go to the arena. + ByteSet bytePtrs; + + // Set of all the pointers that are cacheable. All of the cache pointers + // are required to go to a raw uav and cannot go to arena. + CacheableSet cacheablePtrs; + + // Set of all the pointers that go into a raw buffer. A pointer can + // exist in either rawPtrs or bytePtrs but not both. + RawSet rawPtrs; + + // Set of all the pointers that end up having a conflicting instruction + // somewhere in the pointer path. + ConflictSet conflictPtrs; + + // Set of all pointers that are images + ImageSet images; + + // Set of all pointers that are counters + AppendSet counters; + + // Set of all pointers that load from a constant pool + CPoolSet cpool; + + // Mapping from BB to infomation about the cacheability of the + // global load instructions in it. + MBBCacheableMap bbCacheable; + + // A set of load instructions that are cacheable + // even if all the load instructions of the ptr are not. + CacheableInstrSet cacheableSet; + + // The lookup table holds all of the registers that + // are used as we assign pointers values to them. + // If two pointers collide on the lookup table, then + // we assign them to the same UAV. If one of the + // pointers is byte addressable, then we assign + // them to arena, otherwise we assign them to raw. + RVPVec lookupTable; + + // First we need to go through all of the arguments and assign the + // live in registers to the lookup table and the pointer mapping. + uint32_t numWriteImages = parseArguments(MF, lookupTable, ATM, + cacheablePtrs, images, counters, mDebug); + + // Lets do some error checking on the results of the parsing. + if (counters.size() > OPENCL_MAX_NUM_ATOMIC_COUNTERS) { + mMFI->addErrorMsg( + amd::CompilerErrorMessage[INSUFFICIENT_COUNTER_RESOURCES]); + } + if (numWriteImages > OPENCL_MAX_WRITE_IMAGES + || (images.size() - numWriteImages > OPENCL_MAX_READ_IMAGES)) { + mMFI->addErrorMsg( + amd::CompilerErrorMessage[INSUFFICIENT_IMAGE_RESOURCES]); + } + + // Now lets parse all of the instructions and update our + // lookup tables. + parseFunction(this, ATM, MF, InstToPtrMap, PtrToInstMap, + FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool, + bbCacheable, mDebug); + + // We need to go over our pointer map and find all the conflicting + // pointers that have byte stores and put them in the bytePtr map. + // All conflicting pointers that don't have byte stores go into + // the rawPtr map. + detectConflictingPointers(ATM, InstToPtrMap, bytePtrs, rawPtrs, + conflictPtrs, mDebug); + + // The next step is to detect whether the pointer should be added to + // the fully cacheable set or not. A pointer is marked as cacheable if + // no store instruction exists. + detectFullyCacheablePointers(ATM, PtrToInstMap, rawPtrs, + cacheablePtrs, conflictPtrs, mDebug); + + // Disable partially cacheable for now when multiUAV is on. + // SC versions before SC139 have a bug that generates incorrect + // addressing for some cached accesses. + if (!ATM->getSubtargetImpl() + ->device()->isSupported(AMDILDeviceInfo::MultiUAV) && + ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_139) { + // Now we take the set of loads that have no reachable stores and + // create a list of additional instructions (those that aren't already + // in a cacheablePtr set) that are safe to mark as cacheable. + detectCacheableInstrs(bbCacheable, InstToPtrMap, cacheablePtrs, + bytePtrs, cacheableSet, mDebug); + + // Annotate the additional instructions computed above as cacheable. + // Note that this should not touch any instructions annotated in + // annotatePtrPath. + annotateCacheableInstrs(TM, cacheableSet, mDebug); + } + + // Now that we have detected everything we need to detect, lets go through an + // annotate the instructions along the pointer path for each of the + // various pointer types. + annotatePtrPath(TM, PtrToInstMap, rawPtrs, bytePtrs, + cacheablePtrs, numWriteImages, mDebug); + + // Annotate the atomic counter path if any exists. + annotateAppendPtrs(TM, PtrToInstMap, counters, mDebug); + + // If we support MultiUAV, then we need to determine how + // many write images exist so that way we know how many UAV are + // left to allocate to buffers. + if (ATM->getSubtargetImpl() + ->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { + // We now have (OPENCL_MAX_WRITE_IMAGES - numPtrs) buffers open for + // multi-uav allocation. + allocateMultiUAVPointers(MF, ATM, PtrToInstMap, rawPtrs, + conflictPtrs, cacheablePtrs, numWriteImages, mDebug); + } + + // The last step is to detect if we have any alias constant pool operations. + // This is not likely, but does happen on occasion with double precision + // operations. + detectAliasedCPoolOps(TM, cpool, mDebug); + if (mDebug) { + dumpPointers(bytePtrs, "Byte Store Ptrs"); + dumpPointers(rawPtrs, "Raw Ptrs"); + dumpPointers(cacheablePtrs, "Cache Load Ptrs"); + dumpPointers(counters, "Atomic Counters"); + dumpPointers(images, "Images"); + } + return changed; +} + +// The default pointer manager just assigns the default ID's to +// each load/store instruction and does nothing else. This is +// the pointer manager for the 7XX series of cards. + bool +AMDILPointerManager::runOnMachineFunction(MachineFunction &MF) +{ + bool changed = false; + const AMDILTargetMachine *ATM + = reinterpret_cast<const AMDILTargetMachine*>(&TM); + if (mDebug) { + dbgs() << getPassName() << "\n"; + dbgs() << MF.getFunction()->getName() << "\n"; + MF.dump(); + } + // On the 7XX we don't have to do any special processing, so we + // can just allocate the default ID and be done with it. + allocateDefaultIDs(ATM, MF, mDebug); + return changed; +} |