//===-- AMDILGlobalManager.cpp - TODO: Add brief description -------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //==-----------------------------------------------------------------------===// #include "AMDILGlobalManager.h" #include "AMDILDevices.h" #include "AMDILKernelManager.h" #include "AMDILSubtarget.h" #include "AMDILAlgorithms.tpp" #include "AMDILGlobalManager.h" #include "AMDILDevices.h" #include "AMDILKernelManager.h" #include "AMDILSubtarget.h" #include "AMDILUtilityFunctions.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Support/FormattedStream.h" #include using namespace llvm; AMDILGlobalManager::AMDILGlobalManager(bool debugMode) { mOffset = 0; mReservedBuffs = 0; symTab = NULL; mCurrentCPOffset = 0; mDebugMode = debugMode; } AMDILGlobalManager::~AMDILGlobalManager() { } void AMDILGlobalManager::print(llvm::raw_ostream &O) { if (!mDebugMode) { return; } O << ";AMDIL Global Manager State Dump:\n"; O << ";\tSubtarget: " << mSTM << "\tSymbol Table: " << symTab << "\n"; O << ";\tConstant Offset: " << mOffset << "\tCP Offset: " << mCurrentCPOffset << "\tReserved Buffers: " << mReservedBuffs << "\n"; if (!mImageNameMap.empty()) { llvm::DenseMap::iterator imb, ime; O << ";\tGlobal Image Mapping: \n"; for (imb = mImageNameMap.begin(), ime = mImageNameMap.end(); imb != ime; ++imb) { O << ";\t\tImage ID: " << imb->first << "\tName: " << imb->second << "\n"; } } std::set::iterator sb, se; if (!mByteStore.empty()) { O << ";Byte Store Kernels: \n"; for (sb = mByteStore.begin(), se = mByteStore.end(); sb != se; ++sb) { O << ";\t\t" << *sb << "\n"; } } if (!mIgnoreStr.empty()) { O << ";\tIgnored Data Strings: \n"; for (sb = mIgnoreStr.begin(), se = mIgnoreStr.end(); sb != se; ++sb) { O << ";\t\t" << *sb << "\n"; } } } void AMDILGlobalManager::dump() { print(errs()); } static const constPtr *getConstPtr(const kernel &krnl, const std::string &arg) { llvm::SmallVector::const_iterator begin, end; for (begin = krnl.constPtr.begin(), end = krnl.constPtr.end(); begin != end; ++begin) { if (!strcmp(begin->name.data(),arg.c_str())) { return &(*begin); } } return NULL; } #if 0 static bool structContainsSub32bitType(const StructType *ST) { StructType::element_iterator eib, eie; for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { Type *ptr = *eib; uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr); if (!size) { if (const StructType *ST = dyn_cast(ptr)) { if (structContainsSub32bitType(ST)) { return true; } } } else if (size < 32) { return true; } } return false; } #endif void AMDILGlobalManager::processModule(const Module &M, const AMDILTargetMachine *mTM) { Module::const_global_iterator GI; Module::const_global_iterator GE; symTab = "NoSymTab"; mSTM = mTM->getSubtargetImpl(); for (GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { const GlobalValue *GV = GI; if (mDebugMode) { GV->dump(); errs() << "\n"; } llvm::StringRef GVName = GV->getName(); const char *name = GVName.data(); if (!strncmp(name, "sgv", 3)) { mKernelArgs[GVName] = parseSGV(GV); } else if (!strncmp(name, "fgv", 3)) { // we can ignore this since we don't care about the filename // string } else if (!strncmp(name, "lvgv", 4)) { mLocalArgs[GVName] = parseLVGV(GV); } else if (!strncmp(name, "llvm.image.annotations", 22)) { if (strstr(name, "__OpenCL") && strstr(name, "_kernel")) { // we only want to parse the image information if the // image is a kernel, we might have to parse out the // information if a function is found that is not // inlined. parseImageAnnotate(GV); } } else if (!strncmp(name, "llvm.global.annotations", 23)) { parseGlobalAnnotate(GV); } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) { if (strstr(name, "__OpenCL") && strstr(name, "_kernel")) { // we only want to parse constant pointer information // if it is a kernel parseConstantPtrAnnotate(GV); } } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) { // These are skipped as we handle them later in AMDILPointerManager.cpp } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS # parseAutoArray(GV, false); } else if (strstr(name, "clregion")) { parseAutoArray(GV, true); } else if (!GV->use_empty() && mIgnoreStr.find(GVName) == mIgnoreStr.end()) { parseConstantPtr(GV); } } allocateGlobalCB(); safeForEach(M.begin(), M.end(), std::bind1st( std::mem_fun(&AMDILGlobalManager::checkConstPtrsUseHW), this)); } void AMDILGlobalManager::allocateGlobalCB(void) { uint32_t maxCBSize = mSTM->device()->getMaxCBSize(); uint32_t offset = 0; uint32_t curCB = 0; uint32_t swoffset = 0; for (StringMap::iterator cpb = mConstMems.begin(), cpe = mConstMems.end(); cpb != cpe; ++cpb) { bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); cpb->second.usesHardware = false; if (constHW) { // If we have a limit on the max CB Size, then we need to make sure that // the constant sizes fall within the limits. if (cpb->second.size <= maxCBSize) { if (offset + cpb->second.size > maxCBSize) { offset = 0; curCB++; } if (curCB < mSTM->device()->getMaxNumCBs()) { cpb->second.cbNum = curCB + CB_BASE_OFFSET; cpb->second.offset = offset; offset += (cpb->second.size + 15) & (~15); cpb->second.usesHardware = true; continue; } } } cpb->second.cbNum = 0; cpb->second.offset = swoffset; swoffset += (cpb->second.size + 15) & (~15); } if (!mConstMems.empty()) { mReservedBuffs = curCB + 1; } } bool AMDILGlobalManager::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI) { Function::const_arg_iterator AI, AE; const Function *func = *FCI; std::string name = func->getName(); if (!strstr(name.c_str(), "__OpenCL") || !strstr(name.c_str(), "_kernel")) { return false; } kernel &krnl = mKernels[name]; if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) { for (AI = func->arg_begin(), AE = func->arg_end(); AI != AE; ++AI) { const Argument *Arg = &(*AI); const PointerType *P = dyn_cast(Arg->getType()); if (!P) { continue; } if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) { continue; } const constPtr *ptr = getConstPtr(krnl, Arg->getName()); if (ptr) { continue; } constPtr constAttr; constAttr.name = Arg->getName(); constAttr.size = this->mSTM->device()->getMaxCBSize(); constAttr.base = Arg; constAttr.isArgument = true; constAttr.isArray = false; constAttr.offset = 0; constAttr.usesHardware = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); if (constAttr.usesHardware) { constAttr.cbNum = krnl.constPtr.size() + 2; } else { constAttr.cbNum = 0; } krnl.constPtr.push_back(constAttr); } } // Now lets make sure that only the N largest buffers // get allocated in hardware if we have too many buffers uint32_t numPtrs = krnl.constPtr.size(); if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) { // TODO: Change this routine so it sorts // constPtr instead of pulling the sizes out // and then grab the N largest and disable the rest llvm::SmallVector sizes; for (uint32_t x = 0; x < numPtrs; ++x) { sizes.push_back(krnl.constPtr[x].size); } std::sort(sizes.begin(), sizes.end()); uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() - mReservedBuffs); uint32_t safeSize = sizes[numToDisable-1]; for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) { if (krnl.constPtr[x].size <= safeSize) { krnl.constPtr[x].usesHardware = false; --numToDisable; } } } // Renumber all of the valid CB's so that // they are linear increase uint32_t CBid = 2 + mReservedBuffs; for (uint32_t x = 0; x < numPtrs; ++x) { if (krnl.constPtr[x].usesHardware) { krnl.constPtr[x].cbNum = CBid++; } } for (StringMap::iterator cpb = mConstMems.begin(), cpe = mConstMems.end(); cpb != cpe; ++cpb) { if (cpb->second.usesHardware) { krnl.constPtr.push_back(cpb->second); } } for (uint32_t x = 0; x < krnl.constPtr.size(); ++x) { constPtr &c = krnl.constPtr[x]; uint32_t cbNum = c.cbNum - CB_BASE_OFFSET; if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) { if ((c.size + c.offset) > krnl.constSizes[cbNum]) { krnl.constSizes[cbNum] = ((c.size + c.offset) + 15) & ~15; } } else { krnl.constPtr[x].usesHardware = false; } } return false; } int32_t AMDILGlobalManager::getArrayOffset(const llvm::StringRef &a) const { StringMap::const_iterator iter = mArrayMems.find(a); if (iter != mArrayMems.end()) { return iter->second.offset; } else { return -1; } } int32_t AMDILGlobalManager::getConstOffset(const llvm::StringRef &a) const { StringMap::const_iterator iter = mConstMems.find(a); if (iter != mConstMems.end()) { return iter->second.offset; } else { return -1; } } bool AMDILGlobalManager::getConstHWBit(const llvm::StringRef &name) const { StringMap::const_iterator iter = mConstMems.find(name); if (iter != mConstMems.end()) { return iter->second.usesHardware; } else { return false; } } // As of right now we only care about the required group size // so we can skip the variable encoding kernelArg AMDILGlobalManager::parseSGV(const GlobalValue *G) { kernelArg nArg; const GlobalVariable *GV = dyn_cast(G); memset(&nArg, 0, sizeof(nArg)); for (int x = 0; x < 3; ++x) { nArg.reqGroupSize[x] = mSTM->getDefaultSize(x); nArg.reqRegionSize[x] = mSTM->getDefaultSize(x); } if (!GV || !GV->hasInitializer()) { return nArg; } const Constant *CV = GV->getInitializer(); const ConstantDataArray *CA =dyn_cast_or_null(CV); if (!CA || !CA->isString()) { return nArg; } std::string init = CA->getAsString(); size_t pos = init.find("RWG"); if (pos != llvm::StringRef::npos) { pos += 3; std::string LWS = init.substr(pos, init.length() - pos); const char *lws = LWS.c_str(); sscanf(lws, "%u,%u,%u", &(nArg.reqGroupSize[0]), &(nArg.reqGroupSize[1]), &(nArg.reqGroupSize[2])); nArg.mHasRWG = true; } pos = init.find("RWR"); if (pos != llvm::StringRef::npos) { pos += 3; std::string LWS = init.substr(pos, init.length() - pos); const char *lws = LWS.c_str(); sscanf(lws, "%u,%u,%u", &(nArg.reqRegionSize[0]), &(nArg.reqRegionSize[1]), &(nArg.reqRegionSize[2])); nArg.mHasRWR = true; } return nArg; } localArg AMDILGlobalManager::parseLVGV(const GlobalValue *G) { localArg nArg; const GlobalVariable *GV = dyn_cast(G); nArg.name = ""; if (!GV || !GV->hasInitializer()) { return nArg; } const ConstantArray *CA = dyn_cast_or_null(GV->getInitializer()); if (!CA) { return nArg; } for (size_t x = 0, y = CA->getNumOperands(); x < y; ++x) { const Value *local = CA->getOperand(x); const ConstantExpr *CE = dyn_cast_or_null(local); if (!CE || !CE->getNumOperands()) { continue; } nArg.name = (*(CE->op_begin()))->getName(); if (mArrayMems.find(nArg.name) != mArrayMems.end()) { nArg.local.push_back(&(mArrayMems[nArg.name])); } } return nArg; } void AMDILGlobalManager::parseConstantPtrAnnotate(const GlobalValue *G) { const GlobalVariable *GV = dyn_cast_or_null(G); const ConstantArray *CA = dyn_cast_or_null(GV->getInitializer()); if (!CA) { return; } uint32_t numOps = CA->getNumOperands(); for (uint32_t x = 0; x < numOps; ++x) { const Value *V = CA->getOperand(x); const ConstantStruct *CS = dyn_cast_or_null(V); if (!CS) { continue; } assert(CS->getNumOperands() == 2 && "There can only be 2" " fields, a name and size"); const ConstantExpr *nameField = dyn_cast(CS->getOperand(0)); const ConstantInt *sizeField = dyn_cast(CS->getOperand(1)); assert(nameField && "There must be a constant name field"); assert(sizeField && "There must be a constant size field"); const GlobalVariable *nameGV = dyn_cast(nameField->getOperand(0)); const ConstantDataArray *nameArray = dyn_cast(nameGV->getInitializer()); // Lets add this string to the set of strings we should ignore processing mIgnoreStr.insert(nameGV->getName()); if (mConstMems.find(nameGV->getName()) != mConstMems.end()) { // If we already processesd this string as a constant, lets remove it from // the list of known constants. This way we don't process unneeded data // and don't generate code/metadata for strings that are never used. mConstMems.erase(mConstMems.find(nameGV->getName())); } else { mIgnoreStr.insert(CS->getOperand(0)->getName()); } constPtr constAttr; constAttr.name = nameArray->getAsString(); constAttr.size = (sizeField->getZExtValue() + 15) & ~15; constAttr.base = CS; constAttr.isArgument = true; constAttr.isArray = false; constAttr.cbNum = 0; constAttr.offset = 0; constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize()); // Now that we have all our constant information, // lets update the kernel llvm::StringRef kernelName = G->getName().data() + 30; kernel k; if (mKernels.find(kernelName) != mKernels.end()) { k = mKernels[kernelName]; } else { k.curSize = 0; k.curRSize = 0; k.curHWSize = 0; k.curHWRSize = 0; k.constSize = 0; k.lvgv = NULL; k.sgv = NULL; memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB); } constAttr.cbNum = k.constPtr.size() + 2; k.constPtr.push_back(constAttr); mKernels[kernelName] = k; } } void AMDILGlobalManager::parseImageAnnotate(const GlobalValue *G) { const GlobalVariable *GV = dyn_cast(G); const ConstantArray *CA = dyn_cast(GV->getInitializer()); if (!CA) { return; } if (isa(CA)) { return; } uint32_t e = CA->getNumOperands(); if (!e) { return; } kernel k; llvm::StringRef name = G->getName().data() + 23; if (mKernels.find(name) != mKernels.end()) { k = mKernels[name]; } else { k.curSize = 0; k.curRSize = 0; k.curHWSize = 0; k.curHWRSize = 0; k.constSize = 0; k.lvgv = NULL; k.sgv = NULL; memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB); } for (uint32_t i = 0; i != e; ++i) { const Value *V = CA->getOperand(i); const Constant *C = dyn_cast(V); const ConstantStruct *CS = dyn_cast(C); if (CS && CS->getNumOperands() == 2) { if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) != mConstMems.end()) { // If we already processesd this string as a constant, lets remove it // from the list of known constants. This way we don't process unneeded // data and don't generate code/metadata for strings that are never // used. mConstMems.erase( mConstMems.find(CS->getOperand(0)->getOperand(0)->getName())); } else { mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName()); } const ConstantInt *CI = dyn_cast(CS->getOperand(1)); uint32_t val = (uint32_t)CI->getZExtValue(); if (val == 1) { k.readOnly.insert(i); } else if (val == 2) { k.writeOnly.insert(i); } else { assert(!"Unknown image type value!"); } } } mKernels[name] = k; } void AMDILGlobalManager::parseAutoArray(const GlobalValue *GV, bool isRegion) { const GlobalVariable *G = dyn_cast(GV); Type *Ty = (G) ? G->getType() : NULL; arraymem tmp; tmp.isHW = true; tmp.offset = 0; tmp.vecSize = getTypeSize(Ty, true); tmp.isRegion = isRegion; mArrayMems[GV->getName()] = tmp; } void AMDILGlobalManager::parseConstantPtr(const GlobalValue *GV) { const GlobalVariable *G = dyn_cast(GV); Type *Ty = (G) ? G->getType() : NULL; constPtr constAttr; constAttr.name = G->getName(); constAttr.size = getTypeSize(Ty, true); constAttr.base = GV; constAttr.isArgument = false; constAttr.isArray = true; constAttr.offset = 0; constAttr.cbNum = 0; constAttr.usesHardware = false; mConstMems[GV->getName()] = constAttr; } void AMDILGlobalManager::parseGlobalAnnotate(const GlobalValue *G) { const GlobalVariable *GV = dyn_cast(G); if (!GV->hasInitializer()) { return; } const Constant *CT = GV->getInitializer(); if (!CT || isa(CT)) { return; } const ConstantArray *CA = dyn_cast(CT); if (!CA) { return; } unsigned int nKernels = CA->getNumOperands(); for (unsigned int i = 0, e = nKernels; i != e; ++i) { parseKernelInformation(CA->getOperand(i)); } } void AMDILGlobalManager::parseKernelInformation(const Value *V) { if (isa(V)) { return; } const ConstantStruct *CS = dyn_cast_or_null(V); if (!CS) { return; } uint32_t N = CS->getNumOperands(); if (N != 5) { return; } kernel tmp; tmp.curSize = 0; tmp.curRSize = 0; tmp.curHWSize = 0; tmp.curHWRSize = 0; // The first operand is always a pointer to the kernel. const Constant *CV = dyn_cast(CS->getOperand(0)); llvm::StringRef kernelName = ""; if (CV->getNumOperands()) { kernelName = (*(CV->op_begin()))->getName(); } // If we have images, then we have already created the kernel and we just need // to get the kernel information. if (mKernels.find(kernelName) != mKernels.end()) { tmp = mKernels[kernelName]; } else { tmp.curSize = 0; tmp.curRSize = 0; tmp.curHWSize = 0; tmp.curHWRSize = 0; tmp.constSize = 0; tmp.lvgv = NULL; tmp.sgv = NULL; memset(tmp.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB); } // The second operand is SGV, there can only be one so we don't need to worry // about parsing out multiple data points. CV = dyn_cast(CS->getOperand(1)); llvm::StringRef sgvName; if (CV->getNumOperands()) { sgvName = (*(CV->op_begin()))->getName(); } if (mKernelArgs.find(sgvName) != mKernelArgs.end()) { tmp.sgv = &mKernelArgs[sgvName]; } // The third operand is FGV, which is skipped // The fourth operand is LVGV // There can be multiple local arrays, so we // need to handle each one seperatly CV = dyn_cast(CS->getOperand(3)); llvm::StringRef lvgvName = ""; if (CV->getNumOperands()) { lvgvName = (*(CV->op_begin()))->getName(); } if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) { localArg *ptr = &mLocalArgs[lvgvName]; tmp.lvgv = ptr; llvm::SmallVector::iterator ib, ie; for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) { if ((*ib)->isRegion) { if ((*ib)->isHW) { (*ib)->offset = tmp.curHWRSize; tmp.curHWRSize += ((*ib)->vecSize + 15) & ~15; } else { (*ib)->offset = tmp.curRSize; tmp.curRSize += ((*ib)->vecSize + 15) & ~15; } } else { if ((*ib)->isHW) { (*ib)->offset = tmp.curHWSize; tmp.curHWSize += ((*ib)->vecSize + 15) & ~15; } else { (*ib)->offset = tmp.curSize; tmp.curSize += ((*ib)->vecSize + 15) & ~15; } } } } // The fifth operand is NULL mKernels[kernelName] = tmp; } const kernel &AMDILGlobalManager::getKernel(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); assert(isKernel(name) && "Must be a kernel to call getKernel"); return iter->second; } bool AMDILGlobalManager::isKernel(const llvm::StringRef &name) const { return (mKernels.find(name) != mKernels.end()); } bool AMDILGlobalManager::isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const { const StringMap::const_iterator kiter = mKernels.find(name); if (kiter == mKernels.end()) { return false; } return kiter->second.writeOnly.count(iID); } uint32_t AMDILGlobalManager::getNumWriteImages(const llvm::StringRef &name) const { char *env = NULL; env = getenv("GPU_DISABLE_RAW_UAV"); if (env && env[0] == '1') { return 8; } const StringMap::const_iterator kiter = mKernels.find(name); if (kiter == mKernels.end()) { return 0; } else { return kiter->second.writeOnly.size(); } } bool AMDILGlobalManager::isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const { const StringMap::const_iterator kiter = mKernels.find(name); if (kiter == mKernels.end()) { return false; } return kiter->second.readOnly.count(iID); } bool AMDILGlobalManager::hasRWG(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { kernelArg *ptr = iter->second.sgv; if (ptr) { return ptr->mHasRWG; } } return false; } bool AMDILGlobalManager::hasRWR(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { kernelArg *ptr = iter->second.sgv; if (ptr) { return ptr->mHasRWR; } } return false; } uint32_t AMDILGlobalManager::getMaxGroupSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { kernelArg *sgv = iter->second.sgv; if (sgv) { return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2]; } } return mSTM->getDefaultSize(0) * mSTM->getDefaultSize(1) * mSTM->getDefaultSize(2); } uint32_t AMDILGlobalManager::getMaxRegionSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { kernelArg *sgv = iter->second.sgv; if (sgv) { return sgv->reqRegionSize[0] * sgv->reqRegionSize[1] * sgv->reqRegionSize[2]; } } return mSTM->getDefaultSize(0) * mSTM->getDefaultSize(1) * mSTM->getDefaultSize(2); } uint32_t AMDILGlobalManager::getRegionSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { return iter->second.curRSize; } else { return 0; } } uint32_t AMDILGlobalManager::getLocalSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { return iter->second.curSize; } else { return 0; } } uint32_t AMDILGlobalManager::getConstSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { return iter->second.constSize; } else { return 0; } } uint32_t AMDILGlobalManager::getHWRegionSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { return iter->second.curHWRSize; } else { return 0; } } uint32_t AMDILGlobalManager::getHWLocalSize(const llvm::StringRef &name) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end()) { return iter->second.curHWSize; } else { return 0; } } int32_t AMDILGlobalManager::getArgID(const Argument *arg) { DenseMap::iterator argiter = mArgIDMap.find(arg); if (argiter != mArgIDMap.end()) { return argiter->second; } else { return -1; } } uint32_t AMDILGlobalManager::getLocal(const llvm::StringRef &name, uint32_t dim) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end() && iter->second.sgv) { kernelArg *sgv = iter->second.sgv; switch (dim) { default: break; case 0: case 1: case 2: return sgv->reqGroupSize[dim]; break; case 3: return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2]; }; } switch (dim) { default: return 1; case 3: return mSTM->getDefaultSize(0) * mSTM->getDefaultSize(1) * mSTM->getDefaultSize(2); case 2: case 1: case 0: return mSTM->getDefaultSize(dim); break; }; return 1; } uint32_t AMDILGlobalManager::getRegion(const llvm::StringRef &name, uint32_t dim) const { StringMap::const_iterator iter = mKernels.find(name); if (iter != mKernels.end() && iter->second.sgv) { kernelArg *sgv = iter->second.sgv; switch (dim) { default: break; case 0: case 1: case 2: return sgv->reqRegionSize[dim]; break; case 3: return sgv->reqRegionSize[0] * sgv->reqRegionSize[1] * sgv->reqRegionSize[2]; }; } switch (dim) { default: return 1; case 3: return mSTM->getDefaultSize(0) * mSTM->getDefaultSize(1) * mSTM->getDefaultSize(2); case 2: case 1: case 0: return mSTM->getDefaultSize(dim); break; }; return 1; } StringMap::iterator AMDILGlobalManager::consts_begin() { return mConstMems.begin(); } StringMap::iterator AMDILGlobalManager::consts_end() { return mConstMems.end(); } bool AMDILGlobalManager::byteStoreExists(StringRef S) const { return mByteStore.find(S) != mByteStore.end(); } bool AMDILGlobalManager::usesHWConstant(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->usesHardware; } else { return false; } } uint32_t AMDILGlobalManager::getConstPtrSize(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->size; } else { return 0; } } uint32_t AMDILGlobalManager::getConstPtrOff(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->offset; } else { return 0; } } uint32_t AMDILGlobalManager::getConstPtrCB(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->cbNum; } else { return 0; } } void AMDILGlobalManager::calculateCPOffsets(const MachineFunction *MF, kernel &krnl) { const MachineConstantPool *MCP = MF->getConstantPool(); if (!MCP) { return; } const std::vector consts = MCP->getConstants(); size_t numConsts = consts.size(); for (size_t x = 0; x < numConsts; ++x) { krnl.CPOffsets.push_back( std::make_pair( mCurrentCPOffset, consts[x].Val.ConstVal)); size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true); // Align the size to the vector boundary curSize = (curSize + 15) & (~15); mCurrentCPOffset += curSize; } } bool AMDILGlobalManager::isConstPtrArray(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->isArray; } else { return false; } } bool AMDILGlobalManager::isConstPtrArgument(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->isArgument; } else { return false; } } const Value *AMDILGlobalManager::getConstPtrValue(const kernel &krnl, const llvm::StringRef &arg) { const constPtr *curConst = getConstPtr(krnl, arg); if (curConst) { return curConst->base; } else { return NULL; } } static void dumpZeroElements(const StructType * const T, llvm::raw_ostream &O, bool asBytes); static void dumpZeroElements(const IntegerType * const T, llvm::raw_ostream &O, bool asBytes); static void dumpZeroElements(const ArrayType * const T, llvm::raw_ostream &O, bool asBytes); static void dumpZeroElements(const VectorType * const T, llvm::raw_ostream &O, bool asBytes); static void dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes); void dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes) { if (!T) { return; } switch(T->getTypeID()) { case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: case Type::LabelTyID: assert(0 && "These types are not supported by this backend"); default: case Type::DoubleTyID: if (asBytes) { O << ":0:0:0:0:0:0:0:0"; } else { O << ":0"; } break; case Type::FloatTyID: case Type::PointerTyID: case Type::FunctionTyID: if (asBytes) { O << ":0:0:0:0"; } else { O << ":0"; } break; case Type::IntegerTyID: dumpZeroElements(dyn_cast(T), O, asBytes); break; case Type::StructTyID: { const StructType *ST = cast(T); if (!ST->isOpaque()) { dumpZeroElements(dyn_cast(T), O, asBytes); } else { // A pre-LLVM 3.0 opaque type if (asBytes) { O << ":0:0:0:0"; } else { O << ":0"; } } } break; case Type::ArrayTyID: dumpZeroElements(dyn_cast(T), O, asBytes); break; case Type::VectorTyID: dumpZeroElements(dyn_cast(T), O, asBytes); break; }; } void dumpZeroElements(const StructType * const ST, llvm::raw_ostream &O, bool asBytes) { if (!ST) { return; } Type *curType; StructType::element_iterator eib = ST->element_begin(); StructType::element_iterator eie = ST->element_end(); for (;eib != eie; ++eib) { curType = *eib; dumpZeroElements(curType, O, asBytes); } } void dumpZeroElements(const IntegerType * const IT, llvm::raw_ostream &O, bool asBytes) { if (asBytes) { unsigned byteWidth = (IT->getBitWidth() >> 3); for (unsigned x = 0; x < byteWidth; ++x) { O << ":0"; } } } void dumpZeroElements(const ArrayType * const AT, llvm::raw_ostream &O, bool asBytes) { size_t size = AT->getNumElements(); for (size_t x = 0; x < size; ++x) { dumpZeroElements(AT->getElementType(), O, asBytes); } } void dumpZeroElements(const VectorType * const VT, llvm::raw_ostream &O, bool asBytes) { size_t size = VT->getNumElements(); for (size_t x = 0; x < size; ++x) { dumpZeroElements(VT->getElementType(), O, asBytes); } } void AMDILGlobalManager::printConstantValue(const Constant *CAval, llvm::raw_ostream &O, bool asBytes) { if (const ConstantFP *CFP = dyn_cast(CAval)) { bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; if (isDouble) { double val = CFP->getValueAPF().convertToDouble(); union dtol_union { double d; uint64_t l; char c[8]; } conv; conv.d = val; if (!asBytes) { O << ":"; O.write_hex(conv.l); } else { for (int i = 0; i < 8; ++i) { O << ":"; O.write_hex((unsigned)conv.c[i] & 0xFF); } } } else { float val = CFP->getValueAPF().convertToFloat(); union ftoi_union { float f; uint32_t u; char c[4]; } conv; conv.f = val; if (!asBytes) { O << ":"; O.write_hex(conv.u); } else { for (int i = 0; i < 4; ++i) { O << ":"; O.write_hex((unsigned)conv.c[i] & 0xFF); } } } } else if (const ConstantInt *CI = dyn_cast(CAval)) { uint64_t zVal = CI->getValue().getZExtValue(); if (!asBytes) { O << ":"; O.write_hex(zVal); } else { switch (CI->getBitWidth()) { default: { union ltob_union { uint64_t l; char c[8]; } conv; conv.l = zVal; for (int i = 0; i < 8; ++i) { O << ":"; O.write_hex((unsigned)conv.c[i] & 0xFF); } } break; case 8: O << ":"; O.write_hex(zVal & 0xFF); break; case 16: { union stob_union { uint16_t s; char c[2]; } conv; conv.s = (uint16_t)zVal; O << ":"; O.write_hex((unsigned)conv.c[0] & 0xFF); O << ":"; O.write_hex((unsigned)conv.c[1] & 0xFF); } break; case 32: { union itob_union { uint32_t i; char c[4]; } conv; conv.i = (uint32_t)zVal; for (int i = 0; i < 4; ++i) { O << ":"; O.write_hex((unsigned)conv.c[i] & 0xFF); } } break; } } } else if (const ConstantVector *CV = dyn_cast(CAval)) { int y = CV->getNumOperands()-1; int x = 0; for (; x < y; ++x) { printConstantValue(CV->getOperand(x), O, asBytes); } printConstantValue(CV->getOperand(x), O, asBytes); } else if (const ConstantStruct *CS = dyn_cast(CAval)) { int y = CS->getNumOperands(); int x = 0; for (; x < y; ++x) { printConstantValue(CS->getOperand(x), O, asBytes); } } else if (const ConstantAggregateZero *CAZ = dyn_cast(CAval)) { int y = CAZ->getNumOperands(); if (y > 0) { int x = 0; for (; x < y; ++x) { printConstantValue((llvm::Constant *)CAZ->getOperand(x), O, asBytes); } } else { if (asBytes) { dumpZeroElements(CAval->getType(), O, asBytes); } else { int y = getNumElements(CAval->getType())-1; for (int x = 0; x < y; ++x) { O << ":0"; } O << ":0"; } } } else if (const ConstantArray *CA = dyn_cast(CAval)) { int y = CA->getNumOperands(); int x = 0; for (; x < y; ++x) { printConstantValue(CA->getOperand(x), O, asBytes); } } else if (dyn_cast(CAval)) { O << ":0"; //assert(0 && "Hit condition which was not expected"); } else if (dyn_cast(CAval)) { O << ":0"; //assert(0 && "Hit condition which was not expected"); } else if (dyn_cast(CAval)) { O << ":0"; //assert(0 && "Hit condition which was not expected"); } else { assert(0 && "Hit condition which was not expected"); } } static bool isStruct(Type * const T) { if (!T) { return false; } switch (T->getTypeID()) { default: return false; case Type::PointerTyID: return isStruct(T->getContainedType(0)); case Type::StructTyID: return true; case Type::ArrayTyID: case Type::VectorTyID: return isStruct(dyn_cast(T)->getElementType()); }; } void AMDILGlobalManager::dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km, uint32_t id) { uint32_t size = 0; for (StringMap::iterator cmb = consts_begin(), cme = consts_end(); cmb != cme; ++cmb) { if (id == cmb->second.cbNum) { size += (cmb->second.size + 15) & (~15); } } if (id == 0) { O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n"; if (mCurrentCPOffset) { for (StringMap::iterator kcpb = mKernels.begin(), kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) { const kernel& k = kcpb->second; size_t numConsts = k.CPOffsets.size(); for (size_t x = 0; x < numConsts; ++x) { size_t offset = k.CPOffsets[x].first; const Constant *C = k.CPOffsets[x].second; Type *Ty = C->getType(); size_t size = (isStruct(Ty) ? getTypeSize(Ty, true) : getNumElements(Ty)); O << ";#" << km->getTypeName(Ty, symTab) << ":"; O << offset << ":" << size ; printConstantValue(C, O, isStruct(Ty)); O << "\n"; } } } } else { O << ";#DATASTART:" << id << ":" << size << "\n"; } for (StringMap::iterator cmb = consts_begin(), cme = consts_end(); cmb != cme; ++cmb) { if (cmb->second.cbNum != id) { continue; } const GlobalVariable *G = dyn_cast(cmb->second.base); Type *Ty = (G) ? G->getType() : NULL; size_t offset = cmb->second.offset; const Constant *C = G->getInitializer(); size_t size = (isStruct(Ty) ? getTypeSize(Ty, true) : getNumElements(Ty)); O << ";#" << km->getTypeName(Ty, symTab) << ":"; if (!id) { O << (offset + mCurrentCPOffset) << ":" << size; } else { O << offset << ":" << size; } if (C) { printConstantValue(C, O, isStruct(Ty)); } else { assert(0 && "Cannot have a constant pointer" " without an initializer!"); } O <<"\n"; } if (id == 0) { O << ";#DATAEND\n"; } else { O << ";#DATAEND:" << id << "\n"; } } void AMDILGlobalManager::dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km) { if (mConstMems.empty() && !mCurrentCPOffset) { return; } else { llvm::DenseSet const_set; for (StringMap::iterator cmb = consts_begin(), cme = consts_end(); cmb != cme; ++cmb) { const_set.insert(cmb->second.cbNum); } if (mCurrentCPOffset) { const_set.insert(0); } for (llvm::DenseSet::iterator setb = const_set.begin(), sete = const_set.end(); setb != sete; ++setb) { dumpDataToCB(O, km, *setb); } } } /// Create a function ID if it is not known or return the known /// function ID. uint32_t AMDILGlobalManager::getOrCreateFunctionID(const GlobalValue* func) { if (func->getName().size()) { return getOrCreateFunctionID(func->getName()); } uint32_t id; if (mFuncPtrNames.find(func) == mFuncPtrNames.end()) { id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size(); mFuncPtrNames[func] = id; } else { id = mFuncPtrNames[func]; } return id; } uint32_t AMDILGlobalManager::getOrCreateFunctionID(const std::string &func) { uint32_t id; if (mFuncNames.find(func) == mFuncNames.end()) { id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size(); mFuncNames[func] = id; } else { id = mFuncNames[func]; } return id; }