diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILGlobalManager.cpp | 1353 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILGlobalManager.h | 256 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILISelLowering.cpp | 73 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILKernelManager.cpp | 1356 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILKernelManager.h | 177 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILLiteralManager.cpp | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp | 62 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILPrintfConvert.cpp | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILSubtarget.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/Makefile.sources | 2 |
11 files changed, 23 insertions, 3275 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index b006f84629e..f8e1cd95dc9 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -13,8 +13,6 @@ #include "AMDGPUTargetMachine.h" #include "AMDGPU.h" -#include "AMDILGlobalManager.h" -#include "AMDILKernelManager.h" #include "AMDILTargetMachine.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" @@ -44,16 +42,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, : AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), Subtarget(TT, CPU, FS), - mGM(new AMDILGlobalManager(0 /* Debug mode */)), - mKM(new AMDILKernelManager(this, mGM)), mDump(false) { - /* XXX: Add these two initializations to fix a segfault, not sure if this - * is correct. These are normally initialized in the AsmPrinter, but AMDGPU - * does not use the asm printer */ - Subtarget.setGlobalManager(mGM); - Subtarget.setKernelManager(mKM); /* TLInfo uses InstrInfo so it must be initialized after. */ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { InstrInfo = new R600InstrInfo(*this); @@ -66,8 +57,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, AMDGPUTargetMachine::~AMDGPUTargetMachine() { - delete mGM; - delete mKM; } bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM, diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.cpp b/src/gallium/drivers/radeon/AMDILGlobalManager.cpp deleted file mode 100644 index eafd36eaa4e..00000000000 --- a/src/gallium/drivers/radeon/AMDILGlobalManager.cpp +++ /dev/null @@ -1,1353 +0,0 @@ -//===-- AMDILGlobalManager.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDILGlobalManager.h" -#include "AMDILDevices.h" -#include "AMDILKernelManager.h" -#include "AMDILSubtarget.h" - -#include "AMDILAlgorithms.tpp" -#include "AMDILGlobalManager.h" -#include "AMDILDevices.h" -#include "AMDILKernelManager.h" -#include "AMDILSubtarget.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Support/FormattedStream.h" - -#include <cstdio> - -using namespace llvm; - -AMDILGlobalManager::AMDILGlobalManager(bool debugMode) { - mOffset = 0; - mReservedBuffs = 0; - symTab = NULL; - mCurrentCPOffset = 0; - mDebugMode = debugMode; -} - -AMDILGlobalManager::~AMDILGlobalManager() { -} - -void AMDILGlobalManager::print(llvm::raw_ostream &O) { - if (!mDebugMode) { - return; - } - O << ";AMDIL Global Manager State Dump:\n"; - O << ";\tSubtarget: " << mSTM << "\tSymbol Table: " << symTab - << "\n"; - O << ";\tConstant Offset: " << mOffset << "\tCP Offset: " - << mCurrentCPOffset << "\tReserved Buffers: " << mReservedBuffs - << "\n"; - if (!mImageNameMap.empty()) { - llvm::DenseMap<uint32_t, llvm::StringRef>::iterator imb, ime; - O << ";\tGlobal Image Mapping: \n"; - for (imb = mImageNameMap.begin(), ime = mImageNameMap.end(); imb != ime; - ++imb) { - O << ";\t\tImage ID: " << imb->first << "\tName: " - << imb->second << "\n"; - } - } - std::set<llvm::StringRef>::iterator sb, se; - if (!mByteStore.empty()) { - O << ";Byte Store Kernels: \n"; - for (sb = mByteStore.begin(), se = mByteStore.end(); sb != se; ++sb) { - O << ";\t\t" << *sb << "\n"; - } - } - if (!mIgnoreStr.empty()) { - O << ";\tIgnored Data Strings: \n"; - for (sb = mIgnoreStr.begin(), se = mIgnoreStr.end(); sb != se; ++sb) { - O << ";\t\t" << *sb << "\n"; - } - } -} - -void AMDILGlobalManager::dump() { - print(errs()); -} - -static const constPtr *getConstPtr(const kernel &krnl, const std::string &arg) { - llvm::SmallVector<constPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end; - for (begin = krnl.constPtr.begin(), end = krnl.constPtr.end(); - begin != end; ++begin) { - if (!strcmp(begin->name.data(),arg.c_str())) { - return &(*begin); - } - } - return NULL; -} -#if 0 -static bool structContainsSub32bitType(const StructType *ST) { - StructType::element_iterator eib, eie; - for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { - Type *ptr = *eib; - uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr); - if (!size) { - if (const StructType *ST = dyn_cast<StructType>(ptr)) { - if (structContainsSub32bitType(ST)) { - return true; - } - } - } else if (size < 32) { - return true; - } - } - return false; -} -#endif - -void AMDILGlobalManager::processModule(const Module &M, - const AMDILTargetMachine *mTM) -{ - Module::const_global_iterator GI; - Module::const_global_iterator GE; - symTab = "NoSymTab"; - mSTM = mTM->getSubtargetImpl(); - for (GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { - const GlobalValue *GV = GI; - if (mDebugMode) { - GV->dump(); - errs() << "\n"; - } - llvm::StringRef GVName = GV->getName(); - const char *name = GVName.data(); - if (!strncmp(name, "sgv", 3)) { - mKernelArgs[GVName] = parseSGV(GV); - } else if (!strncmp(name, "fgv", 3)) { - // we can ignore this since we don't care about the filename - // string - } else if (!strncmp(name, "lvgv", 4)) { - mLocalArgs[GVName] = parseLVGV(GV); - } else if (!strncmp(name, "llvm.image.annotations", 22)) { - if (strstr(name, "__OpenCL") - && strstr(name, "_kernel")) { - // we only want to parse the image information if the - // image is a kernel, we might have to parse out the - // information if a function is found that is not - // inlined. - parseImageAnnotate(GV); - } - } else if (!strncmp(name, "llvm.global.annotations", 23)) { - parseGlobalAnnotate(GV); - } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) { - if (strstr(name, "__OpenCL") - && strstr(name, "_kernel")) { - // we only want to parse constant pointer information - // if it is a kernel - parseConstantPtrAnnotate(GV); - } - } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) { - // These are skipped as we handle them later in AMDILPointerManager.cpp - } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS # - parseAutoArray(GV, false); - } else if (strstr(name, "clregion")) { - parseAutoArray(GV, true); - } else if (!GV->use_empty() - && mIgnoreStr.find(GVName) == mIgnoreStr.end()) { - parseConstantPtr(GV); - } - } - allocateGlobalCB(); - - safeForEach(M.begin(), M.end(), - std::bind1st( - std::mem_fun(&AMDILGlobalManager::checkConstPtrsUseHW), - this)); -} - -void AMDILGlobalManager::allocateGlobalCB(void) { - uint32_t maxCBSize = mSTM->device()->getMaxCBSize(); - uint32_t offset = 0; - uint32_t curCB = 0; - uint32_t swoffset = 0; - for (StringMap<constPtr>::iterator cpb = mConstMems.begin(), - cpe = mConstMems.end(); cpb != cpe; ++cpb) { - bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); - cpb->second.usesHardware = false; - if (constHW) { - // If we have a limit on the max CB Size, then we need to make sure that - // the constant sizes fall within the limits. - if (cpb->second.size <= maxCBSize) { - if (offset + cpb->second.size > maxCBSize) { - offset = 0; - curCB++; - } - if (curCB < mSTM->device()->getMaxNumCBs()) { - cpb->second.cbNum = curCB + CB_BASE_OFFSET; - cpb->second.offset = offset; - offset += (cpb->second.size + 15) & (~15); - cpb->second.usesHardware = true; - continue; - } - } - } - cpb->second.cbNum = 0; - cpb->second.offset = swoffset; - swoffset += (cpb->second.size + 15) & (~15); - } - if (!mConstMems.empty()) { - mReservedBuffs = curCB + 1; - } -} - -bool AMDILGlobalManager::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI) -{ - Function::const_arg_iterator AI, AE; - const Function *func = *FCI; - std::string name = func->getName(); - if (!strstr(name.c_str(), "__OpenCL") - || !strstr(name.c_str(), "_kernel")) { - return false; - } - kernel &krnl = mKernels[name]; - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) { - for (AI = func->arg_begin(), AE = func->arg_end(); - AI != AE; ++AI) { - const Argument *Arg = &(*AI); - const PointerType *P = dyn_cast<PointerType>(Arg->getType()); - if (!P) { - continue; - } - if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) { - continue; - } - const constPtr *ptr = getConstPtr(krnl, Arg->getName()); - if (ptr) { - continue; - } - constPtr constAttr; - constAttr.name = Arg->getName(); - constAttr.size = this->mSTM->device()->getMaxCBSize(); - constAttr.base = Arg; - constAttr.isArgument = true; - constAttr.isArray = false; - constAttr.offset = 0; - constAttr.usesHardware = - mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); - if (constAttr.usesHardware) { - constAttr.cbNum = krnl.constPtr.size() + 2; - } else { - constAttr.cbNum = 0; - } - krnl.constPtr.push_back(constAttr); - } - } - // Now lets make sure that only the N largest buffers - // get allocated in hardware if we have too many buffers - uint32_t numPtrs = krnl.constPtr.size(); - if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) { - // TODO: Change this routine so it sorts - // constPtr instead of pulling the sizes out - // and then grab the N largest and disable the rest - llvm::SmallVector<uint32_t, 16> sizes; - for (uint32_t x = 0; x < numPtrs; ++x) { - sizes.push_back(krnl.constPtr[x].size); - } - std::sort(sizes.begin(), sizes.end()); - uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() - - mReservedBuffs); - uint32_t safeSize = sizes[numToDisable-1]; - for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) { - if (krnl.constPtr[x].size <= safeSize) { - krnl.constPtr[x].usesHardware = false; - --numToDisable; - } - } - } - // Renumber all of the valid CB's so that - // they are linear increase - uint32_t CBid = 2 + mReservedBuffs; - for (uint32_t x = 0; x < numPtrs; ++x) { - if (krnl.constPtr[x].usesHardware) { - krnl.constPtr[x].cbNum = CBid++; - } - } - for (StringMap<constPtr>::iterator cpb = mConstMems.begin(), - cpe = mConstMems.end(); cpb != cpe; ++cpb) { - if (cpb->second.usesHardware) { - krnl.constPtr.push_back(cpb->second); - } - } - for (uint32_t x = 0; x < krnl.constPtr.size(); ++x) { - constPtr &c = krnl.constPtr[x]; - uint32_t cbNum = c.cbNum - CB_BASE_OFFSET; - if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) { - if ((c.size + c.offset) > krnl.constSizes[cbNum]) { - krnl.constSizes[cbNum] = - ((c.size + c.offset) + 15) & ~15; - } - } else { - krnl.constPtr[x].usesHardware = false; - } - } - return false; -} - -int32_t AMDILGlobalManager::getArrayOffset(const llvm::StringRef &a) const { - StringMap<arraymem>::const_iterator iter = mArrayMems.find(a); - if (iter != mArrayMems.end()) { - return iter->second.offset; - } else { - return -1; - } -} - -int32_t AMDILGlobalManager::getConstOffset(const llvm::StringRef &a) const { - StringMap<constPtr>::const_iterator iter = mConstMems.find(a); - if (iter != mConstMems.end()) { - return iter->second.offset; - } else { - return -1; - } -} - -bool AMDILGlobalManager::getConstHWBit(const llvm::StringRef &name) const { - StringMap<constPtr>::const_iterator iter = mConstMems.find(name); - if (iter != mConstMems.end()) { - return iter->second.usesHardware; - } else { - return false; - } -} - -// As of right now we only care about the required group size -// so we can skip the variable encoding -kernelArg AMDILGlobalManager::parseSGV(const GlobalValue *G) { - kernelArg nArg; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); - memset(&nArg, 0, sizeof(nArg)); - for (int x = 0; x < 3; ++x) { - nArg.reqGroupSize[x] = mSTM->getDefaultSize(x); - nArg.reqRegionSize[x] = mSTM->getDefaultSize(x); - } - if (!GV || !GV->hasInitializer()) { - return nArg; - } - const Constant *CV = GV->getInitializer(); - const ConstantDataArray *CA =dyn_cast_or_null<ConstantDataArray>(CV); - - if (!CA || !CA->isString()) { - return nArg; - } - std::string init = CA->getAsString(); - size_t pos = init.find("RWG"); - if (pos != llvm::StringRef::npos) { - pos += 3; - std::string LWS = init.substr(pos, init.length() - pos); - const char *lws = LWS.c_str(); - sscanf(lws, "%u,%u,%u", &(nArg.reqGroupSize[0]), - &(nArg.reqGroupSize[1]), - &(nArg.reqGroupSize[2])); - nArg.mHasRWG = true; - } - pos = init.find("RWR"); - if (pos != llvm::StringRef::npos) { - pos += 3; - std::string LWS = init.substr(pos, init.length() - pos); - const char *lws = LWS.c_str(); - sscanf(lws, "%u,%u,%u", &(nArg.reqRegionSize[0]), - &(nArg.reqRegionSize[1]), - &(nArg.reqRegionSize[2])); - nArg.mHasRWR = true; - } - return nArg; -} - -localArg AMDILGlobalManager::parseLVGV(const GlobalValue *G) { - localArg nArg; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); - nArg.name = ""; - if (!GV || !GV->hasInitializer()) { - return nArg; - } - const ConstantArray *CA = - dyn_cast_or_null<ConstantArray>(GV->getInitializer()); - if (!CA) { - return nArg; - } - for (size_t x = 0, y = CA->getNumOperands(); x < y; ++x) { - const Value *local = CA->getOperand(x); - const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(local); - if (!CE || !CE->getNumOperands()) { - continue; - } - nArg.name = (*(CE->op_begin()))->getName(); - if (mArrayMems.find(nArg.name) != mArrayMems.end()) { - nArg.local.push_back(&(mArrayMems[nArg.name])); - } - } - return nArg; -} - -void AMDILGlobalManager::parseConstantPtrAnnotate(const GlobalValue *G) { - const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G); - const ConstantArray *CA = - dyn_cast_or_null<ConstantArray>(GV->getInitializer()); - if (!CA) { - return; - } - uint32_t numOps = CA->getNumOperands(); - for (uint32_t x = 0; x < numOps; ++x) { - const Value *V = CA->getOperand(x); - const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V); - if (!CS) { - continue; - } - assert(CS->getNumOperands() == 2 && "There can only be 2" - " fields, a name and size"); - const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0)); - const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1)); - assert(nameField && "There must be a constant name field"); - assert(sizeField && "There must be a constant size field"); - const GlobalVariable *nameGV = - dyn_cast<GlobalVariable>(nameField->getOperand(0)); - const ConstantDataArray *nameArray = - dyn_cast<ConstantDataArray>(nameGV->getInitializer()); - // Lets add this string to the set of strings we should ignore processing - mIgnoreStr.insert(nameGV->getName()); - if (mConstMems.find(nameGV->getName()) - != mConstMems.end()) { - // If we already processesd this string as a constant, lets remove it from - // the list of known constants. This way we don't process unneeded data - // and don't generate code/metadata for strings that are never used. - mConstMems.erase(mConstMems.find(nameGV->getName())); - } else { - mIgnoreStr.insert(CS->getOperand(0)->getName()); - } - constPtr constAttr; - constAttr.name = nameArray->getAsString(); - constAttr.size = (sizeField->getZExtValue() + 15) & ~15; - constAttr.base = CS; - constAttr.isArgument = true; - constAttr.isArray = false; - constAttr.cbNum = 0; - constAttr.offset = 0; - constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize()); - // Now that we have all our constant information, - // lets update the kernel - llvm::StringRef kernelName = G->getName().data() + 30; - kernel k; - if (mKernels.find(kernelName) != mKernels.end()) { - k = mKernels[kernelName]; - } else { - k.curSize = 0; - k.curRSize = 0; - k.curHWSize = 0; - k.curHWRSize = 0; - k.constSize = 0; - k.lvgv = NULL; - k.sgv = NULL; - memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB); - } - constAttr.cbNum = k.constPtr.size() + 2; - k.constPtr.push_back(constAttr); - mKernels[kernelName] = k; - } -} - -void AMDILGlobalManager::parseImageAnnotate(const GlobalValue *G) { - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); - const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); - if (!CA) { - return; - } - if (isa<GlobalValue>(CA)) { - return; - } - uint32_t e = CA->getNumOperands(); - if (!e) { - return; - } - kernel k; - llvm::StringRef name = G->getName().data() + 23; - if (mKernels.find(name) != mKernels.end()) { - k = mKernels[name]; - } else { - k.curSize = 0; - k.curRSize = 0; - k.curHWSize = 0; - k.curHWRSize = 0; - k.constSize = 0; - k.lvgv = NULL; - k.sgv = NULL; - memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB); - } - for (uint32_t i = 0; i != e; ++i) { - const Value *V = CA->getOperand(i); - const Constant *C = dyn_cast<Constant>(V); - const ConstantStruct *CS = dyn_cast<ConstantStruct>(C); - if (CS && CS->getNumOperands() == 2) { - if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) != - mConstMems.end()) { - // If we already processesd this string as a constant, lets remove it - // from the list of known constants. This way we don't process unneeded - // data and don't generate code/metadata for strings that are never - // used. - mConstMems.erase( - mConstMems.find(CS->getOperand(0)->getOperand(0)->getName())); - } else { - mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName()); - } - const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1)); - uint32_t val = (uint32_t)CI->getZExtValue(); - if (val == 1) { - k.readOnly.insert(i); - } else if (val == 2) { - k.writeOnly.insert(i); - } else { - assert(!"Unknown image type value!"); - } - } - } - mKernels[name] = k; -} - -void AMDILGlobalManager::parseAutoArray(const GlobalValue *GV, bool isRegion) { - const GlobalVariable *G = dyn_cast<GlobalVariable>(GV); - Type *Ty = (G) ? G->getType() : NULL; - arraymem tmp; - tmp.isHW = true; - tmp.offset = 0; - tmp.vecSize = getTypeSize(Ty, true); - tmp.isRegion = isRegion; - mArrayMems[GV->getName()] = tmp; -} - -void AMDILGlobalManager::parseConstantPtr(const GlobalValue *GV) { - const GlobalVariable *G = dyn_cast<GlobalVariable>(GV); - Type *Ty = (G) ? G->getType() : NULL; - constPtr constAttr; - constAttr.name = G->getName(); - constAttr.size = getTypeSize(Ty, true); - constAttr.base = GV; - constAttr.isArgument = false; - constAttr.isArray = true; - constAttr.offset = 0; - constAttr.cbNum = 0; - constAttr.usesHardware = false; - mConstMems[GV->getName()] = constAttr; -} - -void AMDILGlobalManager::parseGlobalAnnotate(const GlobalValue *G) { - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); - if (!GV->hasInitializer()) { - return; - } - const Constant *CT = GV->getInitializer(); - if (!CT || isa<GlobalValue>(CT)) { - return; - } - const ConstantArray *CA = dyn_cast<ConstantArray>(CT); - if (!CA) { - return; - } - - unsigned int nKernels = CA->getNumOperands(); - for (unsigned int i = 0, e = nKernels; i != e; ++i) { - parseKernelInformation(CA->getOperand(i)); - } -} - -void AMDILGlobalManager::parseKernelInformation(const Value *V) { - if (isa<GlobalValue>(V)) { - return; - } - const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V); - if (!CS) { - return; - } - uint32_t N = CS->getNumOperands(); - if (N != 5) { - return; - } - kernel tmp; - - tmp.curSize = 0; - tmp.curRSize = 0; - tmp.curHWSize = 0; - tmp.curHWRSize = 0; - // The first operand is always a pointer to the kernel. - const Constant *CV = dyn_cast<Constant>(CS->getOperand(0)); - llvm::StringRef kernelName = ""; - if (CV->getNumOperands()) { - kernelName = (*(CV->op_begin()))->getName(); - } - - // If we have images, then we have already created the kernel and we just need - // to get the kernel information. - if (mKernels.find(kernelName) != mKernels.end()) { - tmp = mKernels[kernelName]; - } else { - tmp.curSize = 0; - tmp.curRSize = 0; - tmp.curHWSize = 0; - tmp.curHWRSize = 0; - tmp.constSize = 0; - tmp.lvgv = NULL; - tmp.sgv = NULL; - memset(tmp.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB); - } - - - // The second operand is SGV, there can only be one so we don't need to worry - // about parsing out multiple data points. - CV = dyn_cast<Constant>(CS->getOperand(1)); - - llvm::StringRef sgvName; - if (CV->getNumOperands()) { - sgvName = (*(CV->op_begin()))->getName(); - } - - if (mKernelArgs.find(sgvName) != mKernelArgs.end()) { - tmp.sgv = &mKernelArgs[sgvName]; - } - // The third operand is FGV, which is skipped - // The fourth operand is LVGV - // There can be multiple local arrays, so we - // need to handle each one seperatly - CV = dyn_cast<Constant>(CS->getOperand(3)); - llvm::StringRef lvgvName = ""; - if (CV->getNumOperands()) { - lvgvName = (*(CV->op_begin()))->getName(); - } - if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) { - localArg *ptr = &mLocalArgs[lvgvName]; - tmp.lvgv = ptr; - llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS>::iterator ib, ie; - for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) { - if ((*ib)->isRegion) { - if ((*ib)->isHW) { - (*ib)->offset = tmp.curHWRSize; - tmp.curHWRSize += ((*ib)->vecSize + 15) & ~15; - } else { - (*ib)->offset = tmp.curRSize; - tmp.curRSize += ((*ib)->vecSize + 15) & ~15; - } - } else { - if ((*ib)->isHW) { - (*ib)->offset = tmp.curHWSize; - tmp.curHWSize += ((*ib)->vecSize + 15) & ~15; - } else { - (*ib)->offset = tmp.curSize; - tmp.curSize += ((*ib)->vecSize + 15) & ~15; - } - } - } - } - - // The fifth operand is NULL - mKernels[kernelName] = tmp; -} - -const kernel &AMDILGlobalManager::getKernel(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - assert(isKernel(name) && "Must be a kernel to call getKernel"); - return iter->second; -} - -bool AMDILGlobalManager::isKernel(const llvm::StringRef &name) const { - return (mKernels.find(name) != mKernels.end()); -} - -bool AMDILGlobalManager::isWriteOnlyImage(const llvm::StringRef &name, - uint32_t iID) const { - const StringMap<kernel>::const_iterator kiter = mKernels.find(name); - if (kiter == mKernels.end()) { - return false; - } - return kiter->second.writeOnly.count(iID); -} - -uint32_t -AMDILGlobalManager::getNumWriteImages(const llvm::StringRef &name) const { - char *env = NULL; - env = getenv("GPU_DISABLE_RAW_UAV"); - if (env && env[0] == '1') { - return 8; - } - const StringMap<kernel>::const_iterator kiter = mKernels.find(name); - if (kiter == mKernels.end()) { - return 0; - } else { - return kiter->second.writeOnly.size(); - } -} - -bool AMDILGlobalManager::isReadOnlyImage(const llvm::StringRef &name, - uint32_t iID) const { - const StringMap<kernel>::const_iterator kiter = mKernels.find(name); - if (kiter == mKernels.end()) { - return false; - } - return kiter->second.readOnly.count(iID); -} - -bool AMDILGlobalManager::hasRWG(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - kernelArg *ptr = iter->second.sgv; - if (ptr) { - return ptr->mHasRWG; - } - } - return false; -} - -bool AMDILGlobalManager::hasRWR(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - kernelArg *ptr = iter->second.sgv; - if (ptr) { - return ptr->mHasRWR; - } - } - return false; -} - -uint32_t -AMDILGlobalManager::getMaxGroupSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - kernelArg *sgv = iter->second.sgv; - if (sgv) { - return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2]; - } - } - return mSTM->getDefaultSize(0) * - mSTM->getDefaultSize(1) * - mSTM->getDefaultSize(2); -} - -uint32_t -AMDILGlobalManager::getMaxRegionSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - kernelArg *sgv = iter->second.sgv; - if (sgv) { - return sgv->reqRegionSize[0] * - sgv->reqRegionSize[1] * - sgv->reqRegionSize[2]; - } - } - return mSTM->getDefaultSize(0) * - mSTM->getDefaultSize(1) * - mSTM->getDefaultSize(2); -} - -uint32_t AMDILGlobalManager::getRegionSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - return iter->second.curRSize; - } else { - return 0; - } -} - -uint32_t AMDILGlobalManager::getLocalSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - return iter->second.curSize; - } else { - return 0; - } -} - -uint32_t AMDILGlobalManager::getConstSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - return iter->second.constSize; - } else { - return 0; - } -} - -uint32_t -AMDILGlobalManager::getHWRegionSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - return iter->second.curHWRSize; - } else { - return 0; - } -} - -uint32_t AMDILGlobalManager::getHWLocalSize(const llvm::StringRef &name) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end()) { - return iter->second.curHWSize; - } else { - return 0; - } -} - -int32_t AMDILGlobalManager::getArgID(const Argument *arg) { - DenseMap<const Argument *, int32_t>::iterator argiter = mArgIDMap.find(arg); - if (argiter != mArgIDMap.end()) { - return argiter->second; - } else { - return -1; - } -} - - -uint32_t -AMDILGlobalManager::getLocal(const llvm::StringRef &name, uint32_t dim) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end() && iter->second.sgv) { - kernelArg *sgv = iter->second.sgv; - switch (dim) { - default: break; - case 0: - case 1: - case 2: - return sgv->reqGroupSize[dim]; - break; - case 3: - return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2]; - }; - } - switch (dim) { - default: - return 1; - case 3: - return mSTM->getDefaultSize(0) * - mSTM->getDefaultSize(1) * - mSTM->getDefaultSize(2); - case 2: - case 1: - case 0: - return mSTM->getDefaultSize(dim); - break; - }; - return 1; -} - -uint32_t -AMDILGlobalManager::getRegion(const llvm::StringRef &name, uint32_t dim) const { - StringMap<kernel>::const_iterator iter = mKernels.find(name); - if (iter != mKernels.end() && iter->second.sgv) { - kernelArg *sgv = iter->second.sgv; - switch (dim) { - default: break; - case 0: - case 1: - case 2: - return sgv->reqRegionSize[dim]; - break; - case 3: - return sgv->reqRegionSize[0] * - sgv->reqRegionSize[1] * - sgv->reqRegionSize[2]; - }; - } - switch (dim) { - default: - return 1; - case 3: - return mSTM->getDefaultSize(0) * - mSTM->getDefaultSize(1) * - mSTM->getDefaultSize(2); - case 2: - case 1: - case 0: - return mSTM->getDefaultSize(dim); - break; - }; - return 1; -} - -StringMap<constPtr>::iterator AMDILGlobalManager::consts_begin() { - return mConstMems.begin(); -} - - -StringMap<constPtr>::iterator AMDILGlobalManager::consts_end() { - return mConstMems.end(); -} - -bool AMDILGlobalManager::byteStoreExists(StringRef S) const { - return mByteStore.find(S) != mByteStore.end(); -} - -bool AMDILGlobalManager::usesHWConstant(const kernel &krnl, - const llvm::StringRef &arg) { - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->usesHardware; - } else { - return false; - } -} - -uint32_t AMDILGlobalManager::getConstPtrSize(const kernel &krnl, - const llvm::StringRef &arg) -{ - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->size; - } else { - return 0; - } -} - -uint32_t AMDILGlobalManager::getConstPtrOff(const kernel &krnl, - const llvm::StringRef &arg) -{ - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->offset; - } else { - return 0; - } -} - -uint32_t AMDILGlobalManager::getConstPtrCB(const kernel &krnl, - const llvm::StringRef &arg) -{ - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->cbNum; - } else { - return 0; - } -} - -void AMDILGlobalManager::calculateCPOffsets(const MachineFunction *MF, - kernel &krnl) -{ - const MachineConstantPool *MCP = MF->getConstantPool(); - if (!MCP) { - return; - } - const std::vector<MachineConstantPoolEntry> consts = MCP->getConstants(); - size_t numConsts = consts.size(); - for (size_t x = 0; x < numConsts; ++x) { - krnl.CPOffsets.push_back( - std::make_pair<uint32_t, const Constant*>( - mCurrentCPOffset, consts[x].Val.ConstVal)); - size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true); - // Align the size to the vector boundary - curSize = (curSize + 15) & (~15); - mCurrentCPOffset += curSize; - } -} - -bool AMDILGlobalManager::isConstPtrArray(const kernel &krnl, - const llvm::StringRef &arg) { - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->isArray; - } else { - return false; - } -} - -bool AMDILGlobalManager::isConstPtrArgument(const kernel &krnl, - const llvm::StringRef &arg) -{ - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->isArgument; - } else { - return false; - } -} - -const Value *AMDILGlobalManager::getConstPtrValue(const kernel &krnl, - const llvm::StringRef &arg) { - const constPtr *curConst = getConstPtr(krnl, arg); - if (curConst) { - return curConst->base; - } else { - return NULL; - } -} - -static void -dumpZeroElements(const StructType * const T, llvm::raw_ostream &O, bool asBytes); -static void -dumpZeroElements(const IntegerType * const T, llvm::raw_ostream &O, bool asBytes); -static void -dumpZeroElements(const ArrayType * const T, llvm::raw_ostream &O, bool asBytes); -static void -dumpZeroElements(const VectorType * const T, llvm::raw_ostream &O, bool asBytes); -static void -dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes); - -void dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes) { - if (!T) { - return; - } - switch(T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::DoubleTyID: - if (asBytes) { - O << ":0:0:0:0:0:0:0:0"; - } else { - O << ":0"; - } - break; - case Type::FloatTyID: - case Type::PointerTyID: - case Type::FunctionTyID: - if (asBytes) { - O << ":0:0:0:0"; - } else { - O << ":0"; - } - break; - case Type::IntegerTyID: - dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes); - break; - case Type::StructTyID: - { - const StructType *ST = cast<StructType>(T); - if (!ST->isOpaque()) { - dumpZeroElements(dyn_cast<StructType>(T), O, asBytes); - } else { // A pre-LLVM 3.0 opaque type - if (asBytes) { - O << ":0:0:0:0"; - } else { - O << ":0"; - } - } - } - break; - case Type::ArrayTyID: - dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes); - break; - case Type::VectorTyID: - dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes); - break; - }; -} - -void -dumpZeroElements(const StructType * const ST, llvm::raw_ostream &O, bool asBytes) { - if (!ST) { - return; - } - Type *curType; - StructType::element_iterator eib = ST->element_begin(); - StructType::element_iterator eie = ST->element_end(); - for (;eib != eie; ++eib) { - curType = *eib; - dumpZeroElements(curType, O, asBytes); - } -} - -void -dumpZeroElements(const IntegerType * const IT, llvm::raw_ostream &O, bool asBytes) { - if (asBytes) { - unsigned byteWidth = (IT->getBitWidth() >> 3); - for (unsigned x = 0; x < byteWidth; ++x) { - O << ":0"; - } - } -} - -void -dumpZeroElements(const ArrayType * const AT, llvm::raw_ostream &O, bool asBytes) { - size_t size = AT->getNumElements(); - for (size_t x = 0; x < size; ++x) { - dumpZeroElements(AT->getElementType(), O, asBytes); - } -} - -void -dumpZeroElements(const VectorType * const VT, llvm::raw_ostream &O, bool asBytes) { - size_t size = VT->getNumElements(); - for (size_t x = 0; x < size; ++x) { - dumpZeroElements(VT->getElementType(), O, asBytes); - } -} - -void AMDILGlobalManager::printConstantValue(const Constant *CAval, - llvm::raw_ostream &O, bool asBytes) { - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) { - bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; - if (isDouble) { - double val = CFP->getValueAPF().convertToDouble(); - union dtol_union { - double d; - uint64_t l; - char c[8]; - } conv; - conv.d = val; - if (!asBytes) { - O << ":"; - O.write_hex(conv.l); - } else { - for (int i = 0; i < 8; ++i) { - O << ":"; - O.write_hex((unsigned)conv.c[i] & 0xFF); - } - } - } else { - float val = CFP->getValueAPF().convertToFloat(); - union ftoi_union { - float f; - uint32_t u; - char c[4]; - } conv; - conv.f = val; - if (!asBytes) { - O << ":"; - O.write_hex(conv.u); - } else { - for (int i = 0; i < 4; ++i) { - O << ":"; - O.write_hex((unsigned)conv.c[i] & 0xFF); - } - } - } - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) { - uint64_t zVal = CI->getValue().getZExtValue(); - if (!asBytes) { - O << ":"; - O.write_hex(zVal); - } else { - switch (CI->getBitWidth()) { - default: - { - union ltob_union { - uint64_t l; - char c[8]; - } conv; - conv.l = zVal; - for (int i = 0; i < 8; ++i) { - O << ":"; - O.write_hex((unsigned)conv.c[i] & 0xFF); - } - } - break; - case 8: - O << ":"; - O.write_hex(zVal & 0xFF); - break; - case 16: - { - union stob_union { - uint16_t s; - char c[2]; - } conv; - conv.s = (uint16_t)zVal; - O << ":"; - O.write_hex((unsigned)conv.c[0] & 0xFF); - O << ":"; - O.write_hex((unsigned)conv.c[1] & 0xFF); - } - break; - case 32: - { - union itob_union { - uint32_t i; - char c[4]; - } conv; - conv.i = (uint32_t)zVal; - for (int i = 0; i < 4; ++i) { - O << ":"; - O.write_hex((unsigned)conv.c[i] & 0xFF); - } - } - break; - } - } - } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) { - int y = CV->getNumOperands()-1; - int x = 0; - for (; x < y; ++x) { - printConstantValue(CV->getOperand(x), O, asBytes); - } - printConstantValue(CV->getOperand(x), O, asBytes); - } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) { - int y = CS->getNumOperands(); - int x = 0; - for (; x < y; ++x) { - printConstantValue(CS->getOperand(x), O, asBytes); - } - } else if (const ConstantAggregateZero *CAZ - = dyn_cast<ConstantAggregateZero>(CAval)) { - int y = CAZ->getNumOperands(); - if (y > 0) { - int x = 0; - for (; x < y; ++x) { - printConstantValue((llvm::Constant *)CAZ->getOperand(x), - O, asBytes); - } - } else { - if (asBytes) { - dumpZeroElements(CAval->getType(), O, asBytes); - } else { - int y = getNumElements(CAval->getType())-1; - for (int x = 0; x < y; ++x) { - O << ":0"; - } - O << ":0"; - } - } - } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) { - int y = CA->getNumOperands(); - int x = 0; - for (; x < y; ++x) { - printConstantValue(CA->getOperand(x), O, asBytes); - } - } else if (dyn_cast<ConstantPointerNull>(CAval)) { - O << ":0"; - //assert(0 && "Hit condition which was not expected"); - } else if (dyn_cast<ConstantExpr>(CAval)) { - O << ":0"; - //assert(0 && "Hit condition which was not expected"); - } else if (dyn_cast<UndefValue>(CAval)) { - O << ":0"; - //assert(0 && "Hit condition which was not expected"); - } else { - assert(0 && "Hit condition which was not expected"); - } -} - -static bool isStruct(Type * const T) -{ - if (!T) { - return false; - } - switch (T->getTypeID()) { - default: - return false; - case Type::PointerTyID: - return isStruct(T->getContainedType(0)); - case Type::StructTyID: - return true; - case Type::ArrayTyID: - case Type::VectorTyID: - return isStruct(dyn_cast<SequentialType>(T)->getElementType()); - }; - -} - -void AMDILGlobalManager::dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km, - uint32_t id) { - uint32_t size = 0; - for (StringMap<constPtr>::iterator cmb = consts_begin(), - cme = consts_end(); cmb != cme; ++cmb) { - if (id == cmb->second.cbNum) { - size += (cmb->second.size + 15) & (~15); - } - } - if (id == 0) { - O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n"; - if (mCurrentCPOffset) { - for (StringMap<kernel>::iterator kcpb = mKernels.begin(), - kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) { - const kernel& k = kcpb->second; - size_t numConsts = k.CPOffsets.size(); - for (size_t x = 0; x < numConsts; ++x) { - size_t offset = k.CPOffsets[x].first; - const Constant *C = k.CPOffsets[x].second; - Type *Ty = C->getType(); - size_t size = (isStruct(Ty) ? getTypeSize(Ty, true) - : getNumElements(Ty)); - O << ";#" << km->getTypeName(Ty, symTab) << ":"; - O << offset << ":" << size ; - printConstantValue(C, O, isStruct(Ty)); - O << "\n"; - } - } - } - } else { - O << ";#DATASTART:" << id << ":" << size << "\n"; - } - - for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end(); - cmb != cme; ++cmb) { - if (cmb->second.cbNum != id) { - continue; - } - const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base); - Type *Ty = (G) ? G->getType() : NULL; - size_t offset = cmb->second.offset; - const Constant *C = G->getInitializer(); - size_t size = (isStruct(Ty) - ? getTypeSize(Ty, true) - : getNumElements(Ty)); - O << ";#" << km->getTypeName(Ty, symTab) << ":"; - if (!id) { - O << (offset + mCurrentCPOffset) << ":" << size; - } else { - O << offset << ":" << size; - } - if (C) { - printConstantValue(C, O, isStruct(Ty)); - } else { - assert(0 && "Cannot have a constant pointer" - " without an initializer!"); - } - O <<"\n"; - } - if (id == 0) { - O << ";#DATAEND\n"; - } else { - O << ";#DATAEND:" << id << "\n"; - } -} - -void -AMDILGlobalManager::dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km) { - if (mConstMems.empty() && !mCurrentCPOffset) { - return; - } else { - llvm::DenseSet<uint32_t> const_set; - for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end(); - cmb != cme; ++cmb) { - const_set.insert(cmb->second.cbNum); - } - if (mCurrentCPOffset) { - const_set.insert(0); - } - for (llvm::DenseSet<uint32_t>::iterator setb = const_set.begin(), - sete = const_set.end(); setb != sete; ++setb) { - dumpDataToCB(O, km, *setb); - } - } -} - -/// Create a function ID if it is not known or return the known -/// function ID. -uint32_t AMDILGlobalManager::getOrCreateFunctionID(const GlobalValue* func) { - if (func->getName().size()) { - return getOrCreateFunctionID(func->getName()); - } - uint32_t id; - if (mFuncPtrNames.find(func) == mFuncPtrNames.end()) { - id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size(); - mFuncPtrNames[func] = id; - } else { - id = mFuncPtrNames[func]; - } - return id; -} -uint32_t AMDILGlobalManager::getOrCreateFunctionID(const std::string &func) { - uint32_t id; - if (mFuncNames.find(func) == mFuncNames.end()) { - id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size(); - mFuncNames[func] = id; - } else { - id = mFuncNames[func]; - } - return id; -} diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.h b/src/gallium/drivers/radeon/AMDILGlobalManager.h deleted file mode 100644 index 1b0361e0174..00000000000 --- a/src/gallium/drivers/radeon/AMDILGlobalManager.h +++ /dev/null @@ -1,256 +0,0 @@ -//===-- AMDILGlobalManager.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -// ==-----------------------------------------------------------------------===// -// -// Class that handles parsing and storing global variables that are relevant to -// the compilation of the module. -// -// ==-----------------------------------------------------------------------===// - -#ifndef _AMDILGLOBALMANAGER_H_ -#define _AMDILGLOBALMANAGER_H_ - -#include "AMDIL.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/Module.h" -#include "llvm/Support/raw_ostream.h" - -#include <set> -#include <string> - -#define CB_BASE_OFFSET 2 - -namespace llvm { - -class PointerType; -class AMDILKernelManager; -class AMDILSubtarget; -class TypeSymbolTable; -class Argument; -class GlobalValue; -class MachineFunction; - -/// structure that holds information for a single local/region address array -typedef struct _arrayMemRec { - uint32_t vecSize; // size of each vector - uint32_t offset; // offset into the memory section - bool isHW; // flag to specify if HW is used or SW is used - bool isRegion; // flag to specify if GDS is used or not -} arraymem; - -/// Structure that holds information for all local/region address -/// arrays in the kernel -typedef struct _localArgRec { - llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS> local; - std::string name; // Kernel Name -} localArg; - -/// structure that holds information about a constant address -/// space pointer that is a kernel argument -typedef struct _constPtrRec { - const Value *base; - uint32_t size; - uint32_t offset; - uint32_t cbNum; // value of 0 means that it does not use hw CB - bool isArray; - bool isArgument; - bool usesHardware; - std::string name; -} constPtr; - -/// Structure that holds information for each kernel argument -typedef struct _kernelArgRec { - uint32_t reqGroupSize[3]; - uint32_t reqRegionSize[3]; - llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo; - bool mHasRWG; - bool mHasRWR; -} kernelArg; - -/// Structure that holds information for each kernel -typedef struct _kernelRec { - mutable uint32_t curSize; - mutable uint32_t curRSize; - mutable uint32_t curHWSize; - mutable uint32_t curHWRSize; - uint32_t constSize; - kernelArg *sgv; - localArg *lvgv; - llvm::SmallVector<struct _constPtrRec, DEFAULT_VEC_SLOTS> constPtr; - uint32_t constSizes[HW_MAX_NUM_CB]; - llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly; - llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly; - llvm::SmallVector<std::pair<uint32_t, const Constant *>, - DEFAULT_VEC_SLOTS> CPOffsets; -} kernel; - -class AMDILGlobalManager { -public: - AMDILGlobalManager(bool debugMode = false); - ~AMDILGlobalManager(); - - /// Process the given module and parse out the global variable metadata passed - /// down from the frontend-compiler - void processModule(const Module &MF, const AMDILTargetMachine* mTM); - - /// Returns whether the current name is the name of a kernel function or a - /// normal function - bool isKernel(const llvm::StringRef &name) const; - - /// Returns true if the image ID corresponds to a read only image. - bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const; - - /// Returns true if the image ID corresponds to a write only image. - bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const; - - /// Returns the number of write only images for the kernel. - uint32_t getNumWriteImages(const llvm::StringRef &name) const; - - /// Gets the group size of the kernel for the given dimension. - uint32_t getLocal(const llvm::StringRef &name, uint32_t dim) const; - - /// Gets the region size of the kernel for the given dimension. - uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const; - - /// Get the Region memory size in 1d for the given function/kernel. - uint32_t getRegionSize(const llvm::StringRef &name) const; - - /// Get the region memory size in 1d for the given function/kernel. - uint32_t getLocalSize(const llvm::StringRef &name) const; - - // Get the max group size in one 1D for the given function/kernel. - uint32_t getMaxGroupSize(const llvm::StringRef &name) const; - - // Get the max region size in one 1D for the given function/kernel. - uint32_t getMaxRegionSize(const llvm::StringRef &name) const; - - /// Get the constant memory size in 1d for the given function/kernel. - uint32_t getConstSize(const llvm::StringRef &name) const; - - /// Get the HW local size in 1d for the given function/kernel We need to - /// seperate SW local and HW local for the case where some local memory is - /// emulated in global and some is using the hardware features. The main - /// problem is that in OpenCL 1.0/1.1 cl_khr_byte_addressable_store allows - /// these actions to happen on all memory spaces, but the hardware can only - /// write byte address stores to UAV and LDS, not GDS or Stack. - uint32_t getHWLocalSize(const llvm::StringRef &name) const; - uint32_t getHWRegionSize(const llvm::StringRef &name) const; - - /// Get the offset of the array for the kernel. - int32_t getArrayOffset(const llvm::StringRef &name) const; - - /// Get the offset of the const memory for the kernel. - int32_t getConstOffset(const llvm::StringRef &name) const; - - /// Get the boolean value if this particular constant uses HW or not. - bool getConstHWBit(const llvm::StringRef &name) const; - - /// Get a reference to the kernel metadata information for the given function - /// name. - const kernel &getKernel(const llvm::StringRef &name) const; - - /// Returns whether a reqd_workgroup_size attribute has been used or not. - bool hasRWG(const llvm::StringRef &name) const; - - /// Returns whether a reqd_workregion_size attribute has been used or not. - bool hasRWR(const llvm::StringRef &name) const; - - - /// Dump the data section to the output stream for the given kernel. - void dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km); - - /// Iterate through the constants that are global to the compilation unit. - StringMap<constPtr>::iterator consts_begin(); - StringMap<constPtr>::iterator consts_end(); - - /// Query if the kernel has a byte store. - bool byteStoreExists(llvm::StringRef S) const; - - /// Query if the kernel and argument uses hardware constant memory. - bool usesHWConstant(const kernel &krnl, const llvm::StringRef &arg); - - /// Query if the constant pointer is an argument. - bool isConstPtrArgument(const kernel &krnl, const llvm::StringRef &arg); - - /// Query if the constant pointer is an array that is globally scoped. - bool isConstPtrArray(const kernel &krnl, const llvm::StringRef &arg); - - /// Query the size of the constant pointer. - uint32_t getConstPtrSize(const kernel &krnl, const llvm::StringRef &arg); - - /// Query the offset of the constant pointer. - uint32_t getConstPtrOff(const kernel &krnl, const llvm::StringRef &arg); - - /// Query the constant buffer number for a constant pointer. - uint32_t getConstPtrCB(const kernel &krnl, const llvm::StringRef &arg); - - /// Query the Value* that the constant pointer originates from. - const Value *getConstPtrValue(const kernel &krnl, const llvm::StringRef &arg); - - /// Get the ID of the argument. - int32_t getArgID(const Argument *arg); - - /// Get the unique function ID for the specific function name and create a new - /// unique ID if it is not found. - uint32_t getOrCreateFunctionID(const GlobalValue* func); - uint32_t getOrCreateFunctionID(const std::string& func); - - /// Calculate the offsets of the constant pool for the given kernel and - /// machine function. - void calculateCPOffsets(const MachineFunction *MF, kernel &krnl); - - /// Print the global manager to the output stream. - void print(llvm::raw_ostream& O); - - /// Dump the global manager to the output stream - debug use. - void dump(); - -private: - /// Various functions that parse global value information and store them in - /// the global manager. This approach is used instead of dynamic parsing as it - /// might require more space, but should allow caching of data that gets - /// requested multiple times. - kernelArg parseSGV(const GlobalValue *GV); - localArg parseLVGV(const GlobalValue *GV); - void parseGlobalAnnotate(const GlobalValue *G); - void parseImageAnnotate(const GlobalValue *G); - void parseConstantPtrAnnotate(const GlobalValue *G); - void printConstantValue(const Constant *CAval, - llvm::raw_ostream& O, - bool asByte); - void parseKernelInformation(const Value *V); - void parseAutoArray(const GlobalValue *G, bool isRegion); - void parseConstantPtr(const GlobalValue *G); - void allocateGlobalCB(); - void dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km, uint32_t id); - bool checkConstPtrsUseHW(Module::const_iterator *F); - - llvm::StringMap<arraymem> mArrayMems; - llvm::StringMap<localArg> mLocalArgs; - llvm::StringMap<kernelArg> mKernelArgs; - llvm::StringMap<kernel> mKernels; - llvm::StringMap<constPtr> mConstMems; - llvm::StringMap<uint32_t> mFuncNames; - llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames; - llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap; - std::set<llvm::StringRef> mByteStore; - std::set<llvm::StringRef> mIgnoreStr; - llvm::DenseMap<const Argument *, int32_t> mArgIDMap; - const char *symTab; - const AMDILSubtarget *mSTM; - size_t mOffset; - uint32_t mReservedBuffs; - uint32_t mCurrentCPOffset; - bool mDebugMode; -}; -} // namespace llvm -#endif // __AMDILGLOBALMANAGER_H_ diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index 6f78d15ad0b..0f76babb807 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -14,9 +14,7 @@ #include "AMDILISelLowering.h" #include "AMDILDevices.h" -#include "AMDILGlobalManager.h" #include "AMDILIntrinsicInfo.h" -#include "AMDILKernelManager.h" #include "AMDILMachineFunctionInfo.h" #include "AMDILSubtarget.h" #include "AMDILTargetMachine.h" @@ -31,6 +29,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -1905,11 +1904,6 @@ AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, isRet = false; IntNo = AMDILISD::APPEND_CONSUME_NORET; break; }; - const AMDILSubtarget *stm = &this->getTargetMachine() - .getSubtarget<AMDILSubtarget>(); - AMDILKernelManager *KM = const_cast<AMDILKernelManager*>( - stm->getKernelManager()); - KM->setOutputInst(); Info.opc = IntNo; Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32; @@ -2134,58 +2128,33 @@ AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const SDValue DST = Op; const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); const GlobalValue *G = GADN->getGlobal(); - const AMDILSubtarget *stm = &this->getTargetMachine() - .getSubtarget<AMDILSubtarget>(); - const AMDILGlobalManager *GM = stm->getGlobalManager(); DebugLoc DL = Op.getDebugLoc(); - int64_t base_offset = GADN->getOffset(); - int32_t arrayoffset = GM->getArrayOffset(G->getName()); - int32_t constoffset = GM->getConstOffset(G->getName()); - if (arrayoffset != -1) { - DST = DAG.getConstant(arrayoffset, MVT::i32); - DST = DAG.getNode(ISD::ADD, DL, MVT::i32, - DST, DAG.getConstant(base_offset, MVT::i32)); - } else if (constoffset != -1) { - if (GM->getConstHWBit(G->getName())) { - DST = DAG.getConstant(constoffset, MVT::i32); - DST = DAG.getNode(ISD::ADD, DL, MVT::i32, - DST, DAG.getConstant(base_offset, MVT::i32)); - } else { - SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32); - SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32); - DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg, - DAG.getConstant(base_offset, MVT::i32)); - DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg); - } + const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); + if (!GV) { + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); } else { - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); - if (!GV) { - DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); - } else { - if (GV->hasInitializer()) { - const Constant *C = dyn_cast<Constant>(GV->getInitializer()); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { - DST = DAG.getConstant(CI->getValue(), Op.getValueType()); - - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { - DST = DAG.getConstantFP(CF->getValueAPF(), - Op.getValueType()); - } else if (dyn_cast<ConstantAggregateZero>(C)) { - EVT VT = Op.getValueType(); - if (VT.isInteger()) { - DST = DAG.getConstant(0, VT); - } else { - DST = DAG.getConstantFP(0, VT); - } + if (GV->hasInitializer()) { + const Constant *C = dyn_cast<Constant>(GV->getInitializer()); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { + DST = DAG.getConstant(CI->getValue(), Op.getValueType()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { + DST = DAG.getConstantFP(CF->getValueAPF(), + Op.getValueType()); + } else if (dyn_cast<ConstantAggregateZero>(C)) { + EVT VT = Op.getValueType(); + if (VT.isInteger()) { + DST = DAG.getConstant(0, VT); } else { - assert(!"lowering this type of Global Address " - "not implemented yet!"); - C->dump(); - DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + DST = DAG.getConstantFP(0, VT); } } else { + assert(!"lowering this type of Global Address " + "not implemented yet!"); + C->dump(); DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); } + } else { + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); } } return DST; diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.cpp b/src/gallium/drivers/radeon/AMDILKernelManager.cpp deleted file mode 100644 index 4df81ff5078..00000000000 --- a/src/gallium/drivers/radeon/AMDILKernelManager.cpp +++ /dev/null @@ -1,1356 +0,0 @@ -//===-- AMDILKernelManager.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDILKernelManager.h" - -#include "AMDILAlgorithms.tpp" -#include "AMDILKernelManager.h" -#ifdef UPSTREAM_LLVM -#include "AMDILAsmPrinter.h" -#endif -#include "AMDILCompilerErrors.h" -#include "AMDILDeviceInfo.h" -#include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILMachineFunctionInfo.h" -#include "AMDILModuleInfo.h" -#include "AMDILSubtarget.h" -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/MathExtras.h" - -#include <stdio.h> - -using namespace llvm; -#define NUM_EXTRA_SLOTS_PER_IMAGE 1 - -static bool errorPrint(const char *ptr, llvm::raw_ostream &O) { - if (ptr[0] == 'E') { - O << ";error:" << ptr << "\n"; - } else { - O << ";warning:" << ptr << "\n"; - } - return false; -} - -#if 0 -static bool -samplerPrint(StringMap<SamplerInfo>::iterator &data, llvm::raw_ostream &O) { - O << ";sampler:" << (*data).second.name << ":" << (*data).second.idx - << ":" << ((*data).second.val == (uint32_t)-1 ? 0 : 1) - << ":" << ((*data).second.val != (uint32_t)-1 ? (*data).second.val : 0) - << "\n"; - return false; -} -#endif - -static bool arenaPrint(uint32_t val, llvm::raw_ostream &O) { - if (val >= ARENA_SEGMENT_RESERVED_UAVS) { - O << "dcl_arena_uav_id(" << val << ")\n"; - } - return false; -} - -static bool uavPrint(uint32_t val, llvm::raw_ostream &O) { - if (val < 8 || val == 11){ - O << "dcl_raw_uav_id(" << val << ")\n"; - } - return false; -} - -static bool uavPrintSI(uint32_t val, llvm::raw_ostream &O) { - O << "dcl_typeless_uav_id(" << val << ")_stride(4)_length(4)_access(read_write)\n"; - return false; -} - -static bool -printfPrint(std::pair<const std::string, PrintfInfo *> &data, llvm::raw_ostream &O) { - O << ";printf_fmt:" << data.second->getPrintfID(); - // Number of operands - O << ":" << data.second->getNumOperands(); - // Size of each operand - for (size_t i = 0, e = data.second->getNumOperands(); i < e; ++i) { - O << ":" << (data.second->getOperandID(i) >> 3); - } - const char *ptr = data.first.c_str(); - uint32_t size = data.first.size() - 1; - // The format string size - O << ":" << size << ":"; - for (size_t i = 0; i < size; ++i) { - if (ptr[i] == '\r') { - O << "\\r"; - } else if (ptr[i] == '\n') { - O << "\\n"; - } else { - O << ptr[i]; - } - } - O << ";\n"; // c_str() is cheap way to trim - return false; -} - - -void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip, - int numWriteImages, int raw_uav_buffer, - int counter, bool isKernel, - const Function *F) { - assert(F && "Cannot pass a NULL Pointer to F!"); - assert(Ip->getType()->isPointerTy() && - "Argument must be a pointer to be passed into this function!\n"); - std::string ptrArg(";pointer:"); - const char *symTab = "NoSymTab"; - uint32_t ptrID = getUAVID(Ip); - const PointerType *PT = cast<PointerType>(Ip->getType()); - uint32_t Align = 4; - const char *MemType = "uav"; - if (PT->getElementType()->isSized()) { - Align = NextPowerOf2((uint32_t)mTM->getTargetData()-> - getTypeAllocSize(PT->getElementType())); - } - ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab) + ":1:1:" + - itostr(counter * 16) + ":"; - switch (PT->getAddressSpace()) { - case AMDILAS::ADDRESS_NONE: - //O << "No Address space qualifier!"; - mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); - assert(1); - break; - case AMDILAS::GLOBAL_ADDRESS: - if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { - if (ptrID >= ARENA_SEGMENT_RESERVED_UAVS) { - ptrID = 8; - } - } - mMFI->uav_insert(ptrID); - break; - case AMDILAS::CONSTANT_ADDRESS: { - if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)){ - const kernel t = mGM->getKernel(F->getName()); - if (mGM->usesHWConstant(t, Ip->getName())) { - MemType = "hc\0"; - ptrID = mGM->getConstPtrCB(t, Ip->getName()); - } else { - MemType = "c\0"; - mMFI->uav_insert(ptrID); - } - } else { - MemType = "c\0"; - mMFI->uav_insert(ptrID); - } - break; - } - default: - case AMDILAS::PRIVATE_ADDRESS: - if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) { - MemType = (mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) - ? "up\0" : "hp\0"; - } else { - MemType = "p\0"; - mMFI->uav_insert(ptrID); - } - break; - case AMDILAS::REGION_ADDRESS: - mMFI->setUsesRegion(); - if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) { - MemType = "hr\0"; - ptrID = 0; - } else { - MemType = "r\0"; - mMFI->uav_insert(ptrID); - } - break; - case AMDILAS::LOCAL_ADDRESS: - mMFI->setUsesLocal(); - if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) { - MemType = "hl\0"; - ptrID = 1; - } else { - MemType = "l\0"; - mMFI->uav_insert(ptrID); - } - break; - }; - ptrArg += std::string(MemType) + ":"; - ptrArg += itostr(ptrID) + ":"; - ptrArg += itostr(Align); - mMFI->addMetadata(ptrArg, true); -} - -AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM, - AMDILGlobalManager *GM) -{ - mTM = TM; - mSTM = mTM->getSubtargetImpl(); - mGM = GM; - clear(); -} - -AMDILKernelManager::~AMDILKernelManager() { - clear(); -} - -void -AMDILKernelManager::setMF(MachineFunction *MF) -{ - mMF = MF; - mMFI = MF->getInfo<AMDILMachineFunctionInfo>(); -} - -void AMDILKernelManager::clear() { - mUniqueID = 0; - mIsKernel = false; - mWasKernel = false; - mHasImageWrite = false; - mHasOutputInst = false; -} - -bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI) { - return (MI->getOpcode() == AMDIL::RETURN && wasKernel() && !mHasImageWrite - && !mHasOutputInst); -} - -void AMDILKernelManager::processArgMetadata(llvm::raw_ostream &O, - uint32_t buf, - bool isKernel) -{ - const Function *F = mMF->getFunction(); - const char * symTab = "NoSymTab"; - Function::const_arg_iterator Ip = F->arg_begin(); - Function::const_arg_iterator Ep = F->arg_end(); - - if (F->hasStructRetAttr()) { - assert(Ip != Ep && "Invalid struct return fucntion!"); - mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); - ++Ip; - } - uint32_t mCBSize = 0; - int raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - bool MultiUAV = mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV); - bool ArenaSegment = - mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment); - int numWriteImages = - mSTM->getGlobalManager()->getNumWriteImages(F->getName()); - if (numWriteImages == OPENCL_MAX_WRITE_IMAGES || MultiUAV || ArenaSegment) { - if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); - } - } - uint32_t CounterNum = 0; - uint32_t ROArg = 0; - uint32_t WOArg = 0; - uint32_t NumArg = 0; - while (Ip != Ep) { - Type *cType = Ip->getType(); - if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) { - std::string argMeta(";value:"); - argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab) + ":"; - int bitsize = cType->getPrimitiveSizeInBits(); - int numEle = 1; - if (cType->getTypeID() == Type::VectorTyID) { - numEle = cast<VectorType>(cType)->getNumElements(); - } - argMeta += itostr(numEle) + ":1:" + itostr(mCBSize << 4); - mMFI->addMetadata(argMeta, true); - - // FIXME: simplify - if ((bitsize / numEle) < 32) { - bitsize = numEle >> 2; - } else { - bitsize >>= 7; - } - if (!bitsize) { - bitsize = 1; - } - - mCBSize += bitsize; - ++NumArg; - } else if (const PointerType *PT = dyn_cast<PointerType>(cType)) { - Type *CT = PT->getElementType(); - const StructType *ST = dyn_cast<StructType>(CT); - if (ST && ST->isOpaque()) { - StringRef name = ST->getName(); - bool i1d = name.equals( "struct._image1d_t" ); - bool i1da = name.equals( "struct._image1d_array_t" ); - bool i1db = name.equals( "struct._image1d_buffer_t" ); - bool i2d = name.equals( "struct._image2d_t" ); - bool i2da = name.equals( "struct._image2d_array_t" ); - bool i3d = name.equals( "struct._image3d_t" ); - bool c32 = name.equals( "struct._counter32_t" ); - bool c64 = name.equals( "struct._counter64_t" ); - if (i1d || i1da || i1db || i2d | i2da || i3d) { - if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) { - std::string imageArg(";image:"); - imageArg += Ip->getName().str() + ":"; - if (i1d) imageArg += "1D:"; - else if (i1da) imageArg += "1DA:"; - else if (i1db) imageArg += "1DB:"; - else if (i2d) imageArg += "2D:"; - else if (i2da) imageArg += "2DA:"; - else if (i3d) imageArg += "3D:"; - - if (isKernel) { - if (mGM->isReadOnlyImage (mMF->getFunction()->getName(), - (ROArg + WOArg))) { - imageArg += "RO:" + itostr(ROArg); - O << "dcl_resource_id(" << ROArg << ")_type("; - if (i1d) O << "1d"; - else if (i1da) O << "1darray"; - else if (i1db) O << "buffer"; - else if (i2d) O << "2d"; - else if (i2da) O << "2darray"; - else if (i3d) O << "3d"; - O << ")_fmtx(unknown)_fmty(unknown)" - << "_fmtz(unknown)_fmtw(unknown)\n"; - ++ROArg; - } else if (mGM->isWriteOnlyImage(mMF->getFunction()->getName(), - (ROArg + WOArg))) { - uint32_t offset = 0; - offset += WOArg; - imageArg += "WO:" + itostr(offset & 0x7); - O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type("; - if (i1d) O << "1d"; - else if (i1da) O << "1darray"; - else if (i1db) O << "buffer"; - else if (i2d) O << "2d"; - else if (i2da) O << "2darray"; - else if (i3d) O << "3d"; - O << ")_fmtx(uint)\n"; - ++WOArg; - } else { - imageArg += "RW:" + itostr(ROArg + WOArg); - } - } - imageArg += ":1:" + itostr(mCBSize * 16); - mMFI->addMetadata(imageArg, true); - mMFI->addi32Literal(mCBSize); - mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1; - ++NumArg; - } else { - mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]); - ++NumArg; - } - } else if (c32 || c64) { - std::string counterArg(";counter:"); - counterArg += Ip->getName().str() + ":" - + itostr(c32 ? 32 : 64) + ":" - + itostr(CounterNum++) + ":1:" + itostr(mCBSize * 16); - mMFI->addMetadata(counterArg, true); - ++NumArg; - ++mCBSize; - } else { - updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, - F); - ++NumArg; - ++mCBSize; - } - } - else if (CT->getTypeID() == Type::StructTyID - && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - const TargetData *td = mTM->getTargetData(); - const StructLayout *sl = td->getStructLayout(dyn_cast<StructType>(CT)); - int bytesize = sl->getSizeInBytes(); - int reservedsize = (bytesize + 15) & ~15; - int numSlots = reservedsize >> 4; - if (!numSlots) { - numSlots = 1; - } - std::string structArg(";value:"); - structArg += Ip->getName().str() + ":struct:" - + itostr(bytesize) + ":1:" + itostr(mCBSize * 16); - mMFI->addMetadata(structArg, true); - mCBSize += numSlots; - ++NumArg; - } else if (CT->isIntOrIntVectorTy() - || CT->isFPOrFPVectorTy() - || CT->getTypeID() == Type::ArrayTyID - || CT->getTypeID() == Type::PointerTyID - || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) { - updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, F); - ++NumArg; - ++mCBSize; - } else { - assert(0 && "Cannot process current pointer argument"); - mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); - ++NumArg; - } - } else { - assert(0 && "Cannot process current kernel argument"); - mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); - ++NumArg; - } - ++Ip; - } -} - -void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter, - llvm::raw_ostream &O, - const std::string &name) { -#ifdef UPSTREAM_LLVM - mName = name; - std::string kernelName; - kernelName = name; - int kernelId = mGM->getOrCreateFunctionID(kernelName); - O << "func " << kernelId << " ; " << kernelName << "\n"; - if (mSTM->is64bit()) { - O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].xy\n"; - } else { - O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].x\n"; - } - O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0\n"; -#endif -} - -void AMDILKernelManager::printGroupSize(llvm::raw_ostream& O) { - // The HD4XXX generation of hardware does not support a 3D launch, so we need - // to use dcl_num_thread_per_group to specify the launch size. If the launch - // size is specified via a kernel attribute, we print it here. Otherwise we - // use the the default size. - if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - if (mGM->hasRWG(mName) - || !mMFI->usesLocal()) { - // if the user has specified what the required workgroup size is then we - // need to compile for that size and that size only. Otherwise we compile - // for the max workgroup size that is passed in as an option to the - // backend. - O << "dcl_num_thread_per_group "; - O << mGM->getLocal(mName, 0) << ", "; - O << mGM->getLocal(mName, 1) << ", "; - O << mGM->getLocal(mName, 2) << " \n"; - } else { - // If the kernel uses local memory, then the kernel is being - // compiled in single wavefront mode. So we have to generate code slightly - // different. - O << "dcl_num_thread_per_group " - << mSTM->device()->getWavefrontSize() - << ", 1, 1 \n"; - } - } else { - // Otherwise we generate for devices that support 3D launch natively. If - // the reqd_workgroup_size attribute was specified, then we can specify the - // exact launch dimensions. - if (mGM->hasRWG(mName)) { - O << "dcl_num_thread_per_group "; - O << mGM->getLocal(mName, 0) << ", "; - O << mGM->getLocal(mName, 1) << ", "; - O << mGM->getLocal(mName, 2) << " \n"; - } else { - // Otherwise we specify the largest workgroup size that can be launched. - O << "dcl_max_thread_per_group " << mGM->getLocal(mName, 3) << " \n"; - } - } - // Now that we have specified the workgroup size, lets declare the local - // memory size. If we are using hardware and we know the value at compile - // time, then we need to declare the correct value. Otherwise we should just - // declare the maximum size. - if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) { - size_t kernelLocalSize = (mGM->getHWLocalSize(mName) + 3) & ~3; - if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]); - } - // If there is a local pointer as a kernel argument, we don't know the size - // at compile time, so we reserve all of the space. - if (mMFI->usesLocal() && (mMFI->hasLocalArg() || !kernelLocalSize)) { - O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") " - << mSTM->device()->getMaxLDSSize() << "\n"; - mMFI->setUsesMem(AMDILDevice::LDS_ID); - } else if (kernelLocalSize) { - // We know the size, so lets declare it correctly. - O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") " - << kernelLocalSize << "\n"; - mMFI->setUsesMem(AMDILDevice::LDS_ID); - } - } - // If the device supports the region memory extension, which maps to our - // hardware GDS memory, then lets declare it so we can use it later on. - if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) { - size_t kernelGDSSize = (mGM->getHWRegionSize(mName) + 3) & ~3; - if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]); - } - // If there is a region pointer as a kernel argument, we don't know the size - // at compile time, so we reserved all of the space. - if (mMFI->usesRegion() && (mMFI->hasRegionArg() || !kernelGDSSize)) { - O << "dcl_gds_id(" << DEFAULT_GDS_ID << - ") " << mSTM->device()->getMaxGDSSize() << "\n"; - mMFI->setUsesMem(AMDILDevice::GDS_ID); - } else if (kernelGDSSize) { - // We know the size, so lets declare it. - O << "dcl_gds_id(" << DEFAULT_GDS_ID << - ") " << kernelGDSSize << "\n"; - mMFI->setUsesMem(AMDILDevice::GDS_ID); - } - } -} - -void -AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O) { - // If we are a HD4XXX generation device, then we only support a single uav - // surface, so we declare it and leave - if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - O << "dcl_raw_uav_id(" - << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) - << ")\n"; - mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); - getIntrinsicSetup(AsmPrinter, O); - return; - } - // If we are supporting multiple uav's view the MultiUAV capability, then we - // need to print out the declarations here. MultiUAV conflicts with write - // images, so they only use 8 - NumWriteImages uav's. Therefor only pointers - // with ID's < 8 will get printed. - if (mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { - binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrint, O); - mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); - } - // If arena segments are supported, then we should emit them now. Arena - // segments are similiar to MultiUAV, except ArenaSegments are virtual and up - // to 1024 of them can coexist. These are more compiler hints for CAL and thus - // cannot overlap in any form. Each ID maps to a seperate piece of memory and - // CAL determines whether the load/stores should go to the fast path/slow path - // based on the usage and instruction. - if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) { - binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O); - } - // Now that we have printed out all of the arena and multi uav declaration, - // now we must print out the default raw uav id. This always exists on HD5XXX - // and HD6XXX hardware. The reason is that the hardware supports 12 UAV's and - // 11 are taken up by MultiUAV/Write Images and Arena. However, if we do not - // have UAV 11 as the raw UAV and there are 8 write images, we must revert - // everything to the arena and not print out the default raw uav id. - if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD5XXX - || mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) { - if ((mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) < 11 && - mSTM->getGlobalManager()->getNumWriteImages(mName) - != OPENCL_MAX_WRITE_IMAGES - && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) - || mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) { - if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID) - && mMFI->uav_count(mSTM->device()-> - getResourceID(AMDILDevice::RAW_UAV_ID))) { - O << "dcl_raw_uav_id(" - << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - O << ")\n"; - mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); - } - } - // If we have not printed out the arena ID yet, then do so here. - if (!mMFI->usesMem(AMDILDevice::ARENA_UAV_ID) - && mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaUAV)) { - O << "dcl_arena_uav_id(" - << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) << ")\n"; - mMFI->setUsesMem(AMDILDevice::ARENA_UAV_ID); - } - } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrintSI, O); - mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); - } - getIntrinsicSetup(AsmPrinter, O); -} - -void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter, - llvm::raw_ostream &O) -{ - O << "mov r0.z, vThreadGrpIdFlat.x\n" - << "mov r1022.xyz0, vTidInGrp.xyz\n"; - if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) { - O << "mov r1023.xyz0, vThreadGrpId.xyz\n"; - } else { - O << "imul r0.w, cb0[2].x, cb0[2].y\n" - // Calculates the local id. - // Calculates the group id. - << "umod r1023.x, r0.z, cb0[2].x\n" - << "udiv r1023.y, r0.z, cb0[2].x\n" - << "umod r1023.y, r1023.y, cb0[2].y\n" - << "udiv r1023.z, r0.z, r0.w\n"; - } - // Calculates the global id. - if (mGM->hasRWG(mName) && 0) { - // Anytime we declare a literal, we need to reserve it, if it is not emitted - // in emitLiterals. - mMFI->addReservedLiterals(1); - O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", "; - O << mGM->getLocal(mName, 0) << ", "; - O << mGM->getLocal(mName, 1) << ", "; - O << mGM->getLocal(mName, 2) << ", "; - O << "0\n"; - O << "imad r1021.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n"; - mMFI->addReservedLiterals(1); - } else { - O << "imad r1021.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n"; - } - - // Add the global/group offset for multi-launch support. - O << "iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0\n" - << "iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0\n" - // moves the flat group id. - << "mov r1023.w, r0.z\n"; -#ifdef UPSTREAM_LLVM - if (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) { - if (mSTM->is64bit()) { - O << "umul " << AsmPrinter->getRegisterName(AMDIL::T2) - << ".x0, r1023.w, cb0[4].z\n" - << "i64add " << AsmPrinter->getRegisterName(AMDIL::T2) - << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T2) - << ".xy, cb0[4].xy\n"; - - } else { - O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2) - << ".x, r1023.w, cb0[4].y, cb0[4].x\n"; - } - } - // Shift the flat group id to be in bytes instead of dwords. - O << "ishl r1023.w, r1023.w, l0.z\n"; - if (mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) { - if (mSTM->is64bit()) { - O << "umul " << AsmPrinter->getRegisterName(AMDIL::T1) - << ".x0, vAbsTidFlat.x, cb0[3].z\n" - << "i64add " << AsmPrinter->getRegisterName(AMDIL::T1) - << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T1) - << ".xy, cb0[3].xy\n"; - - } else { - O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1) - << ".x, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n"; - } - } else { - O << "mov " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x, l0.0\n"; - } -#endif - if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { - O << "udiv r1024.xyz, r1021.xyz, cb0[10].xyz\n"; - if (mGM->hasRWR(mName) && 0) { - // Anytime we declare a literal, we need to reserve it, if it is not emitted - // in emitLiterals. - mMFI->addReservedLiterals(1); - O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", "; - O << mGM->getLocal(mName, 0) << ", "; - O << mGM->getLocal(mName, 1) << ", "; - O << mGM->getLocal(mName, 2) << ", "; - O << "0\n"; - O << "imad r1025.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n"; - mMFI->addReservedLiterals(1); - } else { - O << "imad r1025.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n"; - } - } -} - -void AMDILKernelManager::printFooter(llvm::raw_ostream &O) { - O << "ret\n"; - O << "endfunc ; " << mName << "\n"; -} - -void -AMDILKernelManager::printMetaData(llvm::raw_ostream &O, uint32_t id, bool kernel) { - if (kernel) { - int kernelId = mGM->getOrCreateFunctionID(mName); - mMFI->addCalledFunc(id); - mUniqueID = kernelId; - mIsKernel = true; - } - printKernelArgs(O); - if (kernel) { - mIsKernel = false; - mMFI->eraseCalledFunc(id); - mUniqueID = id; - } -} - -void AMDILKernelManager::setKernel(bool kernel) { - mIsKernel = kernel; - if (kernel) { - mWasKernel = mIsKernel; - } -} - -void AMDILKernelManager::setID(uint32_t id) -{ - mUniqueID = id; -} - -void AMDILKernelManager::setName(const std::string &name) { - mName = name; -} - -bool AMDILKernelManager::isKernel() { - return mIsKernel; -} - -bool AMDILKernelManager::wasKernel() { - return mWasKernel; -} - -void AMDILKernelManager::setImageWrite() { - mHasImageWrite = true; -} - -void AMDILKernelManager::setOutputInst() { - mHasOutputInst = true; -} - -void AMDILKernelManager::printConstantToRegMapping( - AMDILAsmPrinter *RegNames, - uint32_t &LII, - llvm::raw_ostream &O, - uint32_t &Counter, - uint32_t Buffer, - uint32_t n, - const char *lit, - uint32_t fcall, - bool isImage, - bool isHWCB) -{ -#ifdef UPSTREAM_LLVM - // TODO: This needs to be enabled or SC will never statically index into the - // CB when a pointer is used. - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) && isHWCB) { - const char *name = RegNames->getRegisterName(LII); - O << "mov " << name << ", l5.x\n"; - ++LII; - Counter++; - return; - } - for (uint32_t x = 0; x < n; ++x) { - const char *name = RegNames->getRegisterName(LII); - if (isImage) { - O << "mov " << name << ", l" << mMFI->getIntLits(Counter++) << "\n"; - } else { - O << "mov " << name << ", cb" <<Buffer<< "[" <<Counter++<< "]\n"; - } - switch(fcall) { - case 1093: - O << "ishr " << name << ", " << name << ".xxyy, l3.0y0y\n" - "ishl " << name << ", " << name << ", l3.y\n" - "ishr " << name << ", " << name << ", l3.y\n"; - break; - case 1092: - O << "ishr " << name << ", " << name << ".xx, l3.0y\n" - "ishl " << name << ", " << name << ", l3.y\n" - "ishr " << name << ", " << name << ", l3.y\n"; - break; - case 1091: - O << "ishr " << name << ", " << name << ".xxxx, l3.0zyx\n" - "ishl " << name << ", " << name << ", l3.x\n" - "ishr " << name << ", " << name << ", l3.x\n"; - break; - case 1090: - O << "ishr " << name << ", " << name << ".xx, l3.0z\n" - "ishl " << name << ".xy__, " << name << ".xy, l3.x\n" - "ishr " << name << ".xy__, " << name << ".xy, l3.x\n"; - break; - default: - break; - }; - if (lit) { - O << "ishl " << name << ", " << name - << ", " << lit << "\n"; - O << "ishr " << name << ", " << name - << ", " << lit << "\n"; - } - if (isImage) { - Counter += NUM_EXTRA_SLOTS_PER_IMAGE; - } - ++LII; - } -#endif -} - -void -AMDILKernelManager::printCopyStructPrivate(const StructType *ST, - llvm::raw_ostream &O, - size_t stackSize, - uint32_t Buffer, - uint32_t mLitIdx, - uint32_t &Counter) -{ - size_t n = ((stackSize + 15) & ~15) >> 4; - for (size_t x = 0; x < n; ++x) { - O << "mov r2, cb" << Buffer << "[" << Counter++ << "]\n"; - O << "mov r1.x, r0.x\n"; - if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) { - O << "ishr r1.x, r1.x, l0.x\n"; - O << "mov x" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) - <<"[r1.x], r2\n"; - } else { - O << "uav_raw_store_id(" << - mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID) - << ") mem0, r1.x, r2\n"; - } - } else { - O << "uav_raw_store_id(" << - mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) - << ") mem0, r1.x, r2\n"; - } - O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n"; - } -} - -void AMDILKernelManager::printKernelArgs(llvm::raw_ostream &O) { - std::string version(";version:"); - version += itostr(AMDIL_MAJOR_VERSION) + ":" - + itostr(AMDIL_MINOR_VERSION) + ":" + itostr(AMDIL_REVISION_NUMBER); - O << ";ARGSTART:" <<mName<< "\n"; - if (mIsKernel) { - O << version << "\n"; - O << ";device:" <<mSTM->getDeviceName() << "\n"; - } - O << ";uniqueid:" <<mUniqueID<< "\n"; - - size_t local = mGM->getLocalSize(mName); - size_t hwlocal = ((mGM->getHWLocalSize(mName) + 3) & (~0x3)); - size_t region = mGM->getRegionSize(mName); - size_t hwregion = ((mGM->getHWRegionSize(mName) + 3) & (~0x3)); - bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem); - bool usehwregion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV); - if (mIsKernel) { - O << ";memory:" << ((usehwprivate) ? - (useuavprivate) ? "uav" : "hw" : "" ) << "private:" - <<(((mMFI->getStackSize() + 15) & (~0xF)))<< "\n"; - } - if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { - O << ";memory:" << ((usehwregion) ? "hw" : "") << "region:" - << ((usehwregion) ? hwregion : hwregion + region) << "\n"; - } - O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:" - << ((usehwlocal) ? hwlocal : hwlocal + local) << "\n"; - - if (mIsKernel) { - if (mGM->hasRWG(mName)) { - O << ";cws:" << mGM->getLocal(mName, 0) << ":"; - O << mGM->getLocal(mName, 1) << ":"; - O << mGM->getLocal(mName, 2) << "\n"; - } - if (mGM->hasRWR(mName)) { - O << ";crs:" << mGM->getRegion(mName, 0) << ":"; - O << mGM->getRegion(mName, 1) << ":"; - O << mGM->getRegion(mName, 2) << "\n"; - } - } - if (mIsKernel) { - for (std::vector<std::string>::iterator ib = mMFI->kernel_md_begin(), - ie = mMFI->kernel_md_end(); ib != ie; ++ib) { - O << (*ib) << "\n"; - } - } - for (std::set<std::string>::iterator ib = mMFI->func_md_begin(), - ie = mMFI->func_md_end(); ib != ie; ++ib) { - O << (*ib) << "\n"; - } - if (!mMFI->func_empty()) { - O << ";function:" << mMFI->func_size(); - binaryForEach(mMFI->func_begin(), mMFI->func_end(), commaPrint, O); - O << "\n"; - } - - if (!mSTM->device()->isSupported(AMDILDeviceInfo::MacroDB) - && !mMFI->intr_empty()) { - O << ";intrinsic:" << mMFI->intr_size(); - binaryForEach(mMFI->intr_begin(), mMFI->intr_end(), commaPrint, O); - O << "\n"; - } - - if (!mIsKernel) { - binaryForEach(mMFI->printf_begin(), mMFI->printf_end(), printfPrint, O); - mMF->getMMI().getObjFileInfo<AMDILModuleInfo>().add_printf_offset( - mMFI->printf_size()); - } else { - for (StringMap<SamplerInfo>::iterator - smb = mMFI->sampler_begin(), - sme = mMFI->sampler_end(); smb != sme; ++ smb) { - O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx - << ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1) - << ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0) - << "\n"; - } - } - if (mSTM->is64bit()) { - O << ";memory:64bitABI\n"; - } - - if (mMFI->errors_empty()) { - binaryForEach(mMFI->errors_begin(), mMFI->errors_end(), errorPrint, O); - } - // This has to come last - if (mIsKernel - && mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - if (mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) > - mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) { - if (mMFI->uav_size() == 1) { - if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) - && *(mMFI->uav_begin()) >= ARENA_SEGMENT_RESERVED_UAVS) { - O << ";uavid:" - << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); - O << "\n"; - } else { - O << ";uavid:" << *(mMFI->uav_begin()) << "\n"; - } - } else if (mMFI->uav_count(mSTM->device()-> - getResourceID(AMDILDevice::RAW_UAV_ID))) { - O << ";uavid:" - << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - O << "\n"; - } else { - O << ";uavid:" - << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); - O << "\n"; - } - } else if (mSTM->getGlobalManager()->getNumWriteImages(mName) != - OPENCL_MAX_WRITE_IMAGES - && !mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) - && mMFI->uav_count(mSTM->device()-> - getResourceID(AMDILDevice::RAW_UAV_ID))) { - O << ";uavid:" - << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) << "\n"; - } else if (mMFI->uav_size() == 1) { - O << ";uavid:" << *(mMFI->uav_begin()) << "\n"; - } else { - O << ";uavid:" - << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); - O << "\n"; - } - } - O << ";ARGEND:" << mName << "\n"; -} - -void AMDILKernelManager::printArgCopies(llvm::raw_ostream &O, - AMDILAsmPrinter *RegNames) -{ - Function::const_arg_iterator I = mMF->getFunction()->arg_begin(); - Function::const_arg_iterator Ie = mMF->getFunction()->arg_end(); - uint32_t Counter = 0; - - if (mMFI->getArgSize()) { - O << "dcl_cb cb1"; - O << "[" << (mMFI->getArgSize() >> 4) << "]\n"; - mMFI->setUsesMem(AMDILDevice::CONSTANT_ID); - } - const Function *F = mMF->getFunction(); - // Get the stack size - uint32_t stackSize = mMFI->getStackSize(); - uint32_t privateSize = mMFI->getScratchSize(); - uint32_t stackOffset = (privateSize + 15) & (~0xF); - if (stackSize - && mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) { - // TODO: If the size is too large, we need to fall back to software emulated - // instead of using the hardware capability. - int size = (((stackSize + 15) & (~0xF)) >> 4); - if (size > 4096) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]); - } - if (size) { - // For any stack variables, we need to declare the literals for them so that - // we can use them when we copy our data to the stack. - mMFI->addReservedLiterals(1); - // Anytime we declare a literal, we need to reserve it, if it is not emitted - // in emitLiterals. -#ifdef UPSTREAM_LLVM - O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize << ", " - << privateSize << ", 16, " << ((stackSize == privateSize) ? 0 : stackOffset) << "\n" - << "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l" - << mMFI->getNumLiterals() << ".w\n"; - if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - O << "dcl_indexed_temp_array x" - << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "[" - << size << "]\n"; - } else { - O << "dcl_typeless_uav_id(" - << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) - << ")_stride(4)_length(" << (size << 4 )<< ")_access(private)\n"; - - } - O << "mov " << RegNames->getRegisterName(AMDIL::FP) - << ".x, l" << mMFI->getNumLiterals() << ".0\n"; -#endif - mMFI->setUsesMem(AMDILDevice::SCRATCH_ID); - } - } - I = mMF->getFunction()->arg_begin(); - int32_t count = 0; - // uint32_t Image = 0; - bool displaced1 = false; - bool displaced2 = false; - uint32_t curReg = AMDIL::R1; - // TODO: We don't handle arguments that were pushed onto the stack! - for (; I != Ie; ++I) { - Type *curType = I->getType(); - unsigned int Buffer = 1; - O << "; Kernel arg setup: " << I->getName() << "\n"; - if (curType->isIntegerTy() || curType->isFloatingPointTy()) { - switch (curType->getPrimitiveSizeInBits()) { - default: - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1); - break; - case 16: - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, - "l3.y" ); - break; - case 8: - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x" ); - break; - } -#ifdef UPSTREAM_LLVM - } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) { - Type *ET = VT->getElementType(); - int numEle = VT->getNumElements(); - switch (ET->getPrimitiveSizeInBits()) { - default: - if (numEle == 3) { - O << "mov " << RegNames->getRegisterName(curReg); - O << ".x, cb" << Buffer << "[" << Counter << "].x\n"; - curReg++; - O << "mov " << RegNames->getRegisterName(curReg); - O << ".x, cb" << Buffer << "[" << Counter << "].y\n"; - curReg++; - O << "mov " << RegNames->getRegisterName(curReg); - O << ".x, cb" << Buffer << "[" << Counter << "].z\n"; - curReg++; - Counter++; - } else { - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, - (numEle+2) >> 2); - } - break; - case 64: - if (numEle == 3) { - O << "mov " << RegNames->getRegisterName(curReg); - O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n"; - curReg++; - O << "mov " << RegNames->getRegisterName(curReg); - O << ".xy, cb" << Buffer << "[" << Counter++ << "].zw\n"; - curReg++; - O << "mov " << RegNames->getRegisterName(curReg); - O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n"; - curReg++; - Counter++; - } else { - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, - (numEle) >> 1); - } - break; - case 16: - { - switch (numEle) { - default: - printConstantToRegMapping(RegNames, curReg, O, Counter, - Buffer, (numEle+2) >> 2, "l3.y", 1093); - if (numEle == 3) { - O << "mov " << RegNames->getRegisterName(curReg) << ".x, "; - O << RegNames->getRegisterName(curReg) << ".y\n"; - ++curReg; - O << "mov " << RegNames->getRegisterName(curReg) << ".x, "; - O << RegNames->getRegisterName(curReg) << ".z\n"; - ++curReg; - } - break; - case 2: - printConstantToRegMapping(RegNames, curReg, O, Counter, - Buffer, 1, "l3.y", 1092); - break; - } - break; - } - case 8: - { - switch (numEle) { - default: - printConstantToRegMapping(RegNames, curReg, O, Counter, - Buffer, (numEle+2) >> 2, "l3.x", 1091); - if (numEle == 3) { - O << "mov " << RegNames->getRegisterName(curReg) << ".x, "; - O << RegNames->getRegisterName(curReg) << ".y\n"; - ++curReg; - O << "mov " << RegNames->getRegisterName(curReg) << ".x, "; - O << RegNames->getRegisterName(curReg) << ".z\n"; - ++curReg; - } - break; - case 2: - printConstantToRegMapping(RegNames, curReg, O, Counter, - Buffer, 1, "l3.x", 1090); - break; - } - break; - } - } -#endif - } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) { - Type *CT = PT->getElementType(); - const StructType *ST = dyn_cast<StructType>(CT); - if (ST && ST->isOpaque()) { - bool i1d = ST->getName() == "struct._image1d_t"; - bool i1da = ST->getName() == "struct._image1d_array_t"; - bool i1db = ST->getName() == "struct._image1d_buffer_t"; - bool i2d = ST->getName() == "struct._image2d_t"; - bool i2da = ST->getName() == "struct._image2d_array_t"; - bool i3d = ST->getName() == "struct._image3d_t"; - bool is_image = i1d || i1da || i1db || i2d || i2da || i3d; - if (is_image) { - if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) { - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, - 1, NULL, 0, is_image); - } else { - mMFI->addErrorMsg( - amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]); - ++curReg; - } - } else { - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1); - } - } else if (CT->isStructTy() - && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - StructType *ST = dyn_cast<StructType>(CT); - bool i1d = ST->getName() == "struct._image1d_t"; - bool i1da = ST->getName() == "struct._image1d_array_t"; - bool i1db = ST->getName() == "struct._image1d_buffer_t"; - bool i2d = ST->getName() == "struct._image2d_t"; - bool i2da = ST->getName() == "struct._image2d_array_t"; - bool i3d = ST->getName() == "struct._image3d_t"; - bool is_image = i1d || i1da || i1db || i2d || i2da || i3d; - if (is_image) { - if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) { - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, - 1, NULL, 0, is_image); - } else { - mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]); - ++curReg; - } - } else { - if (count) { - // Anytime we declare a literal, we need to reserve it, if it - // is not emitted in emitLiterals. - mMFI->addReservedLiterals(1); - O << "dcl_literal l" << mMFI->getNumLiterals() << ", " - << -stackSize << ", " << stackSize << ", 16, " - << stackOffset << "\n"; - } - ++count; - size_t structSize; - structSize = (getTypeSize(ST) + 15) & ~15; - stackOffset += structSize; -#ifdef UPSTREAM_LLVM - O << "mov " << RegNames->getRegisterName((curReg)) << ", l" - << mMFI->getNumLiterals()<< ".w\n"; - if (!displaced1) { - O << "mov r1011, r1\n"; - displaced1 = true; - } - if (!displaced2 && strcmp(RegNames->getRegisterName(curReg), "r1")) { - O << "mov r1010, r2\n"; - displaced2 = true; - } -#endif - printCopyStructPrivate(ST, O, structSize, Buffer, mMFI->getNumLiterals(), - Counter); - ++curReg; - } - } else if (CT->isIntOrIntVectorTy() - || CT->isFPOrFPVectorTy() - || CT->isArrayTy() - || CT->isPointerTy() - || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) { - if (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) { - const kernel& krnl = mGM->getKernel(F->getName()); - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, - 1, NULL, 0, false, - mGM->usesHWConstant(krnl, I->getName())); - } else if (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) { - // TODO: If we are region address space, the first region pointer, no - // array pointers exist, and hardware RegionMem is enabled then we can - // zero out register as the initial offset is zero. - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1); - } else if (PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) { - // TODO: If we are local address space, the first local pointer, no - // array pointers exist, and hardware LocalMem is enabled then we can - // zero out register as the initial offset is zero. - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1); - } else { - printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1); - } - } else { - assert(0 && "Current type is not supported!"); - mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); - ++curReg; - } - } else { - assert(0 && "Current type is not supported!"); - mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); - ++curReg; - } - } - if (displaced1) { - O << "mov r1, r1011\n"; - } - if (displaced2) { - O << "mov r2, r1010\n"; - } - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) { - const kernel& krnl = mGM->getKernel(F->getName()); - uint32_t constNum = 0; - for (uint32_t x = 0; x < mSTM->device()->getMaxNumCBs(); ++x) { - if (krnl.constSizes[x]) { - O << "dcl_cb cb" << x + CB_BASE_OFFSET; - O << "[" << (((krnl.constSizes[x] + 15) & ~15) >> 4) << "]\n"; - ++constNum; - mMFI->setUsesMem(AMDILDevice::CONSTANT_ID); - } - } - // TODO: If we run out of constant resources, we need to push some of the - // constant pointers to the software emulated section. - if (constNum > mSTM->device()->getMaxNumCBs()) { - assert(0 && "Max constant buffer limit passed!"); - mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]); - } - } -} - - const char * -AMDILKernelManager::getTypeName(const Type *ptr, const char *symTab) -{ - // symTab argument is ignored... - LLVMContext& ctx = ptr->getContext(); - switch (ptr->getTypeID()) { - case Type::StructTyID: - { - const StructType *ST = cast<StructType>(ptr); - if (!ST->isOpaque()) - return "struct"; - // ptr is a pre-LLVM 3.0 "opaque" type. - StringRef name = ST->getName(); - if (name.equals( "struct._event_t" )) return "event"; - if (name.equals( "struct._image1d_t" )) return "image1d"; - if (name.equals( "struct._image1d_array_t" )) return "image1d_array"; - if (name.equals( "struct._image2d_t" )) return "image2d"; - if (name.equals( "struct._image2d_array_t" )) return "image2d_array"; - if (name.equals( "struct._image3d_t" )) return "image3d"; - if (name.equals( "struct._counter32_t" )) return "counter32"; - if (name.equals( "struct._counter64_t" )) return "counter64"; - return "opaque"; - break; - } - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - { - const AMDILSubtarget *mSTM= mTM->getSubtargetImpl(); - if (!mSTM->device()->usesHardware(AMDILDeviceInfo::DoubleOps)) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[DOUBLE_NOT_SUPPORTED]); - } - return "double"; - } - case Type::IntegerTyID: - { - if (ptr == Type::getInt8Ty(ctx)) { - return "i8"; - } else if (ptr == Type::getInt16Ty(ctx)) { - return "i16"; - } else if (ptr == Type::getInt32Ty(ctx)) { - return "i32"; - } else if(ptr == Type::getInt64Ty(ctx)) { - return "i64"; - } - break; - } - default: - break; - case Type::ArrayTyID: - { - const ArrayType *AT = cast<ArrayType>(ptr); - const Type *name = AT->getElementType(); - return getTypeName(name, symTab); - break; - } - case Type::VectorTyID: - { - const VectorType *VT = cast<VectorType>(ptr); - const Type *name = VT->getElementType(); - return getTypeName(name, symTab); - break; - } - case Type::PointerTyID: - { - const PointerType *PT = cast<PointerType>(ptr); - const Type *name = PT->getElementType(); - return getTypeName(name, symTab); - break; - } - case Type::FunctionTyID: - { - const FunctionType *FT = cast<FunctionType>(ptr); - const Type *name = FT->getReturnType(); - return getTypeName(name, symTab); - break; - } - } - ptr->dump(); - mMFI->addErrorMsg(amd::CompilerErrorMessage[UNKNOWN_TYPE_NAME]); - return "unknown"; -} - -void AMDILKernelManager::emitLiterals(llvm::raw_ostream &O) { - char buffer[256]; - std::map<uint32_t, uint32_t>::iterator ilb, ile; - for (ilb = mMFI->begin_32(), ile = mMFI->end_32(); ilb != ile; ++ilb) { - uint32_t a = ilb->first; - O << "dcl_literal l" <<ilb->second<< ", "; - sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x", a, a, a, a); - O << buffer << "; f32:i32 " << ilb->first << "\n"; - } - std::map<uint64_t, uint32_t>::iterator llb, lle; - for (llb = mMFI->begin_64(), lle = mMFI->end_64(); llb != lle; ++llb) { - uint32_t v[2]; - uint64_t a = llb->first; - memcpy(v, &a, sizeof(uint64_t)); - O << "dcl_literal l" <<llb->second<< ", "; - sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f64:i64 ", - v[0], v[1], v[0], v[1]); - O << buffer << llb->first << "\n"; - } - std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator vlb, vle; - for (vlb = mMFI->begin_128(), vle = mMFI->end_128(); vlb != vle; ++vlb) { - uint32_t v[2][2]; - uint64_t a = vlb->first.first; - uint64_t b = vlb->first.second; - memcpy(v[0], &a, sizeof(uint64_t)); - memcpy(v[1], &b, sizeof(uint64_t)); - O << "dcl_literal l" << vlb->second << ", "; - sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f128:i128 ", - v[0][0], v[0][1], v[1][0], v[1][1]); - O << buffer << vlb->first.first << vlb->first.second << "\n"; - } -} - -// If the value is not known, then the uav is set, otherwise the mValueIDMap -// is used. -void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID) { - if (value) { - mValueIDMap[value] = ID; - } -} - -uint32_t AMDILKernelManager::getUAVID(const Value *value) { - if (mValueIDMap.find(value) != mValueIDMap.end()) { - return mValueIDMap[value]; - } - - if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - return mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID); - } else { - return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID); - } -} - diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.h b/src/gallium/drivers/radeon/AMDILKernelManager.h deleted file mode 100644 index d5eb296cbf2..00000000000 --- a/src/gallium/drivers/radeon/AMDILKernelManager.h +++ /dev/null @@ -1,177 +0,0 @@ -//===-- AMDILKernelManager.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Class that handles the metadata/abi management for the -// ASM printer. Handles the parsing and generation of the metadata -// for each kernel and keeps track of its arguments. -// -//==-----------------------------------------------------------------------===// -#ifndef _AMDILKERNELMANAGER_H_ -#define _AMDILKERNELMANAGER_H_ -#include "AMDIL.h" -#include "AMDILDevice.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/ValueMap.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Function.h" - -#include <map> -#include <set> -#include <string> - -#define IMAGETYPE_2D 0 -#define IMAGETYPE_3D 1 -#define RESERVED_LIT_COUNT 6 - -namespace llvm { -class AMDILGlobalManager; -class AMDILSubtarget; -class AMDILMachineFunctionInfo; -class AMDILTargetMachine; -class AMDILAsmPrinter; -class StructType; -class Value; -class TypeSymbolTable; -class MachineFunction; -class MachineInstr; -class ConstantFP; -class PrintfInfo; - - -class AMDILKernelManager { -public: - typedef enum { - RELEASE_ONLY, - DEBUG_ONLY, - ALWAYS - } ErrorMsgEnum; - AMDILKernelManager(AMDILTargetMachine *TM, AMDILGlobalManager *GM); - virtual ~AMDILKernelManager(); - - /// Clear the state of the KernelManager putting it in its most initial state. - void clear(); - void setMF(MachineFunction *MF); - - /// Process the specific kernel parsing out the parameter information for the - /// kernel. - void processArgMetadata(llvm::raw_ostream &O, - uint32_t buf, bool kernel); - - - /// Prints the header for the kernel which includes the groupsize declaration - /// and calculation of the local/group/global id's. - void printHeader(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O, - const std::string &name); - - virtual void printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O); - virtual void printGroupSize(llvm::raw_ostream &O); - - /// Copies the data from the runtime setup constant buffers into registers so - /// that the program can correctly access memory or data that was set by the - /// host program. - void printArgCopies(llvm::raw_ostream &O, AMDILAsmPrinter* RegNames); - - /// Prints out the end of the function. - void printFooter(llvm::raw_ostream &O); - - /// Prints out the metadata for the specific function depending if it is a - /// kernel or not. - void printMetaData(llvm::raw_ostream &O, uint32_t id, bool isKernel = false); - - /// Set bool value on whether to consider the function a kernel or a normal - /// function. - void setKernel(bool kernel); - - /// Set the unique ID of the kernel/function. - void setID(uint32_t id); - - /// Set the name of the kernel/function. - void setName(const std::string &name); - - /// Flag to specify whether the function is a kernel or not. - bool isKernel(); - - /// Flag that specifies whether this function has a kernel wrapper. - bool wasKernel(); - - void getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O); - - // Returns whether a compiler needs to insert a write to memory or not. - bool useCompilerWrite(const MachineInstr *MI); - - // Set the flag that there exists an image write. - void setImageWrite(); - void setOutputInst(); - - const char *getTypeName(const Type *name, const char * symTab); - - void emitLiterals(llvm::raw_ostream &O); - - // Set the uav id for the specific pointer value. If value is NULL, then the - // ID sets the default ID. - void setUAVID(const Value *value, uint32_t ID); - - // Get the UAV id for the specific pointer value. - uint32_t getUAVID(const Value *value); - -private: - - /// Helper function that prints the actual metadata and should only be called - /// by printMetaData. - void printKernelArgs(llvm::raw_ostream &O); - void printCopyStructPrivate(const StructType *ST, - llvm::raw_ostream &O, - size_t stackSize, - uint32_t Buffer, - uint32_t mLitIdx, - uint32_t &counter); - virtual void - printConstantToRegMapping(AMDILAsmPrinter *RegNames, - uint32_t &LII, - llvm::raw_ostream &O, - uint32_t &counter, - uint32_t Buffer, - uint32_t n, - const char *lit = NULL, - uint32_t fcall = 0, - bool isImage = false, - bool isHWCB = false); - void updatePtrArg(llvm::Function::const_arg_iterator Ip, - int numWriteImages, - int raw_uav_buffer, - int counter, - bool isKernel, - const Function *F); - /// Name of the current kernel. - std::string mName; - uint32_t mUniqueID; - bool mIsKernel; - bool mWasKernel; - bool mCompilerWrite; - /// Flag to specify if an image write has occured or not in order to not add a - /// compiler specific write if no other writes to memory occured. - bool mHasImageWrite; - bool mHasOutputInst; - - /// Map from const Value * to UAV ID. - std::map<const Value *, uint32_t> mValueIDMap; - - AMDILTargetMachine * mTM; - const AMDILSubtarget * mSTM; - AMDILGlobalManager * mGM; - /// This is the global offset of the printf string id's. - MachineFunction *mMF; - AMDILMachineFunctionInfo *mMFI; -}; // class AMDILKernelManager - -} // llvm namespace -#endif // _AMDILKERNELMANAGER_H_ diff --git a/src/gallium/drivers/radeon/AMDILLiteralManager.cpp b/src/gallium/drivers/radeon/AMDILLiteralManager.cpp index 43167f57001..52bf7674e7d 100644 --- a/src/gallium/drivers/radeon/AMDILLiteralManager.cpp +++ b/src/gallium/drivers/radeon/AMDILLiteralManager.cpp @@ -12,7 +12,6 @@ #include "AMDIL.h" #include "AMDILAlgorithms.tpp" -#include "AMDILKernelManager.h" #include "AMDILMachineFunctionInfo.h" #include "AMDILSubtarget.h" #include "AMDILTargetMachine.h" @@ -43,7 +42,6 @@ namespace { bool trackLiterals(MachineBasicBlock::iterator *bbb); TargetMachine &TM; const AMDILSubtarget *mSTM; - AMDILKernelManager *mKM; AMDILMachineFunctionInfo *mMFI; int32_t mLitIdx; bool mChanged; @@ -71,7 +69,6 @@ bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF) { const AMDILTargetMachine *amdtm = reinterpret_cast<const AMDILTargetMachine *>(&TM); mSTM = dynamic_cast<const AMDILSubtarget *>(amdtm->getSubtargetImpl()); - mKM = const_cast<AMDILKernelManager *>(mSTM->getKernelManager()); safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), std::bind1st(std::mem_fun(&AMDILLiteralManager::trackLiterals), this)); return mChanged; diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp index 9383bfcb77b..4859fe9df51 100644 --- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp +++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp @@ -16,8 +16,6 @@ #include "AMDILAlgorithms.tpp" #include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILKernelManager.h" #include "AMDILMachineFunctionInfo.h" #include "AMDILUtilityFunctions.h" #include "llvm/ADT/Statistic.h" @@ -39,7 +37,6 @@ STATISTIC(PointerAssignments, "Number of dynamic pointer " "assigments discovered"); STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); #endif -STATISTIC(LocalFuncs, "Number of get_local_size(N) functions removed"); using namespace llvm; // The Peephole optimization pass is used to do simple last minute optimizations @@ -76,7 +73,6 @@ private: void doIsConstCallConversionIfNeeded(); bool mChanged; bool mDebug; - bool mRWGOpt; bool mConvertAtomics; CodeGenOpt::Level optLevel; // Run a series of tests to see if we can optimize a CALL instruction. @@ -104,7 +100,6 @@ private: // specified then the result of get_local_size is known at compile time and // can be returned accordingly. bool isRWGLocalOpt(CallInst *CI); - void expandRWGLocalOpt(CallInst *CI); // On northern island cards, the division is slightly less accurate than on // previous generations, so we need to utilize a more accurate division. So we // can translate the accurate divide to a normal divide on all other cards. @@ -251,15 +246,12 @@ AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F) // arena path. Function::arg_iterator argB = F.arg_begin(); Function::arg_iterator argE = F.arg_end(); - AMDILKernelManager *KM = mSTM->getKernelManager(); AMDILMachineFunctionInfo *mMFI = getAnalysis<MachineFunctionAnalysis>().getMF() .getInfo<AMDILMachineFunctionInfo>(); for (; argB != argE; ++argB) { if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) { - KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)); mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)); } else { - KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)); mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)); } } @@ -276,16 +268,6 @@ AMDILPeepholeOpt::runOnFunction(Function &MF) } mCTX = &MF.getType()->getContext(); mConvertAtomics = true; - if (dumpAllIntoArena(MF)) { - for (Function::const_arg_iterator cab = MF.arg_begin(), - cae = MF.arg_end(); cab != cae; ++cab) { - const Argument *arg = cab; - AMDILKernelManager *KM = mSTM->getKernelManager(); - KM->setUAVID(getBasePointerValue(arg), - mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)); - } - } - mRWGOpt = mSTM->getGlobalManager()->hasRWG(MF.getName()); safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations), this)); @@ -313,10 +295,6 @@ AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) CI->eraseFromParent(); return true; } - if (isRWGLocalOpt(CI)) { - expandRWGLocalOpt(CI); - return false; - } if (propagateSamplerInst(CI)) { return false; } @@ -390,26 +368,7 @@ AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) } StringRef name = F->getName(); if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { - Value *ptr = CI->getOperand(0); - const Value *basePtr = getBasePointerValue(ptr); - const Argument *Arg = dyn_cast<Argument>(basePtr); - if (Arg) { - AMDILGlobalManager *GM = mSTM->getGlobalManager(); - int32_t id = GM->getArgID(Arg); - if (id >= 0) { - std::stringstream ss; - ss << name.data() << "_" << id << '\n'; - std::string val; - ss >> val; - F = dyn_cast<Function>( - F->getParent() ->getOrInsertFunction(val, F->getFunctionType())); - atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F)); - } else { - mConvertAtomics = false; - } - } else { - mConvertAtomics = false; - } + mConvertAtomics = false; } return false; } @@ -1088,28 +1047,11 @@ AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI) bool AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI) { - return (CI != NULL && mRWGOpt + return (CI != NULL && CI->getOperand(CI->getNumOperands() - 1)->getName() == "__amdil_get_local_size_int"); } -void -AMDILPeepholeOpt::expandRWGLocalOpt(CallInst *CI) -{ - assert(isRWGLocalOpt(CI) && - "This optmization only works when the call inst is get_local_size!"); - std::vector<Constant *> consts; - for (uint32_t x = 0; x < 3; ++x) { - uint32_t val = mSTM->getGlobalManager()->getLocal(mF->getName(), x); - consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), val)); - } - consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), 0)); - Value *cVec = ConstantVector::get(consts); - CI->replaceAllUsesWith(cVec); - ++LocalFuncs; - return; -} - bool AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI) { diff --git a/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp b/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp index 95614f477c0..17d9e8e3013 100644 --- a/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp +++ b/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp @@ -15,7 +15,6 @@ #endif #include "AMDILAlgorithms.tpp" -#include "AMDILKernelManager.h" #include "AMDILMachineFunctionInfo.h" #include "AMDILModuleInfo.h" #include "AMDILTargetMachine.h" @@ -50,7 +49,6 @@ namespace private: bool expandPrintf(BasicBlock::iterator *bbb); AMDILMachineFunctionInfo *mMFI; - AMDILKernelManager *mKM; bool mChanged; SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap; }; @@ -256,7 +254,6 @@ AMDILPrintfConvert::expandPrintf(BasicBlock::iterator *bbb) AMDILPrintfConvert::runOnFunction(Function &MF) { mChanged = false; - mKM = TM.getSubtarget<AMDILSubtarget>().getKernelManager(); mMFI = getAnalysis<MachineFunctionAnalysis>().getMF() .getInfo<AMDILMachineFunctionInfo>(); bVecMap.clear(); diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp index 898833d9c0e..11b6bbe0c01 100644 --- a/src/gallium/drivers/radeon/AMDILSubtarget.cpp +++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp @@ -14,8 +14,6 @@ #include "AMDILSubtarget.h" #include "AMDIL.h" #include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILKernelManager.h" #include "AMDILUtilityFunctions.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index bfffdd1eeb8..524289f5e0d 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -26,13 +26,11 @@ CPP_SOURCES := \ AMDILEvergreenDevice.cpp \ AMDILELFWriterInfo.cpp \ AMDILFrameLowering.cpp \ - AMDILGlobalManager.cpp \ AMDILInliner.cpp \ AMDILInstrInfo.cpp \ AMDILIntrinsicInfo.cpp \ AMDILISelDAGToDAG.cpp \ AMDILISelLowering.cpp \ - AMDILKernelManager.cpp \ AMDILLiteralManager.cpp \ AMDILMachineFunctionInfo.cpp \ AMDILMachinePeephole.cpp \ |