summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp11
-rw-r--r--src/gallium/drivers/radeon/AMDILGlobalManager.cpp1353
-rw-r--r--src/gallium/drivers/radeon/AMDILGlobalManager.h256
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp73
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.cpp1356
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.h177
-rw-r--r--src/gallium/drivers/radeon/AMDILLiteralManager.cpp3
-rw-r--r--src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp62
-rw-r--r--src/gallium/drivers/radeon/AMDILPrintfConvert.cpp3
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.cpp2
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources2
11 files changed, 23 insertions, 3275 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
index b006f84629e..f8e1cd95dc9 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -13,8 +13,6 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
-#include "AMDILGlobalManager.h"
-#include "AMDILKernelManager.h"
#include "AMDILTargetMachine.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
@@ -44,16 +42,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
:
AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
Subtarget(TT, CPU, FS),
- mGM(new AMDILGlobalManager(0 /* Debug mode */)),
- mKM(new AMDILKernelManager(this, mGM)),
mDump(false)
{
- /* XXX: Add these two initializations to fix a segfault, not sure if this
- * is correct. These are normally initialized in the AsmPrinter, but AMDGPU
- * does not use the asm printer */
- Subtarget.setGlobalManager(mGM);
- Subtarget.setKernelManager(mKM);
/* TLInfo uses InstrInfo so it must be initialized after. */
if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
InstrInfo = new R600InstrInfo(*this);
@@ -66,8 +57,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
AMDGPUTargetMachine::~AMDGPUTargetMachine()
{
- delete mGM;
- delete mKM;
}
bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.cpp b/src/gallium/drivers/radeon/AMDILGlobalManager.cpp
deleted file mode 100644
index eafd36eaa4e..00000000000
--- a/src/gallium/drivers/radeon/AMDILGlobalManager.cpp
+++ /dev/null
@@ -1,1353 +0,0 @@
-//===-- AMDILGlobalManager.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILGlobalManager.h"
-#include "AMDILDevices.h"
-#include "AMDILKernelManager.h"
-#include "AMDILSubtarget.h"
-
-#include "AMDILAlgorithms.tpp"
-#include "AMDILGlobalManager.h"
-#include "AMDILDevices.h"
-#include "AMDILKernelManager.h"
-#include "AMDILSubtarget.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/FormattedStream.h"
-
-#include <cstdio>
-
-using namespace llvm;
-
-AMDILGlobalManager::AMDILGlobalManager(bool debugMode) {
- mOffset = 0;
- mReservedBuffs = 0;
- symTab = NULL;
- mCurrentCPOffset = 0;
- mDebugMode = debugMode;
-}
-
-AMDILGlobalManager::~AMDILGlobalManager() {
-}
-
-void AMDILGlobalManager::print(llvm::raw_ostream &O) {
- if (!mDebugMode) {
- return;
- }
- O << ";AMDIL Global Manager State Dump:\n";
- O << ";\tSubtarget: " << mSTM << "\tSymbol Table: " << symTab
- << "\n";
- O << ";\tConstant Offset: " << mOffset << "\tCP Offset: "
- << mCurrentCPOffset << "\tReserved Buffers: " << mReservedBuffs
- << "\n";
- if (!mImageNameMap.empty()) {
- llvm::DenseMap<uint32_t, llvm::StringRef>::iterator imb, ime;
- O << ";\tGlobal Image Mapping: \n";
- for (imb = mImageNameMap.begin(), ime = mImageNameMap.end(); imb != ime;
- ++imb) {
- O << ";\t\tImage ID: " << imb->first << "\tName: "
- << imb->second << "\n";
- }
- }
- std::set<llvm::StringRef>::iterator sb, se;
- if (!mByteStore.empty()) {
- O << ";Byte Store Kernels: \n";
- for (sb = mByteStore.begin(), se = mByteStore.end(); sb != se; ++sb) {
- O << ";\t\t" << *sb << "\n";
- }
- }
- if (!mIgnoreStr.empty()) {
- O << ";\tIgnored Data Strings: \n";
- for (sb = mIgnoreStr.begin(), se = mIgnoreStr.end(); sb != se; ++sb) {
- O << ";\t\t" << *sb << "\n";
- }
- }
-}
-
-void AMDILGlobalManager::dump() {
- print(errs());
-}
-
-static const constPtr *getConstPtr(const kernel &krnl, const std::string &arg) {
- llvm::SmallVector<constPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
- for (begin = krnl.constPtr.begin(), end = krnl.constPtr.end();
- begin != end; ++begin) {
- if (!strcmp(begin->name.data(),arg.c_str())) {
- return &(*begin);
- }
- }
- return NULL;
-}
-#if 0
-static bool structContainsSub32bitType(const StructType *ST) {
- StructType::element_iterator eib, eie;
- for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
- Type *ptr = *eib;
- uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr);
- if (!size) {
- if (const StructType *ST = dyn_cast<StructType>(ptr)) {
- if (structContainsSub32bitType(ST)) {
- return true;
- }
- }
- } else if (size < 32) {
- return true;
- }
- }
- return false;
-}
-#endif
-
-void AMDILGlobalManager::processModule(const Module &M,
- const AMDILTargetMachine *mTM)
-{
- Module::const_global_iterator GI;
- Module::const_global_iterator GE;
- symTab = "NoSymTab";
- mSTM = mTM->getSubtargetImpl();
- for (GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
- const GlobalValue *GV = GI;
- if (mDebugMode) {
- GV->dump();
- errs() << "\n";
- }
- llvm::StringRef GVName = GV->getName();
- const char *name = GVName.data();
- if (!strncmp(name, "sgv", 3)) {
- mKernelArgs[GVName] = parseSGV(GV);
- } else if (!strncmp(name, "fgv", 3)) {
- // we can ignore this since we don't care about the filename
- // string
- } else if (!strncmp(name, "lvgv", 4)) {
- mLocalArgs[GVName] = parseLVGV(GV);
- } else if (!strncmp(name, "llvm.image.annotations", 22)) {
- if (strstr(name, "__OpenCL")
- && strstr(name, "_kernel")) {
- // we only want to parse the image information if the
- // image is a kernel, we might have to parse out the
- // information if a function is found that is not
- // inlined.
- parseImageAnnotate(GV);
- }
- } else if (!strncmp(name, "llvm.global.annotations", 23)) {
- parseGlobalAnnotate(GV);
- } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) {
- if (strstr(name, "__OpenCL")
- && strstr(name, "_kernel")) {
- // we only want to parse constant pointer information
- // if it is a kernel
- parseConstantPtrAnnotate(GV);
- }
- } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) {
- // These are skipped as we handle them later in AMDILPointerManager.cpp
- } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS #
- parseAutoArray(GV, false);
- } else if (strstr(name, "clregion")) {
- parseAutoArray(GV, true);
- } else if (!GV->use_empty()
- && mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
- parseConstantPtr(GV);
- }
- }
- allocateGlobalCB();
-
- safeForEach(M.begin(), M.end(),
- std::bind1st(
- std::mem_fun(&AMDILGlobalManager::checkConstPtrsUseHW),
- this));
-}
-
-void AMDILGlobalManager::allocateGlobalCB(void) {
- uint32_t maxCBSize = mSTM->device()->getMaxCBSize();
- uint32_t offset = 0;
- uint32_t curCB = 0;
- uint32_t swoffset = 0;
- for (StringMap<constPtr>::iterator cpb = mConstMems.begin(),
- cpe = mConstMems.end(); cpb != cpe; ++cpb) {
- bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
- cpb->second.usesHardware = false;
- if (constHW) {
- // If we have a limit on the max CB Size, then we need to make sure that
- // the constant sizes fall within the limits.
- if (cpb->second.size <= maxCBSize) {
- if (offset + cpb->second.size > maxCBSize) {
- offset = 0;
- curCB++;
- }
- if (curCB < mSTM->device()->getMaxNumCBs()) {
- cpb->second.cbNum = curCB + CB_BASE_OFFSET;
- cpb->second.offset = offset;
- offset += (cpb->second.size + 15) & (~15);
- cpb->second.usesHardware = true;
- continue;
- }
- }
- }
- cpb->second.cbNum = 0;
- cpb->second.offset = swoffset;
- swoffset += (cpb->second.size + 15) & (~15);
- }
- if (!mConstMems.empty()) {
- mReservedBuffs = curCB + 1;
- }
-}
-
-bool AMDILGlobalManager::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
-{
- Function::const_arg_iterator AI, AE;
- const Function *func = *FCI;
- std::string name = func->getName();
- if (!strstr(name.c_str(), "__OpenCL")
- || !strstr(name.c_str(), "_kernel")) {
- return false;
- }
- kernel &krnl = mKernels[name];
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
- for (AI = func->arg_begin(), AE = func->arg_end();
- AI != AE; ++AI) {
- const Argument *Arg = &(*AI);
- const PointerType *P = dyn_cast<PointerType>(Arg->getType());
- if (!P) {
- continue;
- }
- if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
- continue;
- }
- const constPtr *ptr = getConstPtr(krnl, Arg->getName());
- if (ptr) {
- continue;
- }
- constPtr constAttr;
- constAttr.name = Arg->getName();
- constAttr.size = this->mSTM->device()->getMaxCBSize();
- constAttr.base = Arg;
- constAttr.isArgument = true;
- constAttr.isArray = false;
- constAttr.offset = 0;
- constAttr.usesHardware =
- mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
- if (constAttr.usesHardware) {
- constAttr.cbNum = krnl.constPtr.size() + 2;
- } else {
- constAttr.cbNum = 0;
- }
- krnl.constPtr.push_back(constAttr);
- }
- }
- // Now lets make sure that only the N largest buffers
- // get allocated in hardware if we have too many buffers
- uint32_t numPtrs = krnl.constPtr.size();
- if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) {
- // TODO: Change this routine so it sorts
- // constPtr instead of pulling the sizes out
- // and then grab the N largest and disable the rest
- llvm::SmallVector<uint32_t, 16> sizes;
- for (uint32_t x = 0; x < numPtrs; ++x) {
- sizes.push_back(krnl.constPtr[x].size);
- }
- std::sort(sizes.begin(), sizes.end());
- uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() -
- mReservedBuffs);
- uint32_t safeSize = sizes[numToDisable-1];
- for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
- if (krnl.constPtr[x].size <= safeSize) {
- krnl.constPtr[x].usesHardware = false;
- --numToDisable;
- }
- }
- }
- // Renumber all of the valid CB's so that
- // they are linear increase
- uint32_t CBid = 2 + mReservedBuffs;
- for (uint32_t x = 0; x < numPtrs; ++x) {
- if (krnl.constPtr[x].usesHardware) {
- krnl.constPtr[x].cbNum = CBid++;
- }
- }
- for (StringMap<constPtr>::iterator cpb = mConstMems.begin(),
- cpe = mConstMems.end(); cpb != cpe; ++cpb) {
- if (cpb->second.usesHardware) {
- krnl.constPtr.push_back(cpb->second);
- }
- }
- for (uint32_t x = 0; x < krnl.constPtr.size(); ++x) {
- constPtr &c = krnl.constPtr[x];
- uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
- if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
- if ((c.size + c.offset) > krnl.constSizes[cbNum]) {
- krnl.constSizes[cbNum] =
- ((c.size + c.offset) + 15) & ~15;
- }
- } else {
- krnl.constPtr[x].usesHardware = false;
- }
- }
- return false;
-}
-
-int32_t AMDILGlobalManager::getArrayOffset(const llvm::StringRef &a) const {
- StringMap<arraymem>::const_iterator iter = mArrayMems.find(a);
- if (iter != mArrayMems.end()) {
- return iter->second.offset;
- } else {
- return -1;
- }
-}
-
-int32_t AMDILGlobalManager::getConstOffset(const llvm::StringRef &a) const {
- StringMap<constPtr>::const_iterator iter = mConstMems.find(a);
- if (iter != mConstMems.end()) {
- return iter->second.offset;
- } else {
- return -1;
- }
-}
-
-bool AMDILGlobalManager::getConstHWBit(const llvm::StringRef &name) const {
- StringMap<constPtr>::const_iterator iter = mConstMems.find(name);
- if (iter != mConstMems.end()) {
- return iter->second.usesHardware;
- } else {
- return false;
- }
-}
-
-// As of right now we only care about the required group size
-// so we can skip the variable encoding
-kernelArg AMDILGlobalManager::parseSGV(const GlobalValue *G) {
- kernelArg nArg;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
- memset(&nArg, 0, sizeof(nArg));
- for (int x = 0; x < 3; ++x) {
- nArg.reqGroupSize[x] = mSTM->getDefaultSize(x);
- nArg.reqRegionSize[x] = mSTM->getDefaultSize(x);
- }
- if (!GV || !GV->hasInitializer()) {
- return nArg;
- }
- const Constant *CV = GV->getInitializer();
- const ConstantDataArray *CA =dyn_cast_or_null<ConstantDataArray>(CV);
-
- if (!CA || !CA->isString()) {
- return nArg;
- }
- std::string init = CA->getAsString();
- size_t pos = init.find("RWG");
- if (pos != llvm::StringRef::npos) {
- pos += 3;
- std::string LWS = init.substr(pos, init.length() - pos);
- const char *lws = LWS.c_str();
- sscanf(lws, "%u,%u,%u", &(nArg.reqGroupSize[0]),
- &(nArg.reqGroupSize[1]),
- &(nArg.reqGroupSize[2]));
- nArg.mHasRWG = true;
- }
- pos = init.find("RWR");
- if (pos != llvm::StringRef::npos) {
- pos += 3;
- std::string LWS = init.substr(pos, init.length() - pos);
- const char *lws = LWS.c_str();
- sscanf(lws, "%u,%u,%u", &(nArg.reqRegionSize[0]),
- &(nArg.reqRegionSize[1]),
- &(nArg.reqRegionSize[2]));
- nArg.mHasRWR = true;
- }
- return nArg;
-}
-
-localArg AMDILGlobalManager::parseLVGV(const GlobalValue *G) {
- localArg nArg;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
- nArg.name = "";
- if (!GV || !GV->hasInitializer()) {
- return nArg;
- }
- const ConstantArray *CA =
- dyn_cast_or_null<ConstantArray>(GV->getInitializer());
- if (!CA) {
- return nArg;
- }
- for (size_t x = 0, y = CA->getNumOperands(); x < y; ++x) {
- const Value *local = CA->getOperand(x);
- const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(local);
- if (!CE || !CE->getNumOperands()) {
- continue;
- }
- nArg.name = (*(CE->op_begin()))->getName();
- if (mArrayMems.find(nArg.name) != mArrayMems.end()) {
- nArg.local.push_back(&(mArrayMems[nArg.name]));
- }
- }
- return nArg;
-}
-
-void AMDILGlobalManager::parseConstantPtrAnnotate(const GlobalValue *G) {
- const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
- const ConstantArray *CA =
- dyn_cast_or_null<ConstantArray>(GV->getInitializer());
- if (!CA) {
- return;
- }
- uint32_t numOps = CA->getNumOperands();
- for (uint32_t x = 0; x < numOps; ++x) {
- const Value *V = CA->getOperand(x);
- const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
- if (!CS) {
- continue;
- }
- assert(CS->getNumOperands() == 2 && "There can only be 2"
- " fields, a name and size");
- const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
- const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
- assert(nameField && "There must be a constant name field");
- assert(sizeField && "There must be a constant size field");
- const GlobalVariable *nameGV =
- dyn_cast<GlobalVariable>(nameField->getOperand(0));
- const ConstantDataArray *nameArray =
- dyn_cast<ConstantDataArray>(nameGV->getInitializer());
- // Lets add this string to the set of strings we should ignore processing
- mIgnoreStr.insert(nameGV->getName());
- if (mConstMems.find(nameGV->getName())
- != mConstMems.end()) {
- // If we already processesd this string as a constant, lets remove it from
- // the list of known constants. This way we don't process unneeded data
- // and don't generate code/metadata for strings that are never used.
- mConstMems.erase(mConstMems.find(nameGV->getName()));
- } else {
- mIgnoreStr.insert(CS->getOperand(0)->getName());
- }
- constPtr constAttr;
- constAttr.name = nameArray->getAsString();
- constAttr.size = (sizeField->getZExtValue() + 15) & ~15;
- constAttr.base = CS;
- constAttr.isArgument = true;
- constAttr.isArray = false;
- constAttr.cbNum = 0;
- constAttr.offset = 0;
- constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
- // Now that we have all our constant information,
- // lets update the kernel
- llvm::StringRef kernelName = G->getName().data() + 30;
- kernel k;
- if (mKernels.find(kernelName) != mKernels.end()) {
- k = mKernels[kernelName];
- } else {
- k.curSize = 0;
- k.curRSize = 0;
- k.curHWSize = 0;
- k.curHWRSize = 0;
- k.constSize = 0;
- k.lvgv = NULL;
- k.sgv = NULL;
- memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
- }
- constAttr.cbNum = k.constPtr.size() + 2;
- k.constPtr.push_back(constAttr);
- mKernels[kernelName] = k;
- }
-}
-
-void AMDILGlobalManager::parseImageAnnotate(const GlobalValue *G) {
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
- const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!CA) {
- return;
- }
- if (isa<GlobalValue>(CA)) {
- return;
- }
- uint32_t e = CA->getNumOperands();
- if (!e) {
- return;
- }
- kernel k;
- llvm::StringRef name = G->getName().data() + 23;
- if (mKernels.find(name) != mKernels.end()) {
- k = mKernels[name];
- } else {
- k.curSize = 0;
- k.curRSize = 0;
- k.curHWSize = 0;
- k.curHWRSize = 0;
- k.constSize = 0;
- k.lvgv = NULL;
- k.sgv = NULL;
- memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
- }
- for (uint32_t i = 0; i != e; ++i) {
- const Value *V = CA->getOperand(i);
- const Constant *C = dyn_cast<Constant>(V);
- const ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
- if (CS && CS->getNumOperands() == 2) {
- if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) !=
- mConstMems.end()) {
- // If we already processesd this string as a constant, lets remove it
- // from the list of known constants. This way we don't process unneeded
- // data and don't generate code/metadata for strings that are never
- // used.
- mConstMems.erase(
- mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()));
- } else {
- mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName());
- }
- const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
- uint32_t val = (uint32_t)CI->getZExtValue();
- if (val == 1) {
- k.readOnly.insert(i);
- } else if (val == 2) {
- k.writeOnly.insert(i);
- } else {
- assert(!"Unknown image type value!");
- }
- }
- }
- mKernels[name] = k;
-}
-
-void AMDILGlobalManager::parseAutoArray(const GlobalValue *GV, bool isRegion) {
- const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
- Type *Ty = (G) ? G->getType() : NULL;
- arraymem tmp;
- tmp.isHW = true;
- tmp.offset = 0;
- tmp.vecSize = getTypeSize(Ty, true);
- tmp.isRegion = isRegion;
- mArrayMems[GV->getName()] = tmp;
-}
-
-void AMDILGlobalManager::parseConstantPtr(const GlobalValue *GV) {
- const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
- Type *Ty = (G) ? G->getType() : NULL;
- constPtr constAttr;
- constAttr.name = G->getName();
- constAttr.size = getTypeSize(Ty, true);
- constAttr.base = GV;
- constAttr.isArgument = false;
- constAttr.isArray = true;
- constAttr.offset = 0;
- constAttr.cbNum = 0;
- constAttr.usesHardware = false;
- mConstMems[GV->getName()] = constAttr;
-}
-
-void AMDILGlobalManager::parseGlobalAnnotate(const GlobalValue *G) {
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
- if (!GV->hasInitializer()) {
- return;
- }
- const Constant *CT = GV->getInitializer();
- if (!CT || isa<GlobalValue>(CT)) {
- return;
- }
- const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
- if (!CA) {
- return;
- }
-
- unsigned int nKernels = CA->getNumOperands();
- for (unsigned int i = 0, e = nKernels; i != e; ++i) {
- parseKernelInformation(CA->getOperand(i));
- }
-}
-
-void AMDILGlobalManager::parseKernelInformation(const Value *V) {
- if (isa<GlobalValue>(V)) {
- return;
- }
- const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
- if (!CS) {
- return;
- }
- uint32_t N = CS->getNumOperands();
- if (N != 5) {
- return;
- }
- kernel tmp;
-
- tmp.curSize = 0;
- tmp.curRSize = 0;
- tmp.curHWSize = 0;
- tmp.curHWRSize = 0;
- // The first operand is always a pointer to the kernel.
- const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
- llvm::StringRef kernelName = "";
- if (CV->getNumOperands()) {
- kernelName = (*(CV->op_begin()))->getName();
- }
-
- // If we have images, then we have already created the kernel and we just need
- // to get the kernel information.
- if (mKernels.find(kernelName) != mKernels.end()) {
- tmp = mKernels[kernelName];
- } else {
- tmp.curSize = 0;
- tmp.curRSize = 0;
- tmp.curHWSize = 0;
- tmp.curHWRSize = 0;
- tmp.constSize = 0;
- tmp.lvgv = NULL;
- tmp.sgv = NULL;
- memset(tmp.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
- }
-
-
- // The second operand is SGV, there can only be one so we don't need to worry
- // about parsing out multiple data points.
- CV = dyn_cast<Constant>(CS->getOperand(1));
-
- llvm::StringRef sgvName;
- if (CV->getNumOperands()) {
- sgvName = (*(CV->op_begin()))->getName();
- }
-
- if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
- tmp.sgv = &mKernelArgs[sgvName];
- }
- // The third operand is FGV, which is skipped
- // The fourth operand is LVGV
- // There can be multiple local arrays, so we
- // need to handle each one seperatly
- CV = dyn_cast<Constant>(CS->getOperand(3));
- llvm::StringRef lvgvName = "";
- if (CV->getNumOperands()) {
- lvgvName = (*(CV->op_begin()))->getName();
- }
- if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) {
- localArg *ptr = &mLocalArgs[lvgvName];
- tmp.lvgv = ptr;
- llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
- for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
- if ((*ib)->isRegion) {
- if ((*ib)->isHW) {
- (*ib)->offset = tmp.curHWRSize;
- tmp.curHWRSize += ((*ib)->vecSize + 15) & ~15;
- } else {
- (*ib)->offset = tmp.curRSize;
- tmp.curRSize += ((*ib)->vecSize + 15) & ~15;
- }
- } else {
- if ((*ib)->isHW) {
- (*ib)->offset = tmp.curHWSize;
- tmp.curHWSize += ((*ib)->vecSize + 15) & ~15;
- } else {
- (*ib)->offset = tmp.curSize;
- tmp.curSize += ((*ib)->vecSize + 15) & ~15;
- }
- }
- }
- }
-
- // The fifth operand is NULL
- mKernels[kernelName] = tmp;
-}
-
-const kernel &AMDILGlobalManager::getKernel(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- assert(isKernel(name) && "Must be a kernel to call getKernel");
- return iter->second;
-}
-
-bool AMDILGlobalManager::isKernel(const llvm::StringRef &name) const {
- return (mKernels.find(name) != mKernels.end());
-}
-
-bool AMDILGlobalManager::isWriteOnlyImage(const llvm::StringRef &name,
- uint32_t iID) const {
- const StringMap<kernel>::const_iterator kiter = mKernels.find(name);
- if (kiter == mKernels.end()) {
- return false;
- }
- return kiter->second.writeOnly.count(iID);
-}
-
-uint32_t
-AMDILGlobalManager::getNumWriteImages(const llvm::StringRef &name) const {
- char *env = NULL;
- env = getenv("GPU_DISABLE_RAW_UAV");
- if (env && env[0] == '1') {
- return 8;
- }
- const StringMap<kernel>::const_iterator kiter = mKernels.find(name);
- if (kiter == mKernels.end()) {
- return 0;
- } else {
- return kiter->second.writeOnly.size();
- }
-}
-
-bool AMDILGlobalManager::isReadOnlyImage(const llvm::StringRef &name,
- uint32_t iID) const {
- const StringMap<kernel>::const_iterator kiter = mKernels.find(name);
- if (kiter == mKernels.end()) {
- return false;
- }
- return kiter->second.readOnly.count(iID);
-}
-
-bool AMDILGlobalManager::hasRWG(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- kernelArg *ptr = iter->second.sgv;
- if (ptr) {
- return ptr->mHasRWG;
- }
- }
- return false;
-}
-
-bool AMDILGlobalManager::hasRWR(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- kernelArg *ptr = iter->second.sgv;
- if (ptr) {
- return ptr->mHasRWR;
- }
- }
- return false;
-}
-
-uint32_t
-AMDILGlobalManager::getMaxGroupSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- kernelArg *sgv = iter->second.sgv;
- if (sgv) {
- return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
- }
- }
- return mSTM->getDefaultSize(0) *
- mSTM->getDefaultSize(1) *
- mSTM->getDefaultSize(2);
-}
-
-uint32_t
-AMDILGlobalManager::getMaxRegionSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- kernelArg *sgv = iter->second.sgv;
- if (sgv) {
- return sgv->reqRegionSize[0] *
- sgv->reqRegionSize[1] *
- sgv->reqRegionSize[2];
- }
- }
- return mSTM->getDefaultSize(0) *
- mSTM->getDefaultSize(1) *
- mSTM->getDefaultSize(2);
-}
-
-uint32_t AMDILGlobalManager::getRegionSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- return iter->second.curRSize;
- } else {
- return 0;
- }
-}
-
-uint32_t AMDILGlobalManager::getLocalSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- return iter->second.curSize;
- } else {
- return 0;
- }
-}
-
-uint32_t AMDILGlobalManager::getConstSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- return iter->second.constSize;
- } else {
- return 0;
- }
-}
-
-uint32_t
-AMDILGlobalManager::getHWRegionSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- return iter->second.curHWRSize;
- } else {
- return 0;
- }
-}
-
-uint32_t AMDILGlobalManager::getHWLocalSize(const llvm::StringRef &name) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end()) {
- return iter->second.curHWSize;
- } else {
- return 0;
- }
-}
-
-int32_t AMDILGlobalManager::getArgID(const Argument *arg) {
- DenseMap<const Argument *, int32_t>::iterator argiter = mArgIDMap.find(arg);
- if (argiter != mArgIDMap.end()) {
- return argiter->second;
- } else {
- return -1;
- }
-}
-
-
-uint32_t
-AMDILGlobalManager::getLocal(const llvm::StringRef &name, uint32_t dim) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end() && iter->second.sgv) {
- kernelArg *sgv = iter->second.sgv;
- switch (dim) {
- default: break;
- case 0:
- case 1:
- case 2:
- return sgv->reqGroupSize[dim];
- break;
- case 3:
- return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
- };
- }
- switch (dim) {
- default:
- return 1;
- case 3:
- return mSTM->getDefaultSize(0) *
- mSTM->getDefaultSize(1) *
- mSTM->getDefaultSize(2);
- case 2:
- case 1:
- case 0:
- return mSTM->getDefaultSize(dim);
- break;
- };
- return 1;
-}
-
-uint32_t
-AMDILGlobalManager::getRegion(const llvm::StringRef &name, uint32_t dim) const {
- StringMap<kernel>::const_iterator iter = mKernels.find(name);
- if (iter != mKernels.end() && iter->second.sgv) {
- kernelArg *sgv = iter->second.sgv;
- switch (dim) {
- default: break;
- case 0:
- case 1:
- case 2:
- return sgv->reqRegionSize[dim];
- break;
- case 3:
- return sgv->reqRegionSize[0] *
- sgv->reqRegionSize[1] *
- sgv->reqRegionSize[2];
- };
- }
- switch (dim) {
- default:
- return 1;
- case 3:
- return mSTM->getDefaultSize(0) *
- mSTM->getDefaultSize(1) *
- mSTM->getDefaultSize(2);
- case 2:
- case 1:
- case 0:
- return mSTM->getDefaultSize(dim);
- break;
- };
- return 1;
-}
-
-StringMap<constPtr>::iterator AMDILGlobalManager::consts_begin() {
- return mConstMems.begin();
-}
-
-
-StringMap<constPtr>::iterator AMDILGlobalManager::consts_end() {
- return mConstMems.end();
-}
-
-bool AMDILGlobalManager::byteStoreExists(StringRef S) const {
- return mByteStore.find(S) != mByteStore.end();
-}
-
-bool AMDILGlobalManager::usesHWConstant(const kernel &krnl,
- const llvm::StringRef &arg) {
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->usesHardware;
- } else {
- return false;
- }
-}
-
-uint32_t AMDILGlobalManager::getConstPtrSize(const kernel &krnl,
- const llvm::StringRef &arg)
-{
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->size;
- } else {
- return 0;
- }
-}
-
-uint32_t AMDILGlobalManager::getConstPtrOff(const kernel &krnl,
- const llvm::StringRef &arg)
-{
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->offset;
- } else {
- return 0;
- }
-}
-
-uint32_t AMDILGlobalManager::getConstPtrCB(const kernel &krnl,
- const llvm::StringRef &arg)
-{
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->cbNum;
- } else {
- return 0;
- }
-}
-
-void AMDILGlobalManager::calculateCPOffsets(const MachineFunction *MF,
- kernel &krnl)
-{
- const MachineConstantPool *MCP = MF->getConstantPool();
- if (!MCP) {
- return;
- }
- const std::vector<MachineConstantPoolEntry> consts = MCP->getConstants();
- size_t numConsts = consts.size();
- for (size_t x = 0; x < numConsts; ++x) {
- krnl.CPOffsets.push_back(
- std::make_pair<uint32_t, const Constant*>(
- mCurrentCPOffset, consts[x].Val.ConstVal));
- size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true);
- // Align the size to the vector boundary
- curSize = (curSize + 15) & (~15);
- mCurrentCPOffset += curSize;
- }
-}
-
-bool AMDILGlobalManager::isConstPtrArray(const kernel &krnl,
- const llvm::StringRef &arg) {
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->isArray;
- } else {
- return false;
- }
-}
-
-bool AMDILGlobalManager::isConstPtrArgument(const kernel &krnl,
- const llvm::StringRef &arg)
-{
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->isArgument;
- } else {
- return false;
- }
-}
-
-const Value *AMDILGlobalManager::getConstPtrValue(const kernel &krnl,
- const llvm::StringRef &arg) {
- const constPtr *curConst = getConstPtr(krnl, arg);
- if (curConst) {
- return curConst->base;
- } else {
- return NULL;
- }
-}
-
-static void
-dumpZeroElements(const StructType * const T, llvm::raw_ostream &O, bool asBytes);
-static void
-dumpZeroElements(const IntegerType * const T, llvm::raw_ostream &O, bool asBytes);
-static void
-dumpZeroElements(const ArrayType * const T, llvm::raw_ostream &O, bool asBytes);
-static void
-dumpZeroElements(const VectorType * const T, llvm::raw_ostream &O, bool asBytes);
-static void
-dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes);
-
-void dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes) {
- if (!T) {
- return;
- }
- switch(T->getTypeID()) {
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- assert(0 && "These types are not supported by this backend");
- default:
- case Type::DoubleTyID:
- if (asBytes) {
- O << ":0:0:0:0:0:0:0:0";
- } else {
- O << ":0";
- }
- break;
- case Type::FloatTyID:
- case Type::PointerTyID:
- case Type::FunctionTyID:
- if (asBytes) {
- O << ":0:0:0:0";
- } else {
- O << ":0";
- }
- break;
- case Type::IntegerTyID:
- dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
- break;
- case Type::StructTyID:
- {
- const StructType *ST = cast<StructType>(T);
- if (!ST->isOpaque()) {
- dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
- } else { // A pre-LLVM 3.0 opaque type
- if (asBytes) {
- O << ":0:0:0:0";
- } else {
- O << ":0";
- }
- }
- }
- break;
- case Type::ArrayTyID:
- dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
- break;
- case Type::VectorTyID:
- dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
- break;
- };
-}
-
-void
-dumpZeroElements(const StructType * const ST, llvm::raw_ostream &O, bool asBytes) {
- if (!ST) {
- return;
- }
- Type *curType;
- StructType::element_iterator eib = ST->element_begin();
- StructType::element_iterator eie = ST->element_end();
- for (;eib != eie; ++eib) {
- curType = *eib;
- dumpZeroElements(curType, O, asBytes);
- }
-}
-
-void
-dumpZeroElements(const IntegerType * const IT, llvm::raw_ostream &O, bool asBytes) {
- if (asBytes) {
- unsigned byteWidth = (IT->getBitWidth() >> 3);
- for (unsigned x = 0; x < byteWidth; ++x) {
- O << ":0";
- }
- }
-}
-
-void
-dumpZeroElements(const ArrayType * const AT, llvm::raw_ostream &O, bool asBytes) {
- size_t size = AT->getNumElements();
- for (size_t x = 0; x < size; ++x) {
- dumpZeroElements(AT->getElementType(), O, asBytes);
- }
-}
-
-void
-dumpZeroElements(const VectorType * const VT, llvm::raw_ostream &O, bool asBytes) {
- size_t size = VT->getNumElements();
- for (size_t x = 0; x < size; ++x) {
- dumpZeroElements(VT->getElementType(), O, asBytes);
- }
-}
-
-void AMDILGlobalManager::printConstantValue(const Constant *CAval,
- llvm::raw_ostream &O, bool asBytes) {
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) {
- bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
- if (isDouble) {
- double val = CFP->getValueAPF().convertToDouble();
- union dtol_union {
- double d;
- uint64_t l;
- char c[8];
- } conv;
- conv.d = val;
- if (!asBytes) {
- O << ":";
- O.write_hex(conv.l);
- } else {
- for (int i = 0; i < 8; ++i) {
- O << ":";
- O.write_hex((unsigned)conv.c[i] & 0xFF);
- }
- }
- } else {
- float val = CFP->getValueAPF().convertToFloat();
- union ftoi_union {
- float f;
- uint32_t u;
- char c[4];
- } conv;
- conv.f = val;
- if (!asBytes) {
- O << ":";
- O.write_hex(conv.u);
- } else {
- for (int i = 0; i < 4; ++i) {
- O << ":";
- O.write_hex((unsigned)conv.c[i] & 0xFF);
- }
- }
- }
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) {
- uint64_t zVal = CI->getValue().getZExtValue();
- if (!asBytes) {
- O << ":";
- O.write_hex(zVal);
- } else {
- switch (CI->getBitWidth()) {
- default:
- {
- union ltob_union {
- uint64_t l;
- char c[8];
- } conv;
- conv.l = zVal;
- for (int i = 0; i < 8; ++i) {
- O << ":";
- O.write_hex((unsigned)conv.c[i] & 0xFF);
- }
- }
- break;
- case 8:
- O << ":";
- O.write_hex(zVal & 0xFF);
- break;
- case 16:
- {
- union stob_union {
- uint16_t s;
- char c[2];
- } conv;
- conv.s = (uint16_t)zVal;
- O << ":";
- O.write_hex((unsigned)conv.c[0] & 0xFF);
- O << ":";
- O.write_hex((unsigned)conv.c[1] & 0xFF);
- }
- break;
- case 32:
- {
- union itob_union {
- uint32_t i;
- char c[4];
- } conv;
- conv.i = (uint32_t)zVal;
- for (int i = 0; i < 4; ++i) {
- O << ":";
- O.write_hex((unsigned)conv.c[i] & 0xFF);
- }
- }
- break;
- }
- }
- } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) {
- int y = CV->getNumOperands()-1;
- int x = 0;
- for (; x < y; ++x) {
- printConstantValue(CV->getOperand(x), O, asBytes);
- }
- printConstantValue(CV->getOperand(x), O, asBytes);
- } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) {
- int y = CS->getNumOperands();
- int x = 0;
- for (; x < y; ++x) {
- printConstantValue(CS->getOperand(x), O, asBytes);
- }
- } else if (const ConstantAggregateZero *CAZ
- = dyn_cast<ConstantAggregateZero>(CAval)) {
- int y = CAZ->getNumOperands();
- if (y > 0) {
- int x = 0;
- for (; x < y; ++x) {
- printConstantValue((llvm::Constant *)CAZ->getOperand(x),
- O, asBytes);
- }
- } else {
- if (asBytes) {
- dumpZeroElements(CAval->getType(), O, asBytes);
- } else {
- int y = getNumElements(CAval->getType())-1;
- for (int x = 0; x < y; ++x) {
- O << ":0";
- }
- O << ":0";
- }
- }
- } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) {
- int y = CA->getNumOperands();
- int x = 0;
- for (; x < y; ++x) {
- printConstantValue(CA->getOperand(x), O, asBytes);
- }
- } else if (dyn_cast<ConstantPointerNull>(CAval)) {
- O << ":0";
- //assert(0 && "Hit condition which was not expected");
- } else if (dyn_cast<ConstantExpr>(CAval)) {
- O << ":0";
- //assert(0 && "Hit condition which was not expected");
- } else if (dyn_cast<UndefValue>(CAval)) {
- O << ":0";
- //assert(0 && "Hit condition which was not expected");
- } else {
- assert(0 && "Hit condition which was not expected");
- }
-}
-
-static bool isStruct(Type * const T)
-{
- if (!T) {
- return false;
- }
- switch (T->getTypeID()) {
- default:
- return false;
- case Type::PointerTyID:
- return isStruct(T->getContainedType(0));
- case Type::StructTyID:
- return true;
- case Type::ArrayTyID:
- case Type::VectorTyID:
- return isStruct(dyn_cast<SequentialType>(T)->getElementType());
- };
-
-}
-
-void AMDILGlobalManager::dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km,
- uint32_t id) {
- uint32_t size = 0;
- for (StringMap<constPtr>::iterator cmb = consts_begin(),
- cme = consts_end(); cmb != cme; ++cmb) {
- if (id == cmb->second.cbNum) {
- size += (cmb->second.size + 15) & (~15);
- }
- }
- if (id == 0) {
- O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
- if (mCurrentCPOffset) {
- for (StringMap<kernel>::iterator kcpb = mKernels.begin(),
- kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
- const kernel& k = kcpb->second;
- size_t numConsts = k.CPOffsets.size();
- for (size_t x = 0; x < numConsts; ++x) {
- size_t offset = k.CPOffsets[x].first;
- const Constant *C = k.CPOffsets[x].second;
- Type *Ty = C->getType();
- size_t size = (isStruct(Ty) ? getTypeSize(Ty, true)
- : getNumElements(Ty));
- O << ";#" << km->getTypeName(Ty, symTab) << ":";
- O << offset << ":" << size ;
- printConstantValue(C, O, isStruct(Ty));
- O << "\n";
- }
- }
- }
- } else {
- O << ";#DATASTART:" << id << ":" << size << "\n";
- }
-
- for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end();
- cmb != cme; ++cmb) {
- if (cmb->second.cbNum != id) {
- continue;
- }
- const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
- Type *Ty = (G) ? G->getType() : NULL;
- size_t offset = cmb->second.offset;
- const Constant *C = G->getInitializer();
- size_t size = (isStruct(Ty)
- ? getTypeSize(Ty, true)
- : getNumElements(Ty));
- O << ";#" << km->getTypeName(Ty, symTab) << ":";
- if (!id) {
- O << (offset + mCurrentCPOffset) << ":" << size;
- } else {
- O << offset << ":" << size;
- }
- if (C) {
- printConstantValue(C, O, isStruct(Ty));
- } else {
- assert(0 && "Cannot have a constant pointer"
- " without an initializer!");
- }
- O <<"\n";
- }
- if (id == 0) {
- O << ";#DATAEND\n";
- } else {
- O << ";#DATAEND:" << id << "\n";
- }
-}
-
-void
-AMDILGlobalManager::dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km) {
- if (mConstMems.empty() && !mCurrentCPOffset) {
- return;
- } else {
- llvm::DenseSet<uint32_t> const_set;
- for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end();
- cmb != cme; ++cmb) {
- const_set.insert(cmb->second.cbNum);
- }
- if (mCurrentCPOffset) {
- const_set.insert(0);
- }
- for (llvm::DenseSet<uint32_t>::iterator setb = const_set.begin(),
- sete = const_set.end(); setb != sete; ++setb) {
- dumpDataToCB(O, km, *setb);
- }
- }
-}
-
-/// Create a function ID if it is not known or return the known
-/// function ID.
-uint32_t AMDILGlobalManager::getOrCreateFunctionID(const GlobalValue* func) {
- if (func->getName().size()) {
- return getOrCreateFunctionID(func->getName());
- }
- uint32_t id;
- if (mFuncPtrNames.find(func) == mFuncPtrNames.end()) {
- id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
- mFuncPtrNames[func] = id;
- } else {
- id = mFuncPtrNames[func];
- }
- return id;
-}
-uint32_t AMDILGlobalManager::getOrCreateFunctionID(const std::string &func) {
- uint32_t id;
- if (mFuncNames.find(func) == mFuncNames.end()) {
- id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
- mFuncNames[func] = id;
- } else {
- id = mFuncNames[func];
- }
- return id;
-}
diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.h b/src/gallium/drivers/radeon/AMDILGlobalManager.h
deleted file mode 100644
index 1b0361e0174..00000000000
--- a/src/gallium/drivers/radeon/AMDILGlobalManager.h
+++ /dev/null
@@ -1,256 +0,0 @@
-//===-- AMDILGlobalManager.h - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-// ==-----------------------------------------------------------------------===//
-//
-// Class that handles parsing and storing global variables that are relevant to
-// the compilation of the module.
-//
-// ==-----------------------------------------------------------------------===//
-
-#ifndef _AMDILGLOBALMANAGER_H_
-#define _AMDILGLOBALMANAGER_H_
-
-#include "AMDIL.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Module.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <set>
-#include <string>
-
-#define CB_BASE_OFFSET 2
-
-namespace llvm {
-
-class PointerType;
-class AMDILKernelManager;
-class AMDILSubtarget;
-class TypeSymbolTable;
-class Argument;
-class GlobalValue;
-class MachineFunction;
-
-/// structure that holds information for a single local/region address array
-typedef struct _arrayMemRec {
- uint32_t vecSize; // size of each vector
- uint32_t offset; // offset into the memory section
- bool isHW; // flag to specify if HW is used or SW is used
- bool isRegion; // flag to specify if GDS is used or not
-} arraymem;
-
-/// Structure that holds information for all local/region address
-/// arrays in the kernel
-typedef struct _localArgRec {
- llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS> local;
- std::string name; // Kernel Name
-} localArg;
-
-/// structure that holds information about a constant address
-/// space pointer that is a kernel argument
-typedef struct _constPtrRec {
- const Value *base;
- uint32_t size;
- uint32_t offset;
- uint32_t cbNum; // value of 0 means that it does not use hw CB
- bool isArray;
- bool isArgument;
- bool usesHardware;
- std::string name;
-} constPtr;
-
-/// Structure that holds information for each kernel argument
-typedef struct _kernelArgRec {
- uint32_t reqGroupSize[3];
- uint32_t reqRegionSize[3];
- llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo;
- bool mHasRWG;
- bool mHasRWR;
-} kernelArg;
-
-/// Structure that holds information for each kernel
-typedef struct _kernelRec {
- mutable uint32_t curSize;
- mutable uint32_t curRSize;
- mutable uint32_t curHWSize;
- mutable uint32_t curHWRSize;
- uint32_t constSize;
- kernelArg *sgv;
- localArg *lvgv;
- llvm::SmallVector<struct _constPtrRec, DEFAULT_VEC_SLOTS> constPtr;
- uint32_t constSizes[HW_MAX_NUM_CB];
- llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly;
- llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly;
- llvm::SmallVector<std::pair<uint32_t, const Constant *>,
- DEFAULT_VEC_SLOTS> CPOffsets;
-} kernel;
-
-class AMDILGlobalManager {
-public:
- AMDILGlobalManager(bool debugMode = false);
- ~AMDILGlobalManager();
-
- /// Process the given module and parse out the global variable metadata passed
- /// down from the frontend-compiler
- void processModule(const Module &MF, const AMDILTargetMachine* mTM);
-
- /// Returns whether the current name is the name of a kernel function or a
- /// normal function
- bool isKernel(const llvm::StringRef &name) const;
-
- /// Returns true if the image ID corresponds to a read only image.
- bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
-
- /// Returns true if the image ID corresponds to a write only image.
- bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
-
- /// Returns the number of write only images for the kernel.
- uint32_t getNumWriteImages(const llvm::StringRef &name) const;
-
- /// Gets the group size of the kernel for the given dimension.
- uint32_t getLocal(const llvm::StringRef &name, uint32_t dim) const;
-
- /// Gets the region size of the kernel for the given dimension.
- uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const;
-
- /// Get the Region memory size in 1d for the given function/kernel.
- uint32_t getRegionSize(const llvm::StringRef &name) const;
-
- /// Get the region memory size in 1d for the given function/kernel.
- uint32_t getLocalSize(const llvm::StringRef &name) const;
-
- // Get the max group size in one 1D for the given function/kernel.
- uint32_t getMaxGroupSize(const llvm::StringRef &name) const;
-
- // Get the max region size in one 1D for the given function/kernel.
- uint32_t getMaxRegionSize(const llvm::StringRef &name) const;
-
- /// Get the constant memory size in 1d for the given function/kernel.
- uint32_t getConstSize(const llvm::StringRef &name) const;
-
- /// Get the HW local size in 1d for the given function/kernel We need to
- /// seperate SW local and HW local for the case where some local memory is
- /// emulated in global and some is using the hardware features. The main
- /// problem is that in OpenCL 1.0/1.1 cl_khr_byte_addressable_store allows
- /// these actions to happen on all memory spaces, but the hardware can only
- /// write byte address stores to UAV and LDS, not GDS or Stack.
- uint32_t getHWLocalSize(const llvm::StringRef &name) const;
- uint32_t getHWRegionSize(const llvm::StringRef &name) const;
-
- /// Get the offset of the array for the kernel.
- int32_t getArrayOffset(const llvm::StringRef &name) const;
-
- /// Get the offset of the const memory for the kernel.
- int32_t getConstOffset(const llvm::StringRef &name) const;
-
- /// Get the boolean value if this particular constant uses HW or not.
- bool getConstHWBit(const llvm::StringRef &name) const;
-
- /// Get a reference to the kernel metadata information for the given function
- /// name.
- const kernel &getKernel(const llvm::StringRef &name) const;
-
- /// Returns whether a reqd_workgroup_size attribute has been used or not.
- bool hasRWG(const llvm::StringRef &name) const;
-
- /// Returns whether a reqd_workregion_size attribute has been used or not.
- bool hasRWR(const llvm::StringRef &name) const;
-
-
- /// Dump the data section to the output stream for the given kernel.
- void dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km);
-
- /// Iterate through the constants that are global to the compilation unit.
- StringMap<constPtr>::iterator consts_begin();
- StringMap<constPtr>::iterator consts_end();
-
- /// Query if the kernel has a byte store.
- bool byteStoreExists(llvm::StringRef S) const;
-
- /// Query if the kernel and argument uses hardware constant memory.
- bool usesHWConstant(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Query if the constant pointer is an argument.
- bool isConstPtrArgument(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Query if the constant pointer is an array that is globally scoped.
- bool isConstPtrArray(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Query the size of the constant pointer.
- uint32_t getConstPtrSize(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Query the offset of the constant pointer.
- uint32_t getConstPtrOff(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Query the constant buffer number for a constant pointer.
- uint32_t getConstPtrCB(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Query the Value* that the constant pointer originates from.
- const Value *getConstPtrValue(const kernel &krnl, const llvm::StringRef &arg);
-
- /// Get the ID of the argument.
- int32_t getArgID(const Argument *arg);
-
- /// Get the unique function ID for the specific function name and create a new
- /// unique ID if it is not found.
- uint32_t getOrCreateFunctionID(const GlobalValue* func);
- uint32_t getOrCreateFunctionID(const std::string& func);
-
- /// Calculate the offsets of the constant pool for the given kernel and
- /// machine function.
- void calculateCPOffsets(const MachineFunction *MF, kernel &krnl);
-
- /// Print the global manager to the output stream.
- void print(llvm::raw_ostream& O);
-
- /// Dump the global manager to the output stream - debug use.
- void dump();
-
-private:
- /// Various functions that parse global value information and store them in
- /// the global manager. This approach is used instead of dynamic parsing as it
- /// might require more space, but should allow caching of data that gets
- /// requested multiple times.
- kernelArg parseSGV(const GlobalValue *GV);
- localArg parseLVGV(const GlobalValue *GV);
- void parseGlobalAnnotate(const GlobalValue *G);
- void parseImageAnnotate(const GlobalValue *G);
- void parseConstantPtrAnnotate(const GlobalValue *G);
- void printConstantValue(const Constant *CAval,
- llvm::raw_ostream& O,
- bool asByte);
- void parseKernelInformation(const Value *V);
- void parseAutoArray(const GlobalValue *G, bool isRegion);
- void parseConstantPtr(const GlobalValue *G);
- void allocateGlobalCB();
- void dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km, uint32_t id);
- bool checkConstPtrsUseHW(Module::const_iterator *F);
-
- llvm::StringMap<arraymem> mArrayMems;
- llvm::StringMap<localArg> mLocalArgs;
- llvm::StringMap<kernelArg> mKernelArgs;
- llvm::StringMap<kernel> mKernels;
- llvm::StringMap<constPtr> mConstMems;
- llvm::StringMap<uint32_t> mFuncNames;
- llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames;
- llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap;
- std::set<llvm::StringRef> mByteStore;
- std::set<llvm::StringRef> mIgnoreStr;
- llvm::DenseMap<const Argument *, int32_t> mArgIDMap;
- const char *symTab;
- const AMDILSubtarget *mSTM;
- size_t mOffset;
- uint32_t mReservedBuffs;
- uint32_t mCurrentCPOffset;
- bool mDebugMode;
-};
-} // namespace llvm
-#endif // __AMDILGLOBALMANAGER_H_
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
index 6f78d15ad0b..0f76babb807 100644
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -14,9 +14,7 @@
#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
-#include "AMDILGlobalManager.h"
#include "AMDILIntrinsicInfo.h"
-#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILTargetMachine.h"
@@ -31,6 +29,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -1905,11 +1904,6 @@ AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
isRet = false;
IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
};
- const AMDILSubtarget *stm = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
- stm->getKernelManager());
- KM->setOutputInst();
Info.opc = IntNo;
Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
@@ -2134,58 +2128,33 @@ AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
SDValue DST = Op;
const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *G = GADN->getGlobal();
- const AMDILSubtarget *stm = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- const AMDILGlobalManager *GM = stm->getGlobalManager();
DebugLoc DL = Op.getDebugLoc();
- int64_t base_offset = GADN->getOffset();
- int32_t arrayoffset = GM->getArrayOffset(G->getName());
- int32_t constoffset = GM->getConstOffset(G->getName());
- if (arrayoffset != -1) {
- DST = DAG.getConstant(arrayoffset, MVT::i32);
- DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
- DST, DAG.getConstant(base_offset, MVT::i32));
- } else if (constoffset != -1) {
- if (GM->getConstHWBit(G->getName())) {
- DST = DAG.getConstant(constoffset, MVT::i32);
- DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
- DST, DAG.getConstant(base_offset, MVT::i32));
- } else {
- SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
- SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32);
- DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg,
- DAG.getConstant(base_offset, MVT::i32));
- DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg);
- }
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV) {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
} else {
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
- if (!GV) {
- DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
- } else {
- if (GV->hasInitializer()) {
- const Constant *C = dyn_cast<Constant>(GV->getInitializer());
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- DST = DAG.getConstant(CI->getValue(), Op.getValueType());
-
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
- DST = DAG.getConstantFP(CF->getValueAPF(),
- Op.getValueType());
- } else if (dyn_cast<ConstantAggregateZero>(C)) {
- EVT VT = Op.getValueType();
- if (VT.isInteger()) {
- DST = DAG.getConstant(0, VT);
- } else {
- DST = DAG.getConstantFP(0, VT);
- }
+ if (GV->hasInitializer()) {
+ const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+ DST = DAG.getConstantFP(CF->getValueAPF(),
+ Op.getValueType());
+ } else if (dyn_cast<ConstantAggregateZero>(C)) {
+ EVT VT = Op.getValueType();
+ if (VT.isInteger()) {
+ DST = DAG.getConstant(0, VT);
} else {
- assert(!"lowering this type of Global Address "
- "not implemented yet!");
- C->dump();
- DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ DST = DAG.getConstantFP(0, VT);
}
} else {
+ assert(!"lowering this type of Global Address "
+ "not implemented yet!");
+ C->dump();
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
}
+ } else {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
}
}
return DST;
diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.cpp b/src/gallium/drivers/radeon/AMDILKernelManager.cpp
deleted file mode 100644
index 4df81ff5078..00000000000
--- a/src/gallium/drivers/radeon/AMDILKernelManager.cpp
+++ /dev/null
@@ -1,1356 +0,0 @@
-//===-- AMDILKernelManager.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILKernelManager.h"
-
-#include "AMDILAlgorithms.tpp"
-#include "AMDILKernelManager.h"
-#ifdef UPSTREAM_LLVM
-#include "AMDILAsmPrinter.h"
-#endif
-#include "AMDILCompilerErrors.h"
-#include "AMDILDeviceInfo.h"
-#include "AMDILDevices.h"
-#include "AMDILGlobalManager.h"
-#include "AMDILMachineFunctionInfo.h"
-#include "AMDILModuleInfo.h"
-#include "AMDILSubtarget.h"
-#include "AMDILTargetMachine.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/MathExtras.h"
-
-#include <stdio.h>
-
-using namespace llvm;
-#define NUM_EXTRA_SLOTS_PER_IMAGE 1
-
-static bool errorPrint(const char *ptr, llvm::raw_ostream &O) {
- if (ptr[0] == 'E') {
- O << ";error:" << ptr << "\n";
- } else {
- O << ";warning:" << ptr << "\n";
- }
- return false;
-}
-
-#if 0
-static bool
-samplerPrint(StringMap<SamplerInfo>::iterator &data, llvm::raw_ostream &O) {
- O << ";sampler:" << (*data).second.name << ":" << (*data).second.idx
- << ":" << ((*data).second.val == (uint32_t)-1 ? 0 : 1)
- << ":" << ((*data).second.val != (uint32_t)-1 ? (*data).second.val : 0)
- << "\n";
- return false;
-}
-#endif
-
-static bool arenaPrint(uint32_t val, llvm::raw_ostream &O) {
- if (val >= ARENA_SEGMENT_RESERVED_UAVS) {
- O << "dcl_arena_uav_id(" << val << ")\n";
- }
- return false;
-}
-
-static bool uavPrint(uint32_t val, llvm::raw_ostream &O) {
- if (val < 8 || val == 11){
- O << "dcl_raw_uav_id(" << val << ")\n";
- }
- return false;
-}
-
-static bool uavPrintSI(uint32_t val, llvm::raw_ostream &O) {
- O << "dcl_typeless_uav_id(" << val << ")_stride(4)_length(4)_access(read_write)\n";
- return false;
-}
-
-static bool
-printfPrint(std::pair<const std::string, PrintfInfo *> &data, llvm::raw_ostream &O) {
- O << ";printf_fmt:" << data.second->getPrintfID();
- // Number of operands
- O << ":" << data.second->getNumOperands();
- // Size of each operand
- for (size_t i = 0, e = data.second->getNumOperands(); i < e; ++i) {
- O << ":" << (data.second->getOperandID(i) >> 3);
- }
- const char *ptr = data.first.c_str();
- uint32_t size = data.first.size() - 1;
- // The format string size
- O << ":" << size << ":";
- for (size_t i = 0; i < size; ++i) {
- if (ptr[i] == '\r') {
- O << "\\r";
- } else if (ptr[i] == '\n') {
- O << "\\n";
- } else {
- O << ptr[i];
- }
- }
- O << ";\n"; // c_str() is cheap way to trim
- return false;
-}
-
-
-void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip,
- int numWriteImages, int raw_uav_buffer,
- int counter, bool isKernel,
- const Function *F) {
- assert(F && "Cannot pass a NULL Pointer to F!");
- assert(Ip->getType()->isPointerTy() &&
- "Argument must be a pointer to be passed into this function!\n");
- std::string ptrArg(";pointer:");
- const char *symTab = "NoSymTab";
- uint32_t ptrID = getUAVID(Ip);
- const PointerType *PT = cast<PointerType>(Ip->getType());
- uint32_t Align = 4;
- const char *MemType = "uav";
- if (PT->getElementType()->isSized()) {
- Align = NextPowerOf2((uint32_t)mTM->getTargetData()->
- getTypeAllocSize(PT->getElementType()));
- }
- ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab) + ":1:1:" +
- itostr(counter * 16) + ":";
- switch (PT->getAddressSpace()) {
- case AMDILAS::ADDRESS_NONE:
- //O << "No Address space qualifier!";
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
- assert(1);
- break;
- case AMDILAS::GLOBAL_ADDRESS:
- if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
- if (ptrID >= ARENA_SEGMENT_RESERVED_UAVS) {
- ptrID = 8;
- }
- }
- mMFI->uav_insert(ptrID);
- break;
- case AMDILAS::CONSTANT_ADDRESS: {
- if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)){
- const kernel t = mGM->getKernel(F->getName());
- if (mGM->usesHWConstant(t, Ip->getName())) {
- MemType = "hc\0";
- ptrID = mGM->getConstPtrCB(t, Ip->getName());
- } else {
- MemType = "c\0";
- mMFI->uav_insert(ptrID);
- }
- } else {
- MemType = "c\0";
- mMFI->uav_insert(ptrID);
- }
- break;
- }
- default:
- case AMDILAS::PRIVATE_ADDRESS:
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
- MemType = (mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV))
- ? "up\0" : "hp\0";
- } else {
- MemType = "p\0";
- mMFI->uav_insert(ptrID);
- }
- break;
- case AMDILAS::REGION_ADDRESS:
- mMFI->setUsesRegion();
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
- MemType = "hr\0";
- ptrID = 0;
- } else {
- MemType = "r\0";
- mMFI->uav_insert(ptrID);
- }
- break;
- case AMDILAS::LOCAL_ADDRESS:
- mMFI->setUsesLocal();
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
- MemType = "hl\0";
- ptrID = 1;
- } else {
- MemType = "l\0";
- mMFI->uav_insert(ptrID);
- }
- break;
- };
- ptrArg += std::string(MemType) + ":";
- ptrArg += itostr(ptrID) + ":";
- ptrArg += itostr(Align);
- mMFI->addMetadata(ptrArg, true);
-}
-
-AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM,
- AMDILGlobalManager *GM)
-{
- mTM = TM;
- mSTM = mTM->getSubtargetImpl();
- mGM = GM;
- clear();
-}
-
-AMDILKernelManager::~AMDILKernelManager() {
- clear();
-}
-
-void
-AMDILKernelManager::setMF(MachineFunction *MF)
-{
- mMF = MF;
- mMFI = MF->getInfo<AMDILMachineFunctionInfo>();
-}
-
-void AMDILKernelManager::clear() {
- mUniqueID = 0;
- mIsKernel = false;
- mWasKernel = false;
- mHasImageWrite = false;
- mHasOutputInst = false;
-}
-
-bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI) {
- return (MI->getOpcode() == AMDIL::RETURN && wasKernel() && !mHasImageWrite
- && !mHasOutputInst);
-}
-
-void AMDILKernelManager::processArgMetadata(llvm::raw_ostream &O,
- uint32_t buf,
- bool isKernel)
-{
- const Function *F = mMF->getFunction();
- const char * symTab = "NoSymTab";
- Function::const_arg_iterator Ip = F->arg_begin();
- Function::const_arg_iterator Ep = F->arg_end();
-
- if (F->hasStructRetAttr()) {
- assert(Ip != Ep && "Invalid struct return fucntion!");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
- ++Ip;
- }
- uint32_t mCBSize = 0;
- int raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
- bool MultiUAV = mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV);
- bool ArenaSegment =
- mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment);
- int numWriteImages =
- mSTM->getGlobalManager()->getNumWriteImages(F->getName());
- if (numWriteImages == OPENCL_MAX_WRITE_IMAGES || MultiUAV || ArenaSegment) {
- if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
- }
- }
- uint32_t CounterNum = 0;
- uint32_t ROArg = 0;
- uint32_t WOArg = 0;
- uint32_t NumArg = 0;
- while (Ip != Ep) {
- Type *cType = Ip->getType();
- if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) {
- std::string argMeta(";value:");
- argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab) + ":";
- int bitsize = cType->getPrimitiveSizeInBits();
- int numEle = 1;
- if (cType->getTypeID() == Type::VectorTyID) {
- numEle = cast<VectorType>(cType)->getNumElements();
- }
- argMeta += itostr(numEle) + ":1:" + itostr(mCBSize << 4);
- mMFI->addMetadata(argMeta, true);
-
- // FIXME: simplify
- if ((bitsize / numEle) < 32) {
- bitsize = numEle >> 2;
- } else {
- bitsize >>= 7;
- }
- if (!bitsize) {
- bitsize = 1;
- }
-
- mCBSize += bitsize;
- ++NumArg;
- } else if (const PointerType *PT = dyn_cast<PointerType>(cType)) {
- Type *CT = PT->getElementType();
- const StructType *ST = dyn_cast<StructType>(CT);
- if (ST && ST->isOpaque()) {
- StringRef name = ST->getName();
- bool i1d = name.equals( "struct._image1d_t" );
- bool i1da = name.equals( "struct._image1d_array_t" );
- bool i1db = name.equals( "struct._image1d_buffer_t" );
- bool i2d = name.equals( "struct._image2d_t" );
- bool i2da = name.equals( "struct._image2d_array_t" );
- bool i3d = name.equals( "struct._image3d_t" );
- bool c32 = name.equals( "struct._counter32_t" );
- bool c64 = name.equals( "struct._counter64_t" );
- if (i1d || i1da || i1db || i2d | i2da || i3d) {
- if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
- std::string imageArg(";image:");
- imageArg += Ip->getName().str() + ":";
- if (i1d) imageArg += "1D:";
- else if (i1da) imageArg += "1DA:";
- else if (i1db) imageArg += "1DB:";
- else if (i2d) imageArg += "2D:";
- else if (i2da) imageArg += "2DA:";
- else if (i3d) imageArg += "3D:";
-
- if (isKernel) {
- if (mGM->isReadOnlyImage (mMF->getFunction()->getName(),
- (ROArg + WOArg))) {
- imageArg += "RO:" + itostr(ROArg);
- O << "dcl_resource_id(" << ROArg << ")_type(";
- if (i1d) O << "1d";
- else if (i1da) O << "1darray";
- else if (i1db) O << "buffer";
- else if (i2d) O << "2d";
- else if (i2da) O << "2darray";
- else if (i3d) O << "3d";
- O << ")_fmtx(unknown)_fmty(unknown)"
- << "_fmtz(unknown)_fmtw(unknown)\n";
- ++ROArg;
- } else if (mGM->isWriteOnlyImage(mMF->getFunction()->getName(),
- (ROArg + WOArg))) {
- uint32_t offset = 0;
- offset += WOArg;
- imageArg += "WO:" + itostr(offset & 0x7);
- O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type(";
- if (i1d) O << "1d";
- else if (i1da) O << "1darray";
- else if (i1db) O << "buffer";
- else if (i2d) O << "2d";
- else if (i2da) O << "2darray";
- else if (i3d) O << "3d";
- O << ")_fmtx(uint)\n";
- ++WOArg;
- } else {
- imageArg += "RW:" + itostr(ROArg + WOArg);
- }
- }
- imageArg += ":1:" + itostr(mCBSize * 16);
- mMFI->addMetadata(imageArg, true);
- mMFI->addi32Literal(mCBSize);
- mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1;
- ++NumArg;
- } else {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
- ++NumArg;
- }
- } else if (c32 || c64) {
- std::string counterArg(";counter:");
- counterArg += Ip->getName().str() + ":"
- + itostr(c32 ? 32 : 64) + ":"
- + itostr(CounterNum++) + ":1:" + itostr(mCBSize * 16);
- mMFI->addMetadata(counterArg, true);
- ++NumArg;
- ++mCBSize;
- } else {
- updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel,
- F);
- ++NumArg;
- ++mCBSize;
- }
- }
- else if (CT->getTypeID() == Type::StructTyID
- && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
- const TargetData *td = mTM->getTargetData();
- const StructLayout *sl = td->getStructLayout(dyn_cast<StructType>(CT));
- int bytesize = sl->getSizeInBytes();
- int reservedsize = (bytesize + 15) & ~15;
- int numSlots = reservedsize >> 4;
- if (!numSlots) {
- numSlots = 1;
- }
- std::string structArg(";value:");
- structArg += Ip->getName().str() + ":struct:"
- + itostr(bytesize) + ":1:" + itostr(mCBSize * 16);
- mMFI->addMetadata(structArg, true);
- mCBSize += numSlots;
- ++NumArg;
- } else if (CT->isIntOrIntVectorTy()
- || CT->isFPOrFPVectorTy()
- || CT->getTypeID() == Type::ArrayTyID
- || CT->getTypeID() == Type::PointerTyID
- || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
- updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, F);
- ++NumArg;
- ++mCBSize;
- } else {
- assert(0 && "Cannot process current pointer argument");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
- ++NumArg;
- }
- } else {
- assert(0 && "Cannot process current kernel argument");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
- ++NumArg;
- }
- ++Ip;
- }
-}
-
-void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter,
- llvm::raw_ostream &O,
- const std::string &name) {
-#ifdef UPSTREAM_LLVM
- mName = name;
- std::string kernelName;
- kernelName = name;
- int kernelId = mGM->getOrCreateFunctionID(kernelName);
- O << "func " << kernelId << " ; " << kernelName << "\n";
- if (mSTM->is64bit()) {
- O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].xy\n";
- } else {
- O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].x\n";
- }
- O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0\n";
-#endif
-}
-
-void AMDILKernelManager::printGroupSize(llvm::raw_ostream& O) {
- // The HD4XXX generation of hardware does not support a 3D launch, so we need
- // to use dcl_num_thread_per_group to specify the launch size. If the launch
- // size is specified via a kernel attribute, we print it here. Otherwise we
- // use the the default size.
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
- if (mGM->hasRWG(mName)
- || !mMFI->usesLocal()) {
- // if the user has specified what the required workgroup size is then we
- // need to compile for that size and that size only. Otherwise we compile
- // for the max workgroup size that is passed in as an option to the
- // backend.
- O << "dcl_num_thread_per_group ";
- O << mGM->getLocal(mName, 0) << ", ";
- O << mGM->getLocal(mName, 1) << ", ";
- O << mGM->getLocal(mName, 2) << " \n";
- } else {
- // If the kernel uses local memory, then the kernel is being
- // compiled in single wavefront mode. So we have to generate code slightly
- // different.
- O << "dcl_num_thread_per_group "
- << mSTM->device()->getWavefrontSize()
- << ", 1, 1 \n";
- }
- } else {
- // Otherwise we generate for devices that support 3D launch natively. If
- // the reqd_workgroup_size attribute was specified, then we can specify the
- // exact launch dimensions.
- if (mGM->hasRWG(mName)) {
- O << "dcl_num_thread_per_group ";
- O << mGM->getLocal(mName, 0) << ", ";
- O << mGM->getLocal(mName, 1) << ", ";
- O << mGM->getLocal(mName, 2) << " \n";
- } else {
- // Otherwise we specify the largest workgroup size that can be launched.
- O << "dcl_max_thread_per_group " << mGM->getLocal(mName, 3) << " \n";
- }
- }
- // Now that we have specified the workgroup size, lets declare the local
- // memory size. If we are using hardware and we know the value at compile
- // time, then we need to declare the correct value. Otherwise we should just
- // declare the maximum size.
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
- size_t kernelLocalSize = (mGM->getHWLocalSize(mName) + 3) & ~3;
- if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]);
- }
- // If there is a local pointer as a kernel argument, we don't know the size
- // at compile time, so we reserve all of the space.
- if (mMFI->usesLocal() && (mMFI->hasLocalArg() || !kernelLocalSize)) {
- O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
- << mSTM->device()->getMaxLDSSize() << "\n";
- mMFI->setUsesMem(AMDILDevice::LDS_ID);
- } else if (kernelLocalSize) {
- // We know the size, so lets declare it correctly.
- O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
- << kernelLocalSize << "\n";
- mMFI->setUsesMem(AMDILDevice::LDS_ID);
- }
- }
- // If the device supports the region memory extension, which maps to our
- // hardware GDS memory, then lets declare it so we can use it later on.
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
- size_t kernelGDSSize = (mGM->getHWRegionSize(mName) + 3) & ~3;
- if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]);
- }
- // If there is a region pointer as a kernel argument, we don't know the size
- // at compile time, so we reserved all of the space.
- if (mMFI->usesRegion() && (mMFI->hasRegionArg() || !kernelGDSSize)) {
- O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
- ") " << mSTM->device()->getMaxGDSSize() << "\n";
- mMFI->setUsesMem(AMDILDevice::GDS_ID);
- } else if (kernelGDSSize) {
- // We know the size, so lets declare it.
- O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
- ") " << kernelGDSSize << "\n";
- mMFI->setUsesMem(AMDILDevice::GDS_ID);
- }
- }
-}
-
-void
-AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O) {
- // If we are a HD4XXX generation device, then we only support a single uav
- // surface, so we declare it and leave
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
- O << "dcl_raw_uav_id("
- << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
- << ")\n";
- mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
- getIntrinsicSetup(AsmPrinter, O);
- return;
- }
- // If we are supporting multiple uav's view the MultiUAV capability, then we
- // need to print out the declarations here. MultiUAV conflicts with write
- // images, so they only use 8 - NumWriteImages uav's. Therefor only pointers
- // with ID's < 8 will get printed.
- if (mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
- binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrint, O);
- mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
- }
- // If arena segments are supported, then we should emit them now. Arena
- // segments are similiar to MultiUAV, except ArenaSegments are virtual and up
- // to 1024 of them can coexist. These are more compiler hints for CAL and thus
- // cannot overlap in any form. Each ID maps to a seperate piece of memory and
- // CAL determines whether the load/stores should go to the fast path/slow path
- // based on the usage and instruction.
- if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
- binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O);
- }
- // Now that we have printed out all of the arena and multi uav declaration,
- // now we must print out the default raw uav id. This always exists on HD5XXX
- // and HD6XXX hardware. The reason is that the hardware supports 12 UAV's and
- // 11 are taken up by MultiUAV/Write Images and Arena. However, if we do not
- // have UAV 11 as the raw UAV and there are 8 write images, we must revert
- // everything to the arena and not print out the default raw uav id.
- if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD5XXX
- || mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) {
- if ((mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) < 11 &&
- mSTM->getGlobalManager()->getNumWriteImages(mName)
- != OPENCL_MAX_WRITE_IMAGES
- && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV))
- || mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
- if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
- && mMFI->uav_count(mSTM->device()->
- getResourceID(AMDILDevice::RAW_UAV_ID))) {
- O << "dcl_raw_uav_id("
- << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
- O << ")\n";
- mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
- }
- }
- // If we have not printed out the arena ID yet, then do so here.
- if (!mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
- && mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaUAV)) {
- O << "dcl_arena_uav_id("
- << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) << ")\n";
- mMFI->setUsesMem(AMDILDevice::ARENA_UAV_ID);
- }
- } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrintSI, O);
- mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
- }
- getIntrinsicSetup(AsmPrinter, O);
-}
-
-void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter,
- llvm::raw_ostream &O)
-{
- O << "mov r0.z, vThreadGrpIdFlat.x\n"
- << "mov r1022.xyz0, vTidInGrp.xyz\n";
- if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
- O << "mov r1023.xyz0, vThreadGrpId.xyz\n";
- } else {
- O << "imul r0.w, cb0[2].x, cb0[2].y\n"
- // Calculates the local id.
- // Calculates the group id.
- << "umod r1023.x, r0.z, cb0[2].x\n"
- << "udiv r1023.y, r0.z, cb0[2].x\n"
- << "umod r1023.y, r1023.y, cb0[2].y\n"
- << "udiv r1023.z, r0.z, r0.w\n";
- }
- // Calculates the global id.
- if (mGM->hasRWG(mName) && 0) {
- // Anytime we declare a literal, we need to reserve it, if it is not emitted
- // in emitLiterals.
- mMFI->addReservedLiterals(1);
- O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
- O << mGM->getLocal(mName, 0) << ", ";
- O << mGM->getLocal(mName, 1) << ", ";
- O << mGM->getLocal(mName, 2) << ", ";
- O << "0\n";
- O << "imad r1021.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n";
- mMFI->addReservedLiterals(1);
- } else {
- O << "imad r1021.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n";
- }
-
- // Add the global/group offset for multi-launch support.
- O << "iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0\n"
- << "iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0\n"
- // moves the flat group id.
- << "mov r1023.w, r0.z\n";
-#ifdef UPSTREAM_LLVM
- if (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
- if (mSTM->is64bit()) {
- O << "umul " << AsmPrinter->getRegisterName(AMDIL::T2)
- << ".x0, r1023.w, cb0[4].z\n"
- << "i64add " << AsmPrinter->getRegisterName(AMDIL::T2)
- << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T2)
- << ".xy, cb0[4].xy\n";
-
- } else {
- O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2)
- << ".x, r1023.w, cb0[4].y, cb0[4].x\n";
- }
- }
- // Shift the flat group id to be in bytes instead of dwords.
- O << "ishl r1023.w, r1023.w, l0.z\n";
- if (mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) {
- if (mSTM->is64bit()) {
- O << "umul " << AsmPrinter->getRegisterName(AMDIL::T1)
- << ".x0, vAbsTidFlat.x, cb0[3].z\n"
- << "i64add " << AsmPrinter->getRegisterName(AMDIL::T1)
- << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T1)
- << ".xy, cb0[3].xy\n";
-
- } else {
- O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1)
- << ".x, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n";
- }
- } else {
- O << "mov " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x, l0.0\n";
- }
-#endif
- if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
- O << "udiv r1024.xyz, r1021.xyz, cb0[10].xyz\n";
- if (mGM->hasRWR(mName) && 0) {
- // Anytime we declare a literal, we need to reserve it, if it is not emitted
- // in emitLiterals.
- mMFI->addReservedLiterals(1);
- O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
- O << mGM->getLocal(mName, 0) << ", ";
- O << mGM->getLocal(mName, 1) << ", ";
- O << mGM->getLocal(mName, 2) << ", ";
- O << "0\n";
- O << "imad r1025.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n";
- mMFI->addReservedLiterals(1);
- } else {
- O << "imad r1025.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n";
- }
- }
-}
-
-void AMDILKernelManager::printFooter(llvm::raw_ostream &O) {
- O << "ret\n";
- O << "endfunc ; " << mName << "\n";
-}
-
-void
-AMDILKernelManager::printMetaData(llvm::raw_ostream &O, uint32_t id, bool kernel) {
- if (kernel) {
- int kernelId = mGM->getOrCreateFunctionID(mName);
- mMFI->addCalledFunc(id);
- mUniqueID = kernelId;
- mIsKernel = true;
- }
- printKernelArgs(O);
- if (kernel) {
- mIsKernel = false;
- mMFI->eraseCalledFunc(id);
- mUniqueID = id;
- }
-}
-
-void AMDILKernelManager::setKernel(bool kernel) {
- mIsKernel = kernel;
- if (kernel) {
- mWasKernel = mIsKernel;
- }
-}
-
-void AMDILKernelManager::setID(uint32_t id)
-{
- mUniqueID = id;
-}
-
-void AMDILKernelManager::setName(const std::string &name) {
- mName = name;
-}
-
-bool AMDILKernelManager::isKernel() {
- return mIsKernel;
-}
-
-bool AMDILKernelManager::wasKernel() {
- return mWasKernel;
-}
-
-void AMDILKernelManager::setImageWrite() {
- mHasImageWrite = true;
-}
-
-void AMDILKernelManager::setOutputInst() {
- mHasOutputInst = true;
-}
-
-void AMDILKernelManager::printConstantToRegMapping(
- AMDILAsmPrinter *RegNames,
- uint32_t &LII,
- llvm::raw_ostream &O,
- uint32_t &Counter,
- uint32_t Buffer,
- uint32_t n,
- const char *lit,
- uint32_t fcall,
- bool isImage,
- bool isHWCB)
-{
-#ifdef UPSTREAM_LLVM
- // TODO: This needs to be enabled or SC will never statically index into the
- // CB when a pointer is used.
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) && isHWCB) {
- const char *name = RegNames->getRegisterName(LII);
- O << "mov " << name << ", l5.x\n";
- ++LII;
- Counter++;
- return;
- }
- for (uint32_t x = 0; x < n; ++x) {
- const char *name = RegNames->getRegisterName(LII);
- if (isImage) {
- O << "mov " << name << ", l" << mMFI->getIntLits(Counter++) << "\n";
- } else {
- O << "mov " << name << ", cb" <<Buffer<< "[" <<Counter++<< "]\n";
- }
- switch(fcall) {
- case 1093:
- O << "ishr " << name << ", " << name << ".xxyy, l3.0y0y\n"
- "ishl " << name << ", " << name << ", l3.y\n"
- "ishr " << name << ", " << name << ", l3.y\n";
- break;
- case 1092:
- O << "ishr " << name << ", " << name << ".xx, l3.0y\n"
- "ishl " << name << ", " << name << ", l3.y\n"
- "ishr " << name << ", " << name << ", l3.y\n";
- break;
- case 1091:
- O << "ishr " << name << ", " << name << ".xxxx, l3.0zyx\n"
- "ishl " << name << ", " << name << ", l3.x\n"
- "ishr " << name << ", " << name << ", l3.x\n";
- break;
- case 1090:
- O << "ishr " << name << ", " << name << ".xx, l3.0z\n"
- "ishl " << name << ".xy__, " << name << ".xy, l3.x\n"
- "ishr " << name << ".xy__, " << name << ".xy, l3.x\n";
- break;
- default:
- break;
- };
- if (lit) {
- O << "ishl " << name << ", " << name
- << ", " << lit << "\n";
- O << "ishr " << name << ", " << name
- << ", " << lit << "\n";
- }
- if (isImage) {
- Counter += NUM_EXTRA_SLOTS_PER_IMAGE;
- }
- ++LII;
- }
-#endif
-}
-
-void
-AMDILKernelManager::printCopyStructPrivate(const StructType *ST,
- llvm::raw_ostream &O,
- size_t stackSize,
- uint32_t Buffer,
- uint32_t mLitIdx,
- uint32_t &Counter)
-{
- size_t n = ((stackSize + 15) & ~15) >> 4;
- for (size_t x = 0; x < n; ++x) {
- O << "mov r2, cb" << Buffer << "[" << Counter++ << "]\n";
- O << "mov r1.x, r0.x\n";
- if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
- O << "ishr r1.x, r1.x, l0.x\n";
- O << "mov x" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
- <<"[r1.x], r2\n";
- } else {
- O << "uav_raw_store_id(" <<
- mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
- << ") mem0, r1.x, r2\n";
- }
- } else {
- O << "uav_raw_store_id(" <<
- mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
- << ") mem0, r1.x, r2\n";
- }
- O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n";
- }
-}
-
-void AMDILKernelManager::printKernelArgs(llvm::raw_ostream &O) {
- std::string version(";version:");
- version += itostr(AMDIL_MAJOR_VERSION) + ":"
- + itostr(AMDIL_MINOR_VERSION) + ":" + itostr(AMDIL_REVISION_NUMBER);
- O << ";ARGSTART:" <<mName<< "\n";
- if (mIsKernel) {
- O << version << "\n";
- O << ";device:" <<mSTM->getDeviceName() << "\n";
- }
- O << ";uniqueid:" <<mUniqueID<< "\n";
-
- size_t local = mGM->getLocalSize(mName);
- size_t hwlocal = ((mGM->getHWLocalSize(mName) + 3) & (~0x3));
- size_t region = mGM->getRegionSize(mName);
- size_t hwregion = ((mGM->getHWRegionSize(mName) + 3) & (~0x3));
- bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
- bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
- bool usehwregion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
- bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV);
- if (mIsKernel) {
- O << ";memory:" << ((usehwprivate) ?
- (useuavprivate) ? "uav" : "hw" : "" ) << "private:"
- <<(((mMFI->getStackSize() + 15) & (~0xF)))<< "\n";
- }
- if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
- O << ";memory:" << ((usehwregion) ? "hw" : "") << "region:"
- << ((usehwregion) ? hwregion : hwregion + region) << "\n";
- }
- O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:"
- << ((usehwlocal) ? hwlocal : hwlocal + local) << "\n";
-
- if (mIsKernel) {
- if (mGM->hasRWG(mName)) {
- O << ";cws:" << mGM->getLocal(mName, 0) << ":";
- O << mGM->getLocal(mName, 1) << ":";
- O << mGM->getLocal(mName, 2) << "\n";
- }
- if (mGM->hasRWR(mName)) {
- O << ";crs:" << mGM->getRegion(mName, 0) << ":";
- O << mGM->getRegion(mName, 1) << ":";
- O << mGM->getRegion(mName, 2) << "\n";
- }
- }
- if (mIsKernel) {
- for (std::vector<std::string>::iterator ib = mMFI->kernel_md_begin(),
- ie = mMFI->kernel_md_end(); ib != ie; ++ib) {
- O << (*ib) << "\n";
- }
- }
- for (std::set<std::string>::iterator ib = mMFI->func_md_begin(),
- ie = mMFI->func_md_end(); ib != ie; ++ib) {
- O << (*ib) << "\n";
- }
- if (!mMFI->func_empty()) {
- O << ";function:" << mMFI->func_size();
- binaryForEach(mMFI->func_begin(), mMFI->func_end(), commaPrint, O);
- O << "\n";
- }
-
- if (!mSTM->device()->isSupported(AMDILDeviceInfo::MacroDB)
- && !mMFI->intr_empty()) {
- O << ";intrinsic:" << mMFI->intr_size();
- binaryForEach(mMFI->intr_begin(), mMFI->intr_end(), commaPrint, O);
- O << "\n";
- }
-
- if (!mIsKernel) {
- binaryForEach(mMFI->printf_begin(), mMFI->printf_end(), printfPrint, O);
- mMF->getMMI().getObjFileInfo<AMDILModuleInfo>().add_printf_offset(
- mMFI->printf_size());
- } else {
- for (StringMap<SamplerInfo>::iterator
- smb = mMFI->sampler_begin(),
- sme = mMFI->sampler_end(); smb != sme; ++ smb) {
- O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx
- << ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1)
- << ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0)
- << "\n";
- }
- }
- if (mSTM->is64bit()) {
- O << ";memory:64bitABI\n";
- }
-
- if (mMFI->errors_empty()) {
- binaryForEach(mMFI->errors_begin(), mMFI->errors_end(), errorPrint, O);
- }
- // This has to come last
- if (mIsKernel
- && mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- if (mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
- mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
- if (mMFI->uav_size() == 1) {
- if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
- && *(mMFI->uav_begin()) >= ARENA_SEGMENT_RESERVED_UAVS) {
- O << ";uavid:"
- << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
- O << "\n";
- } else {
- O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
- }
- } else if (mMFI->uav_count(mSTM->device()->
- getResourceID(AMDILDevice::RAW_UAV_ID))) {
- O << ";uavid:"
- << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
- O << "\n";
- } else {
- O << ";uavid:"
- << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
- O << "\n";
- }
- } else if (mSTM->getGlobalManager()->getNumWriteImages(mName) !=
- OPENCL_MAX_WRITE_IMAGES
- && !mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
- && mMFI->uav_count(mSTM->device()->
- getResourceID(AMDILDevice::RAW_UAV_ID))) {
- O << ";uavid:"
- << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) << "\n";
- } else if (mMFI->uav_size() == 1) {
- O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
- } else {
- O << ";uavid:"
- << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
- O << "\n";
- }
- }
- O << ";ARGEND:" << mName << "\n";
-}
-
-void AMDILKernelManager::printArgCopies(llvm::raw_ostream &O,
- AMDILAsmPrinter *RegNames)
-{
- Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
- Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
- uint32_t Counter = 0;
-
- if (mMFI->getArgSize()) {
- O << "dcl_cb cb1";
- O << "[" << (mMFI->getArgSize() >> 4) << "]\n";
- mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
- }
- const Function *F = mMF->getFunction();
- // Get the stack size
- uint32_t stackSize = mMFI->getStackSize();
- uint32_t privateSize = mMFI->getScratchSize();
- uint32_t stackOffset = (privateSize + 15) & (~0xF);
- if (stackSize
- && mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
- // TODO: If the size is too large, we need to fall back to software emulated
- // instead of using the hardware capability.
- int size = (((stackSize + 15) & (~0xF)) >> 4);
- if (size > 4096) {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]);
- }
- if (size) {
- // For any stack variables, we need to declare the literals for them so that
- // we can use them when we copy our data to the stack.
- mMFI->addReservedLiterals(1);
- // Anytime we declare a literal, we need to reserve it, if it is not emitted
- // in emitLiterals.
-#ifdef UPSTREAM_LLVM
- O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize << ", "
- << privateSize << ", 16, " << ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
- << "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l"
- << mMFI->getNumLiterals() << ".w\n";
- if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- O << "dcl_indexed_temp_array x"
- << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "["
- << size << "]\n";
- } else {
- O << "dcl_typeless_uav_id("
- << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
- << ")_stride(4)_length(" << (size << 4 )<< ")_access(private)\n";
-
- }
- O << "mov " << RegNames->getRegisterName(AMDIL::FP)
- << ".x, l" << mMFI->getNumLiterals() << ".0\n";
-#endif
- mMFI->setUsesMem(AMDILDevice::SCRATCH_ID);
- }
- }
- I = mMF->getFunction()->arg_begin();
- int32_t count = 0;
- // uint32_t Image = 0;
- bool displaced1 = false;
- bool displaced2 = false;
- uint32_t curReg = AMDIL::R1;
- // TODO: We don't handle arguments that were pushed onto the stack!
- for (; I != Ie; ++I) {
- Type *curType = I->getType();
- unsigned int Buffer = 1;
- O << "; Kernel arg setup: " << I->getName() << "\n";
- if (curType->isIntegerTy() || curType->isFloatingPointTy()) {
- switch (curType->getPrimitiveSizeInBits()) {
- default:
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
- break;
- case 16:
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
- "l3.y" );
- break;
- case 8:
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x" );
- break;
- }
-#ifdef UPSTREAM_LLVM
- } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
- Type *ET = VT->getElementType();
- int numEle = VT->getNumElements();
- switch (ET->getPrimitiveSizeInBits()) {
- default:
- if (numEle == 3) {
- O << "mov " << RegNames->getRegisterName(curReg);
- O << ".x, cb" << Buffer << "[" << Counter << "].x\n";
- curReg++;
- O << "mov " << RegNames->getRegisterName(curReg);
- O << ".x, cb" << Buffer << "[" << Counter << "].y\n";
- curReg++;
- O << "mov " << RegNames->getRegisterName(curReg);
- O << ".x, cb" << Buffer << "[" << Counter << "].z\n";
- curReg++;
- Counter++;
- } else {
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
- (numEle+2) >> 2);
- }
- break;
- case 64:
- if (numEle == 3) {
- O << "mov " << RegNames->getRegisterName(curReg);
- O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
- curReg++;
- O << "mov " << RegNames->getRegisterName(curReg);
- O << ".xy, cb" << Buffer << "[" << Counter++ << "].zw\n";
- curReg++;
- O << "mov " << RegNames->getRegisterName(curReg);
- O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
- curReg++;
- Counter++;
- } else {
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
- (numEle) >> 1);
- }
- break;
- case 16:
- {
- switch (numEle) {
- default:
- printConstantToRegMapping(RegNames, curReg, O, Counter,
- Buffer, (numEle+2) >> 2, "l3.y", 1093);
- if (numEle == 3) {
- O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
- O << RegNames->getRegisterName(curReg) << ".y\n";
- ++curReg;
- O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
- O << RegNames->getRegisterName(curReg) << ".z\n";
- ++curReg;
- }
- break;
- case 2:
- printConstantToRegMapping(RegNames, curReg, O, Counter,
- Buffer, 1, "l3.y", 1092);
- break;
- }
- break;
- }
- case 8:
- {
- switch (numEle) {
- default:
- printConstantToRegMapping(RegNames, curReg, O, Counter,
- Buffer, (numEle+2) >> 2, "l3.x", 1091);
- if (numEle == 3) {
- O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
- O << RegNames->getRegisterName(curReg) << ".y\n";
- ++curReg;
- O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
- O << RegNames->getRegisterName(curReg) << ".z\n";
- ++curReg;
- }
- break;
- case 2:
- printConstantToRegMapping(RegNames, curReg, O, Counter,
- Buffer, 1, "l3.x", 1090);
- break;
- }
- break;
- }
- }
-#endif
- } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
- Type *CT = PT->getElementType();
- const StructType *ST = dyn_cast<StructType>(CT);
- if (ST && ST->isOpaque()) {
- bool i1d = ST->getName() == "struct._image1d_t";
- bool i1da = ST->getName() == "struct._image1d_array_t";
- bool i1db = ST->getName() == "struct._image1d_buffer_t";
- bool i2d = ST->getName() == "struct._image2d_t";
- bool i2da = ST->getName() == "struct._image2d_array_t";
- bool i3d = ST->getName() == "struct._image3d_t";
- bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
- if (is_image) {
- if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
- 1, NULL, 0, is_image);
- } else {
- mMFI->addErrorMsg(
- amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
- ++curReg;
- }
- } else {
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
- }
- } else if (CT->isStructTy()
- && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
- StructType *ST = dyn_cast<StructType>(CT);
- bool i1d = ST->getName() == "struct._image1d_t";
- bool i1da = ST->getName() == "struct._image1d_array_t";
- bool i1db = ST->getName() == "struct._image1d_buffer_t";
- bool i2d = ST->getName() == "struct._image2d_t";
- bool i2da = ST->getName() == "struct._image2d_array_t";
- bool i3d = ST->getName() == "struct._image3d_t";
- bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
- if (is_image) {
- if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
- 1, NULL, 0, is_image);
- } else {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
- ++curReg;
- }
- } else {
- if (count) {
- // Anytime we declare a literal, we need to reserve it, if it
- // is not emitted in emitLiterals.
- mMFI->addReservedLiterals(1);
- O << "dcl_literal l" << mMFI->getNumLiterals() << ", "
- << -stackSize << ", " << stackSize << ", 16, "
- << stackOffset << "\n";
- }
- ++count;
- size_t structSize;
- structSize = (getTypeSize(ST) + 15) & ~15;
- stackOffset += structSize;
-#ifdef UPSTREAM_LLVM
- O << "mov " << RegNames->getRegisterName((curReg)) << ", l"
- << mMFI->getNumLiterals()<< ".w\n";
- if (!displaced1) {
- O << "mov r1011, r1\n";
- displaced1 = true;
- }
- if (!displaced2 && strcmp(RegNames->getRegisterName(curReg), "r1")) {
- O << "mov r1010, r2\n";
- displaced2 = true;
- }
-#endif
- printCopyStructPrivate(ST, O, structSize, Buffer, mMFI->getNumLiterals(),
- Counter);
- ++curReg;
- }
- } else if (CT->isIntOrIntVectorTy()
- || CT->isFPOrFPVectorTy()
- || CT->isArrayTy()
- || CT->isPointerTy()
- || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
- if (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
- const kernel& krnl = mGM->getKernel(F->getName());
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
- 1, NULL, 0, false,
- mGM->usesHWConstant(krnl, I->getName()));
- } else if (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
- // TODO: If we are region address space, the first region pointer, no
- // array pointers exist, and hardware RegionMem is enabled then we can
- // zero out register as the initial offset is zero.
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
- } else if (PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
- // TODO: If we are local address space, the first local pointer, no
- // array pointers exist, and hardware LocalMem is enabled then we can
- // zero out register as the initial offset is zero.
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
- } else {
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
- }
- } else {
- assert(0 && "Current type is not supported!");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
- ++curReg;
- }
- } else {
- assert(0 && "Current type is not supported!");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
- ++curReg;
- }
- }
- if (displaced1) {
- O << "mov r1, r1011\n";
- }
- if (displaced2) {
- O << "mov r2, r1010\n";
- }
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
- const kernel& krnl = mGM->getKernel(F->getName());
- uint32_t constNum = 0;
- for (uint32_t x = 0; x < mSTM->device()->getMaxNumCBs(); ++x) {
- if (krnl.constSizes[x]) {
- O << "dcl_cb cb" << x + CB_BASE_OFFSET;
- O << "[" << (((krnl.constSizes[x] + 15) & ~15) >> 4) << "]\n";
- ++constNum;
- mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
- }
- }
- // TODO: If we run out of constant resources, we need to push some of the
- // constant pointers to the software emulated section.
- if (constNum > mSTM->device()->getMaxNumCBs()) {
- assert(0 && "Max constant buffer limit passed!");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]);
- }
- }
-}
-
- const char *
-AMDILKernelManager::getTypeName(const Type *ptr, const char *symTab)
-{
- // symTab argument is ignored...
- LLVMContext& ctx = ptr->getContext();
- switch (ptr->getTypeID()) {
- case Type::StructTyID:
- {
- const StructType *ST = cast<StructType>(ptr);
- if (!ST->isOpaque())
- return "struct";
- // ptr is a pre-LLVM 3.0 "opaque" type.
- StringRef name = ST->getName();
- if (name.equals( "struct._event_t" )) return "event";
- if (name.equals( "struct._image1d_t" )) return "image1d";
- if (name.equals( "struct._image1d_array_t" )) return "image1d_array";
- if (name.equals( "struct._image2d_t" )) return "image2d";
- if (name.equals( "struct._image2d_array_t" )) return "image2d_array";
- if (name.equals( "struct._image3d_t" )) return "image3d";
- if (name.equals( "struct._counter32_t" )) return "counter32";
- if (name.equals( "struct._counter64_t" )) return "counter64";
- return "opaque";
- break;
- }
- case Type::FloatTyID:
- return "float";
- case Type::DoubleTyID:
- {
- const AMDILSubtarget *mSTM= mTM->getSubtargetImpl();
- if (!mSTM->device()->usesHardware(AMDILDeviceInfo::DoubleOps)) {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[DOUBLE_NOT_SUPPORTED]);
- }
- return "double";
- }
- case Type::IntegerTyID:
- {
- if (ptr == Type::getInt8Ty(ctx)) {
- return "i8";
- } else if (ptr == Type::getInt16Ty(ctx)) {
- return "i16";
- } else if (ptr == Type::getInt32Ty(ctx)) {
- return "i32";
- } else if(ptr == Type::getInt64Ty(ctx)) {
- return "i64";
- }
- break;
- }
- default:
- break;
- case Type::ArrayTyID:
- {
- const ArrayType *AT = cast<ArrayType>(ptr);
- const Type *name = AT->getElementType();
- return getTypeName(name, symTab);
- break;
- }
- case Type::VectorTyID:
- {
- const VectorType *VT = cast<VectorType>(ptr);
- const Type *name = VT->getElementType();
- return getTypeName(name, symTab);
- break;
- }
- case Type::PointerTyID:
- {
- const PointerType *PT = cast<PointerType>(ptr);
- const Type *name = PT->getElementType();
- return getTypeName(name, symTab);
- break;
- }
- case Type::FunctionTyID:
- {
- const FunctionType *FT = cast<FunctionType>(ptr);
- const Type *name = FT->getReturnType();
- return getTypeName(name, symTab);
- break;
- }
- }
- ptr->dump();
- mMFI->addErrorMsg(amd::CompilerErrorMessage[UNKNOWN_TYPE_NAME]);
- return "unknown";
-}
-
-void AMDILKernelManager::emitLiterals(llvm::raw_ostream &O) {
- char buffer[256];
- std::map<uint32_t, uint32_t>::iterator ilb, ile;
- for (ilb = mMFI->begin_32(), ile = mMFI->end_32(); ilb != ile; ++ilb) {
- uint32_t a = ilb->first;
- O << "dcl_literal l" <<ilb->second<< ", ";
- sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x", a, a, a, a);
- O << buffer << "; f32:i32 " << ilb->first << "\n";
- }
- std::map<uint64_t, uint32_t>::iterator llb, lle;
- for (llb = mMFI->begin_64(), lle = mMFI->end_64(); llb != lle; ++llb) {
- uint32_t v[2];
- uint64_t a = llb->first;
- memcpy(v, &a, sizeof(uint64_t));
- O << "dcl_literal l" <<llb->second<< ", ";
- sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f64:i64 ",
- v[0], v[1], v[0], v[1]);
- O << buffer << llb->first << "\n";
- }
- std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator vlb, vle;
- for (vlb = mMFI->begin_128(), vle = mMFI->end_128(); vlb != vle; ++vlb) {
- uint32_t v[2][2];
- uint64_t a = vlb->first.first;
- uint64_t b = vlb->first.second;
- memcpy(v[0], &a, sizeof(uint64_t));
- memcpy(v[1], &b, sizeof(uint64_t));
- O << "dcl_literal l" << vlb->second << ", ";
- sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f128:i128 ",
- v[0][0], v[0][1], v[1][0], v[1][1]);
- O << buffer << vlb->first.first << vlb->first.second << "\n";
- }
-}
-
-// If the value is not known, then the uav is set, otherwise the mValueIDMap
-// is used.
-void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID) {
- if (value) {
- mValueIDMap[value] = ID;
- }
-}
-
-uint32_t AMDILKernelManager::getUAVID(const Value *value) {
- if (mValueIDMap.find(value) != mValueIDMap.end()) {
- return mValueIDMap[value];
- }
-
- if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
- return mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
- } else {
- return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
- }
-}
-
diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.h b/src/gallium/drivers/radeon/AMDILKernelManager.h
deleted file mode 100644
index d5eb296cbf2..00000000000
--- a/src/gallium/drivers/radeon/AMDILKernelManager.h
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- AMDILKernelManager.h - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Class that handles the metadata/abi management for the
-// ASM printer. Handles the parsing and generation of the metadata
-// for each kernel and keeps track of its arguments.
-//
-//==-----------------------------------------------------------------------===//
-#ifndef _AMDILKERNELMANAGER_H_
-#define _AMDILKERNELMANAGER_H_
-#include "AMDIL.h"
-#include "AMDILDevice.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Function.h"
-
-#include <map>
-#include <set>
-#include <string>
-
-#define IMAGETYPE_2D 0
-#define IMAGETYPE_3D 1
-#define RESERVED_LIT_COUNT 6
-
-namespace llvm {
-class AMDILGlobalManager;
-class AMDILSubtarget;
-class AMDILMachineFunctionInfo;
-class AMDILTargetMachine;
-class AMDILAsmPrinter;
-class StructType;
-class Value;
-class TypeSymbolTable;
-class MachineFunction;
-class MachineInstr;
-class ConstantFP;
-class PrintfInfo;
-
-
-class AMDILKernelManager {
-public:
- typedef enum {
- RELEASE_ONLY,
- DEBUG_ONLY,
- ALWAYS
- } ErrorMsgEnum;
- AMDILKernelManager(AMDILTargetMachine *TM, AMDILGlobalManager *GM);
- virtual ~AMDILKernelManager();
-
- /// Clear the state of the KernelManager putting it in its most initial state.
- void clear();
- void setMF(MachineFunction *MF);
-
- /// Process the specific kernel parsing out the parameter information for the
- /// kernel.
- void processArgMetadata(llvm::raw_ostream &O,
- uint32_t buf, bool kernel);
-
-
- /// Prints the header for the kernel which includes the groupsize declaration
- /// and calculation of the local/group/global id's.
- void printHeader(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O,
- const std::string &name);
-
- virtual void printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O);
- virtual void printGroupSize(llvm::raw_ostream &O);
-
- /// Copies the data from the runtime setup constant buffers into registers so
- /// that the program can correctly access memory or data that was set by the
- /// host program.
- void printArgCopies(llvm::raw_ostream &O, AMDILAsmPrinter* RegNames);
-
- /// Prints out the end of the function.
- void printFooter(llvm::raw_ostream &O);
-
- /// Prints out the metadata for the specific function depending if it is a
- /// kernel or not.
- void printMetaData(llvm::raw_ostream &O, uint32_t id, bool isKernel = false);
-
- /// Set bool value on whether to consider the function a kernel or a normal
- /// function.
- void setKernel(bool kernel);
-
- /// Set the unique ID of the kernel/function.
- void setID(uint32_t id);
-
- /// Set the name of the kernel/function.
- void setName(const std::string &name);
-
- /// Flag to specify whether the function is a kernel or not.
- bool isKernel();
-
- /// Flag that specifies whether this function has a kernel wrapper.
- bool wasKernel();
-
- void getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O);
-
- // Returns whether a compiler needs to insert a write to memory or not.
- bool useCompilerWrite(const MachineInstr *MI);
-
- // Set the flag that there exists an image write.
- void setImageWrite();
- void setOutputInst();
-
- const char *getTypeName(const Type *name, const char * symTab);
-
- void emitLiterals(llvm::raw_ostream &O);
-
- // Set the uav id for the specific pointer value. If value is NULL, then the
- // ID sets the default ID.
- void setUAVID(const Value *value, uint32_t ID);
-
- // Get the UAV id for the specific pointer value.
- uint32_t getUAVID(const Value *value);
-
-private:
-
- /// Helper function that prints the actual metadata and should only be called
- /// by printMetaData.
- void printKernelArgs(llvm::raw_ostream &O);
- void printCopyStructPrivate(const StructType *ST,
- llvm::raw_ostream &O,
- size_t stackSize,
- uint32_t Buffer,
- uint32_t mLitIdx,
- uint32_t &counter);
- virtual void
- printConstantToRegMapping(AMDILAsmPrinter *RegNames,
- uint32_t &LII,
- llvm::raw_ostream &O,
- uint32_t &counter,
- uint32_t Buffer,
- uint32_t n,
- const char *lit = NULL,
- uint32_t fcall = 0,
- bool isImage = false,
- bool isHWCB = false);
- void updatePtrArg(llvm::Function::const_arg_iterator Ip,
- int numWriteImages,
- int raw_uav_buffer,
- int counter,
- bool isKernel,
- const Function *F);
- /// Name of the current kernel.
- std::string mName;
- uint32_t mUniqueID;
- bool mIsKernel;
- bool mWasKernel;
- bool mCompilerWrite;
- /// Flag to specify if an image write has occured or not in order to not add a
- /// compiler specific write if no other writes to memory occured.
- bool mHasImageWrite;
- bool mHasOutputInst;
-
- /// Map from const Value * to UAV ID.
- std::map<const Value *, uint32_t> mValueIDMap;
-
- AMDILTargetMachine * mTM;
- const AMDILSubtarget * mSTM;
- AMDILGlobalManager * mGM;
- /// This is the global offset of the printf string id's.
- MachineFunction *mMF;
- AMDILMachineFunctionInfo *mMFI;
-}; // class AMDILKernelManager
-
-} // llvm namespace
-#endif // _AMDILKERNELMANAGER_H_
diff --git a/src/gallium/drivers/radeon/AMDILLiteralManager.cpp b/src/gallium/drivers/radeon/AMDILLiteralManager.cpp
index 43167f57001..52bf7674e7d 100644
--- a/src/gallium/drivers/radeon/AMDILLiteralManager.cpp
+++ b/src/gallium/drivers/radeon/AMDILLiteralManager.cpp
@@ -12,7 +12,6 @@
#include "AMDIL.h"
#include "AMDILAlgorithms.tpp"
-#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILTargetMachine.h"
@@ -43,7 +42,6 @@ namespace {
bool trackLiterals(MachineBasicBlock::iterator *bbb);
TargetMachine &TM;
const AMDILSubtarget *mSTM;
- AMDILKernelManager *mKM;
AMDILMachineFunctionInfo *mMFI;
int32_t mLitIdx;
bool mChanged;
@@ -71,7 +69,6 @@ bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF) {
const AMDILTargetMachine *amdtm =
reinterpret_cast<const AMDILTargetMachine *>(&TM);
mSTM = dynamic_cast<const AMDILSubtarget *>(amdtm->getSubtargetImpl());
- mKM = const_cast<AMDILKernelManager *>(mSTM->getKernelManager());
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
std::bind1st(std::mem_fun(&AMDILLiteralManager::trackLiterals), this));
return mChanged;
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
index 9383bfcb77b..4859fe9df51 100644
--- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
@@ -16,8 +16,6 @@
#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
-#include "AMDILGlobalManager.h"
-#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/Statistic.h"
@@ -39,7 +37,6 @@ STATISTIC(PointerAssignments, "Number of dynamic pointer "
"assigments discovered");
STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
#endif
-STATISTIC(LocalFuncs, "Number of get_local_size(N) functions removed");
using namespace llvm;
// The Peephole optimization pass is used to do simple last minute optimizations
@@ -76,7 +73,6 @@ private:
void doIsConstCallConversionIfNeeded();
bool mChanged;
bool mDebug;
- bool mRWGOpt;
bool mConvertAtomics;
CodeGenOpt::Level optLevel;
// Run a series of tests to see if we can optimize a CALL instruction.
@@ -104,7 +100,6 @@ private:
// specified then the result of get_local_size is known at compile time and
// can be returned accordingly.
bool isRWGLocalOpt(CallInst *CI);
- void expandRWGLocalOpt(CallInst *CI);
// On northern island cards, the division is slightly less accurate than on
// previous generations, so we need to utilize a more accurate division. So we
// can translate the accurate divide to a normal divide on all other cards.
@@ -251,15 +246,12 @@ AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
// arena path.
Function::arg_iterator argB = F.arg_begin();
Function::arg_iterator argE = F.arg_end();
- AMDILKernelManager *KM = mSTM->getKernelManager();
AMDILMachineFunctionInfo *mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
.getInfo<AMDILMachineFunctionInfo>();
for (; argB != argE; ++argB) {
if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
- KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
} else {
- KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
}
}
@@ -276,16 +268,6 @@ AMDILPeepholeOpt::runOnFunction(Function &MF)
}
mCTX = &MF.getType()->getContext();
mConvertAtomics = true;
- if (dumpAllIntoArena(MF)) {
- for (Function::const_arg_iterator cab = MF.arg_begin(),
- cae = MF.arg_end(); cab != cae; ++cab) {
- const Argument *arg = cab;
- AMDILKernelManager *KM = mSTM->getKernelManager();
- KM->setUAVID(getBasePointerValue(arg),
- mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
- }
- }
- mRWGOpt = mSTM->getGlobalManager()->hasRWG(MF.getName());
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
this));
@@ -313,10 +295,6 @@ AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
CI->eraseFromParent();
return true;
}
- if (isRWGLocalOpt(CI)) {
- expandRWGLocalOpt(CI);
- return false;
- }
if (propagateSamplerInst(CI)) {
return false;
}
@@ -390,26 +368,7 @@ AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
}
StringRef name = F->getName();
if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
- Value *ptr = CI->getOperand(0);
- const Value *basePtr = getBasePointerValue(ptr);
- const Argument *Arg = dyn_cast<Argument>(basePtr);
- if (Arg) {
- AMDILGlobalManager *GM = mSTM->getGlobalManager();
- int32_t id = GM->getArgID(Arg);
- if (id >= 0) {
- std::stringstream ss;
- ss << name.data() << "_" << id << '\n';
- std::string val;
- ss >> val;
- F = dyn_cast<Function>(
- F->getParent() ->getOrInsertFunction(val, F->getFunctionType()));
- atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
- } else {
- mConvertAtomics = false;
- }
- } else {
- mConvertAtomics = false;
- }
+ mConvertAtomics = false;
}
return false;
}
@@ -1088,28 +1047,11 @@ AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
bool
AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
{
- return (CI != NULL && mRWGOpt
+ return (CI != NULL
&& CI->getOperand(CI->getNumOperands() - 1)->getName()
== "__amdil_get_local_size_int");
}
-void
-AMDILPeepholeOpt::expandRWGLocalOpt(CallInst *CI)
-{
- assert(isRWGLocalOpt(CI) &&
- "This optmization only works when the call inst is get_local_size!");
- std::vector<Constant *> consts;
- for (uint32_t x = 0; x < 3; ++x) {
- uint32_t val = mSTM->getGlobalManager()->getLocal(mF->getName(), x);
- consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), val));
- }
- consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), 0));
- Value *cVec = ConstantVector::get(consts);
- CI->replaceAllUsesWith(cVec);
- ++LocalFuncs;
- return;
-}
-
bool
AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
{
diff --git a/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp b/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp
index 95614f477c0..17d9e8e3013 100644
--- a/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp
+++ b/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp
@@ -15,7 +15,6 @@
#endif
#include "AMDILAlgorithms.tpp"
-#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILModuleInfo.h"
#include "AMDILTargetMachine.h"
@@ -50,7 +49,6 @@ namespace
private:
bool expandPrintf(BasicBlock::iterator *bbb);
AMDILMachineFunctionInfo *mMFI;
- AMDILKernelManager *mKM;
bool mChanged;
SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
};
@@ -256,7 +254,6 @@ AMDILPrintfConvert::expandPrintf(BasicBlock::iterator *bbb)
AMDILPrintfConvert::runOnFunction(Function &MF)
{
mChanged = false;
- mKM = TM.getSubtarget<AMDILSubtarget>().getKernelManager();
mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
.getInfo<AMDILMachineFunctionInfo>();
bVecMap.clear();
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
index 898833d9c0e..11b6bbe0c01 100644
--- a/src/gallium/drivers/radeon/AMDILSubtarget.cpp
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
@@ -14,8 +14,6 @@
#include "AMDILSubtarget.h"
#include "AMDIL.h"
#include "AMDILDevices.h"
-#include "AMDILGlobalManager.h"
-#include "AMDILKernelManager.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index bfffdd1eeb8..524289f5e0d 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -26,13 +26,11 @@ CPP_SOURCES := \
AMDILEvergreenDevice.cpp \
AMDILELFWriterInfo.cpp \
AMDILFrameLowering.cpp \
- AMDILGlobalManager.cpp \
AMDILInliner.cpp \
AMDILInstrInfo.cpp \
AMDILIntrinsicInfo.cpp \
AMDILISelDAGToDAG.cpp \
AMDILISelLowering.cpp \
- AMDILKernelManager.cpp \
AMDILLiteralManager.cpp \
AMDILMachineFunctionInfo.cpp \
AMDILMachinePeephole.cpp \