summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon/AMDILKernelManager.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon/AMDILKernelManager.cpp')
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.cpp1356
1 files changed, 1356 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.cpp b/src/gallium/drivers/radeon/AMDILKernelManager.cpp
new file mode 100644
index 00000000000..4df81ff5078
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILKernelManager.cpp
@@ -0,0 +1,1356 @@
+//===-- AMDILKernelManager.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILKernelManager.h"
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILKernelManager.h"
+#ifdef UPSTREAM_LLVM
+#include "AMDILAsmPrinter.h"
+#endif
+#include "AMDILCompilerErrors.h"
+#include "AMDILDeviceInfo.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+#define NUM_EXTRA_SLOTS_PER_IMAGE 1
+
+static bool errorPrint(const char *ptr, llvm::raw_ostream &O) {
+ if (ptr[0] == 'E') {
+ O << ";error:" << ptr << "\n";
+ } else {
+ O << ";warning:" << ptr << "\n";
+ }
+ return false;
+}
+
+#if 0
+static bool
+samplerPrint(StringMap<SamplerInfo>::iterator &data, llvm::raw_ostream &O) {
+ O << ";sampler:" << (*data).second.name << ":" << (*data).second.idx
+ << ":" << ((*data).second.val == (uint32_t)-1 ? 0 : 1)
+ << ":" << ((*data).second.val != (uint32_t)-1 ? (*data).second.val : 0)
+ << "\n";
+ return false;
+}
+#endif
+
+static bool arenaPrint(uint32_t val, llvm::raw_ostream &O) {
+ if (val >= ARENA_SEGMENT_RESERVED_UAVS) {
+ O << "dcl_arena_uav_id(" << val << ")\n";
+ }
+ return false;
+}
+
+static bool uavPrint(uint32_t val, llvm::raw_ostream &O) {
+ if (val < 8 || val == 11){
+ O << "dcl_raw_uav_id(" << val << ")\n";
+ }
+ return false;
+}
+
+static bool uavPrintSI(uint32_t val, llvm::raw_ostream &O) {
+ O << "dcl_typeless_uav_id(" << val << ")_stride(4)_length(4)_access(read_write)\n";
+ return false;
+}
+
+static bool
+printfPrint(std::pair<const std::string, PrintfInfo *> &data, llvm::raw_ostream &O) {
+ O << ";printf_fmt:" << data.second->getPrintfID();
+ // Number of operands
+ O << ":" << data.second->getNumOperands();
+ // Size of each operand
+ for (size_t i = 0, e = data.second->getNumOperands(); i < e; ++i) {
+ O << ":" << (data.second->getOperandID(i) >> 3);
+ }
+ const char *ptr = data.first.c_str();
+ uint32_t size = data.first.size() - 1;
+ // The format string size
+ O << ":" << size << ":";
+ for (size_t i = 0; i < size; ++i) {
+ if (ptr[i] == '\r') {
+ O << "\\r";
+ } else if (ptr[i] == '\n') {
+ O << "\\n";
+ } else {
+ O << ptr[i];
+ }
+ }
+ O << ";\n"; // c_str() is cheap way to trim
+ return false;
+}
+
+
+void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip,
+ int numWriteImages, int raw_uav_buffer,
+ int counter, bool isKernel,
+ const Function *F) {
+ assert(F && "Cannot pass a NULL Pointer to F!");
+ assert(Ip->getType()->isPointerTy() &&
+ "Argument must be a pointer to be passed into this function!\n");
+ std::string ptrArg(";pointer:");
+ const char *symTab = "NoSymTab";
+ uint32_t ptrID = getUAVID(Ip);
+ const PointerType *PT = cast<PointerType>(Ip->getType());
+ uint32_t Align = 4;
+ const char *MemType = "uav";
+ if (PT->getElementType()->isSized()) {
+ Align = NextPowerOf2((uint32_t)mTM->getTargetData()->
+ getTypeAllocSize(PT->getElementType()));
+ }
+ ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab) + ":1:1:" +
+ itostr(counter * 16) + ":";
+ switch (PT->getAddressSpace()) {
+ case AMDILAS::ADDRESS_NONE:
+ //O << "No Address space qualifier!";
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ assert(1);
+ break;
+ case AMDILAS::GLOBAL_ADDRESS:
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+ if (ptrID >= ARENA_SEGMENT_RESERVED_UAVS) {
+ ptrID = 8;
+ }
+ }
+ mMFI->uav_insert(ptrID);
+ break;
+ case AMDILAS::CONSTANT_ADDRESS: {
+ if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)){
+ const kernel t = mGM->getKernel(F->getName());
+ if (mGM->usesHWConstant(t, Ip->getName())) {
+ MemType = "hc\0";
+ ptrID = mGM->getConstPtrCB(t, Ip->getName());
+ } else {
+ MemType = "c\0";
+ mMFI->uav_insert(ptrID);
+ }
+ } else {
+ MemType = "c\0";
+ mMFI->uav_insert(ptrID);
+ }
+ break;
+ }
+ default:
+ case AMDILAS::PRIVATE_ADDRESS:
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ MemType = (mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV))
+ ? "up\0" : "hp\0";
+ } else {
+ MemType = "p\0";
+ mMFI->uav_insert(ptrID);
+ }
+ break;
+ case AMDILAS::REGION_ADDRESS:
+ mMFI->setUsesRegion();
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ MemType = "hr\0";
+ ptrID = 0;
+ } else {
+ MemType = "r\0";
+ mMFI->uav_insert(ptrID);
+ }
+ break;
+ case AMDILAS::LOCAL_ADDRESS:
+ mMFI->setUsesLocal();
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ MemType = "hl\0";
+ ptrID = 1;
+ } else {
+ MemType = "l\0";
+ mMFI->uav_insert(ptrID);
+ }
+ break;
+ };
+ ptrArg += std::string(MemType) + ":";
+ ptrArg += itostr(ptrID) + ":";
+ ptrArg += itostr(Align);
+ mMFI->addMetadata(ptrArg, true);
+}
+
+AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM,
+ AMDILGlobalManager *GM)
+{
+ mTM = TM;
+ mSTM = mTM->getSubtargetImpl();
+ mGM = GM;
+ clear();
+}
+
+AMDILKernelManager::~AMDILKernelManager() {
+ clear();
+}
+
+void
+AMDILKernelManager::setMF(MachineFunction *MF)
+{
+ mMF = MF;
+ mMFI = MF->getInfo<AMDILMachineFunctionInfo>();
+}
+
+void AMDILKernelManager::clear() {
+ mUniqueID = 0;
+ mIsKernel = false;
+ mWasKernel = false;
+ mHasImageWrite = false;
+ mHasOutputInst = false;
+}
+
+bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI) {
+ return (MI->getOpcode() == AMDIL::RETURN && wasKernel() && !mHasImageWrite
+ && !mHasOutputInst);
+}
+
+void AMDILKernelManager::processArgMetadata(llvm::raw_ostream &O,
+ uint32_t buf,
+ bool isKernel)
+{
+ const Function *F = mMF->getFunction();
+ const char * symTab = "NoSymTab";
+ Function::const_arg_iterator Ip = F->arg_begin();
+ Function::const_arg_iterator Ep = F->arg_end();
+
+ if (F->hasStructRetAttr()) {
+ assert(Ip != Ep && "Invalid struct return fucntion!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++Ip;
+ }
+ uint32_t mCBSize = 0;
+ int raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+ bool MultiUAV = mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV);
+ bool ArenaSegment =
+ mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment);
+ int numWriteImages =
+ mSTM->getGlobalManager()->getNumWriteImages(F->getName());
+ if (numWriteImages == OPENCL_MAX_WRITE_IMAGES || MultiUAV || ArenaSegment) {
+ if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+ }
+ }
+ uint32_t CounterNum = 0;
+ uint32_t ROArg = 0;
+ uint32_t WOArg = 0;
+ uint32_t NumArg = 0;
+ while (Ip != Ep) {
+ Type *cType = Ip->getType();
+ if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) {
+ std::string argMeta(";value:");
+ argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab) + ":";
+ int bitsize = cType->getPrimitiveSizeInBits();
+ int numEle = 1;
+ if (cType->getTypeID() == Type::VectorTyID) {
+ numEle = cast<VectorType>(cType)->getNumElements();
+ }
+ argMeta += itostr(numEle) + ":1:" + itostr(mCBSize << 4);
+ mMFI->addMetadata(argMeta, true);
+
+ // FIXME: simplify
+ if ((bitsize / numEle) < 32) {
+ bitsize = numEle >> 2;
+ } else {
+ bitsize >>= 7;
+ }
+ if (!bitsize) {
+ bitsize = 1;
+ }
+
+ mCBSize += bitsize;
+ ++NumArg;
+ } else if (const PointerType *PT = dyn_cast<PointerType>(cType)) {
+ Type *CT = PT->getElementType();
+ const StructType *ST = dyn_cast<StructType>(CT);
+ if (ST && ST->isOpaque()) {
+ StringRef name = ST->getName();
+ bool i1d = name.equals( "struct._image1d_t" );
+ bool i1da = name.equals( "struct._image1d_array_t" );
+ bool i1db = name.equals( "struct._image1d_buffer_t" );
+ bool i2d = name.equals( "struct._image2d_t" );
+ bool i2da = name.equals( "struct._image2d_array_t" );
+ bool i3d = name.equals( "struct._image3d_t" );
+ bool c32 = name.equals( "struct._counter32_t" );
+ bool c64 = name.equals( "struct._counter64_t" );
+ if (i1d || i1da || i1db || i2d | i2da || i3d) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+ std::string imageArg(";image:");
+ imageArg += Ip->getName().str() + ":";
+ if (i1d) imageArg += "1D:";
+ else if (i1da) imageArg += "1DA:";
+ else if (i1db) imageArg += "1DB:";
+ else if (i2d) imageArg += "2D:";
+ else if (i2da) imageArg += "2DA:";
+ else if (i3d) imageArg += "3D:";
+
+ if (isKernel) {
+ if (mGM->isReadOnlyImage (mMF->getFunction()->getName(),
+ (ROArg + WOArg))) {
+ imageArg += "RO:" + itostr(ROArg);
+ O << "dcl_resource_id(" << ROArg << ")_type(";
+ if (i1d) O << "1d";
+ else if (i1da) O << "1darray";
+ else if (i1db) O << "buffer";
+ else if (i2d) O << "2d";
+ else if (i2da) O << "2darray";
+ else if (i3d) O << "3d";
+ O << ")_fmtx(unknown)_fmty(unknown)"
+ << "_fmtz(unknown)_fmtw(unknown)\n";
+ ++ROArg;
+ } else if (mGM->isWriteOnlyImage(mMF->getFunction()->getName(),
+ (ROArg + WOArg))) {
+ uint32_t offset = 0;
+ offset += WOArg;
+ imageArg += "WO:" + itostr(offset & 0x7);
+ O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type(";
+ if (i1d) O << "1d";
+ else if (i1da) O << "1darray";
+ else if (i1db) O << "buffer";
+ else if (i2d) O << "2d";
+ else if (i2da) O << "2darray";
+ else if (i3d) O << "3d";
+ O << ")_fmtx(uint)\n";
+ ++WOArg;
+ } else {
+ imageArg += "RW:" + itostr(ROArg + WOArg);
+ }
+ }
+ imageArg += ":1:" + itostr(mCBSize * 16);
+ mMFI->addMetadata(imageArg, true);
+ mMFI->addi32Literal(mCBSize);
+ mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1;
+ ++NumArg;
+ } else {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+ ++NumArg;
+ }
+ } else if (c32 || c64) {
+ std::string counterArg(";counter:");
+ counterArg += Ip->getName().str() + ":"
+ + itostr(c32 ? 32 : 64) + ":"
+ + itostr(CounterNum++) + ":1:" + itostr(mCBSize * 16);
+ mMFI->addMetadata(counterArg, true);
+ ++NumArg;
+ ++mCBSize;
+ } else {
+ updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel,
+ F);
+ ++NumArg;
+ ++mCBSize;
+ }
+ }
+ else if (CT->getTypeID() == Type::StructTyID
+ && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ const TargetData *td = mTM->getTargetData();
+ const StructLayout *sl = td->getStructLayout(dyn_cast<StructType>(CT));
+ int bytesize = sl->getSizeInBytes();
+ int reservedsize = (bytesize + 15) & ~15;
+ int numSlots = reservedsize >> 4;
+ if (!numSlots) {
+ numSlots = 1;
+ }
+ std::string structArg(";value:");
+ structArg += Ip->getName().str() + ":struct:"
+ + itostr(bytesize) + ":1:" + itostr(mCBSize * 16);
+ mMFI->addMetadata(structArg, true);
+ mCBSize += numSlots;
+ ++NumArg;
+ } else if (CT->isIntOrIntVectorTy()
+ || CT->isFPOrFPVectorTy()
+ || CT->getTypeID() == Type::ArrayTyID
+ || CT->getTypeID() == Type::PointerTyID
+ || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, F);
+ ++NumArg;
+ ++mCBSize;
+ } else {
+ assert(0 && "Cannot process current pointer argument");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++NumArg;
+ }
+ } else {
+ assert(0 && "Cannot process current kernel argument");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++NumArg;
+ }
+ ++Ip;
+ }
+}
+
+void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter,
+ llvm::raw_ostream &O,
+ const std::string &name) {
+#ifdef UPSTREAM_LLVM
+ mName = name;
+ std::string kernelName;
+ kernelName = name;
+ int kernelId = mGM->getOrCreateFunctionID(kernelName);
+ O << "func " << kernelId << " ; " << kernelName << "\n";
+ if (mSTM->is64bit()) {
+ O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].xy\n";
+ } else {
+ O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].x\n";
+ }
+ O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0\n";
+#endif
+}
+
+void AMDILKernelManager::printGroupSize(llvm::raw_ostream& O) {
+ // The HD4XXX generation of hardware does not support a 3D launch, so we need
+ // to use dcl_num_thread_per_group to specify the launch size. If the launch
+ // size is specified via a kernel attribute, we print it here. Otherwise we
+ // use the the default size.
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ if (mGM->hasRWG(mName)
+ || !mMFI->usesLocal()) {
+ // if the user has specified what the required workgroup size is then we
+ // need to compile for that size and that size only. Otherwise we compile
+ // for the max workgroup size that is passed in as an option to the
+ // backend.
+ O << "dcl_num_thread_per_group ";
+ O << mGM->getLocal(mName, 0) << ", ";
+ O << mGM->getLocal(mName, 1) << ", ";
+ O << mGM->getLocal(mName, 2) << " \n";
+ } else {
+ // If the kernel uses local memory, then the kernel is being
+ // compiled in single wavefront mode. So we have to generate code slightly
+ // different.
+ O << "dcl_num_thread_per_group "
+ << mSTM->device()->getWavefrontSize()
+ << ", 1, 1 \n";
+ }
+ } else {
+ // Otherwise we generate for devices that support 3D launch natively. If
+ // the reqd_workgroup_size attribute was specified, then we can specify the
+ // exact launch dimensions.
+ if (mGM->hasRWG(mName)) {
+ O << "dcl_num_thread_per_group ";
+ O << mGM->getLocal(mName, 0) << ", ";
+ O << mGM->getLocal(mName, 1) << ", ";
+ O << mGM->getLocal(mName, 2) << " \n";
+ } else {
+ // Otherwise we specify the largest workgroup size that can be launched.
+ O << "dcl_max_thread_per_group " << mGM->getLocal(mName, 3) << " \n";
+ }
+ }
+ // Now that we have specified the workgroup size, lets declare the local
+ // memory size. If we are using hardware and we know the value at compile
+ // time, then we need to declare the correct value. Otherwise we should just
+ // declare the maximum size.
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ size_t kernelLocalSize = (mGM->getHWLocalSize(mName) + 3) & ~3;
+ if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]);
+ }
+ // If there is a local pointer as a kernel argument, we don't know the size
+ // at compile time, so we reserve all of the space.
+ if (mMFI->usesLocal() && (mMFI->hasLocalArg() || !kernelLocalSize)) {
+ O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
+ << mSTM->device()->getMaxLDSSize() << "\n";
+ mMFI->setUsesMem(AMDILDevice::LDS_ID);
+ } else if (kernelLocalSize) {
+ // We know the size, so lets declare it correctly.
+ O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
+ << kernelLocalSize << "\n";
+ mMFI->setUsesMem(AMDILDevice::LDS_ID);
+ }
+ }
+ // If the device supports the region memory extension, which maps to our
+ // hardware GDS memory, then lets declare it so we can use it later on.
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ size_t kernelGDSSize = (mGM->getHWRegionSize(mName) + 3) & ~3;
+ if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]);
+ }
+ // If there is a region pointer as a kernel argument, we don't know the size
+ // at compile time, so we reserved all of the space.
+ if (mMFI->usesRegion() && (mMFI->hasRegionArg() || !kernelGDSSize)) {
+ O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
+ ") " << mSTM->device()->getMaxGDSSize() << "\n";
+ mMFI->setUsesMem(AMDILDevice::GDS_ID);
+ } else if (kernelGDSSize) {
+ // We know the size, so lets declare it.
+ O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
+ ") " << kernelGDSSize << "\n";
+ mMFI->setUsesMem(AMDILDevice::GDS_ID);
+ }
+ }
+}
+
+void
+AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O) {
+ // If we are a HD4XXX generation device, then we only support a single uav
+ // surface, so we declare it and leave
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ O << "dcl_raw_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ")\n";
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ getIntrinsicSetup(AsmPrinter, O);
+ return;
+ }
+ // If we are supporting multiple uav's view the MultiUAV capability, then we
+ // need to print out the declarations here. MultiUAV conflicts with write
+ // images, so they only use 8 - NumWriteImages uav's. Therefor only pointers
+ // with ID's < 8 will get printed.
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+ binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrint, O);
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ }
+ // If arena segments are supported, then we should emit them now. Arena
+ // segments are similiar to MultiUAV, except ArenaSegments are virtual and up
+ // to 1024 of them can coexist. These are more compiler hints for CAL and thus
+ // cannot overlap in any form. Each ID maps to a seperate piece of memory and
+ // CAL determines whether the load/stores should go to the fast path/slow path
+ // based on the usage and instruction.
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+ binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O);
+ }
+ // Now that we have printed out all of the arena and multi uav declaration,
+ // now we must print out the default raw uav id. This always exists on HD5XXX
+ // and HD6XXX hardware. The reason is that the hardware supports 12 UAV's and
+ // 11 are taken up by MultiUAV/Write Images and Arena. However, if we do not
+ // have UAV 11 as the raw UAV and there are 8 write images, we must revert
+ // everything to the arena and not print out the default raw uav id.
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD5XXX
+ || mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) {
+ if ((mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) < 11 &&
+ mSTM->getGlobalManager()->getNumWriteImages(mName)
+ != OPENCL_MAX_WRITE_IMAGES
+ && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV))
+ || mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
+ if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
+ && mMFI->uav_count(mSTM->device()->
+ getResourceID(AMDILDevice::RAW_UAV_ID))) {
+ O << "dcl_raw_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+ O << ")\n";
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ }
+ }
+ // If we have not printed out the arena ID yet, then do so here.
+ if (!mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+ && mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaUAV)) {
+ O << "dcl_arena_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) << ")\n";
+ mMFI->setUsesMem(AMDILDevice::ARENA_UAV_ID);
+ }
+ } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrintSI, O);
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ }
+ getIntrinsicSetup(AsmPrinter, O);
+}
+
+void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter,
+ llvm::raw_ostream &O)
+{
+ O << "mov r0.z, vThreadGrpIdFlat.x\n"
+ << "mov r1022.xyz0, vTidInGrp.xyz\n";
+ if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
+ O << "mov r1023.xyz0, vThreadGrpId.xyz\n";
+ } else {
+ O << "imul r0.w, cb0[2].x, cb0[2].y\n"
+ // Calculates the local id.
+ // Calculates the group id.
+ << "umod r1023.x, r0.z, cb0[2].x\n"
+ << "udiv r1023.y, r0.z, cb0[2].x\n"
+ << "umod r1023.y, r1023.y, cb0[2].y\n"
+ << "udiv r1023.z, r0.z, r0.w\n";
+ }
+ // Calculates the global id.
+ if (mGM->hasRWG(mName) && 0) {
+ // Anytime we declare a literal, we need to reserve it, if it is not emitted
+ // in emitLiterals.
+ mMFI->addReservedLiterals(1);
+ O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
+ O << mGM->getLocal(mName, 0) << ", ";
+ O << mGM->getLocal(mName, 1) << ", ";
+ O << mGM->getLocal(mName, 2) << ", ";
+ O << "0\n";
+ O << "imad r1021.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n";
+ mMFI->addReservedLiterals(1);
+ } else {
+ O << "imad r1021.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n";
+ }
+
+ // Add the global/group offset for multi-launch support.
+ O << "iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0\n"
+ << "iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0\n"
+ // moves the flat group id.
+ << "mov r1023.w, r0.z\n";
+#ifdef UPSTREAM_LLVM
+ if (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
+ if (mSTM->is64bit()) {
+ O << "umul " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".x0, r1023.w, cb0[4].z\n"
+ << "i64add " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".xy, cb0[4].xy\n";
+
+ } else {
+ O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".x, r1023.w, cb0[4].y, cb0[4].x\n";
+ }
+ }
+ // Shift the flat group id to be in bytes instead of dwords.
+ O << "ishl r1023.w, r1023.w, l0.z\n";
+ if (mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) {
+ if (mSTM->is64bit()) {
+ O << "umul " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".x0, vAbsTidFlat.x, cb0[3].z\n"
+ << "i64add " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".xy, cb0[3].xy\n";
+
+ } else {
+ O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".x, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n";
+ }
+ } else {
+ O << "mov " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x, l0.0\n";
+ }
+#endif
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ O << "udiv r1024.xyz, r1021.xyz, cb0[10].xyz\n";
+ if (mGM->hasRWR(mName) && 0) {
+ // Anytime we declare a literal, we need to reserve it, if it is not emitted
+ // in emitLiterals.
+ mMFI->addReservedLiterals(1);
+ O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
+ O << mGM->getLocal(mName, 0) << ", ";
+ O << mGM->getLocal(mName, 1) << ", ";
+ O << mGM->getLocal(mName, 2) << ", ";
+ O << "0\n";
+ O << "imad r1025.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n";
+ mMFI->addReservedLiterals(1);
+ } else {
+ O << "imad r1025.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n";
+ }
+ }
+}
+
+void AMDILKernelManager::printFooter(llvm::raw_ostream &O) {
+ O << "ret\n";
+ O << "endfunc ; " << mName << "\n";
+}
+
+void
+AMDILKernelManager::printMetaData(llvm::raw_ostream &O, uint32_t id, bool kernel) {
+ if (kernel) {
+ int kernelId = mGM->getOrCreateFunctionID(mName);
+ mMFI->addCalledFunc(id);
+ mUniqueID = kernelId;
+ mIsKernel = true;
+ }
+ printKernelArgs(O);
+ if (kernel) {
+ mIsKernel = false;
+ mMFI->eraseCalledFunc(id);
+ mUniqueID = id;
+ }
+}
+
+void AMDILKernelManager::setKernel(bool kernel) {
+ mIsKernel = kernel;
+ if (kernel) {
+ mWasKernel = mIsKernel;
+ }
+}
+
+void AMDILKernelManager::setID(uint32_t id)
+{
+ mUniqueID = id;
+}
+
+void AMDILKernelManager::setName(const std::string &name) {
+ mName = name;
+}
+
+bool AMDILKernelManager::isKernel() {
+ return mIsKernel;
+}
+
+bool AMDILKernelManager::wasKernel() {
+ return mWasKernel;
+}
+
+void AMDILKernelManager::setImageWrite() {
+ mHasImageWrite = true;
+}
+
+void AMDILKernelManager::setOutputInst() {
+ mHasOutputInst = true;
+}
+
+void AMDILKernelManager::printConstantToRegMapping(
+ AMDILAsmPrinter *RegNames,
+ uint32_t &LII,
+ llvm::raw_ostream &O,
+ uint32_t &Counter,
+ uint32_t Buffer,
+ uint32_t n,
+ const char *lit,
+ uint32_t fcall,
+ bool isImage,
+ bool isHWCB)
+{
+#ifdef UPSTREAM_LLVM
+ // TODO: This needs to be enabled or SC will never statically index into the
+ // CB when a pointer is used.
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) && isHWCB) {
+ const char *name = RegNames->getRegisterName(LII);
+ O << "mov " << name << ", l5.x\n";
+ ++LII;
+ Counter++;
+ return;
+ }
+ for (uint32_t x = 0; x < n; ++x) {
+ const char *name = RegNames->getRegisterName(LII);
+ if (isImage) {
+ O << "mov " << name << ", l" << mMFI->getIntLits(Counter++) << "\n";
+ } else {
+ O << "mov " << name << ", cb" <<Buffer<< "[" <<Counter++<< "]\n";
+ }
+ switch(fcall) {
+ case 1093:
+ O << "ishr " << name << ", " << name << ".xxyy, l3.0y0y\n"
+ "ishl " << name << ", " << name << ", l3.y\n"
+ "ishr " << name << ", " << name << ", l3.y\n";
+ break;
+ case 1092:
+ O << "ishr " << name << ", " << name << ".xx, l3.0y\n"
+ "ishl " << name << ", " << name << ", l3.y\n"
+ "ishr " << name << ", " << name << ", l3.y\n";
+ break;
+ case 1091:
+ O << "ishr " << name << ", " << name << ".xxxx, l3.0zyx\n"
+ "ishl " << name << ", " << name << ", l3.x\n"
+ "ishr " << name << ", " << name << ", l3.x\n";
+ break;
+ case 1090:
+ O << "ishr " << name << ", " << name << ".xx, l3.0z\n"
+ "ishl " << name << ".xy__, " << name << ".xy, l3.x\n"
+ "ishr " << name << ".xy__, " << name << ".xy, l3.x\n";
+ break;
+ default:
+ break;
+ };
+ if (lit) {
+ O << "ishl " << name << ", " << name
+ << ", " << lit << "\n";
+ O << "ishr " << name << ", " << name
+ << ", " << lit << "\n";
+ }
+ if (isImage) {
+ Counter += NUM_EXTRA_SLOTS_PER_IMAGE;
+ }
+ ++LII;
+ }
+#endif
+}
+
+void
+AMDILKernelManager::printCopyStructPrivate(const StructType *ST,
+ llvm::raw_ostream &O,
+ size_t stackSize,
+ uint32_t Buffer,
+ uint32_t mLitIdx,
+ uint32_t &Counter)
+{
+ size_t n = ((stackSize + 15) & ~15) >> 4;
+ for (size_t x = 0; x < n; ++x) {
+ O << "mov r2, cb" << Buffer << "[" << Counter++ << "]\n";
+ O << "mov r1.x, r0.x\n";
+ if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ O << "ishr r1.x, r1.x, l0.x\n";
+ O << "mov x" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+ <<"[r1.x], r2\n";
+ } else {
+ O << "uav_raw_store_id(" <<
+ mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
+ << ") mem0, r1.x, r2\n";
+ }
+ } else {
+ O << "uav_raw_store_id(" <<
+ mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+ << ") mem0, r1.x, r2\n";
+ }
+ O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n";
+ }
+}
+
+void AMDILKernelManager::printKernelArgs(llvm::raw_ostream &O) {
+ std::string version(";version:");
+ version += itostr(AMDIL_MAJOR_VERSION) + ":"
+ + itostr(AMDIL_MINOR_VERSION) + ":" + itostr(AMDIL_REVISION_NUMBER);
+ O << ";ARGSTART:" <<mName<< "\n";
+ if (mIsKernel) {
+ O << version << "\n";
+ O << ";device:" <<mSTM->getDeviceName() << "\n";
+ }
+ O << ";uniqueid:" <<mUniqueID<< "\n";
+
+ size_t local = mGM->getLocalSize(mName);
+ size_t hwlocal = ((mGM->getHWLocalSize(mName) + 3) & (~0x3));
+ size_t region = mGM->getRegionSize(mName);
+ size_t hwregion = ((mGM->getHWRegionSize(mName) + 3) & (~0x3));
+ bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ bool usehwregion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV);
+ if (mIsKernel) {
+ O << ";memory:" << ((usehwprivate) ?
+ (useuavprivate) ? "uav" : "hw" : "" ) << "private:"
+ <<(((mMFI->getStackSize() + 15) & (~0xF)))<< "\n";
+ }
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ O << ";memory:" << ((usehwregion) ? "hw" : "") << "region:"
+ << ((usehwregion) ? hwregion : hwregion + region) << "\n";
+ }
+ O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:"
+ << ((usehwlocal) ? hwlocal : hwlocal + local) << "\n";
+
+ if (mIsKernel) {
+ if (mGM->hasRWG(mName)) {
+ O << ";cws:" << mGM->getLocal(mName, 0) << ":";
+ O << mGM->getLocal(mName, 1) << ":";
+ O << mGM->getLocal(mName, 2) << "\n";
+ }
+ if (mGM->hasRWR(mName)) {
+ O << ";crs:" << mGM->getRegion(mName, 0) << ":";
+ O << mGM->getRegion(mName, 1) << ":";
+ O << mGM->getRegion(mName, 2) << "\n";
+ }
+ }
+ if (mIsKernel) {
+ for (std::vector<std::string>::iterator ib = mMFI->kernel_md_begin(),
+ ie = mMFI->kernel_md_end(); ib != ie; ++ib) {
+ O << (*ib) << "\n";
+ }
+ }
+ for (std::set<std::string>::iterator ib = mMFI->func_md_begin(),
+ ie = mMFI->func_md_end(); ib != ie; ++ib) {
+ O << (*ib) << "\n";
+ }
+ if (!mMFI->func_empty()) {
+ O << ";function:" << mMFI->func_size();
+ binaryForEach(mMFI->func_begin(), mMFI->func_end(), commaPrint, O);
+ O << "\n";
+ }
+
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::MacroDB)
+ && !mMFI->intr_empty()) {
+ O << ";intrinsic:" << mMFI->intr_size();
+ binaryForEach(mMFI->intr_begin(), mMFI->intr_end(), commaPrint, O);
+ O << "\n";
+ }
+
+ if (!mIsKernel) {
+ binaryForEach(mMFI->printf_begin(), mMFI->printf_end(), printfPrint, O);
+ mMF->getMMI().getObjFileInfo<AMDILModuleInfo>().add_printf_offset(
+ mMFI->printf_size());
+ } else {
+ for (StringMap<SamplerInfo>::iterator
+ smb = mMFI->sampler_begin(),
+ sme = mMFI->sampler_end(); smb != sme; ++ smb) {
+ O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx
+ << ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1)
+ << ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0)
+ << "\n";
+ }
+ }
+ if (mSTM->is64bit()) {
+ O << ";memory:64bitABI\n";
+ }
+
+ if (mMFI->errors_empty()) {
+ binaryForEach(mMFI->errors_begin(), mMFI->errors_end(), errorPrint, O);
+ }
+ // This has to come last
+ if (mIsKernel
+ && mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ if (mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
+ mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+ if (mMFI->uav_size() == 1) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+ && *(mMFI->uav_begin()) >= ARENA_SEGMENT_RESERVED_UAVS) {
+ O << ";uavid:"
+ << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+ O << "\n";
+ } else {
+ O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
+ }
+ } else if (mMFI->uav_count(mSTM->device()->
+ getResourceID(AMDILDevice::RAW_UAV_ID))) {
+ O << ";uavid:"
+ << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+ O << "\n";
+ } else {
+ O << ";uavid:"
+ << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+ O << "\n";
+ }
+ } else if (mSTM->getGlobalManager()->getNumWriteImages(mName) !=
+ OPENCL_MAX_WRITE_IMAGES
+ && !mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+ && mMFI->uav_count(mSTM->device()->
+ getResourceID(AMDILDevice::RAW_UAV_ID))) {
+ O << ";uavid:"
+ << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) << "\n";
+ } else if (mMFI->uav_size() == 1) {
+ O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
+ } else {
+ O << ";uavid:"
+ << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+ O << "\n";
+ }
+ }
+ O << ";ARGEND:" << mName << "\n";
+}
+
+void AMDILKernelManager::printArgCopies(llvm::raw_ostream &O,
+ AMDILAsmPrinter *RegNames)
+{
+ Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+ Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+ uint32_t Counter = 0;
+
+ if (mMFI->getArgSize()) {
+ O << "dcl_cb cb1";
+ O << "[" << (mMFI->getArgSize() >> 4) << "]\n";
+ mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
+ }
+ const Function *F = mMF->getFunction();
+ // Get the stack size
+ uint32_t stackSize = mMFI->getStackSize();
+ uint32_t privateSize = mMFI->getScratchSize();
+ uint32_t stackOffset = (privateSize + 15) & (~0xF);
+ if (stackSize
+ && mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ // TODO: If the size is too large, we need to fall back to software emulated
+ // instead of using the hardware capability.
+ int size = (((stackSize + 15) & (~0xF)) >> 4);
+ if (size > 4096) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]);
+ }
+ if (size) {
+ // For any stack variables, we need to declare the literals for them so that
+ // we can use them when we copy our data to the stack.
+ mMFI->addReservedLiterals(1);
+ // Anytime we declare a literal, we need to reserve it, if it is not emitted
+ // in emitLiterals.
+#ifdef UPSTREAM_LLVM
+ O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize << ", "
+ << privateSize << ", 16, " << ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
+ << "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l"
+ << mMFI->getNumLiterals() << ".w\n";
+ if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ O << "dcl_indexed_temp_array x"
+ << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "["
+ << size << "]\n";
+ } else {
+ O << "dcl_typeless_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+ << ")_stride(4)_length(" << (size << 4 )<< ")_access(private)\n";
+
+ }
+ O << "mov " << RegNames->getRegisterName(AMDIL::FP)
+ << ".x, l" << mMFI->getNumLiterals() << ".0\n";
+#endif
+ mMFI->setUsesMem(AMDILDevice::SCRATCH_ID);
+ }
+ }
+ I = mMF->getFunction()->arg_begin();
+ int32_t count = 0;
+ // uint32_t Image = 0;
+ bool displaced1 = false;
+ bool displaced2 = false;
+ uint32_t curReg = AMDIL::R1;
+ // TODO: We don't handle arguments that were pushed onto the stack!
+ for (; I != Ie; ++I) {
+ Type *curType = I->getType();
+ unsigned int Buffer = 1;
+ O << "; Kernel arg setup: " << I->getName() << "\n";
+ if (curType->isIntegerTy() || curType->isFloatingPointTy()) {
+ switch (curType->getPrimitiveSizeInBits()) {
+ default:
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ break;
+ case 16:
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
+ "l3.y" );
+ break;
+ case 8:
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x" );
+ break;
+ }
+#ifdef UPSTREAM_LLVM
+ } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
+ Type *ET = VT->getElementType();
+ int numEle = VT->getNumElements();
+ switch (ET->getPrimitiveSizeInBits()) {
+ default:
+ if (numEle == 3) {
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".x, cb" << Buffer << "[" << Counter << "].x\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".x, cb" << Buffer << "[" << Counter << "].y\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".x, cb" << Buffer << "[" << Counter << "].z\n";
+ curReg++;
+ Counter++;
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ (numEle+2) >> 2);
+ }
+ break;
+ case 64:
+ if (numEle == 3) {
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".xy, cb" << Buffer << "[" << Counter++ << "].zw\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
+ curReg++;
+ Counter++;
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ (numEle) >> 1);
+ }
+ break;
+ case 16:
+ {
+ switch (numEle) {
+ default:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, (numEle+2) >> 2, "l3.y", 1093);
+ if (numEle == 3) {
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".y\n";
+ ++curReg;
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".z\n";
+ ++curReg;
+ }
+ break;
+ case 2:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, 1, "l3.y", 1092);
+ break;
+ }
+ break;
+ }
+ case 8:
+ {
+ switch (numEle) {
+ default:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, (numEle+2) >> 2, "l3.x", 1091);
+ if (numEle == 3) {
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".y\n";
+ ++curReg;
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".z\n";
+ ++curReg;
+ }
+ break;
+ case 2:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, 1, "l3.x", 1090);
+ break;
+ }
+ break;
+ }
+ }
+#endif
+ } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
+ Type *CT = PT->getElementType();
+ const StructType *ST = dyn_cast<StructType>(CT);
+ if (ST && ST->isOpaque()) {
+ bool i1d = ST->getName() == "struct._image1d_t";
+ bool i1da = ST->getName() == "struct._image1d_array_t";
+ bool i1db = ST->getName() == "struct._image1d_buffer_t";
+ bool i2d = ST->getName() == "struct._image2d_t";
+ bool i2da = ST->getName() == "struct._image2d_array_t";
+ bool i3d = ST->getName() == "struct._image3d_t";
+ bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+ if (is_image) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ 1, NULL, 0, is_image);
+ } else {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+ ++curReg;
+ }
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ }
+ } else if (CT->isStructTy()
+ && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ StructType *ST = dyn_cast<StructType>(CT);
+ bool i1d = ST->getName() == "struct._image1d_t";
+ bool i1da = ST->getName() == "struct._image1d_array_t";
+ bool i1db = ST->getName() == "struct._image1d_buffer_t";
+ bool i2d = ST->getName() == "struct._image2d_t";
+ bool i2da = ST->getName() == "struct._image2d_array_t";
+ bool i3d = ST->getName() == "struct._image3d_t";
+ bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+ if (is_image) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ 1, NULL, 0, is_image);
+ } else {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+ ++curReg;
+ }
+ } else {
+ if (count) {
+ // Anytime we declare a literal, we need to reserve it, if it
+ // is not emitted in emitLiterals.
+ mMFI->addReservedLiterals(1);
+ O << "dcl_literal l" << mMFI->getNumLiterals() << ", "
+ << -stackSize << ", " << stackSize << ", 16, "
+ << stackOffset << "\n";
+ }
+ ++count;
+ size_t structSize;
+ structSize = (getTypeSize(ST) + 15) & ~15;
+ stackOffset += structSize;
+#ifdef UPSTREAM_LLVM
+ O << "mov " << RegNames->getRegisterName((curReg)) << ", l"
+ << mMFI->getNumLiterals()<< ".w\n";
+ if (!displaced1) {
+ O << "mov r1011, r1\n";
+ displaced1 = true;
+ }
+ if (!displaced2 && strcmp(RegNames->getRegisterName(curReg), "r1")) {
+ O << "mov r1010, r2\n";
+ displaced2 = true;
+ }
+#endif
+ printCopyStructPrivate(ST, O, structSize, Buffer, mMFI->getNumLiterals(),
+ Counter);
+ ++curReg;
+ }
+ } else if (CT->isIntOrIntVectorTy()
+ || CT->isFPOrFPVectorTy()
+ || CT->isArrayTy()
+ || CT->isPointerTy()
+ || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ if (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+ const kernel& krnl = mGM->getKernel(F->getName());
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ 1, NULL, 0, false,
+ mGM->usesHWConstant(krnl, I->getName()));
+ } else if (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+ // TODO: If we are region address space, the first region pointer, no
+ // array pointers exist, and hardware RegionMem is enabled then we can
+ // zero out register as the initial offset is zero.
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ } else if (PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+ // TODO: If we are local address space, the first local pointer, no
+ // array pointers exist, and hardware LocalMem is enabled then we can
+ // zero out register as the initial offset is zero.
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ }
+ } else {
+ assert(0 && "Current type is not supported!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++curReg;
+ }
+ } else {
+ assert(0 && "Current type is not supported!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++curReg;
+ }
+ }
+ if (displaced1) {
+ O << "mov r1, r1011\n";
+ }
+ if (displaced2) {
+ O << "mov r2, r1010\n";
+ }
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ const kernel& krnl = mGM->getKernel(F->getName());
+ uint32_t constNum = 0;
+ for (uint32_t x = 0; x < mSTM->device()->getMaxNumCBs(); ++x) {
+ if (krnl.constSizes[x]) {
+ O << "dcl_cb cb" << x + CB_BASE_OFFSET;
+ O << "[" << (((krnl.constSizes[x] + 15) & ~15) >> 4) << "]\n";
+ ++constNum;
+ mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
+ }
+ }
+ // TODO: If we run out of constant resources, we need to push some of the
+ // constant pointers to the software emulated section.
+ if (constNum > mSTM->device()->getMaxNumCBs()) {
+ assert(0 && "Max constant buffer limit passed!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]);
+ }
+ }
+}
+
+ const char *
+AMDILKernelManager::getTypeName(const Type *ptr, const char *symTab)
+{
+ // symTab argument is ignored...
+ LLVMContext& ctx = ptr->getContext();
+ switch (ptr->getTypeID()) {
+ case Type::StructTyID:
+ {
+ const StructType *ST = cast<StructType>(ptr);
+ if (!ST->isOpaque())
+ return "struct";
+ // ptr is a pre-LLVM 3.0 "opaque" type.
+ StringRef name = ST->getName();
+ if (name.equals( "struct._event_t" )) return "event";
+ if (name.equals( "struct._image1d_t" )) return "image1d";
+ if (name.equals( "struct._image1d_array_t" )) return "image1d_array";
+ if (name.equals( "struct._image2d_t" )) return "image2d";
+ if (name.equals( "struct._image2d_array_t" )) return "image2d_array";
+ if (name.equals( "struct._image3d_t" )) return "image3d";
+ if (name.equals( "struct._counter32_t" )) return "counter32";
+ if (name.equals( "struct._counter64_t" )) return "counter64";
+ return "opaque";
+ break;
+ }
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ {
+ const AMDILSubtarget *mSTM= mTM->getSubtargetImpl();
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::DoubleOps)) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[DOUBLE_NOT_SUPPORTED]);
+ }
+ return "double";
+ }
+ case Type::IntegerTyID:
+ {
+ if (ptr == Type::getInt8Ty(ctx)) {
+ return "i8";
+ } else if (ptr == Type::getInt16Ty(ctx)) {
+ return "i16";
+ } else if (ptr == Type::getInt32Ty(ctx)) {
+ return "i32";
+ } else if(ptr == Type::getInt64Ty(ctx)) {
+ return "i64";
+ }
+ break;
+ }
+ default:
+ break;
+ case Type::ArrayTyID:
+ {
+ const ArrayType *AT = cast<ArrayType>(ptr);
+ const Type *name = AT->getElementType();
+ return getTypeName(name, symTab);
+ break;
+ }
+ case Type::VectorTyID:
+ {
+ const VectorType *VT = cast<VectorType>(ptr);
+ const Type *name = VT->getElementType();
+ return getTypeName(name, symTab);
+ break;
+ }
+ case Type::PointerTyID:
+ {
+ const PointerType *PT = cast<PointerType>(ptr);
+ const Type *name = PT->getElementType();
+ return getTypeName(name, symTab);
+ break;
+ }
+ case Type::FunctionTyID:
+ {
+ const FunctionType *FT = cast<FunctionType>(ptr);
+ const Type *name = FT->getReturnType();
+ return getTypeName(name, symTab);
+ break;
+ }
+ }
+ ptr->dump();
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[UNKNOWN_TYPE_NAME]);
+ return "unknown";
+}
+
+void AMDILKernelManager::emitLiterals(llvm::raw_ostream &O) {
+ char buffer[256];
+ std::map<uint32_t, uint32_t>::iterator ilb, ile;
+ for (ilb = mMFI->begin_32(), ile = mMFI->end_32(); ilb != ile; ++ilb) {
+ uint32_t a = ilb->first;
+ O << "dcl_literal l" <<ilb->second<< ", ";
+ sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x", a, a, a, a);
+ O << buffer << "; f32:i32 " << ilb->first << "\n";
+ }
+ std::map<uint64_t, uint32_t>::iterator llb, lle;
+ for (llb = mMFI->begin_64(), lle = mMFI->end_64(); llb != lle; ++llb) {
+ uint32_t v[2];
+ uint64_t a = llb->first;
+ memcpy(v, &a, sizeof(uint64_t));
+ O << "dcl_literal l" <<llb->second<< ", ";
+ sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f64:i64 ",
+ v[0], v[1], v[0], v[1]);
+ O << buffer << llb->first << "\n";
+ }
+ std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator vlb, vle;
+ for (vlb = mMFI->begin_128(), vle = mMFI->end_128(); vlb != vle; ++vlb) {
+ uint32_t v[2][2];
+ uint64_t a = vlb->first.first;
+ uint64_t b = vlb->first.second;
+ memcpy(v[0], &a, sizeof(uint64_t));
+ memcpy(v[1], &b, sizeof(uint64_t));
+ O << "dcl_literal l" << vlb->second << ", ";
+ sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f128:i128 ",
+ v[0][0], v[0][1], v[1][0], v[1][1]);
+ O << buffer << vlb->first.first << vlb->first.second << "\n";
+ }
+}
+
+// If the value is not known, then the uav is set, otherwise the mValueIDMap
+// is used.
+void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID) {
+ if (value) {
+ mValueIDMap[value] = ID;
+ }
+}
+
+uint32_t AMDILKernelManager::getUAVID(const Value *value) {
+ if (mValueIDMap.find(value) != mValueIDMap.end()) {
+ return mValueIDMap[value];
+ }
+
+ if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ return mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+ } else {
+ return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+ }
+}
+