//===-- AMDILKernelManager.cpp - AMDIL kernel metadata manager -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

#include "AMDILKernelManager.h"
#include "AMDILAlgorithms.tpp"
#ifdef UPSTREAM_LLVM
#include "AMDILAsmPrinter.h"
#endif
#include "AMDILCompilerErrors.h"
#include "AMDILDeviceInfo.h"
#include "AMDILDevices.h"
#include "AMDILGlobalManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILModuleInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILTargetMachine.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MathExtras.h"
#include <stdio.h>

using namespace llvm;

#define NUM_EXTRA_SLOTS_PER_IMAGE 1

static bool errorPrint(const char *ptr, llvm::raw_ostream &O) {
  if (ptr[0] == 'E') {
    O << ";error:" << ptr << "\n";
  } else {
    O << ";warning:" << ptr << "\n";
  }
  return false;
}

#if 0
static bool samplerPrint(StringMap<SamplerInfo>::iterator &data,
                         llvm::raw_ostream &O) {
  O << ";sampler:" << (*data).second.name << ":" << (*data).second.idx
    << ":" << ((*data).second.val == (uint32_t)-1 ? 0 : 1)
    << ":" << ((*data).second.val != (uint32_t)-1 ? (*data).second.val : 0)
    << "\n";
  return false;
}
#endif

static bool arenaPrint(uint32_t val, llvm::raw_ostream &O) {
  if (val >= ARENA_SEGMENT_RESERVED_UAVS) {
    O << "dcl_arena_uav_id(" << val << ")\n";
  }
  return false;
}

static bool uavPrint(uint32_t val, llvm::raw_ostream &O) {
  if (val < 8 || val == 11) {
    O << "dcl_raw_uav_id(" << val << ")\n";
  }
  return false;
}

static bool uavPrintSI(uint32_t val, llvm::raw_ostream &O) {
  O << "dcl_typeless_uav_id(" << val
    << ")_stride(4)_length(4)_access(read_write)\n";
  return false;
}

static bool printfPrint(std::pair<const std::string, PrintfInfo *> &data,
                        llvm::raw_ostream &O) {
  O << ";printf_fmt:" << data.second->getPrintfID();
  // Number of operands.
  O << ":" << data.second->getNumOperands();
  // Size of each operand.
  for (size_t i = 0, e = data.second->getNumOperands(); i < e; ++i) {
    O << ":" << (data.second->getOperandID(i) >> 3);
  }
  const char *ptr = data.first.c_str();
  uint32_t size = data.first.size() - 1;
  // The format string size.
  O << ":" << size << ":";
  for (size_t i = 0; i < size; ++i) {
    if (ptr[i] == '\r') {
      O << "\\r";
    } else if (ptr[i] == '\n') {
      O << "\\n";
    } else {
      O << ptr[i];
    }
  }
  O << ";\n"; // c_str() is a cheap way to trim.
  return false;
}

void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip,
                                      int numWriteImages, int raw_uav_buffer,
                                      int counter, bool isKernel,
                                      const Function *F) {
  assert(F && "Cannot pass a NULL Pointer to F!");
  assert(Ip->getType()->isPointerTy() &&
         "Argument must be a pointer to be passed into this function!\n");
  std::string ptrArg(";pointer:");
  const char *symTab = "NoSymTab";
  uint32_t ptrID = getUAVID(Ip);
  const PointerType *PT = cast<PointerType>(Ip->getType());
  uint32_t Align = 4;
  const char *MemType = "uav";
  if (PT->getElementType()->isSized()) {
    Align = NextPowerOf2((uint32_t)mTM->getTargetData()->
                         getTypeAllocSize(PT->getElementType()));
  }
  ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab) + ":1:1:" +
            itostr(counter * 16) + ":";
  switch (PT->getAddressSpace()) {
  case AMDILAS::ADDRESS_NONE:
    //O << "No Address space qualifier!";
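    // Pointer arguments without an explicit address space qualifier are
    // reported as an internal error rather than being given a default segment.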
    mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
    assert(1);
    break;
  case AMDILAS::GLOBAL_ADDRESS:
    if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
      if (ptrID >= ARENA_SEGMENT_RESERVED_UAVS) {
        ptrID = 8;
      }
    }
    mMFI->uav_insert(ptrID);
    break;
  case AMDILAS::CONSTANT_ADDRESS: {
    if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
      const kernel t = mGM->getKernel(F->getName());
      if (mGM->usesHWConstant(t, Ip->getName())) {
        MemType = "hc\0";
        ptrID = mGM->getConstPtrCB(t, Ip->getName());
      } else {
        MemType = "c\0";
        mMFI->uav_insert(ptrID);
      }
    } else {
      MemType = "c\0";
      mMFI->uav_insert(ptrID);
    }
    break;
  }
  default:
  case AMDILAS::PRIVATE_ADDRESS:
    if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
      MemType = (mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV))
                    ? "up\0" : "hp\0";
    } else {
      MemType = "p\0";
      mMFI->uav_insert(ptrID);
    }
    break;
  case AMDILAS::REGION_ADDRESS:
    mMFI->setUsesRegion();
    if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
      MemType = "hr\0";
      ptrID = 0;
    } else {
      MemType = "r\0";
      mMFI->uav_insert(ptrID);
    }
    break;
  case AMDILAS::LOCAL_ADDRESS:
    mMFI->setUsesLocal();
    if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
      MemType = "hl\0";
      ptrID = 1;
    } else {
      MemType = "l\0";
      mMFI->uav_insert(ptrID);
    }
    break;
  };
  ptrArg += std::string(MemType) + ":";
  ptrArg += itostr(ptrID) + ":";
  ptrArg += itostr(Align);
  mMFI->addMetadata(ptrArg, true);
}

AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM,
                                       AMDILGlobalManager *GM) {
  mTM = TM;
  mSTM = mTM->getSubtargetImpl();
  mGM = GM;
  clear();
}

AMDILKernelManager::~AMDILKernelManager() {
  clear();
}

void AMDILKernelManager::setMF(MachineFunction *MF) {
  mMF = MF;
  mMFI = MF->getInfo<AMDILMachineFunctionInfo>();
}

void AMDILKernelManager::clear() {
  mUniqueID = 0;
  mIsKernel = false;
  mWasKernel = false;
  mHasImageWrite = false;
  mHasOutputInst = false;
}

bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI) {
  return (MI->getOpcode() == AMDIL::RETURN && wasKernel() &&
          !mHasImageWrite && !mHasOutputInst);
}

void AMDILKernelManager::processArgMetadata(llvm::raw_ostream &O, uint32_t buf,
                                            bool isKernel) {
  const Function *F = mMF->getFunction();
  const char *symTab = "NoSymTab";
  Function::const_arg_iterator Ip = F->arg_begin();
  Function::const_arg_iterator Ep = F->arg_end();
  if (F->hasStructRetAttr()) {
    assert(Ip != Ep && "Invalid struct return function!");
    mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
    ++Ip;
  }
  uint32_t mCBSize = 0;
  int raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
  bool MultiUAV = mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV);
  bool ArenaSegment =
      mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment);
  int numWriteImages =
      mSTM->getGlobalManager()->getNumWriteImages(F->getName());
  if (numWriteImages == OPENCL_MAX_WRITE_IMAGES || MultiUAV || ArenaSegment) {
    if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
      raw_uav_buffer =
          mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
    }
  }
  uint32_t CounterNum = 0;
  uint32_t ROArg = 0;
  uint32_t WOArg = 0;
  uint32_t NumArg = 0;
  while (Ip != Ep) {
    Type *cType = Ip->getType();
    if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) {
      std::string argMeta(";value:");
      argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab) + ":";
      int bitsize = cType->getPrimitiveSizeInBits();
      int numEle = 1;
      if (cType->getTypeID() == Type::VectorTyID) {
        numEle = cast<VectorType>(cType)->getNumElements();
      }
      argMeta += itostr(numEle) + ":1:" + itostr(mCBSize << 4);
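      // Scalar/vector value arguments are recorded as
      //   ;value:<name>:<type>:<elements>:1:<byte offset>
      // where the offset advances in 16-byte constant-buffer slots (mCBSize).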
      mMFI->addMetadata(argMeta, true);
      // FIXME: simplify.
      if ((bitsize / numEle) < 32) {
        bitsize = numEle >> 2;
      } else {
        bitsize >>= 7;
      }
      if (!bitsize) {
        bitsize = 1;
      }
      mCBSize += bitsize;
      ++NumArg;
    } else if (const PointerType *PT = dyn_cast<PointerType>(cType)) {
      Type *CT = PT->getElementType();
      const StructType *ST = dyn_cast<StructType>(CT);
      if (ST && ST->isOpaque()) {
        StringRef name = ST->getName();
        bool i1d  = name.equals("struct._image1d_t");
        bool i1da = name.equals("struct._image1d_array_t");
        bool i1db = name.equals("struct._image1d_buffer_t");
        bool i2d  = name.equals("struct._image2d_t");
        bool i2da = name.equals("struct._image2d_array_t");
        bool i3d  = name.equals("struct._image3d_t");
        bool c32  = name.equals("struct._counter32_t");
        bool c64  = name.equals("struct._counter64_t");
        if (i1d || i1da || i1db || i2d || i2da || i3d) {
          if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
            std::string imageArg(";image:");
            imageArg += Ip->getName().str() + ":";
            if (i1d)       imageArg += "1D:";
            else if (i1da) imageArg += "1DA:";
            else if (i1db) imageArg += "1DB:";
            else if (i2d)  imageArg += "2D:";
            else if (i2da) imageArg += "2DA:";
            else if (i3d)  imageArg += "3D:";
            if (isKernel) {
              if (mGM->isReadOnlyImage(mMF->getFunction()->getName(),
                                       (ROArg + WOArg))) {
                imageArg += "RO:" + itostr(ROArg);
                O << "dcl_resource_id(" << ROArg << ")_type(";
                if (i1d)       O << "1d";
                else if (i1da) O << "1darray";
                else if (i1db) O << "buffer";
                else if (i2d)  O << "2d";
                else if (i2da) O << "2darray";
                else if (i3d)  O << "3d";
                O << ")_fmtx(unknown)_fmty(unknown)"
                  << "_fmtz(unknown)_fmtw(unknown)\n";
                ++ROArg;
              } else if (mGM->isWriteOnlyImage(mMF->getFunction()->getName(),
                                               (ROArg + WOArg))) {
                uint32_t offset = 0;
                offset += WOArg;
                imageArg += "WO:" + itostr(offset & 0x7);
                O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type(";
                if (i1d)       O << "1d";
                else if (i1da) O << "1darray";
                else if (i1db) O << "buffer";
                else if (i2d)  O << "2d";
                else if (i2da) O << "2darray";
                else if (i3d)  O << "3d";
                O << ")_fmtx(uint)\n";
                ++WOArg;
              } else {
                imageArg += "RW:" + itostr(ROArg + WOArg);
              }
            }
            imageArg += ":1:" + itostr(mCBSize * 16);
            mMFI->addMetadata(imageArg, true);
            mMFI->addi32Literal(mCBSize);
            mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1;
            ++NumArg;
          } else {
            mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
            ++NumArg;
          }
        } else if (c32 || c64) {
          std::string counterArg(";counter:");
          counterArg += Ip->getName().str() + ":" + itostr(c32 ? 32 : 64) +
                        ":" + itostr(CounterNum++) + ":1:" +
                        itostr(mCBSize * 16);
          mMFI->addMetadata(counterArg, true);
          ++NumArg;
          ++mCBSize;
        } else {
          updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel,
                       F);
          ++NumArg;
          ++mCBSize;
        }
      } else if (CT->getTypeID() == Type::StructTyID &&
                 PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
        const TargetData *td = mTM->getTargetData();
        const StructLayout *sl = td->getStructLayout(dyn_cast<StructType>(CT));
        int bytesize = sl->getSizeInBytes();
        int reservedsize = (bytesize + 15) & ~15;
        int numSlots = reservedsize >> 4;
        if (!numSlots) {
          numSlots = 1;
        }
        std::string structArg(";value:");
        structArg += Ip->getName().str() + ":struct:" + itostr(bytesize) +
                     ":1:" + itostr(mCBSize * 16);
        mMFI->addMetadata(structArg, true);
        mCBSize += numSlots;
        ++NumArg;
      } else if (CT->isIntOrIntVectorTy() || CT->isFPOrFPVectorTy() ||
                 CT->getTypeID() == Type::ArrayTyID ||
                 CT->getTypeID() == Type::PointerTyID ||
                 PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
        updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, F);
        ++NumArg;
        ++mCBSize;
      } else {
        assert(0 && "Cannot process current pointer argument");
        mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
        ++NumArg;
      }
    } else {
      assert(0 && "Cannot process current kernel argument");
      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
      ++NumArg;
    }
    ++Ip;
  }
}

void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter,
                                     llvm::raw_ostream &O,
                                     const std::string &name) {
#ifdef UPSTREAM_LLVM
  mName = name;
  std::string kernelName;
  kernelName = name;
  int kernelId = mGM->getOrCreateFunctionID(kernelName);
  O << "func " << kernelId << " ; " << kernelName << "\n";
  if (mSTM->is64bit()) {
    O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].xy\n";
  } else {
    O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].x\n";
  }
  O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0\n";
#endif
}

void AMDILKernelManager::printGroupSize(llvm::raw_ostream &O) {
  // The HD4XXX generation of hardware does not support a 3D launch, so we need
  // to use dcl_num_thread_per_group to specify the launch size. If the launch
  // size is specified via a kernel attribute, we print it here. Otherwise we
  // use the default size.
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    if (mGM->hasRWG(mName) || !mMFI->usesLocal()) {
      // If the user has specified the required workgroup size, then we need to
      // compile for that size and that size only. Otherwise we compile for the
      // max workgroup size that is passed in as an option to the backend.
      O << "dcl_num_thread_per_group ";
      O << mGM->getLocal(mName, 0) << ", ";
      O << mGM->getLocal(mName, 1) << ", ";
      O << mGM->getLocal(mName, 2) << " \n";
    } else {
      // If the kernel uses local memory, then the kernel is being compiled in
      // single wavefront mode, so we have to generate slightly different code.
      O << "dcl_num_thread_per_group "
        << mSTM->device()->getWavefrontSize() << ", 1, 1 \n";
    }
  } else {
    // Otherwise we generate for devices that support 3D launch natively. If
    // the reqd_workgroup_size attribute was specified, then we can specify the
    // exact launch dimensions.
    if (mGM->hasRWG(mName)) {
      O << "dcl_num_thread_per_group ";
      O << mGM->getLocal(mName, 0) << ", ";
      O << mGM->getLocal(mName, 1) << ", ";
      O << mGM->getLocal(mName, 2) << " \n";
    } else {
      // Otherwise we specify the largest workgroup size that can be launched.
O << "dcl_max_thread_per_group " << mGM->getLocal(mName, 3) << " \n"; } } // Now that we have specified the workgroup size, lets declare the local // memory size. If we are using hardware and we know the value at compile // time, then we need to declare the correct value. Otherwise we should just // declare the maximum size. if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) { size_t kernelLocalSize = (mGM->getHWLocalSize(mName) + 3) & ~3; if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) { mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]); } // If there is a local pointer as a kernel argument, we don't know the size // at compile time, so we reserve all of the space. if (mMFI->usesLocal() && (mMFI->hasLocalArg() || !kernelLocalSize)) { O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") " << mSTM->device()->getMaxLDSSize() << "\n"; mMFI->setUsesMem(AMDILDevice::LDS_ID); } else if (kernelLocalSize) { // We know the size, so lets declare it correctly. O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") " << kernelLocalSize << "\n"; mMFI->setUsesMem(AMDILDevice::LDS_ID); } } // If the device supports the region memory extension, which maps to our // hardware GDS memory, then lets declare it so we can use it later on. if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) { size_t kernelGDSSize = (mGM->getHWRegionSize(mName) + 3) & ~3; if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) { mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]); } // If there is a region pointer as a kernel argument, we don't know the size // at compile time, so we reserved all of the space. if (mMFI->usesRegion() && (mMFI->hasRegionArg() || !kernelGDSSize)) { O << "dcl_gds_id(" << DEFAULT_GDS_ID << ") " << mSTM->device()->getMaxGDSSize() << "\n"; mMFI->setUsesMem(AMDILDevice::GDS_ID); } else if (kernelGDSSize) { // We know the size, so lets declare it. O << "dcl_gds_id(" << DEFAULT_GDS_ID << ") " << kernelGDSSize << "\n"; mMFI->setUsesMem(AMDILDevice::GDS_ID); } } } void AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O) { // If we are a HD4XXX generation device, then we only support a single uav // surface, so we declare it and leave if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { O << "dcl_raw_uav_id(" << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) << ")\n"; mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); getIntrinsicSetup(AsmPrinter, O); return; } // If we are supporting multiple uav's view the MultiUAV capability, then we // need to print out the declarations here. MultiUAV conflicts with write // images, so they only use 8 - NumWriteImages uav's. Therefor only pointers // with ID's < 8 will get printed. if (mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrint, O); mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); } // If arena segments are supported, then we should emit them now. Arena // segments are similiar to MultiUAV, except ArenaSegments are virtual and up // to 1024 of them can coexist. These are more compiler hints for CAL and thus // cannot overlap in any form. Each ID maps to a seperate piece of memory and // CAL determines whether the load/stores should go to the fast path/slow path // based on the usage and instruction. 
  if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
    binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O);
  }
  // Now that we have printed out all of the arena and MultiUAV declarations,
  // we must print out the default raw UAV id. This always exists on HD5XXX
  // and HD6XXX hardware. The reason is that the hardware supports 12 UAVs and
  // 11 are taken up by MultiUAV/write images and arena. However, if we do not
  // have UAV 11 as the raw UAV and there are 8 write images, we must revert
  // everything to the arena and not print out the default raw UAV id.
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD5XXX ||
      mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) {
    if ((mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) < 11 &&
         mSTM->getGlobalManager()->getNumWriteImages(mName) !=
             OPENCL_MAX_WRITE_IMAGES &&
         !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) ||
        mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
      if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID) &&
          mMFI->uav_count(
              mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID))) {
        O << "dcl_raw_uav_id("
          << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
        O << ")\n";
        mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
      }
    }
    // If we have not printed out the arena ID yet, then do so here.
    if (!mMFI->usesMem(AMDILDevice::ARENA_UAV_ID) &&
        mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaUAV)) {
      O << "dcl_arena_uav_id("
        << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) << ")\n";
      mMFI->setUsesMem(AMDILDevice::ARENA_UAV_ID);
    }
  } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrintSI, O);
    mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
  }
  getIntrinsicSetup(AsmPrinter, O);
}

void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter,
                                           llvm::raw_ostream &O) {
  O << "mov r0.z, vThreadGrpIdFlat.x\n"
    << "mov r1022.xyz0, vTidInGrp.xyz\n";
  if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
    O << "mov r1023.xyz0, vThreadGrpId.xyz\n";
  } else {
    O << "imul r0.w, cb0[2].x, cb0[2].y\n" // Calculates the local id.
      // Calculates the group id.
      << "umod r1023.x, r0.z, cb0[2].x\n"
      << "udiv r1023.y, r0.z, cb0[2].x\n"
      << "umod r1023.y, r1023.y, cb0[2].y\n"
      << "udiv r1023.z, r0.z, r0.w\n";
  }
  // Calculates the global id.
  if (mGM->hasRWG(mName) && 0) {
    // Anytime we declare a literal, we need to reserve it, if it is not
    // emitted in emitLiterals.
    mMFI->addReservedLiterals(1);
    O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
    O << mGM->getLocal(mName, 0) << ", ";
    O << mGM->getLocal(mName, 1) << ", ";
    O << mGM->getLocal(mName, 2) << ", ";
    O << "0\n";
    O << "imad r1021.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1
      << ".xyz, r1022.xyz\n";
    mMFI->addReservedLiterals(1);
  } else {
    O << "imad r1021.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n";
  }
  // Add the global/group offset for multi-launch support.
  O << "iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0\n"
    << "iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0\n"
<< "mov r1023.w, r0.z\n"; #ifdef UPSTREAM_LLVM if (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) { if (mSTM->is64bit()) { O << "umul " << AsmPrinter->getRegisterName(AMDIL::T2) << ".x0, r1023.w, cb0[4].z\n" << "i64add " << AsmPrinter->getRegisterName(AMDIL::T2) << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T2) << ".xy, cb0[4].xy\n"; } else { O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2) << ".x, r1023.w, cb0[4].y, cb0[4].x\n"; } } // Shift the flat group id to be in bytes instead of dwords. O << "ishl r1023.w, r1023.w, l0.z\n"; if (mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) { if (mSTM->is64bit()) { O << "umul " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x0, vAbsTidFlat.x, cb0[3].z\n" << "i64add " << AsmPrinter->getRegisterName(AMDIL::T1) << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T1) << ".xy, cb0[3].xy\n"; } else { O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n"; } } else { O << "mov " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x, l0.0\n"; } #endif if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { O << "udiv r1024.xyz, r1021.xyz, cb0[10].xyz\n"; if (mGM->hasRWR(mName) && 0) { // Anytime we declare a literal, we need to reserve it, if it is not emitted // in emitLiterals. mMFI->addReservedLiterals(1); O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", "; O << mGM->getLocal(mName, 0) << ", "; O << mGM->getLocal(mName, 1) << ", "; O << mGM->getLocal(mName, 2) << ", "; O << "0\n"; O << "imad r1025.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n"; mMFI->addReservedLiterals(1); } else { O << "imad r1025.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n"; } } } void AMDILKernelManager::printFooter(llvm::raw_ostream &O) { O << "ret\n"; O << "endfunc ; " << mName << "\n"; } void AMDILKernelManager::printMetaData(llvm::raw_ostream &O, uint32_t id, bool kernel) { if (kernel) { int kernelId = mGM->getOrCreateFunctionID(mName); mMFI->addCalledFunc(id); mUniqueID = kernelId; mIsKernel = true; } printKernelArgs(O); if (kernel) { mIsKernel = false; mMFI->eraseCalledFunc(id); mUniqueID = id; } } void AMDILKernelManager::setKernel(bool kernel) { mIsKernel = kernel; if (kernel) { mWasKernel = mIsKernel; } } void AMDILKernelManager::setID(uint32_t id) { mUniqueID = id; } void AMDILKernelManager::setName(const std::string &name) { mName = name; } bool AMDILKernelManager::isKernel() { return mIsKernel; } bool AMDILKernelManager::wasKernel() { return mWasKernel; } void AMDILKernelManager::setImageWrite() { mHasImageWrite = true; } void AMDILKernelManager::setOutputInst() { mHasOutputInst = true; } void AMDILKernelManager::printConstantToRegMapping( AMDILAsmPrinter *RegNames, uint32_t &LII, llvm::raw_ostream &O, uint32_t &Counter, uint32_t Buffer, uint32_t n, const char *lit, uint32_t fcall, bool isImage, bool isHWCB) { #ifdef UPSTREAM_LLVM // TODO: This needs to be enabled or SC will never statically index into the // CB when a pointer is used. 
  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) && isHWCB) {
    const char *name = RegNames->getRegisterName(LII);
    O << "mov " << name << ", l5.x\n";
    ++LII;
    Counter++;
    return;
  }
  for (uint32_t x = 0; x < n; ++x) {
    const char *name = RegNames->getRegisterName(LII);
    if (isImage) {
      O << "mov " << name << ", l" << mMFI->getIntLits(Counter++) << "\n";
    } else {
      O << "mov " << name << ", cb" << Buffer << "[" << Counter++ << "]\n";
    }
    ++LII;
  }
#endif
}

void AMDILKernelManager::printCopyStructPrivate(const StructType *ST,
                                                llvm::raw_ostream &O,
                                                size_t stackSize,
                                                uint32_t Buffer,
                                                uint32_t mLitIdx,
                                                uint32_t &Counter) {
  size_t n = ((stackSize + 15) & ~15) >> 4;
  for (size_t x = 0; x < n; ++x) {
    O << "mov r2, cb" << Buffer << "[" << Counter++ << "]\n";
    O << "mov r1.x, r0.x\n";
    if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
      if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
        O << "ishr r1.x, r1.x, l0.x\n";
        O << "mov x" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
          << "[r1.x], r2\n";
      } else {
        O << "uav_raw_store_id("
          << mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
          << ") mem0, r1.x, r2\n";
      }
    } else {
      O << "uav_raw_store_id("
        << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
        << ") mem0, r1.x, r2\n";
    }
    O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n";
  }
}

void AMDILKernelManager::printKernelArgs(llvm::raw_ostream &O) {
  std::string version(";version:");
  version += itostr(AMDIL_MAJOR_VERSION) + ":" + itostr(AMDIL_MINOR_VERSION) +
             ":" + itostr(AMDIL_REVISION_NUMBER);
  O << ";ARGSTART:" << mName << "\n";
  if (mIsKernel) {
    O << version << "\n";
    O << ";device:" << mSTM->getDeviceName() << "\n";
  }
  O << ";uniqueid:" << mUniqueID << "\n";
  size_t local = mGM->getLocalSize(mName);
  size_t hwlocal = ((mGM->getHWLocalSize(mName) + 3) & (~0x3));
  size_t region = mGM->getRegionSize(mName);
  size_t hwregion = ((mGM->getHWRegionSize(mName) + 3) & (~0x3));
  bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
  bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
  bool usehwregion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
  bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV);
  if (mIsKernel) {
    O << ";memory:" << ((usehwprivate) ? (useuavprivate) ? "uav" : "hw" : "")
      << "private:" << (((mMFI->getStackSize() + 15) & (~0xF))) << "\n";
  }
  if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
    O << ";memory:" << ((usehwregion) ? "hw" : "") << "region:"
      << ((usehwregion) ? hwregion : hwregion + region) << "\n";
  }
  O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:"
    << ((usehwlocal) ? hwlocal : hwlocal + local) << "\n";
  if (mIsKernel) {
    if (mGM->hasRWG(mName)) {
      O << ";cws:" << mGM->getLocal(mName, 0) << ":";
      O << mGM->getLocal(mName, 1) << ":";
      O << mGM->getLocal(mName, 2) << "\n";
    }
    if (mGM->hasRWR(mName)) {
      O << ";crs:" << mGM->getRegion(mName, 0) << ":";
      O << mGM->getRegion(mName, 1) << ":";
      O << mGM->getRegion(mName, 2) << "\n";
    }
  }
  if (mIsKernel) {
    for (std::vector<std::string>::iterator ib = mMFI->kernel_md_begin(),
         ie = mMFI->kernel_md_end(); ib != ie; ++ib) {
      O << (*ib) << "\n";
    }
  }
  for (std::set<std::string>::iterator ib = mMFI->func_md_begin(),
       ie = mMFI->func_md_end(); ib != ie; ++ib) {
    O << (*ib) << "\n";
  }
  if (!mMFI->func_empty()) {
    O << ";function:" << mMFI->func_size();
    binaryForEach(mMFI->func_begin(), mMFI->func_end(), commaPrint, O);
    O << "\n";
  }
  if (!mSTM->device()->isSupported(AMDILDeviceInfo::MacroDB) &&
      !mMFI->intr_empty()) {
    O << ";intrinsic:" << mMFI->intr_size();
    binaryForEach(mMFI->intr_begin(), mMFI->intr_end(), commaPrint, O);
    O << "\n";
  }
  if (!mIsKernel) {
    binaryForEach(mMFI->printf_begin(), mMFI->printf_end(), printfPrint, O);
    mMF->getMMI().getObjFileInfo<AMDILModuleInfo>().add_printf_offset(
        mMFI->printf_size());
  } else {
    for (StringMap<SamplerInfo>::iterator smb = mMFI->sampler_begin(),
         sme = mMFI->sampler_end(); smb != sme; ++smb) {
      O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx
        << ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1)
        << ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0)
        << "\n";
    }
  }
  if (mSTM->is64bit()) {
    O << ";memory:64bitABI\n";
  }
  if (mMFI->errors_empty()) {
    binaryForEach(mMFI->errors_begin(), mMFI->errors_end(), errorPrint, O);
  }
  // This has to come last.
  if (mIsKernel &&
      mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
    if (mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
        mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
      if (mMFI->uav_size() == 1) {
        if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) &&
            *(mMFI->uav_begin()) >= ARENA_SEGMENT_RESERVED_UAVS) {
          O << ";uavid:"
            << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
          O << "\n";
        } else {
          O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
        }
      } else if (mMFI->uav_count(
                     mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID))) {
        O << ";uavid:"
          << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
        O << "\n";
      } else {
        O << ";uavid:"
          << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
        O << "\n";
      }
    } else if (mSTM->getGlobalManager()->getNumWriteImages(mName) !=
                   OPENCL_MAX_WRITE_IMAGES &&
               !mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) &&
               mMFI->uav_count(
                   mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID))) {
      O << ";uavid:"
        << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) << "\n";
    } else if (mMFI->uav_size() == 1) {
      O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
    } else {
      O << ";uavid:"
        << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
      O << "\n";
    }
  }
  O << ";ARGEND:" << mName << "\n";
}

void AMDILKernelManager::printArgCopies(llvm::raw_ostream &O,
                                        AMDILAsmPrinter *RegNames) {
  Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
  Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
  uint32_t Counter = 0;
  if (mMFI->getArgSize()) {
    O << "dcl_cb cb1";
    O << "[" << (mMFI->getArgSize() >> 4) << "]\n";
    mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
  }
  const Function *F = mMF->getFunction();
  // Get the stack size.
  uint32_t stackSize = mMFI->getStackSize();
  uint32_t privateSize = mMFI->getScratchSize();
  uint32_t stackOffset = (privateSize + 15) & (~0xF);
  if (stackSize &&
      mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
    // TODO: If the size is too large, we need to fall back to software
    // emulation instead of using the hardware capability.
    int size = (((stackSize + 15) & (~0xF)) >> 4);
    if (size > 4096) {
      mMFI->addErrorMsg(
          amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]);
    }
    if (size) {
      // For any stack variables, we need to declare the literals for them so
      // that we can use them when we copy our data to the stack.
      mMFI->addReservedLiterals(1);
      // Anytime we declare a literal, we need to reserve it, if it is not
      // emitted in emitLiterals.
#ifdef UPSTREAM_LLVM
      O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize
        << ", " << privateSize << ", 16, "
        << ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
        << "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l"
        << mMFI->getNumLiterals() << ".w\n";
      if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
        O << "dcl_indexed_temp_array x"
          << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "["
          << size << "]\n";
      } else {
        O << "dcl_typeless_uav_id("
          << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
          << ")_stride(4)_length(" << (size << 4) << ")_access(private)\n";
      }
      O << "mov " << RegNames->getRegisterName(AMDIL::FP) << ".x, l"
        << mMFI->getNumLiterals() << ".0\n";
#endif
      mMFI->setUsesMem(AMDILDevice::SCRATCH_ID);
    }
  }
  I = mMF->getFunction()->arg_begin();
  int32_t count = 0;
  // uint32_t Image = 0;
  bool displaced1 = false;
  bool displaced2 = false;
  uint32_t curReg = AMDIL::R1;
  // TODO: We don't handle arguments that were pushed onto the stack!
  for (; I != Ie; ++I) {
    Type *curType = I->getType();
    unsigned int Buffer = 1;
    O << "; Kernel arg setup: " << I->getName() << "\n";
    if (curType->isIntegerTy() || curType->isFloatingPointTy()) {
      switch (curType->getPrimitiveSizeInBits()) {
      default:
        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
        break;
      case 16:
        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
                                  "l3.y");
        break;
      case 8:
        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
                                  "l3.x");
        break;
      }
#ifdef UPSTREAM_LLVM
    } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
      Type *ET = VT->getElementType();
      int numEle = VT->getNumElements();
      switch (ET->getPrimitiveSizeInBits()) {
      default:
        if (numEle == 3) {
          O << "mov " << RegNames->getRegisterName(curReg);
          O << ".x, cb" << Buffer << "[" << Counter << "].x\n";
          curReg++;
          O << "mov " << RegNames->getRegisterName(curReg);
          O << ".x, cb" << Buffer << "[" << Counter << "].y\n";
          curReg++;
          O << "mov " << RegNames->getRegisterName(curReg);
          O << ".x, cb" << Buffer << "[" << Counter << "].z\n";
          curReg++;
          Counter++;
        } else {
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
                                    (numEle + 2) >> 2);
        }
        break;
      case 64:
        if (numEle == 3) {
          O << "mov " << RegNames->getRegisterName(curReg);
          O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
          curReg++;
          O << "mov " << RegNames->getRegisterName(curReg);
          O << ".xy, cb" << Buffer << "[" << Counter++ << "].zw\n";
          curReg++;
          O << "mov " << RegNames->getRegisterName(curReg);
          O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
          curReg++;
          Counter++;
        } else {
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
                                    (numEle) >> 1);
        }
        break;
      case 16: {
        switch (numEle) {
        default:
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
                                    (numEle + 2) >> 2, "l3.y", 1093);
          if (numEle == 3) {
            O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
            O << RegNames->getRegisterName(curReg) << ".y\n";
            ++curReg;
" << RegNames->getRegisterName(curReg) << ".x, "; O << RegNames->getRegisterName(curReg) << ".z\n"; ++curReg; } break; case 2: printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.y", 1092); break; } break; } case 8: { switch (numEle) { default: printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, (numEle+2) >> 2, "l3.x", 1091); if (numEle == 3) { O << "mov " << RegNames->getRegisterName(curReg) << ".x, "; O << RegNames->getRegisterName(curReg) << ".y\n"; ++curReg; O << "mov " << RegNames->getRegisterName(curReg) << ".x, "; O << RegNames->getRegisterName(curReg) << ".z\n"; ++curReg; } break; case 2: printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x", 1090); break; } break; } } #endif } else if (const PointerType *PT = dyn_cast(curType)) { Type *CT = PT->getElementType(); const StructType *ST = dyn_cast(CT); if (ST && ST->isOpaque()) { bool i1d = ST->getName() == "struct._image1d_t"; bool i1da = ST->getName() == "struct._image1d_array_t"; bool i1db = ST->getName() == "struct._image1d_buffer_t"; bool i2d = ST->getName() == "struct._image2d_t"; bool i2da = ST->getName() == "struct._image2d_array_t"; bool i3d = ST->getName() == "struct._image3d_t"; bool is_image = i1d || i1da || i1db || i2d || i2da || i3d; if (is_image) { if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) { printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, NULL, 0, is_image); } else { mMFI->addErrorMsg( amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]); ++curReg; } } else { printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1); } } else if (CT->isStructTy() && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { StructType *ST = dyn_cast(CT); bool i1d = ST->getName() == "struct._image1d_t"; bool i1da = ST->getName() == "struct._image1d_array_t"; bool i1db = ST->getName() == "struct._image1d_buffer_t"; bool i2d = ST->getName() == "struct._image2d_t"; bool i2da = ST->getName() == "struct._image2d_array_t"; bool i3d = ST->getName() == "struct._image3d_t"; bool is_image = i1d || i1da || i1db || i2d || i2da || i3d; if (is_image) { if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) { printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, NULL, 0, is_image); } else { mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]); ++curReg; } } else { if (count) { // Anytime we declare a literal, we need to reserve it, if it // is not emitted in emitLiterals. 
            mMFI->addReservedLiterals(1);
            O << "dcl_literal l" << mMFI->getNumLiterals() << ", "
              << -stackSize << ", " << stackSize << ", 16, "
              << stackOffset << "\n";
          }
          ++count;
          size_t structSize;
          structSize = (getTypeSize(ST) + 15) & ~15;
          stackOffset += structSize;
#ifdef UPSTREAM_LLVM
          O << "mov " << RegNames->getRegisterName(curReg) << ", l"
            << mMFI->getNumLiterals() << ".w\n";
          if (!displaced1) {
            O << "mov r1011, r1\n";
            displaced1 = true;
          }
          if (!displaced2 && strcmp(RegNames->getRegisterName(curReg), "r1")) {
            O << "mov r1010, r2\n";
            displaced2 = true;
          }
#endif
          printCopyStructPrivate(ST, O, structSize, Buffer,
                                 mMFI->getNumLiterals(), Counter);
          ++curReg;
        }
      } else if (CT->isIntOrIntVectorTy() || CT->isFPOrFPVectorTy() ||
                 CT->isArrayTy() || CT->isPointerTy() ||
                 PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
        if (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
          const kernel &krnl = mGM->getKernel(F->getName());
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
                                    NULL, 0, false,
                                    mGM->usesHWConstant(krnl, I->getName()));
        } else if (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
          // TODO: If we are region address space, the first region pointer, no
          // array pointers exist, and hardware RegionMem is enabled, then we
          // can zero out the register as the initial offset is zero.
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
        } else if (PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
          // TODO: If we are local address space, the first local pointer, no
          // array pointers exist, and hardware LocalMem is enabled, then we
          // can zero out the register as the initial offset is zero.
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
        } else {
          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
        }
      } else {
        assert(0 && "Current type is not supported!");
        mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
        ++curReg;
      }
    } else {
      assert(0 && "Current type is not supported!");
      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
      ++curReg;
    }
  }
  if (displaced1) {
    O << "mov r1, r1011\n";
  }
  if (displaced2) {
    O << "mov r2, r1010\n";
  }
  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
    const kernel &krnl = mGM->getKernel(F->getName());
    uint32_t constNum = 0;
    for (uint32_t x = 0; x < mSTM->device()->getMaxNumCBs(); ++x) {
      if (krnl.constSizes[x]) {
        O << "dcl_cb cb" << x + CB_BASE_OFFSET;
        O << "[" << (((krnl.constSizes[x] + 15) & ~15) >> 4) << "]\n";
        ++constNum;
        mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
      }
    }
    // TODO: If we run out of constant resources, we need to push some of the
    // constant pointers to the software emulated section.
    if (constNum > mSTM->device()->getMaxNumCBs()) {
      assert(0 && "Max constant buffer limit passed!");
      mMFI->addErrorMsg(
          amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]);
    }
  }
}

const char *AMDILKernelManager::getTypeName(const Type *ptr,
                                            const char *symTab) {
  // The symTab argument is ignored.
  LLVMContext &ctx = ptr->getContext();
  switch (ptr->getTypeID()) {
  case Type::StructTyID: {
    const StructType *ST = cast<StructType>(ptr);
    if (!ST->isOpaque())
      return "struct";
    // ptr is a pre-LLVM 3.0 "opaque" type.
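    // Named opaque structs correspond to the OpenCL built-in object types;
    // map the known names to their metadata type strings.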
    StringRef name = ST->getName();
    if (name.equals("struct._event_t"))         return "event";
    if (name.equals("struct._image1d_t"))       return "image1d";
    if (name.equals("struct._image1d_array_t")) return "image1d_array";
    if (name.equals("struct._image2d_t"))       return "image2d";
    if (name.equals("struct._image2d_array_t")) return "image2d_array";
    if (name.equals("struct._image3d_t"))       return "image3d";
    if (name.equals("struct._counter32_t"))     return "counter32";
    if (name.equals("struct._counter64_t"))     return "counter64";
    return "opaque";
    break;
  }
  case Type::FloatTyID:
    return "float";
  case Type::DoubleTyID: {
    const AMDILSubtarget *mSTM = mTM->getSubtargetImpl();
    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::DoubleOps)) {
      mMFI->addErrorMsg(amd::CompilerErrorMessage[DOUBLE_NOT_SUPPORTED]);
    }
    return "double";
  }
  case Type::IntegerTyID: {
    if (ptr == Type::getInt8Ty(ctx)) {
      return "i8";
    } else if (ptr == Type::getInt16Ty(ctx)) {
      return "i16";
    } else if (ptr == Type::getInt32Ty(ctx)) {
      return "i32";
    } else if (ptr == Type::getInt64Ty(ctx)) {
      return "i64";
    }
    break;
  }
  default:
    break;
  case Type::ArrayTyID: {
    const ArrayType *AT = cast<ArrayType>(ptr);
    const Type *name = AT->getElementType();
    return getTypeName(name, symTab);
    break;
  }
  case Type::VectorTyID: {
    const VectorType *VT = cast<VectorType>(ptr);
    const Type *name = VT->getElementType();
    return getTypeName(name, symTab);
    break;
  }
  case Type::PointerTyID: {
    const PointerType *PT = cast<PointerType>(ptr);
    const Type *name = PT->getElementType();
    return getTypeName(name, symTab);
    break;
  }
  case Type::FunctionTyID: {
    const FunctionType *FT = cast<FunctionType>(ptr);
    const Type *name = FT->getReturnType();
    return getTypeName(name, symTab);
    break;
  }
  }
  ptr->dump();
  mMFI->addErrorMsg(amd::CompilerErrorMessage[UNKNOWN_TYPE_NAME]);
  return "unknown";
}

void AMDILKernelManager::emitLiterals(llvm::raw_ostream &O) {
  char buffer[256];
  std::map<uint32_t, uint32_t>::iterator ilb, ile;
  for (ilb = mMFI->begin_32(), ile = mMFI->end_32(); ilb != ile; ++ilb) {
    uint32_t a = ilb->first;
    O << "dcl_literal l" << ilb->second << ", ";
    sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x", a, a, a, a);
    O << buffer << "; f32:i32 " << ilb->first << "\n";
  }
  std::map<uint64_t, uint32_t>::iterator llb, lle;
  for (llb = mMFI->begin_64(), lle = mMFI->end_64(); llb != lle; ++llb) {
    uint32_t v[2];
    uint64_t a = llb->first;
    memcpy(v, &a, sizeof(uint64_t));
    O << "dcl_literal l" << llb->second << ", ";
    sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f64:i64 ",
            v[0], v[1], v[0], v[1]);
    O << buffer << llb->first << "\n";
  }
  std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator vlb, vle;
  for (vlb = mMFI->begin_128(), vle = mMFI->end_128(); vlb != vle; ++vlb) {
    uint32_t v[2][2];
    uint64_t a = vlb->first.first;
    uint64_t b = vlb->first.second;
    memcpy(v[0], &a, sizeof(uint64_t));
    memcpy(v[1], &b, sizeof(uint64_t));
    O << "dcl_literal l" << vlb->second << ", ";
    sprintf(buffer, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f128:i128 ",
            v[0][0], v[0][1], v[1][0], v[1][1]);
    O << buffer << vlb->first.first << vlb->first.second << "\n";
  }
}

// setUAVID records the UAV assigned to a value in mValueIDMap; getUAVID
// returns that ID, or the device's default UAV if the value is not known.
void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID) {
  if (value) {
    mValueIDMap[value] = ID;
  }
}

uint32_t AMDILKernelManager::getUAVID(const Value *value) {
  if (mValueIDMap.find(value) != mValueIDMap.end()) {
    return mValueIDMap[value];
  }
  if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
    return mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
  } else {
    return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
  }
}