diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDIL.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp | 723 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDIL7XXDevice.cpp | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDIL7XXDevice.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp | 548 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILDevice.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp | 1093 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILEvergreenDevice.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILIOExpansion.cpp | 1160 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILIOExpansion.h | 320 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILImageExpansion.cpp | 171 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILTargetMachine.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/Makefile.sources | 5 |
15 files changed, 0 insertions, 4048 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index b37c6edea15..b006f84629e 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -174,7 +174,6 @@ bool AMDGPUPassConfig::addPreEmitPass() { PM.add(createSIPropagateImmReadsPass(*TM)); } - PM.add(createAMDILIOExpansion(*TM)); return false; } diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h index 85d7e03930c..5fe53edbb22 100644 --- a/src/gallium/drivers/radeon/AMDIL.h +++ b/src/gallium/drivers/radeon/AMDIL.h @@ -112,8 +112,6 @@ FunctionPass* createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); FunctionPass* createAMDILLiteralManager(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); -FunctionPass* - createAMDILIOExpansion(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); extern Target TheAMDILTarget; extern Target TheAMDGPUTarget; diff --git a/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp b/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp deleted file mode 100644 index cf5afb9d195..00000000000 --- a/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp +++ /dev/null @@ -1,723 +0,0 @@ -//===-- AMDIL789IOExpansion.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// @file AMDIL789IOExpansion.cpp -// @details Implementation of the IO expansion class for 789 devices. -// -#include "AMDILCompilerErrors.h" -#include "AMDILCompilerWarnings.h" -#include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILIOExpansion.h" -#include "AMDILKernelManager.h" -#include "AMDILMachineFunctionInfo.h" -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Value.h" - -using namespace llvm; -AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm - AMDIL_OPT_LEVEL_DECL) -: AMDILIOExpansion(tm AMDIL_OPT_LEVEL_VAR) -{ -} - -AMDIL789IOExpansion::~AMDIL789IOExpansion() { -} - -const char *AMDIL789IOExpansion::getPassName() const -{ - return "AMDIL 789 IO Expansion Pass"; -} -// This code produces the following pseudo-IL: -// mov r1007, $src.y000 -// cmov_logical r1007.x___, $flag.yyyy, r1007.xxxx, $src.xxxx -// mov r1006, $src.z000 -// cmov_logical r1007.x___, $flag.zzzz, r1006.xxxx, r1007.xxxx -// mov r1006, $src.w000 -// cmov_logical $dst.x___, $flag.wwww, r1006.xxxx, r1007.xxxx -void -AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI, - unsigned flag, unsigned src, unsigned dst, bool before) -{ - MachineBasicBlock::iterator I = *MI; - DebugLoc DL = MI->getDebugLoc(); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(src) - .addImm(2); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007) - .addReg(flag) - .addReg(AMDIL::R1007) - .addReg(src); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006) - .addReg(src) - .addImm(3); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1007) - .addReg(flag) - .addReg(AMDIL::R1006) - .addReg(AMDIL::R1007); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006) - .addReg(src) - .addImm(4); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_W_i32), dst) - .addReg(flag) - .addReg(AMDIL::R1006) - .addReg(AMDIL::R1007); - -} -// We have a 128 bit load but a 8/16/32bit value, so we need to -// select the correct component and make sure that the correct -// bits are selected. For the 8 and 16 bit cases we need to -// extract from the component the correct bits and for 32 bits -// we just need to select the correct component. - void -AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - DebugLoc DL = MI->getDebugLoc(); - emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false); - if (getMemorySize(MI) == 1) { - // This produces the following pseudo-IL: - // iand r1006.x___, r1010.xxxx, l14.xxxx - // mov r1006, r1006.xxxx - // iadd r1006, r1006, {0, -1, 2, 3} - // ieq r1008, r1006, 0 - // mov r1011, r1011.xxxx - // ishr r1011, r1011, {0, 8, 16, 24} - // mov r1007, r1011.y000 - // cmov_logical r1007.x___, r1008.yyyy, r1007.xxxx, r1011.xxxx - // mov r1006, r1011.z000 - // cmov_logical r1007.x___, r1008.zzzz, r1006.xxxx, r1007.xxxx - // mov r1006, r1011.w000 - // cmov_logical r1011.x___, r1008.wwww, r1006.xxxx, r1007.xxxx - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1006) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1006) - .addReg(AMDIL::R1006); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006) - .addReg(AMDIL::R1006) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1006) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32))); - emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false); - } else if (getMemorySize(MI) == 2) { - // This produces the following pseudo-IL: - // ishr r1007.x___, r1010.xxxx, 1 - // iand r1008.x___, r1007.xxxx, 1 - // ishr r1007.x___, r1011.xxxx, 16 - // cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1011) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addReg(AMDIL::R1011); - } -} -// This function does address calculations modifications to load from a vector -// register type instead of a dword addressed load. - void -AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect) -{ - MachineBasicBlock::iterator I = *MI; - DebugLoc DL = MI->getDebugLoc(); - // This produces the following pseudo-IL: - // ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3 - // iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1 - // ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1 - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal((is32bit) ? 3 : 1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal((is32bit) ? 2 : 1)); - if (needsSelect) { - // If the component selection is required, the following - // pseudo-IL is produced. - // mov r1008, r1008.xxxx - // iadd r1008, r1008, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1} - // ieq r1008, r1008, 0 - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL, - (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) : - -1ULL)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0)); - } -} -// This function emits a switch statement and writes 32bit/64bit -// value to a 128bit vector register type. - void -AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit) -{ - MachineBasicBlock::iterator I = *MI; - uint32_t xID = getPointerID(MI); - assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n"); - // This section generates the following pseudo-IL: - // switch r1008.x - // default - // mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y} - // break - // case 1 - // mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy} - // break - // if is32bit is true, case 2 and 3 are emitted. - // case 2 - // mov x1[r1007.x].__z_, r1011.x - // break - // case 3 - // mov x1[r1007.x].___w, r1011.x - // break - // endswitch - DebugLoc DL; - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::SWITCH)) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::DEFAULT)); - BuildMI(*mBB, I, DL, - mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_X : AMDIL::SCRATCHSTORE_XY) - , AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(xID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(1); - BuildMI(*mBB, I, DL, - mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_Y : AMDIL::SCRATCHSTORE_ZW), AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(xID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK)); - if (is32bit) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(2); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHSTORE_Z), AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(xID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(3); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHSTORE_W), AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(xID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK)); - } - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ENDSWITCH)); - -} - void -AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem); - if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) { - return expandGlobalLoad(MI); - } - if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t xID = getPointerID(MI); - assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n"); - if (!xID) { - xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - DebugLoc DL; - // These instructions go before the current MI. - expandLoadStartCode(MI); - switch (getMemorySize(MI)) { - default: - // Since the private register is a 128 bit aligned, we have to align the address - // first, since our source address is 32bit aligned and then load the data. - // This produces the following pseudo-IL: - // ishr r1010.x___, r1010.xxxx, 4 - // mov r1011, x1[r1010.x] - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SHR_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(4)); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(xID); - break; - case 1: - case 2: - case 4: - emitVectorAddressCalc(MI, true, true); - // This produces the following pseudo-IL: - // mov r1011, x1[r1007.x] - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011) - .addReg(AMDIL::R1007) - .addImm(xID); - // These instructions go after the current MI. - emitDataLoadSelect(MI); - break; - case 8: - emitVectorAddressCalc(MI, false, true); - // This produces the following pseudo-IL: - // mov r1011, x1[r1007.x] - // mov r1007, r1011.zw00 - // cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1007.zw - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011) - .addReg(AMDIL::R1007) - .addImm(xID); - // These instructions go after the current MI. - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(2); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1007); - break; - } - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, I, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass)), - MI->getOperand(0).getReg()) - .addReg(AMDIL::R1011); -} - - - void -AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - if (!isHardwareInst(MI) || MI->memoperands_empty()) { - return expandGlobalLoad(MI); - } - uint32_t cID = getPointerID(MI); - if (cID < 2) { - return expandGlobalLoad(MI); - } - if (!mMFI->usesMem(AMDILDevice::CONSTANT_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - - DebugLoc DL; - // These instructions go before the current MI. - expandLoadStartCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SHR_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(4)); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::CBLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(cID); - break; - case 1: - case 2: - case 4: - emitVectorAddressCalc(MI, true, true); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::CBLOAD), AMDIL::R1011) - .addReg(AMDIL::R1007) - .addImm(cID); - // These instructions go after the current MI. - emitDataLoadSelect(MI); - break; - case 8: - emitVectorAddressCalc(MI, false, true); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::CBLOAD), AMDIL::R1011) - .addReg(AMDIL::R1007) - .addImm(cID); - // These instructions go after the current MI. - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007) - .addReg(AMDIL::R1011) - .addImm(2); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1007); - break; - } - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, I, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass)), - MI->getOperand(0).getReg()) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - - void -AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI) -{ - if (!isStaticCPLoad(MI)) { - return expandConstantLoad(MI); - } else { - uint32_t idx = MI->getOperand(1).getIndex(); - const MachineConstantPool *MCP = MI->getParent()->getParent() - ->getConstantPool(); - const std::vector<MachineConstantPoolEntry> &consts - = MCP->getConstants(); - const Constant *C = consts[idx].Val.ConstVal; - emitCPInst(MI, C, mKM, 0, isExtendLoad(MI)); - } -} - - void -AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem); - if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) { - return expandGlobalStore(MI); - } - if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t xID = getPointerID(MI); - assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n"); - if (!xID) { - xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - DebugLoc DL; - // These instructions go before the current MI. - expandStoreSetupCode(MI); - switch (getMemorySize(MI)) { - default: - // This section generates the following pseudo-IL: - // ishr r1010.x___, r1010.xxxx, 4 - // mov x1[r1010.x], r1011 - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SHR_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(4)); - BuildMI(*mBB, I, MI->getDebugLoc(), - mTII->get(AMDIL::SCRATCHSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(xID); - break; - case 1: - emitVectorAddressCalc(MI, true, true); - // This section generates the following pseudo-IL: - // mov r1002, x1[r1007.x] - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002) - .addReg(AMDIL::R1007) - .addImm(xID); - emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true); - // This section generates the following pseudo-IL: - // iand r1003.x, r1010.x, 3 - // mov r1003, r1003.xxxx - // iadd r1000, r1003, {0, -1, -2, -3} - // ieq r1000, r1000, 0 - // mov r1002, r1002.xxxx - // ishr r1002, r1002, {0, 8, 16, 24} - // mov r1011, r1011.xxxx - // cmov_logical r1002, r1000, r1011, r1002 - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1003) - .addReg(AMDIL::R1003); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001) - .addReg(AMDIL::R1003) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001) - .addReg(AMDIL::R1001) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1002) - .addReg(AMDIL::R1002); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002) - .addReg(AMDIL::R1002) - .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002) - .addReg(AMDIL::R1001) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1002); - if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - // This section generates the following pseudo-IL: - // iand r1002, r1002, 0xFF - // ishl r1002, r1002, {0, 8, 16, 24} - // ior r1002.xy, r1002.xy, r1002.zw - // ior r1011.x, r1002.x, r1002.y - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002) - .addReg(AMDIL::R1002) - .addImm(mMFI->addi32Literal(0xFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002) - .addReg(AMDIL::R1002) - .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1002) - .addReg(AMDIL::R1002).addReg(AMDIL::R1002); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1002).addReg(AMDIL::R1002); - } else { - // This section generates the following pseudo-IL: - // mov r1001.xy, r1002.yw - // mov r1002.xy, r1002.xz - // ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy - // mov r1001.x, r1002.y - // ubit_insert r1011.x, 16, 16, r1002.y, r1002.x - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1001) - .addReg(AMDIL::R1002); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1002) - .addReg(AMDIL::R1002); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1002) - .addImm(mMFI->addi32Literal(8)) - .addImm(mMFI->addi32Literal(8)) - .addReg(AMDIL::R1001) - .addReg(AMDIL::R1002); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1001) - .addReg(AMDIL::R1002); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1001) - .addReg(AMDIL::R1002); - } - emitVectorAddressCalc(MI, true, false); - emitVectorSwitchWrite(MI, true); - break; - case 2: - emitVectorAddressCalc(MI, true, true); - // This section generates the following pseudo-IL: - // mov r1002, x1[r1007.x] - BuildMI(*mBB, I, DL, - mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002) - .addReg(AMDIL::R1007) - .addImm(xID); - emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true); - // This section generates the following pseudo-IL: - // ishr r1003.x, r1010.x, 1 - // iand r1003.x, r1003.x, 1 - // ishr r1001.x, r1002.x, 16 - // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x - // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1003) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003) - .addReg(AMDIL::R1003) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1001) - .addReg(AMDIL::R1002) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1002) - .addReg(AMDIL::R1003) - .addReg(AMDIL::R1002) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1001) - .addReg(AMDIL::R1003) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1001); - if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - // This section generates the following pseudo-IL: - // iand r1002.x, r1002.x, 0xFFFF - // iand r1001.x, r1001.x, 0xFFFF - // ishl r1001.x, r1002.x, 16 - // ior r1011.x, r1002.x, r1001.x - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1002) - .addReg(AMDIL::R1002) - .addImm(mMFI->addi32Literal(0xFFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1001) - .addReg(AMDIL::R1001) - .addImm(mMFI->addi32Literal(0xFFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1001) - .addReg(AMDIL::R1001) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::R1011) - .addReg(AMDIL::R1002).addReg(AMDIL::R1001); - } else { - // This section generates the following pseudo-IL: - // ubit_insert r1011.x, 16, 16, r1001.y, r1002.x - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1001) - .addReg(AMDIL::R1002); - } - emitVectorAddressCalc(MI, true, false); - emitVectorSwitchWrite(MI, true); - break; - case 4: - emitVectorAddressCalc(MI, true, false); - emitVectorSwitchWrite(MI, true); - break; - case 8: - emitVectorAddressCalc(MI, false, false); - emitVectorSwitchWrite(MI, false); - break; - }; -} - void -AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - DebugLoc DL; - if (MI->getOperand(0).isUndef()) { - BuildMI(*mBB, I, DL, mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011) - .addImm(mMFI->addi32Literal(0)); - } else { - BuildMI(*mBB, I, DL, mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011) - .addReg(MI->getOperand(0).getReg()); - } - expandTruncData(MI); - if (MI->getOperand(2).isReg()) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_i32), AMDIL::R1010) - .addReg(MI->getOperand(1).getReg()) - .addReg(MI->getOperand(2).getReg()); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::MOVE_i32), AMDIL::R1010) - .addReg(MI->getOperand(1).getReg()); - } - expandAddressCalc(MI); - expandPackedData(MI); -} - - -void -AMDIL789IOExpansion::expandPackedData(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - if (!isPackedData(MI)) { - return; - } - DebugLoc DL; - // If we have packed data, then the shift size is no longer - // the same as the load size and we need to adjust accordingly - switch(getPackedID(MI)) { - default: - break; - case PACK_V2I8: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011).addImm(mMFI->addi64Literal(8ULL << 32)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1011); - } - break; - case PACK_V4I8: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32)))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1011); - } - break; - case PACK_V2I16: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi64Literal(16ULL << 32)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1011); - } - break; - case PACK_V4I16: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi64Literal(16ULL << 32)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1011); - } - break; - case UNPACK_V2I8: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1012); - break; - case UNPACK_V4I8: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i8), AMDIL::R1011) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32)))); - } - break; - case UNPACK_V2I16: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1012); - } - break; - case UNPACK_V4I16: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1012); - } - break; - }; -} diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp index b89c22b1404..6625dd77d5f 100644 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp +++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp @@ -11,7 +11,6 @@ #include "AMDIL7XXAsmPrinter.h" #endif #include "AMDILDevice.h" -#include "AMDILIOExpansion.h" using namespace llvm; @@ -92,13 +91,6 @@ uint32_t AMDIL7XXDevice::getMaxNumUAVs() const return 1; } -FunctionPass* -AMDIL7XXDevice::getIOExpansion( - TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const -{ - return new AMDIL7XXIOExpansion(TM AMDIL_OPT_LEVEL_VAR); -} - AsmPrinter* AMDIL7XXDevice::getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const { diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h index edaf84a3282..1e9efc408f0 100644 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.h +++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.h @@ -39,8 +39,6 @@ public: virtual uint32_t getGeneration() const; virtual uint32_t getResourceID(uint32_t DeviceID) const; virtual uint32_t getMaxNumUAVs() const; - FunctionPass* - getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const; AsmPrinter* getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const; diff --git a/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp b/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp deleted file mode 100644 index cddde313e2b..00000000000 --- a/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp +++ /dev/null @@ -1,548 +0,0 @@ -//===-- AMDIL7XXIOExpansion.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// @file AMDIL7XXIOExpansion.cpp -// @details Implementation of the IO Printing class for 7XX devices -// -#include "AMDILCompilerErrors.h" -#include "AMDILCompilerWarnings.h" -#include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILIOExpansion.h" -#include "AMDILKernelManager.h" -#include "AMDILMachineFunctionInfo.h" -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Value.h" - -using namespace llvm; -AMDIL7XXIOExpansion::AMDIL7XXIOExpansion(TargetMachine &tm - AMDIL_OPT_LEVEL_DECL) : AMDIL789IOExpansion(tm AMDIL_OPT_LEVEL_VAR) -{ -} - -AMDIL7XXIOExpansion::~AMDIL7XXIOExpansion() { -} -const char *AMDIL7XXIOExpansion::getPassName() const -{ - return "AMDIL 7XX IO Expansion Pass"; -} - - void -AMDIL7XXIOExpansion::expandGlobalLoad(MachineInstr *MI) -{ - DebugLoc DL; - // These instructions go before the current MI. - expandLoadStartCode(MI); - uint32_t ID = getPointerID(MI); - mKM->setOutputInst(); - switch(getMemorySize(MI)) { - default: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - break; - case 4: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - break; - case 8: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - break; - case 1: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1012) - .addImm(mMFI->addi32Literal(0)) - .addImm(mMFI->addi32Literal(24)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1008) - .addReg(AMDIL::R1012) - .addImm(mMFI->addi32Literal(8)) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1008) - .addReg(AMDIL::R1012) - .addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i8), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - break; - case 2: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - break; - } - // These instructions go after the current MI. - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, MI, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass))) - .addOperand(MI->getOperand(0)) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - - void -AMDIL7XXIOExpansion::expandRegionLoad(MachineInstr *MI) -{ - bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { - mMFI->addErrorMsg( - amd::CompilerErrorMessage[REGION_MEMORY_ERROR]); - return; - } - if (!HWRegion || !isHardwareRegion(MI)) { - return expandGlobalLoad(MI); - } - if (!mMFI->usesMem(AMDILDevice::GDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t gID = getPointerID(MI); - assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n"); - if (!gID) { - gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - - DebugLoc DL; - // These instructions go before the current MI. - expandLoadStartCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Z), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_W), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - break; - case 1: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - // The instruction would normally fit in right here so everything created - // after this point needs to go into the afterInst vector. - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(24)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(24)); - break; - case 2: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - // The instruction would normally fit in right here so everything created - // after this point needs to go into the afterInst vector. - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - break; - case 4: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - break; - case 8: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi64Literal(1ULL << 32)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - break; - } - - // These instructions go after the current MI. - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, MI, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass))) - .addOperand(MI->getOperand(0)) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - void -AMDIL7XXIOExpansion::expandLocalLoad(MachineInstr *MI) -{ - bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - if (!HWLocal || !isHardwareLocal(MI)) { - return expandGlobalLoad(MI); - } - if (!mMFI->usesMem(AMDILDevice::LDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t lID = getPointerID(MI); - assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n"); - if (!lID) { - lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - DebugLoc DL; - // These instructions go before the current MI. - expandLoadStartCode(MI); - switch (getMemorySize(MI)) { - default: - case 8: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - break; - case 4: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - break; - case 1: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(24)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(24)); - break; - case 2: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)); - break; - } - - // These instructions go after the current MI. - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, MI, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass))) - .addOperand(MI->getOperand(0)) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - - void -AMDIL7XXIOExpansion::expandGlobalStore(MachineInstr *MI) -{ - uint32_t ID = getPointerID(MI); - mKM->setOutputInst(); - DebugLoc DL = MI->getDebugLoc(); - // These instructions go before the current MI. - expandStoreSetupCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - break; - case 1: - mMFI->addErrorMsg( - amd::CompilerErrorMessage[BYTE_STORE_ERROR]); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - break; - case 2: - mMFI->addErrorMsg( - amd::CompilerErrorMessage[BYTE_STORE_ERROR]); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - break; - case 4: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - break; - case 8: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - break; - }; -} - - void -AMDIL7XXIOExpansion::expandRegionStore(MachineInstr *MI) -{ - bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { - mMFI->addErrorMsg( - amd::CompilerErrorMessage[REGION_MEMORY_ERROR]); - return; - } - if (!HWRegion || !isHardwareRegion(MI)) { - return expandGlobalStore(MI); - } - DebugLoc DL = MI->getDebugLoc(); - mKM->setOutputInst(); - if (!mMFI->usesMem(AMDILDevice::GDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t gID = getPointerID(MI); - assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n"); - if (!gID) { - gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - - // These instructions go before the current MI. - expandStoreSetupCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Z), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_W), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 1: - mMFI->addErrorMsg( - amd::CompilerErrorMessage[BYTE_STORE_ERROR]); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFF)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1006) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0xFFFFFF00)) - .addImm(mMFI->addi32Literal(0x00FFFFFF)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(0xFF00FFFF)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(0xFFFF00FF)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1007); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 2: - mMFI->addErrorMsg( - amd::CompilerErrorMessage[BYTE_STORE_ERROR]); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0x0000FFFF)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0x0000FFFF)) - .addImm(mMFI->addi32Literal(0xFFFF0000)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 4: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 8: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi64Literal(1ULL << 32)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - }; -} - - void -AMDIL7XXIOExpansion::expandLocalStore(MachineInstr *MI) -{ - bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - if (!HWLocal || !isHardwareLocal(MI)) { - return expandGlobalStore(MI); - } - uint32_t lID = getPointerID(MI); - assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n"); - if (!lID) { - lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - DebugLoc DL = MI->getDebugLoc(); - // These instructions go before the current MI. - expandStoreSetupCode(MI); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); -} diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h index 88f8b306b0d..d843c18afbd 100644 --- a/src/gallium/drivers/radeon/AMDILDevice.h +++ b/src/gallium/drivers/radeon/AMDILDevice.h @@ -22,7 +22,6 @@ namespace llvm { class AMDILSubtarget; class AMDILAsmPrinter; - class AMDILIOExpansion; class AMDILPointerManager; class AsmPrinter; class MCStreamer; @@ -85,10 +84,6 @@ public: // Get the max number of UAV's for this device. virtual uint32_t getMaxNumUAVs() const = 0; - // Interface to get the IO Expansion pass for each device. - virtual FunctionPass* - getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const = 0; - // Interface to get the Asm printer for each device. virtual AsmPrinter* getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const = 0; diff --git a/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp b/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp deleted file mode 100644 index 185fc70a00b..00000000000 --- a/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp +++ /dev/null @@ -1,1093 +0,0 @@ -//===-- AMDILEGIOExpansion.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// @file AMDILEGIOExpansion.cpp -// @details Implementation of IO expansion class for evergreen and NI devices. -// -#include "AMDILCompilerErrors.h" -#include "AMDILCompilerWarnings.h" -#include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILIOExpansion.h" -#include "AMDILKernelManager.h" -#include "AMDILMachineFunctionInfo.h" -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Value.h" - -using namespace llvm; -AMDILEGIOExpansion::AMDILEGIOExpansion(TargetMachine &tm - AMDIL_OPT_LEVEL_DECL) : AMDILImageExpansion(tm AMDIL_OPT_LEVEL_VAR) -{ -} - -AMDILEGIOExpansion::~AMDILEGIOExpansion() { -} -const char *AMDILEGIOExpansion::getPassName() const -{ - return "AMDIL EG/NI IO Expansion Pass"; -} - bool -AMDILEGIOExpansion::isImageIO(MachineInstr *MI) -{ - if (!MI->getOperand(0).isGlobal()) { - return false; - } - const llvm::StringRef& nameRef = MI->getOperand(0).getGlobal()->getName(); - const char *name = nameRef.data(); - if (nameRef.size() > 8 && !strncmp(name, "__amdil_", 8)) { - name += 8; - if (!strncmp(name, "sample_data", 11) - || !strncmp(name, "write_image", 11) - || !strncmp(name, "get_image2d_params", 18) - || !strncmp(name, "get_image3d_params", 18)) { - return true; - } - } - return false; -} -bool -AMDILEGIOExpansion::isIOInstruction(MachineInstr *MI) -{ - if (!MI) { - return false; - } - switch (MI->getOpcode()) { - default: - return AMDILIOExpansion::isIOInstruction(MI); - case AMDIL::IMAGE2D_READ: - case AMDIL::IMAGE2D_READ_UNNORM: - case AMDIL::IMAGE2D_WRITE: - case AMDIL::IMAGE2D_INFO0: - case AMDIL::IMAGE2D_INFO1: - case AMDIL::IMAGE3D_READ: - case AMDIL::IMAGE3D_READ_UNNORM: - case AMDIL::IMAGE3D_WRITE: - case AMDIL::IMAGE3D_INFO0: - case AMDIL::IMAGE3D_INFO1: - return true; - }; - return false; -} -void -AMDILEGIOExpansion::expandIOInstruction(MachineInstr *MI) -{ - assert(isIOInstruction(MI) && "Must be an IO instruction to " - "be passed to this function!"); - switch (MI->getOpcode()) { - default: - AMDILIOExpansion::expandIOInstruction(MI); - break; - case AMDIL::IMAGE2D_READ: - case AMDIL::IMAGE3D_READ: - case AMDIL::IMAGE2D_READ_UNNORM: - case AMDIL::IMAGE3D_READ_UNNORM: - expandImageLoad(mBB, MI); - break; - case AMDIL::IMAGE2D_WRITE: - case AMDIL::IMAGE3D_WRITE: - expandImageStore(mBB, MI); - break; - case AMDIL::IMAGE2D_INFO0: - case AMDIL::IMAGE2D_INFO1: - case AMDIL::IMAGE3D_INFO0: - case AMDIL::IMAGE3D_INFO1: - expandImageParam(mBB, MI); - break; - }; -} - bool -AMDILEGIOExpansion::isCacheableOp(MachineInstr *MI) -{ - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - // We only support caching on UAV11 - JeffG - if (curRes.bits.ResourceID == 11) { - return curRes.bits.CacheableRead; - } else { - return false; - } -} - bool -AMDILEGIOExpansion::isArenaOp(MachineInstr *MI) -{ - AMDILAS::InstrResEnc curRes; - getAsmPrinterFlags(MI, curRes); - return curRes.bits.ResourceID - == mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) - || curRes.bits.ResourceID >= ARENA_SEGMENT_RESERVED_UAVS; -} - void -AMDILEGIOExpansion::expandPackedData(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - if (!isPackedData(MI)) { - return; - } - // There is a bug in the CAL compiler that incorrectly - // errors when the UBIT_INSERT instruction is - if (mSTM->calVersion() < CAL_VERSION_SC_137) { - AMDIL789IOExpansion::expandPackedData(MI); - return; - } - DebugLoc DL; - // If we have packed data, then the shift size is no longer - // the same as the load size and we need to adjust accordingly - switch(getPackedID(MI)) { - default: - break; - case PACK_V2I8: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1012) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(8)).addImm(mMFI->addi32Literal(8)) - .addReg(AMDIL::R1012).addReg(AMDIL::R1011); - } - break; - case PACK_V4I8: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1012) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), - AMDIL::R1011) - .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32))) - .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32))) - .addReg(AMDIL::R1012).addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1012) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1012).addReg(AMDIL::R1011); - } - break; - case PACK_V2I16: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1012) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1012).addReg(AMDIL::R1011); - } - break; - case PACK_V4I16: - { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1012) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1011) - .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32))) - .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32))) - .addReg(AMDIL::R1012).addReg(AMDIL::R1011); - } - break; - case UNPACK_V2I8: - case UNPACK_V4I8: - case UNPACK_V2I16: - case UNPACK_V4I16: - AMDIL789IOExpansion::expandPackedData(MI); - break; - }; -} - - void -AMDILEGIOExpansion::expandGlobalLoad(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool usesArena = isArenaOp(MI); - bool cacheable = isCacheableOp(MI); - uint32_t ID = getPointerID(MI); - mKM->setOutputInst(); - if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID) - && !mMFI->usesMem(AMDILDevice::ARENA_UAV_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - // These instructions are generated before the current MI. - expandLoadStartCode(MI); - expandArenaSetup(MI); - DebugLoc DL; - if (getMemorySize(MI) == 1) { - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i8), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1012) - .addImm(mMFI->addi32Literal(0)) - .addImm(mMFI->addi32Literal(24)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1008) - .addReg(AMDIL::R1012) - .addImm(mMFI->addi32Literal(8)) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1008) - .addReg(AMDIL::R1012) - .addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1008); - if (cacheable) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_v4i8), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - } - } else if (getMemorySize(MI) == 2) { - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i16), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(0)); - if (cacheable) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i16), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - } - } else if (getMemorySize(MI) == 4) { - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - } else { - if (cacheable) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } - } - } else if (getMemorySize(MI) == 8) { - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_Y_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(2); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - } - } else { - if (cacheable) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v2i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } - } - } else { - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_Y_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_Z_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_W_i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(2); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(3); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(4); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1006) - .addReg(AMDIL::R1007) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1008) - .addReg(AMDIL::R1006) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - } - } else { - if (cacheable) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v4i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), - AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID); - } - } - } - // These instructions are generated after the current MI. - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, I, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass))) - .addOperand(MI->getOperand(0)) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - - void -AMDILEGIOExpansion::expandRegionLoad(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { - mMFI->addErrorMsg( - amd::CompilerErrorMessage[REGION_MEMORY_ERROR]); - return; - } - if (!HWRegion || !isHardwareRegion(MI)) { - return expandGlobalLoad(MI); - } - if (!mMFI->usesMem(AMDILDevice::GDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - DebugLoc DL; - unsigned mulOp = 0; - uint32_t gID = getPointerID(MI); - assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n"); - if (!gID) { - gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - // These instructions are generated before the current MI. - expandLoadStartCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_Z), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_W), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - break; - case 1: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) - ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32; - BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - // The instruction would normally fit in right here so everything created - // after this point needs to go into the afterInst vector. - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(8)) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1011); - break; - case 2: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) - ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32; - BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1011); - break; - case 4: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - break; - case 8: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi64Literal(1ULL << 32)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(gID); - break; - }; - - // These instructions are generated after the current MI. - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, I, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass))) - .addOperand(MI->getOperand(0)) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - void -AMDILEGIOExpansion::expandLocalLoad(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - if (!HWLocal || !isHardwareLocal(MI)) { - return expandGlobalLoad(MI); - } - if (!mMFI->usesMem(AMDILDevice::LDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t lID = getPointerID(MI); - assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n"); - if (!lID) { - lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - DebugLoc DL; - unsigned mulOp = 0; - // These instructions are generated before the current MI. - expandLoadStartCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOADVEC_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - break; - case 8: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOADVEC_v2i32), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - break; - case 4: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - break; - case 1: - if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) - ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32; - BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(8)) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1011); - } else { - if (isSWSExtLoadInst(MI)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_i8), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_u8), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - } - } - break; - case 2: - if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) - ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32; - BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(0xFFFFFFFC)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011) - .addImm(mMFI->addi32Literal(16)) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1011); - } else { - if (isSWSExtLoadInst(MI)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_i16), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_u16), AMDIL::R1011) - .addReg(AMDIL::R1010) - .addImm(lID); - } - } - break; - } - - // These instructions are generated after the current MI. - expandPackedData(MI); - expandExtendLoad(MI); - BuildMI(*mBB, I, MI->getDebugLoc(), - mTII->get(getMoveInstFromID( - MI->getDesc().OpInfo[0].RegClass))) - .addOperand(MI->getOperand(0)) - .addReg(AMDIL::R1011); - MI->getOperand(0).setReg(AMDIL::R1011); -} - void -AMDILEGIOExpansion::expandGlobalStore(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool usesArena = isArenaOp(MI); - uint32_t ID = getPointerID(MI); - mKM->setOutputInst(); - if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID) - && !mMFI->usesMem(AMDILDevice::ARENA_UAV_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - DebugLoc DL; - // These instructions are expandted before the current MI. - expandStoreSetupCode(MI); - expandArenaSetup(MI); - switch (getMemorySize(MI)) { - default: - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_Y_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_Z_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_W_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(2); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1011) - .addImm(2); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(3); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1011) - .addImm(3); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(ID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(4); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1011) - .addImm(4); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(ID); - } - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } - break; - case 1: - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFF)); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i8), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } - break; - case 2: - if (usesArena) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFFFF)); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i16), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } - break; - case 4: - if (usesArena) { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } - break; - case 8: - if (usesArena) { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_Y_i32), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007) - .addReg(AMDIL::R1010) - .addImm(2); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1011) - .addImm(2); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addImm(ID); - } - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(ID); - } - break; - }; -} - void -AMDILEGIOExpansion::expandRegionStore(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - if (!HWRegion || !isHardwareRegion(MI)) { - return expandGlobalStore(MI); - } - mKM->setOutputInst(); - if (!mMFI->usesMem(AMDILDevice::GDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t gID = getPointerID(MI); - assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n"); - if (!gID) { - gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - DebugLoc DL; - unsigned mulOp = HWRegion ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32; - // These instructions are expandted before the current MI. - expandStoreSetupCode(MI); - expandArenaSetup(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE_Z), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE_W), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 1: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1006) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0xFFFFFF00)) - .addImm(mMFI->addi32Literal(0x00FFFFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(0xFF00FFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(0xFFFF00FF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1007); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_R_MSKOR), AMDIL::R1010) - .addReg(AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 2: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0x0000FFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0x0000FFFF)) - .addImm(mMFI->addi32Literal(0xFFFF0000)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_R_MSKOR), AMDIL::R1010) - .addReg(AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 4: - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - case 8: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi64Literal(1ULL << 32)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(gID); - break; - }; - -} - - void -AMDILEGIOExpansion::expandLocalStore(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - if (!HWLocal || !isHardwareLocal(MI)) { - return expandGlobalStore(MI); - } - DebugLoc DL; - if (!mMFI->usesMem(AMDILDevice::LDS_ID) - && mKM->isKernel()) { - mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); - } - uint32_t lID = getPointerID(MI); - assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n"); - if (!lID) { - lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID); - mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]); - } - unsigned mulOp = HWLocal ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32; - // These instructions are expandted before the current MI. - expandStoreSetupCode(MI); - switch (getMemorySize(MI)) { - default: - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTOREVEC_v4i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - break; - case 8: - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTOREVEC_v2i32), AMDIL::MEM) - .addReg(AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - break; - case 4: - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - break; - case 1: - if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, - (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); - BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1006) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(8)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0xFFFFFF00)) - .addImm(mMFI->addi32Literal(0x00FFFFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(0xFF00FFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addReg(AMDIL::R1007) - .addImm(mMFI->addi32Literal(0xFFFF00FF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1007); - if (mSTM->calVersion() >= CAL_VERSION_SC_137) { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_MSKOR_NORET), - AMDIL::R1010) - .addReg(AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(lID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET), - AMDIL::R1010) - .addReg(AMDIL::R1012) - .addImm(lID); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_OR_NORET), - AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - } - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE_i8), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - } - break; - case 2: - if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0x0000FFFF)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi32Literal(3)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(1)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(0x0000FFFF)) - .addImm(mMFI->addi32Literal(0xFFFF0000)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) - .addReg(AMDIL::R1008) - .addImm(mMFI->addi32Literal(16)) - .addImm(mMFI->addi32Literal(0)); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1008); - if (mSTM->calVersion() >= CAL_VERSION_SC_137) { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_MSKOR_NORET), - AMDIL::R1010) - .addReg(AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(lID); - } else { - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET), - AMDIL::R1010) - .addReg(AMDIL::R1012) - .addImm(lID); - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_OR_NORET), - AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - } - } else { - BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE_i16), AMDIL::R1010) - .addReg(AMDIL::R1011) - .addImm(lID); - } - break; - } -} - - - void -AMDILEGIOExpansion::expandStoreSetupCode(MachineInstr *MI) -{ - AMDIL789IOExpansion::expandStoreSetupCode(MI); -} - void -AMDILEGIOExpansion::expandArenaSetup(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - if (!isArenaOp(MI)) { - return; - } - const MCInstrDesc &TID = (MI->getDesc()); - const MCOperandInfo &TOI = TID.OpInfo[0]; - unsigned short RegClass = TOI.RegClass; - DebugLoc DL; - switch (RegClass) { - case AMDIL::GPRV4I16RegClassID: - case AMDIL::GPRI64RegClassID: - case AMDIL::GPRF64RegClassID: - case AMDIL::GPRV2I32RegClassID: - case AMDIL::GPRV2F32RegClassID: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi64Literal(4ULL << 32)); - break; - default: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) - .addReg(AMDIL::R1010) - .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32))); - break; - case AMDIL::GPRI8RegClassID: - case AMDIL::GPRV2I8RegClassID: - case AMDIL::GPRI16RegClassID: - case AMDIL::GPRV2I16RegClassID: - case AMDIL::GPRV4I8RegClassID: - case AMDIL::GPRI32RegClassID: - case AMDIL::GPRF32RegClassID: - break; - }; -} - diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp index 19c42bd9daa..7b5c52345d2 100644 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp @@ -10,7 +10,6 @@ #ifdef UPSTREAM_LLVM #include "AMDILEGAsmPrinter.h" #endif -#include "AMDILIOExpansion.h" using namespace llvm; @@ -136,12 +135,6 @@ void AMDILEvergreenDevice::setCaps() { } mHWBits.set(AMDILDeviceInfo::TmrReg); } -FunctionPass* -AMDILEvergreenDevice::getIOExpansion( - TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const -{ - return new AMDILEGIOExpansion(TM AMDIL_OPT_LEVEL_VAR); -} AsmPrinter* AMDILEvergreenDevice::getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h index b2e0a6a6cf2..f0e8d8c2e1e 100644 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h +++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h @@ -40,8 +40,6 @@ public: virtual uint32_t getGeneration() const; virtual uint32_t getMaxNumUAVs() const; virtual uint32_t getResourceID(uint32_t) const; - virtual FunctionPass* - getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const; virtual AsmPrinter* getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const; protected: diff --git a/src/gallium/drivers/radeon/AMDILIOExpansion.cpp b/src/gallium/drivers/radeon/AMDILIOExpansion.cpp deleted file mode 100644 index 68d8eef344d..00000000000 --- a/src/gallium/drivers/radeon/AMDILIOExpansion.cpp +++ /dev/null @@ -1,1160 +0,0 @@ -//===----------- AMDILIOExpansion.cpp - IO Expansion Pass -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence -// of instructions that produces the correct results. These instructions are -// not expanded earlier in the pass because any pass before this can assume to -// be able to generate a load/store instruction. So this pass can only have -// passes that execute after it if no load/store instructions can be generated. -//===----------------------------------------------------------------------===// -#include "AMDILIOExpansion.h" -#include "AMDIL.h" -#include "AMDILDevices.h" -#include "AMDILGlobalManager.h" -#include "AMDILKernelManager.h" -#include "AMDILMachineFunctionInfo.h" -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Value.h" - -using namespace llvm; - -char AMDILIOExpansion::ID = 0; -namespace llvm { - FunctionPass* - createAMDILIOExpansion(TargetMachine &TM AMDIL_OPT_LEVEL_DECL) - { - return TM.getSubtarget<AMDILSubtarget>() - .device()->getIOExpansion(TM AMDIL_OPT_LEVEL_VAR); - } -} - -AMDILIOExpansion::AMDILIOExpansion(TargetMachine &tm - AMDIL_OPT_LEVEL_DECL) : - MachineFunctionPass(ID), TM(tm) -{ - mSTM = &tm.getSubtarget<AMDILSubtarget>(); - mDebug = DEBUGME; - mTII = tm.getInstrInfo(); - mKM = NULL; -} - -AMDILIOExpansion::~AMDILIOExpansion() -{ -} - bool -AMDILIOExpansion::runOnMachineFunction(MachineFunction &MF) -{ - mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager()); - mMFI = MF.getInfo<AMDILMachineFunctionInfo>(); - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); - MFI != MFE; ++MFI) { - MachineBasicBlock *MBB = MFI; - for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end(); - MBI != MBE; ++MBI) { - MachineInstr *MI = MBI; - if (isIOInstruction(MI)) { - mBB = MBB; - saveInst = false; - expandIOInstruction(MI); - if (!saveInst) { - // erase returns the instruction after - // and we want the instruction before - MBI = MBB->erase(MI); - --MBI; - } - } - } - } - return false; -} -const char *AMDILIOExpansion::getPassName() const -{ - return "AMDIL Generic IO Expansion Pass"; -} - bool -AMDILIOExpansion::isIOInstruction(MachineInstr *MI) -{ - if (!MI) { - return false; - } - switch(MI->getOpcode()) { - default: - return false; - ExpandCaseToAllTypes(AMDIL::CPOOLLOAD) - ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD) - ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATELOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATESTORE) - ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE) - ExpandCaseToAllTypes(AMDIL::REGIONSTORE) - ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE) - ExpandCaseToAllTypes(AMDIL::REGIONLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALSTORE) - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE) - ExpandCaseToAllTypes(AMDIL::LOCALLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::GLOBALLOAD) - ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::GLOBALSTORE) - ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE) - return true; - }; - return false; -} -void -AMDILIOExpansion::expandIOInstruction(MachineInstr *MI) -{ - assert(isIOInstruction(MI) && "Must be an IO instruction to " - "be passed to this function!"); - switch (MI->getOpcode()) { - default: - assert(0 && "Not an IO Instruction!"); - ExpandCaseToAllTypes(AMDIL::GLOBALLOAD); - ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD); - expandGlobalLoad(MI); - break; - ExpandCaseToAllTypes(AMDIL::REGIONLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD); - expandRegionLoad(MI); - break; - ExpandCaseToAllTypes(AMDIL::LOCALLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD); - expandLocalLoad(MI); - break; - ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD); - expandConstantLoad(MI); - break; - ExpandCaseToAllTypes(AMDIL::PRIVATELOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD); - expandPrivateLoad(MI); - break; - ExpandCaseToAllTypes(AMDIL::CPOOLLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD); - expandConstantPoolLoad(MI); - break; - ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE) - ExpandCaseToAllTypes(AMDIL::GLOBALSTORE); - expandGlobalStore(MI); - break; - ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE); - ExpandCaseToAllTypes(AMDIL::PRIVATESTORE); - expandPrivateStore(MI); - break; - ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE); - ExpandCaseToAllTypes(AMDIL::REGIONSTORE); - expandRegionStore(MI); - break; - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE); - ExpandCaseToAllTypes(AMDIL::LOCALSTORE); - expandLocalStore(MI); - break; - } -} - bool -AMDILIOExpansion::isAddrCalcInstr(MachineInstr *MI) -{ - switch(MI->getOpcode()) { - ExpandCaseToAllTypes(AMDIL::PRIVATELOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD) - { - // This section of code is a workaround for the problem of - // globally scoped constant address variables. The problems - // comes that although they are declared in the constant - // address space, all variables must be allocated in the - // private address space. So when there is a load from - // the global address, it automatically goes into the private - // address space. However, the data section is placed in the - // constant address space so we need to check to see if our - // load base address is a global variable or not. Only if it - // is not a global variable can we do the address calculation - // into the private memory ring. - - MachineMemOperand& memOp = (**MI->memoperands_begin()); - const Value *V = memOp.getValue(); - if (V) { - const GlobalValue *GV = dyn_cast<GlobalVariable>(V); - return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem) - && !(GV); - } else { - return false; - } - } - ExpandCaseToAllTypes(AMDIL::CPOOLLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD); - return MI->getOperand(1).isReg(); - ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE); - ExpandCaseToAllTypes(AMDIL::PRIVATESTORE); - return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem); - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE); - ExpandCaseToAllTypes(AMDIL::LOCALSTORE); - ExpandCaseToAllTypes(AMDIL::LOCALLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD); - return mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem); - }; - return false; - -} - bool -AMDILIOExpansion::isExtendLoad(MachineInstr *MI) -{ - return isSExtLoadInst(TM.getInstrInfo(), MI) || - isZExtLoadInst(TM.getInstrInfo(), MI) || - isAExtLoadInst(TM.getInstrInfo(), MI) - || isSWSExtLoadInst(MI); -} - - bool -AMDILIOExpansion::isHardwareRegion(MachineInstr *MI) -{ - switch(MI->getOpcode()) { - default: - return false; - break; - ExpandCaseToAllTypes(AMDIL::REGIONLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::REGIONSTORE) - ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE) - return mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem); - }; - return false; -} - bool -AMDILIOExpansion::isHardwareLocal(MachineInstr *MI) -{ - switch(MI->getOpcode()) { - default: - return false; - break; - ExpandCaseToAllTypes(AMDIL::LOCALLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALSTORE) - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE) - return mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem); - }; - return false; -} - bool -AMDILIOExpansion::isPackedData(MachineInstr *MI) -{ - switch(MI->getOpcode()) { - default: - if (isTruncStoreInst(TM.getInstrInfo(), MI)) { - switch (MI->getDesc().OpInfo[0].RegClass) { - default: - break; - case AMDIL::GPRV2I64RegClassID: - case AMDIL::GPRV2I32RegClassID: - switch (getMemorySize(MI)) { - case 2: - case 4: - return true; - default: - break; - } - break; - case AMDIL::GPRV4I32RegClassID: - switch (getMemorySize(MI)) { - case 4: - case 8: - return true; - default: - break; - } - break; - } - } - break; - ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD); - ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD); - ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::LOCALLOAD); - ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::REGIONLOAD); - ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD); - ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD); - ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD); - ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD); - ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE) - ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE); - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE); - ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE); - ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE); - ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE); - ExpandCaseToPackedTypes(AMDIL::LOCALSTORE); - ExpandCaseToPackedTypes(AMDIL::REGIONSTORE); - return true; - } - return false; -} - - bool -AMDILIOExpansion::isStaticCPLoad(MachineInstr *MI) -{ - switch(MI->getOpcode()) { - ExpandCaseToAllTypes(AMDIL::CPOOLLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD); - ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD); - { - uint32_t x = 0; - uint32_t num = MI->getNumOperands(); - for (x = 0; x < num; ++x) { - if (MI->getOperand(x).isCPI()) { - return true; - } - } - } - break; - default: - break; - } - return false; -} - - bool -AMDILIOExpansion::isNbitType(Type *mType, uint32_t nBits, bool isScalar) -{ - if (!mType) { - return false; - } - if (dyn_cast<PointerType>(mType)) { - PointerType *PT = dyn_cast<PointerType>(mType); - return isNbitType(PT->getElementType(), nBits); - } else if (dyn_cast<StructType>(mType)) { - return getTypeSize(mType) == nBits; - } else if (dyn_cast<VectorType>(mType)) { - VectorType *VT = dyn_cast<VectorType>(mType); - size_t size = VT->getScalarSizeInBits(); - return (isScalar ? - VT->getNumElements() * size == nBits : size == nBits); - } else if (dyn_cast<ArrayType>(mType)) { - ArrayType *AT = dyn_cast<ArrayType>(mType); - size_t size = AT->getScalarSizeInBits(); - return (isScalar ? - AT->getNumElements() * size == nBits : size == nBits); - } else if (mType->isSized()) { - return mType->getScalarSizeInBits() == nBits; - } else { - assert(0 && "Found a type that we don't know how to handle!"); - return false; - } -} - - bool -AMDILIOExpansion::isHardwareInst(MachineInstr *MI) -{ - AMDILAS::InstrResEnc curRes; - curRes.u16all = MI->getAsmPrinterFlags(); - return curRes.bits.HardwareInst; -} - -REG_PACKED_TYPE -AMDILIOExpansion::getPackedID(MachineInstr *MI) -{ - switch (MI->getOpcode()) { - default: - break; - case AMDIL::GLOBALTRUNCSTORE_v2i64i8: - case AMDIL::REGIONTRUNCSTORE_v2i64i8: - case AMDIL::LOCALTRUNCSTORE_v2i64i8: - case AMDIL::PRIVATETRUNCSTORE_v2i64i8: - case AMDIL::GLOBALTRUNCSTORE_v2i32i8: - case AMDIL::REGIONTRUNCSTORE_v2i32i8: - case AMDIL::LOCALTRUNCSTORE_v2i32i8: - case AMDIL::PRIVATETRUNCSTORE_v2i32i8: - case AMDIL::GLOBALTRUNCSTORE_v2i16i8: - case AMDIL::REGIONTRUNCSTORE_v2i16i8: - case AMDIL::LOCALTRUNCSTORE_v2i16i8: - case AMDIL::PRIVATETRUNCSTORE_v2i16i8: - case AMDIL::GLOBALSTORE_v2i8: - case AMDIL::LOCALSTORE_v2i8: - case AMDIL::REGIONSTORE_v2i8: - case AMDIL::PRIVATESTORE_v2i8: - return PACK_V2I8; - case AMDIL::GLOBALTRUNCSTORE_v4i32i8: - case AMDIL::REGIONTRUNCSTORE_v4i32i8: - case AMDIL::LOCALTRUNCSTORE_v4i32i8: - case AMDIL::PRIVATETRUNCSTORE_v4i32i8: - case AMDIL::GLOBALTRUNCSTORE_v4i16i8: - case AMDIL::REGIONTRUNCSTORE_v4i16i8: - case AMDIL::LOCALTRUNCSTORE_v4i16i8: - case AMDIL::PRIVATETRUNCSTORE_v4i16i8: - case AMDIL::GLOBALSTORE_v4i8: - case AMDIL::LOCALSTORE_v4i8: - case AMDIL::REGIONSTORE_v4i8: - case AMDIL::PRIVATESTORE_v4i8: - return PACK_V4I8; - case AMDIL::GLOBALTRUNCSTORE_v2i64i16: - case AMDIL::REGIONTRUNCSTORE_v2i64i16: - case AMDIL::LOCALTRUNCSTORE_v2i64i16: - case AMDIL::PRIVATETRUNCSTORE_v2i64i16: - case AMDIL::GLOBALTRUNCSTORE_v2i32i16: - case AMDIL::REGIONTRUNCSTORE_v2i32i16: - case AMDIL::LOCALTRUNCSTORE_v2i32i16: - case AMDIL::PRIVATETRUNCSTORE_v2i32i16: - case AMDIL::GLOBALSTORE_v2i16: - case AMDIL::LOCALSTORE_v2i16: - case AMDIL::REGIONSTORE_v2i16: - case AMDIL::PRIVATESTORE_v2i16: - return PACK_V2I16; - case AMDIL::GLOBALTRUNCSTORE_v4i32i16: - case AMDIL::REGIONTRUNCSTORE_v4i32i16: - case AMDIL::LOCALTRUNCSTORE_v4i32i16: - case AMDIL::PRIVATETRUNCSTORE_v4i32i16: - case AMDIL::GLOBALSTORE_v4i16: - case AMDIL::LOCALSTORE_v4i16: - case AMDIL::REGIONSTORE_v4i16: - case AMDIL::PRIVATESTORE_v4i16: - return PACK_V4I16; - case AMDIL::GLOBALLOAD_v2i8: - case AMDIL::GLOBALSEXTLOAD_v2i8: - case AMDIL::GLOBALAEXTLOAD_v2i8: - case AMDIL::GLOBALZEXTLOAD_v2i8: - case AMDIL::LOCALLOAD_v2i8: - case AMDIL::LOCALSEXTLOAD_v2i8: - case AMDIL::LOCALAEXTLOAD_v2i8: - case AMDIL::LOCALZEXTLOAD_v2i8: - case AMDIL::REGIONLOAD_v2i8: - case AMDIL::REGIONSEXTLOAD_v2i8: - case AMDIL::REGIONAEXTLOAD_v2i8: - case AMDIL::REGIONZEXTLOAD_v2i8: - case AMDIL::PRIVATELOAD_v2i8: - case AMDIL::PRIVATESEXTLOAD_v2i8: - case AMDIL::PRIVATEAEXTLOAD_v2i8: - case AMDIL::PRIVATEZEXTLOAD_v2i8: - case AMDIL::CONSTANTLOAD_v2i8: - case AMDIL::CONSTANTSEXTLOAD_v2i8: - case AMDIL::CONSTANTAEXTLOAD_v2i8: - case AMDIL::CONSTANTZEXTLOAD_v2i8: - return UNPACK_V2I8; - case AMDIL::GLOBALLOAD_v4i8: - case AMDIL::GLOBALSEXTLOAD_v4i8: - case AMDIL::GLOBALAEXTLOAD_v4i8: - case AMDIL::GLOBALZEXTLOAD_v4i8: - case AMDIL::LOCALLOAD_v4i8: - case AMDIL::LOCALSEXTLOAD_v4i8: - case AMDIL::LOCALAEXTLOAD_v4i8: - case AMDIL::LOCALZEXTLOAD_v4i8: - case AMDIL::REGIONLOAD_v4i8: - case AMDIL::REGIONSEXTLOAD_v4i8: - case AMDIL::REGIONAEXTLOAD_v4i8: - case AMDIL::REGIONZEXTLOAD_v4i8: - case AMDIL::PRIVATELOAD_v4i8: - case AMDIL::PRIVATESEXTLOAD_v4i8: - case AMDIL::PRIVATEAEXTLOAD_v4i8: - case AMDIL::PRIVATEZEXTLOAD_v4i8: - case AMDIL::CONSTANTLOAD_v4i8: - case AMDIL::CONSTANTSEXTLOAD_v4i8: - case AMDIL::CONSTANTAEXTLOAD_v4i8: - case AMDIL::CONSTANTZEXTLOAD_v4i8: - return UNPACK_V4I8; - case AMDIL::GLOBALLOAD_v2i16: - case AMDIL::GLOBALSEXTLOAD_v2i16: - case AMDIL::GLOBALAEXTLOAD_v2i16: - case AMDIL::GLOBALZEXTLOAD_v2i16: - case AMDIL::LOCALLOAD_v2i16: - case AMDIL::LOCALSEXTLOAD_v2i16: - case AMDIL::LOCALAEXTLOAD_v2i16: - case AMDIL::LOCALZEXTLOAD_v2i16: - case AMDIL::REGIONLOAD_v2i16: - case AMDIL::REGIONSEXTLOAD_v2i16: - case AMDIL::REGIONAEXTLOAD_v2i16: - case AMDIL::REGIONZEXTLOAD_v2i16: - case AMDIL::PRIVATELOAD_v2i16: - case AMDIL::PRIVATESEXTLOAD_v2i16: - case AMDIL::PRIVATEAEXTLOAD_v2i16: - case AMDIL::PRIVATEZEXTLOAD_v2i16: - case AMDIL::CONSTANTLOAD_v2i16: - case AMDIL::CONSTANTSEXTLOAD_v2i16: - case AMDIL::CONSTANTAEXTLOAD_v2i16: - case AMDIL::CONSTANTZEXTLOAD_v2i16: - return UNPACK_V2I16; - case AMDIL::GLOBALLOAD_v4i16: - case AMDIL::GLOBALSEXTLOAD_v4i16: - case AMDIL::GLOBALAEXTLOAD_v4i16: - case AMDIL::GLOBALZEXTLOAD_v4i16: - case AMDIL::LOCALLOAD_v4i16: - case AMDIL::LOCALSEXTLOAD_v4i16: - case AMDIL::LOCALAEXTLOAD_v4i16: - case AMDIL::LOCALZEXTLOAD_v4i16: - case AMDIL::REGIONLOAD_v4i16: - case AMDIL::REGIONSEXTLOAD_v4i16: - case AMDIL::REGIONAEXTLOAD_v4i16: - case AMDIL::REGIONZEXTLOAD_v4i16: - case AMDIL::PRIVATELOAD_v4i16: - case AMDIL::PRIVATESEXTLOAD_v4i16: - case AMDIL::PRIVATEAEXTLOAD_v4i16: - case AMDIL::PRIVATEZEXTLOAD_v4i16: - case AMDIL::CONSTANTLOAD_v4i16: - case AMDIL::CONSTANTSEXTLOAD_v4i16: - case AMDIL::CONSTANTAEXTLOAD_v4i16: - case AMDIL::CONSTANTZEXTLOAD_v4i16: - return UNPACK_V4I16; - }; - return NO_PACKING; -} - - uint32_t -AMDILIOExpansion::getPointerID(MachineInstr *MI) -{ - AMDILAS::InstrResEnc curInst; - getAsmPrinterFlags(MI, curInst); - return curInst.bits.ResourceID; -} - - uint32_t -AMDILIOExpansion::getShiftSize(MachineInstr *MI) -{ - switch(getPackedID(MI)) { - default: - return 0; - case PACK_V2I8: - case PACK_V4I8: - case UNPACK_V2I8: - case UNPACK_V4I8: - return 1; - case PACK_V2I16: - case PACK_V4I16: - case UNPACK_V2I16: - case UNPACK_V4I16: - return 2; - } - return 0; -} - uint32_t -AMDILIOExpansion::getMemorySize(MachineInstr *MI) -{ - if (MI->memoperands_empty()) { - return 4; - } - return (uint32_t)((*MI->memoperands_begin())->getSize()); -} - - void -AMDILIOExpansion::expandLongExtend(MachineInstr *MI, - uint32_t numComps, uint32_t size, bool signedShift) -{ - DebugLoc DL = MI->getDebugLoc(); - switch(size) { - default: - assert(0 && "Found a case we don't handle!"); - break; - case 8: - if (numComps == 1) { - expandLongExtendSub32(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_v2i32, - AMDIL::USHRVEC_i8, - 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE, signedShift); - } else if (numComps == 2) { - expandLongExtendSub32(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v4i32, - AMDIL::USHRVEC_v2i8, - 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE_v2i64, signedShift); - } else { - assert(0 && "Found a case we don't handle!"); - } - break; - case 16: - if (numComps == 1) { - expandLongExtendSub32(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_v2i32, - AMDIL::USHRVEC_i16, - 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE, signedShift); - } else if (numComps == 2) { - expandLongExtendSub32(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v4i32, - AMDIL::USHRVEC_v2i16, - 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE_v2i64, signedShift); - } else { - assert(0 && "Found a case we don't handle!"); - } - break; - case 32: - if (numComps == 1) { - if (signedShift) { - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_i32), AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(31)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1012); - } else { - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0)); - } - } else if (numComps == 2) { - if (signedShift) { - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v2i32), AMDIL::R1012) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(31)); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addReg(AMDIL::R1012); - } else { - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0)); - } - } else { - assert(0 && "Found a case we don't handle!"); - } - }; -} - void -AMDILIOExpansion::expandLongExtendSub32(MachineInstr *MI, - unsigned SHLop, unsigned SHRop, unsigned USHRop, - unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm, - unsigned LCRop, bool signedShift) -{ - DebugLoc DL = MI->getDebugLoc(); - BuildMI(*mBB, MI, DL, mTII->get(SHLop), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(SHLimm)); - if (signedShift) { - BuildMI(*mBB, MI, DL, mTII->get(LCRop), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1011); - BuildMI(*mBB, MI, DL, mTII->get(SHRop), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi64Literal(SHRimm)); - } else { - BuildMI(*mBB, MI, DL, mTII->get(USHRop), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(USHRimm)); - BuildMI(*mBB, MI, MI->getDebugLoc(), mTII->get(LCRop), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0)); - } -} - - void -AMDILIOExpansion::expandIntegerExtend(MachineInstr *MI, unsigned SHLop, - unsigned SHRop, unsigned offset) -{ - DebugLoc DL = MI->getDebugLoc(); - offset = mMFI->addi32Literal(offset); - BuildMI(*mBB, MI, DL, - mTII->get(SHLop), AMDIL::R1011) - .addReg(AMDIL::R1011).addImm(offset); - BuildMI(*mBB, MI, DL, - mTII->get(SHRop), AMDIL::R1011) - .addReg(AMDIL::R1011).addImm(offset); -} - void -AMDILIOExpansion::expandExtendLoad(MachineInstr *MI) -{ - if (!isExtendLoad(MI)) { - return; - } - Type *mType = NULL; - if (!MI->memoperands_empty()) { - MachineMemOperand *memOp = (*MI->memoperands_begin()); - const Value *moVal = (memOp) ? memOp->getValue() : NULL; - mType = (moVal) ? moVal->getType() : NULL; - } - unsigned opcode = 0; - DebugLoc DL = MI->getDebugLoc(); - if (isZExtLoadInst(TM.getInstrInfo(), MI) || isAExtLoadInst(TM.getInstrInfo(), MI) || isSExtLoadInst(TM.getInstrInfo(), MI)) { - switch(MI->getDesc().OpInfo[0].RegClass) { - default: - assert(0 && "Found an extending load that we don't handle!"); - break; - case AMDIL::GPRI16RegClassID: - if (!isHardwareLocal(MI) - || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_i16 : AMDIL::USHRVEC_i16; - expandIntegerExtend(MI, AMDIL::SHL_i16, opcode, 24); - } - break; - case AMDIL::GPRV2I16RegClassID: - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v2i16 : AMDIL::USHRVEC_v2i16; - expandIntegerExtend(MI, AMDIL::SHL_v2i16, opcode, 24); - break; - case AMDIL::GPRV4I8RegClassID: - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i8 : AMDIL::USHRVEC_v4i8; - expandIntegerExtend(MI, AMDIL::SHL_v4i8, opcode, 24); - break; - case AMDIL::GPRV4I16RegClassID: - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i16 : AMDIL::USHRVEC_v4i16; - expandIntegerExtend(MI, AMDIL::SHL_v4i16, opcode, 24); - break; - case AMDIL::GPRI32RegClassID: - // We can be a i8 or i16 bit sign extended value - if (isNbitType(mType, 8) || getMemorySize(MI) == 1) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32; - expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 24); - } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32; - expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 16); - } else { - assert(0 && "Found an extending load that we don't handle!"); - } - break; - case AMDIL::GPRV2I32RegClassID: - // We can be a v2i8 or v2i16 bit sign extended value - if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32; - expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 24); - } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32; - expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 16); - } else { - assert(0 && "Found an extending load that we don't handle!"); - } - break; - case AMDIL::GPRV4I32RegClassID: - // We can be a v4i8 or v4i16 bit sign extended value - if (isNbitType(mType, 8, false) || getMemorySize(MI) == 4) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32; - expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 24); - } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 8) { - opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32; - expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 16); - } else { - assert(0 && "Found an extending load that we don't handle!"); - } - break; - case AMDIL::GPRI64RegClassID: - // We can be a i8, i16 or i32 bit sign extended value - if (isNbitType(mType, 8) || getMemorySize(MI) == 1) { - expandLongExtend(MI, 1, 8, isSExtLoadInst(TM.getInstrInfo(), MI)); - } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) { - expandLongExtend(MI, 1, 16, isSExtLoadInst(TM.getInstrInfo(), MI)); - } else if (isNbitType(mType, 32) || getMemorySize(MI) == 4) { - expandLongExtend(MI, 1, 32, isSExtLoadInst(TM.getInstrInfo(), MI)); - } else { - assert(0 && "Found an extending load that we don't handle!"); - } - break; - case AMDIL::GPRV2I64RegClassID: - // We can be a v2i8, v2i16 or v2i32 bit sign extended value - if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) { - expandLongExtend(MI, 2, 8, isSExtLoadInst(TM.getInstrInfo(), MI)); - } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) { - expandLongExtend(MI, 2, 16, isSExtLoadInst(TM.getInstrInfo(), MI)); - } else if (isNbitType(mType, 32, false) || getMemorySize(MI) == 8) { - expandLongExtend(MI, 2, 32, isSExtLoadInst(TM.getInstrInfo(), MI)); - } else { - assert(0 && "Found an extending load that we don't handle!"); - } - break; - case AMDIL::GPRF32RegClassID: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::HTOF_f32), AMDIL::R1011) - .addReg(AMDIL::R1011); - break; - case AMDIL::GPRV2F32RegClassID: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::HTOF_v2f32), AMDIL::R1011) - .addReg(AMDIL::R1011); - break; - case AMDIL::GPRV4F32RegClassID: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::HTOF_v4f32), AMDIL::R1011) - .addReg(AMDIL::R1011); - break; - case AMDIL::GPRF64RegClassID: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::FTOD), AMDIL::R1011) - .addReg(AMDIL::R1011); - break; - case AMDIL::GPRV2F64RegClassID: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v2f32), - AMDIL::R1012).addReg(AMDIL::R1011).addImm(2); - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::FTOD), AMDIL::R1011) - .addReg(AMDIL::R1011); - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::FTOD), AMDIL::R1012) - .addReg(AMDIL::R1012); - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::VINSERT_v2f64), AMDIL::R1011) - .addReg(AMDIL::R1011).addReg(AMDIL::R1012) - .addImm(1 << 8).addImm(1 << 8); - break; - }; - } else if (isSWSExtLoadInst(MI)) { - switch(MI->getDesc().OpInfo[0].RegClass) { - case AMDIL::GPRI8RegClassID: - if (!isHardwareLocal(MI) - || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) { - expandIntegerExtend(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_i8, 24); - } - break; - case AMDIL::GPRV2I8RegClassID: - expandIntegerExtend(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v2i8, 24); - break; - case AMDIL::GPRV4I8RegClassID: - expandIntegerExtend(MI, AMDIL::SHL_v4i8, AMDIL::SHRVEC_v4i8, 24); - break; - case AMDIL::GPRI16RegClassID: - if (!isHardwareLocal(MI) - || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) { - expandIntegerExtend(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_i16, 16); - } - break; - case AMDIL::GPRV2I16RegClassID: - expandIntegerExtend(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v2i16, 16); - break; - case AMDIL::GPRV4I16RegClassID: - expandIntegerExtend(MI, AMDIL::SHL_v4i16, AMDIL::SHRVEC_v4i16, 16); - break; - - }; - } -} - - void -AMDILIOExpansion::expandTruncData(MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - if (!isTruncStoreInst(TM.getInstrInfo(), MI)) { - return; - } - DebugLoc DL = MI->getDebugLoc(); - switch (MI->getOpcode()) { - default: - MI->dump(); - assert(!"Found a trunc store instructions we don't handle!"); - break; - case AMDIL::GLOBALTRUNCSTORE_i64i8: - case AMDIL::GLOBALTRUNCSTORE_v2i64i8: - case AMDIL::LOCALTRUNCSTORE_i64i8: - case AMDIL::LOCALTRUNCSTORE_v2i64i8: - case AMDIL::REGIONTRUNCSTORE_i64i8: - case AMDIL::REGIONTRUNCSTORE_v2i64i8: - case AMDIL::PRIVATETRUNCSTORE_i64i8: - case AMDIL::PRIVATETRUNCSTORE_v2i64i8: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011); - case AMDIL::GLOBALTRUNCSTORE_i16i8: - case AMDIL::GLOBALTRUNCSTORE_v2i16i8: - case AMDIL::GLOBALTRUNCSTORE_v4i16i8: - case AMDIL::LOCALTRUNCSTORE_i16i8: - case AMDIL::LOCALTRUNCSTORE_v2i16i8: - case AMDIL::LOCALTRUNCSTORE_v4i16i8: - case AMDIL::REGIONTRUNCSTORE_i16i8: - case AMDIL::REGIONTRUNCSTORE_v2i16i8: - case AMDIL::REGIONTRUNCSTORE_v4i16i8: - case AMDIL::PRIVATETRUNCSTORE_i16i8: - case AMDIL::PRIVATETRUNCSTORE_v2i16i8: - case AMDIL::PRIVATETRUNCSTORE_v4i16i8: - case AMDIL::GLOBALTRUNCSTORE_i32i8: - case AMDIL::GLOBALTRUNCSTORE_v2i32i8: - case AMDIL::GLOBALTRUNCSTORE_v4i32i8: - case AMDIL::LOCALTRUNCSTORE_i32i8: - case AMDIL::LOCALTRUNCSTORE_v2i32i8: - case AMDIL::LOCALTRUNCSTORE_v4i32i8: - case AMDIL::REGIONTRUNCSTORE_i32i8: - case AMDIL::REGIONTRUNCSTORE_v2i32i8: - case AMDIL::REGIONTRUNCSTORE_v4i32i8: - case AMDIL::PRIVATETRUNCSTORE_i32i8: - case AMDIL::PRIVATETRUNCSTORE_v2i32i8: - case AMDIL::PRIVATETRUNCSTORE_v4i32i8: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFF)); - break; - case AMDIL::GLOBALTRUNCSTORE_i64i16: - case AMDIL::GLOBALTRUNCSTORE_v2i64i16: - case AMDIL::LOCALTRUNCSTORE_i64i16: - case AMDIL::LOCALTRUNCSTORE_v2i64i16: - case AMDIL::REGIONTRUNCSTORE_i64i16: - case AMDIL::REGIONTRUNCSTORE_v2i64i16: - case AMDIL::PRIVATETRUNCSTORE_i64i16: - case AMDIL::PRIVATETRUNCSTORE_v2i64i16: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011); - case AMDIL::GLOBALTRUNCSTORE_i32i16: - case AMDIL::GLOBALTRUNCSTORE_v2i32i16: - case AMDIL::GLOBALTRUNCSTORE_v4i32i16: - case AMDIL::LOCALTRUNCSTORE_i32i16: - case AMDIL::LOCALTRUNCSTORE_v2i32i16: - case AMDIL::LOCALTRUNCSTORE_v4i32i16: - case AMDIL::REGIONTRUNCSTORE_i32i16: - case AMDIL::REGIONTRUNCSTORE_v2i32i16: - case AMDIL::REGIONTRUNCSTORE_v4i32i16: - case AMDIL::PRIVATETRUNCSTORE_i32i16: - case AMDIL::PRIVATETRUNCSTORE_v2i32i16: - case AMDIL::PRIVATETRUNCSTORE_v4i32i16: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011) - .addReg(AMDIL::R1011) - .addImm(mMFI->addi32Literal(0xFFFF)); - break; - case AMDIL::GLOBALTRUNCSTORE_i64i32: - case AMDIL::LOCALTRUNCSTORE_i64i32: - case AMDIL::REGIONTRUNCSTORE_i64i32: - case AMDIL::PRIVATETRUNCSTORE_i64i32: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::LLO), AMDIL::R1011) - .addReg(AMDIL::R1011); - break; - case AMDIL::GLOBALTRUNCSTORE_v2i64i32: - case AMDIL::LOCALTRUNCSTORE_v2i64i32: - case AMDIL::REGIONTRUNCSTORE_v2i64i32: - case AMDIL::PRIVATETRUNCSTORE_v2i64i32: - BuildMI(*mBB, MI, DL, - mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011) - .addReg(AMDIL::R1011); - break; - case AMDIL::GLOBALTRUNCSTORE_f64f32: - case AMDIL::LOCALTRUNCSTORE_f64f32: - case AMDIL::REGIONTRUNCSTORE_f64f32: - case AMDIL::PRIVATETRUNCSTORE_f64f32: - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF), - AMDIL::R1011).addReg(AMDIL::R1011); - break; - case AMDIL::GLOBALTRUNCSTORE_v2f64f32: - case AMDIL::LOCALTRUNCSTORE_v2f64f32: - case AMDIL::REGIONTRUNCSTORE_v2f64f32: - case AMDIL::PRIVATETRUNCSTORE_v2f64f32: - BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v2f64), - AMDIL::R1012).addReg(AMDIL::R1011).addImm(2); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF), - AMDIL::R1011).addReg(AMDIL::R1011); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF), - AMDIL::R1012).addReg(AMDIL::R1012); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v2f32), - AMDIL::R1011).addReg(AMDIL::R1011).addReg(AMDIL::R1012) - .addImm(1 << 8).addImm(1 << 8); - break; - } -} - void -AMDILIOExpansion::expandAddressCalc(MachineInstr *MI) -{ - if (!isAddrCalcInstr(MI)) { - return; - } - DebugLoc DL = MI->getDebugLoc(); - switch(MI->getOpcode()) { - ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE) - ExpandCaseToAllTypes(AMDIL::PRIVATESTORE) - ExpandCaseToAllTypes(AMDIL::PRIVATELOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD) - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32), - AMDIL::R1010).addReg(AMDIL::R1010).addReg(AMDIL::T1); - break; - ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE) - ExpandCaseToAllTypes(AMDIL::LOCALLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD) - ExpandCaseToAllTypes(AMDIL::LOCALSTORE) - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32), - AMDIL::R1010).addReg(AMDIL::R1010).addReg(AMDIL::T2); - break; - ExpandCaseToAllTypes(AMDIL::CPOOLLOAD) - ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD) - ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD) - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32), - AMDIL::R1010).addReg(AMDIL::R1010).addReg(AMDIL::SDP); - break; - default: - return; - } -} - void -AMDILIOExpansion::expandLoadStartCode(MachineInstr *MI) -{ - DebugLoc DL = MI->getDebugLoc(); - if (MI->getOperand(2).isReg()) { - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32), - AMDIL::R1010).addReg(MI->getOperand(1).getReg()) - .addReg(MI->getOperand(2).getReg()); - } else { - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::MOVE_i32), - AMDIL::R1010).addReg(MI->getOperand(1).getReg()); - } - MI->getOperand(1).setReg(AMDIL::R1010); - expandAddressCalc(MI); -} - void -AMDILIOExpansion::emitStaticCPLoad(MachineInstr* MI, int swizzle, - int id, bool ExtFPLoad) -{ - DebugLoc DL = MI->getDebugLoc(); - switch(swizzle) { - default: - BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad - ? AMDIL::DTOF : AMDIL::MOVE_i32), - MI->getOperand(0).getReg()) - .addImm(id); - break; - case 1: - case 2: - case 3: - BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad - ? AMDIL::DTOF : AMDIL::MOVE_i32), AMDIL::R1001) - .addImm(id); - BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v4i32), - MI->getOperand(0).getReg()) - .addReg(MI->getOperand(0).getReg()) - .addReg(AMDIL::R1001) - .addImm(swizzle + 1); - break; - }; -} - void -AMDILIOExpansion::emitCPInst(MachineInstr* MI, - const Constant* C, AMDILKernelManager* KM, int swizzle, bool ExtFPLoad) -{ - if (const ConstantFP* CFP = dyn_cast<ConstantFP>(C)) { - if (CFP->getType()->isFloatTy()) { - uint32_t val = (uint32_t)(CFP->getValueAPF().bitcastToAPInt() - .getZExtValue()); - uint32_t id = mMFI->addi32Literal(val); - if (!id) { - const APFloat &APF = CFP->getValueAPF(); - union dtol_union { - double d; - uint64_t ul; - } conv; - if (&APF.getSemantics() - == (const llvm::fltSemantics*)&APFloat::IEEEsingle) { - float fval = APF.convertToFloat(); - conv.d = (double)fval; - } else { - conv.d = APF.convertToDouble(); - } - id = mMFI->addi64Literal(conv.ul); - } - emitStaticCPLoad(MI, swizzle, id, ExtFPLoad); - } else { - const APFloat &APF = CFP->getValueAPF(); - union ftol_union { - double d; - uint64_t ul; - } conv; - if (&APF.getSemantics() - == (const llvm::fltSemantics*)&APFloat::IEEEsingle) { - float fval = APF.convertToFloat(); - conv.d = (double)fval; - } else { - conv.d = APF.convertToDouble(); - } - uint32_t id = mMFI->getLongLits(conv.ul); - if (!id) { - id = mMFI->getIntLits((uint32_t)conv.ul); - } - emitStaticCPLoad(MI, swizzle, id, ExtFPLoad); - } - } else if (const ConstantInt* CI = dyn_cast<ConstantInt>(C)) { - int64_t val = 0; - if (CI) { - val = CI->getSExtValue(); - } - if (CI->getBitWidth() == 64) { - emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(val), ExtFPLoad); - } else { - emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(val), ExtFPLoad); - } - } else if (const ConstantArray* CA = dyn_cast<ConstantArray>(C)) { - uint32_t size = CA->getNumOperands(); - assert(size < 5 && "Cannot handle a constant array where size > 4"); - if (size > 4) { - size = 4; - } - for (uint32_t x = 0; x < size; ++x) { - emitCPInst(MI, CA->getOperand(0), KM, x, ExtFPLoad); - } - } else if (const ConstantAggregateZero* CAZ - = dyn_cast<ConstantAggregateZero>(C)) { - if (CAZ->isNullValue()) { - emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad); - } - } else if (const ConstantStruct* CS = dyn_cast<ConstantStruct>(C)) { - uint32_t size = CS->getNumOperands(); - assert(size < 5 && "Cannot handle a constant array where size > 4"); - if (size > 4) { - size = 4; - } - for (uint32_t x = 0; x < size; ++x) { - emitCPInst(MI, CS->getOperand(0), KM, x, ExtFPLoad); - } - } else if (const ConstantVector* CV = dyn_cast<ConstantVector>(C)) { - // TODO: Make this handle vectors natively up to the correct - // size - uint32_t size = CV->getNumOperands(); - assert(size < 5 && "Cannot handle a constant array where size > 4"); - if (size > 4) { - size = 4; - } - for (uint32_t x = 0; x < size; ++x) { - emitCPInst(MI, CV->getOperand(0), KM, x, ExtFPLoad); - } - } else { - // TODO: Do we really need to handle ConstantPointerNull? - // What about BlockAddress, ConstantExpr and Undef? - // How would these even be generated by a valid CL program? - assert(0 && "Found a constant type that I don't know how to handle"); - } -} - diff --git a/src/gallium/drivers/radeon/AMDILIOExpansion.h b/src/gallium/drivers/radeon/AMDILIOExpansion.h deleted file mode 100644 index af4709a892c..00000000000 --- a/src/gallium/drivers/radeon/AMDILIOExpansion.h +++ /dev/null @@ -1,320 +0,0 @@ -//===----------- AMDILIOExpansion.h - IO Expansion Pass -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence -// of instructions that produces the correct results. These instructions are -// not expanded earlier in the backend because any pass before this can assume to -// be able to generate a load/store instruction. So this pass can only have -// passes that execute after it if no load/store instructions can be generated -// in those passes. -//===----------------------------------------------------------------------===// -#ifndef _AMDILIOEXPANSION_H_ -#define _AMDILIOEXPANSION_H_ -#undef DEBUG_TYPE -#undef DEBUGME -#define DEBUG_TYPE "IOExpansion" -#if !defined(NDEBUG) -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME (false) -#endif -#include "AMDIL.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class MachineFunction; - class AMDILKernelManager; - class AMDILMachineFunctionInfo; - class AMDILSubtarget; - class MachineInstr; - class Constant; - class TargetInstrInfo; - class Type; - typedef enum { - NO_PACKING = 0, - PACK_V2I8, - PACK_V4I8, - PACK_V2I16, - PACK_V4I16, - UNPACK_V2I8, - UNPACK_V4I8, - UNPACK_V2I16, - UNPACK_V4I16, - UNPACK_LAST - } REG_PACKED_TYPE; - class AMDILIOExpansion : public MachineFunctionPass - { - public: - virtual ~AMDILIOExpansion(); - virtual const char* getPassName() const; - bool runOnMachineFunction(MachineFunction &MF); - static char ID; - protected: - AMDILIOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); - TargetMachine &TM; - // - // @param MI Machine instruction to check. - // @brief checks to see if the machine instruction - // is an I/O instruction or not. - // - // @return true if I/O, false otherwise. - // - virtual bool - isIOInstruction(MachineInstr *MI); - // Wrapper function that calls the appropriate I/O - // expansion function based on the instruction type. - virtual void - expandIOInstruction(MachineInstr *MI); - virtual void - expandGlobalStore(MachineInstr *MI) = 0; - virtual void - expandLocalStore(MachineInstr *MI) = 0; - virtual void - expandRegionStore(MachineInstr *MI) = 0; - virtual void - expandPrivateStore(MachineInstr *MI) = 0; - virtual void - expandGlobalLoad(MachineInstr *MI) = 0; - virtual void - expandRegionLoad(MachineInstr *MI) = 0; - virtual void - expandLocalLoad(MachineInstr *MI) = 0; - virtual void - expandPrivateLoad(MachineInstr *MI) = 0; - virtual void - expandConstantLoad(MachineInstr *MI) = 0; - virtual void - expandConstantPoolLoad(MachineInstr *MI) = 0; - bool - isAddrCalcInstr(MachineInstr *MI); - bool - isExtendLoad(MachineInstr *MI); - bool - isHardwareRegion(MachineInstr *MI); - bool - isHardwareLocal(MachineInstr *MI); - bool - isPackedData(MachineInstr *MI); - bool - isStaticCPLoad(MachineInstr *MI); - bool - isNbitType(Type *MI, uint32_t nBits, bool isScalar = true); - bool - isHardwareInst(MachineInstr *MI); - uint32_t - getMemorySize(MachineInstr *MI); - REG_PACKED_TYPE - getPackedID(MachineInstr *MI); - uint32_t - getShiftSize(MachineInstr *MI); - uint32_t - getPointerID(MachineInstr *MI); - void - expandTruncData(MachineInstr *MI); - void - expandLoadStartCode(MachineInstr *MI); - virtual void - expandStoreSetupCode(MachineInstr *MI) = 0; - void - expandAddressCalc(MachineInstr *MI); - void - expandLongExtend(MachineInstr *MI, - uint32_t numComponents, uint32_t size, bool signedShift); - void - expandLongExtendSub32(MachineInstr *MI, - unsigned SHLop, unsigned SHRop, unsigned USHRop, - unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm, - unsigned LCRop, bool signedShift); - void - expandIntegerExtend(MachineInstr *MI, unsigned, unsigned, unsigned); - void - expandExtendLoad(MachineInstr *MI); - virtual void - expandPackedData(MachineInstr *MI) = 0; - void - emitCPInst(MachineInstr* MI, const Constant* C, - AMDILKernelManager* KM, int swizzle, bool ExtFPLoad); - - bool mDebug; - const AMDILSubtarget *mSTM; - AMDILKernelManager *mKM; - MachineBasicBlock *mBB; - AMDILMachineFunctionInfo *mMFI; - const TargetInstrInfo *mTII; - bool saveInst; - private: - void - emitStaticCPLoad(MachineInstr* MI, int swizzle, int id, - bool ExtFPLoad); - }; // class AMDILIOExpansion - - // Intermediate class that holds I/O code expansion that is common to the - // 7XX, Evergreen and Northern Island family of chips. - class AMDIL789IOExpansion : public AMDILIOExpansion { - public: - virtual ~AMDIL789IOExpansion(); - virtual const char* getPassName() const; - protected: - AMDIL789IOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); - virtual void - expandGlobalStore(MachineInstr *MI) = 0; - virtual void - expandLocalStore(MachineInstr *MI) = 0; - virtual void - expandRegionStore(MachineInstr *MI) = 0; - virtual void - expandGlobalLoad(MachineInstr *MI) = 0; - virtual void - expandRegionLoad(MachineInstr *MI) = 0; - virtual void - expandLocalLoad(MachineInstr *MI) = 0; - virtual void - expandPrivateStore(MachineInstr *MI); - virtual void - expandConstantLoad(MachineInstr *MI); - virtual void - expandPrivateLoad(MachineInstr *MI) ; - virtual void - expandConstantPoolLoad(MachineInstr *MI); - void - expandStoreSetupCode(MachineInstr *MI); - virtual void - expandPackedData(MachineInstr *MI); - private: - void emitVectorAddressCalc(MachineInstr *MI, bool is32bit, - bool needsSelect); - void emitVectorSwitchWrite(MachineInstr *MI, bool is32bit); - void emitComponentExtract(MachineInstr *MI, unsigned flag, unsigned src, - unsigned dst, bool beforeInst); - void emitDataLoadSelect(MachineInstr *MI); - }; // class AMDIL789IOExpansion - // Class that handles I/O emission for the 7XX family of devices. - class AMDIL7XXIOExpansion : public AMDIL789IOExpansion { - public: - AMDIL7XXIOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); - - ~AMDIL7XXIOExpansion(); - const char* getPassName() const; - protected: - void - expandGlobalStore(MachineInstr *MI); - void - expandLocalStore(MachineInstr *MI); - void - expandRegionStore(MachineInstr *MI); - void - expandGlobalLoad(MachineInstr *MI); - void - expandRegionLoad(MachineInstr *MI); - void - expandLocalLoad(MachineInstr *MI); - }; // class AMDIL7XXIOExpansion - - // Class that handles image functions to expand them into the - // correct set of I/O instructions. - class AMDILImageExpansion : public AMDIL789IOExpansion { - public: - AMDILImageExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); - - virtual ~AMDILImageExpansion(); - protected: - // - // @param MI Instruction iterator that has the sample instruction - // that needs to be taken care of. - // @brief transforms the __amdil_sample_data function call into a - // sample instruction in IL. - // - // @warning This function only works correctly if all functions get - // inlined - // - virtual void - expandImageLoad(MachineBasicBlock *BB, MachineInstr *MI); - // - // @param MI Instruction iterator that has the write instruction that - // needs to be taken care of. - // @brief transforms the __amdil_write_data function call into a - // simple UAV write instruction in IL. - // - // @warning This function only works correctly if all functions get - // inlined - // - virtual void - expandImageStore(MachineBasicBlock *BB, MachineInstr *MI); - // - // @param MI Instruction interator that has the image parameter - // instruction - // @brief transforms the __amdil_get_image_params function call into - // a copy of data from a specific constant buffer to the register - // - // @warning This function only works correctly if all functions get - // inlined - // - virtual void - expandImageParam(MachineBasicBlock *BB, MachineInstr *MI); - - // - // @param MI Insturction that points to the image - // @brief transforms __amdil_sample_data into a sequence of - // if/else that selects the correct sample instruction. - // - // @warning This function is inefficient and works with no - // inlining. - // - virtual void - expandInefficientImageLoad(MachineBasicBlock *BB, MachineInstr *MI); - private: - AMDILImageExpansion(); // Do not implement. - - }; // class AMDILImageExpansion - - // Class that expands IO instructions for Evergreen and Northern - // Island family of devices. - class AMDILEGIOExpansion : public AMDILImageExpansion { - public: - AMDILEGIOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); - - virtual ~AMDILEGIOExpansion(); - const char* getPassName() const; - protected: - virtual bool - isIOInstruction(MachineInstr *MI); - virtual void - expandIOInstruction(MachineInstr *MI); - bool - isImageIO(MachineInstr *MI); - virtual void - expandGlobalStore(MachineInstr *MI); - void - expandLocalStore(MachineInstr *MI); - void - expandRegionStore(MachineInstr *MI); - virtual void - expandGlobalLoad(MachineInstr *MI); - void - expandRegionLoad(MachineInstr *MI); - void - expandLocalLoad(MachineInstr *MI); - virtual bool - isCacheableOp(MachineInstr *MI); - void - expandStoreSetupCode(MachineInstr *MI); - void - expandPackedData(MachineInstr *MI); - private: - bool - isArenaOp(MachineInstr *MI); - void - expandArenaSetup(MachineInstr *MI); - }; // class AMDILEGIOExpansion -} // namespace llvm -#endif // _AMDILIOEXPANSION_H_ diff --git a/src/gallium/drivers/radeon/AMDILImageExpansion.cpp b/src/gallium/drivers/radeon/AMDILImageExpansion.cpp deleted file mode 100644 index e6fe37a6b99..00000000000 --- a/src/gallium/drivers/radeon/AMDILImageExpansion.cpp +++ /dev/null @@ -1,171 +0,0 @@ -//===-- AMDILImageExpansion.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// @file AMDILImageExpansion.cpp -// @details Implementatino of the Image expansion class for image capable devices -// -#include "AMDILIOExpansion.h" -#include "AMDILKernelManager.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Value.h" - -using namespace llvm; - -AMDILImageExpansion::AMDILImageExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) - : AMDIL789IOExpansion(tm AMDIL_OPT_LEVEL_VAR) -{ -} - -AMDILImageExpansion::~AMDILImageExpansion() -{ -} -void AMDILImageExpansion::expandInefficientImageLoad( - MachineBasicBlock *mBB, MachineInstr *MI) -{ -#if 0 - const llvm::StringRef &name = MI->getOperand(0).getGlobal()->getName(); - const char *tReg1, *tReg2, *tReg3, *tReg4; - tReg1 = mASM->getRegisterName(MI->getOperand(1).getReg()); - if (MI->getOperand(2).isReg()) { - tReg2 = mASM->getRegisterName(MI->getOperand(2).getReg()); - } else { - tReg2 = mASM->getRegisterName(AMDIL::R1); - O << "\tmov " << tReg2 << ", l" << MI->getOperand(2).getImm() << "\n"; - } - if (MI->getOperand(3).isReg()) { - tReg3 = mASM->getRegisterName(MI->getOperand(3).getReg()); - } else { - tReg3 = mASM->getRegisterName(AMDIL::R2); - O << "\tmov " << tReg3 << ", l" << MI->getOperand(3).getImm() << "\n"; - } - if (MI->getOperand(4).isReg()) { - tReg4 = mASM->getRegisterName(MI->getOperand(4).getReg()); - } else { - tReg4 = mASM->getRegisterName(AMDIL::R3); - O << "\tmov " << tReg2 << ", l" << MI->getOperand(4).getImm() << "\n"; - } - bool internalSampler = false; - //bool linear = true; - unsigned ImageCount = 3; // OPENCL_MAX_READ_IMAGES - unsigned SamplerCount = 3; // OPENCL_MAX_SAMPLERS - if (ImageCount - 1) { - O << "\tswitch " << mASM->getRegisterName(MI->getOperand(1).getReg()) - << "\n"; - } - for (unsigned rID = 0; rID < ImageCount; ++rID) { - if (ImageCount - 1) { - if (!rID) { - O << "\tdefault\n"; - } else { - O << "\tcase " << rID << "\n" ; - } - O << "\tswitch " << mASM->getRegisterName(MI->getOperand(2).getReg()) - << "\n"; - } - for (unsigned sID = 0; sID < SamplerCount; ++sID) { - if (SamplerCount - 1) { - if (!sID) { - O << "\tdefault\n"; - } else { - O << "\tcase " << sID << "\n" ; - } - } - if (internalSampler) { - // Check if sampler has normalized setting. - O << "\tand r0.x, " << tReg2 << ".x, l0.y\n" - << "\tif_logicalz r0.x\n" - << "\tflr " << tReg3 << ", " << tReg3 << "\n" - << "\tsample_resource(" << rID << ")_sampler(" - << sID << ")_coordtype(unnormalized) " - << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n" - << "\telse\n" - << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n" - << "\titof " << tReg2 << ", cb1[" << tReg1 << ".x].xyz\n" - << "\tmul " << tReg3 << ", " << tReg3 << ", " << tReg2 << "\n" - << "\tflr " << tReg3 << ", " << tReg3 << "\n" - << "\tmul " << tReg3 << ", " << tReg3 << ", cb1[" - << tReg1 << ".y].xyz\n" - << "\tsample_resource(" << rID << ")_sampler(" - << sID << ")_coordtype(normalized) " - << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n" - << "\tendif\n"; - } else { - O << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n" - // Check if sampler has normalized setting. - << "\tand r0, " << tReg2 << ".x, l0.y\n" - // Convert image dimensions to float. - << "\titof " << tReg4 << ", cb1[" << tReg1 << ".x].xyz\n" - // Move into R0 1 if unnormalized or dimensions if normalized. - << "\tcmov_logical r0, r0, " << tReg4 << ", r1.1111\n" - // Make coordinates unnormalized. - << "\tmul " << tReg3 << ", r0, " << tReg3 << "\n" - // Get linear filtering if set. - << "\tand " << tReg4 << ", " << tReg2 << ".x, l6.x\n" - // Save unnormalized coordinates in R0. - << "\tmov r0, " << tReg3 << "\n" - // Floor the coordinates due to HW incompatibility with precision - // requirements. - << "\tflr " << tReg3 << ", " << tReg3 << "\n" - // get Origianl coordinates (without floor) if linear filtering - << "\tcmov_logical " << tReg3 << ", " << tReg4 - << ".xxxx, r0, " << tReg3 << "\n" - // Normalize the coordinates with multiplying by 1/dimensions - << "\tmul " << tReg3 << ", " << tReg3 << ", cb1[" - << tReg1 << ".y].xyz\n" - << "\tsample_resource(" << rID << ")_sampler(" - << sID << ")_coordtype(normalized) " - << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"; - } - if (SamplerCount - 1) { - O << "\tbreak\n"; - } - } - if (SamplerCount - 1) { - O << "\tendswitch\n"; - } - if (ImageCount - 1) { - O << "\tbreak\n"; - } - } - if (ImageCount - 1) { - O << "\tendswitch\n"; - } -#endif -} - void -AMDILImageExpansion::expandImageLoad(MachineBasicBlock *mBB, MachineInstr *MI) -{ - uint32_t imageID = getPointerID(MI); - MI->getOperand(1).ChangeToImmediate(imageID); - saveInst = true; -} - void -AMDILImageExpansion::expandImageStore(MachineBasicBlock *mBB, MachineInstr *MI) -{ - uint32_t imageID = getPointerID(MI); - mKM->setOutputInst(); - MI->getOperand(0).ChangeToImmediate(imageID); - saveInst = true; -} - void -AMDILImageExpansion::expandImageParam(MachineBasicBlock *mBB, MachineInstr *MI) -{ - MachineBasicBlock::iterator I = *MI; - uint32_t ID = getPointerID(MI); - DebugLoc DL = MI->getDebugLoc(); - BuildMI(*mBB, I, DL, mTII->get(AMDIL::CBLOAD), - MI->getOperand(0).getReg()) - .addImm(ID) - .addImm(1); -} diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp index cc565081e10..f7ce8b1c926 100644 --- a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp @@ -188,7 +188,6 @@ bool AMDILPassConfig::addPreEmitPass() PM.add(createAMDILCFGPreparationPass(*TM)); PM.add(createAMDILCFGStructurizerPass(*TM)); PM.add(createAMDILLiteralManager(*TM)); - PM.add(createAMDILIOExpansion(*TM)); return true; } diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 138b562fb07..156aaa00903 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -19,23 +19,18 @@ GENERATED_SOURCES := \ CPP_SOURCES := \ AMDIL7XXDevice.cpp \ - AMDIL7XXIOExpansion.cpp \ - AMDIL789IOExpansion.cpp \ AMDILAsmBackend.cpp \ AMDILBarrierDetect.cpp \ AMDILCFGStructurizer.cpp \ AMDILDevice.cpp \ AMDILDeviceInfo.cpp \ - AMDILEGIOExpansion.cpp \ AMDILEvergreenDevice.cpp \ AMDILELFWriterInfo.cpp \ AMDILFrameLowering.cpp \ AMDILGlobalManager.cpp \ - AMDILImageExpansion.cpp \ AMDILInliner.cpp \ AMDILInstrInfo.cpp \ AMDILIntrinsicInfo.cpp \ - AMDILIOExpansion.cpp \ AMDILISelDAGToDAG.cpp \ AMDILISelLowering.cpp \ AMDILKernelManager.cpp \ |